/**
 * Converts a JSON request carrying ARFF content into a {@link Dataset}.
 *
 * <p>The request must be an {@link ArffJsonRequest}; its textual form is parsed
 * with Weka's {@code ArffReader}. The class index is taken from the request when
 * non-negative, otherwise it defaults to the last attribute.</p>
 *
 * @param request the incoming JSON request; must be an {@code ArffJsonRequest}
 * @return a {@code Dataset} wrapping the parsed instances
 * @throws ParseException if the request has the wrong type or the ARFF content
 *         cannot be parsed (the underlying failure is attached as the cause)
 */
@Override
public Dataset mapStringToModel(JsonRequest request) throws ParseException {
    if (!(request instanceof ArffJsonRequest)) {
        throw new ParseException("Not an instance of " + ArffJsonRequest.class, -1);
    }
    ArffJsonRequest arff = (ArffJsonRequest) request;
    try {
        ArffReader reader = new ArffReader(new StringReader(request.toString()));
        Instances instances = reader.getData();
        int requestedClassIndex = arff.getClassIndex();
        // Negative index means "unspecified" -> fall back to the last attribute.
        instances.setClassIndex(requestedClassIndex >= 0 ? requestedClassIndex : instances.numAttributes() - 1);
        return new Dataset(instances);
    } catch (Exception e) {
        ParseException wrapped = new ParseException("Cannot convert JSON stream to ARFF", -1);
        wrapped.initCause(e);
        throw wrapped;
    }
}
/**
 * Loads a dataset from an ARFF file and marks the last attribute as the class.
 *
 * @param filename path of the ARFF file to read
 * @return the parsed instances with the class index set to the last attribute
 * @throws IOException if the file cannot be read or parsed
 */
public Instances loadInstancesFromArffFile(String filename) throws IOException {
    LOGGER.trace("Loading data from ARFF file [{}].", filename);
    // try-with-resources guarantees the reader is closed even when parsing
    // throws; the original code leaked both readers on any exception.
    try (BufferedReader bufferedReader = new BufferedReader(new FileReader(filename))) {
        ArffReader arffReader = new ArffReader(bufferedReader);
        Instances data = arffReader.getData();
        data.setClassIndex(data.numAttributes() - 1);
        return data;
    }
}
@Override public Instances getDataSet() throws IOException { if (m_sourceReader == null) { throw new IOException("No source has been specified"); } if (getRetrieval() == INCREMENTAL) { throw new IOException( "Cannot mix getting instances in both incremental and batch modes"); } setRetrieval(BATCH); if (m_structure == null) { getStructure(); } while (readData(true)) { ; } m_dataDumper.flush(); m_dataDumper.close(); // make final structure makeStructure(); Reader sr = new BufferedReader(new FileReader(m_tempFile)); ArffReader initialArff = new ArffReader(sr, m_structure, 0, m_fieldSeparatorAndEnclosures); Instances initialInsts = initialArff.getData(); sr.close(); initialArff = null; return initialInsts; }
/**
 * Loads the named training dataset from {@code src/main/resources/<data>.arff}.
 *
 * @param data base name of the ARFF file (without extension)
 * @return the parsed instances
 * @throws IOException if the file cannot be read or parsed
 */
protected Instances readData(String data) throws IOException {
    String trainFileName = "src/main/resources/" + data + ".arff";
    // try-with-resources closes the reader even when parsing throws;
    // the original leaked it on any exception.
    try (BufferedReader reader = new BufferedReader(new FileReader(trainFileName))) {
        ArffReader arff = new ArffReader(reader);
        return arff.getData();
    }
}
/**
 * This method loads a dataset in ARFF format. If the file does not exist,
 * or it has a wrong format, the attribute trainData is null.
 *
 * @param fileName
 *            The name of the file that stores the dataset.
 */
public void loadDataset(String fileName) {
    // try-with-resources closes the reader even on failure; previously the
    // close() was skipped whenever getData() threw, leaking the stream.
    try (BufferedReader reader = new BufferedReader(new FileReader(fileName))) {
        ArffReader arff = new ArffReader(reader);
        trainData = arff.getData();
        System.out.println("===== Loaded dataset: " + fileName + " =====");
    } catch (IOException e) {
        // Deliberate best-effort: report and leave trainData as-is (null on
        // first use), matching the documented contract.
        System.out.println("Problem found when reading: " + fileName);
    }
}
/**
 * Merges several ARFF files into one, writing the result incrementally.
 *
 * <p>The structure (and the given relation name) is taken from the first file;
 * all files are assumed to share a compatible attribute layout. Instances are
 * appended in the order the paths are given.</p>
 *
 * @param relationName relation name to stamp on the merged output
 * @param destPath     path of the merged ARFF file to create
 * @param dataSetPaths paths of the ARFF files to merge, in order
 * @throws IOException if any file cannot be read or written
 */
public static void mergeAndWrite(String relationName, String destPath, String... dataSetPaths) throws IOException {
    ArffSaver saver = new ArffSaver();
    saver.setFile(new File(destPath));
    saver.setRetrieval(Saver.INCREMENTAL);
    boolean first = true;
    for (String p : dataSetPaths) {
        // try-with-resources: the original never closed these readers,
        // leaking one file handle per input file.
        try (BufferedReader in = new BufferedReader(new FileReader(p))) {
            ArffReader reader = new ArffReader(in);
            Instances dataSet = reader.getData();
            if (first) {
                dataSet.setRelationName(relationName);
                saver.setStructure(dataSet);
                first = false;
            }
            for (int i = 0; i < dataSet.numInstances(); ++i) {
                saver.writeIncremental(dataSet.instance(i));
            }
        }
    }
    saver.getWriter().flush();
}
/**
 * Make a lattice over the given variables of the dataset.
 *
 * <p>Convenience overload that delegates to the three-argument variant with
 * {@code hasMissingValues = true}, i.e. it assumes the data may contain
 * missing values.</p>
 *
 * @param structure the ARFF header describing the attributes
 * @param loader    reader positioned at the first data row
 * @return the model/lattice data bundle
 * @throws IOException if reading instances fails
 */
public static ChordalysisModeller.Data makeModelData(Instances structure, ArffReader loader) throws IOException { return makeModelData(structure, loader, true); }
/** Make a lattice over the given variables of the dataset.*/ public static ChordalysisModeller.Data makeModelData(Instances structure, ArffReader loader, boolean hasMissingValues) throws IOException { // Access the size: int nbInstances = 0; int nbVariables = structure.numAttributes(); // Create the array for the model int[] variables = new int[nbVariables]; // Create the bitset for the lattice int[] nbValuesForAttribute = new int[nbVariables]; // Also used by the model BitSet[][] presence = new BitSet[nbVariables][]; // --- 1 --- // For each attribute: for (int a = 0; a < nbVariables; a++) { // --- For the model: variables[a] = a; // --- For the lattice: nbValuesForAttribute[a] = structure.attribute(a).numValues(); // --- --- Handle the case of missing values: +1 for missing if (hasMissingValues) { nbValuesForAttribute[a]++; } // --- --- Build the bitset presence[a] = new BitSet[nbValuesForAttribute[a]]; for (int v = 0; v < presence[a].length; v++) { presence[a][v] = new BitSet(); } } // --- 2 --- // For each instance (only for the lattice)... Instance row; while ((row = loader.readInstance(structure)) != null) { boolean skipRow = false; for (int a = 0; a < nbVariables; a++) { int indexOfValue; if (row.isMissing(a)) { if (hasMissingValues) { indexOfValue = structure.attribute(a).numValues() ; } else { System.err.println("Found missing while I was told I wouldn't; ignoring whole row"); skipRow = true; break; } } else { String value = row.stringValue(a); indexOfValue = row.attribute(a).indexOfValue(value); } presence[a][indexOfValue].set(nbInstances); } if (!skipRow) { nbInstances++; } } // --- 3 --- // Create the data: return new ChordalysisModeller.Data( new DecomposableModel(variables, nbValuesForAttribute), new Lattice(nbVariables, nbInstances, nbValuesForAttribute, presence) ); }
/**
 * Loads an ARFF dataset from disk and marks the last attribute as the class.
 *
 * @param dataSetPath the ARFF file to load
 * @return the parsed instances with the class index set to the last attribute
 * @throws IOException           if the file cannot be read or parsed
 * @throws FileNotFoundException if the file does not exist
 */
public static Instances loadDataSet(File dataSetPath) throws IOException, FileNotFoundException {
    // try-with-resources closes the reader even when parsing throws;
    // the original leaked it on any exception.
    try (BufferedReader in = new BufferedReader(new FileReader(dataSetPath))) {
        ArffReader reader = new ArffReader(in);
        Instances dataSet = reader.getData();
        dataSet.setClassIndex(dataSet.numAttributes() - 1);
        return dataSet;
    }
}
/**
 * Reads a single instance from the reader and appends it to the dataset.
 * Automatically expands the dataset if it is not large enough to hold the
 * instance. This method does not check for carriage return at the end of the
 * line.
 *
 * @param reader the reader
 * @return false if end of file has been reached
 * @throws IOException if the information is not read successfully
 * @deprecated instead of using this method in conjunction with the
 *             <code>readInstance(Reader)</code> method, one should use the
 *             <code>ArffLoader</code> or <code>DataSource</code> class
 *             instead.
 * @see weka.core.converters.ArffLoader
 * @see weka.core.converters.ConverterUtils.DataSource
 */
@Deprecated
public boolean readInstance(Reader reader) throws IOException {
    final ArffReader parser = new ArffReader(reader, this, m_Lines, 1);
    final Instance parsed = parser.readInstance(parser.getData(), false);
    // Remember how far the parser got so the next call resumes correctly.
    m_Lines = parser.getLineNo();
    if (parsed == null) {
        return false;
    }
    add(parsed);
    return true;
}
/**
 * Reads a single instance from the reader and appends it
 * to the dataset. Automatically expands the dataset if it
 * is not large enough to hold the instance. This method does
 * not check for carriage return at the end of the line.
 *
 * @param reader the reader
 * @return false if end of file has been reached
 * @throws IOException if the information is not read
 *           successfully
 * @deprecated instead of using this method in conjunction with the
 *             <code>readInstance(Reader)</code> method, one should use the
 *             <code>ArffLoader</code> or <code>DataSource</code> class instead.
 * @see weka.core.converters.ArffLoader
 * @see weka.core.converters.ConverterUtils.DataSource
 */
@Deprecated
public boolean readInstance(Reader reader) throws IOException {
    ArffReader arffParser = new ArffReader(reader, this, m_Lines, 1);
    Instance next = arffParser.readInstance(arffParser.getData(), false);
    // Persist the parser's position for subsequent incremental reads.
    m_Lines = arffParser.getLineNo();
    boolean gotOne = (next != null);
    if (gotOne) {
        add(next);
    }
    return gotOne;
}
/**
 * Reads a single instance from the reader and appends it to the dataset.
 * Automatically expands the dataset if it is not large enough to hold the
 * instance. This method does not check for carriage return at the end of the
 * line.
 *
 * @param reader the reader
 * @return false if end of file has been reached
 * @throws IOException if the information is not read successfully
 * @see weka.core.converters.ArffLoader
 * @see weka.core.converters.ConverterUtils.DataSource
 * @deprecated instead of using this method in conjunction with the
 *             <code>readInstance(Reader)</code> method, one should use the
 *             <code>ArffLoader</code> or <code>DataSource</code> class
 *             instead.
 */
@Deprecated
public boolean readInstance(Reader reader) throws IOException {
    ArffReader incremental = new ArffReader(reader, this, m_Lines, 1);
    Instance instance = incremental.readInstance(incremental.getData(), false);
    // Track the line number reached so repeated calls continue from here.
    m_Lines = incremental.getLineNo();
    if (instance == null) {
        return false;
    }
    add(instance);
    return true;
}