/**
 * Method for testing the class from the command line.
 *
 * @param args The supplied command line arguments.
 */
public static void main(String[] args) {
  if (args.length != 1) {
    System.err.println("Usage: CoverTree <ARFF or CSV file>");
    System.exit(-1);
  }
  try {
    Instances insts = null;
    if (args[0].endsWith(".csv")) {
      CSVLoader csv = new CSVLoader();
      csv.setFile(new File(args[0]));
      insts = csv.getDataSet();
    } else {
      insts = new Instances(new BufferedReader(new FileReader(args[0])));
    }
    CoverTree tree = new CoverTree();
    tree.setInstances(insts);
    print("Created data tree:\n");
    print(0, tree.m_Root);
    println("");
  } catch (Exception ex) {
    ex.printStackTrace();
  }
}
public static void CSVtoARFF(String csvfilename, String arfffilename) {
  try {
    // load CSV
    CSVLoader loader = new CSVLoader();
    loader.setSource(new File(csvfilename));
    Instances data = loader.getDataSet();

    // save ARFF
    ArffSaver saver = new ArffSaver();
    saver.setInstances(data);
    // setFile also sets the destination, so a separate setDestination call is redundant
    saver.setFile(new File(arfffilename));
    saver.writeBatch();
  } catch (Exception e) {
    e.printStackTrace();
  }
}
/**
 * Takes 2 arguments:
 * - CSV input file
 * - ARFF output file
 */
public static void main(String[] args) throws Exception {
  // note: the command-line arguments are ignored; the file names are hardcoded here
  String[] files = {"trainingSet.csv", "trainingSet.arff"};

  // load CSV
  CSVLoader loader = new CSVLoader();
  loader.setSource(new File(files[0]));
  Instances data = loader.getDataSet();

  // save ARFF
  ArffSaver saver = new ArffSaver();
  saver.setInstances(data);
  File outputFile = new File(files[1]);
  saver.setFile(outputFile);
  // saver.setDestination(outputFile); // redundant: setFile already sets the destination
  saver.writeBatch();
}
/**
 * Loads a data set from a CSV file.
 *
 * @param path the path of the CSV file
 * @return the data set loaded from the file
 */
public static Instances loadDataFromCsvFile(String path) throws IOException {
  CSVLoader loader = new CSVLoader();
  loader.setSource(new File(path));
  Instances data = loader.getDataSet();
  System.out.println("\nHeader of dataset:\n");
  System.out.println(new Instances(data, 0));
  return data;
}
/**
 * Loads a CSV file and sets the last attribute as the class attribute.
 *
 * @param path the path of the CSV file
 * @return the loaded data as Instances
 * @throws Exception IO error.
 */
public static Instances loadCSV(String path) throws Exception {
  CSVLoader csv = new CSVLoader();
  csv.setSource(new File(path));
  Instances data = csv.getDataSet();
  data.setClassIndex(data.numAttributes() - 1);
  return data;
}
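// For comparison: Weka's ConverterUtils.DataSource picks the loader from the
// file extension automatically, so one helper can cover both ARFF and CSV.
// A minimal self-contained sketch (the file name "data.csv" is illustrative):
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class DataSourceDemo {
  public static void main(String[] args) throws Exception {
    DataSource source = new DataSource("data.csv");
    Instances data = source.getDataSet();
    if (data.classIndex() == -1) {
      data.setClassIndex(data.numAttributes() - 1); // same last-attribute convention as above
    }
    System.out.println(data.numInstances() + " instances loaded");
  }
}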
public void generateArff() throws Exception {
  CSVLoader loader = new CSVLoader();
  // Set options
  loader.setNominalAttributes("last");
  loader.setStringAttributes("");
  loader.setMissingValue("?");
  loader.setFieldSeparator("\t");
  loader.setFile(new File(seqConfig.getOutDir().getAbsolutePath() + File.separator + "tmpCounts.mat"));
  Instances data = loader.getDataSet();

  // Set subgroup index
  if (data.classIndex() == -1)
    data.setClassIndex(data.numAttributes() - 1);

  // First, get weight index
  int wInd = data.numAttributes() - 2;
  // Now set weights
  for (int i = 0; i < data.numInstances(); i++) {
    double weight = data.instance(i).value(wInd);
    data.instance(i).setWeight(weight);
  }
  // Now delete the weight attribute
  data.deleteAttributeAt(wInd);

  // Save the arff file
  ArffSaver saver = new ArffSaver();
  saver.setFile(new File(seqConfig.getOutDir().getAbsolutePath() + File.separator + seqConfig.getArffOutName()));
  saver.setInstances(data);
  saver.writeBatch();
}
/**
 * Loads results from a set of instances contained in the supplied file.
 *
 * @param f a value of type 'File'
 */
protected void setInstancesFromFile(File f) {
  String fileType = f.getName();
  try {
    m_FromLab.setText("Reading from file...");
    if (f.getName().toLowerCase().endsWith(Instances.FILE_EXTENSION)) {
      fileType = "arff";
      Reader r = new BufferedReader(new FileReader(f));
      setInstances(new Instances(r));
      r.close();
    } else if (f.getName().toLowerCase().endsWith(CSVLoader.FILE_EXTENSION)) {
      fileType = "csv";
      CSVLoader cnv = new CSVLoader();
      cnv.setSource(f);
      Instances inst = cnv.getDataSet();
      setInstances(inst);
    } else {
      throw new Exception("Unrecognized file type");
    }
  } catch (Exception ex) {
    m_FromLab.setText("File '" + f.getName() + "' not recognised as an " + fileType + " file.");
    if (JOptionPane.showOptionDialog(ResultsPanel.this,
        "File '" + f.getName() + "' not recognised as an " + fileType + " file.\n"
            + "Reason:\n" + ex.getMessage(),
        "Load Instances", 0, JOptionPane.ERROR_MESSAGE, null,
        new String[] { "OK" }, null) == 1) {
    }
  }
}
public static void CSVToARFF(File input, File output) throws IOException {
  CSVLoader csvDataset = new CSVLoader();
  csvDataset.setSource(input);
  Instances arffDataset = csvDataset.getDataSet();

  ArffSaver saver = new ArffSaver();
  saver.setInstances(arffDataset);
  saver.setFile(output);
  saver.writeBatch();
}
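// A minimal usage sketch for the converter above; the file names are
// illustrative, not part of the original snippet:
public static void main(String[] args) throws IOException {
  CSVToARFF(new File("dataset.csv"), new File("dataset.arff"));
}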
public void trainClassifier(Classifier classifier, File trainingDataset, FileOutputStream trainingModel,
    Integer crossValidationFoldNumber) throws Exception {
  CSVLoader csvLoader = new CSVLoader();
  csvLoader.setSource(trainingDataset);
  Instances instances = csvLoader.getDataSet();

  switch (classifier) {
    case KNN:
      // common heuristic: K = ceil(sqrt(n))
      int K = (int) Math.ceil(Math.sqrt(instances.numInstances()));
      this.classifier = new IBk(K);
      break;
    case NB:
      this.classifier = new NaiveBayes();
  }

  if (instances.classIndex() == -1) {
    instances.setClassIndex(instances.numAttributes() - 1);
  }

  this.classifier.buildClassifier(instances);

  if (crossValidationFoldNumber > 0) {
    Evaluation evaluation = new Evaluation(instances);
    evaluation.crossValidateModel(this.classifier, instances, crossValidationFoldNumber, new Random(1));
    kappa = evaluation.kappa();
    fMeasure = evaluation.weightedFMeasure();
    confusionMatrix = evaluation.toMatrixString("Confusion matrix: ");
  }

  // serialize the trained model to the supplied stream
  ObjectOutputStream outputStream = new ObjectOutputStream(trainingModel);
  outputStream.writeObject(this.classifier);
  outputStream.flush();
  outputStream.close();
}
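// For completeness, a hedged sketch of the reverse step: reading the serialized
// model back and classifying one instance. "model.bin" is an illustrative file
// name, and weka.classifiers.Classifier is spelled out because the method above
// already uses an enum named Classifier.
weka.classifiers.Classifier restored =
    (weka.classifiers.Classifier) weka.core.SerializationHelper.read("model.bin");
double prediction = restored.classifyInstance(instances.instance(0));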
@Override
public List<ComplexDataObject> parse(String filename) throws IOException {
  CSVLoader loader = new CSVLoader();
  loader.setSource(new File(filename));
  Instances instances = loader.getDataSet();
  List<ComplexDataObject> data = new ArrayList<>();

  // Step 1: create metaMapping
  Map<Integer, Entry<String, Class<?>>> metaMapping = WekaTools.getAttributeSchema(instances);

  // Step 2: create ComplexDataObjects
  for (int zeile = 0; zeile < instances.numInstances(); zeile++) { // zeile = row
    Instance instance = instances.instance(zeile);
    ComplexDataObject complexDataObject = new ComplexDataObject();
    // parse columns
    for (Integer spalte = 0; spalte < instances.numAttributes(); spalte++) { // spalte = column
      Entry<String, ?> entry = WekaTools.assignEntry(metaMapping, instance, spalte, missingValueIndicator);
      if (entry != null) {
        if (entry.getValue() != null && entry.getValue() instanceof String) {
          // strings that parse as dates are stored as Date objects
          Date date = ParserTools.parseDate((String) entry.getValue());
          if (date != null)
            complexDataObject.add(entry.getKey(), date);
          else
            complexDataObject.add(entry.getKey(), entry.getValue());
        } else
          complexDataObject.add(entry.getKey(), entry.getValue());
      } else
        throw new NullArgumentException();
    }
    data.add(complexDataObject);
  }
  return data;
}
public static void main(String args[]) throws Exception {
  // read the input params
  readParams(args);

  CSVLoader loader = new CSVLoader();
  loader.setSource(inFile);
  Instances data = loader.getDataSet();
  System.setErr(err); // hack to avoid some error messages

  // Create the KMeans object.
  SimpleKMeans kmeans = new SimpleKMeans();
  kmeans.setNumClusters(K);
  kmeans.setMaxIterations(maxIteration);
  kmeans.setPreserveInstancesOrder(true);

  // Perform K-Means clustering.
  try {
    kmeans.buildClusterer(data);
  } catch (Exception ex) {
    System.err.println("Unable to build Clusterer: " + ex.getMessage());
    ex.printStackTrace();
  }

  // print out the cluster centroids
  Instances centroids = kmeans.getClusterCentroids();
  for (int i = 0; i < K; i++) {
    System.out.print("Cluster " + i + " size: " + kmeans.getClusterSizes()[i]);
    System.out.println(" Centroid: " + centroids.instance(i));
  }

  // Print Assignments:
  // int[] assignments = kmeans.getAssignments();
  // System.out.println("Length: " + assignments.length);
  // for (int i = 0; i < assignments.length; i++) {
  //   System.out.println(assignments[i]);
  // }
}
/**
 * Loads results from a set of instances contained in the supplied file.
 *
 * @param f a value of type 'File'
 */
protected void setInstancesFromFile(File f) {
  String fileType = f.getName();
  try {
    m_FromLab.setText(Messages.getInstance().getString("ResultsPanel_SetInstancesFromFile_FromLab_Text"));
    if (f.getName().toLowerCase().endsWith(Instances.FILE_EXTENSION)) {
      fileType = "arff";
      Reader r = new BufferedReader(new FileReader(f));
      setInstances(new Instances(r));
      r.close();
    } else if (f.getName().toLowerCase().endsWith(CSVLoader.FILE_EXTENSION)) {
      fileType = "csv";
      CSVLoader cnv = new CSVLoader();
      cnv.setSource(f);
      Instances inst = cnv.getDataSet();
      setInstances(inst);
    } else {
      throw new Exception(Messages.getInstance().getString("ResultsPanel_SetInstancesFromFile_Error_Text"));
    }
  } catch (Exception ex) {
    m_FromLab.setText(Messages.getInstance().getString("ResultsPanel_SetInstancesFromFile_Error_FromLab_Text_First")
        + f.getName()
        + Messages.getInstance().getString("ResultsPanel_SetInstancesFromFile_Error_FromLab_Text_Second")
        + fileType
        + Messages.getInstance().getString("ResultsPanel_SetInstancesFromFile_Error_FromLab_Text_Third"));
    if (JOptionPane.showOptionDialog(ResultsPanel.this,
        Messages.getInstance().getString("ResultsPanel_SetInstancesFromFile_Error_JOptionPaneShowOptionDialog_Text_First")
            + f.getName()
            + Messages.getInstance().getString("ResultsPanel_SetInstancesFromFile_Error_JOptionPaneShowOptionDialog_Text_Second")
            + fileType
            + Messages.getInstance().getString("ResultsPanel_SetInstancesFromFile_Error_JOptionPaneShowOptionDialog_Text_Third")
            + Messages.getInstance().getString("ResultsPanel_SetInstancesFromFile_Error_JOptionPaneShowOptionDialog_Text_Fourth")
            + ex.getMessage(),
        Messages.getInstance().getString("ResultsPanel_SetInstancesFromFile_Error_JOptionPaneShowOptionDialog_Text_Fifth"),
        0, JOptionPane.ERROR_MESSAGE, null,
        new String[] { Messages.getInstance().getString("ResultsPanel_SetInstancesFromFile_Error_JOptionPaneShowOptionDialog_Text_Sixth") },
        null) == 1) {
    }
  }
}
public Map<String, VariableType> getNomNumMap(File trainFile) throws Exception {
  // read at most the first 50 lines so type inference stays cheap on large files
  StringBuilder sb = new StringBuilder();
  BufferedReader br = new BufferedReader(new FileReader(trainFile));
  int maxLines = 50, lineCounter = 0;
  String line;
  while ((line = br.readLine()) != null && lineCounter < maxLines) {
    sb.append(line).append("\n");
    lineCounter++;
  }
  br.close();

  Map<String, VariableType> nomNumMap = new HashMap<String, VariableType>();
  CSVLoader csvLoader = new CSVLoader();
  csvLoader.setSource(new ByteArrayInputStream(sb.toString().getBytes()));
  Instances wekaData = csvLoader.getDataSet();
  List<Attribute> attributes = Collections.list(wekaData.enumerateAttributes());
  for (Attribute attribute : attributes) {
    if (attribute.isNumeric() || attribute.isDate()) {
      nomNumMap.put(attribute.name(), VariableType.NUMERIC);
    } else if (attribute.isNominal() || attribute.isString()) {
      nomNumMap.put(attribute.name(), VariableType.CATEGORICAL);
    } else {
      // fail loudly on unsupported attribute types; the original only printed
      // the stack trace of an exception it never threw
      throw new RuntimeException(this.getClass().getName() + ": unsupported attribute type for " + attribute.name());
    }
  }
  return nomNumMap;
}
public void loadData() {
  try {
    if (useMultiple) {
      String[] loaders = { "home", "draw", "away" };
      for (String str : loaders) {
        CSVLoader trainLoader = new CSVLoader();
        CSVLoader testLoader = new CSVLoader();
        trainLoader.setSource(Neural.class.getResourceAsStream(
            "/main/resources/tsi/" + folder + "/train" + str + FILENAME_APPEND + ".csv"));
        testLoader.setSource(Neural.class.getResourceAsStream(
            "/main/resources/tsi/" + folder + "/test" + str + FILENAME_APPEND + ".csv"));
        Instances trainSet = trainLoader.getDataSet();
        trainSet.setClassIndex(0);
        // testSet = testingSource.getDataSet(0);
        Instances testSet = testLoader.getDataSet();
        testSet.setClassIndex(0);
        trainSetList.add(trainSet);
        testSetList.add(testSet);
      }
    } else {
      CSVLoader trainLoader = new CSVLoader(), testLoader = new CSVLoader();
      trainLoader.setSource(Neural.class.getResourceAsStream("/main/resources/tsi/" + folder + "/train.csv"));
      // testLoader.setSource(new File(folder + "/test.csv"));
      testLoader.setSource(Neural.class.getResourceAsStream("/main/resources/tsi/" + folder + "/test.csv"));
      trainSet = trainLoader.getDataSet();
      trainSet.setClassIndex(0);
      // testSet = testingSource.getDataSet(0);
      testSet = testLoader.getDataSet();
      testSet.setClassIndex(0);
    }
  } catch (Exception e) {
    e.printStackTrace();
  }
}
public static void main(String[] args) {
  // delegate to Weka's standard file-loader harness inherited from AbstractFileLoader
  CSVLoader loader = new CSVLoader();
  CSVLoader.runFileLoader(loader, args);
}
/**
 * @param args
 */
public static void main(String[] args) {
  JFileChooser chooser = new JFileChooser();
  FileNameExtensionFilter filter = new FileNameExtensionFilter("CSV file", "csv");
  chooser.setFileFilter(filter);
  int returnVal = chooser.showOpenDialog(null);
  if (returnVal != JFileChooser.APPROVE_OPTION) {
    // the user cancelled the dialog; getSelectedFile() would be null
    return;
  }
  System.out.println("You chose to open this file: " + chooser.getSelectedFile().getName());

  CSVLoader loader = new CSVLoader();
  File csvFile = chooser.getSelectedFile();
  if (!csvFile.exists()) {
    System.out.println("The file doesn't exist");
    return;
  }

  double pValue = Double.valueOf(JOptionPane.showInputDialog("Desired p-value ]0,1[", 0.05));
  if (pValue <= 0 || 1 <= pValue) {
    System.out.println("The p-value should be between 0 and 1 excluded.");
    return;
  }

  try {
    loader.setFile(csvFile);
    loader.setNominalAttributes("first-last");
    Instances instances = loader.getDataSet();
    String[] variablesNames = new String[instances.numAttributes()];
    for (int i = 0; i < variablesNames.length; i++) {
      variablesNames[i] = instances.attribute(i).name();
    }
    ChordalysisModeller.Data mydata = LoadWekaInstances.makeModelData(instances);
    ChordalysisModellingSMT modeller = new ChordalysisModellingSMT(mydata, pValue);
    modeller.buildModel();
    DecomposableModel bestModel = modeller.getModel();
    System.out.println("The model selected is:");
    System.out.println(bestModel.toString(variablesNames));
    PrintableModel.display(bestModel, variablesNames);
  } catch (IOException e) {
    System.out.println("I/O error while loading csv file");
    e.printStackTrace();
  }
}
/**
 * @param args
 */
public static void main(String[] args) {
  if (args.length != 3) {
    System.out.println("Usage:\tjava -Xmx1g -jar Chordalysis.jar dataFile pvalue dotOutputFile");
    System.out.println("Example:\tjava -Xmx1g -jar Chordalysis.jar dataset.csv 0.05 graph.dot");
    System.out.println("\nNote:\t'1g' means that you authorize 1GB of memory. "
        + "\nNote:\tIt should be adjusted depending upon the size of your data set (mostly required to load the data set).");
    return;
  }
  System.out.println();

  CSVLoader loader = new CSVLoader();
  File csvFile = new File(args[0]);
  if (!csvFile.exists()) {
    System.out.println("The file doesn't exist");
    return;
  } else {
    System.out.println("Info:\tUsing the dataset file " + csvFile.getAbsolutePath());
  }

  double pValue = Double.valueOf(args[1]);
  if (pValue <= 0 || 1 <= pValue) {
    System.out.println("The p-value should be between 0 and 1 excluded.");
    return;
  } else {
    System.out.println("Info:\tUsing p=" + pValue);
  }

  File outPutFile = new File(args[2]);
  String[] splitted = outPutFile.getName().split("\\.");
  if (splitted.length < 2) {
    System.out.println("The image output file should declare a \".dot\" extension");
    return;
  }

  try {
    loader.setFile(csvFile);
    loader.setNominalAttributes("first-last");
    Instances instances = loader.getDataSet();
    String[] variablesNames = new String[instances.numAttributes()];
    for (int i = 0; i < variablesNames.length; i++) {
      variablesNames[i] = instances.attribute(i).name();
    }
    long start = System.currentTimeMillis();
    ChordalysisModeller.Data mydata = LoadWekaInstances.makeModelData(instances);
    ChordalysisModellingSMT modeller = new ChordalysisModellingSMT(mydata, pValue);
    modeller.buildModel();
    DecomposableModel bestModel = modeller.getModel();
    System.out.println("The model selected is: (selected in " + (System.currentTimeMillis() - start) + "ms)");
    System.out.println(bestModel.toString(variablesNames));
    bestModel.exportDOT(outPutFile, variablesNames);
    System.out.println("DOT file exported - note that the variables with no neighbors won't be included in the graph");
  } catch (IOException e) {
    System.out.println("I/O error while loading csv file");
    e.printStackTrace();
  }
}
/**
 * @param args
 */
public static void main(String[] args) {
  if (args.length != 4) {
    System.out.println("Usage:\tjava -Xmx1g -jar Chordalysis.jar dataFile pvalue imageOutputFile useGUI?");
    System.out.println("Example:\tjava -Xmx1g -jar Chordalysis.jar dataset.csv 0.05 graph.png false");
    System.out.println("\nNote:\t'1g' means that you authorize 1GB of memory. "
        + "\nNote:\tIt should be adjusted depending upon the size of your data set (mostly required to load the data set).");
    return;
  }
  System.out.println();

  CSVLoader loader = new CSVLoader();
  File csvFile = new File(args[0]);
  if (!csvFile.exists()) {
    System.out.println("The file doesn't exist");
    return;
  } else {
    System.out.println("Info:\tUsing the dataset file " + csvFile.getAbsolutePath());
  }

  double pValue = Double.valueOf(args[1]);
  if (pValue <= 0 || 1 <= pValue) {
    System.out.println("The p-value should be between 0 and 1 excluded.");
    return;
  } else {
    System.out.println("Info:\tUsing p=" + pValue);
  }

  File outPutFile = new File(args[2]);
  String[] splitted = outPutFile.getName().split("\\.");
  if (splitted.length < 2) {
    System.out.println("The image output file should declare an extension among \".jpg\", \".png\" or \".gif\"");
    return;
  }
  String extension = splitted[splitted.length - 1];
  if (!extension.equals("jpg") && !extension.equals("png") && !extension.equals("gif")) {
    System.out.println("The format for the graphical representation of the model should be either jpg, png or gif.");
    return;
  } else {
    System.out.println("Info:\tExporting result as a " + extension + " file");
  }

  boolean gui = Boolean.parseBoolean(args[3]);
  if (gui) {
    System.out.println("Info:\tUsing a graphical user interface");
  } else {
    System.out.println("Info:\tNot using a graphical user interface");
  }

  try {
    loader.setFile(csvFile);
    loader.setNominalAttributes("first-last");
    Instances instances = loader.getDataSet();
    String[] variablesNames = new String[instances.numAttributes()];
    for (int i = 0; i < variablesNames.length; i++) {
      variablesNames[i] = instances.attribute(i).name();
    }
    long start = System.currentTimeMillis();
    ChordalysisModeller.Data mydata = LoadWekaInstances.makeModelData(instances);
    ChordalysisModellingSMT modeller = new ChordalysisModellingSMT(mydata, pValue);
    modeller.buildModel();
    DecomposableModel bestModel = modeller.getModel();
    if (gui) {
      PrintableModel.display(bestModel, variablesNames);
    }
    System.out.println("The model selected is: (selected in " + (System.currentTimeMillis() - start) + "ms)");
    System.out.println(bestModel.toString(variablesNames));
    ImageIO.write(PrintableModel.getImage(bestModel, variablesNames), extension, outPutFile);
  } catch (IOException e) {
    System.out.println("I/O error while loading csv file");
    e.printStackTrace();
  }
}
/**
 * @param args command-line arguments:
 *   [0] path to the arff or csv formatted (without header) file containing a set of local features
 *   [1] the number of clusters to create (e.g. 64)
 *   [2] the maximum number of k-means iterations (e.g. 100)
 *   [3] the seed given to k-means (e.g. 1)
 *   [4] the number of execution slots to use (>1 = parallel execution)
 *   [5] the type of normalization to apply on the local features (no/l2/power+l2)
 *   [6] whether to use kmeans++ for the initialization of the centroids (true/false)
 * @throws Exception
 */
public static void main(String[] args) throws Exception {
  String filepath = args[0];
  int numClusters = Integer.parseInt(args[1]);
  int maxIterations = Integer.parseInt(args[2]);
  int seed = Integer.parseInt(args[3]);
  int numSlots = Integer.parseInt(args[4]);
  String normalization = args[5];
  boolean kMeansPlusPlus = Boolean.parseBoolean(args[6]);

  Instances data;
  System.out.println("--Loading descriptors--");
  if (filepath.endsWith(".arff")) {
    // loading instances from arff file
    BufferedReader reader = new BufferedReader(new FileReader(filepath));
    // Using the Instances class of WEKA to read the dataset
    data = new Instances(reader);
    reader.close();
  } else if (filepath.endsWith(".csv")) {
    // loading instances from csv file
    CSVLoader loader = new CSVLoader();
    loader.setNoHeaderRowPresent(true);
    loader.setSource(new File(filepath));
    data = loader.getDataSet();
  } else {
    throw new Exception("Wrong dataset format!");
  }

  if (!normalization.equals("no")) {
    // apply normalization on the features
    System.out.println("--Normalizing descriptors--");
    for (int i = 0; i < data.numInstances(); i++) {
      double[] vector = data.instance(i).toDoubleArray();
      if (normalization.equals("l2")) {
        vector = Normalization.normalizeL2(vector);
      }
      if (normalization.equals("power+l2")) {
        vector = Normalization.normalizePower(vector, power);
        vector = Normalization.normalizeL2(vector);
      }
      for (int j = 0; j < vector.length; j++) {
        data.instance(i).setValue(j, vector[j]);
      }
    }
  }

  String outFilename = filepath + "_codebook-" + data.numAttributes() + "A-" + numClusters + "C-"
      + maxIterations + "I-" + seed + "S" + "_" + normalization + ".csv";
  AbstractQuantizerLearning.learnAndWriteQuantizer(outFilename, data, numClusters, maxIterations,
      seed, numSlots, kMeansPlusPlus);
}
public static Instances getInstancesFromCSV(String filePath) throws IOException {
  CSVLoader loader = new CSVLoader();
  loader.setSource(new File(filePath));
  return loader.getDataSet();
}
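// Usage sketch for the helper above; the file name and the last-attribute class
// convention are illustrative assumptions, not part of the original helper:
Instances data = getInstancesFromCSV("dataset.csv");
data.setClassIndex(data.numAttributes() - 1); // most Weka classifiers require a class attribute
System.out.println(data.toSummaryString());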
public static void main(String[] args) {
  try {
    CSVLoader loader = new CSVLoader();
    loader.setSource(new File(OJOSECO_FILEPATH));
    Instances data = loader.getDataSet();

    // scale all numeric attributes to [0,1]
    Normalize normalize = new Normalize();
    normalize.setInputFormat(data);
    data = Filter.useFilter(data, normalize);
    data.setClassIndex(data.numAttributes() - 1);
    System.out.println(data.toSummaryString());

    // shuffle, then split into train and test sets
    data.randomize(new Random(0));
    int trainSize = Math.toIntExact(Math.round(data.numInstances() * RATIO_TEST));
    int testSize = data.numInstances() - trainSize;
    Instances train = new Instances(data, 0, trainSize);
    Instances test = new Instances(data, trainSize, testSize);

    MultilayerPerceptron mlp = new MultilayerPerceptron();
    mlp.setOptions(Utils.splitOptions("-L 0.3 -M 0.2 -N 500 -V 0 -S 0 -E 20 -H a"));
    mlp.buildClassifier(train);
    System.out.println(mlp.toString());

    // Evaluation is conventionally constructed with the training set so that
    // class priors come from the training data (the original passed the test set)
    Evaluation eval = new Evaluation(train);
    eval.evaluateModel(mlp, test);
    System.out.println(eval.toSummaryString());
  } catch (Exception e) {
    e.printStackTrace();
  }
}
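// An alternative evaluation sketch continuing the snippet above: 10-fold
// cross-validation on the full normalized data set instead of the single
// holdout split (Evaluation copies the classifier internally for each fold):
Evaluation cv = new Evaluation(data);
cv.crossValidateModel(mlp, data, 10, new Random(1));
System.out.println(cv.toSummaryString());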