public File saveInstancesToArffFile(Instances instances, String filename) throws IOException {
    LOGGER.trace("Saving data to ARFF file [{}].", filename);
    File outputFile = new File(filename);
    if (outputFile.exists()) {
        // Replace any previous output; ArffSaver writes into the fresh file.
        outputFile.delete();
        outputFile.createNewFile();
    }
    ArffSaver arffSaver = new ArffSaver();
    arffSaver.setInstances(instances);
    arffSaver.setFile(outputFile);
    arffSaver.writeBatch();
    return arffSaver.retrieveFile();
}
/**
 * Main method for testing this class.
 *
 * @param args should contain the path of the input dataset and the name of the target ARFF file
 */
public static void main(String[] args) {
    if (args.length == 2) {
        TweetCollectionToArff ta = new SemEvalToArff();
        try {
            Instances dataset = ta.createDataset(args[0]);
            ArffSaver saver = new ArffSaver();
            saver.setInstances(dataset);
            saver.setFile(new File(args[1]));
            saver.writeBatch();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
/**
 * Main method for testing this class.
 *
 * @param args should contain the path of the input dataset and the name of the target ARFF file
 */
public static void main(String[] args) {
    if (args.length == 2) {
        TweetCollectionToArff ta = new HumanCodedToArff();
        try {
            Instances dataset = ta.createDataset(args[0]);
            ArffSaver saver = new ArffSaver();
            saver.setInstances(dataset);
            saver.setFile(new File(args[1]));
            saver.writeBatch();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
@Override
public void reduce(Iterable<WikiDocumentOutput> values, Collector<EvaluationResult> out) throws Exception {
    Instances instances;
    DependencyParser parser = DependencyParser.loadFromModelFile(config.dependencyParserModel());
    WekaUtils wekaUtils = new WekaUtils();
    instances = wekaUtils.createInstances("AllRelations");
    for (WikiDocumentOutput value : values) {
        wekaUtils.addRelationsToInstances(parser, value.getRelations(), value.getTitle(),
                value.getqId(), instances, value.getMaxSentenceLength());
    }
    if (config.isWriteInstances()) {
        File instancesFile = new File(config.getOutputDir() + INSTANCES_ARFF_FILE_NAME);
        ArffSaver arffSaver = new ArffSaver();
        arffSaver.setFile(instancesFile);
        arffSaver.setInstances(instances);
        arffSaver.writeBatch();
    }
    // do the model once with all data
    if (config.getWriteSvmModel()) {
        generateAndWriteFullModel(instances);
    }
    process(out, instances);
}
/**
 * When the score changes, rewrite the file.
 * This is really rare in practice, so don't bother optimizing it.
 */
private static void dump_from_scratch(Collection<String> names, Timestamp start_time) throws IOException {
    saved_schema_version = names.size();
    FastVector attributes = new FastVector();
    // Answer score names
    for (String name : names)
        attributes.addElement(new Attribute(name));
    Instances data = new Instances("Watsonsim captured question stream", attributes, 0);
    // Save the results to a file
    saver = new ArffSaver();
    saver.setStructure(data);
    saver.setRetrieval(Saver.INCREMENTAL);
    saver.setFile(new File("data/weka-log." + start_time + ".arff"));
    for (Score row : dataset)
        saver.writeIncremental(new Instance(1.0, row.getEach(names)));
}
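// Note: the snippet above targets the pre-3.7 Weka API (FastVector, the concrete Instance
// class). A minimal sketch of the same incremental-save pattern against the current Weka
// API is shown below; the relation/attribute names and the output path are placeholders,
// not taken from the original code.
import java.io.File;
import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instances;
import weka.core.converters.ArffSaver;
import weka.core.converters.Saver;

public class IncrementalArffSketch {
    public static void main(String[] args) throws Exception {
        // Header with two numeric attributes (placeholder names).
        ArrayList<Attribute> attributes = new ArrayList<>();
        attributes.add(new Attribute("score_a"));
        attributes.add(new Attribute("score_b"));
        Instances header = new Instances("incremental-demo", attributes, 0);

        ArffSaver saver = new ArffSaver();
        saver.setFile(new File("incremental-demo.arff"));
        saver.setRetrieval(Saver.INCREMENTAL);   // write row by row instead of batching
        saver.setStructure(header);              // defines the @relation/@attribute header

        // Append instances one at a time.
        DenseInstance row = new DenseInstance(1.0, new double[]{0.1, 0.9});
        row.setDataset(header);
        saver.writeIncremental(row);

        // Signal the end of the stream so the saver flushes and closes its writer.
        saver.writeIncremental(null);
    }
}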
public static void CSVtoARFF(String csvfilename, String arfffilename) {
    try {
        // load CSV
        CSVLoader loader = new CSVLoader();
        loader.setSource(new File(csvfilename));
        Instances data = loader.getDataSet();
        // save ARFF (setFile already sets the destination; a separate setDestination call is not needed)
        ArffSaver saver = new ArffSaver();
        saver.setInstances(data);
        saver.setFile(new File(arfffilename));
        saver.writeBatch();
    } catch (Exception e) {
        e.printStackTrace();
    }
}
/**
 * Takes two arguments:
 *  - CSV input file
 *  - ARFF output file
 */
public static void main(String[] cmdArgs) throws Exception {
    // Input and output are currently hard-coded; command-line arguments are ignored.
    String[] args = {"trainingSet.csv", "trainingSet.arff"};
    // load CSV
    CSVLoader loader = new CSVLoader();
    loader.setSource(new File(args[0]));
    Instances data = loader.getDataSet();
    // save ARFF
    ArffSaver saver = new ArffSaver();
    saver.setInstances(data);
    File outputFile = new File(args[1]);
    saver.setFile(outputFile);
    saver.writeBatch();
}
public static void buildTrainingDataFromCorpus(String dataSetName, File corpusRoot, FVGenerator fvGenerator, File dest) throws IOException {
    Collection<File> children = FileUtils.listFiles(corpusRoot,
            new RegexFileFilter(".+\\.txt", IOCase.INSENSITIVE), DirectoryFileFilter.INSTANCE);
    ArffSaver saver = new ArffSaver();
    saver.setFile(dest);
    saver.setRetrieval(Saver.INCREMENTAL);
    boolean first = true;
    for (File textFile : children) {
        Instances dataSet = buildTrainingDataFromFile(dataSetName, textFile, fvGenerator);
        if (first) {
            // The first file defines the ARFF header.
            saver.setStructure(dataSet);
            first = false;
        }
        for (int i = 0; i < dataSet.numInstances(); ++i) {
            saver.writeIncremental(dataSet.instance(i));
        }
    }
    saver.getWriter().flush();
}
public static Instances convertToArff(List<Document> dataSet, List<String> vocabulary, String fileName) {
    int dataSetSize = dataSet.size();
    /* Create features */
    ArrayList<Attribute> attributes = new ArrayList<>();
    for (int i = 0; i < vocabulary.size(); i++) {
        attributes.add(new Attribute("word_" + i));
    }
    Attribute classAttribute = new Attribute("Class");
    attributes.add(classAttribute);
    /* Add examples */
    System.out.println("Building instances...");
    Instances trainingDataSet = new Instances(fileName, attributes, 0);
    for (int k = 0; k < dataSetSize; k++) {
        Document document = dataSet.get(k);
        Instance example = new DenseInstance(attributes.size());
        for (int i = 0; i < vocabulary.size(); i++) {
            String word = vocabulary.get(i);
            example.setValue(i, Collections.frequency(document.getTerms(), word));
        }
        example.setValue(classAttribute, document.getDocumentClass());
        trainingDataSet.add(example);
        int progress = (int) ((k * 100.0) / dataSetSize);
        System.out.printf("\rPercent completed: %3d%%", progress);
    }
    trainingDataSet.setClass(classAttribute);
    System.out.println();
    System.out.println("Writing to file ...");
    try {
        ArffSaver saver = new ArffSaver();
        saver.setInstances(trainingDataSet);
        saver.setFile(new File(fileName));
        saver.writeBatch();
    } catch (IOException e) {
        e.printStackTrace();
    }
    return trainingDataSet;
}
/**
 * Saves the given instances to an ARFF file.
 *
 * @param path the path of the target ARFF file
 * @param data the instances to save
 */
public static void saveDataToArffFile(String path, Instances data) throws IOException {
    System.out.println("\nSaving to file " + path + "...");
    ArffSaver saver = new ArffSaver();
    saver.setInstances(data);
    saver.setFile(new File(path));
    saver.writeBatch();
}
public void generateArff() throws Exception {
    CSVLoader loader = new CSVLoader();
    // Set options
    loader.setNominalAttributes("last");
    loader.setStringAttributes("");
    loader.setMissingValue("?");
    loader.setFieldSeparator("\t");
    loader.setFile(new File(seqConfig.getOutDir().getAbsolutePath() + File.separator + "tmpCounts.mat"));
    Instances data = loader.getDataSet();

    // Set subgroup index
    if (data.classIndex() == -1)
        data.setClassIndex(data.numAttributes() - 1);

    // First, get weight index
    int wInd = data.numAttributes() - 2;
    // Now set weights
    for (int i = 0; i < data.numInstances(); i++) {
        double weight = data.instance(i).value(wInd);
        data.instance(i).setWeight(weight);
    }
    // Now delete the weight attribute
    data.deleteAttributeAt(wInd);

    // Save the arff file
    ArffSaver saver = new ArffSaver();
    saver.setFile(new File(seqConfig.getOutDir().getAbsolutePath() + File.separator + seqConfig.getArffOutName()));
    saver.setInstances(data);
    saver.writeBatch();
}
public static void saveInstancesToArff(Instances instances, String path) throws ClassifierException {
    try {
        ArffSaver arffSaver = new ArffSaver();
        arffSaver.setInstances(instances);
        File destination = new File(path);
        arffSaver.setFile(destination);
        arffSaver.writeBatch();
    } catch (IOException e) {
        throw new ClassifierException("Saving arff file failed", e);
    }
}
protected void saveFoldFiles(Instances Training_Instances, int i, Instances Testing_Instances) throws IOException {
    // Store instances in the related fold files in the ARFF subdir (WARNING: it must exist)
    ArffSaver asSaver = new ArffSaver();
    asSaver.setInstances(Training_Instances);
    asSaver.setFile(new File(String.format("ARFF/train-fold%d.arff", i)));
    asSaver.writeBatch();
    asSaver.setInstances(Testing_Instances);
    asSaver.setFile(new File(String.format("ARFF/test-fold%d.arff", i)));
    asSaver.writeBatch();
}
public static void CSVToARFF(File input, File output) throws IOException {
    CSVLoader csvDataset = new CSVLoader();
    csvDataset.setSource(input);
    Instances arffDataset = csvDataset.getDataSet();
    ArffSaver saver = new ArffSaver();
    saver.setInstances(arffDataset);
    saver.setFile(output);
    saver.writeBatch();
}
/**
 * Simple Weka writer routine. Inspired by Weka's tutorial:
 * https://weka.wikispaces.com/Save+Instances+to+an+ARFF+File
 *
 * @param instances the instances to write
 * @param fileName  the name of the target ARFF file
 * @throws IOException
 */
public static void writeToFile(Instances instances, String fileName) throws IOException {
    if (fileName == null)
        throw new IllegalArgumentException("WekaWriterTools.writeToFile: file was null.");
    File file = new File(fileName);
    ArffSaver saver = new ArffSaver();
    saver.setInstances(instances);
    saver.setFile(file);
    saver.writeBatch();
}
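// As a counterpart to the writer above, a minimal sketch for reading an ARFF file back
// into memory with weka.core.converters.ConverterUtils.DataSource; the file name is a
// placeholder, not taken from the original code.
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class WekaReaderSketch {
    public static void main(String[] args) throws Exception {
        // Load a previously written ARFF file (placeholder path).
        DataSource source = new DataSource("output.arff");
        Instances instances = source.getDataSet();
        // By convention, treat the last attribute as the class if none is set.
        if (instances.classIndex() == -1) {
            instances.setClassIndex(instances.numAttributes() - 1);
        }
        System.out.println("Loaded " + instances.numInstances() + " instances.");
    }
}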
public static void shuffle(String file, int classindex, String outputFile) throws IOException {
    // create the stream to read the data
    ArffFileStream stream = new ArffFileStream(file, classindex);
    InstancesHeader header = stream.getHeader();
    ArrayList<Instance> instanceList = new ArrayList<Instance>();
    System.out.println("Loading data ...");
    int cnt = 0;
    while (stream.hasMoreInstances()) {
        if (++cnt % 10000 == 0) {
            System.out.println("Read " + cnt + " items.");
        }
        instanceList.add(stream.nextInstance());
    }
    System.out.println("Read all items ... shuffling.");
    Collections.shuffle(instanceList);
    ArrayList<Attribute> attributeList = new ArrayList<Attribute>();
    for (int i = 0; i < header.numAttributes(); i++) {
        attributeList.add(header.attribute(i));
    }
    Instances dataSet = new Instances("reduced", attributeList, 2);
    for (Instance inst : instanceList) {
        dataSet.add(inst);
        inst.setDataset(dataSet);
    }
    System.out.println("Writing output ...");
    ArffSaver saver = new ArffSaver();
    saver.setInstances(dataSet);
    saver.setFile(new File(outputFile));
    saver.writeBatch();
    System.out.println("Done.");
}
public void save(String filename) {
    try {
        ArffSaver saver = new ArffSaver();
        saver.setInstances(data);
        saver.setFile(new File(filename));
        saver.writeBatch();
    } catch (IOException e) {
        log.error("Couldn't save to " + filename + " : " + e.getMessage());
        e.printStackTrace();
    }
}
/** Saves the given data set to file. */
protected void saveData(Instances data) throws IOException {
    ArffSaver saver = new ArffSaver();
    saver.setInstances(data);
    // setFile already sets the destination; a separate setDestination call is not needed.
    saver.setFile(dataFile);
    saver.writeBatch();
}
/**
 * Saves the Weka instances produced by {@link #toWekaInstances()} to an ARFF file.
 *
 * @param path path of the target ARFF file
 * @throws java.io.IOException
 */
public void toArffFile(String path) throws IOException {
    // get instances
    Instances instances = this.toWekaInstances();
    // create file object
    File outputFile = new File(path);
    // save arff
    ArffSaver saver = new ArffSaver();
    saver.setInstances(instances);
    saver.setFile(outputFile);
    saver.writeBatch();
}
public void ProcessTables(String tableType) {
    DataBase();
    int execCount = 0;
    try {
        String SQL = "SELECT * from ArtTable where HasXML='yes' and specPragmatic='" + tableType + "' order by RAND() limit 200";
        Statement st = conn.createStatement();
        Instances instances = CreateInstances();
        FastVector fvWekaAttributes = new FastVector(128);
        rs = st.executeQuery(SQL);
        while (rs.next()) {
            Instance iExample = processTable(rs.getInt(1));
            instances.add(iExample);
            execCount++;
            if (execCount > 10000) {
                conn.close();
                DataBase();
                execCount = 0;
            }
        }
        System.out.println(instances.toString());
        ArffSaver saver = new ArffSaver();
        saver.setInstances(instances);
        saver.setFile(new File("spptest.arff"));
        //saver.setDestination(new File("./data/test.arff")); // **not** necessary in 3.5.4 and later
        saver.writeBatch();
    } catch (Exception ex) {
        ex.printStackTrace();
    }
}
public void ProcessTables(int[] table_array) {
    DataBase();
    int execCount = 0;
    try {
        String SQL = "SELECT * from ArtTable where HasXML='yes' and idTable in " + Arrays.toString(table_array);
        SQL = SQL.replace("[", "(").replace("]", ")");
        Statement st = conn.createStatement();
        Instances instances = CreateInstances();
        FastVector fvWekaAttributes = new FastVector(48);
        rs = st.executeQuery(SQL);
        while (rs.next()) {
            Instance iExample = processTable(rs.getInt(1));
            instances.add(iExample);
            execCount++;
            if (execCount > 10000) {
                conn.close();
                DataBase();
                execCount = 0;
            }
        }
        System.out.println(instances.toString());
        ArffSaver saver = new ArffSaver();
        saver.setInstances(instances);
        saver.setFile(new File("spptest10.arff"));
        //saver.setDestination(new File("./data/test.arff")); // **not** necessary in 3.5.4 and later
        saver.writeBatch();
    } catch (Exception ex) {
        ex.printStackTrace();
    }
}
/**
 * Copy a given arff file to a given destination.
 *
 * @param source source file
 * @param destination destination
 * @throws IOException
 */
public void copy(File source, String destination) throws IOException {
    Instances instances;
    // Close the reader once the instances have been loaded.
    try (BufferedReader reader = new BufferedReader(new FileReader(source))) {
        instances = new Instances(reader);
    }
    ArffSaver arffSaver = new ArffSaver();
    arffSaver.setInstances(instances);
    arffSaver.setFile(new File(destination));
    arffSaver.writeBatch();
}
public static void saveARFF(Instances dataSet, File dest) throws IOException {
    ArffSaver saver = new ArffSaver();
    saver.setInstances(dataSet);
    saver.setFile(dest);
    saver.writeBatch();
    saver.getWriter().flush();
}
public static void mergeAndWrite(String relationName, String destPath, String... dataSetPaths) throws IOException {
    ArffSaver saver = new ArffSaver();
    saver.setFile(new File(destPath));
    saver.setRetrieval(Saver.INCREMENTAL);
    boolean first = true;
    for (String p : dataSetPaths) {
        ArffReader reader = new ArffReader(new BufferedReader(new FileReader(p)));
        Instances dataSet = reader.getData();
        if (first) {
            // The first data set defines the merged relation's header.
            dataSet.setRelationName(relationName);
            saver.setStructure(dataSet);
            first = false;
        }
        for (int i = 0; i < dataSet.numInstances(); ++i) {
            saver.writeIncremental(dataSet.instance(i));
        }
    }
    saver.getWriter().flush();
}
public static void main(String[] args) throws Exception {
    if (args.length < 3) {
        System.out.println("USAGE: program dataSetName destARFF sourceFile1 [sourceFile2 [sourceFile3 [...]]]");
        System.exit(1);
    }
    FVGenerator fvg = new GuidedFVGenerator();
    String dataSetName = args[0];
    File dest = new File(args[1]);
    ArffSaver saver = new ArffSaver();
    saver.setFile(dest);
    saver.setRetrieval(Saver.INCREMENTAL);
    boolean first = true;
    for (int i = 2; i < args.length; ++i) {
        File corpusFile = new File(args[i]);
        Instances dataSet = Actions.buildTrainingDataFromFile(dataSetName, corpusFile, fvg);
        if (first) {
            saver.setStructure(dataSet);
            first = false;
        }
        int num = dataSet.numInstances();
        System.out.println("Num instances: " + num);
        for (int j = 0; j < num; ++j) {
            saver.writeIncremental(dataSet.instance(j));
        }
    }
    saver.getWriter().flush();
}
public static void save(Instances instances, String name) throws IOException {
    try {
        ArffSaver saver = new ArffSaver();
        String userHome = System.getProperty("user.home");
        saver.setFile(new File(String.valueOf(Paths.get(userHome, name))));
        saver.setInstances(instances);
        saver.writeBatch();
    } catch (IOException e) {
        logger.error("Cannot save ARFF file", e);
        throw e;
    }
}
public void saveFile(Instances dataSet, String file) throws IOException {
    ArffSaver saver = new ArffSaver();
    saver.setInstances(dataSet);
    saver.setFile(new File(file));
    saver.writeBatch();
}
public static LinearRegressionSummary createCommonPrediction(final String productID) throws IOException, GitAPIException {
    logger.info("productID = {}", productID);
    final Set<RetailAnalytics> set = getAllRetailAnalytics(RETAIL_ANALYTICS_ + productID)
            .filter(ra -> productID.isEmpty() || ra.getProductId().equals(productID))
            //.filter(ra -> ra.getShopSize() == 100 || ra.getShopSize() == 500 || ra.getShopSize() == 1_000 || ra.getShopSize() == 10_000 || ra.getShopSize() == 100_000)
            // .filter(ra -> ra.getShopSize() > 0)
            // .filter(ra -> ra.getSellVolumeNumber() > 0)
            // .filter(ra -> ra.getDemography() > 0)
            // .filter(ra -> ra.getMarketIdx().isEmpty() || ra.getMarketIdx().equals("E"))
            .collect(toSet());
    logger.info("set.size() = {}", set.size());
    if (!set.isEmpty()) {
        // group the analytics by product and save them
        // final Map<String, List<RetailAnalytics>> retailAnalyticsHist = set.parallelStream()
        //         .filter(ra -> ra.getNotoriety() >= 100)
        //         .collect(Collectors.groupingBy(RetailAnalytics::getProductId));
        // final ExclusionStrategy es = new HistAnalytExclStrat();
        // for (final Map.Entry<String, List<RetailAnalytics>> entry : retailAnalyticsHist.entrySet()) {
        //     final String fileNamePath = GitHubPublisher.localPath + RetailSalePrediction.predict_retail_sales + File.separator
        //             + RetailSalePrediction.RETAIL_ANALYTICS_HIST + File.separator + entry.getKey() + ".json";
        //     Utils.writeToGson(fileNamePath, squeeze(entry.getValue()), es);
        // }
        final Set<String> productIds = set.parallelStream().map(RetailAnalytics::getProductId).collect(Collectors.toSet());
        final Set<String> productCategories = set.parallelStream().map(RetailAnalytics::getProductCategory).collect(Collectors.toSet());
        try {
            logger.info("createTrainingSet");
            final Instances trainingSet = createTrainingSet(set, productIds, productCategories);
            // final Standardize standardize = new Standardize();
            // standardize.setInputFormat(trainingSetRaw);
            // final Instances trainingSet = Filter.useFilter(trainingSetRaw, standardize);
            logger.info("ArffSaver");
            final ArffSaver saver = new ArffSaver();
            saver.setInstances(trainingSet);
            saver.setFile(new File(Utils.getDir() + WEKA + File.separator + "common_" + productID + ".arff"));
            saver.writeBatch();
            logger.info("CSVSaver");
            final CSVSaver saverCsv = new CSVSaver();
            saverCsv.setInstances(trainingSet);
            saverCsv.setFile(new File(Utils.getDir() + WEKA + File.separator + "common_" + productID + ".csv"));
            saverCsv.writeBatch();
            // final File file = new File(GitHubPublisher.localPath + RetailSalePrediction.predict_retail_sales + File.separator + WEKA + File.separator + "common.arff");
            // file.delete();
            final LinearRegressionSummary summary = trainLinearRegression(trainingSet, productID);
            // trainRandomCommittee(trainingSet);
            // trainDecisionTable(trainingSet);
            // trainMultilayerPerceptron(trainingSet);
            // trainRandomForest(trainingSet);
            // trainRandomTree(trainingSet);
            // trainLibSvm(trainingSet);
            // logger.info("begin trainJ48BySet");
            // trainJ48BySet(trainingSet);
            // logger.info("end trainJ48BySet");
            //
            // logger.info("begin trainJ48CrossValidation");
            // trainJ48CrossValidation(trainingSet);
            // logger.info("end trainJ48CrossValidation");

            // record the date when the data was last updated
            final DateFormat df = new SimpleDateFormat("dd.MM.yyyy");
            Utils.writeToGson(GitHubPublisher.localPath + RetailSalePrediction.predict_retail_sales + File.separator + "updateDate.json",
                    new UpdateDate(df.format(new Date())));
            return summary;
        } catch (final Exception e) {
            logger.info("productID = {}", productID);
            logger.error(e.getLocalizedMessage(), e);
        }
    }
    return null;
}
public static void createPrediction(final String realm, final Map<String, Set<RetailAnalytics>> retailAnalytics,
        final Set<String> productIds, final Set<String> productCategories) throws IOException {
    final String baseDir = Utils.getDir() + Wizard.by_trade_at_cities + File.separator + realm + File.separator;
    logger.info("stats.size() = " + retailAnalytics.size());
    for (final Map.Entry<String, Set<RetailAnalytics>> entry : retailAnalytics.entrySet()) {
        logger.info(entry.getKey());
        logger.info("entry.getValue().size() = " + entry.getValue().size());
        if (entry.getValue().isEmpty()) {
            continue;
        }
        try {
            final Instances trainingSet = createTrainingSet(entry.getValue(), productIds, productCategories);

            final ArffSaver saver = new ArffSaver();
            saver.setInstances(trainingSet);
            saver.setFile(new File(baseDir + WEKA + File.separator + entry.getKey() + ".arff"));
            saver.writeBatch();

            // Create a classifier
            final J48 tree = new J48();
            tree.buildClassifier(trainingSet);
            // ClassifierToJs.saveModel(tree, baseDir + "weka" + File.separator + "java" + File.separator + entry.getKey() + ".model");
            // try {
            //     FileUtils.writeStringToFile(new File(baseDir + "weka" + File.separator + "js" + File.separator + entry.getKey() + ".js"),
            //             ClassifierToJs.toSource(tree, "PredictProd" + entry.getKey()), "UTF-8");
            // } catch (final Exception e) {
            //     logger.error(e.getLocalizedMessage(), e);
            // }

            // Print the result à la Weka explorer:
            // logger.info(cModel.toString());

            // Test the model
            final Evaluation eTest = new Evaluation(trainingSet);
            eTest.crossValidateModel(tree, trainingSet, 10, new Random(1));
            // eTest.evaluateModel(tree, trainingSet);

            // Print the result à la Weka explorer:
            logger.info(eTest.toSummaryString());

            // Specify that the instance belongs to the training set
            // in order to inherit from the set description
            // Instance iUse = createInstance(3198, 9669, 5, 0, 1, 0);
            // iUse.setDataset(trainingSet);

            // Get the likelihood of each class
            // double[] fDistribution = cModel.distributionForInstance(iUse);
            // logger.info(fDistribution[0]);
        } catch (final Exception e) {
            logger.error(e.getLocalizedMessage(), e);
        }
    }
}
public static void saveWekaInstances(Instances instances, File arffFile) throws IOException {
    ArffSaver saver = new ArffSaver();
    saver.setInstances(instances);
    saver.setFile(arffFile);
    saver.writeBatch();
}
public static void saveWekaInstances(Instances instances, OutputStream arffFile) throws IOException {
    ArffSaver saver = new ArffSaver();
    saver.setInstances(instances);
    saver.setDestination(arffFile);
    saver.writeBatch();
}
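// Since the variant above writes to any OutputStream, here is a minimal sketch of
// producing the ARFF text in memory with a ByteArrayOutputStream; the relation and
// attribute names are placeholders, not taken from the original code.
import java.io.ByteArrayOutputStream;
import java.util.ArrayList;

import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instances;
import weka.core.converters.ArffSaver;

public class ArffToStreamSketch {
    public static void main(String[] args) throws Exception {
        // Build a tiny data set with one numeric attribute (placeholder name).
        ArrayList<Attribute> attributes = new ArrayList<>();
        attributes.add(new Attribute("value"));
        Instances instances = new Instances("stream-demo", attributes, 1);
        instances.add(new DenseInstance(1.0, new double[]{42.0}));

        // Write the ARFF representation into an in-memory stream instead of a file.
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        ArffSaver saver = new ArffSaver();
        saver.setInstances(instances);
        saver.setDestination(buffer);
        saver.writeBatch();

        System.out.println(buffer.toString("UTF-8"));
    }
}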
public void paraDt(File arquivoSaida) {
    // Creates an Instances object (the "header" for Weka instances)
    Instances dadosTreinamento = featurizador.geraInstances();
    Set<Par> paresTermosUsados = new HashSet<Par>();
    // Generates the training instances for each sentence
    for (Sentenca s : listaSentencas) {
        s.removerTermosNaoUtilizados();
        // Positive instances (annotated relations)
        for (Relacao r : s.getRelacoes()) {
            Par p = new Par(s, r.getTermo1(), r.getTermo2());
            if (paresTermosUsados.contains(p))
                continue;
            dadosTreinamento.add(featurizador.paraInstancia(s, r, r.getTermo1(), r.getTermo2(), "treinamento"));
            paresTermosUsados.add(p);
        }
        // Negative instances (all term pairs not annotated as relations)
        for (Termo t1 : s.getTermos()) {
            for (Termo t2 : s.getTermos()) {
                if (t1.equals(t2) || paresTermosUsados.contains(new Par(s, t1, t2)))
                    continue;
                dadosTreinamento.add(featurizador.paraInstancia(s, null, t1, t2, "negativa"));
            }
        }
    }
    // Saves the data set to the output file
    try {
        ArffSaver arffSaver = new ArffSaver();
        arffSaver.setInstances(dadosTreinamento);
        arffSaver.setFile(arquivoSaida);
        arffSaver.writeBatch();
        Logger.getLogger("ARS logger").log(Level.INFO,
                "Conjunto de dados de treinamento salvo no arquivo {0}", arquivoSaida.getAbsolutePath());
    } catch (IOException ex) {
        Logger.getLogger("ARS logger").log(Level.SEVERE, null, ex);
    }
}