Java class weka.core.Instances: example source code

Project: Machine-Learning-End-to-Endguide-for-Java-developers    File: KddCup.java
public static Instances preProcessData(Instances data) throws Exception{

    /* 
     * Remove useless attributes
     */
    RemoveUseless removeUseless = new RemoveUseless();
    removeUseless.setOptions(new String[] { "-M", "99" });  // threshold
    removeUseless.setInputFormat(data);
    data = Filter.useFilter(data, removeUseless);


    /* 
     * Replace missing values
     */
    ReplaceMissingValues fixMissing = new ReplaceMissingValues();
    fixMissing.setInputFormat(data);
    data = Filter.useFilter(data, fixMissing);


    /* 
     * Discretize numeric attributes
     */
    Discretize discretizeNumeric = new Discretize();
    discretizeNumeric.setOptions(new String[] {
            "-O",
            "-M",  "-1.0", 
            "-B",  "4",  // no of bins
            "-R",  "first-last"}); // range of attributes
    discretizeNumeric.setInputFormat(data);
    data = Filter.useFilter(data, discretizeNumeric);

    /* 
     * Select only informative attributes
     */
    InfoGainAttributeEval eval = new InfoGainAttributeEval();
    Ranker search = new Ranker();
    search.setOptions(new String[] { "-T", "0.001" });  // information gain threshold
    AttributeSelection attSelect = new AttributeSelection();
    attSelect.setEvaluator(eval);
    attSelect.setSearch(search);

    // apply attribute selection
    attSelect.SelectAttributes(data);

    // remove the attributes not selected in the last run
    data = attSelect.reduceDimensionality(data);



    return data;
}
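A minimal usage sketch for the pipeline above (the ARFF path and the choice of the last attribute as class are assumptions, not part of the original project):

Instances raw = new Instances(new java.io.BufferedReader(new java.io.FileReader("data/kddcup.arff"))); // hypothetical path
raw.setClassIndex(raw.numAttributes() - 1);   // assume the label is the last attribute
Instances cleaned = KddCup.preProcessData(raw);
System.out.println("Attributes kept after preprocessing: " + cleaned.numAttributes());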
Project: BestConfig    File: BestConf.java
public static void getBestPerfFrom(String path){
    try {
        BestConf bestconf = new BestConf();
        Instances trainingSet = DataIOFile.loadDataFromArffFile(path);
        Instance best = trainingSet.firstInstance();
        //set the best configuration to the cluster
        Map<Attribute,Double> attsmap = new HashMap<Attribute,Double>();
        for(int i=0;i<best.numAttributes()-1;i++){
            attsmap.put(best.attribute(i), best.value(i));
        }

        double bestPerf = bestconf.setOptimal(attsmap, "getBestPerfFrom");
        System.out.println("=========================================");
        System.err.println("The actual performance for the best point is : "+bestPerf);
        System.out.println("=========================================");
    } catch (IOException e) {
        e.printStackTrace();
    }
}
Project: sstore-soft    File: MarkovAttributeSet.java
public Filter createFilter(Instances data) throws Exception {
    Set<Integer> indexes = new HashSet<Integer>();
    for (int i = 0, cnt = this.size(); i < cnt; i++) {
        indexes.add(this.get(i).index());
    } // FOR

    SortedSet<Integer> to_remove = new TreeSet<Integer>(); 
    for (int i = 0, cnt = data.numAttributes(); i < cnt; i++) {
        if (indexes.contains(i) == false) {
            to_remove.add(i+1);
        }
    } // FOR

    Remove filter = new Remove();
    String options[] = { "-R", StringUtil.join(",", to_remove) };
    filter.setOptions(options);      // options must be set before the input format
    filter.setInputFormat(data);
    return (filter);
}
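The ordering above matters in Weka: a filter's options must all be set before setInputFormat() is called, since setInputFormat() fixes the output header. A standalone sketch of the same pattern (the attribute indices are illustrative):

Remove remove = new Remove();
remove.setOptions(new String[] { "-R", "1,3" });  // drop attributes 1 and 3 (1-based)
remove.setInputFormat(data);                      // capture the header only after options are set
Instances reduced = Filter.useFilter(data, remove);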
Project: sstore-soft    File: FeatureClusterer.java
protected Map<Integer, MarkovGraphsContainer> constructMarkovModels(MarkovAttributeSet aset, Instances data) throws Exception {

    // Create an ExecutionState for this run
    ExecutionState state = (ExecutionState)this.state_pool.borrowObject();
    state.init(this.createClusterer(aset, data));

    // Construct the MarkovGraphs for each Partition/Cluster using the Training Data Set
    this.generateMarkovGraphs(state, data);

    // Generate the MarkovModels for the different partitions+clusters
    this.generateMarkovCostModels(state);

    Map<Integer, MarkovGraphsContainer> ret = new HashMap<Integer, MarkovGraphsContainer>();
    for (int p = 0; p < state.markovs_per_partition.length; p++) {
        ret.put(p, state.markovs_per_partition[p]);
    } // FOR
    return (ret);
}
Project: sstore-soft    File: TestFeatureClusterer.java
/**
     * testCreateMarkovAttributeSetFilter
     */
    @Test
    public void testCreateMarkovAttributeSetFilter() throws Exception {
        // Test that we can create a filter from a MarkovAttributeSet
        MarkovAttributeSet aset = new MarkovAttributeSet(data, FeatureUtil.getFeatureKeyPrefix(ParamArrayLengthFeature.class));
        assertEquals(CatalogUtil.getArrayProcParameters(catalog_proc).size(), aset.size());

        Filter filter = aset.createFilter(data);
        Instances newData = Filter.useFilter(data, filter);
        for (int i = 0, cnt = newData.numInstances(); i < cnt; i++) {
            Instance processed = newData.instance(i);
//            System.err.println(processed);
            assertEquals(aset.size(), processed.numAttributes());
        } // FOR
        assertEquals(data.numInstances(), newData.numInstances());
//        System.err.println("MarkovAttributeSet: " + aset);

    }
Project: CraTer    File: StatisticalProject.java
/**
 * <p>Print the distribution of inTrace and outTrace instances in the dataset at <b>path</b>.</p>
 * @param path path of the dataset to read
 * @throws Exception 
 */
public static void getDist(String path) throws Exception{

    Instances ins = DataSource.read(path);
    int numAttr = ins.numAttributes();
    ins.setClassIndex(numAttr-1);

    int numIns = ins.numInstances();
    int intrace = 0;
    int outtrace = 0;
    for(int i=0; i<numIns; i++){
        if(ins.get(i).stringValue(ins.attribute(ins.classIndex())).equals("InTrace")){
            intrace++;
        }else{  
            outtrace++;
        }
    }

    System.out.printf("[ %-30s ] inTrace:%4d, outTrace:%4d.\n", path, intrace, outtrace);
}
Project: CraTer    File: ImbalanceProcessingAve.java
/***
     * <p>Run 10-fold cross validation on the single arff file at <b>path</b>.</p>
     * <p>Uses C4.5 with <b>SMOTE</b> to classify the dataset.</p>
     * @param path dataset path
     * @param index row of the results array to fill
     * @throws Exception
     */
    public static void getEvalResultbySMOTE(String path, int index) throws Exception{

        Instances ins = DataSource.read(path);
        int numAttr = ins.numAttributes();
        ins.setClassIndex(numAttr - 1);

        SMOTE smote = new SMOTE();
        smote.setInputFormat(ins);

        /** classifier settings */
        J48 j48 = new J48();
//      j48.setConfidenceFactor(0.4f);
        j48.buildClassifier(ins);  // not strictly required: crossValidateModel trains its own copies

        FilteredClassifier fc = new FilteredClassifier();
        fc.setClassifier(j48);
        fc.setFilter(smote);

        Evaluation eval = new Evaluation(ins);  
        eval.crossValidateModel(fc, ins, 10, new Random(1));

//      System.out.printf(" %4.3f %4.3f %4.3f", eval.precision(0), eval.recall(0), eval.fMeasure(0));
//      System.out.printf(" %4.3f %4.3f %4.3f", eval.precision(1), eval.recall(1), eval.fMeasure(1));
//      System.out.printf(" %4.3f \n\n", (1-eval.errorRate()));
        results[index][0] = eval.precision(0);
        results[index][1] = eval.recall(0);
        results[index][2] = eval.fMeasure(0);
        results[index][3] = eval.precision(1);
        results[index][4] = eval.recall(1);
        results[index][5] = eval.fMeasure(1);
        results[index][6] = 1-eval.errorRate();

    }
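Wrapping SMOTE in a FilteredClassifier is what keeps the cross-validation above sound: crossValidateModel() re-applies the filter to each training fold only, so the synthetic minority samples never leak into the test folds. A hypothetical driver, assuming results is a double[][] field of the enclosing class as the code above implies:

getEvalResultbySMOTE("data/project1.arff", 0);  // hypothetical dataset path
System.out.printf("precision(0)=%.3f recall(0)=%.3f accuracy=%.3f%n",
        results[0][0], results[0][1], results[0][6]);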
Project: hungrydragon    File: ModelClassifier.java
public ModelClassifier() {
    name = new Attribute("name");
    type = new Attribute("type");
    attributes = new ArrayList<Attribute>();
    classVal = new ArrayList<String>();
    classVal.add("Monday");
    classVal.add("Tuesday");
    classVal.add("Wednesday");
    classVal.add("Thursday");
    classVal.add("Friday");
    classVal.add("Saturday");
    classVal.add("Sunday");

    attributes.add(name);
    attributes.add(type);

    attributes.add(new Attribute("class", classVal));
    dataRaw = new Instances("TestInstances", attributes, 0);
    dataRaw.setClassIndex(dataRaw.numAttributes() - 1);
}
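Note that new Attribute("name") declares a numeric attribute, so rows built against this header carry numeric values for name and type. A sketch of scoring one row against the header (the values and the trained classifier are hypothetical):

Instance row = new DenseInstance(dataRaw.numAttributes());
row.setDataset(dataRaw);     // bind the row to the header so attribute metadata resolves
row.setValue(0, 5.0);        // "name" (numeric in this header)
row.setValue(1, 2.0);        // "type"
// double[] dist = trainedClassifier.distributionForInstance(row);  // hypothetical model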
Project: emr-nlp-server    File: CertSVMPredictor.java
protected double[][] predictDataDistribution(Instances unlabeled) throws Exception {
        // set class attribute
        unlabeled.setClassIndex(unlabeled.numAttributes() - 1);

        // distribution for instance
        double[][] dist = new double[unlabeled.numInstances()][unlabeled.numClasses()];

        // label instances; request probability estimates once, outside the loop
        LibSVM libsvm = (LibSVM) m_Classifier;
        libsvm.setProbabilityEstimates(true);
        for (int i = 0; i < unlabeled.numInstances(); i++) {
//            System.out.println("debug: "+this.getClass().getName()+": classifier: "+m_Classifier.toString());
            dist[i] = libsvm.distributionForInstance(unlabeled.instance(i));
        }

        return dist;
    }
Project: emr-nlp-server    File: CertSVMPredictor.java
public double[] predictInstanceDistribution(Reader reader) throws Exception {
    // assume that the file contains only 1 instance
    // load instances
    Instances data = new Instances(reader);
    // remove reportID attribute
    String[] options = weka.core.Utils.splitOptions("-R 1");  // removes the first attribute in instances (should be the document id?)
    String filterName = "weka.filters.unsupervised.attribute.Remove";
    Filter filter = (Filter) Class.forName(filterName).newInstance();
    if (filter instanceof OptionHandler) {
        ((OptionHandler) filter).setOptions(options);
    }
    filter.setInputFormat(data);
    // make the instances
    Instances unlabeled = Filter.useFilter(data, filter);

    double[][] dist = this.predictDataDistribution(unlabeled);
    return dist[0];
}
Project: emr-nlp-server    File: CertSVMPredictor.java
public void trainModelFromFile(String fnTrainData) throws Exception {
    // load instances
    Instances data = new Instances(new BufferedReader(new FileReader(fnTrainData)));
    // preprocess instances
    String[] options = weka.core.Utils.splitOptions("-R 1");
    String filterName = "weka.filters.unsupervised.attribute.Remove";
    Filter filter = (Filter) Class.forName(filterName).newInstance();
    if (filter instanceof OptionHandler) {
        ((OptionHandler) filter).setOptions(options);
    }
    filter.setInputFormat(data);
    // make the instances
    Instances unlabeled = Filter.useFilter(data, filter);
    // train model
    this.trainModel(unlabeled);
}
Project: bestconf    File: LHSSampler.java
public static void main(String[] args){
    ArrayList<Attribute> atts = new ArrayList<Attribute>();

    /*Properties p1 = new Properties();
    p1.setProperty("range", "[0,1]");
    ProtectedProperties prop1 = new ProtectedProperties(p1);*/

    Properties p2 = new Properties();
    p2.setProperty("range", "[321,1E9]");
    ProtectedProperties prop2 = new ProtectedProperties(p2);

    ArrayList<String> attVals = new ArrayList<String>();
    for (int i = 0; i < 5; i++)
          attVals.add("val" + (i+1));

    //atts.add(new Attribute("att1", prop1));
    atts.add(new Attribute("att2", prop2));
    //atts.add(new Attribute("att3", attVals));
    //Instances data = LHSInitializer.getMultiDimContinuous(atts, 10, false);
    //Instances data = LHSInitializer.getMultiDim(atts, 10, false);
    LHSSampler sampler = new LHSSampler();
    Instances data = sampler.sampleMultiDimContinuous(atts, 1, false);

    System.out.println(data);
}
Project: emr-nlp-server    File: WSInterface.java
protected static void verifyCecum() throws Exception {
    // train model from cecum.arff
    String fn_train = Util.getOSPath(new String[]{Storage_Controller.getTrainingFileFolder(),
            "0..cecum.arff"});
    SVMPredictor svm = new SVMPredictor();
    svm.trainModelFromFile(fn_train);
    List<String> reportIDList = XMLUtil
            .getReportIDFromXMLList(Util.getOSPath(new String[] {
                    Storage_Controller.getDocumentListFolder(), "devIDList.xml"}));
    Report_Controller reportController = new Report_Controller();
    Instances testSet = reportController.getWekaTestSet(reportIDList);
    double[][] predTable = svm.predict(testSet);
    for(int i = 0; i < testSet.numInstances(); i++) {
        System.out.print(testSet.instance(i).stringValue(0) + ",");
        System.out.println(predTable[i][0] + "," + predTable[i][1]);
    }
}
Project: GeneralisedRulesAlgorithm    File: GRules.java
/**
 * Calculate support value of a given rule on the dataset
 * 
 * @param dataset the dataset
 * @param bodySide left-side or BODY part of the rule
 * @return support value for the rule on the given dataset
 */
public double calculateSupport(Instances dataset, List<Term> bodySide){

    Iterator<Instance> datasetIterator = dataset.iterator();
    int supportCount = 0;

    while(datasetIterator.hasNext()){

        Instance anInstance = datasetIterator.next();

        if(instanceCoveredByTermsList(anInstance,bodySide)){
            supportCount++;
        }

    }
    return !dataset.isEmpty() ? (double) supportCount / (double) dataset.size() : 0.0d;
}
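Rule confidence can be computed from the same coverage test. A companion sketch (not part of the original class) under the usual definition confidence(body -> head) = support(body and head) / support(body):

public double calculateConfidence(Instances dataset, List<Term> bodySide, List<Term> headSide) {
    int bodyCount = 0, bothCount = 0;
    for (Instance anInstance : dataset) {
        if (instanceCoveredByTermsList(anInstance, bodySide)) {
            bodyCount++;
            if (instanceCoveredByTermsList(anInstance, headSide)) {
                bothCount++;
            }
        }
    }
    return bodyCount > 0 ? (double) bothCount / (double) bodyCount : 0.0d;
}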
Project: bestconf    File: LHSInitializer.java
public static void main(String[] args){
    ArrayList<Attribute> atts = new ArrayList<Attribute>();

    /*Properties p1 = new Properties();
    p1.setProperty("range", "[0,1]");
    ProtectedProperties prop1 = new ProtectedProperties(p1);*/

    Properties p2 = new Properties();
    p2.setProperty("range", "[321,1E9]");
    ProtectedProperties prop2 = new ProtectedProperties(p2);

    ArrayList<String> attVals = new ArrayList<String>();
    for (int i = 0; i < 5; i++)
          attVals.add("val" + (i+1));

    //atts.add(new Attribute("att1", prop1));
    atts.add(new Attribute("att2", prop2));
    //atts.add(new Attribute("att3", attVals));
    //Instances data = LHSInitializer.getMultiDimContinuous(atts, 10, false);
    //Instances data = LHSInitializer.getMultiDim(atts, 10, false);
    Instances data = LHSInitializer.getMultiDimContinuous(atts, 1, false);

    System.out.println(data);
}
Project: twitter-user-gender-classification    File: Classification.java
public static void runJ48(Instances trainSet, Instances testSet) {
    System.out.println("#####################  J48  #####################");

    Classifier model = null;
    Train train = new Train(trainSet);

    /*
     * TRAIN
     */
    try {
        model = train.getJ48Model();
    } catch (Exception e) {
        e.printStackTrace();
    }

    /*
     * TEST
     */
    Test test = new Test(trainSet, testSet);
    test.testModel(model);

    System.out.println("#####################  END OF J48  #####################");
    System.out.print("\n\n\n");
}
Project: bestconf    File: BestConf.java
public static void getBestPerfFrom(String path){
    try {
        BestConf bestconf = new BestConf();
        Instances trainingSet = DataIOFile.loadDataFromArffFile(path);
        Instance best = trainingSet.firstInstance();
        //set the best configuration to the cluster
        Map<Attribute,Double> attsmap = new HashMap<Attribute,Double>();
        for(int i=0;i<best.numAttributes()-1;i++){
            attsmap.put(best.attribute(i), best.value(i));
        }

        double bestPerf = bestconf.setOptimal(attsmap, "getBestPerfFrom");
        System.out.println("=========================================");
        System.err.println("The actual performance for the best point is : "+bestPerf);
        System.out.println("=========================================");
    } catch (IOException e) {
        e.printStackTrace();
    }
}
Project: twitter-user-gender-classification    File: Classification.java
public static void runSMO(Instances trainSet, Instances testSet) {
    System.out.println("#####################  SMO (SVM)  #####################");

    Classifier model = null;
    Train train = new Train(trainSet);

    /*
     * TRAIN
     */
    try {
        model = train.getSMO();
    } catch (Exception e) {
        e.printStackTrace();
    }

    /*
     * TEST
     */
    Test test = new Test(trainSet, testSet);
    test.testModel(model);

    System.out.println("#####################  END OF SMO (SVM)  #####################");
    System.out.print("\n\n\n");
}
Project: weka-xgboost    File: DMatrixLoader.java
public static DMatrix instancesToDMatrix(Instances instances) throws XGBoostError {
    long[] rowHeaders = new long[instances.size()+1];
    rowHeaders[0]=0;
    List<Float> dataList = new ArrayList<>();
    List<Integer> colList = new ArrayList<>();
    float[] labels = new float[instances.size()];

    for(int i=0; i<instances.size(); i++) {
        Instance instance = instances.get(i);
        rowHeaders[i] = dataList.size();
        processInstance(instance, dataList, colList);
        labels[i] = (float) instance.classValue();
    }
    rowHeaders[rowHeaders.length - 1] = dataList.size();
    int colNum = instances.numAttributes()-1;
    DMatrix dMatrix = createDMatrix(rowHeaders, dataList, colList, colNum);

    dMatrix.setLabel(labels);
    return dMatrix;

}
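The rowHeaders, colList and dataList collections built above form a CSR (compressed sparse row) encoding of the feature matrix. A small worked layout, for intuition:

// Two rows: row 0 = {col 2 -> 1.5f}, row 1 = {col 0 -> 4.0f, col 3 -> 2.0f}
// dataList   = [1.5, 4.0, 2.0]   // non-zero values in row-major order
// colList    = [2,   0,   3  ]   // column index of each stored value
// rowHeaders = [0,   1,   3  ]   // row i spans dataList[rowHeaders[i] .. rowHeaders[i+1])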
Project: BestConfig    File: COMT2.java
private static double computeOmegaDelta(M5P model, M5P modelPi, Instances omega) throws Exception{
    double retval = 0., y;
    Enumeration<Instance> enu = omega.enumerateInstances();
    int idxClass = omega.classIndex();
    Instance ins;
    while(enu.hasMoreElements()){
        ins = enu.nextElement();
        y = ins.value(idxClass);
        retval += Math.pow(y-model.classifyInstance(ins), 2)-Math.pow(y-modelPi.classifyInstance(ins), 2);
    }
    return retval;
}
Project: BestConfig    File: DataIOFile.java
/**
 * Load and return the data set from the ARFF file at the given path.
 * @param path path of the ARFF file
 */
public static Instances loadDataFromArffFile(String path) throws IOException{
    ArffLoader loader = new ArffLoader();
    loader.setSource(new File(path));
    Instances data = loader.getDataSet();

    System.out.println("\nHeader of dataset:\n");
    System.out.println(new Instances(data, 0));
    return data;
}
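Callers still need to pick a class attribute, since ArffLoader does not set one. A typical follow-up (the path is one used elsewhere on this page):

Instances data = DataIOFile.loadDataFromArffFile("data/trainingBestConf0.arff");
data.setClassIndex(data.numAttributes() - 1);  // the loader leaves the class index unset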
Project: s-store    File: FeatureClusterer.java
    /**
     * Build a clusterer over the attributes in the given MarkovAttributeSet.
     * @param aset
     * @param trainingData
     * @throws Exception
     */
    protected AbstractClusterer createClusterer(MarkovAttributeSet aset, Instances trainingData) throws Exception {
        if (trace.val) LOG.trace(String.format("Clustering %d %s instances with %d attributes", trainingData.numInstances(), CatalogUtil.getDisplayName(catalog_proc), aset.size()));

        // Create the filter we need so that we only include the attributes in the given MarkovAttributeSet
        Filter filter = aset.createFilter(trainingData);

        // Using our training set to build the clusterer
        int seed = this.rand.nextInt(); 
//        SimpleKMeans inner_clusterer = new SimpleKMeans();
        EM inner_clusterer = new EM();
        String options[] = {
            "-N", Integer.toString(1000), // num_partitions),
            "-S", Integer.toString(seed),
            "-I", Integer.toString(100),

        };
        inner_clusterer.setOptions(options);

        FilteredClusterer filtered_clusterer = new FilteredClusterer();
        filtered_clusterer.setFilter(filter);
        filtered_clusterer.setClusterer(inner_clusterer);

        AbstractClusterer clusterer = filtered_clusterer;
        clusterer.buildClusterer(trainingData);

        return (clusterer);
    }
Project: Machine-Learning-End-to-Endguide-for-Java-developers    File: ActivityRecognition.java
public static void main(String[] args) throws Exception{

    String databasePath = "data/features.arff";

    // Load the data in arff format
    Instances data = new Instances(new BufferedReader(new FileReader(databasePath)));

    // Set the last attribute as the class attribute
    data.setClassIndex(data.numAttributes() - 1);

    // Build a basic decision tree model
    String[] options = new String[]{};
    J48 model = new J48();
    model.setOptions(options);
    model.buildClassifier(data);

    // Output decision tree
    System.out.println("Decision tree model:\n"+model);

    // Output source code implementing the decision tree
    System.out.println("Source code:\n"+model.toSource("ActivityRecognitionEngine"));

    // Check accuracy of model using 10-fold cross-validation
    Evaluation eval = new Evaluation(data);
    eval.crossValidateModel(model, data, 10, new Random(1), new String[] {});
    System.out.println("Model performance:\n"+eval.toSummaryString());

    String[] activities = new String[]{"Walk", "Walk", "Walk", "Run", "Walk", "Run", "Run", "Sit", "Sit", "Sit"};
    DiscreteLowPass dlpFilter = new DiscreteLowPass(3);
    for(String str : activities){
        System.out.println(str +" -> "+ dlpFilter.filter(str));
    }

}
Project: bestconf    File: ConfigSampler.java
public static ArrayList<Attribute> scaleDownDetour(Instances previousSet, Instance center){
    switch(scaleDownChoice){
    case 0:
        return scaleDownMindists(previousSet,center);
    default:
        return scaleDownNeighbordists(previousSet,center);
    }
}
Project: Machine-Learning-End-to-Endguide-for-Java-developers    File: BookDecisionTree.java
public BookDecisionTree(String fileName) {
    // try-with-resources so the reader is closed even on failure
    try (BufferedReader reader = new BufferedReader(new FileReader(fileName))) {
        trainingData = new Instances(reader);
        trainingData.setClassIndex(trainingData.numAttributes() - 1);
    } catch (IOException ex) {
        ex.printStackTrace();
    }
}
Project: BestConfig    File: DataIOFile.java
/**
 * Save the given data set to a CSV file at the given path.
 * @param path destination CSV file
 * @param data instances to save
 */
public static void saveDataToCsvFile(String path, Instances data) throws IOException{
    System.out.println("\nSaving to file " + path + "...");
    CSVSaver saver = new CSVSaver();
    saver.setInstances(data);
    saver.setFile(new File(path));
    saver.writeBatch();
}
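Together with loadDataFromArffFile above, this gives a one-liner ARFF-to-CSV conversion; a sketch with hypothetical paths:

Instances data = DataIOFile.loadDataFromArffFile("data/in.arff");   // hypothetical input
DataIOFile.saveDataToCsvFile("data/out.csv", data);                 // hypothetical output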
Project: sstore-soft    File: FeatureClusterer.java
    /**
     * Build a clusterer over the attributes in the given MarkovAttributeSet.
     * @param aset
     * @param trainingData
     * @throws Exception
     */
    protected AbstractClusterer createClusterer(MarkovAttributeSet aset, Instances trainingData) throws Exception {
        if (trace.val) LOG.trace(String.format("Clustering %d %s instances with %d attributes", trainingData.numInstances(), CatalogUtil.getDisplayName(catalog_proc), aset.size()));

        // Create the filter we need so that we only include the attributes in the given MarkovAttributeSet
        Filter filter = aset.createFilter(trainingData);

        // Using our training set to build the clusterer
        int seed = this.rand.nextInt(); 
//        SimpleKMeans inner_clusterer = new SimpleKMeans();
        EM inner_clusterer = new EM();
        String options[] = {
            "-N", Integer.toString(1000), // num_partitions),
            "-S", Integer.toString(seed),
            "-I", Integer.toString(100),

        };
        inner_clusterer.setOptions(options);

        FilteredClusterer filtered_clusterer = new FilteredClusterer();
        filtered_clusterer.setFilter(filter);
        filtered_clusterer.setClusterer(inner_clusterer);

        AbstractClusterer clusterer = filtered_clusterer;
        clusterer.buildClusterer(trainingData);

        return (clusterer);
    }
Project: BestConfig    File: COMT2.java
private static M5P buildModel(Instances modelInstances, int numOfInstanceInLeaf) throws Exception{
    M5P retval = new M5P();
    retval.setSaveInstances(true);
    retval.setOptions(Utils.splitOptions("-N -L -M "+numOfInstanceInLeaf));
    retval.buildClassifier(modelInstances);
    return retval;
}
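A hypothetical call against a loaded regression dataset (buildModel is private, so this would live inside COMT2; the path reuses one from this page):

Instances regData = DataIOFile.loadDataFromArffFile("data/trainingBestConf0.arff");
regData.setClassIndex(regData.numAttributes() - 1);
M5P tree = buildModel(regData, 4);   // at least 4 instances per leaf
System.out.println(tree);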
Project: sstore-soft    File: FeatureExtractor.java
public static void main(String[] vargs) throws Exception {
        ArgumentsParser args = ArgumentsParser.load(vargs);
        args.require(
            ArgumentsParser.PARAM_CATALOG,
            ArgumentsParser.PARAM_WORKLOAD,
            ArgumentsParser.PARAM_MAPPINGS
        );

        FeatureExtractor extractor = new FeatureExtractor(args.catalogContext);
        Map<Procedure, FeatureSet> fsets = extractor.calculate(args.workload);

//        List<String> targets = args.getOptParams();

        for (Entry<Procedure, FeatureSet> e : fsets.entrySet()) {
            String proc_name = e.getKey().getName();
//            if (targets.contains(proc_name) == false) continue;

//            File path = new File(proc_name + ".fset");
//            e.getValue().save(path.getAbsolutePath());
//            LOG.info(String.format("Wrote FeatureSet with %d instances to '%s'", e.getValue().getTransactionCount(), path.getAbsolutePath()));

            File path = new File(proc_name + ".arff");
            Instances data = e.getValue().export(proc_name, false);
            FileUtil.writeStringToFile(path, data.toString());
            LOG.info(String.format("Wrote FeatureSet with %d instances to '%s'", data.numInstances(), path.getAbsolutePath()));
        }

    }
Project: CS-436_580L_Introduction-to-Machine-Learning    File: Utils.java
public static Instances convertToArff(List<Document> dataSet, List<String> vocabulary, String fileName) {
    int dataSetSize = dataSet.size();
    /* Create features */
    ArrayList<Attribute> attributes = new ArrayList<>();
    for (int i = 0; i < vocabulary.size(); i++) {
        attributes.add(new Attribute("word_" + i));
    }
    Attribute classAttribute = new Attribute("Class");
    attributes.add(classAttribute);

    /* Add examples */
    System.out.println("Building instances...");
    Instances trainingDataSet = new Instances(fileName, attributes, 0);
    for (int k = 0; k < dataSetSize; k++) {
        Document document = dataSet.get(k);
        Instance example = new DenseInstance(attributes.size());
        for (int i = 0; i < vocabulary.size(); i++) {
            String word = vocabulary.get(i);
            example.setValue(i, Collections.frequency(document.getTerms(), word));
        }
        example.setValue(classAttribute, document.getDocumentClass());
        trainingDataSet.add(example);
        int progress = (int) ((k * 100.0) / dataSetSize);
        System.out.printf("\rPercent completed: %3d%%", progress);
    }
    trainingDataSet.setClass(classAttribute);
    System.out.println();

    System.out.println("Writing to file ...");
    try {
        ArffSaver saver = new ArffSaver();
        saver.setInstances(trainingDataSet);
        saver.setFile(new File(fileName));
        saver.writeBatch();
    } catch (IOException e) {
        e.printStackTrace();
    }

    return trainingDataSet;
}
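A hypothetical call, assuming Document exposes getTerms() and getDocumentClass() as used above, and that documents is a prepared List<Document>:

List<String> vocabulary = Arrays.asList("offer", "meeting", "invoice");   // toy vocabulary
Instances train = convertToArff(documents, vocabulary, "train.arff");
System.out.println("Wrote " + train.numInstances() + " instances to train.arff");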
Project: Java-Data-Science-Made-Easy    File: Main-SVG.java
public Main() {
    try {
        BufferedReader datafile;
        datafile = readDataFile("camping.txt");
        Instances data = new Instances(datafile);
        data.setClassIndex(data.numAttributes() - 1);

        Instances trainingData = new Instances(data, 0, 14);
        Instances testingData = new Instances(data, 14, 5);
        Evaluation evaluation = new Evaluation(trainingData);

        SMO smo = new SMO();
        smo.buildClassifier(trainingData);  // train only on the training split; building on data would leak the test rows

        evaluation.evaluateModel(smo, testingData);
        System.out.println(evaluation.toSummaryString());

        // Test instance (one slot per attribute, so the class slot exists too)
        Instance instance = new DenseInstance(data.numAttributes());
        instance.setValue(data.attribute("age"), 78);
        instance.setValue(data.attribute("income"), 125700);
        instance.setValue(data.attribute("camps"), 1);            
        instance.setDataset(data);
        System.out.println("The instance: " + instance);
        System.out.println(smo.classifyInstance(instance));
    } catch (Exception ex) {
        ex.printStackTrace();
    }
}
Project: BestConfig    File: DDSSampler.java
public static void main(String[] args){
    ArrayList<Attribute> atts = new ArrayList<Attribute>();

    Properties p1 = new Properties();
    p1.setProperty("range", "[0,1]");
    ProtectedProperties prop1 = new ProtectedProperties(p1);

    Properties p2 = new Properties();
    p2.setProperty("range", "[321,1E9]");
    ProtectedProperties prop2 = new ProtectedProperties(p2);

    Properties p3 = new Properties();
    p3.setProperty("range", "[1,30]");
    ProtectedProperties prop3 = new ProtectedProperties(p3);

    ArrayList<String> attVals = new ArrayList<String>();
    for (int i = 0; i < 5; i++)
          attVals.add("val" + (i+1));

    atts.add(new Attribute("att1", prop1));
    atts.add(new Attribute("att2", prop2));
    atts.add(new Attribute("att3", prop3));
    //atts.add(new Attribute("att4", attVals));
    //Instances data = LHSInitializer.getMultiDimContinuous(atts, 10, false);
    //Instances data = LHSInitializer.getMultiDim(atts, 10, false);
    DDSSampler sampler = new DDSSampler(3);

    sampler.setCurrentRound(0);
    Instances data = sampler.sampleMultiDimContinuous(atts, 2, false);
    System.out.println(data);

    sampler.setCurrentRound(1);
    data = sampler.sampleMultiDimContinuous(atts, 2, false);
    System.out.println(data);

    sampler.setCurrentRound(2);
    data = sampler.sampleMultiDimContinuous(atts, 2, false);
    System.out.println(data);
}
Project: CraTer    File: InsMerge.java
/***
     * <p>Merge the datasets given in the path array and save the combined dataset under dirpath.
     * </p>
     * @param path String array of arff file paths
     * @param dirpath directory in which to write the merged arff file
     * @throws Exception
     */
    public static void getIns(String[] path, String dirpath) throws Exception{

        /** Create an empty dataset named total */
        Instances total = new Instances("total3500", getStandAttrs(), 1);

        total.setClassIndex(total.numAttributes() - 1);

        int len = path.length;
        Instances[] temp = new Instances[len];

        for(int i=0; i<path.length; i++){

            temp[i] = DataSource.read(path[i]);
            temp[i].setClassIndex(temp[i].numAttributes() - 1);

            total.addAll(temp[i]);
            System.out.println("adding " + path[i] + " " + temp[i].numInstances());
//          System.out.println("data" + total.numInstances() + "\n");
        }

        String totalName = dirpath+"total3500" + String.valueOf(System.currentTimeMillis()) + ".arff";

        DataSink.write(totalName, total);
        System.out.println("Writing the data into [" + totalName + "] successfully.\n");
    }
Project: BestConfig    File: BestConf.java
public static ArrayList<String> preprocessInstances(Instances retval){
    double[][] cMatrix;
    ArrayList<String> result = new ArrayList<String>();
    ArrayList<String> deleteAttNames = new ArrayList<String>();
    PrincipalComponents pc = new PrincipalComponents();
    HashMap<Integer, ArrayList<Integer>> filter = new HashMap<Integer, ArrayList<Integer>>();
    try {
        pc.buildEvaluator(retval);
        cMatrix = pc.getCorrelationMatrix();        
        for(int i = 0; i < cMatrix.length; i++){
            ArrayList<Integer> record = new ArrayList<Integer>();
            for(int j = i + 1; j < cMatrix.length; j++)
                if(Math.abs(cMatrix[i][j]) >= correlationFactorThreshold){
                    record.add(j);
                }
            if(record.size() != 0){
                filter.put(i, record);
            }
        }
        Iterator<Map.Entry<Integer, ArrayList<Integer>>> iter = filter.entrySet().iterator();
        while (iter.hasNext()) {
            Map.Entry<Integer, ArrayList<Integer>> entry = iter.next();
            ArrayList<Integer> arr = entry.getValue();
            for(int i = 0; i < arr.size(); i++)
                if(arr.get(i) != cMatrix.length - 1 && !deleteAttNames.contains(retval.attribute(arr.get(i)).name())){
                    deleteAttNames.add(retval.attribute(arr.get(i)).name());
                }
            if(arr.contains(cMatrix.length-1)){
                result.add(retval.attribute(entry.getKey()).name());
            }
        }
        for(int i = 0; i < deleteAttNames.size(); i++){
            retval.deleteAttributeAt(retval.attribute(deleteAttNames.get(i)).index());
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return result;
}
Project: bestconf    File: BestConf.java
public static void testCOMT2() throws Exception{
    BestConf bestconf = new BestConf();
    Instances trainingSet = DataIOFile.loadDataFromArffFile("data/trainingBestConf0.arff");
    trainingSet.setClassIndex(trainingSet.numAttributes()-1);

    Instances samplePoints = LHSInitializer.getMultiDimContinuous(bestconf.getAttributes(), InitialSampleSetSize, false);
    samplePoints.insertAttributeAt(trainingSet.classAttribute(), samplePoints.numAttributes());
    samplePoints.setClassIndex(samplePoints.numAttributes()-1);

    COMT2 comt = new COMT2(samplePoints, COMT2Iteration);

    comt.buildClassifier(trainingSet);

    Evaluation eval = new Evaluation(trainingSet);
    eval.evaluateModel(comt, trainingSet);
    System.err.println(eval.toSummaryString());

    Instance best = comt.getInstanceWithPossibleMaxY(samplePoints.firstInstance());
    Instances bestInstances = new Instances(trainingSet,2);
    bestInstances.add(best);
    DataIOFile.saveDataToXrffFile("data/trainingBestConf_COMT2.arff", bestInstances);

    //now we output the training set with the class value updated as the predicted value
    Instances output = new Instances(trainingSet, trainingSet.numInstances());
    Enumeration<Instance> enu = trainingSet.enumerateInstances();
    while(enu.hasMoreElements()){
        Instance ins = enu.nextElement();
        double[] values = ins.toDoubleArray();
        values[values.length-1] = comt.classifyInstance(ins);
        output.add(ins.copy(values));
    }
    DataIOFile.saveDataToXrffFile("data/trainingBestConf0_predict.xrff", output);
}
Project: CraTer    File: ImbalanceProcessingAve.java
/***
     * <p>Run 10-fold cross validation on the single arff file at <b>path</b>.</p>
     * <p>Uses C4.5 with <b>cost-sensitive learning</b> to classify the dataset.</p>
     * @param path dataset path
     * @param index row of the results array to fill
     * @throws Exception
     */
    public static void getEvalResultbyCost(String path, int index) throws Exception{

        Instances ins = DataSource.read(path);
        int numAttr = ins.numAttributes();
        ins.setClassIndex(numAttr - 1);

        /** classifier settings */
        J48 j48 = new J48();
//      j48.setConfidenceFactor(0.4f);
        j48.buildClassifier(ins);  // not strictly required: crossValidateModel trains its own copies

        CostSensitiveClassifier csc = new CostSensitiveClassifier();
        csc.setClassifier(j48);
        csc.setCostMatrix(new CostMatrix(new BufferedReader(new FileReader("files/costm"))));

        Evaluation eval = new Evaluation(ins);

        eval.crossValidateModel(csc, ins, 10, new Random(1));

//      System.out.printf(" %4.3f %4.3f %4.3f", eval.precision(0), eval.recall(0), eval.fMeasure(0));
//      System.out.printf(" %4.3f %4.3f %4.3f", eval.precision(1), eval.recall(1), eval.fMeasure(1));
//      System.out.printf(" %4.3f \n\n", (1-eval.errorRate()));
        results[index][0] = eval.precision(0);
        results[index][1] = eval.recall(0);
        results[index][2] = eval.fMeasure(0);
        results[index][3] = eval.precision(1);
        results[index][4] = eval.recall(1);
        results[index][5] = eval.fMeasure(1);
        results[index][6] = 1-eval.errorRate();

    }
Project: biosses    File: svmRegressor.java
public static void runSVMRegression() throws Exception {
    int numFolds = 10;
    BufferedReader br = new BufferedReader(new FileReader("rawData.arff"));
    Instances trainData = new Instances(br);
    trainData.setClassIndex(trainData.numAttributes() - 1);
    br.close();

    WekaPackageManager.loadPackages(false, true, false);
    // note: Class.forName() expects a concrete classifier class; the string below
    // names a package and appears truncated in this listing
    AbstractClassifier classifier = (AbstractClassifier) Class.forName(
            "weka.classifiers.functions.supportVector").newInstance();
    String options = ("-S 3 -V 10 -T 0");
    String[] optionsArray = options.split(" ");
    classifier.setOptions(optionsArray);
    classifier.buildClassifier(trainData);

    Evaluation evaluation = new Evaluation(trainData);
    /*******************CROSS VALIDATION*************************/
    evaluation.crossValidateModel(classifier, trainData, numFolds, new Random(1));
    /***********************************************************/

    evaluateResults(evaluation);
}
Project: BestConfig    File: COMT2.java
private static Instances getSiblings(M5P modelTree, Instance ins){
    RuleNode node = modelTree.getM5RootNode();

    while(!node.isLeaf()){
        if(ins.value(node.splitAtt())<=node.splitVal()){
            node = node.leftNode();
        }else {
            node = node.rightNode();
        }
    }

    return node.zyqGetTrainingSet();
}
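getSiblings() walks the model tree to the leaf that would score ins and returns the training instances stored there (via the project's patched zyqGetTrainingSet()). A usage sketch with a hypothetical trained M5P and query instance:

Instances siblings = getSiblings(trainedModelTree, query);  // hypothetical model and Instance
System.out.println(siblings.numInstances() + " training instances share the leaf with the query");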
Project: BestConfig    File: ConfigSampler.java
public static ArrayList<Attribute> scaleDownDetour(Instances previousSet, Instance center){
    switch(scaleDownChoice){
    case 0:
        return scaleDownMindists(previousSet,center);
    default:
        return scaleDownNeighbordists(previousSet,center);
    }
}
Project: ijcnlp2017-cmaps    File: FeatureContainer.java
public Instances createInstances(List<String> orderedFeatureNames) {

        if (orderedFeatureNames == null)
            orderedFeatureNames = new ArrayList<String>(this.getFeatures());

        Instances data = this.createEmptyDataset(orderedFeatureNames);
        for (I key : this.featureValues.keySet())
            data.add(this.createInstance(orderedFeatureNames, key));

        return data;
    }