/**
 * Loads the camping data set, trains an SMO classifier on the first 14
 * instances, evaluates it on the remaining 5, and classifies one
 * hand-built test instance.
 */
public Main() {
    try {
        BufferedReader datafile = readDataFile("camping.txt");
        Instances data = new Instances(datafile);
        data.setClassIndex(data.numAttributes() - 1);

        // Hold out the last 5 rows for evaluation.
        Instances trainingData = new Instances(data, 0, 14);
        Instances testingData = new Instances(data, 14, 5);

        Evaluation evaluation = new Evaluation(trainingData);
        SMO smo = new SMO();
        // FIX: train on the training split only. The original called
        // buildClassifier(data), leaking the held-out test rows into the
        // model and making the evaluation meaningless.
        smo.buildClassifier(trainingData);
        evaluation.evaluateModel(smo, testingData);
        System.out.println(evaluation.toSummaryString());

        // Ad-hoc test instance. Sized to the full attribute count so the
        // class slot exists (and stays missing) — the original hard-coded 3.
        Instance instance = new DenseInstance(data.numAttributes());
        instance.setDataset(data);
        instance.setValue(data.attribute("age"), 78);
        instance.setValue(data.attribute("income"), 125700);
        instance.setValue(data.attribute("camps"), 1);
        System.out.println("The instance: " + instance);
        System.out.println(smo.classifyInstance(instance));
    } catch (Exception ex) {
        ex.printStackTrace();
    }
}
/**
 * Classifies a sentence into a SentenceType by extracting speech-act
 * features and running them through the trained classifier.
 *
 * @param sentence the sentence to classify
 * @return the predicted sentence type
 * @throws RuntimeException if the underlying classifier fails
 */
public SentenceType classifySentence(Sentence sentence) {
    SpeechActsClassifier.Features features = speechActsClassifier.classifyFeatures(sentence);
    // Five feature slots plus the (missing) class slot.
    Instance inst = new DenseInstance(6);
    inst.setDataset(dataSet);
    inst.setValue(0, features.getSentenceLength());
    inst.setValue(1, features.getNumberOfNouns());
    inst.setValue(2, (features.isEndingInNounOrAdjective() ? 1 : 0));
    inst.setValue(3, (features.isBeginningInVerb() ? 1 : 0));
    inst.setValue(4, features.getCountOfWhMarkers());
    inst.setValue(5, Utils.missingValue());
    try {
        return SentenceType.valueOf(classifier.classifyInstance(inst));
    } catch (Exception e) {
        // FIX: preserve the original exception as the cause instead of
        // discarding it, so failures remain diagnosable.
        throw new RuntimeException("Can't classify", e);
    }
}
/**
 * Classifies a question sentence into a QuestionType. Non-questions are
 * mapped to QuestionType.NA without touching the classifier.
 *
 * @param sentence the sentence to classify
 * @return the predicted question type, or NA for non-questions
 * @throws RuntimeException if the underlying classifier fails
 */
public QuestionType classifyQuestion(Sentence sentence) {
    if (!sentence.isQuestion()) {
        return QuestionType.NA;
    }
    QuestionTypeClassifier.Features features = questionTypeClassifier.classifyFeatures(sentence);
    // Four feature slots plus the (missing) class slot.
    Instance inst = new DenseInstance(5);
    inst.setDataset(dataSet);
    inst.setValue(0, features.getWhWord());
    inst.setValue(1, features.getWhWordPos());
    inst.setValue(2, features.getPosOfNext());
    inst.setValue(3, features.getRootPos());
    inst.setValue(4, Utils.missingValue());
    try {
        int ndx = (int) classifier.classifyInstance(inst);
        return QuestionType.valueOf(ndx);
    } catch (Exception e) {
        // FIX: preserve the original exception as the cause instead of
        // discarding it, so failures remain diagnosable.
        throw new RuntimeException("Not classified", e);
    }
}
/**
 * Builds a weight-1 instance for the given key, one value per requested
 * feature name. Present features contribute their numeric value; absent
 * numeric features become NaN (missing), all other absent types stay 0.
 *
 * @param featureNames the ordered feature names to extract
 * @param key the key whose feature values are read
 * @return a DenseInstance over the extracted values
 */
public Instance createInstance(List<String> featureNames, I key) {
    final int count = featureNames.size();
    final double[] values = new double[count];
    for (int idx = 0; idx < count; idx++) {
        String name = featureNames.get(idx);
        Feature<Object> feature = this.featureValues.get(key).get(name);
        if (feature != null) {
            values[idx] = feature.getDoubleValue();
        } else {
            Class<Object> declared = features.get(name);
            boolean numeric = declared.equals(Double.class)
                || declared.equals(Float.class)
                || declared.equals(Integer.class);
            // Missing numeric -> NaN; Boolean/String (and anything else)
            // keeps the array default of 0.
            values[idx] = numeric ? Double.NaN : 0;
        }
    }
    return new DenseInstance(1.0, values);
}
public static Instances createXInstances(XYForRegression xy) { // build attributes ArrayList<Attribute> attributes = new ArrayList<Attribute>(); for (String attrName : xy.getAttrNames() ) { // attribute name is the polynomial power Attribute xattr = new Attribute(attrName); attributes.add(xattr); } double[][] Xarr = xy.getX(); int numInstances = Xarr.length; int numAttr = Xarr[0].length; Instances Xinst = new Instances("X", attributes, numInstances); // fill instances for (int i = 0; i < numInstances; ++i) { Instance row = new DenseInstance(numAttr); for (int j = 0; j < numAttr; ++j) { row.setValue(j, Xarr[i][j]); } Xinst.add(row); } return Xinst; }
/**
 * Trains the Naive Bayes model: one instance per (term, method) pair,
 * with opcode-sequence features and the term's vocabulary position as
 * the class value.
 *
 * @param data the labelled training pairs
 * @param vocab the term vocabulary used as the class space
 * @throws Exception if Weka fails to build the classifier
 */
public void train(TrainData data, TermVocabulary vocab) throws Exception {
    this.vocab = vocab;
    Instances trainset = buildInstances();
    for (Pair<String, BytecodeData.MethodData> pair : data.data()) {
        double[] values = seqToFeats(pair.getRight().getExtOpcodeSeq());
        // The last slot is the class: the term's position in the vocabulary.
        values[values.length - 1] = vocab.termToPos(pair.getKey());
        Instance inst = new DenseInstance(1.0, values);
        inst.setDataset(trainset);
        trainset.add(inst);
    }
    nb = new NaiveBayes();
    nb.buildClassifier(trainset);
}
/**
 * Predicts a probability per vocabulary term for the given method using
 * the trained Naive Bayes model.
 *
 * @param method the method whose opcode sequence is featurized
 * @return a map from each vocabulary term to its predicted probability
 * @throws Exception if Weka fails to score the instance
 */
public Map<String, Double> predict(BytecodeData.MethodData method) throws Exception {
    if (testHeader == null) {
        testHeader = buildInstances();
    }
    double[] values = seqToFeats(method.getExtOpcodeSeq());
    values[values.length - 1] = 0.0; // class slot is just a placeholder
    Instance inst = new DenseInstance(1.0, values);
    inst.setDataset(testHeader);
    testHeader.add(inst);

    double[] distribution = nb.distributionForInstance(inst);
    Map<String, Double> probabilities = new HashMap<>();
    int pos = 0;
    for (String term : vocab.terms()) {
        probabilities.put(term, distribution[pos++]);
    }
    // Keep the reusable header empty between calls.
    testHeader.clear();
    return probabilities;
}
public void train(TrainData data, TermVocabulary vocab) throws Exception { this.vocab = vocab; Instances trainset = buildInstances(); for (Pair<String, BytecodeData.MethodData> p : data.data()) { double[] instV = seqToFeats(p.getRight().getExtOpcodeSeq()); instV[instV.length - 1] = (double) vocab.termToPos(p.getKey()); Instance inst = new DenseInstance(1.0, instV); inst.setDataset(trainset); trainset.add(inst); } frf = new FastRandomForest(); frf.setSeed(1993); // frf.setMaxDepth(20); // frf.setNumThreads(1); frf.setNumTrees(numTrees); frf.buildClassifier(trainset); }
/**
 * Predicts a probability per vocabulary term for the given method using
 * the trained random forest.
 *
 * @param method the method whose opcode sequence is featurized
 * @return a map from each vocabulary term to its predicted probability
 * @throws Exception if the forest fails to score the instance
 */
public Map<String, Double> predict(BytecodeData.MethodData method) throws Exception {
    if (testHeader == null) {
        testHeader = buildInstances();
    }
    double[] values = seqToFeats(method.getExtOpcodeSeq());
    values[values.length - 1] = 0.0; // class slot is just a placeholder
    Instance inst = new DenseInstance(1.0, values);
    inst.setDataset(testHeader);
    testHeader.add(inst);

    double[] distribution = frf.distributionForInstance(inst);
    Map<String, Double> probabilities = new HashMap<>();
    int pos = 0;
    for (String term : vocab.terms()) {
        probabilities.put(term, distribution[pos++]);
    }
    // Keep the reusable header empty between calls.
    testHeader.clear();
    return probabilities;
}
/**
 * Launch the move centroids tasks
 *
 * @param clusters the cluster centroids
 * @return the number of empty clusters
 */
protected int launchMoveCentroids(Instances[] clusters) {
    int emptyClusterCount = 0;
    List<Future<double[]>> results = new ArrayList<Future<double[]>>();
    // Submit one centroid-recomputation task per non-empty cluster;
    // empty clusters are only counted, never recomputed.
    for (int i = 0; i < m_NumClusters; i++) {
        if (clusters[i].numInstances() == 0) {
            emptyClusterCount++;
        } else {
            Future<double[]> futureCentroid = m_executorPool.submit(new KMeansComputeCentroidTask(i, clusters[i]));
            results.add(futureCentroid);
        }
    }
    try {
        // Collect in submission order; Future.get() blocks until each
        // task finishes, so all centroids are added here.
        for (Future<double[]> d : results) {
            m_ClusterCentroids.add(new DenseInstance(1.0, d.get()));
        }
    } catch (Exception ex) {
        // NOTE(review): a failed or interrupted task is only logged; the
        // remaining futures are skipped, which can leave
        // m_ClusterCentroids with fewer centroids than expected — confirm
        // callers tolerate that.
        ex.printStackTrace();
    }
    return emptyClusterCount;
}
/** * Adds a training instance to our dataset, based on the coordinates of the * mouse on the panel. This method sets the x and y attributes and the class * (as defined by classAttIndex), and sets all other values as Missing. * * @param mouseX the x coordinate of the mouse, in pixels. * @param mouseY the y coordinate of the mouse, in pixels. * @param classAttIndex the index of the attribute that is currently selected * as the class attribute. * @param classValue the value to set the class to in our new point. */ public void addTrainingInstanceFromMouseLocation(int mouseX, int mouseY, int classAttIndex, double classValue) { // convert to coordinates in the training instance space. double x = convertFromPanelX(mouseX); double y = convertFromPanelY(mouseY); // build the training instance Instance newInstance = new DenseInstance(m_trainingData.numAttributes()); for (int i = 0; i < newInstance.numAttributes(); i++) { if (i == classAttIndex) { newInstance.setValue(i, classValue); } else if (i == m_xAttribute) { newInstance.setValue(i, x); } else if (i == m_yAttribute) { newInstance.setValue(i, y); } else { newInstance.setMissing(i); } } // add it to our data set. addTrainingInstance(newInstance); }
/**
 * Make an output instance given an input one
 *
 * @param inputI the input instance to process
 * @return the output instance with substrings replaced
 */
public Instance makeOutputInstance(Instance inputI) {
    double[] vals = new double[m_outputStructure.numAttributes()];
    String[] stringVals = new String[m_outputStructure.numAttributes()];
    // Split the input by attribute kind: non-missing string values are
    // collected for substring replacement, everything else is copied
    // numerically (missing strings fall through as NaN via value(i)).
    for (int i = 0; i < inputI.numAttributes(); i++) {
        if (inputI.attribute(i).isString() && !inputI.isMissing(i)) {
            stringVals[i] = inputI.stringValue(i);
        } else {
            vals[i] = inputI.value(i);
        }
    }
    // Apply every match rule in order; rules mutate stringVals in place.
    for (SubstringReplacerMatchRule mr : m_matchRules) {
        mr.apply(stringVals);
    }
    // Store the (possibly replaced) strings into the output header's
    // string attributes; vals[i] stays 0 for those slots, which indexes
    // the value just set on the attribute.
    for (int i = 0; i < m_outputStructure.numAttributes(); i++) {
        if (m_outputStructure.attribute(i).isString() && stringVals[i] != null) {
            m_outputStructure.attribute(i).setStringValue(stringVals[i]);
        }
    }
    Instance result = new DenseInstance(inputI.weight(), vals);
    result.setDataset(m_outputStructure);
    return result;
}
/**
 * Builds a weight-1 instance from an n-gram-graph feature vector: two
 * blocks of four similarity features (graph index 0 and 1) followed by
 * the nominal class value at attribute 8.
 *
 * @param vSource the feature vector to read
 * @param data the dataset whose attribute 8 defines the label values
 * @return the populated instance
 */
public Instance fillFeatureVector(NGGFeatureVector vSource, Instances data) {
    double[] row = new double[data.numAttributes()];
    for (int g = 0; g < 2; g++) {
        int base = g * 4;
        row[base] = vSource.getContainmentSimilarityArrayAtIndex(g);
        row[base + 1] = vSource.getSizeSimilarityArrayAtIndex(g);
        row[base + 2] = vSource.getValueSimilarityArrayAtIndex(g);
        row[base + 3] = vSource.getNVSArrayAtIndex(g);
    }
    // Class slot: nominal index of the label within attribute 8.
    // NOTE(review): indexOfValue returns -1 for an unknown label — confirm
    // labels always come from the attribute's declared value set.
    row[8] = data.attribute(8).indexOfValue(vSource.getLabel());
    return new DenseInstance(1.0, row);
}
/**
 * Picks the member instance closest to the computed centroid coordinates
 * (i.e. the medoid), using myDistance as the metric.
 *
 * @param vals the raw centroid coordinates
 * @param members the candidate instances
 * @return the member nearest to the centroid
 */
public Instance decideCentroid(double[] vals, Instances members) {
    // Wrap the raw coordinates so myDistance can compare instances.
    Instance centroid = new DenseInstance(vals.length);
    for (int a = 0; a < vals.length; a++) {
        centroid.setValue(a, vals[a]);
    }

    int best = 0;
    double bestDistance = Double.MAX_VALUE;
    for (int m = 0; m < members.numInstances(); m++) {
        double distance = myDistance(centroid, members.instance(m));
        if (distance < bestDistance) {
            bestDistance = distance;
            best = m;
        }
    }
    return members.instance(best);
}
/** Adds a training instance to our dataset, based on the coordinates of the mouse on the panel. This method sets the x and y attributes and the class (as defined by classAttIndex), and sets all other values as Missing. * @param mouseX the x coordinate of the mouse, in pixels. * @param mouseY the y coordinate of the mouse, in pixels. * @param classAttIndex the index of the attribute that is currently selected as the class attribute. * @param classValue the value to set the class to in our new point. */ public void addTrainingInstanceFromMouseLocation(int mouseX, int mouseY, int classAttIndex, double classValue) { //convert to coordinates in the training instance space. double x = convertFromPanelX(mouseX); double y = convertFromPanelY(mouseY); //build the training instance Instance newInstance = new DenseInstance(m_trainingData.numAttributes()); for (int i = 0; i < newInstance.numAttributes(); i++) { if (i == classAttIndex) { newInstance.setValue(i,classValue); } else if (i == m_xAttribute) newInstance.setValue(i,x); else if (i == m_yAttribute) newInstance.setValue(i,y); else newInstance.setMissing(i); } //add it to our data set. addTrainingInstance(newInstance); }
private Instances featuresToInstance(ArrayList<Double> features) { // Create the instance FastVector attributes = new FastVector(features.size()); for(int i = 0; i < features.size(); i++) attributes.addElement(new Attribute(""+i)); Instances dataUnlabeled = new Instances("instances", attributes, 0); Instance instance = new DenseInstance(features.size()); DecimalFormat df = new DecimalFormat("#.######"); for(int i = 0; i < features.size(); i++) instance.setValue(i, Double.valueOf(df.format(features.get(i)))); dataUnlabeled.add(instance); dataUnlabeled.setClassIndex(dataUnlabeled.numAttributes() - 1); return dataUnlabeled; }
/**
 * Builds a classifier instance for an entailment pair: one slot per
 * feature (numeric or string) plus a trailing class slot holding the
 * pair's entailment label, falling back to the first known category.
 *
 * @param pair the annotated pair to featurize
 * @return the populated instance, attached to the shared header
 */
private Instance buildInstance(AnnotatedEntailmentPair pair) {
    Instance instance = new DenseInstance(features.size() + 1);
    instance.setDataset(instances);
    int index = 0;
    for (Feature feature : features) {
        if (feature.isNumeric()) {
            instance.setValue(index, feature.doubleValue(pair));
        } else {
            instance.setValue(index, feature.stringValue(pair));
        }
        index++;
    }
    // Class slot: use the gold label only when it is present and one of
    // the known categories; otherwise default to the first category.
    String entailment = pair.getEntailment();
    boolean known = entailment != null
        && entailment.length() > 0
        && categories.contains(entailment);
    instance.setValue(index, known ? entailment : categories.get(0));
    return instance;
}
/** * Method to get a Instances object from a featureVector * @param featureVector the featureVector to convert in Instances * @return an Instances object (in other words a set of Instance) */ public Instances featureVectorToInstances(ArrayList<Double> featureVector) { Instances instances = new Instances("Instances", attributes, 0); DenseInstance instance = new DenseInstance(attributes.size()); for (int i = 0; i < featureVector.size(); i++) instance.setValue(i, featureVector.get(i)); // instance.setValue(featureVector.size(), -1); instances.add(instance); // Set class attribute instances.setClassIndex(attributes.size() - 1); return instances; }
/** * Method to get a Instances object from a featureMatrix * @param featureMatrix the featureMatrix to convert in Instances * @return an Instances object (in other words a set of Instance) */ public Instances featureMatrixToInstances( ArrayList<ArrayList<Double>> featureMatrix) { Instances instances = new Instances("Instances", attributes, 0); DenseInstance instance = new DenseInstance(attributes.size()); for (int i = 0; i < featureMatrix.size(); i++) { for (int j = 0; j < featureMatrix.size(); j++) { instance.setValue(j, featureMatrix.get(i).get(j)); } // instance.setValue(featureVector.size(), -1); instances.add(instance); // Set class attribute instances.setClassIndex(attributes.size() - 1); } return instances; }
/**
 * Classifies a sentence into one of the five sentiment classes.
 *
 * @param sentence the raw text to classify
 * @return the predicted five-way sentiment class
 * @throws Exception if the underlying classifier fails
 */
public SentimentClass.FiveWayClazz classify(String sentence) throws Exception {
    double[] instanceValue = new double[dataRaw.numAttributes()];
    // Attribute 0 is the string attribute holding the raw text; addStringValue
    // registers the text and returns its index within the attribute.
    instanceValue[0] = dataRaw.attribute(0).addStringValue(sentence);
    Instance toClassify = new DenseInstance(1.0, instanceValue);
    dataRaw.setClassIndex(1);
    toClassify.setDataset(dataRaw);
    double prediction = this.classifier.classifyInstance(toClassify);
    // FIX: removed an unused distributionForInstance call (its result was
    // never read) and the dead commented-out tie-handling code.
    return SentimentClass.FiveWayClazz.values()[(int) prediction];
}
/**
 * Classifies a sentence into one of the three sentiment classes. An
 * exact tie between the two class probabilities is read as NEUTRAL.
 *
 * @param sentence the raw text to classify
 * @return the predicted three-way sentiment class
 * @throws Exception if the underlying classifier fails
 */
public SentimentClass.ThreeWayClazz classify(String sentence) throws Exception {
    // Attribute 0 is the string attribute holding the raw text.
    double[] values = new double[dataRaw.numAttributes()];
    values[0] = dataRaw.attribute(0).addStringValue(sentence);
    Instance toClassify = new DenseInstance(1.0, values);
    dataRaw.setClassIndex(1);
    toClassify.setDataset(dataRaw);

    double prediction = this.classifier.classifyInstance(toClassify);
    double[] distribution = this.classifier.distributionForInstance(toClassify);

    if (distribution[0] == distribution[1]) {
        return SentimentClass.ThreeWayClazz.NEUTRAL;
    }
    return SentimentClass.ThreeWayClazz.values()[(int) prediction];
}
/**
 * Transform the word vectors into Weka Instances. The attribute names are
 * colX, where X is 0 to vector_size - 1, and the values are "double"
 * numbers.
 *
 * @return the Instances set.
 */
public Instances produceInstances() {
    ArrayList<Attribute> columns = new ArrayList<Attribute>();
    for (int c = 0; c < vector_size; c++) {
        columns.add(new Attribute("col" + c));
    }

    Instances result = new Instances("kmeansInstances", columns, vocab_size);
    // One instance per word, copying its vector component-by-component.
    for (String word : vectors.keySet()) {
        Instance row = new DenseInstance(columns.size());
        for (int c = 0; c < vector_size; c++) {
            row.setValue(columns.get(c), vectors.get(word).get(c));
        }
        result.add(row);
    }
    return result;
}
/** * toWekaInstance * * @param dataset * @return */ public Instance toWekaInstance(Instances dataset) { // create instance DenseInstance instance = new DenseInstance(13); instance.setDataset(dataset); // set values instance.setValue(0, this.getMean()); instance.setValue(1, this.getMedian()); instance.setValue(2, this.getMin()); instance.setValue(3, this.getMax()); instance.setValue(4, this.getStd()); instance.setValue(5, this.getLowQuantile()); instance.setValue(6, this.getHighQuantile()); instance.setValue(7, this.getIqr()); instance.setValue(8, this.getKurtosis()); instance.setValue(9, this.getRange()); instance.setValue(10, this.getPower()); instance.setValue(11, this.getTotalF0()); instance.setValue(12, stressed ? "stressed" : "unstressed"); // return instance return instance; }
// Builds the shared test dataset: one numeric attribute "a" plus a
// nominal class attribute {e, r}, populated with two rows.
@BeforeClass
public static void setUpBeforeClass() throws Exception {
    ArrayList<Attribute> atts = new ArrayList<>();
    ArrayList<String> classes = new ArrayList<>();
    classes.add("e");
    classes.add("r");
    atts.add(new Attribute("a"));
    atts.add(new Attribute("class", classes));
    dataset = new Instances("TestInstances", atts, 0);
    dataset.setClassIndex(dataset.numAttributes() - 1);
    // Each row is a {feature, class-index} pair.
    double[][] data = new double[][]{
        new double[]{1, 1},
        new double[]{1, 0}
    };
    for (double[] datum : data) {
        // NOTE(review): DenseInstance(double, double[]) takes a WEIGHT as
        // its first argument, so each instance here gets weight 2 — confirm
        // this is intended and not a mix-up with the attribute count.
        Instance instance = new DenseInstance(2, datum);
        dataset.add(instance);
    }
}
// Builds the shared test fixtures: a single-attribute dataset and two
// instances — one with a missing (NaN) value and one with value 0.
@BeforeClass
public static void setupBeforeClass() {
    ArrayList<Attribute> atts = new ArrayList<>();
    att = new Attribute("a");
    atts.add(att);
    Instances dataset = new Instances("TestInstances", atts, 0);
    dataset.setClassIndex(dataset.numAttributes() - 1);
    double[][] data = new double[][]{
        new double[]{Double.NaN},
        new double[]{0}
    };
    instances = new ArrayList<>();
    for (double[] datum : data) {
        // NOTE(review): DenseInstance(double, double[]) takes a WEIGHT as
        // its first argument, so each instance here gets weight 6 — confirm
        // this is intended and not a mix-up with the attribute count.
        Instance instance = new DenseInstance(6, datum);
        instance.setDataset(dataset);
        instances.add(instance);
    }
}
public static final boolean classify(final double[] vector) throws Exception { // Obtain or generate a Thread-local instance Operator op; synchronized (table) { // avoid clashes within weka final Thread t = Thread.currentThread(); op = table.get(t); if (null == op) { op = new Operator(); table.put(t, op); } } // Future weka versions will use new DenseInstance(1, vector) instead final Instance ins = new DenseInstance(1, vector); ins.setDataset(op.data); // Was trained to return true or false, represented in weka as 0 or 1 return 1 == ((int) Math.round(op.c.classifyInstance(ins))); }
private void fillData(double[] featureValues, String className, Instances data) { double[] vals = new double[data.numAttributes()]; if (vals.length != (featureValues.length + 1)) { if (FrameworkContext.WARN) Log.w(TAG, "Number of feature values and weka instance values differs."); } for (int i = 0; i < featureValues.length; i++) { vals[i] = featureValues[i]; } vals[vals.length - 1] = attClassVals.indexOf(className); DenseInstance instance = new DenseInstance(1.0, vals); if (isLogDirectlyToFile) { instance.setDataset(data); logArffData(instance.toString()); } else { // add data.add(instance); } }
/**
 * Builds a 3-value test instance against the training header so the
 * nominal string values resolve to the right indices.
 *
 * @param binding value for attribute 0
 * @param multicolor value for attribute 1
 * @param genre value for attribute 2
 * @return the populated instance
 */
private Instance getTestInstance(
    String binding, String multicolor, String genre) {
    Instance instance = new DenseInstance(3);
    instance.setDataset(trainingData);
    String[] values = {binding, multicolor, genre};
    for (int att = 0; att < values.length; att++) {
        instance.setValue(trainingData.attribute(att), values[att]);
    }
    return instance;
}
/**
 * Converts documents into a term-frequency Instances set — one numeric
 * attribute per vocabulary word plus a numeric "Class" attribute — and
 * writes the result out as an ARFF file.
 *
 * @param dataSet the documents to convert
 * @param vocabulary the word list defining the feature columns
 * @param fileName used as both the relation name and the output path
 * @return the built Instances (returned even if the file write fails)
 */
public static Instances convertToArff(List<Document> dataSet, List<String> vocabulary, String fileName) {
    int dataSetSize = dataSet.size();
    /* Create features */
    ArrayList<Attribute> attributes = new ArrayList<>();
    for (int i = 0; i < vocabulary.size(); i++) {
        attributes.add(new Attribute("word_" + i));
    }
    Attribute classAttribute = new Attribute("Class");
    attributes.add(classAttribute);
    /* Add examples */
    System.out.println("Building instances...");
    Instances trainingDataSet = new Instances(fileName, attributes, 0);
    for (int k = 0; k < dataSetSize; k++) {
        Document document = dataSet.get(k);
        Instance example = new DenseInstance(attributes.size());
        // Feature i = raw frequency of vocabulary word i in the document.
        // NOTE(review): Collections.frequency rescans the term list for
        // every word, making this O(|vocab| * |terms|) per document — a
        // per-document term-count map would make it linear.
        for (int i = 0; i < vocabulary.size(); i++) {
            String word = vocabulary.get(i);
            example.setValue(i, Collections.frequency(document.getTerms(), word));
        }
        example.setValue(classAttribute, document.getDocumentClass());
        trainingDataSet.add(example);
        // Console progress indicator, overwritten in place via '\r'.
        int progress = (int) ((k * 100.0) / dataSetSize);
        System.out.printf("\rPercent completed: %3d%%", progress);
    }
    trainingDataSet.setClass(classAttribute);
    System.out.println();
    System.out.println("Writing to file ...");
    try {
        ArffSaver saver = new ArffSaver();
        saver.setInstances(trainingDataSet);
        saver.setFile(new File(fileName));
        saver.writeBatch();
    } catch (IOException e) {
        // NOTE(review): a save failure is only logged; the in-memory
        // Instances are still returned to the caller.
        e.printStackTrace();
    }
    return trainingDataSet;
}
/**
 * Builds a Weka training set from the observations, using only the
 * non-string attributes from attributesList. Each observation becomes
 * one instance whose values are filled in by updateInstance; the GENDER
 * attribute is designated as the class.
 *
 * @param attributesList named attribute pairs; string-typed attributes
 *          are filtered out before the set is built
 * @param observations the rows to convert, one instance each
 * @return the populated training set with the class index set to GENDER
 */
public static Instances constructInstances(List<Pair<String, Attribute>> attributesList, List<Observation> observations) {
    // Keep only non-string attributes; the same filtered list drives both
    // the header and the per-row value filling, so indices stay aligned.
    List<Pair<String, Attribute>> attributes = attributesList
        .stream()
        .filter(pair -> !pair.getValue().isString())
        .collect(Collectors.toList());
    // NOTE(review): this cast assumes Collectors.toList() yields an
    // ArrayList — true for current JDKs but not guaranteed by contract.
    Instances trainingSet = new Instances(
        "Standard set",
        (ArrayList<Attribute>) attributes
            .stream()
            .map(pair -> pair.getValue())
            .collect(Collectors.toList()),
        observations.size());
    for (int i = 0; i < observations.size(); i++) {
        Instance instance = new DenseInstance(attributes.size());
        // updateInstance (defined elsewhere) writes the observation's value
        // for each attribute into the instance.
        for (Pair<String, Attribute> attributePair : attributes) {
            updateInstance(attributePair, instance, observations, i);
        }
        trainingSet.add(instance);
    }
    // Class attribute = position of GENDER among the filtered attribute names.
    trainingSet.setClassIndex(attributes.stream().map(pair -> pair.getKey()).collect(Collectors.toList()).indexOf(Fields.GENDER.name()));
    return trainingSet;
}
/**
 * Creates a data point at the given coordinates, lets the tree predict
 * its class (stored as attribute 2), appends it to the dataset, and
 * refreshes the chart series. Classification failures are only logged.
 *
 * @param xValue the x coordinate of the new point
 * @param yValue the y coordinate of the new point
 */
private void reloadSeries(Number xValue, Number yValue) {
    try {
        Instance point = new DenseInstance(NUMBER_OF_CLASSES);
        point.setDataset(data);
        point.setValue(0, xValue.doubleValue());
        point.setValue(1, yValue.doubleValue());

        double predicted = tree.classifyInstance(point);
        point.setValue(2, predicted);

        data.add(point);
        reloadSeries();
    } catch (Exception e) {
        e.printStackTrace();
    }
}