/** * Validate a data split of train and validation data * * @param trainData Training data * @param valData Validation data * @throws WekaException Invalid validation split */ protected void validateSplit(Instances trainData, Instances valData) throws WekaException { if (earlyStopping.getValidationSetPercentage() < 10e-8) { // Use no validation set at all return; } int classIndex = trainData.classIndex(); int valDataNumDinstinctClassValues = valData.numDistinctValues(classIndex); int trainDataNumDistinctClassValues = trainData.numDistinctValues(classIndex); if (trainData.numClasses() > 1 && valDataNumDinstinctClassValues != trainDataNumDistinctClassValues) { throw new InvalidValidationPercentageException( "The validation data did not contain the same classes as the training data. " + "You should increase the validation percentage in the EarlyStopping configuration."); } }
/**
 * Batch scoring method. Delegates to the base learner's batch prediction when
 * it implements BatchPredictor; otherwise falls back to calling
 * distributionForInstance() once per instance.
 *
 * @param insts the instances to get predictions for
 * @return an array of probability distributions, one for each instance
 * @throws Exception if a problem occurs
 */
public double[][] distributionsForInstances(Instances insts) throws Exception {
  if (!(getClassifier() instanceof BatchPredictor)) {
    // Base learner cannot batch-predict: score one instance at a time.
    double[][] dists = new double[insts.numInstances()][insts.numClasses()];
    for (int i = 0; i < insts.numInstances(); i++) {
      dists[i] = distributionForInstance(insts.instance(i));
    }
    return dists;
  }

  // Push the whole batch through the filter, then delegate.
  Instances filteredInsts = Filter.useFilter(insts, m_Filter);
  if (filteredInsts.numInstances() != insts.numInstances()) {
    throw new WekaException(
        "FilteredClassifier: filter has returned more/less instances than required.");
  }
  return ((BatchPredictor) getClassifier()).distributionsForInstances(filteredInsts);
}
/** * Prints the footer to the buffer. This will also store the generated output * in a file if an output file was specified. * * @throws Exception if check fails */ public void printFooter() throws Exception { String error; BufferedWriter writer; if ((error = checkBasic()) != null) { throw new WekaException(error); } doPrintFooter(); // write output to file if (!m_OutputFile.isDirectory()) { try { writer = new BufferedWriter(new FileWriter(m_OutputFile)); writer.write(m_FileBuffer.toString()); writer.newLine(); writer.flush(); writer.close(); } catch (Exception e) { e.printStackTrace(); } } }
/** * Prints the footer to the buffer. This will also store the generated * output in a file if an output file was specified. * * @throws Exception if check fails */ public void printFooter() throws Exception { String error; BufferedWriter writer; if ((error = checkBasic()) != null) throw new WekaException(error); doPrintFooter(); // write output to file if (!m_OutputFile.isDirectory()) { try { writer = new BufferedWriter(new FileWriter(m_OutputFile)); writer.write(m_FileBuffer.toString()); writer.newLine(); writer.flush(); writer.close(); } catch (Exception e) { e.printStackTrace(); } } }
/**
 * Batch scoring method. Calls the appropriate method for the base learner
 * if it implements BatchPredictor. Otherwise it simply calls the
 * distributionForInstance() method repeatedly.
 *
 * @param insts the instances to get predictions for
 * @return an array of probability distributions, one for each instance
 * @throws Exception if a problem occurs
 */
public double[][] distributionsForInstances(Instances insts) throws Exception {
  if (getClassifier() instanceof BatchPredictor) {
    // Filter the whole batch at once, then delegate batch prediction.
    Instances filtered = Filter.useFilter(insts, m_Filter);
    boolean countChanged = filtered.numInstances() != insts.numInstances();
    if (countChanged) {
      throw new WekaException("FilteredClassifier: filter has returned more/less instances than required.");
    }
    BatchPredictor batchPredictor = (BatchPredictor) getClassifier();
    return batchPredictor.distributionsForInstances(filtered);
  }

  // Fallback: one prediction per instance.
  int count = insts.numInstances();
  double[][] dists = new double[count][insts.numClasses()];
  for (int idx = 0; idx < count; idx++) {
    dists[idx] = distributionForInstance(insts.instance(idx));
  }
  return dists;
}
/** * Build the Zoomodel instance * * @return ComputationGraph instance * @throws WekaException Either the .init operation on the current zooModel was not supported or * the data shape does not fit the chosen zooModel */ protected void createZooModel() throws WekaException { final AbstractInstanceIterator it = getInstanceIterator(); final boolean isImageIterator = it instanceof ImageInstanceIterator; // Make sure data is convolutional if (!isImageIterator) { throw new WrongIteratorException( "ZooModels currently only support images. " + "Please setup an ImageInstanceIterator."); } // Get the new width/heigth/channels from the iterator ImageInstanceIterator iii = (ImageInstanceIterator) it; int newWidth = iii.getWidth(); int newHeight = iii.getHeight(); int channels = iii.getNumChannels(); boolean initSuccessful = false; while (!initSuccessful) { // Increase width and height int[] newShape = new int[] {channels, newHeight, newWidth}; int[][] shapeWrap = new int[][] {newShape}; setInstanceIterator(new ResizeImageInstanceIterator(iii, newWidth, newHeight)); initSuccessful = initZooModel(trainData.numClasses(), getSeed(), shapeWrap); newWidth *= 1.2; newHeight *= 1.2; if (!initSuccessful) { log.warn( "The shape of the data did not fit the chosen " + "model. It was therefore resized to ({}x{}x{}).", channels, newHeight, newWidth); } } }
/**
 * Prints the classification to the buffer.
 *
 * @param classifier the classifier to use for printing the classification
 * @param inst the instance to print
 * @param index the index of the instance
 * @throws Exception if check fails or error occurs during printing of
 *           classification
 */
public void printClassification(Classifier classifier, Instance inst, int index) throws Exception {
  final String error = checkBasic();
  if (error != null) {
    throw new WekaException(error);
  }
  doPrintClassification(classifier, inst, index);
}
/**
 * Prints the classification to the buffer.
 *
 * @param dist the distribution from classifier for the supplied instance
 * @param inst the instance to print
 * @param index the index of the instance
 * @throws Exception if check fails or error occurs during printing of
 *           classification
 */
public void printClassification(double[] dist, Instance inst, int index) throws Exception {
  final String error = checkBasic();
  if (error != null) {
    throw new WekaException(error);
  }
  doPrintClassification(dist, inst, index);
}
/** * Runs the specified script. All options that weren't "consumed" (like "-s" * for the script filename), will be used as commandline arguments for the * actual script. * * @param script the script object to use * @param args the commandline arguments * @throws Exception if execution fails */ public static void runScript(Script script, String[] args) throws Exception { String tmpStr; File scriptFile; Vector<String> options; int i; if (Utils.getFlag('h', args) || Utils.getFlag("help", args)) { System.out.println(makeOptionString(script)); } else { // process options tmpStr = Utils.getOption('s', args); if (tmpStr.length() == 0) { throw new WekaException("No script supplied!"); } else { scriptFile = new File(tmpStr); } script.setOptions(args); // remove empty elements from array options = new Vector<String>(); for (i = 0; i < args.length; i++) { if (args[i].length() > 0) { options.add(args[i]); } } // run script script.run(scriptFile, options.toArray(new String[options.size()])); } }
/**
 * Prints the classification to the buffer.
 *
 * @param classifier the classifier to use for printing the classification
 * @param inst the instance to print
 * @param index the index of the instance
 * @throws Exception if check fails or error occurs during printing of classification
 */
public void printClassification(Classifier classifier, Instance inst, int index) throws Exception {
  String failureMsg = checkBasic();
  if (failureMsg != null) {
    throw new WekaException(failureMsg);
  }

  doPrintClassification(classifier, inst, index);
}
/**
 * Prints the classification to the buffer.
 *
 * @param dist the distribution from classifier for the supplied instance
 * @param inst the instance to print
 * @param index the index of the instance
 * @throws Exception if check fails or error occurs during printing of classification
 */
public void printClassification(double[] dist, Instance inst, int index) throws Exception {
  String failureMsg = checkBasic();
  if (failureMsg != null) {
    throw new WekaException(failureMsg);
  }

  doPrintClassification(dist, inst, index);
}
/** * Tries to open the file. * * @param file the file to open * @return true if successfully read */ public boolean open(File file) { boolean result; String content; if (m_Document == null) return true; // Warn if extension unwknown if (!checkExtension(file)) System.err.println("Extension of file '" + file + "' is unknown!"); try { // clear old content m_Document.remove(0, m_Document.getLength()); // add new content content = ScriptUtils.load(file); if (content == null) throw new WekaException("Error reading content of file '" + file + "'!"); m_Document.insertString(0, content, null); m_Modified = false; m_Filename = file; result = true; } catch (Exception e) { e.printStackTrace(); try { m_Document.remove(0, m_Document.getLength()); } catch (Exception ex) { // ignored } result = false; m_Filename = null; } return result; }
/** * Runs the specified script. All options that weren't "consumed" (like * "-s" for the script filename), will be used as commandline arguments for * the actual script. * * @param script the script object to use * @param args the commandline arguments * @throws Exception if execution fails */ public static void runScript(Script script, String[] args) throws Exception { String tmpStr; File scriptFile; Vector<String> options; int i; if (Utils.getFlag('h', args) || Utils.getFlag("help", args)) { System.out.println(makeOptionString(script)); } else { // process options tmpStr = Utils.getOption('s', args); if (tmpStr.length() == 0) throw new WekaException("No script supplied!"); else scriptFile = new File(tmpStr); script.setOptions(args); // remove empty elements from array options = new Vector<String>(); for (i = 0; i < args.length; i++) { if (args[i].length() > 0) options.add(args[i]); } // run script script.run(scriptFile, options.toArray(new String[options.size()])); } }
/**
 * Determines the output format based on the input format and returns
 * this. In case the output format cannot be returned immediately, i.e.,
 * hasImmediateOutputFormat() returns false, then this method will called
 * from batchFinished() after the call of preprocess(Instances), in which,
 * e.g., statistics for the actual processing step can be gathered.
 *
 * @param inputFormat the input format to base the output format on
 * @return the output format
 * @throws Exception in case the determination goes wrong
 */
@Override
protected Instances determineOutputFormat(Instances inputFormat) throws Exception {
  m_AttributeIndices.setUpper(inputFormat.numAttributes() - 1);
  int[] classIndices = m_AttributeIndices.getSelection();
  if (classIndices.length == 0) {
    throw new WekaException("No attributes defined as class attributes!");
  }

  // new order: the selected class attributes first ...
  StringBuilder newOrder = new StringBuilder();
  for (int index : classIndices) {
    if (newOrder.length() > 0) {
      newOrder.append(",");
    }
    newOrder.append("" + (index + 1));
  }
  // ... followed by all remaining attributes in their original order
  for (int att = 0; att < inputFormat.numAttributes(); att++) {
    if (!m_AttributeIndices.isInRange(att)) {
      newOrder.append(",");
      newOrder.append("" + (att + 1));
    }
  }

  m_Reorder.setAttributeIndices(newOrder.toString());
  m_Reorder.setInputFormat(inputFormat);

  Instances output = m_Reorder.getOutputFormat();
  output.setClassIndex(classIndices.length);
  output.setRelationName("-C " + classIndices.length);
  return output;
}
/**
 * Creates the learning data sets for extracting repair patterns.
 *
 * Command-line entry point: parses options, then for each data set path
 * evaluates clustering quality over a sweep of epsilon values and prints
 * the results as CSV.
 *
 * @param args command line arguments (see LearningDataSetEvaluationOptions)
 * @throws Exception if data set construction, clustering or evaluation fails
 */
public static void main(String[] args) throws Exception {

	LearningDataSetEvaluationOptions options = new LearningDataSetEvaluationOptions();
	CmdLineParser parser = new CmdLineParser(options);

	try {
		parser.parseArgument(args);
	} catch (CmdLineException e) {
		// bad arguments: show usage and bail out
		LearningDataSetEvaluation.printUsage(e.getMessage(), parser);
		return;
	}

	/* Print the help page. */
	if(options.getHelp()) {
		LearningDataSetEvaluation.printHelp(parser);
		return;
	}

	// Sweep epsilon from min to max in steps of interval; one result column
	// per step, one row per data set path.
	double min = 0.1, max = 6, interval = 0.2;
	EvaluationResult[][] results = new EvaluationResult[options.getDataSetPaths().length][(int)Math.ceil((max-min)/interval)];

	/* Evaluate each data set. */
	for(int i = 0; i < options.getDataSetPaths().length; i++) {
		String dataSetPath = options.getDataSetPaths()[i];

		/* Evaluate each value of epsilon. */
		for(double epsilon = min; epsilon <= max; epsilon += interval) {

			// NOTE(review): j is derived from a floating-point accumulation of
			// epsilon; rounding drift could map two epsilons to the same index
			// (or overflow the column count) for other min/interval choices —
			// verify if these constants ever change.
			int j = (int)(epsilon / interval);

			double complexityWeight = 0.2;

			/* Re-construct the data set. */
			LearningDataSet dataSet = LearningDataSet.createLearningDataSet(
					dataSetPath,
					options.getOraclePath(),
					new LinkedList<KeywordUse>(),
					epsilon,
					complexityWeight,
					options.getMinClusterSize());

			/* Store the total instances in the dataset before filtering. */
			ClusterMetrics clusterMetrics = new ClusterMetrics();

			/* Pre-process the file. */
			dataSet.preProcess(getBasicRowFilterQuery(options.getMaxChangeComplexity()));
			clusterMetrics.setTotalInstances(dataSet.getSize());
			dataSet.preProcess(getStatementRowFilterQuery(options.getMaxChangeComplexity()));

			/* Cluster the data set. */
			try {
				dataSet.getWekaClusters(clusterMetrics);
			} catch (WekaException ex) {
				// clustering failure is logged; evaluation still runs on the metrics
				logger.error("Weka error on building clusters.", ex);
			}

			/* Evaluate the clusters. */
			EvaluationResult result = dataSet.evaluate(clusterMetrics);
			results[i][j] = result;

		}

	}

	LearningDataSetEvaluation.printCSV(results, new String[]{"3", "5", "6", "7"});

	//System.out.println("-----------------");
	//RLineChart.printPRChart(results);
	//System.out.println("-----------------");
	//RLineChart.printDensityChart(results, new String[]{"3", "5", "6", "7"});

}
/**
 * Computes the distribution for a given instance.
 * <br>
 * Portions of the code were taken from the LibLINEAR class. Original author:
 * Benedikt Waldvogel (mail at bwaldvogel.de)
 *
 * @param instance The instance for which distribution is computed
 * @return The distribution
 * @throws Exception If the distribution can't be computed successfully
 */
@Override
public double[] distributionForInstance(Instance instance) throws Exception {

	///////////////////////////// Copied from LibLINEAR class /////////////////////////////////
	// Apply the same preprocessing pipeline used during training:
	// missing-value replacement, nominal-to-binary conversion, optional filter.
	m_ReplaceMissingValues.input(instance);
	m_ReplaceMissingValues.batchFinished();
	instance = m_ReplaceMissingValues.output();

	m_NominalToBinary.input(instance);
	m_NominalToBinary.batchFinished();
	instance = m_NominalToBinary.output();

	if (m_Filter != null) {
		m_Filter.input(instance);
		m_Filter.batchFinished();
		instance = m_Filter.output();
	}

	double[] result = new double[instance.numClasses()];
	///////////////////////////////////////////////////////////////////////////////////////////

	// Probability estimates only make sense for the logistic-regression solvers.
	if (instance.classAttribute().isNominal() && (m_ProbabilityEstimates))
		if (m_SolverType != SolverType.L2R_LR && m_SolverType != SolverType.L2R_LR_DUAL && m_SolverType != SolverType.L1R_LR)
			throw new WekaException("Probability estimation is currently only " + "supported for logistic regression");

	// One binary one-vs-all model per class: collect each model's decision value.
	for (int modelInd = 0; modelInd < models.length; modelInd++) {
		FeatureNode[] x = instanceToArray(instance, modelInd);
		double[] dec_values = new double[1];
		Linear.predictValues(models[modelInd], x, dec_values);
		// The result value is the distance from the separating hyperplane for the class that is being considered
		// If the distance is positive - the instance belongs to the class that is being considered; if it is negative - it does not
		// We do not remap the labels here since LibLINEAR always puts the +1 class at index 0, and we assigned the +1 value in training to the class whose binary one-vs-all classifier this is
		result[modelInd] = dec_values[0];
	}

	if (!m_ProbabilityEstimates) {
		// In the multiclass setting, the chosen class is the one with the largest distance from the separating hyperplane
		// In a binary setting there is only one value - if it is greater than 0 (i.e. instance does belong to class[0]) then maxInd remains = 0, else it is changed to 1
		int maxInd = 0;
		for (int i = 1; i < result.length; i++)
			if (result[i] > result[maxInd]) maxInd = i;
		// 0/1 (hard) distribution over the classes
		result = new double[instance.numClasses()];
		result[maxInd] = 1;
		return result;
	} else {
		// Calculates the probabilities in the same way as in the LibLINEAR and Linear classes
		// (sigmoid per decision value, then normalize for the multiclass case)
		double [] prob_estimates = new double[instance.numClasses()];

		for (int i = 0; i < prob_estimates.length; i++)
			prob_estimates[i] = 1 / (1 + Math.exp(-result[i]));

		if (instance.numClasses() == 2) // for binary classification
			prob_estimates[1] = 1. - prob_estimates[0];
		else {
			double sum = 0;
			for (int i = 0; i < instance.numClasses(); i++)
				sum += prob_estimates[i];

			for (int i = 0; i < instance.numClasses(); i++)
				prob_estimates[i] = prob_estimates[i] / sum;
		}

		return prob_estimates;
	}
}
/** * Generates the classifier. * * @param instances the instances to be used for building the classifier * @throws Exception if the classifier can't be built successfully */ @Override public void buildClassifier(Instances instances) throws Exception { boolean noRule = true; // can classifier handle the data? getCapabilities().testWithFail(instances); // remove instances with missing class Instances data = new Instances(instances); data.deleteWithMissingClass(); // only class? -> build ZeroR model if (data.numAttributes() == 1) { System.err .println("Cannot build model (only class attribute present in data!), " + "using ZeroR model instead!"); m_ZeroR = new weka.classifiers.rules.ZeroR(); m_ZeroR.buildClassifier(data); return; } else { m_ZeroR = null; } // for each attribute ... Enumeration<Attribute> enu = instances.enumerateAttributes(); while (enu.hasMoreElements()) { try { OneRRule r = newRule(enu.nextElement(), data); // if this attribute is the best so far, replace the rule if (noRule || r.m_correct > m_rule.m_correct) { m_rule = r; } noRule = false; } catch (Exception ex) { } } if (noRule) { throw new WekaException("No attributes found to work with!"); } }
/** * Tries to open the file. * * @param file the file to open * @return true if successfully read */ public boolean open(File file) { boolean result; String content; if (m_Document == null) { return true; } // Warn if extension unwknown if (!checkExtension(file)) { System.err.println("Extension of file '" + file + "' is unknown!"); } try { // clear old content m_Document.remove(0, m_Document.getLength()); // add new content content = ScriptUtils.load(file); if (content == null) { throw new WekaException("Error reading content of file '" + file + "'!"); } m_Document.insertString(0, content, null); m_Modified = false; m_Filename = file; result = true; } catch (Exception e) { e.printStackTrace(); try { m_Document.remove(0, m_Document.getLength()); } catch (Exception ex) { // ignored } result = false; m_Filename = null; } return result; }
/** * Signify that this batch of input to the filter is finished. * * @return true if there are instances pending output * @throws IllegalStateException if no input structure has been defined */ @Override public boolean batchFinished() throws Exception { if (getInputFormat() == null) { throw new IllegalStateException("No input instance format defined"); } Instances toFilter = getInputFormat(); if (!isFirstBatchDone()) { // filter out attributes if necessary Instances toFilterIgnoringAttributes = removeIgnored(toFilter); // serialized model or build clusterer from scratch? File file = getSerializedClustererFile(); if (!file.isDirectory()) { ObjectInputStream ois = new ObjectInputStream(new FileInputStream(file)); m_ActualClusterer = (Clusterer) ois.readObject(); Instances header = null; // let's see whether there's an Instances header stored as well try { header = (Instances) ois.readObject(); } catch (Exception e) { // ignored } ois.close(); // same dataset format? if ((header != null) && (!header.equalHeaders(toFilterIgnoringAttributes))) { throw new WekaException( "Training header of clusterer and filter dataset don't match:\n" + header.equalHeadersMsg(toFilterIgnoringAttributes)); } } else { m_ActualClusterer = AbstractClusterer.makeCopy(m_Clusterer); m_ActualClusterer.buildClusterer(toFilterIgnoringAttributes); } // create output dataset with new attribute Instances filtered = new Instances(toFilter, 0); ArrayList<String> nominal_values = new ArrayList<String>( m_ActualClusterer.numberOfClusters()); for (int i = 0; i < m_ActualClusterer.numberOfClusters(); i++) { nominal_values.add("cluster" + (i + 1)); } filtered.insertAttributeAt(new Attribute("cluster", nominal_values), filtered.numAttributes()); setOutputFormat(filtered); } // build new dataset for (int i = 0; i < toFilter.numInstances(); i++) { convertInstance(toFilter.instance(i)); } flushInput(); m_NewBatch = true; m_FirstBatchDone = true; return (numPendingOutput() != 0); }
/** * Generates the classifier. * * @param instances the instances to be used for building the classifier * @throws Exception if the classifier can't be built successfully */ public void buildClassifier(Instances instances) throws Exception { boolean noRule = true; // can classifier handle the data? getCapabilities().testWithFail(instances); // remove instances with missing class Instances data = new Instances(instances); data.deleteWithMissingClass(); // only class? -> build ZeroR model if (data.numAttributes() == 1) { System.err.println( "Cannot build model (only class attribute present in data!), " + "using ZeroR model instead!"); m_ZeroR = new weka.classifiers.rules.ZeroR(); m_ZeroR.buildClassifier(data); return; } else { m_ZeroR = null; } // for each attribute ... Enumeration enu = instances.enumerateAttributes(); while (enu.hasMoreElements()) { try { OneRRule r = newRule((Attribute) enu.nextElement(), data); // if this attribute is the best so far, replace the rule if (noRule || r.m_correct > m_rule.m_correct) { m_rule = r; } noRule = false; } catch (Exception ex) { } } if (noRule) throw new WekaException("No attributes found to work with!"); }
/**
 * Gets the results for a specified run number. Different run
 * numbers correspond to different randomizations of the data. Results
 * produced should be sent to the current ResultListener
 *
 * @param run the run number to get results for.
 * @throws Exception if a problem occurs while getting the results
 */
public void doRun(int run) throws Exception {

  // lazily create the zip destination for raw output
  if (getRawOutput()) {
    if (m_ZipDest == null)
      m_ZipDest = new OutputZipper(m_OutputFile);
  }

  if (m_Instances == null)
    throw new Exception("No Instances set");

  // Add in some fields to the key like run number, dataset name
  Object[] seKey = m_SplitEvaluator.getKey();
  Object[] key = new Object [seKey.length + 2];
  key[0] = Utils.backQuoteChars(m_Instances.relationName());
  key[1] = "" + run;
  System.arraycopy(seKey, 0, key, 2, seKey.length);

  // only compute the result if the listener has not seen this key yet
  if (m_ResultListener.isResultRequired(this, key)) {
    // training set: a copy, optionally shuffled with the run number as seed
    Instances train = new Instances(m_Instances);
    if (m_randomize) {
      Random rand = new Random(run);
      train.randomize(rand);
    }

    // test set is loaded from a file derived from the training data
    String filename = createFilename(train);
    File file = new File(filename);
    if (!file.exists())
      throw new WekaException("Test set '" + filename + "' not found!");
    Instances test = DataSource.read(filename);

    // can we set the class attribute safely?
    if (train.numAttributes() == test.numAttributes())
      test.setClassIndex(train.classIndex());
    else
      throw new WekaException(
          "Train and test set (= " + filename + ") "
          + "differ in number of attributes: "
          + train.numAttributes() + " != " + test.numAttributes());

    // test headers
    if (!train.equalHeaders(test))
      throw new WekaException(
          "Train and test set (= " + filename + ") "
          + "are not compatible:\n" + train.equalHeadersMsg(test));

    try {
      // evaluate and prepend a timestamp to the evaluator's results
      Object[] seResults = m_SplitEvaluator.getResult(train, test);
      Object[] results = new Object [seResults.length + 1];
      results[0] = getTimestamp();
      System.arraycopy(seResults, 0, results, 1, seResults.length);

      if (m_debugOutput) {
        // build a filesystem-safe name for the raw-output zip entry
        String resultName = (""+run+"."+
            Utils.backQuoteChars(train.relationName())
            +"."
            +m_SplitEvaluator.toString()).replace(' ','_');
        resultName = Utils.removeSubstring(resultName, "weka.classifiers.");
        resultName = Utils.removeSubstring(resultName, "weka.filters.");
        resultName = Utils.removeSubstring(resultName, "weka.attributeSelection.");
        m_ZipDest.zipit(m_SplitEvaluator.getRawResultOutput(), resultName);
      }

      m_ResultListener.acceptResult(this, key, results);
    } catch (Exception e) {
      // Save the train and test datasets for debugging purposes?
      throw e;
    }
  }
}
/** * Signify that this batch of input to the filter is finished. * * @return true if there are instances pending output * @throws IllegalStateException if no input structure has been defined */ public boolean batchFinished() throws Exception { if (getInputFormat() == null) throw new IllegalStateException("No input instance format defined"); Instances toFilter = getInputFormat(); if (!isFirstBatchDone()) { // filter out attributes if necessary Instances toFilterIgnoringAttributes = removeIgnored(toFilter); // serialized model or build clusterer from scratch? File file = getSerializedClustererFile(); if (!file.isDirectory()) { ObjectInputStream ois = new ObjectInputStream(new FileInputStream(file)); m_ActualClusterer = (Clusterer) ois.readObject(); Instances header = null; // let's see whether there's an Instances header stored as well try { header = (Instances) ois.readObject(); } catch (Exception e) { // ignored } ois.close(); // same dataset format? if ((header != null) && (!header.equalHeaders(toFilterIgnoringAttributes))) throw new WekaException( "Training header of clusterer and filter dataset don't match:\n" + header.equalHeadersMsg(toFilterIgnoringAttributes)); } else { m_ActualClusterer = AbstractClusterer.makeCopy(m_Clusterer); m_ActualClusterer.buildClusterer(toFilterIgnoringAttributes); } // create output dataset with new attribute Instances filtered = new Instances(toFilter, 0); FastVector nominal_values = new FastVector(m_ActualClusterer.numberOfClusters()); for (int i = 0; i < m_ActualClusterer.numberOfClusters(); i++) { nominal_values.addElement("cluster" + (i+1)); } filtered.insertAttributeAt(new Attribute("cluster", nominal_values), filtered.numAttributes()); setOutputFormat(filtered); } // build new dataset for (int i=0; i<toFilter.numInstances(); i++) { convertInstance(toFilter.instance(i)); } flushInput(); m_NewBatch = true; m_FirstBatchDone = true; return (numPendingOutput() != 0); }
/**
 * Computes the distribution for a given instance.
 *
 * All LIBLINEAR calls are made reflectively (via invokeMethod /
 * Class.forName), presumably so the library remains an optional
 * dependency — TODO confirm against the rest of this class.
 *
 * @param instance the instance for which distribution is computed
 * @return the distribution
 * @throws Exception if the distribution can't be computed successfully
 */
public double[] distributionForInstance (Instance instance) throws Exception {

  // Apply the same preprocessing used in training, each step optional.
  if (!getDoNotReplaceMissingValues()) {
    m_ReplaceMissingValues.input(instance);
    m_ReplaceMissingValues.batchFinished();
    instance = m_ReplaceMissingValues.output();
  }

  if (getConvertNominalToBinary()
      && m_NominalToBinary != null) {
    m_NominalToBinary.input(instance);
    m_NominalToBinary.batchFinished();
    instance = m_NominalToBinary.output();
  }

  if (m_Filter != null) {
    m_Filter.input(instance);
    m_Filter.batchFinished();
    instance = m_Filter.output();
  }

  // x is the LIBLINEAR feature-node array built from the instance
  Object x = instanceToArray(instance);
  double v;
  double[] result = new double[instance.numClasses()];
  if (m_ProbabilityEstimates) {
    // probability output is only defined for L2-regularized logistic regression
    if (m_SVMType != SVMTYPE_L2_LR) {
      throw new WekaException("probability estimation is currently only " +
          "supported for L2-regularized logistic regression");
    }

    // LIBLINEAR reports probabilities in its own label order
    int[] labels = (int[])invokeMethod(m_Model, "getLabels", null, null);
    double[] prob_estimates = new double[instance.numClasses()];

    // reflective call: Linear.predictProbability(model, x, prob_estimates)
    v = ((Integer) invokeMethod(
        Class.forName(CLASS_LINEAR).newInstance(),
        "predictProbability",
        new Class[]{
          Class.forName(CLASS_MODEL),
          Array.newInstance(Class.forName(CLASS_FEATURENODE),
              Array.getLength(x)).getClass(),
          Array.newInstance(Double.TYPE,
              prob_estimates.length).getClass()},
        new Object[]{
          m_Model, x, prob_estimates})).doubleValue();

    // Return order of probabilities to canonical weka attribute order
    for (int k = 0; k < prob_estimates.length; k++) {
      result[labels[k]] = prob_estimates[k];
    }
  } else {
    // reflective call: Linear.predict(model, x) -> predicted class index
    v = ((Integer) invokeMethod(
        Class.forName(CLASS_LINEAR).newInstance(),
        "predict",
        new Class[]{
          Class.forName(CLASS_MODEL),
          Array.newInstance(Class.forName(CLASS_FEATURENODE),
              Array.getLength(x)).getClass()},
        new Object[]{
          m_Model, x})).doubleValue();

    assert (instance.classAttribute().isNominal());
    // hard 0/1 distribution with all mass on the predicted class
    result[(int) v] = 1;
  }

  return result;
}
/**
 * Parses a given list of options. <p/>
 *
 * <!-- options-start -->
 * Valid options are: <p/>
 *
 * <pre> -D
 * Produce debugging output.
 * (default no debugging output)</pre>
 *
 * <pre> -E &lt;estimator&gt;
 * The estimator can be one of the following:
 * eb -- Empirical Bayes estimator for normal mixture (default)
 * nested -- Optimal nested model selector for normal mixture
 * subset -- Optimal subset selector for normal mixture
 * pace2 -- PACE2 for Chi-square mixture
 * pace4 -- PACE4 for Chi-square mixture
 * pace6 -- PACE6 for Chi-square mixture
 *
 * ols -- Ordinary least squares estimator
 * aic -- AIC estimator
 * bic -- BIC estimator
 * ric -- RIC estimator
 * olsc -- Ordinary least squares subset selector with a threshold</pre>
 *
 * <pre> -S &lt;threshold value&gt;
 * Threshold value for the OLSC estimator</pre>
 * <!-- options-end -->
 *
 * @param options the list of options as an array of strings
 * @throws Exception if an option is not supported
 */
public void setOptions(String[] options) throws Exception {

  setDebug(Utils.getFlag('D', options));

  // Map the -E value to the corresponding estimator constant; an empty
  // value falls back to the empirical Bayes default.
  String estimator = Utils.getOption('E', options);
  if ( estimator.equals("ols") ) paceEstimator = olsEstimator;
  else if ( estimator.equals("olsc") ) paceEstimator = olscEstimator;
  else if( estimator.equals("eb") || estimator.equals("") )
    paceEstimator = ebEstimator;
  else if ( estimator.equals("nested") ) paceEstimator = nestedEstimator;
  else if ( estimator.equals("subset") ) paceEstimator = subsetEstimator;
  else if ( estimator.equals("pace2") ) paceEstimator = pace2Estimator;
  else if ( estimator.equals("pace4") ) paceEstimator = pace4Estimator;
  else if ( estimator.equals("pace6") ) paceEstimator = pace6Estimator;
  else if ( estimator.equals("aic") ) paceEstimator = aicEstimator;
  else if ( estimator.equals("bic") ) paceEstimator = bicEstimator;
  else if ( estimator.equals("ric") ) paceEstimator = ricEstimator;
  else throw new WekaException("unknown estimator " + estimator +
      " for -E option" );

  // -S sets the threshold used by the OLSC estimator only
  String string = Utils.getOption('S', options);
  if( ! string.equals("") )
    olscThreshold = Double.parseDouble( string );
}
/**
 * Computes a weight (the decider's positive-decision probability) for every
 * topic in the given collection, keyed by topic id.
 *
 * @param topics the topics to weigh
 * @return map from topic id to its decision probability
 * @throws Exception if the classifier has not been built or loaded yet
 */
public HashMap<Integer,Double> getTopicWeights(Collection<Topic> topics) throws Exception {
  if (!decider.isReady()) {
    throw new WekaException("You must build (or load) classifier first.") ;
  }

  HashMap<Integer, Double> weightsById = new HashMap<Integer, Double>() ;

  for (Topic topic: topics) {
    Instance inst = getInstance(topic, null) ;
    double positiveProb = decider.getDecisionDistribution(inst).get(true) ;
    weightsById.put(topic.getId(), positiveProb) ;
    candidatesConsidered++ ;
  }

  return weightsById ;
}
/**
 * Performs the injection. Must be initialized beforehand.
 *
 * @param data the data to perform injection on
 * @return the updated dataset
 * @throws Exception if the injection fails, eg if not initialized.
 * @see #buildInjection(Instances)
 * @see #doInject(Instances)
 */
@Override
public Instances inject(Instances data) throws Exception {
  if (m_Initialized) {
    return doInject(data);
  }
  throw new WekaException(
    "Injection algorithm '" + getClass().getName() + "' not initialized!");
}
/**
 * Performs the injection. Must be initialized beforehand.
 *
 * @param inst the instance to perform injection on
 * @return the updated instance
 * @throws Exception if the injection fails, eg if not initialized.
 * @see #buildInjection(Instances)
 */
@Override
public Instance inject(Instance inst) throws Exception {
  if (m_Initialized) {
    return doInject(inst);
  }
  throw new WekaException(
    "Injection algorithm '" + getClass().getName() + "' not initialized!");
}
/**
 * Performs the imputation. Must be initialized beforehand.
 *
 * @param data the data to perform imputation on
 * @return the updated dataset
 * @throws Exception if the imputation fails, eg if not initialized.
 * @see #buildImputation(Instances)
 * @see #doImpute(Instances)
 */
@Override
public Instances impute(Instances data) throws Exception {
  if (m_Initialized) {
    return doImpute(data);
  }
  throw new WekaException(
    "Imputation algorithm '" + getClass().getName() + "' not initialized!");
}
/**
 * Performs the imputation. Must be initialized beforehand.
 *
 * @param inst the instance to perform imputation on
 * @return the updated instance
 * @throws Exception if the imputation fails, eg if not initialized.
 * @see #buildImputation(Instances)
 */
@Override
public Instance impute(Instance inst) throws Exception {
  if (m_Initialized) {
    return doImpute(inst);
  }
  throw new WekaException(
    "Imputation algorithm '" + getClass().getName() + "' not initialized!");
}