/** * Loads the training data as configured in {@link #dataConfig} and trains a * 3-gram SVM classifier. */ @Override public void afterPropertiesSet() throws Exception { this.trainingData = svmTrainer.train(); StringToWordVector stwvFilter = createFilter(this.trainingData); // Instances filterdInstances = Filter.useFilter(data, stwv); LibSVM svm = new LibSVM(); svm.setKernelType(new SelectedTag(0, LibSVM.TAGS_KERNELTYPE)); svm.setSVMType(new SelectedTag(0, LibSVM.TAGS_SVMTYPE)); svm.setProbabilityEstimates(true); // svm.buildClassifier(filterdInstances); FilteredClassifier filteredClassifier = new FilteredClassifier(); filteredClassifier.setFilter(stwvFilter); filteredClassifier.setClassifier(svm); filteredClassifier.buildClassifier(this.trainingData); this.classifier = filteredClassifier; // predict("nice cool amazing awesome beautiful"); // predict("this movie is simply awesome"); // predict("its very bad"); // predict("Not that great"); }
/**
 * Sets the metric type used for ranking rules and resets the minimum metric
 * score to the default that belongs to the metric type in effect afterwards.
 * <p>
 * The metric type is only changed when {@code d} was created from
 * {@code TAGS_SELECTION} (identity comparison). The minimum metric is reset
 * in either case: 0.9 for CONFIDENCE, 1.1 for LIFT and CONVICTION, 0.1 for
 * LEVERAGE.
 *
 * @param d the type of metric
 */
public void setMetricType(SelectedTag d) {
  final boolean fromSelectionTags = d.getTags() == TAGS_SELECTION;
  if (fromSelectionTags) {
    m_metricType = d.getSelectedTag().getID();
  }

  // Re-apply the default threshold of whichever metric is now active.
  if (m_metricType == CONFIDENCE) {
    setMinMetric(0.9);
  }
  if (m_metricType == LIFT || m_metricType == CONVICTION) {
    setMinMetric(1.1);
  }
  if (m_metricType == LEVERAGE) {
    setMinMetric(0.1);
  }
}
/**
 * Parses a given list of options.
 * <p/>
 *
 * Options handled by this method:
 * <p/>
 *
 * <pre>
 * -mbc
 *  Applies a Markov Blanket correction to the network structure,
 *  after a network structure is learned. This ensures that all
 *  nodes in the network are part of the Markov blanket of the
 *  classifier node.
 * </pre>
 *
 * <pre>
 * -S [BAYES|BDeu|MDL|ENTROPY|AIC]
 *  Score type. Any other value leaves the current score type unchanged.
 * </pre>
 *
 * Remaining options are handed to {@code super.setOptions}.
 *
 * @param options the list of options as an array of strings
 * @throws Exception if an option is not supported
 */
@Override
public void setOptions(String[] options) throws Exception {
  setMarkovBlanketClassifier(Utils.getFlag("mbc", options));

  final String scoreName = Utils.getOption('S', options);
  // The names are matched exactly (case-sensitive); at most one can match.
  if (scoreName.equals("BAYES")) {
    setScoreType(new SelectedTag(Scoreable.BAYES, TAGS_SCORE_TYPE));
  } else if (scoreName.equals("BDeu")) {
    setScoreType(new SelectedTag(Scoreable.BDeu, TAGS_SCORE_TYPE));
  } else if (scoreName.equals("MDL")) {
    setScoreType(new SelectedTag(Scoreable.MDL, TAGS_SCORE_TYPE));
  } else if (scoreName.equals("ENTROPY")) {
    setScoreType(new SelectedTag(Scoreable.ENTROPY, TAGS_SCORE_TYPE));
  } else if (scoreName.equals("AIC")) {
    setScoreType(new SelectedTag(Scoreable.AIC, TAGS_SCORE_TYPE));
  }

  super.setOptions(options);
}
/**
 * Creates the SGD model held in {@code m_svmProbs} and the two-attribute
 * dataset structure held in {@code m_fitLogisticStructure}.
 * <p>
 * The SGD model uses the log-loss function and this object's learning rate,
 * lambda and epoch settings. The structure consists of a numeric attribute
 * "pred" and a nominal class attribute "class" whose two labels are the
 * first two values of the class attribute of {@code data}. The SGD model is
 * then built on that structure.
 *
 * @param data the data whose class attribute supplies the two class labels
 * @throws Exception if building the SGD model fails
 */
protected void initializeSVMProbs(Instances data) throws Exception {
  m_svmProbs = new SGD();
  m_svmProbs.setLossFunction(new SelectedTag(SGD.LOGLOSS, TAGS_SELECTION));
  m_svmProbs.setLearningRate(m_learningRate);
  m_svmProbs.setLambda(m_lambda);
  m_svmProbs.setEpochs(m_epochs);

  ArrayList<Attribute> header = new ArrayList<Attribute>(2);
  header.add(new Attribute("pred"));

  // Only the first two class labels are copied.
  ArrayList<String> classLabels = new ArrayList<String>(2);
  for (int label = 0; label < 2; label++) {
    classLabels.add(data.classAttribute().value(label));
  }
  header.add(new Attribute("class", classLabels));

  // NOTE(review): the third argument (0) is presumably the initial capacity,
  // i.e. the structure holds no instances at this point -- confirm.
  m_fitLogisticStructure = new Instances("data", header, 0);
  m_fitLogisticStructure.setClassIndex(1);

  m_svmProbs.buildClassifier(m_fitLogisticStructure);
}
/**
 * Returns a description of the property value as java source.
 * <p>
 * The text has the form {@code new SelectedTag(<selected id>, { ... })} with
 * one {@code new Tag(<id>,"<readable>")} entry per line, separated by commas.
 *
 * @return a value of type 'String'
 */
public String getJavaInitializationString() {
  SelectedTag current = (SelectedTag) getValue();
  Tag[] allTags = current.getTags();

  StringBuilder source = new StringBuilder("new SelectedTag(");
  source.append(current.getSelectedTag().getID()).append(", {\n");

  final int lastIndex = allTags.length - 1;
  for (int pos = 0; pos <= lastIndex; pos++) {
    source.append("new Tag(")
        .append(allTags[pos].getID())
        .append(",\"")
        .append(allTags[pos].getReadable())
        .append("\")");
    // No trailing comma after the final entry.
    if (pos < lastIndex) {
      source.append(',');
    }
    source.append('\n');
  }

  return source.append("})").toString();
}
/**
 * Sets the current property value as text.
 * <p>
 * The text is compared with the readable string of every tag of the current
 * value; the first match becomes the new value. Text that matches no tag
 * leaves the value unchanged.
 *
 * @param text the text of the selected tag.
 * @exception java.lang.IllegalArgumentException if an error occurs while
 *              looking up or applying the tag; the original exception is
 *              attached as the cause
 */
public void setAsText(String text) {
  SelectedTag s = (SelectedTag) getValue();
  Tag[] tags = s.getTags();
  try {
    for (int i = 0; i < tags.length; i++) {
      if (text.equals(tags[i].getReadable())) {
        setValue(new SelectedTag(tags[i].getID(), tags));
        return;
      }
    }
  } catch (Exception ex) {
    // Keep the underlying failure as the cause so it is not lost to callers.
    throw new java.lang.IllegalArgumentException(text, ex);
  }
}
public static LinearRegression createLinearRegression() { LinearRegression linreg = new LinearRegression(); linreg.setAttributeSelectionMethod(new SelectedTag(LinearRegression.SELECTION_NONE, LinearRegression.TAGS_SELECTION)); linreg.setEliminateColinearAttributes(false); // if wants debug info //linreg.setDebug(true); return linreg; }
/**
 * Gets the type of normalization to perform.
 *
 * @return the current filter type wrapped as a selection from
 *         {@code TAGS_FILTER}
 */
@OptionMetadata(
    description = "The type of normalization to perform.",
    displayName = "attribute normalization",
    commandLineParamName = "normalization",
    commandLineParamSynopsis = "-normalization <int>",
    displayOrder = 12)
public SelectedTag getFilterType() {
  final SelectedTag current = new SelectedTag(filterType, TAGS_FILTER);
  return current;
}
/**
 * Sets the type of SVM by translating the selected tag's ID into a solver
 * type. Selections not created from {@code TAGS_SVMTYPE} (identity
 * comparison) are ignored.
 *
 * @param value The type of the SVM
 */
@Override
public void setSVMType(SelectedTag value) {
  if (value.getTags() != TAGS_SVMTYPE) {
    return;
  }
  final int solverId = value.getSelectedTag().getID();
  setSolverType(SolverType.getById(solverId));
}
/**
 * Set the metric type to use.
 * <p>
 * The ID of the selected tag is interpreted as the ordinal of a
 * {@code DefaultAssociationRule.METRIC_TYPE} constant. IDs that match no
 * constant leave the current metric unchanged.
 *
 * @param d the metric type
 */
public void setMetricType(SelectedTag d) {
  final int wanted = d.getSelectedTag().getID();
  final DefaultAssociationRule.METRIC_TYPE[] metrics =
      DefaultAssociationRule.METRIC_TYPE.values();

  // values() is in ordinal order, so the constant with ordinal 'wanted' (if
  // there is one) sits at index 'wanted'.
  if (wanted >= 0 && wanted < metrics.length) {
    m_metric = metrics[wanted];
  }
}
/**
 * Set the search direction. Selections not created from
 * {@code TAGS_SELECTION} (identity comparison) are ignored.
 *
 * @param d the direction of the search
 */
public void setDirection(SelectedTag d) {
  if (d.getTags() != TAGS_SELECTION) {
    return;
  }
  m_searchDirection = d.getSelectedTag().getID();
}
/**
 * Sets the method used. Will be one of METHOD_1_AGAINST_ALL,
 * METHOD_ERROR_RANDOM, METHOD_ERROR_EXHAUSTIVE, or METHOD_1_AGAINST_1.
 * Selections not created from {@code TAGS_METHOD} (identity comparison) are
 * ignored.
 *
 * @param newMethod the new method.
 */
public void setMethod(SelectedTag newMethod) {
  final boolean foreignTags = newMethod.getTags() != TAGS_METHOD;
  if (foreignTags) {
    return;
  }
  m_Method = newMethod.getSelectedTag().getID();
}
/**
 * Sets the source location of the cost matrix. Values other than
 * MATRIX_ON_DEMAND or MATRIX_SUPPLIED will be ignored; concretely, the
 * selection is only applied when it was created from
 * {@code TAGS_MATRIX_SOURCE} (identity comparison).
 *
 * @param newMethod the cost matrix location method.
 */
public void setCostMatrixSource(SelectedTag newMethod) {
  if (newMethod.getTags() != TAGS_MATRIX_SOURCE) {
    return;
  }
  m_MatrixSource = newMethod.getSelectedTag().getID();
}
/** * Get the evaluation metric to use * * @return the evaluation metric to use */ public SelectedTag getEvaluationMetric() { for (int i = 0; i < TAGS_EVAL.length; i++) { if (TAGS_EVAL[i].getIDStr().equalsIgnoreCase(m_evalMetric)) { return new SelectedTag(i, TAGS_EVAL); } } // if we get here then it could be because a plugin // metric is no longer available. Default to rmse return new SelectedTag(12, TAGS_EVAL); }
/**
 * Sets the distance weighting method used. Values other than WEIGHT_NONE,
 * WEIGHT_INVERSE, or WEIGHT_SIMILARITY will be ignored; concretely, the
 * selection is only applied when it was created from {@code TAGS_WEIGHTING}
 * (identity comparison).
 *
 * @param newMethod the distance weighting method to use
 */
public void setDistanceWeighting(SelectedTag newMethod) {
  if (newMethod.getTags() != TAGS_WEIGHTING) {
    return;
  }
  m_DistanceWeighting = newMethod.getSelectedTag().getID();
}
/**
 * Sets the method to use for handling missing values. Values other than
 * M_NORMAL, M_AVERAGE, M_MAXDIFF and M_DELETE will be ignored; concretely,
 * the selection is only applied when it was created from
 * {@code TAGS_MISSING} (identity comparison).
 *
 * @param newMode the method to use for handling missing values.
 */
public void setMissingMode(SelectedTag newMode) {
  final boolean foreignTags = newMode.getTags() != TAGS_MISSING;
  if (foreignTags) {
    return;
  }
  m_MissingMode = newMode.getSelectedTag().getID();
}
/**
 * Parses a given list of options. <p/>
 *
 * Valid options are: <p/>
 *
 * <pre> -B &lt;num&gt;
 *  Manual blend setting (default 20%)
 * </pre>
 *
 * <pre> -E
 *  Enable entropic auto-blend setting (symbolic class only)
 * </pre>
 *
 * <pre> -M &lt;char&gt;
 *  Specify the missing value treatment mode (default a)
 *  Valid options are: a(verage), d(elete), m(axdiff), n(ormal)
 * </pre>
 *
 * Only the first character of the -M argument is inspected; an unrecognised
 * character selects the average mode. When -B or -M is absent the
 * corresponding setting is left untouched.
 *
 * @param options the list of options as an array of strings
 * @throws Exception if an option is not supported
 */
public void setOptions(String[] options) throws Exception {
  final String blendArg = Utils.getOption('B', options);
  if (!blendArg.isEmpty()) {
    setGlobalBlend(Integer.parseInt(blendArg));
  }

  setEntropicAutoBlend(Utils.getFlag('E', options));

  final String missingArg = Utils.getOption('M', options);
  if (!missingArg.isEmpty()) {
    final char modeCode = missingArg.charAt(0);
    if (modeCode == 'd') {
      setMissingMode(new SelectedTag(M_DELETE, TAGS_MISSING));
    } else if (modeCode == 'm') {
      setMissingMode(new SelectedTag(M_MAXDIFF, TAGS_MISSING));
    } else if (modeCode == 'n') {
      setMissingMode(new SelectedTag(M_NORMAL, TAGS_MISSING));
    } else {
      // 'a' as well as any unrecognised character selects averaging.
      setMissingMode(new SelectedTag(M_AVERAGE, TAGS_MISSING));
    }
  }

  super.setOptions(options);

  Utils.checkForRemainingOptions(options);
}
/**
 * Sets how the training data will be transformed. Should be one of
 * FILTER_NORMALIZE, FILTER_STANDARDIZE, FILTER_NONE. Selections not created
 * from {@code TAGS_FILTER} (identity comparison) are ignored.
 *
 * @param newType the new filtering mode
 */
public void setFilterType(SelectedTag newType) {
  if (newType.getTags() != TAGS_FILTER) {
    return;
  }
  m_filterType = newType.getSelectedTag().getID();
}
/**
 * Sets the type of GUI to use and re-initializes the GUI via
 * {@code initGUI()}. Selections not created from {@code TAGS_GUI} (identity
 * comparison) are ignored and do not trigger a re-initialization.
 *
 * @param value the GUI type
 */
public void setGUIType(SelectedTag value) {
  if (value.getTags() != TAGS_GUI) {
    return;
  }
  m_GUIType = value.getSelectedTag().getID();
  initGUI();
}
/**
 * Gets the list of tags that can be selected from, i.e. the readable string
 * of every tag of the current value, in tag order.
 *
 * @return an array of string tags.
 */
public String[] getTags() {
  final Tag[] available = ((SelectedTag) getValue()).getTags();
  final String[] labels = new String[available.length];

  int next = 0;
  for (Tag tag : available) {
    labels[next++] = tag.getReadable();
  }
  return labels;
}
/**
 * Tests out the selectedtag editor from the command line: shows a frame with
 * a value selector over five sample tags and exits the JVM when the frame is
 * closed. Any exception is printed to standard error.
 *
 * @param args ignored
 */
public static void main(String[] args) {
  try {
    GenericObjectEditor.registerEditors();

    Tag[] sampleTags = new Tag[] {
      new Tag(1, "First option"),
      new Tag(2, "Second option"),
      new Tag(3, "Third option"),
      new Tag(4, "Fourth option"),
      new Tag(5, "Fifth option")
    };

    SelectedTagEditor editor = new SelectedTagEditor();
    editor.setValue(new SelectedTag(1, sampleTags));
    PropertyValueSelector selector = new PropertyValueSelector(editor);

    JFrame frame = new JFrame();
    frame.addWindowListener(new WindowAdapter() {
      @Override
      public void windowClosing(WindowEvent e) {
        System.exit(0);
      }
    });
    frame.getContentPane().setLayout(new BorderLayout());
    frame.getContentPane().add(selector, BorderLayout.CENTER);
    frame.pack();
    frame.setVisible(true);
  } catch (Exception ex) {
    ex.printStackTrace();
    System.err.println(ex.getMessage());
  }
}
/**
 * Returns an enumeration describing the available options: -R, -V and -S
 * declared here, followed by the options of the superclass.
 *
 * @return an enumeration of all the available options.
 */
@Override
public Enumeration<Option> listOptions() {
  Vector<Option> options = new Vector<Option>();

  options.addElement(new Option(
      "\tSpecify list of string attributes to convert to words.\n"
          + "\t(default: select all relational attributes)",
      "R", 1, "-R <index1,index2-index4,...>"));

  options.addElement(new Option(
      "\tInverts the matching sense of the selection.", "V", 0, "-V"));

  // One "<id string> = <readable>" line per available sort type.
  StringBuilder sortTypes = new StringBuilder();
  for (Tag element : TAGS_SORTTYPE) {
    SelectedTag tag = new SelectedTag(element.getID(), TAGS_SORTTYPE);
    sortTypes.append("\t")
        .append(tag.getSelectedTag().getIDStr())
        .append(" = ")
        .append(tag.getSelectedTag().getReadable())
        .append("\n");
  }

  options.addElement(new Option(
      "\tDetermines the type of sorting:\n" + sortTypes + "\t(default: "
          + new SelectedTag(SORT_CASESENSITIVE, TAGS_SORTTYPE) + ")",
      "S", 1, "-S " + Tag.toOptionList(TAGS_SORTTYPE)));

  options.addAll(Collections.list(super.listOptions()));

  return options.elements();
}
/**
 * Sets the sort type to be used and installs the matching comparator.
 * Selections not created from {@code TAGS_SORTTYPE} (identity comparison)
 * are ignored.
 *
 * @param type the type of sorting
 * @throws IllegalStateException if the selected sort type is neither
 *           SORT_CASESENSITIVE nor SORT_CASEINSENSITIVE; the sort type field
 *           has already been updated at that point
 */
public void setSortType(SelectedTag type) {
  if (type.getTags() != TAGS_SORTTYPE) {
    return;
  }

  m_SortType = type.getSelectedTag().getID();

  if (m_SortType == SORT_CASESENSITIVE) {
    m_Comparator = new CaseSensitiveComparator();
    return;
  }
  if (m_SortType == SORT_CASEINSENSITIVE) {
    m_Comparator = new CaseInsensitiveComparator();
    return;
  }
  throw new IllegalStateException("Unhandled sort type '" + type + "'!");
}
/** * Gets the current settings of the filter. * * @return an array of strings suitable for passing to setOptions */ @Override public String[] getOptions() { Vector<String> options = new Vector<String>(); // if (getUseGaussian()) { // options[current++] = "-G"; // } if (getReplaceMissingValues()) { options.add("-M"); } if (getPercent() <= 0) { options.add("-N"); options.add("" + getNumberOfAttributes()); } else { options.add("-P"); options.add("" + getPercent()); } options.add("-R"); options.add("" + getSeed()); SelectedTag t = getDistribution(); options.add("-D"); options.add("" + t.getSelectedTag().getReadable()); return options.toArray(new String[0]); }
/**
 * Sets the distribution to use for calculating the random matrix.
 * Selections not created from {@code TAGS_DSTRS_TYPE} (identity comparison)
 * are ignored.
 *
 * @param newDstr the distribution to use
 */
public void setDistribution(SelectedTag newDstr) {
  if (newDstr.getTags() != TAGS_DSTRS_TYPE) {
    return;
  }
  m_distribution = newDstr.getSelectedTag().getID();
}
/**
 * Sets the attribute type to be deleted by the filter. Selections not
 * created from {@code TAGS_ATTRIBUTETYPE} (identity comparison) are ignored.
 *
 * @param type a TAGS_ATTRIBUTETYPE of the new type the filter should delete
 */
public void setAttributeType(SelectedTag type) {
  final boolean foreignTags = type.getTags() != TAGS_ATTRIBUTETYPE;
  if (foreignTags) {
    return;
  }
  m_attTypeToDelete = type.getSelectedTag().getID();
}
/**
 * Sets whether the word frequencies for a document (instance) should be
 * normalized or not. The selection is stored in {@code m_filterType};
 * selections not created from {@code TAGS_FILTER} (identity comparison) are
 * ignored.
 *
 * @param newType the new type.
 */
public void setNormalizeDocLength(SelectedTag newType) {
  if (newType.getTags() != TAGS_FILTER) {
    return;
  }
  m_filterType = newType.getSelectedTag().getID();
}
/** * returns the default input order * * @return the default input order */ protected SelectedTag defaultInputOrder() { return new SelectedTag(ORDERED, TAGS_INPUTORDER); // TODO: the only one that // is currently // implemented, normally // RANDOMIZED }
/**
 * Sets the filter type from the given selection. Selections not created
 * from {@code TAGS_FILTER} (identity comparison) are ignored.
 *
 * @param newType the new filter type
 */
public void setFilterType(SelectedTag newType) {
  if (newType.getTags() != TAGS_FILTER) {
    return;
  }
  filterType = newType.getSelectedTag().getID();
}
/** * Make the final PreconstructedKMeans clusterer to wrap the centroids and * stats found during map-reduce. * * @param best the best result from the runs of k-means that were performed in * parallel * @param preprocess any pre-processing filters applied * @param initialStartingPoints the initial starting centroids * @param finalNumIterations the final number of iterations performed * @return a final clusterer object * @throws DistributedWekaException if a problem occurs */ protected Clusterer makeFinalClusterer(KMeansReduceTask best, Filter preprocess, Instances initialStartingPoints, int finalNumIterations) throws DistributedWekaException { Clusterer finalClusterer = null; PreconstructedKMeans finalKMeans = new PreconstructedKMeans(); // global priming data for the distance function (this will be in // the transformed space if we're using preprocessing filters) Instances globalPrimingData = best.getGlobalDistanceFunctionPrimingData(); NormalizableDistance dist = new EuclideanDistance(); dist.setInstances(globalPrimingData); finalKMeans.setClusterCentroids(best.getCentroidsForRun()); finalKMeans.setFinalNumberOfIterations(finalNumIterations + 1); if (initialStartingPoints != null) { finalKMeans.setInitialStartingPoints(initialStartingPoints); } try { finalKMeans.setDistanceFunction(dist); finalKMeans.setClusterStats(best.getAggregatedCentroidSummaries()); } catch (Exception e) { throw new DistributedWekaException(e); } if (!getInitWithRandomCentroids()) { finalKMeans.setInitializationMethod(new SelectedTag( SimpleKMeans.KMEANS_PLUS_PLUS, SimpleKMeans.TAGS_SELECTION)); } finalKMeans.setDisplayStdDevs(getDisplayCentroidStdDevs()); finalClusterer = finalKMeans; if (preprocess != null) { PreconstructedFilteredClusterer fc = new PreconstructedFilteredClusterer(); fc.setFilter(preprocess); fc.setClusterer(finalKMeans); finalClusterer = fc; } return finalClusterer; }
/**
 * Set the initialization method to use. Selections not created from
 * {@code TAGS_SELECTION} (identity comparison) are ignored.
 *
 * @param method the initialization method to use
 */
public void setInitializationMethod(SelectedTag method) {
  if (method.getTags() != TAGS_SELECTION) {
    return;
  }
  m_initializationMethod = method.getSelectedTag().getID();
}
/**
 * Sets the link type. Selections not created from {@code TAGS_LINK_TYPE}
 * (identity comparison) are ignored.
 *
 * @param newLinkType the link type to use
 */
public void setLinkType(SelectedTag newLinkType) {
  if (newLinkType.getTags() != TAGS_LINK_TYPE) {
    return;
  }
  m_nLinkType = newLinkType.getSelectedTag().getID();
}
/**
 * Gets the link type.
 *
 * @return the current link type wrapped as a selection from
 *         {@code TAGS_LINK_TYPE}
 */
public SelectedTag getLinkType() {
  final SelectedTag current = new SelectedTag(m_nLinkType, TAGS_LINK_TYPE);
  return current;
}
/**
 * Parses a given list of options.
 * <p/>
 *
 * Options handled by this method:
 * <p/>
 *
 * <pre>
 * -P
 *  Flag stored in m_bPrintNewick.
 * </pre>
 *
 * <pre>
 * -N &lt;num&gt;
 *  Number of clusters (2 when the option is absent).
 * </pre>
 *
 * <pre>
 * -B
 *  Flag passed to setDistanceIsBranchLength.
 * </pre>
 *
 * <pre>
 * -L [SINGLE|COMPLETE|AVERAGE|MEAN|CENTROID|WARD|ADJCOMPLETE|NEIGHBOR_JOINING]
 *  Link type. Any other value leaves the current link type unchanged.
 * </pre>
 *
 * <pre>
 * -A &lt;classname and options&gt;
 *  Distance function specification (EuclideanDistance when absent).
 * </pre>
 *
 * Remaining options are handed to {@code super.setOptions}.
 *
 * @param options the list of options as an array of strings
 * @throws Exception if an option is not supported
 */
@Override
public void setOptions(String[] options) throws Exception {
  m_bPrintNewick = Utils.getFlag('P', options);

  String optionString = Utils.getOption('N', options);
  if (optionString.length() != 0) {
    // parseInt replaces the deprecated Integer(String) constructor; both
    // throw NumberFormatException for malformed input.
    setNumClusters(Integer.parseInt(optionString));
  } else {
    setNumClusters(2);
  }

  setDistanceIsBranchLength(Utils.getFlag('B', options));

  // Link type names are matched exactly (case-sensitive); at most one matches.
  String sLinkType = Utils.getOption('L', options);
  if (sLinkType.equals("SINGLE")) {
    setLinkType(new SelectedTag(SINGLE, TAGS_LINK_TYPE));
  } else if (sLinkType.equals("COMPLETE")) {
    setLinkType(new SelectedTag(COMPLETE, TAGS_LINK_TYPE));
  } else if (sLinkType.equals("AVERAGE")) {
    setLinkType(new SelectedTag(AVERAGE, TAGS_LINK_TYPE));
  } else if (sLinkType.equals("MEAN")) {
    setLinkType(new SelectedTag(MEAN, TAGS_LINK_TYPE));
  } else if (sLinkType.equals("CENTROID")) {
    setLinkType(new SelectedTag(CENTROID, TAGS_LINK_TYPE));
  } else if (sLinkType.equals("WARD")) {
    setLinkType(new SelectedTag(WARD, TAGS_LINK_TYPE));
  } else if (sLinkType.equals("ADJCOMPLETE")) {
    setLinkType(new SelectedTag(ADJCOMPLETE, TAGS_LINK_TYPE));
  } else if (sLinkType.equals("NEIGHBOR_JOINING")) {
    setLinkType(new SelectedTag(NEIGHBOR_JOINING, TAGS_LINK_TYPE));
  }

  String nnSearchClass = Utils.getOption('A', options);
  if (nnSearchClass.length() != 0) {
    String[] nnSearchClassSpec = Utils.splitOptions(nnSearchClass);
    if (nnSearchClassSpec.length == 0) {
      throw new Exception("Invalid DistanceFunction specification string.");
    }
    // The first element is the class name; it is blanked out so that only
    // the distance function's own options are passed on.
    String className = nnSearchClassSpec[0];
    nnSearchClassSpec[0] = "";
    setDistanceFunction((DistanceFunction) Utils.forName(
        DistanceFunction.class, className, nnSearchClassSpec));
  } else {
    setDistanceFunction(new EuclideanDistance());
  }

  super.setOptions(options);

  Utils.checkForRemainingOptions(options);
}
/**
 * Sets the combination rule to use. Selections not created from
 * {@code TAGS_RULES} (identity comparison) are ignored.
 *
 * @param newRule the combination rule method to use
 */
public void setCombinationRule(SelectedTag newRule) {
  if (newRule.getTags() != TAGS_RULES) {
    return;
  }
  m_CombinationRule = newRule.getSelectedTag().getID();
}
/**
 * Parses a given list of options. <p/>
 *
 * <!-- options-start -->
 * Valid options are: <p/>
 *
 * <pre> -M &lt;num&gt;
 *  Sets the method to use. Valid values are 0 (1-against-all),
 *  1 (random codes), 2 (exhaustive code), and 3 (1-against-1). (default 0)
 * </pre>
 *
 * <pre> -R &lt;num&gt;
 *  Sets the multiplier when using random codes. (default 2.0)</pre>
 *
 * <pre> -P
 *  Use pairwise coupling (only has an effect for 1-against1)</pre>
 *
 * <pre> -L
 *  Use log loss decoding for random and exhaustive codes.</pre>
 *
 * <pre> -S &lt;num&gt;
 *  Random number seed.
 *  (default 1)</pre>
 *
 * <pre> -D
 *  If set, classifier is run in debug mode and
 *  may output additional info to the console</pre>
 *
 * <pre> -W
 *  Full name of base classifier.
 *  (default: weka.classifiers.functions.Logistic)</pre>
 *
 * <pre>
 * Options specific to classifier weka.classifiers.functions.Logistic:
 * </pre>
 *
 * <pre> -D
 *  Turn on debugging output.</pre>
 *
 * <pre> -R &lt;ridge&gt;
 *  Set the ridge in the log-likelihood.</pre>
 *
 * <pre> -M &lt;number&gt;
 *  Set the maximum number of iterations (default -1, until convergence).</pre>
 * <!-- options-end -->
 *
 * This method itself parses -M, -R, -P and -L; everything else is handed to
 * {@code super.setOptions}.
 *
 * @param options the list of options as an array of strings
 * @throws Exception if an option is not supported
 */
public void setOptions(String[] options) throws Exception {
  String methodString = Utils.getOption('M', options);
  if (methodString.length() != 0) {
    setMethod(new SelectedTag(Integer.parseInt(methodString), TAGS_METHOD));
  } else {
    setMethod(new SelectedTag(METHOD_1_AGAINST_ALL, TAGS_METHOD));
  }

  String rfactorString = Utils.getOption('R', options);
  if (rfactorString.length() != 0) {
    // parseDouble replaces the deprecated Double(String) constructor; both
    // throw NumberFormatException for malformed input.
    setRandomWidthFactor(Double.parseDouble(rfactorString));
  } else {
    setRandomWidthFactor(2.0);
  }

  setUsePairwiseCoupling(Utils.getFlag('P', options));

  setLogLossDecoding(Utils.getFlag('L', options));

  super.setOptions(options);

  Utils.checkForRemainingOptions(options);
}