/**
 * Checks whether the scheme can handle zero training instances.
 *
 * @param attrTypes attribute types that can be estimated
 * @param classType the class type (NUMERIC, NOMINAL, etc.)
 * @return index 0 is true if the test was passed, index 1 is true if test
 *         was acceptable
 */
protected boolean[] canHandleZeroTraining(AttrTypes attrTypes, int classType) {
  print("handle zero training instances");
  printAttributeSummary(attrTypes, classType);
  print("...");
  FastVector accepts = new FastVector();
  accepts.addElement("train");
  accepts.addElement("value");
  int numTrain = 0, numTest = getNumInstances(), numClasses = 2,
      missingLevel = 0;
  boolean attributeMissing = false, classMissing = false;
  int numAtts = 1;
  int attrIndex = 0;

  return runBasicTest(
      attrTypes, numAtts, attrIndex, classType,
      missingLevel, attributeMissing, classMissing,
      numTrain, numTest, numClasses, accepts);
}
/**
 * Defines the clusters if pattern is RANDOM
 *
 * @param random random number generator
 * @return the cluster definitions
 * @throws Exception if something goes wrong
 */
private FastVector defineClustersRANDOM(Random random) throws Exception {
  FastVector clusters = new FastVector(m_NumClusters);
  double diffInstNum = (double) (m_MaxInstNum - m_MinInstNum);
  double minInstNum = (double) m_MinInstNum;
  double diffRadius = m_MaxRadius - m_MinRadius;
  Cluster cluster;

  for (int i = 0; i < m_NumClusters; i++) {
    int instNum = (int) (random.nextDouble() * diffInstNum + minInstNum);
    double radius = (random.nextDouble() * diffRadius) + m_MinRadius;
    // center is defined in the constructor of cluster
    cluster = new Cluster(instNum, radius, random);
    clusters.addElement((Object) cluster);
  }
  return clusters;
}
/**
 * create sub tree
 *
 * @param iNode index of the lowest node in the tree
 * @param nRecords set of records in instances to be considered
 * @param instances data set
 * @return ADNode representing an ADTree
 */
public static ADNode makeADTree(int iNode, FastVector nRecords,
    Instances instances) {
  ADNode _ADNode = new ADNode();
  _ADNode.m_nCount = nRecords.size();
  _ADNode.m_nStartNode = iNode;
  if (nRecords.size() < MIN_RECORD_SIZE) {
    _ADNode.m_Instances = new Instance[nRecords.size()];
    for (int iInstance = 0; iInstance < nRecords.size(); iInstance++) {
      _ADNode.m_Instances[iInstance] =
          instances.instance(((Integer) nRecords.elementAt(iInstance)).intValue());
    }
  } else {
    _ADNode.m_VaryNodes = new VaryNode[instances.numAttributes() - iNode];
    for (int iNode2 = iNode; iNode2 < instances.numAttributes(); iNode2++) {
      _ADNode.m_VaryNodes[iNode2 - iNode] = makeVaryNode(iNode2, nRecords, instances);
    }
  }
  return _ADNode;
}
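/*
 * Usage sketch (added for illustration): building the AD tree over an entire
 * data set by seeding nRecords with every instance index before calling the
 * method above. "instances" stands for any existing weka.core.Instances
 * object; everything else comes from the snippet itself.
 */
FastVector nRecords = new FastVector(instances.numInstances());
for (int iInstance = 0; iInstance < instances.numInstances(); iInstance++) {
  nRecords.addElement(new Integer(iInstance));
}
ADNode root = ADNode.makeADTree(0, nRecords, instances);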
void layoutGraph() {
  if (m_BayesNet.getNrOfNodes() == 0) {
    return;
  }
  try {
    FastVector m_nodes = new FastVector();
    FastVector m_edges = new FastVector();
    BIFParser bp = new BIFParser(m_BayesNet.toXMLBIF03(), m_nodes, m_edges);
    bp.parse();
    updateStatus();
    m_layoutEngine = new HierarchicalBCEngine(m_nodes, m_edges,
        m_nPaddedNodeWidth, m_nNodeHeight);
    m_layoutEngine.addLayoutCompleteEventListener(this);
    m_layoutEngine.layoutGraph();
  } catch (Exception e) {
    e.printStackTrace();
  }
}
/**
 * Method that finds all association rules.
 *
 * @throws Exception if an attribute is numeric
 */
private void findRulesQuickly() throws Exception {
  FastVector[] rules;

  // Build rules
  for (int j = 1; j < m_Ls.size(); j++) {
    FastVector currentItemSets = (FastVector) m_Ls.elementAt(j);
    Enumeration enumItemSets = currentItemSets.elements();
    while (enumItemSets.hasMoreElements()) {
      AprioriItemSet currentItemSet = (AprioriItemSet) enumItemSets.nextElement();
      //AprioriItemSet currentItemSet = new AprioriItemSet((ItemSet)enumItemSets.nextElement());
      rules = currentItemSet.generateRules(m_minMetric, m_hashtables, j + 1);
      for (int k = 0; k < rules[0].size(); k++) {
        m_allTheRules[0].addElement(rules[0].elementAt(k));
        m_allTheRules[1].addElement(rules[1].elementAt(k));
        m_allTheRules[2].addElement(rules[2].elementAt(k));
        if (rules.length > 3) {
          m_allTheRules[3].addElement(rules[3].elementAt(k));
          m_allTheRules[4].addElement(rules[4].elementAt(k));
          m_allTheRules[5].addElement(rules[5].elementAt(k));
        }
      }
    }
  }
}
public Instances timeInstances(SortedSet orders) {
  FastVector attributes = new FastVector(1);
  attributes.addElement(new Attribute("time"));
  Instances cInstances = new Instances("wiki", attributes, orders.size());
  for (Iterator it = orders.iterator(); it.hasNext();) {
    // System.out.println("class: " + data.instance(i).getClass());
    MyInstance instance = (MyInstance) it.next();
    double[] values = new double[1]; //instance.toDoubleArray();
    values[values.length - 1] = instance.getTime();
    Instance newInstance = new Instance(1, values);
    newInstance = new MyInstance(newInstance, instance.getTime());
    cInstances.add(newInstance);
  }
  return cInstances;
}
/** Called by JUnit after each test method */
protected void tearDown() {
  m_Classifier = null;
  m_Tester = null;
  m_OptionTester = null;
  m_GOETester = null;
  m_updateableClassifier = false;
  m_weightedInstancesHandler = false;
  m_NominalPredictors = new boolean[LAST_CLASSTYPE + 1];
  m_NumericPredictors = new boolean[LAST_CLASSTYPE + 1];
  m_StringPredictors = new boolean[LAST_CLASSTYPE + 1];
  m_DatePredictors = new boolean[LAST_CLASSTYPE + 1];
  m_RelationalPredictors = new boolean[LAST_CLASSTYPE + 1];
  m_handleMissingPredictors = new boolean[LAST_CLASSTYPE + 1];
  m_handleMissingClass = new boolean[LAST_CLASSTYPE + 1];
  m_handleClassAsFirstAttribute = new boolean[LAST_CLASSTYPE + 1];
  m_handleClassAsSecondAttribute = new boolean[LAST_CLASSTYPE + 1];
  m_RegressionResults = new FastVector[LAST_CLASSTYPE + 1];
  m_NClasses = 4;
}
private Instances getInstances() {
  Attribute rleRateAttr = new Attribute(ARFFAttributes.DATA_COMPRESSION_RATE_BY_RLE_ATTRIBUTE);
  Attribute rleRateVwAttr = new Attribute(ARFFAttributes.DATA_COMPRESSION_RATE_OF_VERTICALLY_WINDING_TEXT_BY_RLE_ATTRIBUTE);
  Attribute linesAttr = new Attribute(ARFFAttributes.NUMBER_OF_LINES_ATTRIBUTE);
  Attribute sizeAttr = new Attribute(ARFFAttributes.TEXT_SIZE_ATTRIBUTE);

  // Create nominal attribute "class"
  FastVector my_nominal_values = new FastVector(2);
  my_nominal_values.addElement(ARFFAttributes.ASCIIART_CLASS_NAME);
  my_nominal_values.addElement(ARFFAttributes.NON_ASCIIART_CLASS_NAME);
  Attribute className = new Attribute(ARFFAttributes.CLASS_ATTRIBUTE, my_nominal_values);

  FastVector attributes = new FastVector(5);
  attributes.addElement(rleRateAttr);
  attributes.addElement(rleRateVwAttr);
  attributes.addElement(linesAttr);
  attributes.addElement(sizeAttr);
  attributes.addElement(className);

  // Create the empty dataset "textart" with above attributes
  Instances instances = new Instances(ARFFAttributes.ASCIIART_CLASS_NAME, attributes, 0);
  instances.setClassIndex(className.index());
  return instances;
}
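/*
 * Usage sketch (added for illustration): filling the header returned by
 * getInstances() with one labelled example. The feature values are made-up
 * placeholders; the attribute order follows the FastVector built above and
 * the old FastVector-era API (weka.core.Instance as a concrete class) is
 * assumed.
 */
Instances data = getInstances();
double[] vals = new double[data.numAttributes()];
vals[0] = 0.42;   // RLE compression rate (example value)
vals[1] = 0.37;   // RLE rate of the vertically winding text (example value)
vals[2] = 25;     // number of lines (example value)
vals[3] = 1800;   // text size (example value)
vals[4] = data.classAttribute().indexOfValue(ARFFAttributes.ASCIIART_CLASS_NAME);
data.add(new Instance(1.0, vals));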
public TrialInstances buildVarInstances(InferenceRule ir, List allVars) {
  FastVector attributes = new FastVector();
  WekaInterface.addAllPairs(attributes, allVars);
  attributes.addElement(new weka.core.Attribute("score"));
  int capacity = 30;
  OrderTranslator filter = new FilterTranslator(allVars);
  TrialInstances data = new TrialInstances("Var Ordering Constraints", attributes, capacity);
  if (allVars.size() <= 1) return data;
  for (Iterator i = allTrials.iterator(); i.hasNext();) {
    EpisodeCollection tc2 = (EpisodeCollection) i.next();
    InferenceRule ir2 = tc2.getRule(solver);
    if (ir != ir2) continue;
    addToInstances(data, tc2, filter);
  }
  data.setClassIndex(data.numAttributes() - 1);
  return data;
}
/**
 * Checks basic estimation of one attribute of the scheme, for simple
 * non-troublesome datasets.
 *
 * @param attrTypes the types the estimator can work with
 * @param supervised true if the estimator is supervised (currently ignored)
 * @param classType the class type (NOMINAL, NUMERIC, etc.)
 * @return index 0 is true if the test was passed, index 1 is true if test
 *         was acceptable
 */
protected boolean[] canEstimate(AttrTypes attrTypes, boolean supervised, int classType) {
  // supervised is ignored, no supervised estimators used yet
  print("basic estimation");
  printAttributeSummary(attrTypes, classType);
  print("...");
  FastVector accepts = new FastVector();
  accepts.addElement("nominal");
  accepts.addElement("numeric");
  accepts.addElement("string");
  accepts.addElement("date");
  accepts.addElement("relational");
  accepts.addElement("not in classpath");
  int numTrain = getNumInstances(), numTest = getNumInstances(),
      numClasses = 2, missingLevel = 0;
  boolean attributeMissing = false, classMissing = false;
  int numAtts = 1, attrIndex = 0;

  return runBasicTest(attrTypes, numAtts, attrIndex, classType,
      missingLevel, attributeMissing, classMissing,
      numTrain, numTest, numClasses, accepts);
}
/**
 * generates the header
 *
 * @return the header
 */
private Instances makeHeader() {
  FastVector fv = new FastVector();
  fv.addElement(new Attribute(TRUE_POS_NAME));
  fv.addElement(new Attribute(FALSE_NEG_NAME));
  fv.addElement(new Attribute(FALSE_POS_NAME));
  fv.addElement(new Attribute(TRUE_NEG_NAME));
  fv.addElement(new Attribute(FP_RATE_NAME));
  fv.addElement(new Attribute(TP_RATE_NAME));
  fv.addElement(new Attribute(PRECISION_NAME));
  fv.addElement(new Attribute(RECALL_NAME));
  fv.addElement(new Attribute(FALLOUT_NAME));
  fv.addElement(new Attribute(FMEASURE_NAME));
  fv.addElement(new Attribute(SAMPLE_SIZE_NAME));
  fv.addElement(new Attribute(LIFT_NAME));
  fv.addElement(new Attribute(THRESHOLD_NAME));
  return new Instances(RELATION_NAME, fv, 100);
}
private Instances getInstances() {
  Attribute rleRateAttr = new Attribute(ARFFAttributes.DATA_COMPRESSION_RATE_BY_RLE_ATTRIBUTE);
  Attribute linesAttr = new Attribute(ARFFAttributes.NUMBER_OF_LINES_ATTRIBUTE);
  Attribute sizeAttr = new Attribute(ARFFAttributes.TEXT_SIZE_ATTRIBUTE);
  Attribute ngramAttr = new Attribute(ARFFAttributes.NUMBER_OF_NGRAMS_ATTRIBUTE);

  // Create nominal attribute "class"
  FastVector my_nominal_values = new FastVector(2);
  my_nominal_values.addElement(ARFFAttributes.ASCIIART_CLASS_NAME);
  my_nominal_values.addElement(ARFFAttributes.NON_ASCIIART_CLASS_NAME);
  Attribute className = new Attribute(ARFFAttributes.CLASS_ATTRIBUTE, my_nominal_values);

  FastVector attributes = new FastVector(5);
  attributes.addElement(rleRateAttr);
  attributes.addElement(linesAttr);
  attributes.addElement(sizeAttr);
  attributes.addElement(ngramAttr);
  attributes.addElement(className);

  // Create the empty dataset "textart" with above attributes
  Instances instances = new Instances(ARFFAttributes.RELATION_NAME, attributes, 0);
  instances.setClassIndex(className.index());
  return instances;
}
/**
 * Calculates the cumulative margin distribution for the set of
 * predictions, returning the result as a set of Instances. The
 * structure of these Instances is as follows:<p> <ul>
 * <li> <b>Margin</b> contains the margin value (which should be plotted
 * as an x-coordinate)
 * <li> <b>Current</b> contains the count of instances with the current
 * margin (plot as y axis)
 * <li> <b>Cumulative</b> contains the count of instances with margin
 * less than or equal to the current margin (plot as y axis)
 * </ul> <p>
 *
 * @return datapoints as a set of instances, null if no predictions
 *         have been made.
 */
public Instances getCurve(FastVector predictions) {

  if (predictions.size() == 0) {
    return null;
  }

  Instances insts = makeHeader();
  double[] margins = getMargins(predictions);
  int[] sorted = Utils.sort(margins);
  int binMargin = 0;
  int totalMargin = 0;
  insts.add(makeInstance(-1, binMargin, totalMargin));
  for (int i = 0; i < sorted.length; i++) {
    double current = margins[sorted[i]];
    double weight = ((NominalPrediction) predictions.elementAt(sorted[i]))
        .weight();
    totalMargin += weight;
    binMargin += weight;
    if (true) {
      insts.add(makeInstance(current, binMargin, totalMargin));
      binMargin = 0;
    }
  }
  return insts;
}
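/*
 * Usage sketch (added for illustration): feeding cross-validated predictions
 * into getCurve(). It assumes the method above lives in
 * weka.classifiers.evaluation.MarginCurve and mirrors the CostCurve main()
 * shown further down; the class name MarginCurveDemo and the use of args[0]
 * as an ARFF path are illustrative only.
 */
import weka.classifiers.evaluation.EvaluationUtils;
import weka.classifiers.evaluation.MarginCurve;
import weka.core.FastVector;
import weka.core.Instances;

public class MarginCurveDemo {
  public static void main(String[] args) throws Exception {
    Instances data = new Instances(new java.io.FileReader(args[0]));
    data.setClassIndex(data.numAttributes() - 1);
    // 10-fold cross-validated predictions for a hardcoded Logistic classifier
    FastVector preds = new EvaluationUtils().getCVPredictions(
        new weka.classifiers.functions.Logistic(), data, 10);
    System.out.println(new MarginCurve().getCurve(preds));
  }
}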
/**
 * This method is an implementation for the LayoutCompleteEventListener
 * interface. It sets the size appropriate for the m_GraphPanel GraphPanel
 * and revalidates its container JScrollPane once a LayoutCompleteEvent is
 * received from the LayoutEngine. Also, it updates positions of the
 * Bayesian network stored in m_BayesNet.
 */
public void layoutCompleted(LayoutCompleteEvent le) {
  LayoutEngine layoutEngine = m_layoutEngine; // (LayoutEngine) le.getSource();
  FastVector nPosX = new FastVector(m_BayesNet.getNrOfNodes());
  FastVector nPosY = new FastVector(m_BayesNet.getNrOfNodes());
  for (int iNode = 0; iNode < layoutEngine.getNodes().size(); iNode++) {
    GraphNode gNode = (GraphNode) layoutEngine.getNodes().elementAt(iNode);
    if (gNode.nodeType == GraphNode.NORMAL) {
      nPosX.addElement(gNode.x);
      nPosY.addElement(gNode.y);
    }
  }
  m_BayesNet.layoutGraph(nPosX, nPosY);
  m_jStatusBar.setText("Graph layed out");
  a_undo.setEnabled(true);
  a_redo.setEnabled(false);
  setAppropriateSize();
  m_GraphPanel.invalidate();
  m_jScrollPane.revalidate();
  m_GraphPanel.repaint();
}
/**
 * Sets the format of the input instances.
 *
 * @param instanceInfo an Instances object containing the input instance
 * structure (any instances contained in the object are ignored - only the
 * structure is required).
 * @return true if the outputFormat may be collected immediately
 * @throws Exception if a problem occurs setting the input format
 */
public boolean setInputFormat(Instances instanceInfo) throws Exception {

  super.setInputFormat(instanceInfo);

  FastVector attributes = new FastVector();
  int outputClass = -1;
  m_SelectedAttributes = determineIndices(instanceInfo.numAttributes());
  for (int i = 0; i < m_SelectedAttributes.length; i++) {
    int current = m_SelectedAttributes[i];
    if (instanceInfo.classIndex() == current) {
      outputClass = attributes.size();
    }
    Attribute keep = (Attribute) instanceInfo.attribute(current).copy();
    attributes.addElement(keep);
  }
  initInputLocators(instanceInfo, m_SelectedAttributes);
  Instances outputFormat = new Instances(instanceInfo.relationName(), attributes, 0);
  outputFormat.setClassIndex(outputClass);
  setOutputFormat(outputFormat);
  return true;
}
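/*
 * Usage sketch (added for illustration): the usual batch-filter calling
 * pattern for a setInputFormat() like the one above. Remove is used here only
 * as a concrete stand-in for whatever Filter subclass declares the method;
 * the "1-3" range and the ARFF path taken from args[0] are illustrative.
 */
import weka.core.Instances;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Remove;

public class FilterDemo {
  public static void main(String[] args) throws Exception {
    Instances data = new Instances(new java.io.FileReader(args[0]));
    Remove filter = new Remove();
    filter.setAttributeIndices("1-3");   // attribute range the filter operates on
    filter.setInputFormat(data);         // establishes the output format, as above
    Instances filtered = Filter.useFilter(data, filter);
    System.out.println(filtered.toSummaryString());
  }
}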
/**
 * Tests the CostCurve generation from the command line.
 * The classifier is currently hardcoded. Pipe in an arff file.
 *
 * @param args currently ignored
 */
public static void main(String[] args) {

  try {
    Instances inst = new Instances(new java.io.InputStreamReader(System.in));
    inst.setClassIndex(inst.numAttributes() - 1);
    CostCurve cc = new CostCurve();
    EvaluationUtils eu = new EvaluationUtils();
    Classifier classifier = new weka.classifiers.functions.Logistic();
    FastVector predictions = new FastVector();
    for (int i = 0; i < 2; i++) { // Do two runs.
      eu.setSeed(i);
      predictions.appendElements(eu.getCVPredictions(classifier, inst, 10));
      //System.out.println("\n\n\n");
    }
    Instances result = cc.getCurve(predictions);
    System.out.println(result);
  } catch (Exception ex) {
    ex.printStackTrace();
  }
}
/**
 * Set the output format. Changes the format of the specified date
 * attribute.
 */
private void setOutputFormat() {
  // Create new attributes
  FastVector newAtts = new FastVector(getInputFormat().numAttributes());
  for (int j = 0; j < getInputFormat().numAttributes(); j++) {
    Attribute att = getInputFormat().attribute(j);
    if (j == m_AttIndex.getIndex()) {
      newAtts.addElement(new Attribute(att.name(), getDateFormat().toPattern()));
    } else {
      newAtts.addElement(att.copy());
    }
  }

  // Create new header
  Instances newData = new Instances(getInputFormat().relationName(), newAtts, 0);
  newData.setClassIndex(getInputFormat().classIndex());
  m_OutputAttribute = newData.attribute(m_AttIndex.getIndex());
  setOutputFormat(newData);
}
/**
 * Static utility function to count the data covered by the
 * rules after the given index in the given rules, and then
 * remove them. It returns the data not covered by the
 * successive rules.
 *
 * @param data the data to be processed
 * @param rules the ruleset
 * @param index the given index
 * @return the data after processing
 */
public static Instances rmCoveredBySuccessives(Instances data, FastVector rules, int index) {
  Instances rt = new Instances(data, 0);

  for (int i = 0; i < data.numInstances(); i++) {
    Instance datum = data.instance(i);
    boolean covered = false;

    for (int j = index + 1; j < rules.size(); j++) {
      Rule rule = (Rule) rules.elementAt(j);
      if (rule.covers(datum)) {
        covered = true;
        break;
      }
    }

    if (!covered)
      rt.add(datum);
  }
  return rt;
}
/**
 * constructor, copies Bayesian network structure from a Bayesian network
 * encapsulated in a BIFReader
 */
public EditableBayesNet(BIFReader other) {
  m_Instances = other.m_Instances;
  m_ParentSets = other.getParentSets();
  m_Distributions = other.getDistributions();

  int nNodes = getNrOfNodes();
  m_nPositionX = new FastVector(nNodes);
  m_nPositionY = new FastVector(nNodes);
  for (int i = 0; i < nNodes; i++) {
    m_nPositionX.addElement(other.m_nPositionX[i]);
    m_nPositionY.addElement(other.m_nPositionY[i]);
  }
  m_nEvidence = new FastVector(nNodes);
  for (int i = 0; i < nNodes; i++) {
    m_nEvidence.addElement(-1);
  }
  m_fMarginP = new FastVector(nNodes);
  for (int i = 0; i < nNodes; i++) {
    double[] P = new double[getCardinality(i)];
    m_fMarginP.addElement(P);
  }

  clearUndoStack();
}
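/*
 * Usage sketch (added for illustration, with assumptions): loading a network
 * from an XML BIF file and wrapping it in the editable structure via the
 * constructor above. "alarm.xml" is just an example path, and
 * BIFReader.processFile() is assumed to be the parsing entry point, as in the
 * standard Weka BIFReader.
 */
weka.classifiers.bayes.net.BIFReader reader = new weka.classifiers.bayes.net.BIFReader();
reader.processFile("alarm.xml");
EditableBayesNet net = new EditableBayesNet(reader);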
/**
 * Runs a test on the datasets with the given characteristics.
 *
 * @param attrTypes attribute types that can be estimated
 * @param numAtts number of attributes
 * @param attrIndex attribute index
 * @param classType the class type (NUMERIC, NOMINAL, etc.)
 * @param missingLevel the percentage of missing values
 * @param attributeMissing true if the missing values may be in
 * the attributes
 * @param classMissing true if the missing values may be in the class
 * @param numTrain the number of instances in the training set
 * @param numTest the number of instances in the test set
 * @param numClasses the number of classes
 * @param accepts the acceptable string in an exception
 * @return index 0 is true if the test was passed, index 1 is true if test
 *         was acceptable
 */
protected boolean[] runBasicTest(AttrTypes attrTypes, int numAtts, int attrIndex,
    int classType, int missingLevel, boolean attributeMissing,
    boolean classMissing, int numTrain, int numTest, int numClasses,
    FastVector accepts) {

  return runBasicTest(attrTypes, numAtts, attrIndex, classType,
      TestInstances.CLASS_IS_LAST,
      missingLevel, attributeMissing, classMissing,
      numTrain, numTest, numClasses, accepts);
}
/**
 * change the name of a value of a node
 *
 * @param nTargetNode index of the node to set name for
 * @param sValue current name of the value
 * @param sNewValue new name of the value
 */
public void renameNodeValue(int nTargetNode, String sValue, String sNewValue) {
  // update undo stack
  if (m_bNeedsUndoAction) {
    addUndoAction(new RenameValueAction(nTargetNode, sValue, sNewValue));
  }
  Attribute att = m_Instances.attribute(nTargetNode);
  int nCardinality = att.numValues();
  FastVector values = new FastVector(nCardinality);
  for (int iValue = 0; iValue < nCardinality; iValue++) {
    if (att.value(iValue).equals(sValue)) {
      values.addElement(sNewValue);
    } else {
      values.addElement(att.value(iValue));
    }
  }
  replaceAtt(nTargetNode, att.name(), values);
}
/**
 * align set of nodes with the left most node in the list
 *
 * @param nodes list of indexes of nodes to align
 */
public void alignLeft(FastVector nodes) {
  // update undo stack
  if (m_bNeedsUndoAction) {
    addUndoAction(new alignLeftAction(nodes));
  }
  int nMinX = -1;
  for (int iNode = 0; iNode < nodes.size(); iNode++) {
    int nX = getPositionX((Integer) nodes.elementAt(iNode));
    if (nX < nMinX || iNode == 0) {
      nMinX = nX;
    }
  }
  for (int iNode = 0; iNode < nodes.size(); iNode++) {
    int nNode = (Integer) nodes.elementAt(iNode);
    m_nPositionX.setElementAt(nMinX, nNode);
  }
}
/**
 * align set of nodes with the right most node in the list
 *
 * @param nodes list of indexes of nodes to align
 */
public void alignRight(FastVector nodes) {
  // update undo stack
  if (m_bNeedsUndoAction) {
    addUndoAction(new alignRightAction(nodes));
  }
  int nMaxX = -1;
  for (int iNode = 0; iNode < nodes.size(); iNode++) {
    int nX = getPositionX((Integer) nodes.elementAt(iNode));
    if (nX > nMaxX || iNode == 0) {
      nMaxX = nX;
    }
  }
  for (int iNode = 0; iNode < nodes.size(); iNode++) {
    int nNode = (Integer) nodes.elementAt(iNode);
    m_nPositionX.setElementAt(nMaxX, nNode);
  }
}
/**
 * align set of nodes with the top most node in the list
 *
 * @param nodes list of indexes of nodes to align
 */
public void alignTop(FastVector nodes) {
  // update undo stack
  if (m_bNeedsUndoAction) {
    addUndoAction(new alignTopAction(nodes));
  }
  int nMinY = -1;
  for (int iNode = 0; iNode < nodes.size(); iNode++) {
    int nY = getPositionY((Integer) nodes.elementAt(iNode));
    if (nY < nMinY || iNode == 0) {
      nMinY = nY;
    }
  }
  for (int iNode = 0; iNode < nodes.size(); iNode++) {
    int nNode = (Integer) nodes.elementAt(iNode);
    m_nPositionY.setElementAt(nMinY, nNode);
  }
}
/**
 * space out set of nodes evenly between top and bottom most node in the list
 *
 * @param nodes list of indexes of nodes to space out
 */
public void spaceVertical(FastVector nodes) {
  // update undo stack
  if (m_bNeedsUndoAction) {
    addUndoAction(new spaceVerticalAction(nodes));
  }
  int nMinY = -1;
  int nMaxY = -1;
  for (int iNode = 0; iNode < nodes.size(); iNode++) {
    int nY = getPositionY((Integer) nodes.elementAt(iNode));
    if (nY < nMinY || iNode == 0) {
      nMinY = nY;
    }
    if (nY > nMaxY || iNode == 0) {
      nMaxY = nY;
    }
  }
  for (int iNode = 0; iNode < nodes.size(); iNode++) {
    int nNode = (Integer) nodes.elementAt(iNode);
    m_nPositionY.setElementAt(
        (int) (nMinY + iNode * (nMaxY - nMinY) / (nodes.size() - 1.0)), nNode);
  }
}
/** Constructor */
public Plot2D() {
  super();
  setProperties();
  this.setBackground(m_backgroundColour);

  m_drawnPoints = new int[this.getWidth()][this.getHeight()];

  // Set up some default colours
  m_colorList = new FastVector(10);
  for (int noa = m_colorList.size(); noa < 10; noa++) {
    Color pc = m_DefaultColors[noa % 10];
    int ija = noa / 10;
    ija *= 2;
    for (int j = 0; j < ija; j++) {
      pc = pc.darker();
    }
    m_colorList.addElement(pc);
  }
}
DelValueAction(int nTargetNode, String sValue) {
  try {
    m_nTargetNode = nTargetNode;
    m_sValue = sValue;
    m_att = m_Instances.attribute(nTargetNode);
    SerializedObject so = new SerializedObject(m_Distributions[nTargetNode]);
    m_CPT = (Estimator[]) so.getObject();
    m_children = new FastVector();
    for (int iNode = 0; iNode < getNrOfNodes(); iNode++) {
      if (m_ParentSets[iNode].contains(nTargetNode)) {
        m_children.addElement(iNode);
      }
    }
    m_childAtts = new Estimator[m_children.size()][];
    for (int iChild = 0; iChild < m_children.size(); iChild++) {
      int nChild = (Integer) m_children.elementAt(iChild);
      m_childAtts[iChild] = m_Distributions[nChild];
    }
  } catch (Exception e) {
    e.printStackTrace();
  }
}
IdentifiedInstances<Element> createInstances() throws IOException {
  if (!isBagsInitialized())
    initializeBags();

  FastVector attrVector = new FastVector(attributes.size());
  for (AttributeDefinition ad : attributes)
    attrVector.addElement(ad.getAttribute());

  IdentifiedInstances<Element> result =
      new IdentifiedInstances<Element>(name, attrVector, 0);
  result.setClassIndex(classAttributeIndex);
  return result;
}
@Override
public Dataset mapStringToModel(JsonRequest request) throws ParseException {
  if (request != null && request.getData() != null && request.getData().length > 0) {
    FastVector fvWekaAttributes = new FastVector(2);
    FastVector nil = null;
    Attribute attr0 = new Attribute("text", nil, 0);
    FastVector fv = new FastVector();
    for (String nominal : request.getClassVars()) {
      fv.addElement(nominal);
    }
    Attribute attr1 = new Attribute("class", fv, 1);
    fvWekaAttributes.addElement(attr0);
    fvWekaAttributes.addElement(attr1);
    Instances ins = new Instances("attr-reln", fvWekaAttributes, request.getData().length);
    ins.setClassIndex(1);
    for (Text s : request.getData()) {
      Instance i = new Instance(2);
      i.setValue(attr0, s.getText());
      i.setValue(attr1, s.getTclass());
      ins.add(i);
    }
    return new Dataset(ins);
  }
  return null;
}
/**
 * Randomly downsample the predictions
 *
 * @param retain the fraction of the predictions to retain
 * @param seed the random seed to use
 * @throws Exception if a problem occurs
 */
@SuppressWarnings({ "cast", "deprecation" })
public void prunePredictions(double retain, long seed) throws Exception {
  if (m_Predictions == null || m_Predictions.size() == 0 || retain == 1) {
    return;
  }

  int numToRetain = (int) (retain * m_Predictions.size());
  if (numToRetain < 1) {
    numToRetain = 1;
  }

  Random r = new Random(seed);
  for (int i = 0; i < 50; i++) {
    r.nextInt();
  }

  FastVector<Prediction> downSampled = new FastVector<Prediction>(numToRetain);
  FastVector<Prediction> tmpV = new FastVector<Prediction>();
  tmpV.addAll(m_Predictions);
  for (int i = m_Predictions.size() - 1; i >= 0; i--) {
    int index = r.nextInt(i + 1);
    // downSampled.addElement(m_Predictions.elementAt(index));
    // cast necessary for 3.7.10 compatibility
    downSampled.add(tmpV.get(index));
    // downSampled.add(m_Predictions.get(index));
    if (downSampled.size() == numToRetain) {
      break;
    }
    // m_Predictions.swap(i, index);
    tmpV.swap(i, index);
  }

  m_Predictions = downSampled;
}
/**
 * Creates the weka data set for clustering of samples
 *
 * @param rawData
 *            Data extracted from selected Raw data files and rows.
 * @return Weka library data set
 */
private Instances createSampleWekaDataset(double[][] rawData) {
  FastVector attributes = new FastVector();
  for (int i = 0; i < rawData[0].length; i++) {
    String varName = "Var" + i;
    Attribute var = new Attribute(varName);
    attributes.addElement(var);
  }

  if (clusteringStep.getModule().getClass().equals(HierarClusterer.class)) {
    Attribute name = new Attribute("name", (FastVector) null);
    attributes.addElement(name);
  }
  Instances data = new Instances("Dataset", attributes, 0);

  for (int i = 0; i < rawData.length; i++) {
    double[] values = new double[data.numAttributes()];
    System.arraycopy(rawData[i], 0, values, 0, rawData[0].length);
    if (clusteringStep.getModule().getClass().equals(HierarClusterer.class)) {
      values[data.numAttributes() - 1] = data.attribute("name")
          .addStringValue(this.selectedRawDataFiles[i].getName());
    }
    Instance inst = new SparseInstance(1.0, values);
    data.add(inst);
  }
  return data;
}
/**
 * Determines the output format based on the input format and returns
 * this. In case the output format cannot be returned immediately, i.e.,
 * immediateOutputFormat() returns false, then this method will be called
 * from batchFinished().
 *
 * @param inputFormat the input format to base the output format on
 * @return the output format
 * @throws Exception in case the determination goes wrong
 * @see #hasImmediateOutputFormat()
 * @see #batchFinished()
 */
protected Instances determineOutputFormat(Instances inputFormat) throws Exception {
  // generate header
  FastVector atts = new FastVector();
  String prefix = getAlgorithm().getSelectedTag().getReadable();
  for (int i = 0; i < getNumComponents(); i++)
    atts.addElement(new Attribute(prefix + "_" + (i + 1)));
  atts.addElement(new Attribute("Class"));
  Instances result = new Instances(prefix, atts, 0);
  result.setClassIndex(result.numAttributes() - 1);

  return result;
}
/**
 * Checks whether the scheme can handle zero training instances.
 *
 * @param nominalPredictor if true use nominal predictor attributes
 * @param numericPredictor if true use numeric predictor attributes
 * @param stringPredictor if true use string predictor attributes
 * @param datePredictor if true use date predictor attributes
 * @param relationalPredictor if true use relational predictor attributes
 * @param multiInstance whether multi-instance is needed
 * @return index 0 is true if the test was passed, index 1 is true if test
 *         was acceptable
 */
protected boolean[] canHandleZeroTraining(
    boolean nominalPredictor,
    boolean numericPredictor,
    boolean stringPredictor,
    boolean datePredictor,
    boolean relationalPredictor,
    boolean multiInstance) {

  print("handle zero training instances");
  printAttributeSummary(
      nominalPredictor, numericPredictor, stringPredictor, datePredictor,
      relationalPredictor, multiInstance);
  print("...");
  FastVector accepts = new FastVector();
  accepts.addElement("train");
  accepts.addElement("value");
  int numTrain = 0, missingLevel = 0;
  boolean predictorMissing = false;

  return runBasicTest(
      nominalPredictor, numericPredictor, stringPredictor, datePredictor,
      relationalPredictor, multiInstance,
      missingLevel, predictorMissing,
      numTrain, accepts);
}
/**
 * Checks basic missing value handling of the scheme. If the missing
 * values cause an exception to be thrown by the scheme, this will be
 * recorded.
 *
 * @param nominalPredictor if true use nominal predictor attributes
 * @param numericPredictor if true use numeric predictor attributes
 * @param stringPredictor if true use string predictor attributes
 * @param datePredictor if true use date predictor attributes
 * @param relationalPredictor if true use relational predictor attributes
 * @param multiInstance whether multi-instance is needed
 * @param predictorMissing true if the missing values may be in
 * the predictors
 * @param missingLevel the percentage of missing values
 * @return index 0 is true if the test was passed, index 1 is true if test
 *         was acceptable
 */
protected boolean[] canHandleMissing(
    boolean nominalPredictor,
    boolean numericPredictor,
    boolean stringPredictor,
    boolean datePredictor,
    boolean relationalPredictor,
    boolean multiInstance,
    boolean predictorMissing,
    int missingLevel) {

  if (missingLevel == 100)
    print("100% ");
  print("missing");
  if (predictorMissing) {
    print(" predictor");
  }
  print(" values");
  printAttributeSummary(
      nominalPredictor, numericPredictor, stringPredictor, datePredictor,
      relationalPredictor, multiInstance);
  print("...");
  FastVector accepts = new FastVector();
  accepts.addElement("missing");
  accepts.addElement("value");
  accepts.addElement("train");
  int numTrain = getNumInstances();

  return runBasicTest(nominalPredictor, numericPredictor, stringPredictor,
      datePredictor, relationalPredictor, multiInstance,
      missingLevel, predictorMissing,
      numTrain, accepts);
}
/**
 * Set the shape type for the plot data
 *
 * @param st a FastVector of integers corresponding to shape types (see
 * constants defined in Plot2D)
 */
public void setShapeType(FastVector st) throws Exception {
  if (st.size() != m_plotInstances.numInstances()) {
    throw new Exception("PlotData2D: Shape type vector must have the same "
        + "number of entries as number of data points!");
  }
  m_shapeType = new int[st.size()];
  for (int i = 0; i < st.size(); i++) {
    m_shapeType[i] = ((Integer) st.elementAt(i)).intValue();
    if (m_shapeType[i] == Plot2D.ERROR_SHAPE) {
      m_shapeSize[i] = 3;
    }
  }
}
/**
 * Sorts the instances in the dataset by the run number.
 *
 * @param runColumn a value of type 'int'
 */
public void sort(int runColumn) {
  double[] runNums = new double[m_Dataset.size()];
  for (int j = 0; j < runNums.length; j++) {
    runNums[j] = ((Instance) m_Dataset.elementAt(j)).value(runColumn);
  }
  int[] index = Utils.stableSort(runNums);
  FastVector newDataset = new FastVector(runNums.length);
  for (int j = 0; j < index.length; j++) {
    newDataset.addElement(m_Dataset.elementAt(index[j]));
  }
  m_Dataset = newDataset;
}
/**
 * Clears all plots
 */
public void removeAllPlots() {
  m_masterPlot = null;
  m_plotInstances = null;
  m_plots = new FastVector();
  m_xIndex = 0;
  m_yIndex = 0;
  m_cIndex = 0;
}
FastVector selectElements(Node item, String sElement) throws Exception {
  NodeList children = item.getChildNodes();
  FastVector nodelist = new FastVector();
  for (int iNode = 0; iNode < children.getLength(); iNode++) {
    Node node = children.item(iNode);
    if ((node.getNodeType() == Node.ELEMENT_NODE)
        && node.getNodeName().equals(sElement)) {
      nodelist.addElement(node);
    }
  }
  return nodelist;
}
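/*
 * Usage sketch (added for illustration): collecting all <VARIABLE> child
 * elements of a BIF document's root with the helper above. Assumes this runs
 * inside the same class (selectElements has default access) and in a method
 * that throws Exception; "net.xml" is just an example path.
 */
javax.xml.parsers.DocumentBuilderFactory factory =
    javax.xml.parsers.DocumentBuilderFactory.newInstance();
org.w3c.dom.Document doc =
    factory.newDocumentBuilder().parse(new java.io.File("net.xml"));
FastVector variables = selectElements(doc.getDocumentElement(), "VARIABLE");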
/**
 * Generates rules out of item sets
 *
 * @param minConfidence the minimum confidence
 * @param noPrune flag indicating whether the rules are pruned according to
 * the minimum confidence value
 * @return a set of rules
 */
public final FastVector[] generateRules(double minConfidence, boolean noPrune) {
  FastVector premises = new FastVector(), consequences = new FastVector(),
      conf = new FastVector();
  FastVector[] rules = new FastVector[3];
  ItemSet premise, consequence;

  // Generate all rules with class in the consequence.
  premise = new ItemSet(m_totalTransactions);
  consequence = new ItemSet(m_totalTransactions);
  int[] premiseItems = new int[m_items.length];
  int[] consequenceItems = new int[1];
  System.arraycopy(m_items, 0, premiseItems, 0, m_items.length);
  consequence.setItem(consequenceItems);
  premise.setItem(premiseItems);
  consequence.setItemAt(m_classLabel, 0);
  consequence.setCounter(this.m_ruleSupCounter);
  premise.setCounter(this.m_counter);
  premises.addElement(premise);
  consequences.addElement(consequence);
  conf.addElement(new Double((double) this.m_ruleSupCounter / (double) this.m_counter));

  rules[0] = premises;
  rules[1] = consequences;
  rules[2] = conf;
  if (!noPrune)
    pruneRules(rules, minConfidence);

  return rules;
}
/**
 * Checks basic prediction of the scheme, for simple non-troublesome
 * datasets.
 *
 * @param nominalPredictor if true use nominal predictor attributes
 * @param numericPredictor if true use numeric predictor attributes
 * @param stringPredictor if true use string predictor attributes
 * @param datePredictor if true use date predictor attributes
 * @param relationalPredictor if true use relational predictor attributes
 * @param multiInstance whether multi-instance is needed
 * @param classType the class type (NOMINAL, NUMERIC, etc.)
 * @return index 0 is true if the test was passed, index 1 is true if test
 *         was acceptable
 */
protected boolean[] canPredict(
    boolean nominalPredictor,
    boolean numericPredictor,
    boolean stringPredictor,
    boolean datePredictor,
    boolean relationalPredictor,
    boolean multiInstance,
    int classType) {

  print("basic predict");
  printAttributeSummary(
      nominalPredictor, numericPredictor, stringPredictor, datePredictor,
      relationalPredictor, multiInstance, classType);
  print("...");
  FastVector accepts = new FastVector();
  accepts.addElement("any");
  accepts.addElement("unary");
  accepts.addElement("binary");
  accepts.addElement("nominal");
  accepts.addElement("numeric");
  accepts.addElement("string");
  accepts.addElement("date");
  accepts.addElement("relational");
  accepts.addElement("multi-instance");
  accepts.addElement("not in classpath");
  int numTrain = getNumInstances(), numClasses = 2, missingLevel = 0;
  boolean predictorMissing = false, classMissing = false;

  return runBasicTest(nominalPredictor, numericPredictor, stringPredictor,
      datePredictor, relationalPredictor, multiInstance,
      classType,
      missingLevel, predictorMissing, classMissing,
      numTrain, numClasses, accepts);
}