/** * Construct a ScoreDistribution entry * * @param scoreE the node containing the distribution * @param miningSchema the mining schema * @param baseCount the number of records at the node that owns this * distribution entry * @throws Exception if something goes wrong */ protected ScoreDistribution(Element scoreE, MiningSchema miningSchema, double baseCount) throws Exception { // get the label m_classLabel = scoreE.getAttribute("value"); Attribute classAtt = miningSchema.getFieldsAsInstances().classAttribute(); if (classAtt == null || classAtt.indexOfValue(m_classLabel) < 0) { throw new Exception( "[ScoreDistribution] class attribute not set or class value " + m_classLabel + " not found!"); } m_classLabelIndex = classAtt.indexOfValue(m_classLabel); // get the frequency String recordC = scoreE.getAttribute("recordCount"); m_recordCount = Double.parseDouble(recordC); // get the optional confidence String confidence = scoreE.getAttribute("confidence"); if (confidence != null && confidence.length() > 0) { m_confidence = Double.parseDouble(confidence); } else if (!Utils.isMissingValue(baseCount) && baseCount > 0) { m_confidence = m_recordCount / baseCount; } }
/**
 * Builds a neural input from its XML element. The element must contain
 * exactly one DerivedField, which is wrapped in a DerivedFieldMetaInfo.
 *
 * @param input the element describing the neural input
 * @param miningSchema the mining schema supplying the field definitions and
 *          the transformation dictionary
 * @throws Exception if the number of DerivedField elements is not one, or
 *           if the derived field cannot be constructed
 */
protected NeuralInput(Element input, MiningSchema miningSchema)
  throws Exception {

  m_ID = input.getAttribute("id");

  NodeList derivedFields = input.getElementsByTagName("DerivedField");
  if (derivedFields.getLength() != 1) {
    throw new Exception("[NeuralInput] expecting just one derived field!");
  }
  Element derivedFieldE = (Element) derivedFields.item(0);

  // copy every attribute of the schema into the list of field definitions
  Instances schemaFields = miningSchema.getFieldsAsInstances();
  ArrayList<Attribute> fieldDefs = new ArrayList<Attribute>();
  int attIndex = 0;
  while (attIndex < schemaFields.numAttributes()) {
    fieldDefs.add(schemaFields.attribute(attIndex));
    attIndex++;
  }

  m_field = new DerivedFieldMetaInfo(derivedFieldE, fieldDefs,
    miningSchema.getTransformationDictionary());
}
/** * Construct a ScoreDistribution entry * * @param scoreE the node containing the distribution * @param miningSchema the mining schema * @param baseCount the number of records at the node that owns this * distribution entry * @throws Exception if something goes wrong */ protected ScoreDistribution(Element scoreE, MiningSchema miningSchema, double baseCount) throws Exception { // get the label m_classLabel = scoreE.getAttribute("value"); Attribute classAtt = miningSchema.getFieldsAsInstances().classAttribute(); if (classAtt == null || classAtt.indexOfValue(m_classLabel) < 0) { throw new Exception("[ScoreDistribution] class attribute not set or class value " + m_classLabel + " not found!"); } m_classLabelIndex = classAtt.indexOfValue(m_classLabel); // get the frequency String recordC = scoreE.getAttribute("recordCount"); m_recordCount = Double.parseDouble(recordC); // get the optional confidence String confidence = scoreE.getAttribute("confidence"); if (confidence != null && confidence.length() > 0) { m_confidence = Double.parseDouble(confidence); } else if (!Utils.isMissingValue(baseCount) && baseCount > 0) { m_confidence = m_recordCount / baseCount; } }
/**
 * Factory method: scans the direct children of a tree node element and
 * builds a predicate from the first child element whose tag names a known
 * predicate type. Child elements with other tags are skipped.
 *
 * @param nodeE the XML element encapsulating the tree node
 * @param miningSchema the mining schema in use
 * @return the predicate built from the first recognised child element
 * @throws Exception if no recognised predicate element is found or the
 *           predicate cannot be constructed
 */
static Predicate getPredicate(Element nodeE, MiningSchema miningSchema)
  throws Exception {

  NodeList kids = nodeE.getChildNodes();
  for (int k = 0; k < kids.getLength(); k++) {
    Node kid = kids.item(k);
    if (kid.getNodeType() != Node.ELEMENT_NODE) {
      continue;
    }

    Element kidE = (Element) kid;
    String tag = kidE.getTagName();
    if (tag.equals("True")) {
      return new True();
    }
    if (tag.equals("False")) {
      return new False();
    }
    if (tag.equals("SimplePredicate")) {
      return new SimplePredicate(kidE, miningSchema);
    }
    if (tag.equals("CompoundPredicate")) {
      return new CompoundPredicate(kidE, miningSchema);
    }
    if (tag.equals("SimpleSetPredicate")) {
      return new SimpleSetPredicate(kidE, miningSchema);
    }
  }

  // fell through every child without finding a predicate
  throw new Exception(
    "[Predicate] unknown or missing predicate type in node");
}
public CompoundPredicate(Element compoundP, MiningSchema miningSchema) throws Exception { // Instances totalStructure = miningSchema.getFieldsAsInstances(); String booleanOpp = compoundP.getAttribute("booleanOperator"); for (BooleanOperator b : BooleanOperator.values()) { if (b.toString().equals(booleanOpp)) { m_booleanOperator = b; } } // now get all the encapsulated operators NodeList children = compoundP.getChildNodes(); for (int i = 0; i < children.getLength(); i++) { Node child = children.item(i); if (child.getNodeType() == Node.ELEMENT_NODE) { String tagName = ((Element) child).getTagName(); if (tagName.equals("True")) { m_components.add(new True()); } else if (tagName.equals("False")) { m_components.add(new False()); } else if (tagName.equals("SimplePredicate")) { m_components .add(new SimplePredicate((Element) child, miningSchema)); } else if (tagName.equals("CompoundPredicate")) { m_components.add(new CompoundPredicate((Element) child, miningSchema)); } else { m_components.add(new SimpleSetPredicate((Element) child, miningSchema)); } } } }
/**
 * Creates a TreeNode for every direct child element tagged "Node" and
 * appends it to m_childNodes.
 *
 * @param nodeE the element of the current tree node
 * @param miningSchema the mining schema in use
 * @throws Exception if a child node cannot be constructed
 */
private void getChildNodes(Element nodeE, MiningSchema miningSchema)
  throws Exception {

  NodeList kids = nodeE.getChildNodes();
  for (int k = 0; k < kids.getLength(); k++) {
    Node kid = kids.item(k);
    if (kid.getNodeType() != Node.ELEMENT_NODE) {
      continue;
    }

    Element kidE = (Element) kid;
    if (!kidE.getTagName().equals("Node")) {
      continue;
    }
    m_childNodes.add(new TreeNode(kidE, miningSchema));
  }
}
/** * Constructor for a RuleSetModel * * @param model the XML element encapsulating the RuleSetModel * @param dataDictionary the data dictionary to use * @param miningSchema the mining schema to use * @throws Exception if something goes wrong */ public RuleSetModel(Element model, Instances dataDictionary, MiningSchema miningSchema) throws Exception { super(dataDictionary, miningSchema); if (!getPMMLVersion().equals("3.2")) { // TODO: might have to throw an exception and only support 3.2 } String fn = model.getAttribute("functionName"); if (fn.equals("regression")) { m_functionType = MiningFunction.REGRESSION; } String modelName = model.getAttribute("modelName"); if (modelName != null && modelName.length() > 0) { m_modelName = modelName; } String algoName = model.getAttribute("algorithmName"); if (algoName != null && algoName.length() > 0) { m_algorithmName = algoName; } NodeList ruleset = model.getElementsByTagName("RuleSet"); if (ruleset.getLength() == 1) { Node ruleSetNode = ruleset.item(0); if (ruleSetNode.getNodeType() == Node.ELEMENT_NODE) { m_ruleSet = new RuleSet((Element)ruleSetNode, miningSchema); } } else { throw new Exception ("[RuleSetModel] Should only have a single RuleSet!"); } }
/**
 * Builds a PMML Regression model from its XML element.
 *
 * @param model the {@code Element} containing the regression model
 * @param dataDictionary the data dictionary as an Instances object
 * @param miningSchema the mining schema
 * @throws Exception if the function name is neither "regression" nor
 *           "classification", or if the normalization method or the
 *           regression tables cannot be set up
 */
public Regression(Element model, Instances dataDictionary,
  MiningSchema miningSchema) throws Exception {

  super(dataDictionary, miningSchema);

  // the function name decides how the regression tables are interpreted
  String functionName = model.getAttribute("functionName");
  final int functionType;
  if (functionName.equals("classification")) {
    functionType = RegressionTable.CLASSIFICATION;
  } else if (functionName.equals("regression")) {
    functionType = RegressionTable.REGRESSION;
  } else {
    throw new Exception("[PMML Regression] Function name not defined in pmml!");
  }

  // optional algorithm name
  String algorithmName = model.getAttribute("algorithmName");
  boolean haveAlgorithmName =
    algorithmName != null && algorithmName.length() > 0;
  if (haveAlgorithmName) {
    m_algorithmName = algorithmName;
  }

  // normalization method (if any), then the tables themselves
  m_normalizationMethod = determineNormalization(model);
  setUpRegressionTables(model, functionType);

  // string attributes in the mining schema are not converted here
  // (the call to convertStringAttsToNominal() is disabled)
}
/**
 * Factory method that returns the predicate attached to a tree node. The
 * node's direct children are examined in document order and the first
 * element naming a known predicate type is turned into a predicate; other
 * child elements are passed over.
 *
 * @param nodeE the XML element encapsulating the tree node
 * @param miningSchema the mining schema in use
 * @return the predicate for the node
 * @throws Exception if none of the children is a recognised predicate or
 *           the predicate cannot be constructed
 */
static Predicate getPredicate(Element nodeE, MiningSchema miningSchema)
  throws Exception {

  NodeList childList = nodeE.getChildNodes();
  int position = 0;
  while (position < childList.getLength()) {
    Node current = childList.item(position++);
    if (current.getNodeType() != Node.ELEMENT_NODE) {
      continue;
    }

    Element currentE = (Element) current;
    String name = currentE.getTagName();
    if (name.equals("True")) {
      return new True();
    } else if (name.equals("False")) {
      return new False();
    } else if (name.equals("SimplePredicate")) {
      return new SimplePredicate(currentE, miningSchema);
    } else if (name.equals("CompoundPredicate")) {
      return new CompoundPredicate(currentE, miningSchema);
    } else if (name.equals("SimpleSetPredicate")) {
      return new SimpleSetPredicate(currentE, miningSchema);
    }
  }

  // no child produced a predicate
  throw new Exception("[Predicate] unknown or missing predicate type in node");
}
public CompoundPredicate(Element compoundP, MiningSchema miningSchema) throws Exception { // Instances totalStructure = miningSchema.getFieldsAsInstances(); String booleanOpp = compoundP.getAttribute("booleanOperator"); for (BooleanOperator b : BooleanOperator.values()) { if (b.toString().equals(booleanOpp)) { m_booleanOperator = b; } } // now get all the encapsulated operators NodeList children = compoundP.getChildNodes(); for (int i = 0; i < children.getLength(); i++) { Node child = children.item(i); if (child.getNodeType() == Node.ELEMENT_NODE) { String tagName = ((Element)child).getTagName(); if (tagName.equals("True")) { m_components.add(new True()); } else if (tagName.equals("False")) { m_components.add(new False()); } else if (tagName.equals("SimplePredicate")) { m_components.add(new SimplePredicate((Element)child, miningSchema)); } else if (tagName.equals("CompoundPredicate")) { m_components.add(new CompoundPredicate((Element)child, miningSchema)); } else { m_components.add(new SimpleSetPredicate((Element)child, miningSchema)); } } } }
/**
 * Collects the child tree nodes of a node: each direct child element with
 * the tag "Node" is wrapped in a TreeNode and added to m_childNodes.
 *
 * @param nodeE the element of the node whose children are collected
 * @param miningSchema the mining schema in use
 * @throws Exception if constructing a child TreeNode fails
 */
private void getChildNodes(Element nodeE, MiningSchema miningSchema)
  throws Exception {

  NodeList childList = nodeE.getChildNodes();
  int position = 0;
  while (position < childList.getLength()) {
    Node current = childList.item(position);
    position++;

    boolean isElement = current.getNodeType() == Node.ELEMENT_NODE;
    if (isElement && ((Element) current).getTagName().equals("Node")) {
      TreeNode childTreeNode = new TreeNode((Element) current, miningSchema);
      m_childNodes.add(childTreeNode);
    }
  }
}
public SimplePredicate(Element simpleP, MiningSchema miningSchema) throws Exception { Instances totalStructure = miningSchema.getFieldsAsInstances(); // get the field name and set up the index String fieldS = simpleP.getAttribute("field"); Attribute att = totalStructure.attribute(fieldS); if (att == null) { throw new Exception("[SimplePredicate] unable to find field " + fieldS + " in the incoming instance structure!"); } // find the index int index = -1; for (int i = 0; i < totalStructure.numAttributes(); i++) { if (totalStructure.attribute(i).name().equals(fieldS)) { index = i; m_fieldName = totalStructure.attribute(i).name(); break; } } m_fieldIndex = index; if (att.isNominal()) { m_isNominal = true; } // get the operator String oppS = simpleP.getAttribute("operator"); for (Operator o : Operator.values()) { if (o.toString().equals(oppS)) { m_operator = o; break; } } if (m_operator != Operator.ISMISSING && m_operator != Operator.ISNOTMISSING) { String valueS = simpleP.getAttribute("value"); if (att.isNumeric()) { m_value = Double.parseDouble(valueS); } else { m_nominalValue = valueS; m_value = att.indexOfValue(valueS); if (m_value < 0) { throw new Exception("[SimplePredicate] can't find value " + valueS + " in nominal " + "attribute " + att.name()); } } } }
public SimpleSetPredicate(Element setP, MiningSchema miningSchema) throws Exception { Instances totalStructure = miningSchema.getFieldsAsInstances(); // get the field name and set up the index String fieldS = setP.getAttribute("field"); Attribute att = totalStructure.attribute(fieldS); if (att == null) { throw new Exception("[SimplePredicate] unable to find field " + fieldS + " in the incoming instance structure!"); } // find the index int index = -1; for (int i = 0; i < totalStructure.numAttributes(); i++) { if (totalStructure.attribute(i).name().equals(fieldS)) { index = i; m_fieldName = totalStructure.attribute(i).name(); break; } } m_fieldIndex = index; if (att.isNominal()) { m_isNominal = true; m_nominalLookup = att; } // need to scan the children looking for an array type NodeList children = setP.getChildNodes(); for (int i = 0; i < children.getLength(); i++) { Node child = children.item(i); if (child.getNodeType() == Node.ELEMENT_NODE) { if (Array.isArray((Element) child)) { // found the array m_set = Array.create((Element) child); break; } } } if (m_set == null) { throw new Exception("[SimpleSetPredictate] couldn't find an " + "array containing the set values!"); } // check array type against field type if (m_set.getType() == Array.ArrayType.STRING && !m_isNominal) { throw new Exception("[SimpleSetPredicate] referenced field " + totalStructure.attribute(m_fieldIndex).name() + " is numeric but array type is string!"); } else if (m_set.getType() != Array.ArrayType.STRING && m_isNominal) { throw new Exception("[SimpleSetPredicate] referenced field " + totalStructure.attribute(m_fieldIndex).name() + " is nominal but array type is numeric!"); } }
protected TreeNode(Element nodeE, MiningSchema miningSchema) throws Exception { Attribute classAtt = miningSchema.getFieldsAsInstances().classAttribute(); // get the ID String id = nodeE.getAttribute("id"); if (id != null && id.length() > 0) { m_ID = id; } // get the score for this node String scoreS = nodeE.getAttribute("score"); if (scoreS != null && scoreS.length() > 0) { m_scoreString = scoreS; // try to parse as a number in case we // are part of a regression tree if (classAtt.isNumeric()) { try { m_scoreNumeric = Double.parseDouble(scoreS); } catch (NumberFormatException ex) { throw new Exception( "[TreeNode] class is numeric but unable to parse score " + m_scoreString + " as a number!"); } } else { // store the index of this class value m_scoreIndex = classAtt.indexOfValue(m_scoreString); if (m_scoreIndex < 0) { throw new Exception( "[TreeNode] can't find match for predicted value " + m_scoreString + " in class attribute!"); } } } // get the record count if defined String recordC = nodeE.getAttribute("recordCount"); if (recordC != null && recordC.length() > 0) { m_recordCount = Double.parseDouble(recordC); } // get the default child (if applicable) String defaultC = nodeE.getAttribute("defaultChild"); if (defaultC != null && defaultC.length() > 0) { m_defaultChildID = defaultC; } // TODO: Embedded model (once we support model composition) // Now get the ScoreDistributions (if any and mining function // is classification) at this level if (m_functionType == MiningFunction.CLASSIFICATION) { getScoreDistributions(nodeE, miningSchema); } // Now get the Predicate m_predicate = Predicate.getPredicate(nodeE, miningSchema); // Now get the child Node(s) getChildNodes(nodeE, miningSchema); // If we have a default child specified, find it now if (m_defaultChildID != null) { for (TreeNode t : m_childNodes) { if (t.getID().equals(m_defaultChildID)) { m_defaultChild = t; break; } } } }
/**
 * Builds a TreeModel from its XML element: reads the function type, the
 * optional missing value strategy, missing value penalty and split
 * characteristic, then constructs the tree from the first direct child
 * element tagged "Node".
 *
 * @param model the XML element encapsulating the tree model
 * @param dataDictionary the data dictionary to use
 * @param miningSchema the mining schema to use
 * @throws Exception if the tree cannot be constructed
 */
public TreeModel(Element model, Instances dataDictionary,
  MiningSchema miningSchema) throws Exception {

  super(dataDictionary, miningSchema);

  if (!getPMMLVersion().equals("3.2")) {
    // TODO: might have to throw an exception and only support 3.2
  }

  // only "regression" changes the function type from its initial value
  String functionName = model.getAttribute("functionName");
  if (functionName.equals("regression")) {
    m_functionType = MiningFunction.REGRESSION;
  }

  // missing value strategy (if any): first constant with a matching string
  String strategyS = model.getAttribute("missingValueStrategy");
  if (strategyS != null && strategyS.length() > 0) {
    MissingValueStrategy[] strategies = MissingValueStrategy.values();
    for (int s = 0; s < strategies.length; s++) {
      if (strategies[s].toString().equals(strategyS)) {
        m_missingValueStrategy = strategies[s];
        break;
      }
    }
  }

  // missing value penalty (if any); an unparseable value only produces a
  // warning on stderr
  String penaltyS = model.getAttribute("missingValuePenalty");
  if (penaltyS != null && penaltyS.length() > 0) {
    try {
      m_missingValuePenalty = Double.parseDouble(penaltyS);
    } catch (NumberFormatException ex) {
      System.err.println("[TreeModel] WARNING: "
        + "couldn't parse supplied missingValuePenalty as a number");
    }
  }

  // split characteristic (if any)
  String splitS = model.getAttribute("splitCharacteristic");
  if (splitS != null && splitS.length() > 0) {
    SplitCharacteristic[] characteristics = SplitCharacteristic.values();
    for (int c = 0; c < characteristics.length; c++) {
      if (characteristics[c].toString().equals(splitS)) {
        m_splitCharacteristic = characteristics[c];
        break;
      }
    }
  }

  // the root is the first direct child element tagged "Node"
  NodeList kids = model.getChildNodes();
  for (int k = 0; k < kids.getLength(); k++) {
    Node kid = kids.item(k);
    if (kid.getNodeType() != Node.ELEMENT_NODE) {
      continue;
    }
    Element kidE = (Element) kid;
    if (kidE.getTagName().equals("Node")) {
      m_root = new TreeNode(kidE, miningSchema);
      break;
    }
  }
}
/**
 * Builds the output layer description from the NeuralOutputs element. For
 * a numeric class a single NormContinuous mapping is read; otherwise each
 * NeuralOutput is tied to the index of the class value named by its
 * NormDiscrete element.
 *
 * @param outputs the NeuralOutputs element
 * @param miningSchema the mining schema supplying the class attribute
 * @throws Exception if the number of NeuralOutput elements does not match
 *           the expected number, a NeuralOutput does not contain exactly
 *           one normalization element of the expected kind, or a target
 *           value is not a value of the class attribute
 */
protected NeuralOutputs(Element outputs, MiningSchema miningSchema)
  throws Exception {

  m_classAttribute =
    miningSchema.getMiningSchemaAsInstances().classAttribute();

  // one output for a numeric class, one per class value otherwise
  int expectedOutputs = 1;
  if (!m_classAttribute.isNumeric()) {
    expectedOutputs = m_classAttribute.numValues();
  }
  m_outputNeurons = new String[expectedOutputs];
  m_categoricalIndexes = new int[expectedOutputs];

  NodeList outputL = outputs.getElementsByTagName("NeuralOutput");
  if (outputL.getLength() != m_outputNeurons.length) {
    throw new Exception("[NeuralOutputs] the number of neural outputs does not match "
      + "the number expected!");
  }

  for (int i = 0; i < outputL.getLength(); i++) {
    Node outputN = outputL.item(i);
    if (outputN.getNodeType() != Node.ELEMENT_NODE) {
      continue;
    }
    Element outputE = (Element) outputN;

    // the ID of the neuron feeding this output
    m_outputNeurons[i] = outputE.getAttribute("outputNeuron");

    if (m_classAttribute.isNumeric()) {
      // numeric class: exactly one NormContinuous, and we are done
      NodeList contL = outputE.getElementsByTagName("NormContinuous");
      if (contL.getLength() != 1) {
        throw new Exception("[NeuralOutputs] Should be exactly one norm continuous element "
          + "for numeric class!");
      }
      Element normContE = (Element) contL.item(0);

      // a stand-in attribute named after the referenced field is the only
      // field definition handed to the mapping
      String attName = normContE.getAttribute("field");
      ArrayList<Attribute> dummyFieldDefs = new ArrayList<Attribute>();
      dummyFieldDefs.add(new Attribute(attName));

      m_regressionMapping = new NormContinuous(normContE,
        FieldMetaInfo.Optype.CONTINUOUS, dummyFieldDefs);
      break;
    }

    // nominal class: grab the categorical value (from the NormDiscrete
    // element) that this output neuron is associated with
    NodeList discL = outputE.getElementsByTagName("NormDiscrete");
    if (discL.getLength() != 1) {
      throw new Exception("[NeuralOutputs] Should be only one norm discrete element "
        + "per derived field/neural output for a nominal class!");
    }
    String attValue = ((Element) discL.item(0)).getAttribute("value");

    int valueIndex = m_classAttribute.indexOfValue(attValue);
    if (valueIndex < 0) {
      throw new Exception("[NeuralOutputs] Can't find specified target value "
        + attValue + " in class attribute " + m_classAttribute.name());
    }
    m_categoricalIndexes[i] = valueIndex;
  }
}
/**
 * Builds a NeuralNetwork model from its XML element: reads the function
 * type, activation function, optional normalization method, threshold,
 * width and altitude, then the neural inputs, layers and outputs.
 *
 * @param model the XML element encapsulating the neural network
 * @param dataDictionary the data dictionary to use
 * @param miningSchema the mining schema to use
 * @throws Exception if no activation function is given, a numeric
 *           attribute cannot be parsed, there is not exactly one
 *           NeuralOutputs element, or a component cannot be constructed
 */
public NeuralNetwork(Element model, Instances dataDictionary,
  MiningSchema miningSchema) throws Exception {

  super(dataDictionary, miningSchema);

  // only "regression" changes the function type from its initial value
  String functionName = model.getAttribute("functionName");
  if (functionName.equals("regression")) {
    m_functionType = MiningFunction.REGRESSION;
  }

  // the activation function attribute must be present
  String activationS = model.getAttribute("activationFunction");
  if (activationS == null || activationS.length() == 0) {
    throw new Exception("[NeuralNetwork] no activation functon defined");
  }
  ActivationFunction[] activations = ActivationFunction.values();
  for (int a = 0; a < activations.length; a++) {
    if (activations[a].toString().equals(activationS)) {
      m_activationFunction = activations[a];
      break;
    }
  }

  // normalization method (if specified)
  String normS = model.getAttribute("normalizationMethod");
  if (normS != null && normS.length() > 0) {
    Normalization[] normalizations = Normalization.values();
    for (int n = 0; n < normalizations.length; n++) {
      if (normalizations[n].toString().equals(normS)) {
        m_normalizationMethod = normalizations[n];
        break;
      }
    }
  }

  // optional numeric settings
  String thresholdS = model.getAttribute("threshold");
  if (thresholdS != null && thresholdS.length() > 0) {
    m_threshold = Double.parseDouble(thresholdS);
  }

  String widthS = model.getAttribute("width");
  if (widthS != null && widthS.length() > 0) {
    m_width = Double.parseDouble(widthS);
  }

  String altitudeS = model.getAttribute("altitude");
  if (altitudeS != null && altitudeS.length() > 0) {
    m_altitude = Double.parseDouble(altitudeS);
  }

  // all the inputs
  NodeList inputL = model.getElementsByTagName("NeuralInput");
  m_numberOfInputs = inputL.getLength();
  m_inputs = new NeuralInput[m_numberOfInputs];
  for (int in = 0; in < m_numberOfInputs; in++) {
    Node inputN = inputL.item(in);
    if (inputN.getNodeType() != Node.ELEMENT_NODE) {
      continue;
    }
    m_inputs[in] = new NeuralInput((Element) inputN, m_miningSchema);
  }

  // the layers
  NodeList layerL = model.getElementsByTagName("NeuralLayer");
  m_numberOfLayers = layerL.getLength();
  m_layers = new NeuralLayer[m_numberOfLayers];
  for (int layer = 0; layer < m_numberOfLayers; layer++) {
    Node layerN = layerL.item(layer);
    if (layerN.getNodeType() != Node.ELEMENT_NODE) {
      continue;
    }
    m_layers[layer] = new NeuralLayer((Element) layerN);
  }

  // the outputs: exactly one NeuralOutputs element is expected
  NodeList outputL = model.getElementsByTagName("NeuralOutputs");
  if (outputL.getLength() != 1) {
    throw new Exception("[NeuralNetwork] Should be just one NeuralOutputs element defined!");
  }
  Element outputsE = (Element) outputL.item(0);
  m_outputs = new NeuralOutputs(outputsE, m_miningSchema);
}
/**
 * Builds a rule from its XML element. The rule's predicate is obtained via
 * TreeModel.Predicate.getPredicate applied to the rule element.
 *
 * @param ruleE the element encapsulating the rule
 * @param miningSchema the mining schema in use
 * @throws Exception if the predicate cannot be obtained
 */
public Rule(Element ruleE, MiningSchema miningSchema) throws Exception {
  TreeModel.Predicate rulePredicate =
    TreeModel.Predicate.getPredicate(ruleE, miningSchema);
  m_predicate = rulePredicate;
}
/**
 * Builds a regression table from its XML element: reads the intercept, the
 * target category (classification only) and all numeric predictors,
 * categorical predictors and predictor terms.
 *
 * @param table the {@code Element} holding the table
 * @param functionType the type of function (regression or classification)
 *          to use
 * @param mSchema the mining schema
 * @throws Exception if, for classification, no target category could be
 *           determined, or if a predictor cannot be constructed
 */
protected RegressionTable(Element table, int functionType,
  MiningSchema mSchema) throws Exception {

  m_miningSchema = mSchema;
  m_functionType = functionType;

  Instances fields = m_miningSchema.getFieldsAsInstances();

  // intercept (left at its initial value when the attribute is empty)
  String interceptS = table.getAttribute("intercept");
  if (interceptS.length() > 0) {
    m_intercept = Double.parseDouble(interceptS);
  }

  // classification tables MUST name a target category that is one of the
  // values of the class attribute
  if (m_functionType == CLASSIFICATION) {
    String targetCat = table.getAttribute("targetCategory");
    if (targetCat.length() > 0) {
      Attribute classA = fields.classAttribute();
      int v = 0;
      while (v < classA.numValues()) {
        if (classA.value(v).equals(targetCat)) {
          m_targetCategory = v;
        }
        v++;
      }
    }
    if (m_targetCategory == -1) {
      throw new Exception("[RegressionTable] No target categories defined for classification");
    }
  }

  // numeric predictors
  NodeList numericPs = table.getElementsByTagName("NumericPredictor");
  for (int p = 0; p < numericPs.getLength(); p++) {
    Node numericN = numericPs.item(p);
    if (numericN.getNodeType() != Node.ELEMENT_NODE) {
      continue;
    }
    m_predictors.add(new NumericPredictor((Element) numericN, fields));
  }

  // categorical predictors
  NodeList categoricalPs = table.getElementsByTagName("CategoricalPredictor");
  for (int p = 0; p < categoricalPs.getLength(); p++) {
    Node categoricalN = categoricalPs.item(p);
    if (categoricalN.getNodeType() != Node.ELEMENT_NODE) {
      continue;
    }
    m_predictors.add(new CategoricalPredictor((Element) categoricalN, fields));
  }

  // predictor terms
  NodeList predictorTerms = table.getElementsByTagName("PredictorTerm");
  for (int p = 0; p < predictorTerms.getLength(); p++) {
    Element termE = (Element) predictorTerms.item(p);
    m_predictorTerms.add(new PredictorTerm(termE, fields));
  }
}
public SimpleSetPredicate(Element setP, MiningSchema miningSchema) throws Exception { Instances totalStructure = miningSchema.getFieldsAsInstances(); // get the field name and set up the index String fieldS = setP.getAttribute("field"); Attribute att = totalStructure.attribute(fieldS); if (att == null) { throw new Exception("[SimplePredicate] unable to find field " + fieldS + " in the incoming instance structure!"); } // find the index int index = -1; for (int i = 0; i < totalStructure.numAttributes(); i++) { if (totalStructure.attribute(i).name().equals(fieldS)) { index = i; m_fieldName = totalStructure.attribute(i).name(); break; } } m_fieldIndex = index; if (att.isNominal()) { m_isNominal = true; m_nominalLookup = att; } // need to scan the children looking for an array type NodeList children = setP.getChildNodes(); for (int i = 0; i < children.getLength(); i++) { Node child = children.item(i); if (child.getNodeType() == Node.ELEMENT_NODE) { if (Array.isArray((Element)child)) { // found the array m_set = Array.create((Element)child); break; } } } if (m_set == null) { throw new Exception("[SimpleSetPredictate] couldn't find an " + "array containing the set values!"); } // check array type against field type if (m_set.getType() == Array.ArrayType.STRING && !m_isNominal) { throw new Exception("[SimpleSetPredicate] referenced field " + totalStructure.attribute(m_fieldIndex).name() + " is numeric but array type is string!"); } else if (m_set.getType() != Array.ArrayType.STRING && m_isNominal) { throw new Exception("[SimpleSetPredicate] referenced field " + totalStructure.attribute(m_fieldIndex).name() + " is nominal but array type is numeric!"); } }
protected TreeNode(Element nodeE, MiningSchema miningSchema) throws Exception { Attribute classAtt = miningSchema.getFieldsAsInstances().classAttribute(); // get the ID String id = nodeE.getAttribute("id"); if (id != null && id.length() > 0) { m_ID = id; } // get the score for this node String scoreS = nodeE.getAttribute("score"); if (scoreS != null && scoreS.length() > 0) { m_scoreString = scoreS; // try to parse as a number in case we // are part of a regression tree if (classAtt.isNumeric()) { try { m_scoreNumeric = Double.parseDouble(scoreS); } catch (NumberFormatException ex) { throw new Exception("[TreeNode] class is numeric but unable to parse score " + m_scoreString + " as a number!"); } } else { // store the index of this class value m_scoreIndex = classAtt.indexOfValue(m_scoreString); if (m_scoreIndex < 0) { throw new Exception("[TreeNode] can't find match for predicted value " + m_scoreString + " in class attribute!"); } } } // get the record count if defined String recordC = nodeE.getAttribute("recordCount"); if (recordC != null && recordC.length() > 0) { m_recordCount = Double.parseDouble(recordC); } // get the default child (if applicable) String defaultC = nodeE.getAttribute("defaultChild"); if (defaultC != null && defaultC.length() > 0) { m_defaultChildID = defaultC; } //TODO: Embedded model (once we support model composition) // Now get the ScoreDistributions (if any and mining function // is classification) at this level if (m_functionType == MiningFunction.CLASSIFICATION) { getScoreDistributions(nodeE, miningSchema); } // Now get the Predicate m_predicate = Predicate.getPredicate(nodeE, miningSchema); // Now get the child Node(s) getChildNodes(nodeE, miningSchema); // If we have a default child specified, find it now if (m_defaultChildID != null) { for (TreeNode t : m_childNodes) { if (t.getID().equals(m_defaultChildID)) { m_defaultChild = t; break; } } } }
public TreeModel(Element model, Instances dataDictionary, MiningSchema miningSchema) throws Exception { super(dataDictionary, miningSchema); if (!getPMMLVersion().equals("3.2")) { // TODO: might have to throw an exception and only support 3.2 } String fn = model.getAttribute("functionName"); if (fn.equals("regression")) { m_functionType = MiningFunction.REGRESSION; } // get the missing value strategy (if any) String missingVS = model.getAttribute("missingValueStrategy"); if (missingVS != null && missingVS.length() > 0) { for (MissingValueStrategy m : MissingValueStrategy.values()) { if (m.toString().equals(missingVS)) { m_missingValueStrategy = m; break; } } } // get the missing value penalty (if any) String missingP = model.getAttribute("missingValuePenalty"); if (missingP != null && missingP.length() > 0) { // try to parse as a number try { m_missingValuePenalty = Double.parseDouble(missingP); } catch (NumberFormatException ex) { System.err.println("[TreeModel] WARNING: " + "couldn't parse supplied missingValuePenalty as a number"); } } String splitC = model.getAttribute("splitCharacteristic"); if (splitC != null && splitC.length() > 0) { for (SplitCharacteristic s : SplitCharacteristic.values()) { if (s.toString().equals(splitC)) { m_splitCharacteristic = s; break; } } } // find the root node of the tree NodeList children = model.getChildNodes(); for (int i = 0; i < children.getLength(); i++) { Node child = children.item(i); if (child.getNodeType() == Node.ELEMENT_NODE) { String tagName = ((Element)child).getTagName(); if (tagName.equals("Node")) { m_root = new TreeNode((Element)child, miningSchema); break; } } } }