//
// PRIVATE METHODS
//

// Recursively builds a decision tree for the examples in ds.
//
// ds             - examples still to be classified at this node.
// attributeNames - attributes that may still be used for splitting.
// defaultTree    - tree returned unchanged when ds contains no examples.
//
// Returns defaultTree for an empty data set, a constant tree when every
// example shares one target value, the result of majorityValue(ds) when no
// attributes remain, and otherwise a tree that splits on the attribute
// picked by chooseAttribute with one sub-tree per possible value.
private DecisionTree decisionTreeLearning(DataSet ds, List<String> attributeNames, ConstantDecisonTree defaultTree)
{
    // Base case 1: nothing left to learn from.
    if (ds.size() == 0)
    {
        return defaultTree;
    }

    // Base case 2: every example agrees, so the first one's target value
    // stands for all of them.
    if (allExamplesHaveSameClassification(ds))
    {
        return new ConstantDecisonTree(ds.getExample(0).targetValue());
    }

    // Base case 3: no attribute left to split on.
    if (attributeNames.Count == 0)
    {
        return majorityValue(ds);
    }

    String splitAttribute = chooseAttribute(ds, attributeNames);
    DecisionTree root = new DecisionTree(splitAttribute);

    // Handed down as the default tree, so a branch whose filtered data set
    // is empty resolves to majorityValue of this node's data set.
    ConstantDecisonTree fallback = majorityValue(ds);

    List<String> possibleValues = ds.getPossibleAttributeValues(splitAttribute);
    foreach (String attributeValue in possibleValues)
    {
        DataSet subset = ds.matchingDataSet(splitAttribute, attributeValue);
        List<String> remainingAttributes = Util.removeFrom(attributeNames, splitAttribute);
        DecisionTree branch = decisionTreeLearning(subset, remainingAttributes, fallback);
        root.addNode(attributeValue, branch);
    }

    return root;
}
// Returns true when every example produced by ds.iterator() has the same
// target value as the first example of ds, false as soon as one differs.
// Reads ds.getExample(0), so ds is expected to hold at least one example.
private bool allExamplesHaveSameClassification(DataSet ds)
{
    String expected = ds.getExample(0).targetValue();

    // The enumerator is kept in a named local so MoveNext() advances the
    // same instance on every pass.
    for (List<Example>.Enumerator cursor = ds.iterator(); cursor.MoveNext(); )
    {
        Example candidate = cursor.Current;
        bool agrees = candidate.targetValue().Equals(expected);
        if (!agrees)
        {
            return false;
        }
    }

    return true;
}
// Builds a constant tree from a MajorityLearner trained on ds: the tree's
// value is whatever that learner predicts for the first example of ds.
// Reads ds.getExample(0), so ds is expected to hold at least one example.
private ConstantDecisonTree majorityValue(DataSet ds)
{
    Learner majority = new MajorityLearner();
    majority.train(ds);

    // The first example only serves as an input for predict().
    Example probe = ds.getExample(0);
    return new ConstantDecisonTree(majority.predict(probe));
}
// Scales down the weight of every example that learner l predicts
// correctly, then replaces exampleWeights with Util.normalize(exampleWeights).
//
// ds    - data set whose examples line up index-by-index with exampleWeights.
// l     - learner whose predictions are compared with the target values.
// error - error of l; the scaling factor is error / (1 - error).
//
// NOTE(review): error == 1.0 yields an infinite factor and error == 0.0
// zeroes every correctly predicted weight before normalisation - confirm
// that callers never pass those values.
private void adjustExampleWeights(DataSet ds, Learner l, double error)
{
    double scale = error / (1.0 - error);

    for (int index = 0; index < ds.examples.Count; index++)
    {
        Example current = ds.getExample(index);
        bool predictedCorrectly = l.predict(current).Equals(current.targetValue());
        if (!predictedCorrectly)
        {
            // Misclassified examples keep their weight until normalisation.
            continue;
        }
        exampleWeights[index] *= scale;
    }

    exampleWeights = Util.normalize(exampleWeights);
}
// Sums the entries of exampleWeights that belong to examples for which
// learner l predicts something other than the example's target value.
//
// ds - data set whose examples line up index-by-index with exampleWeights.
// l  - learner under evaluation.
//
// Returns the accumulated weight of the mispredicted examples (0.0 when
// every prediction matches).
private double calculateError(DataSet ds, Learner l)
{
    double weightedError = 0.0;

    for (int index = 0; index < ds.examples.Count; index++)
    {
        Example current = ds.getExample(index);
        bool predictedCorrectly = l.predict(current).Equals(current.targetValue());
        if (predictedCorrectly)
        {
            continue;
        }
        weightedError += exampleWeights[index];
    }

    return weightedError;
}
// Converts every example of ds into one flat row of doubles.
//
// ds        - data set to convert; examples are read by index 0..size()-1.
// numerizer - turns an example into a pair (first, second) of double lists.
//
// Returns one row per example, in data-set order. Each row is the first
// list of the numerized pair followed by the second list.
private List<List<Double>> rawExamplesFromDataSet(DataSet ds, Numerizer numerizer)
{
    // assumes all values for input and target are doubles
    List<List<Double>> rds = new List<List<Double>>();
    for (int i = 0; i < ds.size(); i++)
    {
        Example e = ds.getExample(i);
        Pair<List<Double>, List<Double>> p = numerizer.numerize(e);

        // AddRange appends each list in one call, replacing the
        // element-by-element copy loops.
        List<Double> rexample = new List<Double>();
        rexample.AddRange(p.getFirst());
        rexample.AddRange(p.getSecond());

        rds.Add(rexample);
    }
    return rds;
}
// Returns true when every example in matched.examples has the same target
// value as the first example of matched, false as soon as one differs.
// Assumes at least one example in the data set, because the reference
// value is read from matched.getExample(0).
private bool allExamplesHaveSameTargetValue(DataSet matched)
{
    String reference = matched.getExample(0).targetValue();

    foreach (Example candidate in matched.examples)
    {
        bool agrees = candidate.targetValue().Equals(reference);
        if (!agrees)
        {
            return false;
        }
    }

    return true;
}