/// <summary>
/// Trains the AdaBoost ensemble. Each weak learner is trained in turn, its
/// weighted error on the data set is measured, the example weights are
/// re-balanced to emphasise the examples it misclassified, and the learner's
/// hypothesis weight is updated from its error.
/// </summary>
/// <param name="ds">the training data set</param>
public void train(DataSet ds)
{
    initializeExampleWeights(ds.examples.Count);
    foreach (Learner learner in learners)
    {
        learner.train(ds);
        double error = calculateError(ds, learner);
        // A (near) perfect learner: stop boosting; further weight updates
        // would be degenerate (log of a value approaching infinity).
        if (error < 0.0001)
        {
            break;
        }
        adjustExampleWeights(ds, learner, error);

        // Fix: the original read learnerWeights[learner] BEFORE checking
        // ContainsKey, so a missing key threw KeyNotFoundException and the
        // Add branch was unreachable. Default the prior weight to 1.0 when
        // the learner has no entry yet (presumably the value assigned by
        // initializeHypothesisWeights - TODO confirm).
        double previousWeight = learnerWeights.ContainsKey(learner)
                ? learnerWeights[learner]
                : 1.0;
        double newHypothesisWeight = previousWeight
                * Math.Log((1.0 - error) / error);
        learnerWeights[learner] = newHypothesisWeight;
    }
}
//
// PRIVATE METHODS
//

/// <summary>
/// Recursive decision-tree induction (AIMA DECISION-TREE-LEARNING):
/// picks the highest-gain attribute, splits the examples on its values,
/// and recurses, using the majority classification of the current subset
/// as the default for empty child subsets.
/// </summary>
/// <param name="ds">the examples still under consideration</param>
/// <param name="attributeNames">attributes not yet used on this path</param>
/// <param name="defaultTree">answer to return when no examples remain</param>
private DecisionTree decisionTreeLearning(DataSet ds, List<String> attributeNames, ConstantDecisonTree defaultTree)
{
    // Base case 1: no examples left - use the caller-supplied default.
    if (ds.size() == 0)
    {
        return defaultTree;
    }
    // Base case 2: all examples agree - answer with that classification.
    if (allExamplesHaveSameClassification(ds))
    {
        return new ConstantDecisonTree(ds.getExample(0).targetValue());
    }
    // Base case 3: attributes exhausted - answer with the majority value.
    if (attributeNames.Count == 0)
    {
        return majorityValue(ds);
    }

    String bestAttribute = chooseAttribute(ds, attributeNames);
    DecisionTree result = new DecisionTree(bestAttribute);
    ConstantDecisonTree majorityDefault = majorityValue(ds);

    // One subtree per possible value of the chosen attribute.
    foreach (String attributeValue in ds.getPossibleAttributeValues(bestAttribute))
    {
        DataSet subset = ds.matchingDataSet(bestAttribute, attributeValue);
        List<String> remainingAttributes = Util.removeFrom(attributeNames, bestAttribute);
        result.addNode(attributeValue,
                decisionTreeLearning(subset, remainingAttributes, majorityDefault));
    }
    return result;
}
/*
 * Creates a normalized data "table" from the DataSet using the numerizer.
 * At this stage the data is not yet split into input patterns and targets.
 * TODO: remove the redundancy of recreating the target columns - the
 * numerizer has already isolated the targets.
 */
public void createNormalizedDataFromDataSet(DataSet ds, Numerizer numerizer)
{
    // Numerize every example into raw rows, then normalize the whole table.
    nds = normalize(rawExamplesFromDataSet(ds, numerizer));
}
/// <summary>
/// Builds an AdaBoost learner over the given ensemble of weak learners,
/// initializing the weight of every training example and every hypothesis.
/// </summary>
/// <param name="learners">the weak learners to be boosted</param>
/// <param name="ds">the training data set</param>
public AdaBoostLearner(List<Learner> learners, DataSet ds)
{
    this.dataSet = ds;
    this.learners = learners;
    initializeHypothesisWeights(learners.Count);
    initializeExampleWeights(ds.examples.Count);
}
/// <summary>
/// "Trains" by remembering the most common target value in the data set;
/// that mode becomes the stored prediction result.
/// </summary>
/// <param name="ds">the training data set</param>
public void train(DataSet ds)
{
    List<String> observedTargets = new List<String>();
    for (int i = 0; i < ds.examples.Count; i++)
    {
        observedTargets.Add(ds.getExample(i).targetValue());
    }
    result = Util.mode(observedTargets);
}
/// <summary>
/// Returns a new data set holding every example this test does NOT match.
/// </summary>
/// <param name="ds">the data set to filter</param>
public DataSet unmatchedExamples(DataSet ds)
{
    DataSet failures = ds.emptyDataSet();
    foreach (Example example in ds.examples)
    {
        if (matches(example))
        {
            continue; // matched examples are excluded
        }
        failures.add(example);
    }
    return failures;
}
/// <summary>
/// Returns a copy of this data set with every example equal to
/// <paramref name="e"/> removed; this data set is left unmodified.
/// </summary>
/// <param name="e">the example to remove</param>
public DataSet removeExample(Example e)
{
    DataSet reduced = new DataSet(specification);
    foreach (Example candidate in examples)
    {
        if (e.Equals(candidate))
        {
            continue; // skip the example being removed
        }
        reduced.add(candidate);
    }
    return reduced;
}
/// <summary>
/// Builds a one-level decision "stump" on a single attribute: the given
/// value leads to <paramref name="returnValueIfMatched"/>, every other
/// listed value leads to <paramref name="returnValueIfUnmatched"/>.
/// </summary>
public static DecisionTree getStumpFor(DataSet ds, String attributeName, String attributeValue, String returnValueIfMatched, List<String> unmatchedValues, String returnValueIfUnmatched)
{
    DecisionTree stump = new DecisionTree(attributeName);
    stump.addLeaf(attributeValue, returnValueIfMatched);
    foreach (String otherValue in unmatchedValues)
    {
        stump.addLeaf(otherValue, returnValueIfUnmatched);
    }
    return stump;
}
/// <summary>
/// Scores this learner on a data set. Returns a two-element array:
/// index 0 = number of correctly predicted examples,
/// index 1 = number of incorrectly predicted examples.
/// </summary>
/// <param name="ds">the data set to evaluate against</param>
public int[] test(DataSet ds)
{
    int[] tally = new int[] { 0, 0 };
    foreach (Example example in ds.examples)
    {
        int bucket = example.targetValue().Equals(predict(example)) ? 0 : 1;
        tally[bucket]++;
    }
    return tally;
}
/// <summary>
/// Loads a data set from an embedded "AIMA.Resource.*" assembly resource,
/// one example per line. The file is assumed to live in the data directory
/// and end in ".csv".
/// </summary>
/// <param name="filename">resource file name (relative, e.g. "x.csv")</param>
/// <param name="spec">specification used to parse each line</param>
/// <param name="separator">field separator within a line</param>
public DataSet fromFile(String filename, DataSetSpecification spec, Char separator)
{
    DataSet result = new DataSet(spec);
    Stream resource = typeof(DataSetFactory).Assembly
            .GetManifestResourceStream("AIMA.Resource." + filename);
    using (StreamReader reader = new StreamReader(resource))
    {
        for (String line = reader.ReadLine(); line != null; line = reader.ReadLine())
        {
            result.add(exampleFromString(line, spec, separator));
        }
    }
    return result;
}
//
// START-Learner

/// <summary>
/// Trains by converting the data set into first-order-logic examples,
/// setting up a knowledge base over the derived FOL domain, and running
/// the current-best-hypothesis learning algorithm.
/// </summary>
/// <param name="ds">the training data set</param>
public void train(DataSet ds)
{
    folDSDomain = new FOLDataSetDomain(ds.specification, trueGoalValue);

    // Convert every example to its FOL form, numbering them from 1.
    List<FOLExample> folExamples = new List<FOLExample>();
    int exampleNumber = 0;
    foreach (Example example in ds.examples)
    {
        exampleNumber++;
        folExamples.Add(new FOLExample(folDSDomain, example, exampleNumber));
    }

    // Set up a KB to be used for learning.
    kb = new FOLKnowledgeBase(folDSDomain,
            new FOLOTTERLikeTheoremProver(1000, false));

    CurrentBestLearning cbl = new CurrentBestLearning(folDSDomain, kb);
    currentBestHypothesis = cbl.currentBestLearning(folExamples);
}
//
// PRIVATE METHODS
//

/// <summary>
/// Recursive decision-list induction (AIMA DECISION-LIST-LEARNING):
/// finds a single-attribute test that classifies some non-empty subset of
/// the examples uniformly, records it as a rule, and recurses on the
/// examples that test did not match.
/// </summary>
/// <param name="ds">the examples still to be covered</param>
private DecisionList decisionListLearning(DataSet ds)
{
    // No examples left: return the default (positive/negative) list.
    if (ds.size() == 0)
    {
        return new DecisionList(positive, negative);
    }

    List<DLTest> candidateTests = testFactory
            .createDLTestsWithAttributeCount(ds, 1);
    DLTest chosenTest = getValidTest(candidateTests, ds);
    if (chosenTest == null)
    {
        // No test classifies any subset uniformly - learning fails.
        return new DecisionList(null, FAILURE);
    }

    // chosenTest classifies some subset of the examples with a single
    // target value; record that rule, then recurse on the remainder.
    DataSet matchedExamples = chosenTest.matchedExamples(ds);
    DecisionList front = new DecisionList(positive, negative);
    front.add(chosenTest, matchedExamples.getExample(0).targetValue());
    return front.mergeWith(decisionListLearning(chosenTest.unmatchedExamples(ds)));
}
/// <summary>
/// Builds one decision stump per (non-target attribute, value) pair in the
/// data set: each stump answers returnValueIfMatched for its value and
/// returnValueIfUnmatched for every other value of that attribute.
/// </summary>
public static List<DecisionTree> getStumpsFor(DataSet ds, String returnValueIfMatched, String returnValueIfUnmatched)
{
    List<DecisionTree> stumps = new List<DecisionTree>();
    foreach (String attributeName in ds.getNonTargetAttributes())
    {
        foreach (String attributeValue in ds.getPossibleAttributeValues(attributeName))
        {
            List<String> otherValues = Util.removeFrom(
                    ds.getPossibleAttributeValues(attributeName), attributeValue);
            stumps.Add(getStumpFor(ds, attributeName, attributeValue,
                    returnValueIfMatched, otherValues, returnValueIfUnmatched));
        }
    }
    return stumps;
}
/// <summary>
/// Induces the decision tree from all non-target attributes of the data
/// set, using a constant "defaultValue" tree for empty example subsets.
/// </summary>
/// <param name="ds">the training data set</param>
public virtual void train(DataSet ds)
{
    this.tree = decisionTreeLearning(ds, ds.getNonTargetAttributes(),
            new ConstantDecisonTree(defaultValue));
}
/*
 * Method called by clients to set up a data set and make it ready for
 * processing: numerize and normalize the raw data, identify the target
 * columns, then build the example pairs. The three steps are
 * order-dependent and must run in this sequence.
 */
public void createExamplesFromDataSet(DataSet ds, Numerizer numerizer)
{
    createNormalizedDataFromDataSet(ds, numerizer);
    setTargetColumns();
    createExamples();
}
/// <summary>
/// Returns a new data set containing exactly the examples whose value for
/// <paramref name="attributeName"/> equals <paramref name="attributeValue"/>.
/// </summary>
public DataSet matchingDataSet(String attributeName, String attributeValue)
{
    DataSet matches = new DataSet(specification);
    foreach (Example example in examples)
    {
        String actual = example.getAttributeValueAsString(attributeName);
        if (actual.Equals(attributeValue))
        {
            matches.add(example);
        }
    }
    return matches;
}
public void testOn(DataSet ds)
{
    // TODO Auto-generated method stub - intentionally left unimplemented.
}
/// <summary>
/// Partitions the examples by their value for the given attribute:
/// each distinct attribute value maps to a data set holding every example
/// carrying that value.
/// </summary>
/// <param name="attributeName">the attribute to partition on</param>
public Dictionary<String, DataSet> splitByAttribute(String attributeName)
{
    Dictionary<String, DataSet> partitions = new Dictionary<String, DataSet>();
    foreach (Example example in examples)
    {
        String key = example.getAttributeValueAsString(attributeName);
        DataSet partition;
        // Single lookup instead of ContainsKey + indexer.
        if (!partitions.TryGetValue(key, out partition))
        {
            partition = new DataSet(specification);
            partitions.Add(key, partition);
        }
        partition.add(example);
    }
    return partitions;
}
public override void train(DataSet ds)
{
    // System.Console.WriteLine("Stump learner training");
    // Intentionally a no-op: the stump's structure is fixed when it is
    // created and is not inferred from the data set.
}
/// <summary>
/// Returns the first candidate test that matches at least one example and
/// classifies all of its matched examples with the same target value, or
/// null when no such test exists.
/// </summary>
private DLTest getValidTest(List<DLTest> possibleTests, DataSet ds)
{
    foreach (DLTest candidate in possibleTests)
    {
        DataSet matched = candidate.matchedExamples(ds);
        // Short-circuit && preserves the original nested-if behavior.
        if (matched.size() > 0 && allExamplesHaveSameTargetValue(matched))
        {
            return candidate;
        }
    }
    return null;
}
/// <summary>
/// AdaBoost re-weighting: scales down the weight of every example the
/// learner classified correctly by error/(1-error), then renormalizes so
/// the weights sum to 1 - effectively emphasising the misclassified ones.
/// </summary>
private void adjustExampleWeights(DataSet ds, Learner l, double error)
{
    double correctScale = error / (1.0 - error);
    int exampleCount = ds.examples.Count;
    for (int index = 0; index < exampleCount; index++)
    {
        Example example = ds.getExample(index);
        if (l.predict(example).Equals(example.targetValue()))
        {
            exampleWeights[index] *= correctScale;
        }
    }
    exampleWeights = Util.normalize(exampleWeights);
}
/// <summary>
/// Computes the learner's weighted error: the sum of the current weights
/// of every example it misclassifies.
/// </summary>
private double calculateError(DataSet ds, Learner l)
{
    double weightedError = 0.0;
    for (int index = 0; index < ds.examples.Count; index++)
    {
        Example example = ds.getExample(index);
        bool misclassified = !l.predict(example).Equals(example.targetValue());
        if (misclassified)
        {
            weightedError += exampleWeights[index];
        }
    }
    return weightedError;
}
// Delegates to the recursive DECISION-LIST-LEARNING procedure and stores
// the resulting decision list on this learner.
public void train(DataSet ds)
{
    this.decisionList = decisionListLearning(ds);
}
/// <summary>
/// Greedy attribute selection: returns the attribute with the highest
/// information gain. Ties, and the case where every gain is zero or
/// negative, fall back to the first attribute in the list.
/// Assumes attributeNames is non-empty (the caller guards this).
/// </summary>
private String chooseAttribute(DataSet ds, List<String> attributeNames)
{
    String best = attributeNames[0];
    double bestGain = 0.0;
    foreach (String candidate in attributeNames)
    {
        double candidateGain = ds.calculateGainFor(candidate);
        if (candidateGain > bestGain)
        {
            bestGain = candidateGain;
            best = candidate;
        }
    }
    return best;
}
/// <summary>
/// Returns a shallow copy of this data set: a new DataSet with the same
/// specification holding the same Example references.
/// </summary>
public DataSet copy()
{
    DataSet duplicate = new DataSet(specification);
    foreach (Example example in examples)
    {
        duplicate.add(example);
    }
    return duplicate;
}
/// <summary>
/// Returns true when every example in the data set has the same target
/// value as the first example. Assumes the data set is non-empty (the
/// callers check size() first).
/// </summary>
private bool allExamplesHaveSameClassification(DataSet ds)
{
    String classification = ds.getExample(0).targetValue();
    // Idiom fix: iterate with foreach instead of driving the
    // List<Example>.Enumerator by hand; this also matches the sibling
    // allExamplesHaveSameTargetValue implementation.
    foreach (Example example in ds.examples)
    {
        if (!example.targetValue().Equals(classification))
        {
            return false;
        }
    }
    return true;
}
// Override that ignores both the data set and the attribute count and
// always returns the preconfigured "tests" field (presumably a stub/mock
// factory for testing - confirm against the enclosing class).
public override List<DLTest> createDLTestsWithAttributeCount(DataSet ds, int i)
{
    return tests;
}
/// <summary>
/// Builds the raw numeric table for the data set: one row per example,
/// containing the numerized input attributes followed by the numerized
/// target values. Assumes all numerized values are doubles.
/// </summary>
private List<List<Double>> rawExamplesFromDataSet(DataSet ds, Numerizer numerizer)
{
    List<List<Double>> table = new List<List<Double>>();
    for (int i = 0; i < ds.size(); i++)
    {
        Pair<List<Double>, List<Double>> numerized = numerizer.numerize(ds.getExample(i));
        List<Double> row = new List<Double>();
        row.AddRange(numerized.getFirst());  // input attributes first ...
        row.AddRange(numerized.getSecond()); // ... then the target columns
        table.Add(row);
    }
    return table;
}
/// <summary>
/// Wraps the most common target value of the data set in a constant tree.
/// A MajorityLearner predicts its learned mode regardless of the example,
/// so passing the first example is only a formality.
/// </summary>
private ConstantDecisonTree majorityValue(DataSet ds)
{
    MajorityLearner majority = new MajorityLearner();
    majority.train(ds);
    return new ConstantDecisonTree(majority.predict(ds.getExample(0)));
}
/// <summary>
/// Returns true when every example in the data set shares the target value
/// of the first example. Assumes at least one example in the data set.
/// </summary>
private bool allExamplesHaveSameTargetValue(DataSet matched)
{
    String expected = matched.getExample(0).targetValue();
    foreach (Example example in matched.examples)
    {
        if (!example.targetValue().Equals(expected))
        {
            return false;
        }
    }
    return true;
}