Example #1
        public void train(DataSet ds) {
            initializeExampleWeights(ds.examples.Count);

            foreach (Learner learner in learners) {
                learner.train(ds);

                double error = calculateError(ds, learner);
                if (error < 0.0001) {
                    break;
                }

                adjustExampleWeights(ds, learner, error);

                // learnerWeights is populated by initializeHypothesisWeights in the
                // constructor, so the indexer read below is safe; scale the current
                // weight by log((1 - error) / error)
                double newHypothesisWeight = learnerWeights[learner]
                        * Math.Log((1.0 - error) / error);
                learnerWeights[learner] = newHypothesisWeight;
            }
        }
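Since error is the summed weight of the misclassified examples (Example #22), each learner that survives the loop has its weight scaled by log((1 - error) / error), so more accurate learners end up with larger hypothesis weights. Below is a minimal usage sketch, assuming only members shown elsewhere in this listing (the AdaBoostLearner constructor from Example #4, MajorityLearner from Example #29, and a test method like the one in Example #9); the choice of weak learners is purely illustrative.

        // Sketch only; assumes AdaBoostLearner exposes train/test as shown in this listing.
        public static int[] TrainAndTest(DataSet ds)
        {
            List<Learner> weakLearners = new List<Learner>();
            weakLearners.Add(new MajorityLearner()); // stand-in weak learner; decision stumps would be typical
            weakLearners.Add(new MajorityLearner());
            AdaBoostLearner booster = new AdaBoostLearner(weakLearners, ds);
            booster.train(ds);
            return booster.test(ds); // [correct, incorrect] counts, as in Example #9
        }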
Example #2
        //
        // PRIVATE METHODS
        //

        private DecisionTree decisionTreeLearning(DataSet ds,
                List<String> attributeNames, ConstantDecisonTree defaultTree)
        {
            if (ds.size() == 0)
            {
                return defaultTree;
            }
            if (allExamplesHaveSameClassification(ds))
            {
                return new ConstantDecisonTree(ds.getExample(0).targetValue());
            }
            if (attributeNames.Count == 0)
            {
                return majorityValue(ds);
            }
            String chosenAttribute = chooseAttribute(ds, attributeNames);

            DecisionTree tree = new DecisionTree(chosenAttribute);
            ConstantDecisonTree m = majorityValue(ds);

            List<String> values = ds.getPossibleAttributeValues(chosenAttribute);
            foreach (String v in values)
            {
                DataSet filtered = ds.matchingDataSet(chosenAttribute, v);
                List<String> newAttribs = Util.removeFrom(attributeNames,
                        chosenAttribute);
                DecisionTree subTree = decisionTreeLearning(filtered, newAttribs, m);
                tree.addNode(v, subTree);

            }

            return tree;
        }
Example #3
        /*
         * Create a normalized data "table" from the DataSet using the numerizer. At
         * this stage, the data is not split into input patterns and targets.
         * TODO: remove the redundancy of recreating the target columns; the numerizer
         * has already isolated the targets.
         */
        public void createNormalizedDataFromDataSet(DataSet ds, Numerizer numerizer)
        {
            List<List<Double>> rds = rawExamplesFromDataSet(ds, numerizer);
            // normalize raw dataset
            nds = normalize(rds);
        }
Example #4
        public AdaBoostLearner(List<Learner> learners, DataSet ds)
        {
            this.learners = learners;
            this.dataSet = ds;

            initializeExampleWeights(ds.examples.Count);
            initializeHypothesisWeights(learners.Count);
        }
Example #5
 public void train(DataSet ds)
 {
     List<String> targets = new List<String>();
     foreach (Example e in ds.examples)
     {
         targets.Add(e.targetValue());
     }
     result = Util.mode(targets);
 }
Example #6
 public DataSet unmatchedExamples(DataSet ds)
 {
     DataSet unmatched = ds.emptyDataSet();
     foreach (Example e in ds.examples)
     {
         if (!(matches(e)))
         {
             unmatched.add(e);
         }
     }
     return unmatched;
 }
Example #7
 public DataSet removeExample(Example e)
 {
     DataSet ds = new DataSet(specification);
     foreach (Example eg in examples)
     {
         if (!(e.Equals(eg)))
         {
             ds.add(eg);
         }
     }
     return ds;
 }
Example #8
 public static DecisionTree getStumpFor(DataSet ds, String attributeName,
         String attributeValue, String returnValueIfMatched,
         List<String> unmatchedValues, String returnValueIfUnmatched)
 {
     DecisionTree dt = new DecisionTree(attributeName);
     dt.addLeaf(attributeValue, returnValueIfMatched);
     foreach (String unmatchedValue in unmatchedValues)
     {
         dt.addLeaf(unmatchedValue, returnValueIfUnmatched);
     }
     return dt;
 }
Example #9
        public int[] test(DataSet ds) {
            // results[0] counts correct predictions, results[1] counts incorrect ones
            int[] results = new int[] { 0, 0 };

            foreach (Example e in ds.examples) {
                if (e.targetValue().Equals(predict(e))) {
                    results[0] = results[0] + 1;
                } else {
                    results[1] = results[1] + 1;
                }
            }
            return results;
        }
Example #10
        public DataSet fromFile(String filename, DataSetSpecification spec,
                Char separator) {
            // the file is assumed to be an embedded resource under AIMA.Resource
            // and to end in .csv
            DataSet ds = new DataSet(spec);
            using (StreamReader reader = new StreamReader(typeof(DataSetFactory).Assembly.GetManifestResourceStream("AIMA.Resource." + filename)))
            {
                String line;
                while ((line = reader.ReadLine()) != null)
                {
                    ds.add(exampleFromString(line, spec, separator));
                }
            }
            return ds;
        }
Example #11
        //
        // START-Learner
        public void train(DataSet ds) {
            folDSDomain = new FOLDataSetDomain(ds.specification, trueGoalValue);
            List<FOLExample> folExamples = new List<FOLExample>();
            int egNo = 1;
            foreach (Example e in ds.examples) {
                folExamples.Add(new FOLExample(folDSDomain, e, egNo));
                egNo++;
            }

            // Set up a KB to be used for learning
            kb = new FOLKnowledgeBase(folDSDomain, new FOLOTTERLikeTheoremProver(
                    1000, false));

            CurrentBestLearning cbl = new CurrentBestLearning(folDSDomain, kb);

            currentBestHypothesis = cbl.currentBestLearning(folExamples);
        }
Example #12
 //
 // PRIVATE METHODS
 //
 private DecisionList decisionListLearning(DataSet ds)
 {
     if (ds.size() == 0)
     {
         return new DecisionList(positive, negative);
     }
     List<DLTest> possibleTests = testFactory
             .createDLTestsWithAttributeCount(ds, 1);
     DLTest test = getValidTest(possibleTests, ds);
     if (test == null)
     {
         return new DecisionList(null, FAILURE);
     }
     // at this point there is a test that classifies some subset of examples
     // with the same target value
     DataSet matched = test.matchedExamples(ds);
     DecisionList list = new DecisionList(positive, negative);
     list.add(test, matched.getExample(0).targetValue());
     return list.mergeWith(decisionListLearning(test.unmatchedExamples(ds)));
 }
Example #13
        public static List<DecisionTree> getStumpsFor(DataSet ds,
                String returnValueIfMatched, String returnValueIfUnmatched)
        {
            List<String> attributes = ds.getNonTargetAttributes();
            List<DecisionTree> trees = new List<DecisionTree>();
            foreach (String attribute in attributes)
            {
                List<String> values = ds.getPossibleAttributeValues(attribute);
                foreach (String value in values)
                {
                    List<String> unmatchedValues = Util.removeFrom(ds
                            .getPossibleAttributeValues(attribute), value);

                    DecisionTree tree = getStumpFor(ds, attribute, value,
                            returnValueIfMatched, unmatchedValues,
                            returnValueIfUnmatched);
                    trees.Add(tree);

                }
            }
            return trees;
        }
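The stumps produced here are one-level trees: each answers returnValueIfMatched for a single attribute value and returnValueIfUnmatched for every other value of that attribute. A hedged usage sketch follows; the enclosing class of getStumpsFor is not named in this listing, so DecisionTree.getStumpsFor and the "Yes"/"No" labels are assumptions.

        // Sketch only; the static's enclosing class and the label values are assumptions.
        public static List<DecisionTree> EnumerateStumps(DataSet ds)
        {
            List<DecisionTree> stumps = DecisionTree.getStumpsFor(ds, "Yes", "No");
            // each stump predicts "Yes" for exactly one attribute value and "No" for the rest;
            // one-level trees like these are the usual weak hypotheses for a booster (Example #1)
            return stumps;
        }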
Example #14
 public virtual void train(DataSet ds)
 {
     List<String> attributes = ds.getNonTargetAttributes();
     this.tree = decisionTreeLearning(ds, attributes,
             new ConstantDecisonTree(defaultValue));
 }
Example #15
        /*
         * Method called by clients to set up the data set and make it ready for
         * processing.
         */
        public void createExamplesFromDataSet(DataSet ds, Numerizer numerizer)
        {
            createNormalizedDataFromDataSet(ds, numerizer);
            setTargetColumns();
            createExamples();
        }
Example #16
 public DataSet matchingDataSet(String attributeName, String attributeValue)
 {
     DataSet ds = new DataSet(specification);
     foreach (Example e in examples)
     {
         if (e.getAttributeValueAsString(attributeName).Equals(
                 attributeValue))
         {
             ds.add(e);
         }
     }
     return ds;
 }
Example #17
        public void testOn(DataSet ds) {
            // TODO Auto-generated method stub
        }
Example #18
 public Dictionary<String, DataSet> splitByAttribute(String attributeName)
 {
     Dictionary<String, DataSet> results = new Dictionary<String, DataSet>();
     foreach (Example e in examples)
     {
         String val = e.getAttributeValueAsString(attributeName);
         if (results.ContainsKey(val))
         {
             results[val].add(e);
         }
         else
         {
             DataSet ds = new DataSet(specification);
             ds.add(e);
             results.Add(val, ds);
         }
     }
     return results;
 }
Example #19
        public override void train(DataSet ds) {
            // System.Console.WriteLine("Stump learner training");
            // do nothing; the stump is not inferred from the dataset
        }
Example #20
        private DLTest getValidTest(List<DLTest> possibleTests, DataSet ds) {
            foreach (DLTest test in possibleTests) {
                DataSet matched = test.matchedExamples(ds);
                if (!(matched.size() == 0)) {
                    if (allExamplesHaveSameTargetValue(matched)) {
                        return test;
                    }
                }
            }
            return null;
        }
Example #21
 private void adjustExampleWeights(DataSet ds, Learner l, double error)
 {
     double epsilon = error / (1.0 - error);
     for (int j = 0; j < ds.examples.Count; j++)
     {
         Example e = ds.getExample(j);
         if ((l.predict(e).Equals(e.targetValue())))
         {
             exampleWeights[j] = exampleWeights[j] * epsilon;
         }
     }
     exampleWeights = Util.normalize(exampleWeights);
 }
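The update above is the reweighting step as this code writes it: correctly classified examples have their weight multiplied by epsilon = error / (1 - error) and the whole vector is then renormalized, which shifts relative weight onto the misclassified examples. For instance, with error = 0.25, epsilon = 1/3, so examples the learner got right keep only a third of their weight before normalization, while the examples it got wrong keep theirs.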
Example #22
 private double calculateError(DataSet ds, Learner l)
 {
     // weighted error: total weight of the examples the learner misclassifies
     double error = 0.0;
     for (int i = 0; i < ds.examples.Count; i++)
     {
         Example e = ds.getExample(i);
         if (!(l.predict(e).Equals(e.targetValue())))
         {
             error = error + exampleWeights[i];
         }
     }
     return error;
 }
Example #23
 public void train(DataSet ds)
 {
     this.decisionList = decisionListLearning(ds);
 }
Example #24
        private String chooseAttribute(DataSet ds, List<String> attributeNames)
        {
            double greatestGain = 0.0;
            String attributeWithGreatestGain = attributeNames[0];
            foreach (String attr in attributeNames)
            {
                double gain = ds.calculateGainFor(attr);
                if (gain > greatestGain)
                {
                    greatestGain = gain;
                    attributeWithGreatestGain = attr;
                }
            }

            return attributeWithGreatestGain;
        }
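chooseAttribute delegates the actual scoring to ds.calculateGainFor, which is not part of this listing. The sketch below shows the standard information-gain computation it is presumably performing, written only in terms of members that do appear here (examples, size(), targetValue(), and splitByAttribute from Example #18); the real DataSet implementation may differ.

        // Hedged sketch of information gain; DataSet.calculateGainFor is not shown in this
        // listing and may be implemented differently.
        private static double InformationGain(DataSet ds, String attributeName)
        {
            double remainder = 0.0;
            foreach (DataSet subset in ds.splitByAttribute(attributeName).Values)
            {
                // expected entropy of the target after the split, weighted by subset size
                remainder += ((double) subset.size() / ds.size()) * TargetEntropy(subset);
            }
            return TargetEntropy(ds) - remainder;
        }

        private static double TargetEntropy(DataSet ds)
        {
            Dictionary<String, int> counts = new Dictionary<String, int>();
            foreach (Example e in ds.examples)
            {
                String t = e.targetValue();
                counts[t] = counts.ContainsKey(t) ? counts[t] + 1 : 1;
            }
            double entropy = 0.0;
            foreach (int c in counts.Values)
            {
                double p = (double) c / ds.size();
                entropy -= p * Math.Log(p, 2.0); // log base 2, in bits
            }
            return entropy;
        }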
Example #25
 public DataSet copy()
 {
     DataSet ds = new DataSet(specification);
     foreach (Example e in examples)
     {
         ds.add(e);
     }
     return ds;
 }
Example #26
        private bool allExamplesHaveSameClassification(DataSet ds)
        {
            String classification = ds.getExample(0).targetValue();
            List<Example>.Enumerator iter = ds.iterator();
            while (iter.MoveNext())
            {
                Example element = iter.Current;
                if (!(element.targetValue().Equals(classification)))
                {
                    return false;
                }

            }
            return true;
        }
Example #27
 public override List<DLTest> createDLTestsWithAttributeCount(DataSet ds, int i)
 {
     return tests;
 }
Example #28
        private List<List<Double>> rawExamplesFromDataSet(DataSet ds,
                Numerizer numerizer) {
            // assumes all values for input and target are doubles
            List<List<Double>> rds = new List<List<Double>>();
            for (int i = 0; i < ds.size(); i++) {
                List<Double> rexample = new List<Double>();
                Example e = ds.getExample(i);
                Pair<List<Double>, List<Double>> p = numerizer.numerize(e);
                List<Double> attributes = p.getFirst();
                foreach (Double d in attributes) {
                    rexample.Add(d);
                }
                List<Double> targets = p.getSecond();
                foreach (Double d in targets) {
                    rexample.Add(d);
                }
                rds.Add(rexample);
            }
            return rds;
        }
Example #29
 private ConstantDecisonTree majorityValue(DataSet ds)
 {
     Learner learner = new MajorityLearner();
     learner.train(ds);
     return new ConstantDecisonTree(learner.predict(ds.getExample(0)));
 }
Example #30
        private bool allExamplesHaveSameTargetValue(DataSet matched) {
            // assumes at least one example in the dataset
            String targetValue = matched.getExample(0).targetValue();
            foreach (Example e in matched.examples) {
                if (!(e.targetValue().Equals(targetValue))) {
                    return false;
                }
            }
            return true;
        }