/// <summary>
/// Runs every sample of a test set through the given tree and tallies how the
/// tree's answers compare with the expected outputs.
/// </summary>
/// <param name="decisionTree">Tree used to evaluate each sample; its node count is also recorded on the score.</param>
/// <param name="testDataSetValues">Samples to evaluate. Each supplies the input values and the expected output.</param>
/// <returns>
/// A score whose four counters (positive hits, negative hits, false positives,
/// false negatives) start at zero and are incremented once per sample, and whose
/// <c>NodeCount</c> is taken from <c>decisionTree.GetNodeCount()</c>.
/// </returns>
public static DecisionTreeScore ScoreWithTreeWithTestSet(DecisionTreeLevel decisionTree, List<DataSetValue> testDataSetValues)
{
    DecisionTreeScore result = new DecisionTreeScore(0, 0, 0, 0, decisionTree);

    foreach (DataSetValue sample in testDataSetValues)
    {
        bool predicted = decisionTree.Evaluate(sample.Values);
        bool expected = sample.Output;

        if (predicted)
        {
            if (expected)
            {
                // Tree said true and the sample is true.
                result.PositiveHit++;
            }
            else
            {
                // Tree said true but the sample is false.
                result.FalsePositive++;
            }
        }
        else
        {
            if (expected)
            {
                // Tree said false but the sample is true.
                result.FalseNegative++;
            }
            else
            {
                // Tree said false and the sample is false.
                result.NegativeHits++;
            }
        }
    }

    result.NodeCount = decisionTree.GetNodeCount();
    return result;
}
/// <summary>
/// Creates a score for the given tree with the supplied starting tallies.
/// </summary>
/// <param name="positiveHit">Initial value for <c>PositiveHit</c>.</param>
/// <param name="falsePositive">Initial value for <c>FalsePositive</c>.</param>
/// <param name="negativeHits">Initial value for <c>NegativeHits</c>.</param>
/// <param name="falseNegative">Initial value for <c>FalseNegative</c>.</param>
/// <param name="decisionTree">Tree this score refers to; stored in <c>_decisionTree</c>.</param>
public DecisionTreeScore(
    double positiveHit,
    double falsePositive,
    double negativeHits,
    double falseNegative,
    DecisionTreeLevel decisionTree)
{
    this._decisionTree = decisionTree;

    this.PositiveHit = positiveHit;
    this.FalsePositive = falsePositive;
    this.NegativeHits = negativeHits;
    this.FalseNegative = falseNegative;
}
/// <summary>
/// Builds this level of the decision tree from the supplied samples and, when a
/// split is made, recursively builds one sub-tree per distinct value of the
/// chosen attribute that occurs in <paramref name="values"/>.
/// </summary>
/// <remarks>
/// The method stops and stores a fixed answer in <c>_localValue</c> when:
/// all samples share the same output; no attributes remain; or the chi-squared
/// check rejects the split. In the last two cases the answer is <c>true</c> only
/// if true samples strictly outnumber false samples (ties yield <c>false</c>).
/// Note that <c>_attributeToSplitOn</c> is assigned before the chi-squared check,
/// so it is set even when the split is rejected.
/// </remarks>
/// <param name="attributes">Attributes still available for splitting at this level.</param>
/// <param name="values">Samples that reached this level.</param>
public void D3(List<DataSetAttribute> attributes, List<DataSetValue> values)
{
    int positives = values.Count(sample => sample.Output);
    int negatives = values.Count(sample => !sample.Output);

    // A set whose samples all agree needs no split.
    if (negatives == 0 && positives > 0)
    {
        _localValue = true;
        return;
    }

    if (positives == 0 && negatives > 0)
    {
        _localValue = false;
        return;
    }

    // Fallback answer whenever we cannot, or decide not to, split any further.
    bool majorityOutput = positives > negatives;

    // Nothing left to split on.
    if (attributes.Count == 0)
    {
        _localValue = majorityOutput;
        return;
    }

    // Score every attribute and keep the one with the lowest entropy.
    List<DataSetAttributeWithCounts> scoredAttributes = CalculateEForAllAttributes(attributes, values);
    DataSetAttributeWithCounts splitAttribute = FindAttributeWithMinEntropy(scoredAttributes);
    _attributeToSplitOn = splitAttribute;

    // Let the chi-squared check veto the split.
    if (!ShouldSplitOnAttributeAccordingToChiSquared(splitAttribute))
    {
        _localValue = majorityOutput;
        return;
    }

    // Sub-trees may use every attribute except the one consumed here.
    List<DataSetAttribute> remainingAttributes = attributes
        .Where(candidate => candidate.Name != splitAttribute.Name)
        .ToList();

    // Partition the samples by their value for the split attribute. The two
    // dictionaries are filled in lockstep, so they always share the same keys.
    _dictionaryOfSubTrees = new Dictionary<string, DecisionTreeLevel>(splitAttribute.PossibleValues.Count);
    var samplesByValue = new Dictionary<string, List<DataSetValue>>();

    foreach (DataSetValue sample in values)
    {
        string key = sample.Values[splitAttribute.ValueIndex];

        List<DataSetValue> bucket;
        if (!samplesByValue.TryGetValue(key, out bucket))
        {
            // First sample with this value: create its sub-tree and its bucket.
            _dictionaryOfSubTrees[key] = new DecisionTreeLevel(ChiTestLimit);
            bucket = new List<DataSetValue>();
            samplesByValue[key] = bucket;
        }

        bucket.Add(sample);
    }

    // Build each sub-tree from its own partition.
    foreach (var branch in _dictionaryOfSubTrees)
    {
        branch.Value.D3(remainingAttributes, samplesByValue[branch.Key]);
    }
}