/// <summary>
/// Convenience overload for scoring a single decision tree: wraps the tree in a
/// one-element list and delegates to the list-based <c>ScoreWithTreeWithTestSet</c> overload.
/// </summary>
/// <param name="decisionTree">The single tree to score.</param>
/// <param name="testDataSetValues">The test rows passed through unchanged to the list-based overload.</param>
/// <returns>Whatever the list-based overload returns for the one-tree list.</returns>
public static DecisionTreeScore ScoreWithTreeWithTestSet(DecisionTreeLevel decisionTree, List<DataSetValue> testDataSetValues)
{
    var singleTreeList = new List<DecisionTreeLevel>();
    singleTreeList.Add(decisionTree);

    return ScoreWithTreeWithTestSet(singleTreeList, testDataSetValues);
}
/// <summary>
/// Grows the decision tree at this level from <c>_values</c> and <c>_attributes</c>.
/// The method either records a leaf decision in <c>_localValue</c>, or selects an attribute
/// to split on, creates one child level per value of that attribute observed in
/// <c>_values</c> (stored in <c>_dictionaryOfSubTrees</c>), and recursively runs
/// <c>D3</c> on every child.
/// </summary>
/// <remarks>
/// A leaf is produced when all values share the same output, when no attributes remain,
/// or when <c>ShouldSplitOnAttributeAccordingToChiSquared</c> rejects the split.
/// In the latter two cases the leaf takes the more frequent output; a tie resolves to <c>false</c>.
/// </remarks>
public void D3()
{
    // Check whether we even need to split or not
    int totalTrueValues = _values.Count(v => v.Output);
    int totalFalseValues = _values.Count(v => !v.Output);

    if (totalFalseValues == 0 && totalTrueValues > 0)
    {
        _localValue = true;
        return;
    }

    if (totalTrueValues == 0 && totalFalseValues > 0)
    {
        _localValue = false;
        return;
    }

    // Fallback decision used whenever we stop without splitting.
    // Strict '>' means a tie (including an empty value set) resolves to false.
    bool mostPrevalentOutput = totalTrueValues > totalFalseValues;

    // Can we split on attributes?
    if (_attributes.Count == 0)
    {
        // Can't split anymore. We'll decide on the most prevalent value
        _localValue = mostPrevalentOutput;
        return;
    }

    // Score every remaining attribute and keep the one chosen by FindAttributeWithMinEntropy.
    // NOTE(review): the previous comment said "highest E", which does not match the helper's
    // name - confirm what "E" denotes in CalculateEForAllAttributes.
    List<DataSetAttributeWithCounts> e = CalculateEForAllAttributes(_attributes, _values);
    DataSetAttributeWithCounts attributeWithMinEntropy = FindAttributeWithMinEntropy(e);

    // Recorded before the chi-squared check, so it stays set even when we decide not to split.
    _attributeToSplitOn = attributeWithMinEntropy;

    // Is it worth it to split on attributes
    if (!ShouldSplitOnAttributeAccordingToChiSquared(attributeWithMinEntropy))
    {
        // Not worth it to split. We'll decide on the most prevalent value
        _localValue = mostPrevalentOutput;
        return;
    }

    // Remove this attribute from the list of new attributes to create new subtrees
    List<DataSetAttribute> newAttributes = _attributes
        .Where(a => a.Name != attributeWithMinEntropy.Name)
        .ToList();

    // Split the values into one bucket per observed value of the chosen attribute.
    _dictionaryOfSubTrees = new Dictionary<string, DecisionTreeLevel>(attributeWithMinEntropy.PossibleValues.Count);
    var dictionaryOfValues = new Dictionary<string, List<DataSetValue>>();

    foreach (var dataSetValue in _values)
    {
        string value = dataSetValue.Values[attributeWithMinEntropy.ValueIndex];

        List<DataSetValue> localValues;
        if (!dictionaryOfValues.TryGetValue(value, out localValues))
        {
            // First time we see this value: create its bucket and the child level that owns it.
            // The child is handed the (still empty) list and sees the rows as they are added below.
            localValues = new List<DataSetValue>();
            dictionaryOfValues[value] = localValues;
            _dictionaryOfSubTrees[value] = new DecisionTreeLevel(ChiTestLimit, newAttributes, localValues);
        }

        localValues.Add(dataSetValue);
    }

    // Recursively run D3 on them
    foreach (var subTree in _dictionaryOfSubTrees.Values)
    {
        subTree.D3();
    }
}