Beispiel #1
0
 /// <summary>
 /// Convenience overload for scoring a single decision tree: places the tree in a
 /// one-element list and forwards to the overload that takes a list of trees.
 /// </summary>
 /// <param name="decisionTree">The tree to score.</param>
 /// <param name="testDataSetValues">The test rows, forwarded unchanged.</param>
 /// <returns>Whatever the list-taking overload returns for the one-element list.</returns>
 public static DecisionTreeScore ScoreWithTreeWithTestSet(DecisionTreeLevel decisionTree, List <DataSetValue> testDataSetValues)
 {
     List <DecisionTreeLevel> singleTreeList = new List <DecisionTreeLevel>();
     singleTreeList.Add(decisionTree);

     return ScoreWithTreeWithTestSet(singleTreeList, testDataSetValues);
 }
Beispiel #2
0
        /// <summary>
        /// Builds this level of the decision tree from <c>_values</c> and <c>_attributes</c>.
        /// The level either becomes a leaf (a decision is stored in <c>_localValue</c>) or an
        /// attribute is chosen, the values are partitioned by that attribute, and one subtree
        /// per observed attribute value is built recursively.
        /// </summary>
        public void D3()
        {
            // Check whether we even need to split or not
            int totalTrueValues  = _values.Count(v => v.Output);
            int totalFalseValues = _values.Count(v => !v.Output);

            // Every row is positive: pure leaf
            if (totalFalseValues == 0 && totalTrueValues > 0)
            {
                _localValue = true;
                return;
            }

            // Every row is negative: pure leaf
            if (totalTrueValues == 0 && totalFalseValues > 0)
            {
                _localValue = false;
                return;
            }

            // Fallback decision used whenever we stop without splitting.
            // A tie (including the "no rows at all" case) resolves to false.
            bool mostPrevalentValue = totalTrueValues > totalFalseValues;

            // Can we split on attributes?
            if (_attributes.Count == 0)
            {
                // Can't split anymore. We'll decide on the most prevalent value
                _localValue = mostPrevalentValue;
                return;
            }

            // First, find the attribute with the lowest entropy
            List <DataSetAttributeWithCounts> e = CalculateEForAllAttributes(_attributes, _values);
            DataSetAttributeWithCounts        attributeWithMinEntropy = FindAttributeWithMinEntropy(e);

            // NOTE(review): the field is assigned before the chi-squared check below, so it
            // stays set even when this level ends up as a leaf - confirm that readers of
            // _attributeToSplitOn expect that.
            _attributeToSplitOn = attributeWithMinEntropy;

            // Is it worth it to split on attributes
            if (!ShouldSplitOnAttributeAccordingToChiSquared(attributeWithMinEntropy))
            {
                // Not worth it to split. We'll decide on the most prevalent value
                _localValue = mostPrevalentValue;
                return;
            }

            // Remove this attribute from the list of new attributes to create new subtrees
            List <DataSetAttribute> newAttributes = _attributes.Where(a => a.Name != attributeWithMinEntropy.Name).ToList();

            // Split the values in many sets, one per attribute value actually seen in the data
            _dictionaryOfSubTrees = new Dictionary <string, DecisionTreeLevel>(attributeWithMinEntropy.PossibleValues.Count);
            var dictionaryOfValues = new Dictionary <string, List <DataSetValue> >();

            foreach (var dataSetValue in _values)
            {
                string              value = dataSetValue.Values[attributeWithMinEntropy.ValueIndex];
                DecisionTreeLevel   localTreeLevel;
                List <DataSetValue> localValues;
                if (!_dictionaryOfSubTrees.TryGetValue(value, out localTreeLevel))
                {
                    // First row with this attribute value: create its bucket and its subtree.
                    // The subtree is handed the same list instance that keeps being filled below.
                    localValues = new List <DataSetValue>();
                    dictionaryOfValues[value]    = localValues;
                    localTreeLevel               = new DecisionTreeLevel(ChiTestLimit, newAttributes, localValues);
                    _dictionaryOfSubTrees[value] = localTreeLevel;
                }
                else
                {
                    localValues = dictionaryOfValues[value];
                }

                localValues.Add(dataSetValue);
            }

            // Recursively run D3 on them (only after every bucket has been completely filled)
            foreach (DecisionTreeLevel subTree in _dictionaryOfSubTrees.Values)
            {
                subTree.D3();
            }
        }