/// <summary>
/// Runs every row of a test set through a decision tree and tallies how the tree's
/// predictions compare with the rows' known outputs.
/// </summary>
/// <param name="decisionTree">Tree used to evaluate each test row.</param>
/// <param name="testDataSetValues">Test rows, each carrying its values and expected output.</param>
/// <returns>
/// A score holding the four prediction tallies (positive hits, negative hits, false
/// positives, false negatives) and the tree's node count.
/// </returns>
public static DecisionTreeScore ScoreWithTreeWithTestSet(DecisionTreeLevel decisionTree, List <DataSetValue> testDataSetValues)
        {
            var result = new DecisionTreeScore(0, 0, 0, 0, decisionTree);

            foreach (DataSetValue row in testDataSetValues)
            {
                bool predicted = decisionTree.Evaluate(row.Values);
                bool expected  = row.Output;

                if (predicted)
                {
                    // Tree answered "true": correct only when the row is really true
                    if (expected)
                    {
                        result.PositiveHit++;
                    }
                    else
                    {
                        result.FalsePositive++;
                    }
                }
                else
                {
                    // Tree answered "false": correct only when the row is really false
                    if (expected)
                    {
                        result.FalseNegative++;
                    }
                    else
                    {
                        result.NegativeHits++;
                    }
                }
            }

            result.NodeCount = decisionTree.GetNodeCount();

            return result;
        }
 /// <summary>
 /// Creates a score for <paramref name="decisionTree"/> seeded with the given tallies.
 /// </summary>
 /// <param name="positiveHit">Initial value of <c>PositiveHit</c>.</param>
 /// <param name="falsePositive">Initial value of <c>FalsePositive</c>.</param>
 /// <param name="negativeHits">Initial value of <c>NegativeHits</c>.</param>
 /// <param name="falseNegative">Initial value of <c>FalseNegative</c>.</param>
 /// <param name="decisionTree">Tree this score belongs to.</param>
 public DecisionTreeScore(double positiveHit, double falsePositive, double negativeHits, double falseNegative, DecisionTreeLevel decisionTree)
 {
     // Tallies for predictions that matched the expected output
     this.PositiveHit  = positiveHit;
     this.NegativeHits = negativeHits;

     // Tallies for predictions that did not match
     this.FalsePositive = falsePositive;
     this.FalseNegative = falseNegative;

     this._decisionTree = decisionTree;
 }
        /// <summary>
        /// Builds this tree level from the supplied training rows, ID3-style. The level becomes
        /// a leaf (by setting <c>_localValue</c>) when there are no rows, when every row has the
        /// same output, when no attributes remain, or when the split check rejects the chosen
        /// attribute. Otherwise the rows are partitioned by their value for the chosen attribute
        /// and a sub-tree is built recursively for each value that actually occurs.
        /// </summary>
        /// <param name="attributes">Attributes still available to split on at this level.</param>
        /// <param name="values">Training rows that reached this level.</param>
        public void D3(List <DataSetAttribute> attributes, List <DataSetValue> values)
        {
            // Every row is either true or false, so the false count follows from the true count
            int totalTrueValues  = values.Count(v => v.Output);
            int totalFalseValues = values.Count - totalTrueValues;

            // No rows at all: there is nothing to measure entropy on. Become a "false" leaf,
            // which is the same answer the majority vote below gives for a 0/0 tie.
            if (values.Count == 0)
            {
                _localValue = false;
                return;
            }

            // All rows agree: no split needed
            if (totalFalseValues == 0)
            {
                _localValue = true;
                return;
            }

            if (totalTrueValues == 0)
            {
                _localValue = false;
                return;
            }

            // Can we split on attributes?
            if (attributes.Count == 0)
            {
                // Can't split anymore. We'll decide on the most prevalent value (ties go to false)
                _localValue = totalTrueValues > totalFalseValues;
                return;
            }

            // Compute "E" for every remaining attribute and pick the one with the minimum entropy
            List <DataSetAttributeWithCounts> e = CalculateEForAllAttributes(attributes, values);
            DataSetAttributeWithCounts        attributeWithMinEntropy = FindAttributeWithMinEntropy(e);

            // NOTE: recorded before the split check below, so it stays set even when this
            // level ends up as a leaf.
            _attributeToSplitOn = attributeWithMinEntropy;

            // Is it worth it to split on attributes
            if (!ShouldSplitOnAttributeAccordingToChiSquared(attributeWithMinEntropy))
            {
                // Not worth it to split. We'll decide on the most prevalent value (ties go to false)
                _localValue = totalTrueValues > totalFalseValues;
                return;
            }

            // Sub-trees may not split on this attribute again
            List <DataSetAttribute> newAttributes = attributes.Where(a => a.Name != attributeWithMinEntropy.Name).ToList();

            _dictionaryOfSubTrees = new Dictionary <string, DecisionTreeLevel>(attributeWithMinEntropy.PossibleValues.Count);

            // Partition the rows by their value for the chosen attribute
            var rowsByAttributeValue = new Dictionary <string, List <DataSetValue> >();

            foreach (var dataSetValue in values)
            {
                string              value = dataSetValue.Values[attributeWithMinEntropy.ValueIndex];
                List <DataSetValue> partitionRows;
                if (!rowsByAttributeValue.TryGetValue(value, out partitionRows))
                {
                    partitionRows = new List <DataSetValue>();
                    rowsByAttributeValue[value] = partitionRows;
                }

                partitionRows.Add(dataSetValue);
            }

            // Create one sub-tree per observed value and recursively run D3 on its rows
            foreach (var partition in rowsByAttributeValue)
            {
                var subTree = new DecisionTreeLevel(ChiTestLimit);
                _dictionaryOfSubTrees[partition.Key] = subTree;
                subTree.D3(newAttributes, partition.Value);
            }
        }