예제 #1
0
        /// <summary>
        /// Recursively builds a decision tree for the given rows by choosing, at each level,
        /// the (column, value) split that yields the largest positive gain under the selected metric.
        /// </summary>
        /// <param name="rows">
        /// Rows to partition. The last column of each row is never considered as a split candidate
        /// (presumably it holds the outcome being predicted; confirm against UniqueCounts).
        /// </param>
        /// <param name="mode">
        /// Scoring metric used to evaluate splits. Any value other than GiniImpurity or Variance
        /// uses Entropy. The same metric is used for every level of the tree.
        /// </param>
        /// <returns>
        /// An empty node when <paramref name="rows"/> is empty; a branch node when some split gives
        /// a positive gain; otherwise a leaf node carrying <c>UniqueCounts(rows)</c>.
        /// </returns>
        public static DecisionNode BuildTree(Set rows, MeasuringMetric mode = MeasuringMetric.Entropy)
        {
            // Pick the scoring function; Entropy is the fallback for unhandled enum values.
            Fun scoreCalc;
            switch (mode)
            {
                case MeasuringMetric.GiniImpurity:
                    scoreCalc = GiniImpurity;
                    break;

                case MeasuringMetric.Variance:
                    scoreCalc = Variance;
                    break;

                default:
                    scoreCalc = Entropy;
                    break;
            }

            if (rows.Count == 0)
            {
                return new DecisionNode();
            }

            double currentScore = scoreCalc(rows);

            // Track the best split found so far
            double bestGain = 0;
            Pair bestCriteria = new Pair();
            DividedSet bestSets = new DividedSet();

            // Every column except the last one is a split candidate
            int columnCount = rows[0].Length - 1;

            for (int col = 0; col < columnCount; ++col)
            {
                // Collect the distinct values of this column. The list keeps first-seen order
                // (ties between equal gains are resolved by that order); the set only provides
                // a fast membership check.
                HashSet<string> seenValues = new HashSet<string>();
                List<string> columnValues = new List<string>();

                foreach (var row in rows)
                {
                    if (seenValues.Add(row[col]))
                    {
                        columnValues.Add(row[col]);
                    }
                }

                // Try dividing the rows on each distinct value of this column
                foreach (var value in columnValues)
                {
                    DividedSet divSet = DivideSet(rows, col, value);

                    // Gain = score of the whole set minus the size-weighted scores of the two halves
                    double p = (double)divSet.Key.Count / rows.Count;
                    double gain = currentScore - p * scoreCalc(divSet.Key) - (1 - p) * scoreCalc(divSet.Value);

                    // Only accept splits that actually separate the rows into two non-empty groups
                    if (gain > bestGain && divSet.Key.Count > 0 && divSet.Value.Count > 0)
                    {
                        bestGain = gain;
                        bestCriteria = new Pair(col, value);
                        bestSets = divSet;
                    }
                }
            }

            // No split improves the score: emit a leaf
            if (bestGain <= 0)
            {
                return new DecisionNode(results: UniqueCounts(rows));
            }

            // Build the subbranches with the SAME metric. Omitting `mode` here would silently
            // switch every subtree back to the default (Entropy).
            DecisionNode trueBranch = BuildTree(bestSets.Key, mode);
            DecisionNode falseBranch = BuildTree(bestSets.Value, mode);
            return new DecisionNode(column: bestCriteria.Key, value: bestCriteria.Value, nextTrueNode: trueBranch,
                                    nextFalseNode: falseBranch);
        }
예제 #2
0
        /// <summary>
        /// Builds a decision tree by repeatedly choosing the best dividing criteria for the current set
        /// and recursing into the two resulting subsets.
        /// </summary>
        /// <param name="rows">
        /// The rows to split. Columns 0 .. Length-2 of each row are tried as split columns; the last
        /// column is skipped (NOTE(review): looks like it is the target column; verify with UniqueCounts).
        /// </param>
        /// <param name="mode">
        /// Which scoring function measures how mixed a set is. Values other than GiniImpurity and
        /// Variance select Entropy. The metric is propagated to all recursive calls.
        /// </param>
        /// <returns>
        /// A default-constructed node for an empty set, a branch node (column, value, true/false children)
        /// when a split with positive gain exists, or a leaf node holding <c>UniqueCounts(rows)</c>.
        /// </returns>
        public static DecisionNode BuildTree(Set rows, MeasuringMetric mode = MeasuringMetric.Entropy)
        {
            // Resolve the metric to its scoring function (Entropy when nothing else matches)
            Fun scoreCalc = Entropy;
            switch (mode)
            {
                case MeasuringMetric.GiniImpurity:
                    scoreCalc = GiniImpurity;
                    break;
                case MeasuringMetric.Variance:
                    scoreCalc = Variance;
                    break;
            }

            if (rows.Count == 0)
            {
                return new DecisionNode();
            }
            double currentScore = scoreCalc(rows);

            // Set up some variables to track the best criteria
            double bestGain = 0;
            Pair bestCriteria = new Pair();
            DividedSet bestSets = new DividedSet();

            // The last column is not a split candidate
            int columnCount = rows[0].Length - 1;
            for (int col = 0; col < columnCount; ++col)
            {
                // Generate the list of different values in this column, in first-seen order.
                // The hash set replaces a linear List.Contains scan per row.
                List<string> columnValues = new List<string>();
                HashSet<string> alreadyListed = new HashSet<string>();

                foreach (var row in rows)
                {
                    string cell = row[col];
                    if (alreadyListed.Add(cell))
                    {
                        columnValues.Add(cell);
                    }
                }

                // Now try dividing the rows up for each value in this column
                foreach (var value in columnValues)
                {
                    DividedSet divSet = DivideSet(rows, col, value);

                    // Information gain: current score minus the weighted score of both parts
                    double p = (double)divSet.Key.Count / rows.Count;
                    double gain = currentScore - p * scoreCalc(divSet.Key) - (1 - p) * scoreCalc(divSet.Value);

                    // Strict '>' keeps the first candidate among equal gains; both parts must be non-empty
                    if (gain > bestGain && divSet.Key.Count > 0 && divSet.Value.Count > 0)
                    {
                        bestGain = gain;
                        bestCriteria = new Pair(col, value);
                        bestSets = divSet;
                    }
                }
            }

            // Create the subbranches
            if (bestGain > 0)
            {
                // Pass the metric down explicitly; without it the recursion would fall back to
                // the parameter default and score all deeper levels with Entropy.
                DecisionNode trueBranch = BuildTree(bestSets.Key, mode);
                DecisionNode falseBranch = BuildTree(bestSets.Value, mode);
                return new DecisionNode(column: bestCriteria.Key, value: bestCriteria.Value, nextTrueNode: trueBranch,
                                        nextFalseNode: falseBranch);
            }

            // No useful split: this node becomes a leaf with the counts of the remaining rows
            return new DecisionNode(results: UniqueCounts(rows));
        }