Exemple #1
0
        /// <summary>
        /// Fits a single-split classifier to <paramref name="train"/>.
        /// Every column is scanned for the threshold that maximises the number of
        /// rows that would be classified correctly if each side of the threshold
        /// predicted its own most frequent label. When the data contains a single
        /// label, or no column offers a usable threshold, the node becomes a leaf.
        /// </summary>
        /// <param name="train">
        /// Training set. This method reads its label counts, <c>_Labels</c>,
        /// <c>_Data[row, column]</c>, <c>_CountRows</c> and <c>_CountColumns</c>.
        /// </param>
        public void Train(Datas.Useable train)
        {
            // Label -> number of rows carrying that label.
            var branch_score = train.getLabelCounts();

            int max_correct_branch = branch_score.Values.Max();

            // Search for a split only when more than one label is present: if the
            // most frequent label accounts for every row there is nothing to separate.
            if (branch_score.Values.Sum() != max_correct_branch)
            {
                int cols = train._CountColumns;
                int rows = train._CountRows;

                // Parallel scratch buffers reused for every column; this avoids
                // allocating one Tuple object per (row, column) cell.
                var values = new float[rows];
                var labels = new float[rows];

                int best_correct = int.MinValue;

                for (int c = 0; c < cols; c++)
                {
                    for (int r = 0; r < rows; r++)
                    {
                        labels[r] = train._Labels[r];
                        values[r] = train._Data[r, c];
                    }

                    // Order rows by this column's value, carrying labels along.
                    // Uses the default float ordering (float.CompareTo). The relative
                    // order of equal values is irrelevant because no split is ever
                    // evaluated inside a run of equal values (see below).
                    Array.Sort(values, labels);

                    // Label counts on each side of the candidate threshold.
                    // Initially every row is on the "more" side.
                    var branch_less_data = new Dictionary <float, int>();
                    var branch_more_data = new Dictionary <float, int>();

                    foreach (var kvp in branch_score)
                    {
                        branch_less_data[kvp.Key] = 0;
                        branch_more_data[kvp.Key] = kvp.Value;
                    }

                    for (int split_point = 0; split_point < rows - 1; split_point++)
                    {
                        float this_label = labels[split_point];
                        float this_value = values[split_point];

                        // Move the current row from the "more" side to the "less" side.
                        branch_less_data[this_label]++;
                        branch_more_data[this_label]--;

                        float next_value = values[split_point + 1];

                        // Only accept a threshold that really separates the two
                        // neighbours, i.e. this_value < split_value <= next_value.
                        // This skips identical values as well as midpoints that
                        // round onto this_value, overflow, or are NaN.
                        float split_value = (this_value + next_value) / 2;
                        if ((this_value < split_value) == (next_value < split_value))
                        {
                            continue;
                        }

                        // Rows classified correctly if each side predicts its majority label.
                        int correct = branch_less_data.Values.Max() + branch_more_data.Values.Max();

                        if (correct > best_correct)
                        {
                            best_correct                   = correct;
                            this._BranchSplitValue         = split_value;
                            this._BranchColumn             = c;
                            // ArgMax() is defined elsewhere; presumably it returns the
                            // label with the highest count - confirm.
                            this._BranchLessClassification = branch_less_data.ArgMax();
                            this._BranchMoreClassification = branch_more_data.ArgMax();
                        }
                    }
                }

                if (best_correct != int.MinValue)
                {
                    // Clear a leaf flag possibly left over from an earlier Train call
                    // on this instance, so the freshly chosen split is not ignored.
                    this._IsLeaf = false;
                    return;
                }
            }

            // Single label, or no column had two distinct values: predict the majority label.
            this._LeafClassification = branch_score.ArgMax();
            this._IsLeaf             = true;
        }
Exemple #2
0
        /// <summary>
        /// Recursively fits this tree node to <paramref name="train"/>.
        /// Every column is scanned for the threshold with the lowest size-weighted
        /// entropy of the two resulting label distributions. The data is then split
        /// on that threshold and two child nodes are trained on the parts. The node
        /// becomes a leaf when the data contains a single label, when
        /// <paramref name="current_depth"/> equals <c>_MaxDepth</c>, or when no
        /// column offers a usable threshold.
        /// </summary>
        /// <param name="train">
        /// Training set. This method reads its label counts, <c>_Labels</c>,
        /// <c>_Data[row, column]</c>, <c>_CountRows</c> and <c>_CountColumns</c>,
        /// and calls its <c>Split</c> method.
        /// </param>
        /// <param name="current_depth">
        /// Depth of this node; children are trained with <c>current_depth + 1</c>.
        /// </param>
        public void Train(Datas.Useable train, int current_depth)
        {
            // Label -> number of rows carrying that label.
            var branch_score = train.getLabelCounts();

            int max_correct_branch = branch_score.Values.Max();

            if ((branch_score.Values.Sum() != max_correct_branch) && // More than one label present.
                (this._MaxDepth != current_depth))                   // Depth limit not reached.
            {
                int cols = train._CountColumns;
                int rows = train._CountRows;

                // Parallel scratch buffers reused for every column; this avoids
                // allocating one Tuple object per (row, column) cell.
                var values = new float[rows];
                var labels = new float[rows];

                double best_entropy = double.MaxValue;
                int    best_column  = -1;
                float  best_split   = -1;

                for (int c = 0; c < cols; c++)
                {
                    for (int r = 0; r < rows; r++)
                    {
                        labels[r] = train._Labels[r];
                        values[r] = train._Data[r, c];
                    }

                    // Order rows by this column's value, carrying labels along.
                    // Uses the default float ordering (float.CompareTo). The relative
                    // order of equal values is irrelevant because no split is ever
                    // evaluated inside a run of equal values (see below).
                    Array.Sort(values, labels);

                    // Label counts on each side of the candidate threshold.
                    // Initially every row is on the "more" side.
                    var branch_less_data = new Dictionary <float, int>();
                    var branch_more_data = new Dictionary <float, int>();

                    foreach (var kvp in branch_score)
                    {
                        branch_less_data[kvp.Key] = 0;
                        branch_more_data[kvp.Key] = kvp.Value;
                    }

                    for (int split_point = 0; split_point < rows - 1; split_point++)
                    {
                        float this_label = labels[split_point];
                        float this_value = values[split_point];

                        // Move the current row from the "more" side to the "less" side.
                        branch_less_data[this_label]++;
                        branch_more_data[this_label]--;

                        float next_value = values[split_point + 1];

                        // Only accept a threshold that really separates the two
                        // neighbours, i.e. this_value < split_value <= next_value.
                        // This skips identical values as well as midpoints that
                        // round onto this_value, overflow, or are NaN.
                        float split_value = (this_value + next_value) / 2;
                        if ((this_value < split_value) == (next_value < split_value))
                        {
                            continue;
                        }

                        // Fraction of rows on each side, used to weight the two entropies.
                        // Entropy() is defined elsewhere; presumably the Shannon entropy
                        // of the label counts - confirm.
                        double p_less  = (split_point + 1.0) / rows;
                        double p_more  = 1 - p_less;
                        double entropy = p_less * branch_less_data.Values.Entropy() +
                                         p_more * branch_more_data.Values.Entropy();

                        // Lower weighted entropy wins; ties keep the earlier candidate.
                        if (entropy < best_entropy)
                        {
                            best_entropy = entropy;
                            best_split   = split_value;
                            best_column  = c;
                        }
                    }
                }

                if (best_column != -1)
                {
                    this._BranchSplitValue = best_split;
                    this._BranchColumn     = best_column;

                    // Clear a leaf flag possibly left over from an earlier Train call
                    // on this instance, so the freshly built branches are not ignored.
                    this._IsLeaf = false;

                    Datas.Useable less, more;

                    // NOTE(review): Split is defined elsewhere; presumably rows with
                    // value < split go to "less" and the rest to "more" - confirm.
                    train.Split(
                        this._BranchColumn,
                        this._BranchSplitValue,
                        out less,
                        out more);

                    this._BranchLess = new DecisionTree(this._MaxDepth);
                    this._BranchMore = new DecisionTree(this._MaxDepth);

                    this._BranchLess.Train(less, current_depth + 1);
                    this._BranchMore.Train(more, current_depth + 1);

                    return;
                }
            }

            // Single label, depth limit reached, or nothing to split on:
            // predict the majority label.
            this._LeafClassification = branch_score.ArgMax();
            this._IsLeaf             = true;
        }