Example #1
0
        /// <summary>
        ///   Validates the arguments, resets the attribute usage counters, computes the
        ///   candidate split thresholds of every continuous attribute, creates a new root
        ///   node for the tree and starts the recursive split from that root.
        /// </summary>
        /// <param name="inputs">Input rows; column <c>i</c> holds the values of attribute <c>i</c>.</param>
        /// <param name="outputs">Class label of each input row, indexed like <paramref name="inputs"/>.</param>
        private void run(double[][] inputs, int[] outputs)
        {
            // Initial argument check
            DecisionTreeHelper.CheckArgs(tree, inputs, outputs);

            // Reset the usage of all attributes: none of them has been used yet
            for (int i = 0; i < attributeUsageCount.Length; i++)
            {
                attributeUsageCount[i] = 0;
            }

            // One threshold array per attribute; entries of attributes that are
            // not continuous are left unassigned (null)
            thresholds = new double[tree.Attributes.Count][];

            // Scratch buffer reused (and cleared) for every continuous attribute
            var candidates = new List<double>(inputs.Length);

            // 0. Create candidate split thresholds for each attribute
            for (int i = 0; i < tree.Attributes.Count; i++)
            {
                if (tree.Attributes[i].Nature != DecisionVariableKind.Continuous)
                {
                    continue;
                }

                double[] column = inputs.GetColumn(i);

                // Group the class labels by attribute value and sort the groups by value.
                // ToArray() materialises the query right here, so 'outputs' is only read
                // during this statement and no defensive copy of it is required.
                IGrouping<double, int>[] classesByValue = column
                    .Select((value, row) => new KeyValuePair<double, int>(value, outputs[row]))
                    .GroupBy(pair => pair.Key, pair => pair.Value)
                    .OrderBy(group => group.Key)
                    .ToArray();

                for (int j = 0; j < classesByValue.Length - 1; j++)
                {
                    // Following the results by Fayyad and Irani (1992) (see footnote on Quinlan (1996)):
                    // "If all cases of adjacent values V[i] and V[i+1] belong to the same class,
                    // a threshold between them cannot lead to a partition that has the maximum value of
                    // the criterion." i.e. there is no reason to add the threshold as a candidate.
                    IGrouping<double, int> current = classesByValue[j];
                    IGrouping<double, int> next = classesByValue[j + 1];

                    bool valuesDiffer = next.Key - current.Key > Constants.DoubleEpsilon;

                    // Union() yields the distinct classes of both groups; Skip(1).Any() stops
                    // as soon as a second distinct class shows up instead of counting them all.
                    if (valuesDiffer && current.Union(next).Skip(1).Any())
                    {
                        // The candidate is the midpoint between the two adjacent values
                        candidates.Add((current.Key + next.Key) / 2.0);
                    }
                }

                thresholds[i] = candidates.ToArray();
                candidates.Clear();
            }

            // 1. Create a root node for the tree
            tree.Root = new DecisionNode(tree);

            // Recursively split the tree nodes
            split(tree.Root, inputs, outputs, 0);
        }
Example #2
0
        /// <summary>
        ///   Runs the initial argument check, zeroes every attribute usage counter,
        ///   installs a new root node on the model and starts the recursive split from it.
        /// </summary>
        /// <param name="inputs">Input rows forwarded to the argument check and to the root split.</param>
        /// <param name="outputs">Output values forwarded alongside <paramref name="inputs"/>.</param>
        private void run(int[][] inputs, int[] outputs)
        {
            // Check the model and the data before doing anything else
            DecisionTreeHelper.CheckArgs(Model, inputs, outputs);

            // Mark every attribute as not yet used
            int attribute = 0;
            while (attribute < AttributeUsageCount.Length)
            {
                AttributeUsageCount[attribute] = 0;
                attribute++;
            }

            // 1. Give the model a new root node
            Model.Root = new DecisionNode(Model);

            // 2. Recursively split the tree nodes, starting at the root
            //    (the trailing 0 is presumably the starting depth -- confirm against split)
            split(Model.Root, inputs, outputs, 0);
        }