Пример #1
0
        private void Run(double[][] inputs, double[][] validationInputs)
        {
            var root       = Tree.Root = new DecisionNode(Tree);
            var thresholds = ThresholdsCalculator.Calculate(inputs);

            for (int attributeIndex = 0; attributeIndex < GlobalVariables.Dimensions; attributeIndex++)
            {
                var min = thresholds[attributeIndex].Min();

                thresholds[attributeIndex]
                    = thresholds[attributeIndex].RemoveAll(min);

                var minSplitInformation = new SplitInformation(inputs.GetColumn(attributeIndex),
                                                               validationInputs.GetColumn(attributeIndex),
                                                               validation.Length,
                                                               ComparisonKind.GreaterThan,
                                                               min,
                                                               learn.Length);

                var minSplit = new SuggestSplitPoint(attributeIndex,
                                                     ComparisonKind.GreaterThan,
                                                     min,
                                                     minSplitInformation,
                                                     null);

                var minNode = new SplitDecisionNode(Tree, root, minSplit.Left,
                                                    minSplit.ComparisonKind, minSplit.SplitValue, attributeIndex);

                inputs           = inputs.CutRowByColumnValue(attributeIndex, ComparisonKind.GreaterThan, min);
                validationInputs = validationInputs.CutRowByColumnValue(attributeIndex, ComparisonKind.GreaterThan, min);

                root.Branches.AttributeIndex = attributeIndex;
                root.Branches.AddRange(minNode);
                root = minNode;

                var max = thresholds[attributeIndex].Max();

                thresholds[attributeIndex]
                    = thresholds[attributeIndex].RemoveAll(max);

                var maxSplitInformation = new SplitInformation(inputs.GetColumn(attributeIndex),
                                                               validationInputs.GetColumn(attributeIndex),
                                                               validation.Length,
                                                               ComparisonKind.LessThan,
                                                               max,
                                                               learn.Length);

                var maxSplit = new SuggestSplitPoint(attributeIndex,
                                                     ComparisonKind.LessThan,
                                                     max,
                                                     maxSplitInformation);

                var maxNode = new SplitDecisionNode(Tree, root, maxSplit.Left,
                                                    maxSplit.ComparisonKind, maxSplit.SplitValue, attributeIndex);

                inputs           = inputs.CutRowByColumnValue(attributeIndex, ComparisonKind.LessThan, max);
                validationInputs = validationInputs.CutRowByColumnValue(attributeIndex, ComparisonKind.LessThan, max);

                root.Branches.AttributeIndex = attributeIndex;
                root.Branches.AddRange(maxNode);
                root = maxNode;
            }

            int[] maxAttributeUsage = null;
            if (GlobalVariables.GrowCondition.MaxAttributeUsage.HasValue)
            {
                maxAttributeUsage = Enumerable.Repeat(GlobalVariables.GrowCondition.MaxAttributeUsage.Value, thresholds.Length).ToArray();
            }

            Split(root, inputs, validationInputs, thresholds, root.GetHeight(), maxAttributeUsage);
        }
Пример #2
0
        private void Split(DecisionNode root, double[][] inputs, double[][] validationInputs, double[][] thresholds, int height, int[] attributeUsage)
        {
            if (GlobalVariables.GrowCondition.MaxTreeHeight.HasValue && GlobalVariables.GrowCondition.MaxTreeHeight <= height)
            {
                root.Output = inputs.Length;
                return;
            }

            SuggestSplitPoint suggestSplitPoint = null;

            for (int thresholdAttribute = 0; thresholdAttribute < thresholds.Length; thresholdAttribute++)
            {
                for (int thresholdIndex = 0; thresholdIndex < thresholds[thresholdAttribute].Length; thresholdIndex++)
                {
                    var comparisonKind = ComparisonKind.GreaterThanOrEqual;
                    var splitInformationLeft
                        = new SplitInformation(inputs.GetColumn(thresholdAttribute),
                                               validationInputs.GetColumn(thresholdAttribute),
                                               validation.Length,
                                               comparisonKind,
                                               thresholds[thresholdAttribute][thresholdIndex],
                                               learn.Length);

                    var splitInformationRight
                        = new SplitInformation(inputs.GetColumn(thresholdAttribute),
                                               validationInputs.GetColumn(thresholdAttribute),
                                               validation.Length,
                                               comparisonKind.GetOpposed(),
                                               thresholds[thresholdAttribute][thresholdIndex],
                                               learn.Length);

                    var localBestSplitPoint = new SuggestSplitPoint(thresholdAttribute,
                                                                    comparisonKind,
                                                                    thresholds[thresholdAttribute][thresholdIndex],
                                                                    splitInformationLeft,
                                                                    splitInformationRight);

                    // Check 1. both braches have correct size, 2. left branch has correct size and right is 0, 3. right branch has correct size and left is 0,
                    if (GlobalVariables.GrowCondition.MinLeafSize.HasValue
                        &&
                        !((splitInformationLeft.ConfusionMatrix.TruePositives >= GlobalVariables.GrowCondition.MinLeafSize.Value
                           &&
                           splitInformationRight.ConfusionMatrix.TruePositives >= GlobalVariables.GrowCondition.MinLeafSize.Value)
                          ||
                          (splitInformationLeft.ConfusionMatrix.TruePositives >= GlobalVariables.GrowCondition.MinLeafSize.Value
                           &&
                           splitInformationRight.ConfusionMatrix.TruePositives == 0)
                          ||
                          (splitInformationRight.ConfusionMatrix.TruePositives >= GlobalVariables.GrowCondition.MinLeafSize.Value
                           &&
                           splitInformationLeft.ConfusionMatrix.TruePositives == 0)))
                    {
                        continue;
                    }

                    var parentSize = splitInformationLeft.ConfusionMatrix.TruePositives
                                     + splitInformationRight.ConfusionMatrix.TruePositives;

                    if (parentSize <= 1)
                    {
                        continue;
                    }

                    if (suggestSplitPoint == null ||
                        localBestSplitPoint.IsBetterThan(suggestSplitPoint))
                    {
                        suggestSplitPoint = localBestSplitPoint;
                    }
                }
            }

            if (suggestSplitPoint == null || !suggestSplitPoint.IsBetterThanParent(root))
            {
                root.Output = inputs.Length;
                return;
            }

            if (GlobalVariables.GrowCondition.MaxAttributeUsage.HasValue)
            {
                attributeUsage[suggestSplitPoint.AttributeIndex] = attributeUsage[suggestSplitPoint.AttributeIndex] - 1;

                if (attributeUsage[suggestSplitPoint.AttributeIndex] == 0)
                {
                    thresholds[suggestSplitPoint.AttributeIndex] = new double[0];
                }
            }

            var children = new[]
            {
                new SplitDecisionNode(Tree, root, suggestSplitPoint.Left, suggestSplitPoint.ComparisonKind, suggestSplitPoint.SplitValue, suggestSplitPoint.AttributeIndex),
                new SplitDecisionNode(Tree, root, suggestSplitPoint.Right, suggestSplitPoint.ComparisonKind.GetOpposed(), suggestSplitPoint.SplitValue, suggestSplitPoint.AttributeIndex)
            };

            root.Branches.AttributeIndex = suggestSplitPoint.AttributeIndex;
            root.Branches.AddRange(children);

            foreach (var child in children)
            {
                var fulfillingInputs
                    = inputs.CutRowByColumnValue(suggestSplitPoint.AttributeIndex, child.Comparison, child.Value.Value);
                var fulfillingValidation
                    = validationInputs.CutRowByColumnValue(suggestSplitPoint.AttributeIndex, child.Comparison, child.Value.Value);

                var tempTresholds = thresholds.Copy();

                tempTresholds[suggestSplitPoint.AttributeIndex]
                    = tempTresholds[suggestSplitPoint.AttributeIndex].GetVeryfied(child.Comparison, suggestSplitPoint.SplitValue);

                Split(child, fulfillingInputs, fulfillingValidation, tempTresholds, height + 1, attributeUsage?.Copy());
            }
        }