예제 #1
0
        /// <summary>
        /// Builds the decision tree starting from <c>Root</c>.
        /// </summary>
        /// <remarks>
        /// The root node is created over a mask of <c>input.Length</c> bits (constructed with
        /// <c>true</c>; presumably every row is selected) and with a fresh, empty set of used
        /// factors. Nodes are then expanded iteratively with an explicit stack instead of
        /// recursion: each popped node is handed to <c>SplitNode</c>, and unless that call
        /// marked it as a leaf, both of its children are queued for expansion.
        /// </remarks>
        public void Learn()
        {
            var allRows = new BitArrayX(input.Length, true);

            Root = new DecisionNode
            {
                SubsetMask  = new SubsetMask(allRows, input.Length, output, outputFactorsCount),
                UsedFactors = new BitArrayX(inputFactorsCount)
            };

            var pending = new Stack<DecisionNode>();
            pending.Push(Root);

            while (pending.Count > 0)
            {
                var current = pending.Pop();

                SplitNode(current);

                if (!current.IsLeaf)
                {
                    // Absent is pushed last, so it is the next node to be expanded.
                    pending.Push(current.Present);
                    pending.Push(current.Absent);
                }
            }
        }
예제 #2
0
        /// <summary>
        /// Lazily enumerates the children of <paramref name="node"/>.
        /// </summary>
        /// <param name="node">The node whose children are requested.</param>
        /// <returns>
        /// An empty sequence when the node is a leaf; otherwise its <c>Present</c> child
        /// followed by its <c>Absent</c> child.
        /// </returns>
        private static IEnumerable<DecisionNode> GetChildren(DecisionNode node)
        {
            if (node.IsLeaf)
            {
                yield break;
            }

            yield return node.Present;
            yield return node.Absent;
        }
예제 #3
0
        /// <summary>
        /// Formats a single node as one line of text.
        /// </summary>
        /// <param name="node">The node to describe; it must have an entry in <c>indices</c>.</param>
        /// <returns>
        /// The node's index in square brackets, followed either by the returned output factor
        /// (for a leaf) or by the tested input factor together with the indices of the
        /// <c>Present</c> and <c>Absent</c> children (for an internal node).
        /// </returns>
        private string ToString(DecisionNode node)
        {
            if (node.IsLeaf)
            {
                // A leaf's Factor indexes the output factors.
                var factor = OutputFactors[node.Factor];

                return $"[{indices[node]}] " + $"Return {factor}";
            }
            else
            {
                // An internal node's Factor indexes the input factors.
                var factor = InputFactors[node.Factor];

                return $"[{indices[node]}] " +
                       $"If {factor} present {indices[node.Present]}, absent {indices[node.Absent]}";
            }
        }
예제 #4
0
        /// <summary>
        /// Turns <paramref name="node"/> into either a leaf or an internal node with two children.
        /// </summary>
        /// <remarks>
        /// <para>
        /// If the node's subset reports a non-negative <c>EntropyZeroFactor</c>, the node becomes
        /// a leaf carrying that factor.
        /// </para>
        /// <para>
        /// Otherwise every input factor not yet flagged in <c>UsedFactors</c> is tried: the
        /// node's rows are divided into those where the factor is present and the rest, and the
        /// information gain is computed as the node's entropy minus the size-weighted entropies
        /// of the two parts. The factor with the strictly largest gain wins; on a tie the
        /// factor with the lowest index is kept.
        /// </para>
        /// <para>
        /// If no factor was selected, the node becomes a leaf whose factor is the output value
        /// picked by <c>IndexOfMax</c> over the per-output counts of the node's rows. Otherwise
        /// <c>Present</c> and <c>Absent</c> children are created; they share a single
        /// <c>UsedFactors</c> instance, which is a copy of the node's set with the winning
        /// factor added.
        /// </para>
        /// </remarks>
        /// <param name="node">The node to split; its <c>SubsetMask</c> and <c>UsedFactors</c> must be set.</param>
        private void SplitNode(DecisionNode node)
        {
            var rows     = node.SubsetMask.Mask;
            var rowCount = node.SubsetMask.Size;

            var settledFactor = node.SubsetMask.EntropyZeroFactor;
            if (settledFactor >= 0)
            {
                node.IsLeaf = true;
                node.Factor = settledFactor;
                return;
            }

            var winner        = -1;
            var winnerGain    = double.MinValue;
            var winnerPresent = default(SubsetMask);
            var winnerAbsent  = default(SubsetMask);

            for (var candidate = 0; candidate < inputFactorsCount; candidate++)
            {
                if (node.UsedFactors[candidate])
                {
                    continue;
                }

                // Partition the node's rows by whether the candidate factor is present.
                var presentRows = new BitArrayX(rows).And(factorPresentMasks[candidate]);
                var absentRows  = new BitArrayX(rows).AndNot(presentRows);

                var presentCount = presentRows.CountBitSet();
                var absentCount  = rowCount - presentCount;

                var presentSubset = new SubsetMask(presentRows, presentCount, output, outputFactorsCount);
                var absentSubset  = new SubsetMask(absentRows, absentCount, output, outputFactorsCount);

                var parentEntropy  = node.SubsetMask.Entropy;
                var presentPenalty = presentSubset.Entropy * presentCount / rowCount;
                var absentPenalty  = absentSubset.Entropy * absentCount / rowCount;
                var gain           = parentEntropy - presentPenalty - absentPenalty;

                // Strict comparison on purpose: an equal gain never replaces the current winner.
                if (gain > winnerGain)
                {
                    winner        = candidate;
                    winnerGain    = gain;
                    winnerPresent = presentSubset;
                    winnerAbsent  = absentSubset;
                }
            }

            if (winner < 0)
            {
                // No factor was selected: fall back to a leaf chosen from the output counts.
                var tally = new int[outputFactorsCount];

                for (var row = 0; row < output.Length; row++)
                {
                    if (!rows[row])
                    {
                        continue;
                    }

                    tally[output[row]]++;
                }

                node.IsLeaf = true;
                node.Factor = tally.IndexOfMax();
                return;
            }

            // One copy of the used-factor set, extended by the winner, is shared by both children.
            var usedByChildren = new BitArrayX(node.UsedFactors);
            usedByChildren[winner] = true;

            node.Factor = winner;

            node.Present = new DecisionNode
            {
                SubsetMask  = winnerPresent,
                Factor      = winner,
                Parent      = node,
                UsedFactors = usedByChildren
            };

            node.Absent = new DecisionNode
            {
                SubsetMask  = winnerAbsent,
                Factor      = winner,
                Parent      = node,
                UsedFactors = usedByChildren
            };
        }