Пример #1
0
        /// <summary>
        /// Describes the subset of rows selected by <paramref name="mask"/> and computes the
        /// base-2 Shannon entropy of the <paramref name="output"/> values found in that subset.
        /// </summary>
        /// <param name="mask">Bit per row; a set bit means the row belongs to the subset.</param>
        /// <param name="size">
        /// Number of rows in the subset. It is used as the denominator for the probabilities and
        /// is not verified against <paramref name="mask"/> here (expected to equal the number of
        /// set bits; confirm at the call sites).
        /// </param>
        /// <param name="output">Output factor of every row, indexed like <paramref name="mask"/>.</param>
        /// <param name="outputFactorsCount">
        /// Number of distinct output factors; only values in <c>[0, outputFactorsCount)</c> are counted.
        /// </param>
        /// <remarks>
        /// When a single factor accounts for all <paramref name="size"/> rows, <c>Entropy</c> is 0 and
        /// <c>EntropyZeroFactor</c> holds that factor; otherwise <c>EntropyZeroFactor</c> stays -1.
        /// With <paramref name="size"/> equal to 0 the first factor with no selected rows matches
        /// (0 == 0) and is reported as the zero-entropy factor.
        /// </remarks>
        public SubsetMask(BitArrayX mask, int size, int[] output, int outputFactorsCount)
        {
            Mask              = mask;
            Size              = size;
            Entropy           = 0d;
            EntropyZeroFactor = -1;

            if (outputFactorsCount <= 0)
            {
                // No factors to examine: keep the defaults assigned above.
                return;
            }

            // Build the histogram in a single pass instead of rescanning every row once per factor.
            var counts = new int[outputFactorsCount];
            for (var i = 0; i < output.Length; i++)
            {
                if (!mask[i])
                {
                    continue;
                }

                var value = output[i];

                // Values outside the factor range are skipped rather than indexed into the histogram.
                if (value >= 0 && value < outputFactorsCount)
                {
                    counts[value]++;
                }
            }

            var entropy = 0d;
            for (var factor = 0; factor < outputFactorsCount; factor++)
            {
                var count = counts[factor];

                if (count == size)
                {
                    // The whole subset carries this factor: it is pure.
                    Entropy           = 0d;
                    EntropyZeroFactor = factor;
                    return;
                }

                if (count > 0)
                {
                    var px = (double)count / size;
                    entropy -= px * Math.Log(px, 2);
                }
            }

            Entropy = entropy;
        }
Пример #2
0
        /// <summary>
        /// Builds the decision tree: creates <c>Root</c> over all <c>input.Length</c> rows with no
        /// input factor marked as used, then splits nodes one by one using an explicit stack
        /// instead of recursion until every reachable node is a leaf.
        /// </summary>
        public void Learn()
        {
            // Every row takes part in the root subset.
            var everyRow = new BitArrayX(input.Length, true);

            Root = new DecisionNode
            {
                SubsetMask  = new SubsetMask(everyRow, input.Length, output, outputFactorsCount),
                UsedFactors = new BitArrayX(inputFactorsCount)
            };

            var pending = new Stack<DecisionNode>();
            pending.Push(Root);

            // The stack holds the root at this point, so the body always runs at least once.
            do
            {
                var current = pending.Pop();

                SplitNode(current);

                if (!current.IsLeaf)
                {
                    // A node that was split has both children; queue them for splitting.
                    pending.Push(current.Present);
                    pending.Push(current.Absent);
                }
            }
            while (pending.Count > 0);
        }
Пример #3
0
        /// <summary>
        /// Checks, for every length from 0 to 199, that <c>new BitArrayX(length, true)</c> equals
        /// an array of the same length whose bits were set to <c>true</c> one at a time.
        /// </summary>
        public void CanCreateAllTrues()
        {
            const int lengthLimit = 200;

            var length = 0;
            while (length < lengthLimit)
            {
                // Reference value: start from the default array and switch every bit on by hand.
                var filledByHand = new BitArrayX(length);
                for (var bit = 0; bit != length; ++bit)
                {
                    filledByHand[bit] = true;
                }

                var filledByConstructor = new BitArrayX(length, true);

                Assert.IsTrue(BitArrayX.Equals(filledByHand, filledByConstructor));

                length++;
            }
        }
Пример #4
0
        /// <summary>
        /// Either turns <paramref name="node"/> into a leaf or splits it on the unused input
        /// factor with the highest information gain, attaching <c>Present</c> and <c>Absent</c> children.
        /// </summary>
        /// <param name="node">Node to process; its <c>SubsetMask</c> and <c>UsedFactors</c> must be set.</param>
        /// <remarks>
        /// The node becomes a leaf when its subset is already pure (<c>EntropyZeroFactor &gt;= 0</c>)
        /// or when no unused input factor produced a gain above <c>double.MinValue</c>; in the
        /// latter case the leaf factor is <c>counts.IndexOfMax()</c> over the output values of the
        /// node's rows (presumably the most frequent one; <c>IndexOfMax</c> is defined elsewhere).
        /// </remarks>
        private void SplitNode(DecisionNode node)
        {
            var pureFactor = node.SubsetMask.EntropyZeroFactor;
            if (pureFactor >= 0)
            {
                // All rows of the subset share one output factor: nothing left to split.
                node.IsLeaf = true;
                node.Factor = pureFactor;
                return;
            }

            var parentMask    = node.SubsetMask.Mask;
            var parentSize    = node.SubsetMask.Size;
            var parentEntropy = node.SubsetMask.Entropy;

            var chosenFactor = -1;
            var highestGain  = double.MinValue;

            SubsetMask chosenPresentSubset = default(SubsetMask);
            SubsetMask chosenAbsentSubset  = default(SubsetMask);

            for (var candidate = 0; candidate < inputFactorsCount; candidate++)
            {
                if (node.UsedFactors[candidate])
                {
                    continue;
                }

                // Rows of this node for which the candidate's mask bit is set, and the remaining rows.
                var withMask    = new BitArrayX(parentMask).And(factorPresentMasks[candidate]);
                var withoutMask = new BitArrayX(parentMask).AndNot(withMask);

                var withCount    = withMask.CountBitSet();
                var withoutCount = parentSize - withCount;

                var withSubset    = new SubsetMask(withMask, withCount, output, outputFactorsCount);
                var withoutSubset = new SubsetMask(withoutMask, withoutCount, output, outputFactorsCount);

                // Parent entropy minus the size-weighted entropies of the two halves.
                var withShare    = withSubset.Entropy * withCount / parentSize;
                var withoutShare = withoutSubset.Entropy * withoutCount / parentSize;
                var gain         = parentEntropy - withShare - withoutShare;

                if (gain > highestGain)
                {
                    highestGain         = gain;
                    chosenFactor        = candidate;
                    chosenPresentSubset = withSubset;
                    chosenAbsentSubset  = withoutSubset;
                }
            }

            if (chosenFactor == -1)
            {
                // No candidate was selected: label the leaf from the output histogram of the node's rows.
                var histogram = new int[outputFactorsCount];
                for (var row = 0; row < output.Length; row++)
                {
                    if (!parentMask[row])
                    {
                        continue;
                    }

                    histogram[output[row]]++;
                }

                node.IsLeaf = true;
                node.Factor = histogram.IndexOfMax();
                return;
            }

            // Both children receive the same instance: the parent's used factors plus the chosen one.
            var usedByChildren = new BitArrayX(node.UsedFactors);
            usedByChildren[chosenFactor] = true;

            node.Factor = chosenFactor;

            node.Present = new DecisionNode
            {
                SubsetMask  = chosenPresentSubset,
                Factor      = chosenFactor,
                Parent      = node,
                UsedFactors = usedByChildren,
            };

            node.Absent = new DecisionNode
            {
                SubsetMask  = chosenAbsentSubset,
                Factor      = chosenFactor,
                Parent      = node,
                UsedFactors = usedByChildren,
            };
        }