/**
         * <summary> The createChildrenForDiscrete method partitions the data with respect to the given discrete
         * attribute and builds one child DecisionNode per distinct attribute value.</summary>
         *
         * <param name="attributeIndex">Index of the attribute.</param>
         * <param name="parameter">     RandomForestParameter like seed, ensembleSize, attributeSubsetSize.</param>
         * <param name="isStump">       Refers to decision trees with only 1 splitting rule.</param>
         */
        private void CreateChildrenForDiscrete(int attributeIndex, RandomForestParameter parameter, bool isStump)
        {
            var values     = _data.GetAttributeValueList(attributeIndex);
            var partitions = _data.DivideWithRespectToAttribute(attributeIndex);

            _children = new List <DecisionNode>();
            var childIndex = 0;
            foreach (var value in values)
            {
                // Child i holds the instances whose attribute equals the i-th distinct value.
                var condition = new DecisionCondition(attributeIndex, new DiscreteAttribute(value));
                _children.Add(new DecisionNode(partitions.Get(childIndex), condition, parameter, isStump));
                childIndex++;
            }
        }
        // Example #2
        public void TestTrain()
        {
            // Train and evaluate one forest on each dataset, in the original order,
            // asserting the expected error rate (in percent) within a 0.01 tolerance.
            var randomForest          = new RandomForest();
            var randomForestParameter = new RandomForestParameter(1, 100, 35);
            var cases = new[]
            {
                (DataSet: iris, ExpectedErrorRate: 2.00),
                (DataSet: bupa, ExpectedErrorRate: 42.03),
                (DataSet: dermatology, ExpectedErrorRate: 2.46),
                (DataSet: car, ExpectedErrorRate: 0.0),
                (DataSet: tictactoe, ExpectedErrorRate: 0.0),
                (DataSet: nursery, ExpectedErrorRate: 0.0)
            };

            foreach (var (dataSet, expectedErrorRate) in cases)
            {
                randomForest.Train(dataSet.GetInstanceList(), randomForestParameter);
                Assert.AreEqual(expectedErrorRate,
                                100 * randomForest.Test(dataSet.GetInstanceList()).GetErrorRate(), 0.01);
            }
        }
        /**
         * <summary> The createChildrenForDiscreteIndexed method creates an List of DecisionNodes as children and a partition with respect to
         * indexed attribute: child 0 holds the instances matching attributeValue, child 1 (index -1) holds the rest.</summary>
         *
         * <param name="attributeIndex">Index of the attribute.</param>
         * <param name="attributeValue">Value of the attribute.</param>
         * <param name="parameter">     RandomForestParameter like seed, ensembleSize, attributeSubsetSize.</param>
         * <param name="isStump">       Refers to decision trees with only 1 splitting rule.</param>
         */
        private void CreateChildrenForDiscreteIndexed(int attributeIndex, int attributeValue,
                                                      RandomForestParameter parameter, bool isStump)
        {
            var childrenData = _data.DivideWithRespectToIndexedAttribute(attributeIndex, attributeValue);
            // Hoisted: the max index of the attribute is invariant across both children,
            // so look it up (and cast) once instead of twice.
            var maxIndex = ((DiscreteIndexedAttribute)_data.Get(0).GetAttribute(attributeIndex)).GetMaxIndex();

            _children = new List <DecisionNode>
            {
                new DecisionNode(childrenData.Get(0),
                                 new DecisionCondition(attributeIndex,
                                                       new DiscreteIndexedAttribute("", attributeValue, maxIndex)),
                                 parameter, isStump),
                new DecisionNode(childrenData.Get(1),
                                 new DecisionCondition(attributeIndex,
                                                       new DiscreteIndexedAttribute("", -1, maxIndex)),
                                 parameter, isStump)
            };
        }
        /**
         * <summary> The DecisionNode constructor takes {@link InstanceList} data as input and sets the class label to the most
         * occurring class label of the data. If all instances share one class label, or this is a stump below the root, the node
         * stays a leaf. Otherwise it scans (a subset of) the attributes and, per attribute type, computes the weighted entropy
         * of the split it would induce, keeping the attribute (and split value) with the lowest entropy.
         * <p/>
         * For a {@link DiscreteIndexedAttribute}, each possible index value is tried as a binary split (that value vs. the rest)
         * by removing that value's class distribution from the overall one, scoring the two parts, and adding it back.
         * <p/>
         * For a {@link DiscreteAttribute}, the entropy of the full multi-way split is computed directly.
         * <p/>
         * For a {@link ContinuousAttribute}, the data is sorted on the attribute and candidate split points (midpoints between
         * consecutive distinct values) are scored by sweeping instances from a left distribution into a right distribution.
         * <p/>
         * If a best attribute is found, the matching createChildrenFor* method builds the children and the node becomes internal.</summary>
         *
         * <param name="data">     {@link InstanceList} input.</param>
         * <param name="condition">{@link DecisionCondition} to check.</param>
         * <param name="parameter">RandomForestParameter like seed, ensembleSize, attributeSubsetSize.</param>
         * <param name="isStump">  Refers to decision trees with only 1 splitting rule.</param>
         */
        public DecisionNode(InstanceList.InstanceList data, DecisionCondition condition,
                            RandomForestParameter parameter,
                            bool isStump)
        {
            int    bestAttribute = -1, size;
            double bestSplitValue = 0;

            this._condition = condition;
            this._data      = data;
            // Majority class label of the node; this is the prediction if the node remains a leaf.
            _classLabel     = Classifier.Classifier.GetMaximum(data.GetClassLabels());
            _leaf           = true;
            var classLabels = data.GetDistinctClassLabels();

            // Pure node: nothing left to split on.
            if (classLabels.Count == 1)
            {
                return;
            }

            // A stump only splits at the root (condition == null); deeper nodes stay leaves.
            if (isStump && condition != null)
            {
                return;
            }

            var indexList = new List <int>();

            for (var i = 0; i < data.Get(0).AttributeSize(); i++)
            {
                indexList.Add(i);
            }

            // NOTE(review): the class summary says these indices are "shuffled randomly", but no shuffle
            // happens here — the attribute subset below is always the first `size` indices, so the
            // random-forest attribute subsampling is effectively deterministic. Confirm against the
            // reference implementation (the parameter carries a seed that is never used here).
            if (parameter != null && parameter.GetAttributeSubsetSize() < data.Get(0).AttributeSize())
            {
                size = parameter.GetAttributeSubsetSize();
            }
            else
            {
                size = data.Get(0).AttributeSize();
            }

            // Baseline: entropy of the unsplit node; a split must beat this to be accepted.
            // (ClassDistribution() is called twice; presumably pure, so this only costs an extra pass.)
            var classDistribution = data.ClassDistribution();
            var bestEntropy       = data.ClassDistribution().Entropy();

            for (var j = 0; j < size; j++)
            {
                var    index = indexList[j];
                double entropy;
                if (data.Get(0).GetAttribute(index) is DiscreteIndexedAttribute)
                {
                    // Try each index value k as a binary split: instances with value k vs. all others.
                    for (var k = 0; k < ((DiscreteIndexedAttribute)data.Get(0).GetAttribute(index)).GetMaxIndex(); k++)
                    {
                        var distribution = data.DiscreteIndexedAttributeClassDistribution(index, k);
                        if (distribution.GetSum() > 0)
                        {
                            // Temporarily carve value k's distribution out of the overall one so the
                            // two parts can be scored; weighted average of the two entropies.
                            classDistribution.RemoveDistribution(distribution);
                            entropy = (classDistribution.Entropy() * classDistribution.GetSum() +
                                       distribution.Entropy() * distribution.GetSum()) / data.Size();
                            if (entropy < bestEntropy)
                            {
                                bestEntropy    = entropy;
                                bestAttribute  = index;
                                bestSplitValue = k;
                            }

                            // Restore the overall distribution for the next candidate.
                            classDistribution.AddDistribution(distribution);
                        }
                    }
                }
                else
                {
                    if (data.Get(0).GetAttribute(index) is DiscreteAttribute)
                    {
                        // Multi-way split on all values of the attribute; no split value needed.
                        entropy = EntropyForDiscreteAttribute(index);
                        if (entropy < bestEntropy)
                        {
                            bestEntropy   = entropy;
                            bestAttribute = index;
                        }
                    }
                    else
                    {
                        if (data.Get(0).GetAttribute(index) is ContinuousAttribute)
                        {
                            // Sort on the attribute, then sweep instances from the left distribution
                            // into the right one, scoring a candidate split at each midpoint between
                            // consecutive distinct values.
                            // NOTE(review): this sort reorders `data` in place as a side effect.
                            data.Sort(index);
                            var previousValue     = double.MinValue;
                            var leftDistribution  = data.ClassDistribution();
                            var rightDistribution = new DiscreteDistribution();
                            for (var k = 0; k < data.Size(); k++)
                            {
                                var instance = data.Get(k);
                                if (k == 0)
                                {
                                    previousValue = ((ContinuousAttribute)instance.GetAttribute(index)).GetValue();
                                }
                                else
                                {
                                    // Only consider a split where the attribute value actually changes.
                                    if (((ContinuousAttribute)instance.GetAttribute(index)).GetValue() !=
                                        previousValue)
                                    {
                                        var splitValue =
                                            (previousValue + ((ContinuousAttribute)instance.GetAttribute(index))
                                             .GetValue()) / 2;
                                        previousValue = ((ContinuousAttribute)instance.GetAttribute(index)).GetValue();
                                        // Weighted entropy of the two sides at this split point.
                                        entropy       =
                                            (leftDistribution.GetSum() / data.Size()) * leftDistribution.Entropy() +
                                            (rightDistribution.GetSum() / data.Size()) * rightDistribution.Entropy();
                                        if (entropy < bestEntropy)
                                        {
                                            bestEntropy    = entropy;
                                            bestSplitValue = splitValue;
                                            bestAttribute  = index;
                                        }
                                    }
                                }

                                // Move the current instance from the "left of split" side to the right.
                                // Naming caveat: given the '<' / '>' conditions built later, "left" here
                                // holds the not-yet-swept (larger-index) instances.
                                leftDistribution.RemoveItem(instance.GetClassLabel());
                                rightDistribution.AddItem(instance.GetClassLabel());
                            }
                        }
                    }
                }
            }

            // A winning split was found: materialize the children for the winner's attribute type.
            if (bestAttribute != -1)
            {
                _leaf = false;
                if (data.Get(0).GetAttribute(bestAttribute) is DiscreteIndexedAttribute)
                {
                    CreateChildrenForDiscreteIndexed(bestAttribute, (int)bestSplitValue, parameter, isStump);
                }
                else
                {
                    if (data.Get(0).GetAttribute(bestAttribute) is DiscreteAttribute)
                    {
                        CreateChildrenForDiscrete(bestAttribute, parameter, isStump);
                    }
                    else
                    {
                        if (data.Get(0).GetAttribute(bestAttribute) is ContinuousAttribute)
                        {
                            CreateChildrenForContinuous(bestAttribute, bestSplitValue, parameter, isStump);
                        }
                    }
                }
            }
        }
        /**
         * <summary> The createChildrenForContinuous method splits the data on a continuous attribute at the given
         * split value and builds the two resulting children: one for attribute &lt; splitValue, one for
         * attribute &gt; splitValue.</summary>
         *
         * <param name="attributeIndex">Index of the attribute.</param>
         * <param name="parameter">     RandomForestParameter like seed, ensembleSize, attributeSubsetSize.</param>
         * <param name="isStump">       Refers to decision trees with only 1 splitting rule.</param>
         * <param name="splitValue">    Split value is used for partitioning.</param>
         */
        private void CreateChildrenForContinuous(int attributeIndex, double splitValue, RandomForestParameter parameter,
                                                 bool isStump)
        {
            var partitions = _data.DivideWithRespectToAttribute(attributeIndex, splitValue);

            _children = new List <DecisionNode>();
            // Partition 0: instances below the split value; partition 1: instances above it.
            _children.Add(new DecisionNode(partitions.Get(0),
                                           new DecisionCondition(attributeIndex, '<', new ContinuousAttribute(splitValue)),
                                           parameter, isStump));
            _children.Add(new DecisionNode(partitions.Get(1),
                                           new DecisionCondition(attributeIndex, '>', new ContinuousAttribute(splitValue)),
                                           parameter, isStump));
        }