示例#1
0
        /**
         * <summary> Returns the distinct class labels of <c>_instances</c> joined into a single string, separated
         * by semicolons (for example <c>"a;b;c"</c>).</summary>
         *
         * <returns>The semicolon-separated distinct class labels, in the order returned by
         * <c>GetDistinctClassLabels()</c>; an empty string when there are no labels.</returns>
         */
        public string GetClasses()
        {
            // string.Join handles the empty and single-element cases without indexing into the list.
            return string.Join(";", _instances.GetDistinctClassLabels());
        }
        /**
         * <summary> Builds a decision tree node from the given data and, when a split that lowers the entropy is
         * found, creates the children of the node.
         * <p/>
         * The class label of the node is set to <c>Classifier.GetMaximum</c> of the class labels of the data and the
         * node starts out as a leaf. It remains a leaf when the data has exactly one distinct class label, or when
         * <paramref name="isStump"/> is true and <paramref name="condition"/> is not null.
         * <p/>
         * Otherwise the first <c>size</c> attribute indices are examined in ascending order, where <c>size</c> is the
         * attribute subset size of <paramref name="parameter"/> when that is smaller than the number of attributes,
         * and the number of attributes otherwise. The starting value to beat is the entropy of the class
         * distribution of the whole data; a candidate replaces the current best only when its entropy is strictly
         * lower.
         * <p/>
         * If an attribute is a <see cref="DiscreteIndexedAttribute"/>, then for every index value below its maximum
         * index the distribution returned by <c>DiscreteIndexedAttributeClassDistribution</c> is temporarily removed
         * from the overall class distribution, the size-weighted entropy of the two parts is computed, and the
         * distribution is added back. The best split value is the index value.
         * <p/>
         * If an attribute is a <see cref="DiscreteAttribute"/>, the entropy comes from
         * <c>EntropyForDiscreteAttribute</c>; no split value is recorded.
         * <p/>
         * If an attribute is a <see cref="ContinuousAttribute"/>, the data is sorted on that attribute (this reorders
         * <paramref name="data"/> in place) and scanned once. Class labels are moved one instance at a time from a
         * distribution that starts as the full class distribution into an initially empty one. Whenever the attribute
         * value changes between consecutive instances, the midpoint of the two values is evaluated as a split
         * candidate using the size-weighted entropy of the two distributions.</summary>
         *
         * <param name="data">     <see cref="InstanceList.InstanceList"/> input; must contain at least one instance.</param>
         * <param name="condition"><see cref="DecisionCondition"/> stored on the node; may be null.</param>
         * <param name="parameter">RandomForestParameter supplying the attribute subset size; may be null, in which
         * case all attributes are examined.</param>
         * <param name="isStump">  Refers to decision trees with only 1 splitting rule.</param>
         */
        public DecisionNode(InstanceList.InstanceList data, DecisionCondition condition,
                            RandomForestParameter parameter,
                            bool isStump)
        {
            var    bestAttribute  = -1;
            double bestSplitValue = 0;

            _condition  = condition;
            _data       = data;
            _classLabel = Classifier.Classifier.GetMaximum(data.GetClassLabels());
            _leaf       = true;

            // A pure node cannot be improved by splitting.
            if (data.GetDistinctClassLabels().Count == 1)
            {
                return;
            }

            // For a stump, a node that was given a condition is not split any further.
            if (isStump && condition != null)
            {
                return;
            }

            int attributeCount = data.Get(0).AttributeSize();
            var indexList      = new List <int>();

            for (var i = 0; i < attributeCount; i++)
            {
                indexList.Add(i);
            }

            // NOTE(review): earlier documentation said the indices are shuffled here, but no shuffle is performed,
            // so an attribute subset is always the first `size` attributes. Confirm whether that is intended.
            int size = attributeCount;
            if (parameter != null && parameter.GetAttributeSubsetSize() < attributeCount)
            {
                size = parameter.GetAttributeSubsetSize();
            }

            var classDistribution = data.ClassDistribution();
            var bestEntropy       = classDistribution.Entropy();

            // Held as a double so that the weight computations below can never be integer divisions.
            double totalCount = data.Size();

            for (var j = 0; j < size; j++)
            {
                var index     = indexList[j];
                var attribute = data.Get(0).GetAttribute(index);

                // DiscreteIndexedAttribute is tested before DiscreteAttribute on purpose; keep this order.
                if (attribute is DiscreteIndexedAttribute)
                {
                    var indexedAttribute = (DiscreteIndexedAttribute)attribute;
                    for (var k = 0; k < indexedAttribute.GetMaxIndex(); k++)
                    {
                        var distribution = data.DiscreteIndexedAttributeClassDistribution(index, k);
                        if (distribution.GetSum() > 0)
                        {
                            // Temporarily take this value's distribution out of the total to get the "rest" part.
                            classDistribution.RemoveDistribution(distribution);
                            double entropy = (classDistribution.Entropy() * classDistribution.GetSum() +
                                              distribution.Entropy() * distribution.GetSum()) / totalCount;
                            if (entropy < bestEntropy)
                            {
                                bestEntropy    = entropy;
                                bestAttribute  = index;
                                bestSplitValue = k;
                            }

                            // Restore the total for the next candidate value.
                            classDistribution.AddDistribution(distribution);
                        }
                    }
                }
                else if (attribute is DiscreteAttribute)
                {
                    double entropy = EntropyForDiscreteAttribute(index);
                    if (entropy < bestEntropy)
                    {
                        bestEntropy   = entropy;
                        bestAttribute = index;
                    }
                }
                else if (attribute is ContinuousAttribute)
                {
                    // Sorting reorders `data` in place; later data.Get(0) calls see the new order.
                    data.Sort(index);
                    var previousValue     = double.MinValue;
                    var leftDistribution  = data.ClassDistribution();
                    var rightDistribution = new DiscreteDistribution();
                    for (var k = 0; k < data.Size(); k++)
                    {
                        var instance     = data.Get(k);
                        var currentValue = ((ContinuousAttribute)instance.GetAttribute(index)).GetValue();
                        if (k == 0)
                        {
                            previousValue = currentValue;
                        }
                        else if (currentValue != previousValue)
                        {
                            // Exact comparison is intended: a candidate split exists only between two
                            // different consecutive values of the sorted data.
                            var splitValue = (previousValue + currentValue) / 2;
                            previousValue = currentValue;
                            double entropy =
                                (leftDistribution.GetSum() / totalCount) * leftDistribution.Entropy() +
                                (rightDistribution.GetSum() / totalCount) * rightDistribution.Entropy();
                            if (entropy < bestEntropy)
                            {
                                bestEntropy    = entropy;
                                bestSplitValue = splitValue;
                                bestAttribute  = index;
                            }
                        }

                        // Move the current instance from the not-yet-visited side to the visited side.
                        leftDistribution.RemoveItem(instance.GetClassLabel());
                        rightDistribution.AddItem(instance.GetClassLabel());
                    }
                }
            }

            // No candidate beat the entropy of the unsplit data: the node stays a leaf.
            if (bestAttribute == -1)
            {
                return;
            }

            _leaf = false;
            var chosenAttribute = data.Get(0).GetAttribute(bestAttribute);
            if (chosenAttribute is DiscreteIndexedAttribute)
            {
                CreateChildrenForDiscreteIndexed(bestAttribute, (int)bestSplitValue, parameter, isStump);
            }
            else if (chosenAttribute is DiscreteAttribute)
            {
                CreateChildrenForDiscrete(bestAttribute, parameter, isStump);
            }
            else if (chosenAttribute is ContinuousAttribute)
            {
                CreateChildrenForContinuous(bestAttribute, bestSplitValue, parameter, isStump);
            }
        }
 /**
  * <summary> Initializes the model from a training set: stores its distinct class labels, the number of those
  * labels as K, and the continuous attribute size of the first training instance as d.</summary>
  *
  * <param name="trainSet">Training data; its instance at index 0 is read to determine d.</param>
  */
 public NeuralNetworkModel(InstanceList.InstanceList trainSet)
 {
     var distinctLabels = trainSet.GetDistinctClassLabels();

     classLabels = distinctLabels;
     K           = distinctLabels.Count;
     d           = trainSet.Get(0).ContinuousAttributeSize();
 }