示例#1
0
        /**
         * <summary> Training algorithm for the quadratic discriminant analysis (QDA) classifier
         * (Introduction to Machine Learning, Alpaydin, 2015). For each class it estimates a mean
         * vector and a class-specific covariance matrix, then stores the quadratic (W), linear (w)
         * and constant (w0) terms of the discriminant in a {@link QdaModel}.</summary>
         *
         * <param name="trainSet">  Training data given to the algorithm.</param>
         * <param name="parameters">-</param>
         */
        public override void Train(InstanceList.InstanceList trainSet, Parameter.Parameter parameters)
        {
            var priorDistribution = trainSet.ClassDistribution();
            var classLists        = trainSet.DivideIntoClasses();
            var w0 = new Dictionary<string, double>();
            var w  = new Dictionary<string, Vector>();
            var W  = new Dictionary<string, Matrix>();

            for (var i = 0; i < classLists.Size(); i++)
            {
                var classList   = classLists.Get(i);
                var label       = ((InstanceListOfSameClass)classList).GetClassLabel();
                var mean        = new Vector(classList.ContinuousAttributeAverage());
                var covariance  = classList.Covariance(mean);
                // Determinant must be taken BEFORE Inverse(): Inverse mutates the matrix in place.
                var determinant = covariance.Determinant();
                covariance.Inverse();

                // Quadratic term: W_i = -1/2 * Sigma_i^{-1}
                var quadraticTerm = (Matrix)covariance.Clone();
                quadraticTerm.MultiplyWithConstant(-0.5);
                W[label] = quadraticTerm;
                // Linear term: w_i = Sigma_i^{-1} * mu_i
                var linearTerm = covariance.MultiplyWithVectorFromLeft(mean);
                w[label] = linearTerm;
                // Constant term folds in the log-determinant and the class prior.
                w0[label] = -0.5 * (linearTerm.DotProduct(mean) + System.Math.Log(determinant)) +
                            System.Math.Log(priorDistribution.GetProbability(label));
            }

            model = new QdaModel(priorDistribution, W, w, w0);
        }
示例#2
0
        /**
         * <summary> Training algorithm for the linear discriminant analysis (LDA) classifier
         * (Introduction to Machine Learning, Alpaydin, 2015). All classes share one pooled
         * covariance matrix; per class only a linear term (w) and a constant term (w0) are
         * stored in the resulting {@link LdaModel}.</summary>
         *
         * <param name="trainSet">  Training data given to the algorithm.</param>
         * <param name="parameters">-</param>
         */
        public override void Train(InstanceList.InstanceList trainSet, Parameter.Parameter parameters)
        {
            var w0 = new Dictionary<string, double>();
            var w  = new Dictionary<string, Vector>();
            var priorDistribution = trainSet.ClassDistribution();
            var classLists        = trainSet.DivideIntoClasses();
            var dimension         = trainSet.Get(0).ContinuousAttributeSize();
            var pooledCovariance  = new Matrix(dimension, dimension);

            // Accumulate each class covariance weighted by (n_i - 1) into the pooled estimate.
            for (var i = 0; i < classLists.Size(); i++)
            {
                var classList       = classLists.Get(i);
                var mean            = new Vector(classList.ContinuousAttributeAverage());
                var classCovariance = classList.Covariance(mean);
                classCovariance.MultiplyWithConstant(classList.Size() - 1);
                pooledCovariance.Add(classCovariance);
            }

            // Unbiased pooled covariance: divide by N - K, then invert in place.
            pooledCovariance.DivideByConstant(trainSet.Size() - classLists.Size());
            pooledCovariance.Inverse();

            for (var i = 0; i < classLists.Size(); i++)
            {
                var classList = classLists.Get(i);
                var label     = ((InstanceListOfSameClass)classList).GetClassLabel();
                var mean      = new Vector(classList.ContinuousAttributeAverage());
                // Linear term: w_i = Sigma^{-1} * mu_i
                var wi = pooledCovariance.MultiplyWithVectorFromRight(mean);
                w[label] = wi;
                // Constant term: -1/2 * mu_i^T Sigma^{-1} mu_i + log P(C_i)
                w0[label] = -0.5 * wi.DotProduct(mean) + System.Math.Log(priorDistribution.GetProbability(label));
            }

            model = new LdaModel(priorDistribution, w, w0);
        }
        /**
         * <summary> The predict method takes an {@link Instance} as input and walks the decision
         * tree from this node downward, returning the predicted class label.</summary>
         *
         * <param name="instance">Instance to make prediction for.</param>
         * <returns>The prediction for the given instance.</returns>
         */
        public string Predict(Instance.Instance instance)
        {
            if (instance is CompositeInstance compositeInstance)
            {
                // For composite instances the fallback label is the most frequent label among
                // the instance's possible labels, according to this node's data distribution.
                var fallback = _data.ClassDistribution()
                                    .GetMaxItem(compositeInstance.GetPossibleClassLabels());
                if (_leaf)
                {
                    return fallback;
                }

                foreach (var child in _children)
                {
                    if (!child._condition.Satisfy(compositeInstance))
                    {
                        continue;
                    }

                    // First matching child decides; a null child prediction falls back.
                    return child.Predict(compositeInstance) ?? fallback;
                }

                // No child condition matched.
                return fallback;
            }

            if (_leaf)
            {
                return _classLabel;
            }

            foreach (var child in _children)
            {
                if (child._condition.Satisfy(instance))
                {
                    return child.Predict(instance);
                }
            }

            // No child condition matched; answer with this node's majority label.
            return _classLabel;
        }
        /**
         * <summary> Training algorithm for the K-Means classifier: the model keeps one prototype
         * per class, namely the mean instance of that class's members.</summary>
         *
         * <param name="trainSet">  Training data given to the algorithm.</param>
         * <param name="parameters">distanceMetric: distance metric used to measure the distance between two instances.</param>
         */
        public override void Train(InstanceList.InstanceList trainSet, Parameter.Parameter parameters)
        {
            var priors     = trainSet.ClassDistribution();
            var perClass   = trainSet.DivideIntoClasses();
            var prototypes = new InstanceList.InstanceList();

            // One mean instance per class.
            for (var i = 0; i < perClass.Size(); i++)
            {
                prototypes.Add(perClass.Get(i).Average());
            }

            model = new KMeansModel(priors, prototypes, ((KMeansParameter)parameters).GetDistanceMetric());
        }
示例#5
0
        /**
         * <summary> Training algorithm for Naive Bayes. Dispatches to TrainDiscreteVersion for
         * discrete data sets and TrainContinuousVersion for continuous data sets, based on the
         * type of the first attribute of the first instance.</summary>
         * <param name="trainSet">Training data given to the algorithm</param>
         * <param name="parameters">-</param>
         */
        public override void Train(InstanceList.InstanceList trainSet, Parameter.Parameter parameters)
        {
            var priors     = trainSet.ClassDistribution();
            var classLists = trainSet.DivideIntoClasses();
            // Probe a single attribute to decide which estimator applies to the whole data set.
            var probeAttribute = classLists.Get(0).Get(0).GetAttribute(0);

            if (probeAttribute is DiscreteAttribute)
            {
                TrainDiscreteVersion(priors, classLists);
            }
            else
            {
                TrainContinuousVersion(priors, classLists);
            }
        }
        /**
         * <summary> The DecisionNode constructor takes {@link InstanceList} data as input, sets the class label
         * to the most frequent class label of the data, and marks the node as a leaf. If the data is pure (one
         * class) or this is a stump that already split once, it stops. Otherwise it searches every candidate
         * attribute for the split that minimizes the weighted entropy of the resulting partition:
         * <p/>
         * For a {@link DiscreteIndexedAttribute}, each index value is tried as a one-vs-rest split by temporarily
         * removing that value's class distribution from the node distribution and measuring the combined entropy.
         * <p/>
         * For a {@link DiscreteAttribute}, the multiway-split entropy is computed directly.
         * <p/>
         * For a {@link ContinuousAttribute}, the data is sorted on the attribute and every midpoint between two
         * consecutive distinct values is tried as a threshold, sweeping instances from the left distribution to
         * the right distribution incrementally.
         * <p/>
         * If a split beats the no-split entropy, children are created for the winning attribute.</summary>
         *
         * <param name="data">     {@link InstanceList} input.</param>
         * <param name="condition">{@link DecisionCondition} to check.</param>
         * <param name="parameter">RandomForestParameter like seed, ensembleSize, attributeSubsetSize.</param>
         * <param name="isStump">  Refers to decision trees with only 1 splitting rule.</param>
         */
        public DecisionNode(InstanceList.InstanceList data, DecisionCondition condition,
                            RandomForestParameter parameter,
                            bool isStump)
        {
            int    bestAttribute = -1, size;
            double bestSplitValue = 0;

            this._condition = condition;
            this._data      = data;
            // Default to a leaf predicting the majority class; only cleared if a useful split is found.
            _classLabel     = Classifier.Classifier.GetMaximum(data.GetClassLabels());
            _leaf           = true;
            var classLabels = data.GetDistinctClassLabels();

            // Pure node: a single class remains, nothing to split.
            if (classLabels.Count == 1)
            {
                return;
            }

            // A stump splits exactly once: a node that already carries a condition stays a leaf.
            if (isStump && condition != null)
            {
                return;
            }

            var indexList = new List <int>();

            for (var i = 0; i < data.Get(0).AttributeSize(); i++)
            {
                indexList.Add(i);
            }

            // NOTE(review): the class doc mentions shuffling the indices randomly (random-forest
            // attribute subsetting), but no shuffle happens here — when the subset size is smaller
            // than the attribute count, only the FIRST `size` attributes are ever considered.
            // Confirm against the reference implementation.
            if (parameter != null && parameter.GetAttributeSubsetSize() < data.Get(0).AttributeSize())
            {
                size = parameter.GetAttributeSubsetSize();
            }
            else
            {
                size = data.Get(0).AttributeSize();
            }

            var classDistribution = data.ClassDistribution();
            var bestEntropy       = data.ClassDistribution().Entropy();   // entropy with no split at all

            for (var j = 0; j < size; j++)
            {
                var    index = indexList[j];
                double entropy;
                if (data.Get(0).GetAttribute(index) is DiscreteIndexedAttribute)
                {
                    // Try each index value as a binary (this value vs rest) split.
                    for (var k = 0; k < ((DiscreteIndexedAttribute)data.Get(0).GetAttribute(index)).GetMaxIndex(); k++)
                    {
                        var distribution = data.DiscreteIndexedAttributeClassDistribution(index, k);
                        if (distribution.GetSum() > 0)
                        {
                            // Temporarily carve this value's distribution out of the node distribution,
                            // score the weighted two-part entropy, then restore it below.
                            classDistribution.RemoveDistribution(distribution);
                            entropy = (classDistribution.Entropy() * classDistribution.GetSum() +
                                       distribution.Entropy() * distribution.GetSum()) / data.Size();
                            if (entropy < bestEntropy)
                            {
                                bestEntropy    = entropy;
                                bestAttribute  = index;
                                bestSplitValue = k;
                            }

                            classDistribution.AddDistribution(distribution);
                        }
                    }
                }
                else
                {
                    if (data.Get(0).GetAttribute(index) is DiscreteAttribute)
                    {
                        // Multiway split on every discrete value; no split value needed.
                        entropy = EntropyForDiscreteAttribute(index);
                        if (entropy < bestEntropy)
                        {
                            bestEntropy   = entropy;
                            bestAttribute = index;
                        }
                    }
                    else
                    {
                        if (data.Get(0).GetAttribute(index) is ContinuousAttribute)
                        {
                            // Sort on the attribute, then sweep: instances move from the left
                            // distribution to the right as the candidate threshold advances.
                            data.Sort(index);
                            var previousValue     = double.MinValue;
                            var leftDistribution  = data.ClassDistribution();
                            var rightDistribution = new DiscreteDistribution();
                            for (var k = 0; k < data.Size(); k++)
                            {
                                var instance = data.Get(k);
                                if (k == 0)
                                {
                                    previousValue = ((ContinuousAttribute)instance.GetAttribute(index)).GetValue();
                                }
                                else
                                {
                                    // Only evaluate a threshold between two DISTINCT consecutive values.
                                    if (((ContinuousAttribute)instance.GetAttribute(index)).GetValue() !=
                                        previousValue)
                                    {
                                        // Candidate threshold: midpoint of the two neighboring values.
                                        var splitValue =
                                            (previousValue + ((ContinuousAttribute)instance.GetAttribute(index))
                                             .GetValue()) / 2;
                                        previousValue = ((ContinuousAttribute)instance.GetAttribute(index)).GetValue();
                                        entropy       =
                                            (leftDistribution.GetSum() / data.Size()) * leftDistribution.Entropy() +
                                            (rightDistribution.GetSum() / data.Size()) * rightDistribution.Entropy();
                                        if (entropy < bestEntropy)
                                        {
                                            bestEntropy    = entropy;
                                            bestSplitValue = splitValue;
                                            bestAttribute  = index;
                                        }
                                    }
                                }

                                // Advance the sweep: current instance crosses from left to right.
                                leftDistribution.RemoveItem(instance.GetClassLabel());
                                rightDistribution.AddItem(instance.GetClassLabel());
                            }
                        }
                    }
                }
            }

            // A winning split was found: this node becomes internal and children are built
            // according to the winning attribute's type.
            if (bestAttribute != -1)
            {
                _leaf = false;
                if (data.Get(0).GetAttribute(bestAttribute) is DiscreteIndexedAttribute)
                {
                    CreateChildrenForDiscreteIndexed(bestAttribute, (int)bestSplitValue, parameter, isStump);
                }
                else
                {
                    if (data.Get(0).GetAttribute(bestAttribute) is DiscreteAttribute)
                    {
                        CreateChildrenForDiscrete(bestAttribute, parameter, isStump);
                    }
                    else
                    {
                        if (data.Get(0).GetAttribute(bestAttribute) is ContinuousAttribute)
                        {
                            CreateChildrenForContinuous(bestAttribute, bestSplitValue, parameter, isStump);
                        }
                    }
                }
            }
        }
示例#7
0
 /**
  * <summary> Returns the number of distinct class labels in the class distribution of the {@link InstanceList}.</summary>
  *
  * <returns>Size of the class label distribution of {@link InstanceList}.</returns>
  */
 public int ClassCount() => _instances.ClassDistribution().Count;
 /**
  * <summary> Constructor which derives and stores the class distribution of the given {@link InstanceList}.</summary>
  *
  * <param name="trainSet">{@link InstanceList} whose class distribution is used.</param>
  */
 public DummyModel(InstanceList.InstanceList trainSet) => _distribution = trainSet.ClassDistribution();
 /**
  * <summary> Training algorithm for the random classifier: the model simply remembers the set of
  * class labels seen in training and a seed for random selection.</summary>
  *
  * <param name="trainSet">  Training data given to the algorithm.</param>
  * <param name="parameters">-</param>
  */
 public override void Train(InstanceList.InstanceList trainSet, Parameter.Parameter parameters)
 {
     var labels = new List<string>(trainSet.ClassDistribution().Keys);
     model = new RandomModel(labels, parameters.GetSeed());
 }