Example #1
        /**
         * <summary> Training algorithm for the linear discriminant analysis classifier (Introduction to Machine Learning, Alpaydin, 2015).</summary>
         *
         * <param name="trainSet">  Training data given to the algorithm.</param>
         * <param name="parameters">-</param>
         */
        public override void Train(InstanceList.InstanceList trainSet, Parameter.Parameter parameters)
        {
            Vector averageVector;
            var    w0 = new Dictionary <string, double>();
            var    w  = new Dictionary <string, Vector>();
            var    priorDistribution = trainSet.ClassDistribution();
            var    classLists        = trainSet.DivideIntoClasses();
            var    covariance        = new Matrix(trainSet.Get(0).ContinuousAttributeSize(),
                                                  trainSet.Get(0).ContinuousAttributeSize());

            for (var i = 0; i < classLists.Size(); i++)
            {
                averageVector = new Vector(classLists.Get(i).ContinuousAttributeAverage());
                var classCovariance = classLists.Get(i).Covariance(averageVector);
                classCovariance.MultiplyWithConstant(classLists.Get(i).Size() - 1);
                covariance.Add(classCovariance);
            }

            covariance.DivideByConstant(trainSet.Size() - classLists.Size());
            covariance.Inverse();

            for (var i = 0; i < classLists.Size(); i++)
            {
                var ci = ((InstanceListOfSameClass)classLists.Get(i)).GetClassLabel();
                averageVector = new Vector(classLists.Get(i).ContinuousAttributeAverage());
                var wi = covariance.MultiplyWithVectorFromRight(averageVector);
                w[ci] = wi;
                var w0i = -0.5 * wi.DotProduct(averageVector) + System.Math.Log(priorDistribution.GetProbability(ci));
                w0[ci] = w0i;
            }

            model = new LdaModel(priorDistribution, w, w0);
        }
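
For reference, the two loops above compute exactly the pooled-covariance linear discriminant of the cited textbook. With S_i the covariance of class i, N_i its size, m_i its mean vector, and \hat{P}(C_i) its prior, the code forms

        S = \frac{1}{N-K} \sum_{i=1}^{K} (N_i - 1)\, S_i, \qquad
        w_i = S^{-1} m_i, \qquad
        w_{i0} = -\tfrac{1}{2}\, m_i^{\top} S^{-1} m_i + \log \hat{P}(C_i),

so prediction reduces to maximizing the linear discriminant g_i(x) = w_i^{\top} x + w_{i0} over the classes.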
Example #2
        /**
         * <summary> Constructor that takes {@link InstanceList}s as trainSet and validationSet. It first allocates the layer weights,
         * then, in each epoch, creates an input vector from the trainSet and computes the error. Using the validationSet it measures the
         * classification performance, and at the end it reassigns the allocated weight Matrix to the matrix that achieved the best accuracy.</summary>
         *
         * <param name="trainSet">     InstanceList that is used to train.</param>
         * <param name="validationSet">InstanceList that is used to validate.</param>
         * <param name="parameters">   Linear perceptron parameters; learningRate, etaDecrease, crossValidationRatio, epoch.</param>
         */
        public LinearPerceptronModel(InstanceList.InstanceList trainSet, InstanceList.InstanceList validationSet,
                                     LinearPerceptronParameter parameters) : base(trainSet)
        {
            W = AllocateLayerWeights(K, d + 1, new Random(parameters.GetSeed()));
            var bestW = (Matrix)W.Clone();
            var bestClassificationPerformance = new ClassificationPerformance(0.0);
            var epoch        = parameters.GetEpoch();
            var learningRate = parameters.GetLearningRate();

            for (var i = 0; i < epoch; i++)
            {
                trainSet.Shuffle(parameters.GetSeed());
                for (var j = 0; j < trainSet.Size(); j++)
                {
                    CreateInputVector(trainSet.Get(j));
                    var rMinusY = CalculateRMinusY(trainSet.Get(j), x, W);
                    var deltaW  = rMinusY.Multiply(x);
                    deltaW.MultiplyWithConstant(learningRate);
                    W.Add(deltaW);
                }

                var currentClassificationPerformance = TestClassifier(validationSet);
                if (currentClassificationPerformance.GetAccuracy() > bestClassificationPerformance.GetAccuracy())
                {
                    bestClassificationPerformance = currentClassificationPerformance;
                    bestW = (Matrix)W.Clone();
                }

                learningRate *= parameters.GetEtaDecrease();
            }

            W = bestW;
        }
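
The inner loop above is plain stochastic gradient descent for the single-layer perceptron: with r the desired output vector and y the network output for the (biased) input x, each training instance contributes the update

        \Delta W = \eta\, (r - y)\, x^{\top},

where the learning rate \eta is multiplied by etaDecrease after every epoch, and the epoch whose weights score best on the validation set supplies the final W.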
Example #3
        /**
         * <summary>Return a subset generated via the given {@link FeatureSubSet}.</summary>
         *
         * <param name="featureSubSet">{@link FeatureSubSet} input.</param>
         * <returns>Subset generated via the given {@link FeatureSubSet}.</returns>
         */
        public DataSet GetSubSetOfFeatures(FeatureSubSet featureSubSet)
        {
            var result = new DataSet(_definition.GetSubSetOfFeatures(featureSubSet));

            for (var i = 0; i < _instances.Size(); i++)
            {
                result.AddInstance(_instances.Get(i).GetSubSetOfFeatures(featureSubSet));
            }

            return(result);
        }
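
A minimal usage sketch for GetSubSetOfFeatures. The index-list constructor of FeatureSubSet is an assumption here; check the library for the actual signature:

            // Hypothetical usage: project a data set onto attributes 0, 2 and 3.
            // Assumes FeatureSubSet can be built from a List<int> of attribute indices.
            var indices = new List<int> { 0, 2, 3 };
            var reduced = dataSet.GetSubSetOfFeatures(new FeatureSubSet(indices));
            // Each instance in 'reduced' is rebuilt with only the selected
            // attributes; the original data set is left untouched.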
        /**
         * <summary> The calculateMetric method takes an {@link Instance} and a string as inputs. It loops through the class means; if
         * the corresponding class label is the same as the given string, it returns the negated distance between the given instance and
         * that class mean. Otherwise it returns the smallest possible double value.</summary>
         *
         * <param name="instance">{@link Instance} input.</param>
         * <param name="Ci">      String input.</param>
         * <returns>The negated distance between given instance and the current item of class means.</returns>
         */
        protected override double CalculateMetric(Instance.Instance instance, string ci)
        {
            for (var i = 0; i < _classMeans.Size(); i++)
            {
                if (_classMeans.Get(i).GetClassLabel() == ci)
                {
                    return(-_distanceMetric.Distance(instance, _classMeans.Get(i)));
                }
            }

            return(double.MinValue);
        }
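
Negating the distance lets the caller keep a single argmax rule: the predicted label is the one maximizing the metric, which here amounts to nearest-mean classification,

        g(x, C_i) = -\, d(x, m_i),

with double.MinValue acting as -\infty for any label that has no stored class mean.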
        /**
         * <summary> The testAutoEncoder method takes an {@link InstanceList} as input; for each item it predicts an output and computes the
         * difference from the actual value. At the end, it returns the error rate, i.e. the mean of the per-item errors.</summary>
         *
         * <param name="data">{@link InstanceList} to use as validation set.</param>
         * <returns>Error rate by finding the mean of total errors.</returns>
         */
        public Performance.Performance TestAutoEncoder(InstanceList.InstanceList data)
        {
            double total = data.Size();
            var    error = 0.0;

            for (var i = 0; i < total; i++)
            {
                y      = PredictInput(data.Get(i));
                r      = data.Get(i).ToVector();
                error += r.Difference(y).DotProduct();
            }

            return(new Performance.Performance(error / total));
        }
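
DotProduct() with no argument is evidently the vector's dot product with itself, so the returned figure is the mean squared reconstruction error over the list:

        E = \frac{1}{N} \sum_{t=1}^{N} \lVert r_t - y_t \rVert^2,

where r_t is instance t as a vector and y_t its reconstruction.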
Example #6
        /**
         * <summary> The testClassifier method takes an {@link InstanceList} as an input and returns an accuracy value as {@link ClassificationPerformance}.</summary>
         *
         * <param name="data">{@link InstanceList} to test.</param>
         * <returns>Accuracy value as {@link ClassificationPerformance}.</returns>
         */
        public ClassificationPerformance TestClassifier(InstanceList.InstanceList data)
        {
            double total = data.Size();
            var    count = 0;

            for (var i = 0; i < data.Size(); i++)
            {
                if (data.Get(i).GetClassLabel() == Predict(data.Get(i)))
                {
                    count++;
                }
            }

            return(new ClassificationPerformance(count / total));
        }
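
The returned score is plain accuracy, the fraction of instances whose predicted label matches the stored one:

        \text{acc} = \frac{1}{N} \sum_{t=1}^{N} \mathbf{1}\left[\hat{y}_t = r_t\right].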
        /**
         * <summary> The {@link AutoEncoderModel} constructor takes two {@link InstanceList}s as inputs: a train set and a validation set. First it allocates
         * the weights of the W and V matrices using the given {@link MultiLayerPerceptronParameter} and clones these matrices as bestW and bestV.
         * Then it gets the epoch count and iterates over the epochs: in each epoch it shuffles the train set and computes new W and V matrices.
         * At the end of each epoch it tests the autoencoder on the given validation set and, if the performance is better than the previous best,
         * reassigns bestW and bestV. It continues iterating with a decayed learning rate until the last epoch.</summary>
         *
         * <param name="trainSet">     {@link InstanceList} to use as train set.</param>
         * <param name="validationSet">{@link InstanceList} to use as validation set.</param>
         * <param name="parameters">   {@link MultiLayerPerceptronParameter} is used to get the parameters.</param>
         */
        public AutoEncoderModel(InstanceList.InstanceList trainSet, InstanceList.InstanceList validationSet,
                                MultiLayerPerceptronParameter parameters) : base(trainSet)
        {
            K = trainSet.Get(0).ContinuousAttributeSize();
            AllocateWeights(parameters.GetHiddenNodes(), new Random(parameters.GetSeed()));
            var bestW           = (Matrix)_W.Clone();
            var bestV           = (Matrix)_V.Clone();
            var bestPerformance = new Performance.Performance(double.MaxValue);
            var epoch           = parameters.GetEpoch();
            var learningRate    = parameters.GetLearningRate();

            for (var i = 0; i < epoch; i++)
            {
                trainSet.Shuffle(parameters.GetSeed());
                for (var j = 0; j < trainSet.Size(); j++)
                {
                    CreateInputVector(trainSet.Get(j));
                    r = trainSet.Get(j).ToVector();
                    var hidden       = CalculateHidden(x, _W, ActivationFunction.SIGMOID);
                    var hiddenBiased = hidden.Biased();
                    y = _V.MultiplyWithVectorFromRight(hiddenBiased);
                    var rMinusY        = r.Difference(y);
                    var deltaV         = rMinusY.Multiply(hiddenBiased);
                    var oneMinusHidden = CalculateOneMinusHidden(hidden);
                    var tmph           = _V.MultiplyWithVectorFromLeft(rMinusY);
                    tmph.Remove(0);
                    var tmpHidden = oneMinusHidden.ElementProduct(hidden.ElementProduct(tmph));
                    var deltaW    = tmpHidden.Multiply(x);
                    deltaV.MultiplyWithConstant(learningRate);
                    _V.Add(deltaV);
                    deltaW.MultiplyWithConstant(learningRate);
                    _W.Add(deltaW);
                }

                var currentPerformance = TestAutoEncoder(validationSet);
                if (currentPerformance.GetErrorRate() < bestPerformance.GetErrorRate())
                {
                    bestPerformance = currentPerformance;
                    bestW           = (Matrix)_W.Clone();
                    bestV           = (Matrix)_V.Clone();
                }

                learningRate *= 0.95;
            }

            _W = bestW;
            _V = bestV;
        }
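
In the notation of standard two-layer backpropagation, with z the sigmoid hidden vector, \tilde{z} its biased version, and the bias component dropped by tmph.Remove(0), each pass of the inner loop performs

        \Delta V = \eta\, (r - y)\, \tilde{z}^{\top}, \qquad
        \Delta W = \eta \left[ \left( V^{\top} (r - y) \right)_{1:H} \odot z \odot (1 - z) \right] x^{\top},

i.e. the usual regression updates with identity output units, followed by a fixed 0.95 learning-rate decay per epoch.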
Example #8
        /**
         * <summary> Tests the given instance list with the current model.</summary>
         *
         * <param name="testSet">Test data (list of instances) to be tested.</param>
         * <returns>The accuracy (and error) of the model as an instance of Performance class.</returns>
         */
        public virtual Performance.Performance Test(InstanceList.InstanceList testSet)
        {
            var classLabels = testSet.GetUnionOfPossibleClassLabels();
            var confusion   = new ConfusionMatrix(classLabels);

            for (var i = 0; i < testSet.Size(); i++)
            {
                var instance = testSet.Get(i);
                confusion.Classify(instance.GetClassLabel(), model.Predict(instance));
            }

            return(new DetailedClassificationPerformance(confusion));
        }
        /**
         * <summary> Constructor that takes two {@link InstanceList}s, a train set and a validation set, and a {@link DeepNetworkParameter} as inputs.
         * First it sets the class labels of the given train set, their count as K, and the number of continuous attributes as d, then it
         * allocates the weights and stores them as the best weights. In each epoch it shuffles the train set and loops through its items:
         * it multiplies each weight Matrix with the input Vector, applies the activation function, stores the result as the hidden layer, and adds the bias.
         * It then updates the weights and compares their performance on the validation set, updating bestClassificationPerformance and
         * bestWeights accordingly. At the end of each epoch it decays the learning rate via the etaDecrease value, and it finishes by
         * replacing the current weights with the best weights found.</summary>
         *
         * <param name="trainSet">     {@link InstanceList} to be used as trainSet.</param>
         * <param name="validationSet">{@link InstanceList} to be used as validationSet.</param>
         * <param name="parameters">   {@link DeepNetworkParameter} input.</param>
         */
        public DeepNetworkModel(InstanceList.InstanceList trainSet, InstanceList.InstanceList validationSet,
                                DeepNetworkParameter parameters) : base(trainSet)
        {
            var deltaWeights = new List <Matrix>();
            var hidden       = new List <Vector>();
            var hiddenBiased = new List <Vector>();

            _activationFunction = parameters.GetActivationFunction();
            AllocateWeights(parameters);
            var    bestWeights = SetBestWeights();
            var    bestClassificationPerformance = new ClassificationPerformance(0.0);
            var    epoch        = parameters.GetEpoch();
            var    learningRate = parameters.GetLearningRate();
            Vector tmph;
            var    tmpHidden            = new Vector(1, 0.0);
            var    activationDerivative = new Vector(1, 0.0);

            for (var i = 0; i < epoch; i++)
            {
                trainSet.Shuffle(parameters.GetSeed());
                for (var j = 0; j < trainSet.Size(); j++)
                {
                    CreateInputVector(trainSet.Get(j));
                    hidden.Clear();
                    hiddenBiased.Clear();
                    deltaWeights.Clear();
                    for (var k = 0; k < _hiddenLayerSize; k++)
                    {
                        if (k == 0)
                        {
                            hidden.Add(CalculateHidden(x, _weights[k], _activationFunction));
                        }
                        else
                        {
                            hidden.Add(CalculateHidden(hiddenBiased[k - 1], _weights[k], _activationFunction));
                        }

                        hiddenBiased.Add(hidden[k].Biased());
                    }

                    var rMinusY = CalculateRMinusY(trainSet.Get(j), hiddenBiased[_hiddenLayerSize - 1],
                                                   _weights[_weights.Count - 1]);
                    deltaWeights.Insert(0, rMinusY.Multiply(hiddenBiased[_hiddenLayerSize - 1]));
                    for (var k = _weights.Count - 2; k >= 0; k--)
                    {
                        if (k == _weights.Count - 2)
                        {
                            tmph = _weights[k + 1].MultiplyWithVectorFromLeft(rMinusY);
                        }
                        else
                        {
                            tmph = _weights[k + 1].MultiplyWithVectorFromLeft(tmpHidden);
                        }
                        tmph.Remove(0);
                        switch (_activationFunction)
                        {
                        case ActivationFunction.SIGMOID:
                            var oneMinusHidden = CalculateOneMinusHidden(hidden[k]);
                            activationDerivative = oneMinusHidden.ElementProduct(hidden[k]);
                            break;

                        case ActivationFunction.TANH:
                            // The ones vector must match the layer width (hidden[k]), not the layer count.
                            var one = new Vector(hidden[k].Size(), 1.0);
                            hidden[k].Tanh();
                            activationDerivative = one.Difference(hidden[k].ElementProduct(hidden[k]));
                            break;

                        case ActivationFunction.RELU:
                            hidden[k].ReluDerivative();
                            activationDerivative = hidden[k];
                            break;
                        }
                        tmpHidden = tmph.ElementProduct(activationDerivative);
                        if (k == 0)
                        {
                            deltaWeights.Insert(0, tmpHidden.Multiply(x));
                        }
                        else
                        {
                            deltaWeights.Insert(0, tmpHidden.Multiply(hiddenBiased[k - 1]));
                        }
                    }

                    for (var k = 0; k < _weights.Count; k++)
                    {
                        deltaWeights[k].MultiplyWithConstant(learningRate);
                        _weights[k].Add(deltaWeights[k]);
                    }
                }

                var currentClassificationPerformance = TestClassifier(validationSet);
                if (currentClassificationPerformance.GetAccuracy() > bestClassificationPerformance.GetAccuracy())
                {
                    bestClassificationPerformance = currentClassificationPerformance;
                    bestWeights = SetBestWeights();
                }

                learningRate *= parameters.GetEtaDecrease();
            }

            _weights.Clear();
            foreach (var m in bestWeights)
            {
                _weights.Add(m);
            }
        }
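
The backward pass generalizes the single-hidden-layer case: the error signal is propagated layer by layer as \delta_k = (W_{k+1}^{\top} \delta_{k+1}) \odot f'_k (with the bias component removed at each step), where the switch supplies the activation derivative of the chosen nonlinearity,

        sigmoid: f'(a) = z\,(1 - z), \qquad tanh: f'(a) = 1 - \tanh^2(a), \qquad ReLU: f'(a) = \mathbf{1}[a > 0],

and each layer's update is \Delta W_k = \eta\, \delta_k\, \tilde{z}_{k-1}^{\top}, with the input x standing in for \tilde{z}_{k-1} at the first layer.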
        /**
         * <summary> The DecisionNode constructor takes an {@link InstanceList} as input. It sets the class label field to the most frequently
         * occurring class label in the given data and collects the distinct class labels. It then adds the ordered attribute indices to the
         * indexList and shuffles them randomly. Next, it gets the class distribution of the given data and takes its entropy as the best
         * entropy so far.
         * <p/>
         * If an attribute of the given data is a {@link DiscreteIndexedAttribute}, it builds a Distribution for each attribute value from the discrete
         * indexed attribute class distribution and computes the size-weighted entropy, temporarily removing that distribution from the class
         * distribution and adding it back afterwards. If the entropy is better (lower) than the best so far, it reassigns the best entropy,
         * best attribute, and best split value accordingly.
         * <p/>
         * If an attribute of the given data is a {@link DiscreteAttribute}, it computes the entropy directly. If it is better than the best entropy
         * so far, it reassigns the best entropy and best attribute.
         * <p/>
         * If an attribute of the given data is a {@link ContinuousAttribute}, it keeps two distributions, left and right, initialized to the full
         * class distribution and an empty distribution respectively. Sweeping the data in sorted order, it moves each instance's class label from
         * the left to the right distribution and evaluates the size-weighted entropy at every boundary between distinct values; whenever the
         * entropy is better than the best so far, it reassigns the best entropy, best attribute, and best split value.</summary>
         *
         * <param name="data">     {@link InstanceList} input.</param>
         * <param name="condition">{@link DecisionCondition} to check.</param>
         * <param name="parameter">RandomForestParameter like seed, ensembleSize, attributeSubsetSize.</param>
         * <param name="isStump">  Refers to decision trees with only 1 splitting rule.</param>
         */
        public DecisionNode(InstanceList.InstanceList data, DecisionCondition condition,
                            RandomForestParameter parameter,
                            bool isStump)
        {
            int    bestAttribute = -1, size;
            double bestSplitValue = 0;

            this._condition = condition;
            this._data      = data;
            _classLabel     = Classifier.Classifier.GetMaximum(data.GetClassLabels());
            _leaf           = true;
            var classLabels = data.GetDistinctClassLabels();

            if (classLabels.Count == 1)
            {
                return;
            }

            if (isStump && condition != null)
            {
                return;
            }

            var indexList = new List <int>();

            for (var i = 0; i < data.Get(0).AttributeSize(); i++)
            {
                indexList.Add(i);
            }

            if (parameter != null && parameter.GetAttributeSubsetSize() < data.Get(0).AttributeSize())
            {
                size = parameter.GetAttributeSubsetSize();
            }
            else
            {
                size = data.Get(0).AttributeSize();
            }

            var classDistribution = data.ClassDistribution();
            var bestEntropy       = data.ClassDistribution().Entropy();

            for (var j = 0; j < size; j++)
            {
                var    index = indexList[j];
                double entropy;
                if (data.Get(0).GetAttribute(index) is DiscreteIndexedAttribute)
                {
                    for (var k = 0; k < ((DiscreteIndexedAttribute)data.Get(0).GetAttribute(index)).GetMaxIndex(); k++)
                    {
                        var distribution = data.DiscreteIndexedAttributeClassDistribution(index, k);
                        if (distribution.GetSum() > 0)
                        {
                            classDistribution.RemoveDistribution(distribution);
                            entropy = (classDistribution.Entropy() * classDistribution.GetSum() +
                                       distribution.Entropy() * distribution.GetSum()) / data.Size();
                            if (entropy < bestEntropy)
                            {
                                bestEntropy    = entropy;
                                bestAttribute  = index;
                                bestSplitValue = k;
                            }

                            classDistribution.AddDistribution(distribution);
                        }
                    }
                }
                else
                {
                    if (data.Get(0).GetAttribute(index) is DiscreteAttribute)
                    {
                        entropy = EntropyForDiscreteAttribute(index);
                        if (entropy < bestEntropy)
                        {
                            bestEntropy   = entropy;
                            bestAttribute = index;
                        }
                    }
                    else
                    {
                        if (data.Get(0).GetAttribute(index) is ContinuousAttribute)
                        {
                            data.Sort(index);
                            var previousValue     = double.MinValue;
                            var leftDistribution  = data.ClassDistribution();
                            var rightDistribution = new DiscreteDistribution();
                            for (var k = 0; k < data.Size(); k++)
                            {
                                var instance = data.Get(k);
                                if (k == 0)
                                {
                                    previousValue = ((ContinuousAttribute)instance.GetAttribute(index)).GetValue();
                                }
                                else
                                {
                                    if (((ContinuousAttribute)instance.GetAttribute(index)).GetValue() !=
                                        previousValue)
                                    {
                                        var splitValue =
                                            (previousValue + ((ContinuousAttribute)instance.GetAttribute(index))
                                             .GetValue()) / 2;
                                        previousValue = ((ContinuousAttribute)instance.GetAttribute(index)).GetValue();
                                        entropy       =
                                            (leftDistribution.GetSum() / data.Size()) * leftDistribution.Entropy() +
                                            (rightDistribution.GetSum() / data.Size()) * rightDistribution.Entropy();
                                        if (entropy < bestEntropy)
                                        {
                                            bestEntropy    = entropy;
                                            bestSplitValue = splitValue;
                                            bestAttribute  = index;
                                        }
                                    }
                                }

                                leftDistribution.RemoveItem(instance.GetClassLabel());
                                rightDistribution.AddItem(instance.GetClassLabel());
                            }
                        }
                    }
                }
            }

            if (bestAttribute != -1)
            {
                _leaf = false;
                if (data.Get(0).GetAttribute(bestAttribute) is DiscreteIndexedAttribute)
                {
                    CreateChildrenForDiscreteIndexed(bestAttribute, (int)bestSplitValue, parameter, isStump);
                }
                else
                {
                    if (data.Get(0).GetAttribute(bestAttribute) is DiscreteAttribute)
                    {
                        CreateChildrenForDiscrete(bestAttribute, parameter, isStump);
                    }
                    else
                    {
                        if (data.Get(0).GetAttribute(bestAttribute) is ContinuousAttribute)
                        {
                            CreateChildrenForContinuous(bestAttribute, bestSplitValue, parameter, isStump);
                        }
                    }
                }
            }
        }
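
All three attribute cases score a candidate split with the same impurity measure, the size-weighted entropy of the resulting branches,

        E_{split} = \sum_{b} \frac{N_b}{N}\, H(b),

and keep the attribute (and, for indexed and continuous attributes, the split value) that minimizes it. For continuous attributes the sweep tries the midpoint between every pair of adjacent distinct sorted values as the candidate threshold.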
        /**
         * <summary> The createChildrenForDiscreteIndexed method creates a List of DecisionNodes as children and a partition with respect to
         * the indexed attribute.</summary>
         *
         * <param name="attributeIndex">Index of the attribute.</param>
         * <param name="attributeValue">Value of the attribute.</param>
         * <param name="parameter">     RandomForestParameter like seed, ensembleSize, attributeSubsetSize.</param>
         * <param name="isStump">       Refers to decision trees with only 1 splitting rule.</param>
         */
        private void CreateChildrenForDiscreteIndexed(int attributeIndex, int attributeValue,
                                                      RandomForestParameter parameter, bool isStump)
        {
            var childrenData = _data.DivideWithRespectToIndexedAttribute(attributeIndex, attributeValue);

            _children = new List <DecisionNode>
            {
                new DecisionNode(childrenData.Get(0),
                                 new DecisionCondition(attributeIndex,
                                                       new DiscreteIndexedAttribute("", attributeValue,
                                                                                    ((DiscreteIndexedAttribute)_data.Get(0).GetAttribute(attributeIndex)).GetMaxIndex())),
                                 parameter, isStump),
                new DecisionNode(childrenData.Get(1),
                                 new DecisionCondition(attributeIndex,
                                                       new DiscreteIndexedAttribute("", -1,
                                                                                    ((DiscreteIndexedAttribute)_data.Get(0).GetAttribute(attributeIndex)).GetMaxIndex())),
                                 parameter, isStump)
            };
        }
        /**
         * <summary> Constructor that sets the class labels, their count as K, and the number of continuous attributes as d.</summary>
         *
         * <param name="trainSet">{@link InstanceList} to use as train set.</param>
         */
        public NeuralNetworkModel(InstanceList.InstanceList trainSet)
        {
            classLabels = trainSet.GetDistinctClassLabels();
            K           = classLabels.Count;
            d           = trainSet.Get(0).ContinuousAttributeSize();
        }
        /**
         * <summary> A constructor that takes {@link InstanceList}s as trainSet and validationSet. It initializes the {@link NeuralNetworkModel}
         * fields with the given {@link InstanceList}, then, in each epoch, creates an input vector from the trainSet and computes the error.
         * Using the validationSet it measures the classification performance and finally reassigns the allocated weight Matrices W and V
         * to the pair that achieved the best accuracy.</summary>
         *
         * <param name="trainSet">     InstanceList that is used to train.</param>
         * <param name="validationSet">InstanceList that is used to validate.</param>
         * <param name="parameters">   Multi layer perceptron parameters; seed, learningRate, etaDecrease, crossValidationRatio, epoch, hiddenNodes.</param>
         */
        public MultiLayerPerceptronModel(InstanceList.InstanceList trainSet, InstanceList.InstanceList validationSet,
                                         MultiLayerPerceptronParameter parameters) : base(trainSet)
        {
            _activationFunction = parameters.GetActivationFunction();
            AllocateWeights(parameters.GetHiddenNodes(), new Random(parameters.GetSeed()));
            var bestW = (Matrix)W.Clone();
            var bestV = (Matrix)_V.Clone();
            var bestClassificationPerformance = new ClassificationPerformance(0.0);
            var epoch                = parameters.GetEpoch();
            var learningRate         = parameters.GetLearningRate();
            var activationDerivative = new Vector(1, 0.0);

            for (var i = 0; i < epoch; i++)
            {
                trainSet.Shuffle(parameters.GetSeed());
                for (var j = 0; j < trainSet.Size(); j++)
                {
                    CreateInputVector(trainSet.Get(j));
                    var hidden       = CalculateHidden(x, W, _activationFunction);
                    var hiddenBiased = hidden.Biased();
                    var rMinusY      = CalculateRMinusY(trainSet.Get(j), hiddenBiased, _V);
                    var deltaV       = rMinusY.Multiply(hiddenBiased);
                    var tmph         = _V.MultiplyWithVectorFromLeft(rMinusY);
                    tmph.Remove(0);
                    switch (_activationFunction)
                    {
                    case ActivationFunction.SIGMOID:
                        var oneMinusHidden = CalculateOneMinusHidden(hidden);
                        activationDerivative = oneMinusHidden.ElementProduct(hidden);
                        break;

                    case ActivationFunction.TANH:
                        var one = new Vector(hidden.Size(), 1.0);
                        hidden.Tanh();
                        activationDerivative = one.Difference(hidden.ElementProduct(hidden));
                        break;

                    case ActivationFunction.RELU:
                        hidden.ReluDerivative();
                        activationDerivative = hidden;
                        break;
                    }
                    var tmpHidden = tmph.ElementProduct(activationDerivative);
                    var deltaW    = tmpHidden.Multiply(x);
                    deltaV.MultiplyWithConstant(learningRate);
                    _V.Add(deltaV);
                    deltaW.MultiplyWithConstant(learningRate);
                    W.Add(deltaW);
                }

                var currentClassificationPerformance = TestClassifier(validationSet);
                if (currentClassificationPerformance.GetAccuracy() > bestClassificationPerformance.GetAccuracy())
                {
                    bestClassificationPerformance = currentClassificationPerformance;
                    bestW = (Matrix)W.Clone();
                    bestV = (Matrix)_V.Clone();
                }

                learningRate *= parameters.GetEtaDecrease();
            }

            W  = bestW;
            _V = bestV;
        }
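
A minimal, hypothetical training sketch to close the section. The MultiLayerPerceptronParameter constructor below is an assumption: its argument order merely mirrors the parameter list named in the doc comment (seed, learningRate, etaDecrease, crossValidationRatio, epoch, hiddenNodes) plus the activation function, so check the library before relying on it:

            // Hypothetical usage; the parameter constructor's signature is assumed,
            // not taken from the library.
            var parameters = new MultiLayerPerceptronParameter(1, 0.1, 0.99, 0.2,
                                                               100, 20,
                                                               ActivationFunction.SIGMOID);
            var mlp = new MultiLayerPerceptronModel(trainSet, validationSet, parameters);
            // TestClassifier (shown in Example #6) reports accuracy on held-out data.
            var performance = mlp.TestClassifier(testSet);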