/**
 * <summary> Constructor that takes {@link InstanceList}s as trainsSet and validationSet. Initially it allocates layer weights,
 * then creates an input vector by using given trainSet and finds error. Via the validationSet it finds the classification
 * performance and at the end it reassigns the allocated weight Matrix with the matrix that has the best accuracy.</summary>
 *
 * <param name="trainSet">     InstanceList that is used to train.</param>
 * <param name="validationSet">InstanceList that is used to validate.</param>
 * <param name="parameters">   Linear perceptron parameters; learningRate, etaDecrease, crossValidationRatio, epoch.</param>
 */
public LinearPerceptronModel(InstanceList.InstanceList trainSet, InstanceList.InstanceList validationSet,
    LinearPerceptronParameter parameters) : base(trainSet)
{
    W = AllocateLayerWeights(K, d + 1, new Random(parameters.GetSeed()));
    // Track the weight matrix that scored highest on the validation set so far.
    var bestWeights = (Matrix) W.Clone();
    var bestPerformance = new ClassificationPerformance(0.0);
    var epochCount = parameters.GetEpoch();
    var eta = parameters.GetLearningRate();
    for (var epochIndex = 0; epochIndex < epochCount; epochIndex++)
    {
        // NOTE: the same seed is reused every epoch, matching the library-wide convention.
        trainSet.Shuffle(parameters.GetSeed());
        for (var sampleIndex = 0; sampleIndex < trainSet.Size(); sampleIndex++)
        {
            var instance = trainSet.Get(sampleIndex);
            CreateInputVector(instance);
            // Standard perceptron update: W += eta * (r - y) * x^T.
            var rMinusY = CalculateRMinusY(instance, x, W);
            var gradient = rMinusY.Multiply(x);
            gradient.MultiplyWithConstant(eta);
            W.Add(gradient);
        }
        // Keep the weights only if they beat the best validation accuracy seen so far.
        var currentPerformance = TestClassifier(validationSet);
        if (currentPerformance.GetAccuracy() > bestPerformance.GetAccuracy())
        {
            bestPerformance = currentPerformance;
            bestWeights = (Matrix) W.Clone();
        }
        // Decay the learning rate after each epoch.
        eta *= parameters.GetEtaDecrease();
    }
    W = bestWeights;
}
/**
 * <summary> Constructor that takes two {@link InstanceList} train set and validation set and {@link DeepNetworkParameter} as inputs.
 * First it sets the class labels, their sizes as K and the size of the continuous attributes as d of given train set and
 * allocates weights and sets the best weights. At each epoch, it shuffles the train set and loops through the each item of that train set,
 * it multiplies the weights Matrix with input Vector than applies the sigmoid function and stores the result as hidden and add bias.
 * Then updates weights and at the end it compares the performance of these weights with validation set. It updates the bestClassificationPerformance and
 * bestWeights according to the current situation. At the end it updates the learning rate via etaDecrease value and finishes
 * with clearing the weights.</summary>
 *
 * <param name="trainSet">     {@link InstanceList} to be used as trainSet.</param>
 * <param name="validationSet">{@link InstanceList} to be used as validationSet.</param>
 * <param name="parameters">   {@link DeepNetworkParameter} input.</param>
 */
public DeepNetworkModel(InstanceList.InstanceList trainSet, InstanceList.InstanceList validationSet,
    DeepNetworkParameter parameters) : base(trainSet)
{
    var deltaWeights = new List<Matrix>();
    var hidden = new List<Vector>();
    var hiddenBiased = new List<Vector>();
    _activationFunction = parameters.GetActivationFunction();
    AllocateWeights(parameters);
    var bestWeights = SetBestWeights();
    var bestClassificationPerformance = new ClassificationPerformance(0.0);
    var epoch = parameters.GetEpoch();
    var learningRate = parameters.GetLearningRate();
    Vector tmph;
    var tmpHidden = new Vector(1, 0.0);
    var activationDerivative = new Vector(1, 0.0);
    for (var i = 0; i < epoch; i++)
    {
        trainSet.Shuffle(parameters.GetSeed());
        for (var j = 0; j < trainSet.Size(); j++)
        {
            CreateInputVector(trainSet.Get(j));
            hidden.Clear();
            hiddenBiased.Clear();
            deltaWeights.Clear();
            // Forward pass: propagate the input through every hidden layer, biasing each output.
            for (var k = 0; k < _hiddenLayerSize; k++)
            {
                if (k == 0)
                {
                    hidden.Add(CalculateHidden(x, _weights[k], _activationFunction));
                }
                else
                {
                    hidden.Add(CalculateHidden(hiddenBiased[k - 1], _weights[k], _activationFunction));
                }
                hiddenBiased.Add(hidden[k].Biased());
            }
            // Output layer error and its weight delta.
            var rMinusY = CalculateRMinusY(trainSet.Get(j), hiddenBiased[_hiddenLayerSize - 1],
                _weights[_weights.Count - 1]);
            deltaWeights.Insert(0, rMinusY.Multiply(hiddenBiased[_hiddenLayerSize - 1]));
            // Backward pass: propagate the error from the output layer down to the input layer.
            for (var k = _weights.Count - 2; k >= 0; k--)
            {
                if (k == _weights.Count - 2)
                {
                    tmph = _weights[k + 1].MultiplyWithVectorFromLeft(rMinusY);
                }
                else
                {
                    tmph = _weights[k + 1].MultiplyWithVectorFromLeft(tmpHidden);
                }
                // Drop the bias component before applying the activation derivative.
                tmph.Remove(0);
                switch (_activationFunction)
                {
                    case ActivationFunction.SIGMOID:
                        var oneMinusHidden = CalculateOneMinusHidden(hidden[k]);
                        activationDerivative = oneMinusHidden.ElementProduct(hidden[k]);
                        break;
                    case ActivationFunction.TANH:
                        // BUG FIX: the ones-vector must match the size of hidden layer k
                        // (hidden[k].Size()), not the number of hidden layers (hidden.Count).
                        // The sibling MultiLayerPerceptronModel uses hidden.Size() the same way.
                        var one = new Vector(hidden[k].Size(), 1.0);
                        hidden[k].Tanh();
                        activationDerivative = one.Difference(hidden[k].ElementProduct(hidden[k]));
                        break;
                    case ActivationFunction.RELU:
                        hidden[k].ReluDerivative();
                        activationDerivative = hidden[k];
                        break;
                }
                tmpHidden = tmph.ElementProduct(activationDerivative);
                if (k == 0)
                {
                    deltaWeights.Insert(0, tmpHidden.Multiply(x));
                }
                else
                {
                    deltaWeights.Insert(0, tmpHidden.Multiply(hiddenBiased[k - 1]));
                }
            }
            // Apply all layer deltas scaled by the current learning rate.
            for (var k = 0; k < _weights.Count; k++)
            {
                deltaWeights[k].MultiplyWithConstant(learningRate);
                _weights[k].Add(deltaWeights[k]);
            }
        }
        // Keep the weight set with the best validation accuracy seen so far.
        var currentClassificationPerformance = TestClassifier(validationSet);
        if (currentClassificationPerformance.GetAccuracy() > bestClassificationPerformance.GetAccuracy())
        {
            bestClassificationPerformance = currentClassificationPerformance;
            bestWeights = SetBestWeights();
        }
        learningRate *= parameters.GetEtaDecrease();
    }
    // Restore the best-performing weights as the final model.
    _weights.Clear();
    foreach (var m in bestWeights)
    {
        _weights.Add(m);
    }
}
/**
 * <summary> A constructor that takes {@link InstanceList}s as trainsSet and validationSet. It sets the {@link NeuralNetworkModel}
 * nodes with given {@link InstanceList} then creates an input vector by using given trainSet and finds error.
 * Via the validationSet it finds the classification performance and reassigns the allocated weight Matrix with the matrix
 * that has the best accuracy and the Matrix V with the best Vector input.</summary>
 *
 * <param name="trainSet">     InstanceList that is used to train.</param>
 * <param name="validationSet">InstanceList that is used to validate.</param>
 * <param name="parameters">   Multi layer perceptron parameters; seed, learningRate, etaDecrease, crossValidationRatio, epoch, hiddenNodes.</param>
 */
public MultiLayerPerceptronModel(InstanceList.InstanceList trainSet, InstanceList.InstanceList validationSet,
    MultiLayerPerceptronParameter parameters) : base(trainSet)
{
    _activationFunction = parameters.GetActivationFunction();
    AllocateWeights(parameters.GetHiddenNodes(), new Random(parameters.GetSeed()));
    // Snapshot of the weight pair that scored highest on the validation set so far.
    var bestFirstLayer = (Matrix) W.Clone();
    var bestSecondLayer = (Matrix) _V.Clone();
    var bestPerformance = new ClassificationPerformance(0.0);
    var epochCount = parameters.GetEpoch();
    var eta = parameters.GetLearningRate();
    var activationDerivative = new Vector(1, 0.0);
    for (var epochIndex = 0; epochIndex < epochCount; epochIndex++)
    {
        // NOTE: the same seed is reused every epoch, matching the library-wide convention.
        trainSet.Shuffle(parameters.GetSeed());
        for (var sampleIndex = 0; sampleIndex < trainSet.Size(); sampleIndex++)
        {
            var instance = trainSet.Get(sampleIndex);
            CreateInputVector(instance);
            // Forward pass through the single hidden layer.
            var hidden = CalculateHidden(x, W, _activationFunction);
            var hiddenBiased = hidden.Biased();
            // Output-layer error and its delta for the second-layer weights V.
            var rMinusY = CalculateRMinusY(instance, hiddenBiased, _V);
            var deltaV = rMinusY.Multiply(hiddenBiased);
            // Back-propagate the error through V; drop the bias component.
            var backPropagated = _V.MultiplyWithVectorFromLeft(rMinusY);
            backPropagated.Remove(0);
            switch (_activationFunction)
            {
                case ActivationFunction.SIGMOID:
                    // sigmoid'(h) = h * (1 - h) where h is the activated hidden output.
                    var oneMinusHidden = CalculateOneMinusHidden(hidden);
                    activationDerivative = oneMinusHidden.ElementProduct(hidden);
                    break;
                case ActivationFunction.TANH:
                    var ones = new Vector(hidden.Size(), 1.0);
                    hidden.Tanh();
                    activationDerivative = ones.Difference(hidden.ElementProduct(hidden));
                    break;
                case ActivationFunction.RELU:
                    hidden.ReluDerivative();
                    activationDerivative = hidden;
                    break;
            }
            var hiddenError = backPropagated.ElementProduct(activationDerivative);
            var deltaW = hiddenError.Multiply(x);
            // Apply both layer updates scaled by the current learning rate.
            deltaV.MultiplyWithConstant(eta);
            _V.Add(deltaV);
            deltaW.MultiplyWithConstant(eta);
            W.Add(deltaW);
        }
        // Keep the weight pair only if it beats the best validation accuracy seen so far.
        var currentPerformance = TestClassifier(validationSet);
        if (currentPerformance.GetAccuracy() > bestPerformance.GetAccuracy())
        {
            bestPerformance = currentPerformance;
            bestFirstLayer = (Matrix) W.Clone();
            bestSecondLayer = (Matrix) _V.Clone();
        }
        // Decay the learning rate after each epoch.
        eta *= parameters.GetEtaDecrease();
    }
    W = bestFirstLayer;
    _V = bestSecondLayer;
}