/**
 * <summary> Trains the linear discriminant analysis classifier (Introduction to Machine Learning, Alpaydin, 2015).
 * The covariance matrices of the individual classes are pooled into one shared matrix, that matrix is inverted,
 * and a weight vector plus a bias term is derived from it for every class. The resulting LdaModel is stored
 * in the model member.</summary>
 *
 * <param name="trainSet">Training data given to the algorithm.</param>
 * <param name="parameters">Not read by this method.</param>
 */
public override void Train(InstanceList.InstanceList trainSet, Parameter.Parameter parameters)
{
    var priors = trainSet.ClassDistribution();
    var partitions = trainSet.DivideIntoClasses();
    var dimension = trainSet.Get(0).ContinuousAttributeSize();
    var pooled = new Matrix(dimension, dimension);
    var weightVectors = new Dictionary<string, Vector>();
    var biases = new Dictionary<string, double>();

    // Accumulate every class covariance, each one scaled by (class size - 1).
    var classIndex = 0;
    while (classIndex < partitions.Size())
    {
        var members = partitions.Get(classIndex);
        var mean = new Vector(members.ContinuousAttributeAverage());
        var scatter = members.Covariance(mean);
        scatter.MultiplyWithConstant(members.Size() - 1);
        pooled.Add(scatter);
        classIndex++;
    }

    // Normalise by (number of instances - number of classes), then invert in place.
    pooled.DivideByConstant(trainSet.Size() - partitions.Size());
    pooled.Inverse();

    // Per class: w = inverse covariance * mean, w0 = -0.5 * (w . mean) + log(prior).
    classIndex = 0;
    while (classIndex < partitions.Size())
    {
        var members = partitions.Get(classIndex);
        var label = ((InstanceListOfSameClass)members).GetClassLabel();
        var mean = new Vector(members.ContinuousAttributeAverage());
        var weight = pooled.MultiplyWithVectorFromRight(mean);
        weightVectors[label] = weight;
        var bias = -0.5 * weight.DotProduct(mean) + System.Math.Log(priors.GetProbability(label));
        biases[label] = bias;
        classIndex++;
    }

    model = new LdaModel(priors, weightVectors, biases);
}
/**
 * <summary> Builds a linear perceptron from a train set and a validation set. The weight matrix W is allocated
 * with a seeded random generator. For each epoch the train set is shuffled, W is updated once per instance with
 * learningRate * (r - y) * x, and the classifier is then evaluated on the validation set. The weights that
 * reached the highest validation accuracy are kept as the final W. The learning rate is multiplied by the
 * eta decrease after every epoch.</summary>
 *
 * <param name="trainSet">InstanceList that is used to train.</param>
 * <param name="validationSet">InstanceList that is used to validate.</param>
 * <param name="parameters">Linear perceptron parameters; this constructor reads seed, learningRate, etaDecrease and epoch.</param>
 */
public LinearPerceptronModel(InstanceList.InstanceList trainSet, InstanceList.InstanceList validationSet, LinearPerceptronParameter parameters) : base(trainSet)
{
    W = AllocateLayerWeights(K, d + 1, new Random(parameters.GetSeed()));

    var epochCount = parameters.GetEpoch();
    var eta = parameters.GetLearningRate();
    var bestSoFar = new ClassificationPerformance(0.0);
    var bestWeights = (Matrix)W.Clone();

    var epochIndex = 0;
    while (epochIndex < epochCount)
    {
        trainSet.Shuffle(parameters.GetSeed());

        // One weight update per training instance.
        for (var position = 0; position < trainSet.Size(); position++)
        {
            var sample = trainSet.Get(position);
            CreateInputVector(sample);
            var outputError = CalculateRMinusY(sample, x, W);
            var update = outputError.Multiply(x);
            update.MultiplyWithConstant(eta);
            W.Add(update);
        }

        // Keep a copy of the weights whenever validation accuracy strictly improves.
        var validation = TestClassifier(validationSet);
        var improved = validation.GetAccuracy() > bestSoFar.GetAccuracy();
        if (improved)
        {
            bestSoFar = validation;
            bestWeights = (Matrix)W.Clone();
        }

        eta *= parameters.GetEtaDecrease();
        epochIndex++;
    }

    W = bestWeights;
}
/**
 * <summary>Builds a new DataSet restricted to the given FeatureSubSet. The definition and every stored
 * instance are each reduced with their own GetSubSetOfFeatures call.</summary>
 *
 * <param name="featureSubSet">FeatureSubSet that selects the features to keep.</param>
 * <returns>A new DataSet holding the reduced definition and the reduced instances, in the original order.</returns>
 */
public DataSet GetSubSetOfFeatures(FeatureSubSet featureSubSet)
{
    var reducedDefinition = _definition.GetSubSetOfFeatures(featureSubSet);
    var subset = new DataSet(reducedDefinition);

    var position = 0;
    while (position < _instances.Size())
    {
        var reducedInstance = _instances.Get(position).GetSubSetOfFeatures(featureSubSet);
        subset.AddInstance(reducedInstance);
        position++;
    }

    return subset;
}
/**
 * <summary> Scores an instance against the class mean that carries the given class label. The class means are
 * scanned in order; for the first one whose label equals ci, the negated distance between the instance and
 * that mean is returned. If no class mean carries the label, double.MinValue is returned.</summary>
 *
 * <param name="instance">Instance to score.</param>
 * <param name="ci">Class label to look up among the class means.</param>
 * <returns>The negated distance to the matching class mean, or double.MinValue when there is no match.</returns>
 */
protected override double CalculateMetric(Instance.Instance instance, string ci)
{
    var position = 0;
    while (position < _classMeans.Size())
    {
        var mean = _classMeans.Get(position);
        if (mean.GetClassLabel() != ci)
        {
            position++;
            continue;
        }

        // Closer means must score higher, hence the negation.
        return -_distanceMetric.Distance(instance, mean);
    }

    return double.MinValue;
}
/**
 * <summary> Evaluates the auto encoder on an InstanceList. For every instance the input is predicted, the
 * prediction is subtracted from the instance's own vector, and the parameterless DotProduct() of that difference
 * is added to a running total. The total divided by the number of instances is returned as a Performance.
 * The y and r members are overwritten on every iteration.</summary>
 *
 * <param name="data">InstanceList to use as validation set.</param>
 * <returns>Performance built from the mean of the accumulated per-instance errors.</returns>
 */
public Performance.Performance TestAutoEncoder(InstanceList.InstanceList data)
{
    double instanceCount = data.Size();
    var errorSum = 0.0;

    var index = 0;
    while (index < instanceCount)
    {
        var sample = data.Get(index);
        y = PredictInput(sample);
        r = sample.ToVector();
        var residual = r.Difference(y);
        errorSum += residual.DotProduct();
        index++;
    }

    var meanError = errorSum / instanceCount;
    return new Performance.Performance(meanError);
}
/**
 * <summary> Measures accuracy on an InstanceList: every instance is passed to Predict, and the fraction of
 * instances whose predicted label equals their own class label is wrapped in a ClassificationPerformance.</summary>
 *
 * <param name="data">InstanceList to test.</param>
 * <returns>ClassificationPerformance built from (number of matching predictions) / (number of instances).</returns>
 */
public ClassificationPerformance TestClassifier(InstanceList.InstanceList data)
{
    // Kept as a double so the final division is a floating-point division.
    double instanceCount = data.Size();
    var hits = 0;

    var index = 0;
    while (index < data.Size())
    {
        var sample = data.Get(index);
        var actual = sample.GetClassLabel();
        var predicted = Predict(sample);
        if (actual == predicted)
        {
            hits++;
        }

        index++;
    }

    var accuracy = hits / instanceCount;
    return new ClassificationPerformance(accuracy);
}
/**
 * <summary> Builds an auto encoder from a train set and a validation set. K is set to the number of continuous
 * attributes of the first training instance, and the weight matrices _W and _V are allocated from the hidden
 * node count and a seeded random generator. For each epoch the train set is shuffled and both matrices are
 * updated once per instance. The model is then evaluated with TestAutoEncoder on the validation set, and the
 * matrices with the lowest error rate so far are remembered. After every epoch the learning rate is multiplied
 * by the eta decrease taken from the parameters. When training ends, _W and _V are set to the remembered
 * best matrices.</summary>
 *
 * <param name="trainSet">InstanceList to use as train set.</param>
 * <param name="validationSet">InstanceList to use as validation set.</param>
 * <param name="parameters">MultiLayerPerceptronParameter; this constructor reads hiddenNodes, seed, epoch,
 * learningRate and etaDecrease.</param>
 */
public AutoEncoderModel(InstanceList.InstanceList trainSet, InstanceList.InstanceList validationSet, MultiLayerPerceptronParameter parameters) : base(trainSet)
{
    // The network reconstructs its own input, so the output size equals the input size.
    K = trainSet.Get(0).ContinuousAttributeSize();
    AllocateWeights(parameters.GetHiddenNodes(), new Random(parameters.GetSeed()));
    var bestW = (Matrix)_W.Clone();
    var bestV = (Matrix)_V.Clone();
    var bestPerformance = new Performance.Performance(double.MaxValue);
    var epoch = parameters.GetEpoch();
    var learningRate = parameters.GetLearningRate();
    for (var i = 0; i < epoch; i++)
    {
        trainSet.Shuffle(parameters.GetSeed());
        for (var j = 0; j < trainSet.Size(); j++)
        {
            // Forward pass: input -> sigmoid hidden layer -> linear output.
            CreateInputVector(trainSet.Get(j));
            r = trainSet.Get(j).ToVector();
            var hidden = CalculateHidden(x, _W, ActivationFunction.SIGMOID);
            var hiddenBiased = hidden.Biased();
            y = _V.MultiplyWithVectorFromRight(hiddenBiased);

            // Backward pass: output error first, then the error propagated to the hidden layer.
            var rMinusY = r.Difference(y);
            var deltaV = rMinusY.Multiply(hiddenBiased);
            var oneMinusHidden = CalculateOneMinusHidden(hidden);
            var tmph = _V.MultiplyWithVectorFromLeft(rMinusY);
            // Drops element 0 (presumably the component belonging to the bias added by Biased() - confirm).
            tmph.Remove(0);
            var tmpHidden = oneMinusHidden.ElementProduct(hidden.ElementProduct(tmph));
            var deltaW = tmpHidden.Multiply(x);

            // Both deltas are computed from the pre-update weights before either matrix is changed.
            deltaV.MultiplyWithConstant(learningRate);
            _V.Add(deltaV);
            deltaW.MultiplyWithConstant(learningRate);
            _W.Add(deltaW);
        }

        var currentPerformance = TestAutoEncoder(validationSet);
        if (currentPerformance.GetErrorRate() < bestPerformance.GetErrorRate())
        {
            bestPerformance = currentPerformance;
            bestW = (Matrix)_W.Clone();
            bestV = (Matrix)_V.Clone();
        }

        // Honour the configured decay instead of a fixed 0.95, as the other network constructors do.
        learningRate *= parameters.GetEtaDecrease();
    }

    _W = bestW;
    _V = bestV;
}
/**
 * <summary> Tests an instance list with the current model. A ConfusionMatrix is created from the union of
 * possible class labels of the test set, every instance is recorded in it as (own class label, label predicted
 * by the model), and the matrix is wrapped in a DetailedClassificationPerformance.</summary>
 *
 * <param name="testSet">Test data (list of instances) to be tested.</param>
 * <returns>A DetailedClassificationPerformance built from the filled confusion matrix.</returns>
 */
public virtual Performance.Performance Test(InstanceList.InstanceList testSet)
{
    var confusion = new ConfusionMatrix(testSet.GetUnionOfPossibleClassLabels());

    var position = 0;
    while (position < testSet.Size())
    {
        var sample = testSet.Get(position);
        var actual = sample.GetClassLabel();
        var predicted = model.Predict(sample);
        confusion.Classify(actual, predicted);
        position++;
    }

    return new DetailedClassificationPerformance(confusion);
}
/**
 * <summary> Builds a deep network from a train set, a validation set and a DeepNetworkParameter. The base
 * constructor is invoked with the train set, the activation function is read from the parameters and the
 * weight matrices are allocated. For each epoch the train set is shuffled. For every instance the input is
 * propagated through all hidden layers (each hidden output is also stored with a bias element), the output
 * error is propagated backwards layer by layer to obtain one delta matrix per weight matrix, and every weight
 * matrix is updated with its delta scaled by the learning rate. After each epoch the classifier is evaluated
 * on the validation set and the weights are remembered if the accuracy strictly improved. The learning rate
 * is then multiplied by the eta decrease. When training ends, the weight list is replaced by the remembered
 * best weights.</summary>
 *
 * <param name="trainSet">InstanceList to be used as trainSet.</param>
 * <param name="validationSet">InstanceList to be used as validationSet.</param>
 * <param name="parameters">DeepNetworkParameter input; this constructor reads activationFunction, epoch,
 * learningRate, seed and etaDecrease and passes the whole object to AllocateWeights.</param>
 */
public DeepNetworkModel(InstanceList.InstanceList trainSet, InstanceList.InstanceList validationSet, DeepNetworkParameter parameters) : base(trainSet)
{
    var deltaWeights = new List<Matrix>();
    var hidden = new List<Vector>();
    var hiddenBiased = new List<Vector>();
    _activationFunction = parameters.GetActivationFunction();
    AllocateWeights(parameters);
    var bestWeights = SetBestWeights();
    var bestClassificationPerformance = new ClassificationPerformance(0.0);
    var epoch = parameters.GetEpoch();
    var learningRate = parameters.GetLearningRate();
    Vector tmph;
    // Declared outside the loops: tmpHidden carries the propagated error from layer k + 1 to layer k, and
    // activationDerivative keeps its previous value if no switch case matches.
    var tmpHidden = new Vector(1, 0.0);
    var activationDerivative = new Vector(1, 0.0);
    for (var i = 0; i < epoch; i++)
    {
        trainSet.Shuffle(parameters.GetSeed());
        for (var j = 0; j < trainSet.Size(); j++)
        {
            CreateInputVector(trainSet.Get(j));
            hidden.Clear();
            hiddenBiased.Clear();
            deltaWeights.Clear();

            // Forward pass: layer 0 reads the input vector, deeper layers read the previous biased output.
            for (var k = 0; k < _hiddenLayerSize; k++)
            {
                if (k == 0)
                {
                    hidden.Add(CalculateHidden(x, _weights[k], _activationFunction));
                }
                else
                {
                    hidden.Add(CalculateHidden(hiddenBiased[k - 1], _weights[k], _activationFunction));
                }

                hiddenBiased.Add(hidden[k].Biased());
            }

            // Output layer error and its delta; deltas are inserted at the front so that the list ends up
            // in the same order as _weights.
            var rMinusY = CalculateRMinusY(trainSet.Get(j), hiddenBiased[_hiddenLayerSize - 1], _weights[_weights.Count - 1]);
            deltaWeights.Insert(0, rMinusY.Multiply(hiddenBiased[_hiddenLayerSize - 1]));

            // Backward pass over the hidden layers, from the last one down to the first.
            for (var k = _weights.Count - 2; k >= 0; k--)
            {
                if (k == _weights.Count - 2)
                {
                    tmph = _weights[k + 1].MultiplyWithVectorFromLeft(rMinusY);
                }
                else
                {
                    tmph = _weights[k + 1].MultiplyWithVectorFromLeft(tmpHidden);
                }

                // Drops element 0 (presumably the component belonging to the bias added by Biased() - confirm).
                tmph.Remove(0);
                switch (_activationFunction)
                {
                    case ActivationFunction.SIGMOID:
                        var oneMinusHidden = CalculateOneMinusHidden(hidden[k]);
                        activationDerivative = oneMinusHidden.ElementProduct(hidden[k]);
                        break;
                    case ActivationFunction.TANH:
                        // The ones vector must be as long as this layer's output. hidden.Count is the number
                        // of layers stored in the list, not the width of layer k.
                        var one = new Vector(hidden[k].Size(), 1.0);
                        // NOTE(review): hidden[k] came from CalculateHidden with the same activation function;
                        // if that call already applied tanh, it is applied a second time here - confirm.
                        hidden[k].Tanh();
                        activationDerivative = one.Difference(hidden[k].ElementProduct(hidden[k]));
                        break;
                    case ActivationFunction.RELU:
                        hidden[k].ReluDerivative();
                        activationDerivative = hidden[k];
                        break;
                }

                tmpHidden = tmph.ElementProduct(activationDerivative);
                if (k == 0)
                {
                    deltaWeights.Insert(0, tmpHidden.Multiply(x));
                }
                else
                {
                    deltaWeights.Insert(0, tmpHidden.Multiply(hiddenBiased[k - 1]));
                }
            }

            // All deltas were computed from the pre-update weights; apply them now.
            for (var k = 0; k < _weights.Count; k++)
            {
                deltaWeights[k].MultiplyWithConstant(learningRate);
                _weights[k].Add(deltaWeights[k]);
            }
        }

        var currentClassificationPerformance = TestClassifier(validationSet);
        if (currentClassificationPerformance.GetAccuracy() > bestClassificationPerformance.GetAccuracy())
        {
            bestClassificationPerformance = currentClassificationPerformance;
            bestWeights = SetBestWeights();
        }

        learningRate *= parameters.GetEtaDecrease();
    }

    // Replace the trained weights with the best ones seen on the validation set.
    _weights.Clear();
    foreach (var m in bestWeights)
    {
        _weights.Add(m);
    }
}
/**
 * <summary> Builds a decision node (and, through the CreateChildrenFor... helpers, its subtree) from the given
 * data. The node's class label is set with Classifier.GetMaximum over the class labels of the data, and the
 * node starts as a leaf. Construction stops there when the data holds a single distinct class label, or when
 * isStump is set and a condition was supplied.
 * <p/>
 * Otherwise the candidate attribute indices are collected. When the parameter asks for an attribute subset
 * smaller than the number of attributes, the index list is shuffled with a generator seeded from the parameter
 * and only the first attributeSubsetSize indices are examined; otherwise all attributes are examined. The
 * entropy of the class distribution of the data is the value a split has to beat.
 * <p/>
 * For a DiscreteIndexedAttribute, every index value k with a non-empty class distribution is tried: that
 * distribution is temporarily removed from the overall class distribution, the size-weighted entropy of the
 * two parts is computed, and the distribution is added back.
 * <p/>
 * For a DiscreteAttribute, the entropy is taken from EntropyForDiscreteAttribute.
 * <p/>
 * For a ContinuousAttribute, the data is sorted on that attribute and scanned once. Each instance is moved
 * from the "left" distribution (initially the whole class distribution) to the "right" one (initially empty),
 * and whenever the attribute value changes, the midpoint between the previous and the current value is
 * evaluated as a split with the size-weighted entropy of the two distributions.
 * <p/>
 * If some candidate has a strictly lower entropy than the starting value, the node stops being a leaf and
 * its children are created for the best attribute.</summary>
 *
 * <param name="data">InstanceList input.</param>
 * <param name="condition">DecisionCondition stored on this node.</param>
 * <param name="parameter">RandomForestParameter; attributeSubsetSize and seed are read when it is not null.</param>
 * <param name="isStump">Refers to decision trees with only 1 splitting rule.</param>
 */
public DecisionNode(InstanceList.InstanceList data, DecisionCondition condition, RandomForestParameter parameter, bool isStump)
{
    int bestAttribute = -1, size;
    double bestSplitValue = 0;
    this._condition = condition;
    this._data = data;
    _classLabel = Classifier.Classifier.GetMaximum(data.GetClassLabels());
    _leaf = true;
    var classLabels = data.GetDistinctClassLabels();
    if (classLabels.Count == 1)
    {
        return;
    }

    if (isStump && condition != null)
    {
        return;
    }

    var attributeCount = data.Get(0).AttributeSize();
    var indexList = new List<int>();
    for (var i = 0; i < attributeCount; i++)
    {
        indexList.Add(i);
    }

    if (parameter != null && parameter.GetAttributeSubsetSize() < attributeCount)
    {
        // Randomise which attributes form the subset (Fisher-Yates). Without this the subset would always
        // be the first attributeSubsetSize attributes. The permutation depends only on the seed and the
        // attribute count.
        var random = new System.Random(parameter.GetSeed());
        for (var last = indexList.Count - 1; last > 0; last--)
        {
            var pick = random.Next(last + 1);
            var held = indexList[last];
            indexList[last] = indexList[pick];
            indexList[pick] = held;
        }

        size = parameter.GetAttributeSubsetSize();
    }
    else
    {
        size = attributeCount;
    }

    var classDistribution = data.ClassDistribution();
    var bestEntropy = data.ClassDistribution().Entropy();
    for (var j = 0; j < size; j++)
    {
        var index = indexList[j];
        double entropy;
        // DiscreteIndexedAttribute is tested first; the order of these type tests is kept from the original.
        if (data.Get(0).GetAttribute(index) is DiscreteIndexedAttribute)
        {
            for (var k = 0; k < ((DiscreteIndexedAttribute)data.Get(0).GetAttribute(index)).GetMaxIndex(); k++)
            {
                var distribution = data.DiscreteIndexedAttributeClassDistribution(index, k);
                if (distribution.GetSum() > 0)
                {
                    // Temporarily split the overall distribution into "value k" and "everything else".
                    classDistribution.RemoveDistribution(distribution);
                    entropy = (classDistribution.Entropy() * classDistribution.GetSum() +
                               distribution.Entropy() * distribution.GetSum()) / data.Size();
                    if (entropy < bestEntropy)
                    {
                        bestEntropy = entropy;
                        bestAttribute = index;
                        bestSplitValue = k;
                    }

                    // Restore the overall distribution for the next candidate.
                    classDistribution.AddDistribution(distribution);
                }
            }
        }
        else if (data.Get(0).GetAttribute(index) is DiscreteAttribute)
        {
            entropy = EntropyForDiscreteAttribute(index);
            if (entropy < bestEntropy)
            {
                bestEntropy = entropy;
                bestAttribute = index;
            }
        }
        else if (data.Get(0).GetAttribute(index) is ContinuousAttribute)
        {
            // Note: this reorders the instances inside data.
            data.Sort(index);
            var previousValue = double.MinValue;
            var leftDistribution = data.ClassDistribution();
            var rightDistribution = new DiscreteDistribution();
            for (var k = 0; k < data.Size(); k++)
            {
                var instance = data.Get(k);
                var value = ((ContinuousAttribute)instance.GetAttribute(index)).GetValue();
                if (k == 0)
                {
                    previousValue = value;
                }
                else if (value != previousValue)
                {
                    // A candidate threshold exists only between two distinct consecutive values.
                    var splitValue = (previousValue + value) / 2;
                    previousValue = value;
                    entropy = (leftDistribution.GetSum() / data.Size()) * leftDistribution.Entropy() +
                              (rightDistribution.GetSum() / data.Size()) * rightDistribution.Entropy();
                    if (entropy < bestEntropy)
                    {
                        bestEntropy = entropy;
                        bestSplitValue = splitValue;
                        bestAttribute = index;
                    }
                }

                // Move the current instance across only after the split before it has been evaluated.
                leftDistribution.RemoveItem(instance.GetClassLabel());
                rightDistribution.AddItem(instance.GetClassLabel());
            }
        }
    }

    if (bestAttribute != -1)
    {
        _leaf = false;
        if (data.Get(0).GetAttribute(bestAttribute) is DiscreteIndexedAttribute)
        {
            CreateChildrenForDiscreteIndexed(bestAttribute, (int)bestSplitValue, parameter, isStump);
        }
        else if (data.Get(0).GetAttribute(bestAttribute) is DiscreteAttribute)
        {
            CreateChildrenForDiscrete(bestAttribute, parameter, isStump);
        }
        else if (data.Get(0).GetAttribute(bestAttribute) is ContinuousAttribute)
        {
            CreateChildrenForContinuous(bestAttribute, bestSplitValue, parameter, isStump);
        }
    }
}
/**
 * <summary> Creates the two children of this node for a split on a discrete indexed attribute. The node's data
 * is divided with DivideWithRespectToIndexedAttribute. The first child receives partition 0 together with a
 * condition that holds the given attribute value; the second child receives partition 1 together with a
 * condition that holds the index -1. Both conditions carry the max index of the attribute as read from the
 * first instance of the node's data.</summary>
 *
 * <param name="attributeIndex">Index of the attribute.</param>
 * <param name="attributeValue">Value of the attribute.</param>
 * <param name="parameter">RandomForestParameter handed on to both children.</param>
 * <param name="isStump">Refers to decision trees with only 1 splitting rule; handed on to both children.</param>
 */
private void CreateChildrenForDiscreteIndexed(int attributeIndex, int attributeValue, RandomForestParameter parameter, bool isStump)
{
    var partitions = _data.DivideWithRespectToIndexedAttribute(attributeIndex, attributeValue);
    var children = new List<DecisionNode>();

    // Child for the first partition, with a condition on the given attribute value.
    var matchingData = partitions.Get(0);
    var matchingMaxIndex = ((DiscreteIndexedAttribute)_data.Get(0).GetAttribute(attributeIndex)).GetMaxIndex();
    var matchingCondition = new DecisionCondition(attributeIndex,
        new DiscreteIndexedAttribute("", attributeValue, matchingMaxIndex));
    children.Add(new DecisionNode(matchingData, matchingCondition, parameter, isStump));

    // Child for the second partition, with a condition that uses index -1.
    var remainingData = partitions.Get(1);
    var remainingMaxIndex = ((DiscreteIndexedAttribute)_data.Get(0).GetAttribute(attributeIndex)).GetMaxIndex();
    var remainingCondition = new DecisionCondition(attributeIndex,
        new DiscreteIndexedAttribute("", -1, remainingMaxIndex));
    children.Add(new DecisionNode(remainingData, remainingCondition, parameter, isStump));

    // Assigned only after both children were built successfully.
    _children = children;
}
/**
 * <summary> Initialises the members shared by the network models: classLabels receives the distinct class
 * labels of the train set, K their count, and d the number of continuous attributes of the first training
 * instance.</summary>
 *
 * <param name="trainSet">InstanceList to use as train set.</param>
 */
public NeuralNetworkModel(InstanceList.InstanceList trainSet)
{
    var distinctLabels = trainSet.GetDistinctClassLabels();
    classLabels = distinctLabels;
    K = distinctLabels.Count;

    var firstInstance = trainSet.Get(0);
    d = firstInstance.ContinuousAttributeSize();
}
/**
 * <summary> Builds a multi layer perceptron with one hidden layer from a train set and a validation set. The
 * activation function is read from the parameters, and the weight matrices W and _V are allocated from the
 * hidden node count and a seeded random generator. For each epoch the train set is shuffled and both matrices
 * are updated once per instance. The classifier is then evaluated on the validation set, and the matrices
 * with the highest accuracy so far are remembered. After every epoch the learning rate is multiplied by the
 * eta decrease. When training ends, W and _V are set to the remembered best matrices.</summary>
 *
 * <param name="trainSet">InstanceList that is used to train.</param>
 * <param name="validationSet">InstanceList that is used to validate.</param>
 * <param name="parameters">Multi layer perceptron parameters; this constructor reads activationFunction,
 * hiddenNodes, seed, epoch, learningRate and etaDecrease.</param>
 */
public MultiLayerPerceptronModel(InstanceList.InstanceList trainSet, InstanceList.InstanceList validationSet, MultiLayerPerceptronParameter parameters) : base(trainSet)
{
    _activationFunction = parameters.GetActivationFunction();
    AllocateWeights(parameters.GetHiddenNodes(), new Random(parameters.GetSeed()));

    var epochCount = parameters.GetEpoch();
    var eta = parameters.GetLearningRate();
    var bestSoFar = new ClassificationPerformance(0.0);
    var bestInputWeights = (Matrix)W.Clone();
    var bestOutputWeights = (Matrix)_V.Clone();

    // Lives outside the loops: it keeps its previous value when no activation branch below matches.
    var derivative = new Vector(1, 0.0);

    var epochIndex = 0;
    while (epochIndex < epochCount)
    {
        trainSet.Shuffle(parameters.GetSeed());

        for (var position = 0; position < trainSet.Size(); position++)
        {
            var sample = trainSet.Get(position);

            // Forward pass.
            CreateInputVector(sample);
            var hiddenOutput = CalculateHidden(x, W, _activationFunction);
            var hiddenWithBias = hiddenOutput.Biased();

            // Output error and the error propagated back through _V.
            var outputError = CalculateRMinusY(sample, hiddenWithBias, _V);
            var outputDelta = outputError.Multiply(hiddenWithBias);
            var propagated = _V.MultiplyWithVectorFromLeft(outputError);
            // Drops element 0 (presumably the component belonging to the bias added by Biased() - confirm).
            propagated.Remove(0);

            if (_activationFunction == ActivationFunction.SIGMOID)
            {
                var complement = CalculateOneMinusHidden(hiddenOutput);
                derivative = complement.ElementProduct(hiddenOutput);
            }
            else if (_activationFunction == ActivationFunction.TANH)
            {
                var ones = new Vector(hiddenOutput.Size(), 1.0);
                // NOTE(review): hiddenOutput came from CalculateHidden with the same activation function;
                // if that call already applied tanh, it is applied a second time here - confirm.
                hiddenOutput.Tanh();
                derivative = ones.Difference(hiddenOutput.ElementProduct(hiddenOutput));
            }
            else if (_activationFunction == ActivationFunction.RELU)
            {
                hiddenOutput.ReluDerivative();
                derivative = hiddenOutput;
            }

            var hiddenError = propagated.ElementProduct(derivative);
            var inputDelta = hiddenError.Multiply(x);

            // Both deltas were computed from the pre-update weights; apply _V first, then W.
            outputDelta.MultiplyWithConstant(eta);
            _V.Add(outputDelta);
            inputDelta.MultiplyWithConstant(eta);
            W.Add(inputDelta);
        }

        // Keep copies of both matrices whenever validation accuracy strictly improves.
        var validation = TestClassifier(validationSet);
        var improved = validation.GetAccuracy() > bestSoFar.GetAccuracy();
        if (improved)
        {
            bestSoFar = validation;
            bestInputWeights = (Matrix)W.Clone();
            bestOutputWeights = (Matrix)_V.Clone();
        }

        eta *= parameters.GetEtaDecrease();
        epochIndex++;
    }

    W = bestInputWeights;
    _V = bestOutputWeights;
}