/// <summary>
/// Trains the network by stochastic gradient descent: for each epoch, shuffles the
/// data set, backpropagates one mini-batch of size <c>Batching</c> to accumulate
/// weight/bias gradients, then applies a single gradient-descent step.
/// Uses the configured <c>ErrorFunction</c> for the output-layer derivative.
/// </summary>
/// <param name="dataSet">Training data; shuffled and batched each epoch.</param>
/// <param name="epochs">Number of epochs (one batch per epoch) to run.</param>
/// <exception cref="InvalidOperationException">
/// Thrown when the network's output size does not match the example's label size.
/// </exception>
public void Train2(DataSet.DataSet dataSet, int epochs)
{
    Console.WriteLine("Initial Loss:" + CalculateMeanErrorOverDataSet(dataSet));

    // Log progress roughly ten times over the run. Math.Max guards the integer
    // division: with epochs < 10 the original (epochs / 10) was 0 and the
    // modulo below threw DivideByZeroException.
    int logInterval = Math.Max(1, epochs / 10);

    for (int i = 0; i < epochs; i++)
    {
        dataSet.Shuffle();
        List<DataRow> batch = dataSet.NextBatch(this.Batching);

        foreach (DataRow example in batch)
        {
            // Forward pass; populates Activations and WeightedSum on every layer,
            // which the backward pass below reads.
            double[] result = this.FeedForward(example.GetFeatures());
            double[] labels = example.GetLabels();
            if (result.Length != labels.Length)
            {
                throw new InvalidOperationException("Inconsistent array size, Incorrect implementation.");
            }

            // Backward pass: from the output layer down to layer 1
            // (layer 0 is the input layer and has no incoming weights).
            for (int l = this.Layers.Count - 1; l > 0; l--)
            {
                if (l == this.Layers.Count - 1)
                {
                    // Output layer: dC/da comes directly from the error function.
                    for (int j = 0; j < this.Layers[l].CostDerivatives.Length; j++)
                    {
                        this.Layers[l].CostDerivatives[j] =
                            ErrorFunction.GetDerivativeValue(labels[j], this.Layers[l].Activations[j]);
                    }
                }
                else
                {
                    // Hidden layer: propagate dC/da back through layer l+1.
                    for (int j = 0; j < this.Layers[l].CostDerivatives.Length; j++)
                    {
                        double acum = 0;
                        for (int j2 = 0; j2 < Layers[l + 1].Size; j2++)
                        {
                            acum += Layers[l + 1].WeightMatrix[j2, j]
                                  * this.Layers[l + 1].ActivationFunction.GetDerivativeValue(Layers[l + 1].WeightedSum[j2])
                                  * Layers[l + 1].CostDerivatives[j2];
                        }
                        this.Layers[l].CostDerivatives[j] = acum;
                    }
                }

                // Accumulate this example's gradient contributions.
                for (int j = 0; j < this.Layers[l].Activations.Length; j++)
                {
                    // delta_j = f'(z_j) * dC/da_j is shared by the bias and every
                    // incoming weight of neuron j; hoisted out of the k loop
                    // (the original recomputed it per weight).
                    double delta = this.Layers[l].ActivationFunction.GetDerivativeValue(Layers[l].WeightedSum[j])
                                 * Layers[l].CostDerivatives[j];
                    this.Layers[l].BiasVectorChangeRecord[j] += delta;
                    for (int k = 0; k < Layers[l].WeightMatrix.GetLength(1); k++)
                    {
                        this.Layers[l].WeightMatrixChangeRecord[j, k] += Layers[l - 1].Activations[k] * delta;
                    }
                }
            }
        }

        // Average the accumulated gradients over the batch and update parameters.
        TakeGradientDescentStep(batch.Count);

        if ((i + 1) % logInterval == 0)
        {
            Console.WriteLine("Epoch " + (i + 1) + ", Avg.Loss:" + CalculateMeanErrorOverDataSet(dataSet));
        }
    }
}
/// <summary>
/// Legacy trainer: partitions the data set into batches of size <c>Batching</c>
/// each epoch and runs backpropagation with a hard-coded squared-error derivative
/// (2 * (activation - label)); <c>Train2</c> generalizes this via <c>ErrorFunction</c>.
/// Restores the closing brace of the batch loop that had been commented out
/// ("// }"), which left the method's braces unbalanced and moved the per-epoch
/// loss log inside the batch loop; the mean loss is now logged once per epoch.
/// </summary>
/// <param name="dataSet">Training data; shuffled and fully batched each epoch.</param>
/// <param name="epochs">Number of passes over all batches.</param>
/// <exception cref="InvalidOperationException">
/// Thrown when the network's output size does not match the example's label size.
/// </exception>
private void Train(DataSet.DataSet dataSet, int epochs)
{
    Console.WriteLine("MSE:" + CalculateMeanErrorOverDataSet(dataSet));

    for (int i = 0; i < epochs; i++)
    {
        dataSet.Shuffle();
        List<List<DataRow>> batch = dataSet.Batch(this.Batching);

        foreach (List<DataRow> row in batch)
        {
            foreach (DataRow example in row)
            {
                // Forward pass; populates Activations and WeightedSum on every
                // layer for the backward pass below.
                double[] result = this.FeedForward(example.GetFeatures());
                double[] labels = example.GetLabels();
                if (result.Length != labels.Length)
                {
                    throw new InvalidOperationException("Inconsistent array size, Incorrect implementation.");
                }

                // Backward pass: from the output layer down to layer 1
                // (layer 0 is the input layer and has no incoming weights).
                for (int l = this.Layers.Count - 1; l > 0; l--)
                {
                    if (l == this.Layers.Count - 1)
                    {
                        // Output layer: derivative of squared error, d/da (a-y)^2 = 2(a-y).
                        for (int j = 0; j < this.Layers[l].CostDerivatives.Length; j++)
                        {
                            this.Layers[l].CostDerivatives[j] = 2.0 * (this.Layers[l].Activations[j] - labels[j]);
                        }
                    }
                    else
                    {
                        // Hidden layer: propagate dC/da back through layer l+1.
                        for (int j = 0; j < this.Layers[l].CostDerivatives.Length; j++)
                        {
                            double acum = 0;
                            for (int j2 = 0; j2 < Layers[l + 1].Size; j2++)
                            {
                                acum += Layers[l + 1].WeightMatrix[j2, j]
                                      * Layers[l + 1].ActivationFunction.GetDerivativeValue(Layers[l + 1].WeightedSum[j2])
                                      * Layers[l + 1].CostDerivatives[j2];
                            }
                            this.Layers[l].CostDerivatives[j] = acum;
                        }
                    }

                    // Accumulate this example's gradient contributions.
                    for (int j = 0; j < this.Layers[l].Activations.Length; j++)
                    {
                        // delta_j = f'(z_j) * dC/da_j is shared by the bias and every
                        // incoming weight of neuron j; hoisted out of the k loop.
                        double delta = Layers[l].ActivationFunction.GetDerivativeValue(Layers[l].WeightedSum[j])
                                     * Layers[l].CostDerivatives[j];
                        this.Layers[l].BiasVectorChangeRecord[j] += delta;
                        for (int k = 0; k < Layers[l].WeightMatrix.GetLength(1); k++)
                        {
                            this.Layers[l].WeightMatrixChangeRecord[j, k] += Layers[l - 1].Activations[k] * delta;
                        }
                    }
                }
            }

            // Average the accumulated gradients over this batch and update parameters.
            TakeGradientDescentStep(row.Count);
        }

        // Per-epoch progress log (was accidentally inside the batch loop).
        Console.WriteLine(i + ":" + CalculateMeanErrorOverDataSet(dataSet));
    }
}