private static List<double[]> TrainWithSGD(Network n, DataSet ds, DataSet dt)
{
    Gradientdescent br = new Gradientdescent();

    // Build the trainer parameters: momentum SGD (no Nesterov, no resilient updates)
    // with L2 regularization and a full-batch update (batchSize = null).
    GradientDescentParams passedParams = new GradientDescentParams();
    passedParams.network = n;
    passedParams.trainingSet = ds;
    passedParams.learningRate = 0.8;
    passedParams.numberOfEpochs = 100;
    passedParams.shuffle = false;
    passedParams.debug = n.Debug;
    passedParams.nestrov = false;
    passedParams.momentum = 0.7;
    passedParams.resilient = false;
    passedParams.resilientUpdateAccelerationRate = 0.3;
    passedParams.resilientUpdateSlowDownRate = 0.1;
    passedParams.regularization = Regularizations.L2;
    passedParams.regularizationRate = 0.001;
    passedParams.validationSet = dt;
    passedParams.batchSize = null; // null means the whole training set is one batch

    var learningCurve = br.Train(passedParams);
    return learningCurve;
}
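// A minimal usage sketch (illustrative, not part of the pipeline), assuming a Network
// and two DataSets have already been built, e.g. as in TrainAndPRoduceFinalResult below.
// Each row of the returned learning curve holds, in order: training loss, validation
// loss, training accuracy, and validation accuracy for one epoch (zeros where not
// applicable), so it can be dumped straight to a CSV for plotting.
private static void TrainAndDumpCurveExample(Network n, DataSet train, DataSet validation)
{
    List<double[]> learningCurve = TrainWithSGD(n, train, validation);
    File.WriteAllText("sgdLearningCurve.csv",
        string.Join("\n", learningCurve.Select(row => string.Join(",", row))));
}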
private static void Parallel_PerformExampleComputations(GradientDescentParams passedParams, Dictionary<int, Matrix<double>> weightsUpdates, int k, double[] examplesLosses, int slotInexamplesLosses)
{
    var nwOutput = passedParams.network.Predict(passedParams.trainingSet.Inputs.Row(k));
    var label = passedParams.trainingSet.Labels.Row(k);

    // Compute the per-example loss as the squared error over the output units.
    // TODO: expose the loss computation as a parameter so the user can specify it freely.
    var loss = ((label - nwOutput).PointwiseMultiply(label - nwOutput)).Sum();
    var residual = label - nwOutput;

    if (passedParams.debug)
    {
        Console.WriteLine("Target:{0}", label);
        Console.WriteLine("Calculated:{0}", nwOutput);
        Console.WriteLine("Target-calculated (residual):{0}", residual);
    }

    // Guard against NaNs propagating into the gradients.
    residual = residual.Map(r => double.IsNaN(r) ? 0 : r);

    Parallel_BackPropForExample(passedParams, weightsUpdates, residual); // weightsUpdates is accumulated here

    // For MEE (Mean Euclidean Error) store the Euclidean distance, otherwise the squared error.
    examplesLosses[slotInexamplesLosses] = passedParams.MEE ? Math.Sqrt(loss) : loss;
}
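// Illustrative sketch of the two loss modes above (not used by the trainer): with MEE
// the per-example contribution is the Euclidean distance sqrt(sum_i (t_i - o_i)^2),
// otherwise it is the plain squared error sum_i (t_i - o_i)^2. Averaging the former
// over the set yields the Mean Euclidean Error, the latter the mean squared error.
private static double ExampleLossSketch(Vector<double> label, Vector<double> output, bool mee)
{
    double squaredError = ((label - output).PointwiseMultiply(label - output)).Sum();
    return mee ? Math.Sqrt(squaredError) : squaredError;
}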
/// <summary>
/// Trains the final model and writes out the final CUP results.
/// </summary>
private static void TrainAndPRoduceFinalResult()
{
    AA1_MLP.DataManagers.CupDataManager dm = new AA1_MLP.DataManagers.CupDataManager();
    DataSet trainDS = dm.LoadData(@"D:\dropbox\Dropbox\Master Course\SEM-3\ML\CM_CUP_Datasets\ML-17-PRJ lecture package-20171225\ML-CUP17-TR.csv", 10, 2, skip: 1, standardize: true);
    DataSet FinalTestDS = dm.LoadData(@"D:\dropbox\Dropbox\Master Course\SEM-3\ML\CM_CUP_Datasets\ML-17-PRJ lecture package-20171225\ML-CUP17-TS.csv", 10, skip: 1, reportOsutput: false, standardize: true);

    // Alternatively: AdamParams passedParams = new AdamParams(); IOptimizer trainer = new Adam();
    GradientDescentParams passedParams = new GradientDescentParams();
    Gradientdescent trainer = new Gradientdescent();

    passedParams.numberOfEpochs = 5000;
    passedParams.batchSize = 10;
    passedParams.trainingSet = trainDS;
    passedParams.learningRate = 0.001;
    passedParams.regularization = Regularizations.L2;
    passedParams.regularizationRate = 0.001;
    passedParams.nestrov = true;
    passedParams.resilient = false;
    passedParams.resilientUpdateAccelerationRate = 2;
    passedParams.resilientUpdateSlowDownRate = 0.5;
    passedParams.momentum = 0.5;
    passedParams.NumberOfHiddenUnits = 100;
    passedParams.trueThreshold = null;

    string path = "cupTrain" + passedParams.NumberOfHiddenUnits + "_lr" + passedParams.learningRate + "_reg" + passedParams.regularizationRate;

    // Building the architecture: 10 inputs -> tanh hidden layer -> 2 linear outputs.
    Network n = new Network(new List<Layer>()
    {
        new Layer(new ActivationIdentity(), true, 10),
        new Layer(new ActivationTanh(), true, passedParams.NumberOfHiddenUnits),
        // new Layer(new ActivationLeakyRelu(), true, 40),
        new Layer(new ActivationIdentity(), false, 2),
    }, false, AA1_MLP.Enums.WeightsInitialization.Xavier);

    passedParams.network = n;

    var watch = System.Diagnostics.Stopwatch.StartNew();
    List<double[]> learningCurve = trainer.Train(passedParams);
    watch.Stop();
    var elapsedMs = watch.ElapsedMilliseconds;
    Console.WriteLine("elapsed Time:{0} ms", elapsedMs);

    File.WriteAllText(path + ".txt", string.Join("\n", learningCurve.Select(s => string.Join(",", s))));
    ModelManager.SaveNetowrk(n, path + ".n");

    var predictions = ModelManager.GeneratorCUP(FinalTestDS, n);
    File.WriteAllText("OMG_LOC-OSM2-TS.txt", string.Join("\n", predictions.Select(s => string.Join(",", s))));
}
private static double ComputeValidationLoss(GradientDescentParams passedParams, List<int> testSetIndices, DataSet test)
{
    // Computing the validation loss; "test" already holds the validation rows in order.
    double validationError = 0;
    if (passedParams.validationSet != null)
    {
        for (int i = 0; i < testSetIndices.Count; i++)
        {
            var nwOutput = passedParams.network.Predict(test.Inputs.Row(i));
            var loss = ((test.Labels.Row(i) - nwOutput).PointwiseMultiply(test.Labels.Row(i) - nwOutput)).Sum();
            validationError += passedParams.MEE ? Math.Sqrt(loss) : loss;
        }
        validationError /= testSetIndices.Count;
    }
    return validationError;
}
private static void PerformBatchComputations(GradientDescentParams passedParams, Matrix<double> batchesIndices, ref Dictionary<int, Matrix<double>> previousWeightsUpdate, Dictionary<int, Matrix<double>> PreviousUpdateSigns, int epoch, ref double epochLoss, int batchIdx)
{
    if (passedParams.parallelize)
    {
        Parallel_PerformBatchComputations(passedParams, batchesIndices, ref previousWeightsUpdate, PreviousUpdateSigns, epoch, ref epochLoss, batchIdx);
    }
    else
    {
        Dictionary<int, Matrix<double>> momentumUpdate = new Dictionary<int, Matrix<double>>();
        double batchLoss = 0;
        Dictionary<int, Matrix<double>> weightsUpdates = new Dictionary<int, Matrix<double>>();

        // Not all batches have batchSize elements; the last one can be smaller.
        int numberOfBatchExamples = ((int)batchesIndices.Row(batchIdx).At(1) - (int)batchesIndices.Row(batchIdx).At(0)) + 1;
        var batchElementsIndices = Enumerable.Range((int)batchesIndices.Row(batchIdx).At(0), numberOfBatchExamples).ToList();
        if (passedParams.shuffle)
        {
            batchElementsIndices.Shuffle();
        }

        foreach (int k in batchElementsIndices) // for each element in the batch
        {
            batchLoss = PerformExampleComputations(passedParams, batchIdx, momentumUpdate, batchLoss, weightsUpdates, k);
        }

        if (passedParams.debug)
        {
            Console.WriteLine("batch end");
        }

        UpdateWeights(passedParams, previousWeightsUpdate, PreviousUpdateSigns, epoch, momentumUpdate, weightsUpdates, batchElementsIndices.Count);
        previousWeightsUpdate = ClonePrevWeightsUpdates(previousWeightsUpdate, weightsUpdates);

        epochLoss += batchLoss / numberOfBatchExamples;

        if (passedParams.network.Debug)
        {
            Console.WriteLine("Batch: {0} Error: {1}", batchIdx, batchLoss);
        }
    }
}
private static double Parallel_ComputeValidationLoss(GradientDescentParams passedParams, List<int> testSetIndices, DataSet test)
{
    // Computing the validation loss in parallel, one example per task.
    double validationError = 0;
    double[] validationErrors = new double[testSetIndices.Count];
    if (passedParams.validationSet != null)
    {
        Parallel.For(0, testSetIndices.Count, threadIdx =>
        {
            // "test" already holds the validation rows in order (see Train), so it is
            // indexed directly; indexing through testSetIndices again would re-permute it.
            var nwOutput = passedParams.network.Predict(test.Inputs.Row(threadIdx));
            var loss = ((test.Labels.Row(threadIdx) - nwOutput).PointwiseMultiply(test.Labels.Row(threadIdx) - nwOutput)).Sum();
            validationErrors[threadIdx] = passedParams.MEE ? Math.Sqrt(loss) : loss;
        });
        validationError = validationErrors.Sum() / testSetIndices.Count;
    }
    return validationError;
}
private static void UpdateWeights(GradientDescentParams passedParams, Dictionary<int, Matrix<double>> previousWeightsUpdate, Dictionary<int, Matrix<double>> PreviousUpdateSigns, int epoch, Dictionary<int, Matrix<double>> momentumUpdate, Dictionary<int, Matrix<double>> weightsUpdates, int numberOfBatchExamplesInBatch)
{
    for (int y = 0; y < weightsUpdates.Keys.Count; y++)
    {
        Matrix<double> finalUpdate = null;

        // Average the accumulated gradients over the batch.
        weightsUpdates[y] /= numberOfBatchExamplesInBatch;

        var resilientLearningRates = CreateMatrix.Dense(passedParams.network.Weights[y].RowCount, passedParams.network.Weights[y].ColumnCount,
            (epoch == 0) && passedParams.resilient ? passedParams.resilientUpdateSlowDownRate * passedParams.learningRate : passedParams.learningRate);

        // Resilient backpropagation: accelerate where the gradient sign is stable,
        // slow down where it flipped since the previous update.
        if (passedParams.resilient && PreviousUpdateSigns.ContainsKey(y))
        {
            var currentUpdateSigns = weightsUpdates[y].PointwiseSign();
            resilientLearningRates = PreviousUpdateSigns[y].PointwiseMultiply(currentUpdateSigns).Map(s => s > 0 ? passedParams.learningRate * passedParams.resilientUpdateAccelerationRate : passedParams.learningRate * passedParams.resilientUpdateSlowDownRate);
        }

        var prev_v = momentumUpdate[y].Clone();

        if (previousWeightsUpdate != null)
        {
            if (passedParams.regularization == Regularizations.L2)
            {
                momentumUpdate[y] += passedParams.momentum * previousWeightsUpdate[y] + resilientLearningRates.PointwiseMultiply(weightsUpdates[y] - 2 * passedParams.regularizationRate * passedParams.network.Weights[y]);
            }
            else
            {
                momentumUpdate[y] += passedParams.momentum * previousWeightsUpdate[y] + resilientLearningRates.PointwiseMultiply(weightsUpdates[y]);
            }
        }
        else
        {
            if (passedParams.regularization == Regularizations.L2)
            {
                momentumUpdate[y] += resilientLearningRates.PointwiseMultiply(weightsUpdates[y] - 2 * passedParams.regularizationRate * passedParams.network.Weights[y]);
            }
            else
            {
                momentumUpdate[y] += resilientLearningRates.PointwiseMultiply(weightsUpdates[y]);
            }
        }

        if (passedParams.nestrov)
        {
            // Nesterov momentum, "position update" form (cf. the cs231n notes):
            //   v_prev = v                          # back this up
            //   v = mu * v - learning_rate * dx     # velocity update stays the same
            //   x += -mu * v_prev + (1 + mu) * v    # position update changes form
            // (here the velocity update happened above, with the residual-based gradient)
            finalUpdate = (1 + passedParams.momentum) * momentumUpdate[y] - passedParams.momentum * prev_v;
            momentumUpdate[y] = finalUpdate.Clone();
        }
        else // plain momentum update, no Nesterov correction
        {
            finalUpdate = momentumUpdate[y];
        }

        passedParams.network.Weights[y] += finalUpdate;
        weightsUpdates[y] = finalUpdate.Clone();

        if (!PreviousUpdateSigns.ContainsKey(y))
        {
            PreviousUpdateSigns.Add(y, null);
        }
        PreviousUpdateSigns[y] = weightsUpdates[y].PointwiseSign();
    }
}
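// A minimal scalar sketch of the update above (illustrative only, not used by the
// trainer): one weight, a gradient step g, momentum mu, learning rate lr. It shows
// how the Nesterov correction (1 + mu) * v - mu * v_prev looks ahead along the
// velocity, compared with plain momentum which would apply v directly.
private static double NesterovStepSketch(double weight, double g, double mu, double lr, ref double v)
{
    double vPrev = v;                            // back up the velocity
    v = mu * v + lr * g;                         // velocity update, as in the momentum branch above
    double update = (1 + mu) * v - mu * vPrev;   // Nesterov position update
    return weight + update;
}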
private static void Parallel_BackPropForExample(GradientDescentParams passedParams, Dictionary<int, Matrix<double>> weightsUpdates, Vector<double> residual)
{
    // Work on deep clones of the layers so the per-example deltas don't mutate the shared network.
    var layers = new Layer[passedParams.network.Layers.Count];
    for (int lyrIdx = 0; lyrIdx < passedParams.network.Layers.Count; lyrIdx++)
    {
        layers[lyrIdx] = passedParams.network.Layers[lyrIdx].GetDeepClone();
    }

    // Backprop
    for (int layerIndex = layers.Length - 1; layerIndex >= 1; layerIndex--)
    {
        if (passedParams.debug)
        {
            Console.WriteLine("##### entering backpropagation layer index: {0} ######", layerIndex);
        }

        Vector<double> derivative = layers[layerIndex].Activation.CalculateDerivative(layers[layerIndex].LayerActivationsSumInputs);

        if (passedParams.debug)
        {
            Console.WriteLine("output sum (the input to the activation, LayerActivationsSumInputs): {0}", layers[layerIndex].LayerActivationsSumInputs);
            Console.WriteLine("derivative: {0}", derivative);
            Console.WriteLine("output sum margin of error (residual): {0}", residual);
            Console.WriteLine("computing delta (residual*derivative) of layer: {0}", layerIndex);
        }

        if (layerIndex == layers.Length - 1)
        {
            // Output layer: delta = residual ⊙ f'(z).
            layers[layerIndex].Delta = residual.PointwiseMultiply(derivative);
        }
        else
        {
            // Hidden layer: delta = (W * delta_next) ⊙ f'(z); drop the bias row if present.
            Matrix<double> wei = passedParams.network.Weights[layerIndex];
            if (wei.RowCount > derivative.Count) // there is a bias
            {
                wei = wei.SubMatrix(0, wei.RowCount - 1, 0, wei.ColumnCount);
            }
            layers[layerIndex].Delta = (wei * layers[layerIndex + 1].Delta).PointwiseMultiply(derivative);
        }

        if (passedParams.network.Debug)
        {
            Console.ForegroundColor = ConsoleColor.Red;
            Console.WriteLine("Weights layer:{0} {1}", layerIndex - 1, passedParams.network.Weights[layerIndex - 1]);
            Console.ResetColor();
        }

        Matrix<double> weightsUpdate = null;
        var acti = layers[layerIndex - 1].LayerActivations;
        // If the user asked for a bias, append a dummy neuron with activation 1 at the end of the layer's activations.
        if (layers[layerIndex - 1].Bias && acti.Count - layers[layerIndex - 1].NumberOfNeurons < 1)
        {
            var l = acti.ToList();
            l.Add(1); // adding the bias
            acti = CreateVector.Dense(l.ToArray());
        }

        // Gradient of this weight matrix: outer product of the previous layer's activations
        // and this layer's delta, accumulated over the batch (the division by the number of
        // examples happens in UpdateWeights).
        weightsUpdate = acti.OuterProduct(layers[layerIndex].Delta);
        lock (thisLock)
        {
            weightsUpdates[layerIndex - 1] = weightsUpdates[layerIndex - 1].Add(weightsUpdate);
        }

        if (passedParams.debug)
        {
            Console.WriteLine("weights updates of weights matrix {0} (outerproduct(layer {1} delta, layer {2} activations)):", layerIndex - 1, layerIndex, layerIndex - 1);
            Console.WriteLine("learning rate:{0}", passedParams.learningRate);
            Console.WriteLine("Layer:{0} delta: {1}", layerIndex, layers[layerIndex].Delta);
            Console.WriteLine("layer{0} output from activations:{1}", layerIndex - 1, layers[layerIndex - 1].LayerActivations);
            Console.WriteLine(weightsUpdate);
            Console.WriteLine("----------- Gradientdescent LayerIndex{0} ------------", layerIndex);
        }
    } // back propagating per layer
}
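// Illustrative sketch of the outer-product gradient above (assumes MathNet.Numerics,
// which this file already uses): for a layer with activations a (length m, bias
// included) and downstream delta d (length n), the weight gradient is the m x n
// matrix a ⊗ d, i.e. grad[i, j] = a[i] * d[j]. The values below are hypothetical.
private static Matrix<double> OuterProductGradientSketch()
{
    var a = CreateVector.Dense(new[] { 0.5, -1.0, 1.0 }); // previous layer activations + bias
    var d = CreateVector.Dense(new[] { 0.2, 0.1 });       // this layer's delta
    return a.OuterProduct(d);                             // 3 x 2 gradient block
}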
// Parameters (via GradientDescentParams): network, trainingSet, learningRate, numberOfEpochs,
// shuffle = false, batchSize = null, debug = false, regularizationRate = 0,
// regularization = Regularizations.None, momentum = 0, resilient = false,
// resilientUpdateAccelerationRate = 1, resilientUpdateSlowDownRate = 1, validationSet = null,
// trueThreshold = 0.5, MEE = false, reduceLearningRate = false, learningRateReduction = 0.5,
// learningRateReductionAfterEpochs = 1000, numberOfReductions = 2, nestrov = false
public override List<double[]> Train(TrainerParams trainParams)
{
    GradientDescentParams passedParams = (GradientDescentParams)trainParams;

    List<double[]> learningCurve = new List<double[]>();
    List<int> trainingSetIndices = Enumerable.Range(0, passedParams.trainingSet.Labels.RowCount).ToList();

    // Copy the validation set into a local DataSet, row by row.
    List<int> testSetIndices = null;
    DataSet test = new DataSet(null, null);
    if (passedParams.validationSet != null)
    {
        testSetIndices = Enumerable.Range(0, passedParams.validationSet.Labels.RowCount).ToList();
        test.Inputs = CreateMatrix.Dense(testSetIndices.Count, passedParams.validationSet.Inputs.ColumnCount, 0.0);
        test.Labels = CreateMatrix.Dense(testSetIndices.Count, passedParams.validationSet.Labels.ColumnCount, 0.0);
        for (int i = 0; i < testSetIndices.Count; i++)
        {
            test.Inputs.SetRow(i, passedParams.validationSet.Inputs.Row(testSetIndices[i]));
            test.Labels.SetRow(i, passedParams.validationSet.Labels.Row(testSetIndices[i]));
        }
    }

    if (passedParams.shuffle)
    {
        trainingSetIndices.Shuffle();
    }

    // A 2D matrix of shape (numberOfBatches, 2); each row is a batch: row[0] = batch start, row[1] = batch end (inclusive).
    Matrix<double> batchesIndices = null;

    Dictionary<int, Matrix<double>> previousWeightsUpdate = null; // for the momentum updates
    // For resilient backpropagation: if a gradient's sign changes we slow down by the
    // slow-down rate; if it stays the same we accelerate by the acceleration rate.
    Dictionary<int, Matrix<double>> PreviousUpdateSigns = new Dictionary<int, Matrix<double>>();

    for (int epoch = 0; epoch < passedParams.numberOfEpochs; epoch++)
    {
        if (passedParams.batchSize != null) // build "batchesIndices", whose rows hold the start and end index of each batch
        {
            var numberOfBatches = (int)Math.Ceiling(passedParams.trainingSet.Labels.RowCount / (double)passedParams.batchSize);
            batchesIndices = CreateMatrix.Dense(numberOfBatches, 2, 0.0);
            for (int j = 0; j < numberOfBatches; j++)
            {
                batchesIndices.SetRow(j, new double[] { j * (double)passedParams.batchSize, Math.Min(passedParams.trainingSet.Inputs.RowCount - 1, (j + 1) * (double)passedParams.batchSize - 1) });
            }
        }
        else // put the whole dataset in one batch
        {
            batchesIndices = CreateMatrix.Dense(1, 2, 0.0);
            batchesIndices.SetRow(0, new double[] { 0, passedParams.trainingSet.Inputs.RowCount - 1 });
        }

        // Average of the per-batch average losses; each batch contributes batchLoss / batchSize.
        double epochLoss = 0;
        for (int batchIdx = 0; batchIdx < batchesIndices.RowCount; batchIdx++) // for each batch
        {
            PerformBatchComputations(passedParams, batchesIndices, ref previousWeightsUpdate, PreviousUpdateSigns, epoch, ref epochLoss, batchIdx);
        }
        epochLoss /= batchesIndices.RowCount;

        double validationError = passedParams.parallelize
            ? Parallel_ComputeValidationLoss(passedParams, testSetIndices, test)
            : ComputeValidationLoss(passedParams, testSetIndices, test);

        double trainingAccuracy = 0, validationSetAccuracy = 0;
        if (passedParams.trueThreshold != null)
        {
            trainingAccuracy = Utilities.Tools.ComputeAccuracy(passedParams.network, passedParams.trainingSet, passedParams.trueThreshold);
            validationSetAccuracy = Utilities.Tools.ComputeAccuracy(passedParams.network, passedParams.validationSet, passedParams.trueThreshold);
        }

        // One learning-curve row per epoch: [train loss, validation loss, train accuracy, validation accuracy].
        learningCurve.Add(new double[]
        {
            epochLoss,
            passedParams.validationSet != null ? validationError : 0,
            passedParams.trueThreshold != null ? trainingAccuracy : 0,
            passedParams.trueThreshold != null ? validationSetAccuracy : 0
        });

        if (passedParams.PrintLoss)
        {
            Console.ForegroundColor = ConsoleColor.Green;
            Console.WriteLine("Epoch:{0} train loss:{1} - validation loss:{2}", epoch, epochLoss, validationError);
        }

        if (passedParams.reduceLearningRate && epoch > 0 && passedParams.numberOfReductions > 0 && epoch % passedParams.learningRateReductionAfterEpochs == 0)
        {
            passedParams.learningRate *= passedParams.learningRateReduction;
            passedParams.numberOfReductions--;
            Console.ForegroundColor = ConsoleColor.Red;
            Console.WriteLine("Learning Rate Reduced, now: {0}", passedParams.learningRate);
        }
        Console.ResetColor();
    }
    return learningCurve;
}
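// A concrete sketch of the batch bookkeeping above (illustrative only): with 25
// training rows and batchSize = 10, batchesIndices has three rows of inclusive
// [start, end] pairs — [0, 9], [10, 19], [20, 24] — so the last batch is smaller.
private static Matrix<double> BatchesIndicesSketch(int rowCount = 25, int batchSize = 10)
{
    var numberOfBatches = (int)Math.Ceiling(rowCount / (double)batchSize);
    var batchesIndices = CreateMatrix.Dense(numberOfBatches, 2, 0.0);
    for (int j = 0; j < numberOfBatches; j++)
    {
        // End index is capped so the final, possibly smaller batch stays in range.
        batchesIndices.SetRow(j, new double[] { j * (double)batchSize, Math.Min(rowCount - 1, (j + 1) * (double)batchSize - 1) });
    }
    return batchesIndices;
}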
private static void Parallel_PerformBatchComputations(GradientDescentParams passedParams, Matrix<double> batchesIndices, ref Dictionary<int, Matrix<double>> previousWeightsUpdate, Dictionary<int, Matrix<double>> PreviousUpdateSigns, int epoch, ref double epochLoss, int batchIdx)
{
    Dictionary<int, Matrix<double>> momentumUpdate = new Dictionary<int, Matrix<double>>();
    double batchLoss = 0;
    Dictionary<int, Matrix<double>> weightsUpdates = new Dictionary<int, Matrix<double>>();

    // Not all batches have batchSize elements; the last one can be smaller.
    int numberOfBatchExamples = ((int)batchesIndices.Row(batchIdx).At(1) - (int)batchesIndices.Row(batchIdx).At(0)) + 1;
    var batchElementsIndices = Enumerable.Range((int)batchesIndices.Row(batchIdx).At(0), numberOfBatchExamples).ToList();
    if (passedParams.shuffle)
    {
        batchElementsIndices.Shuffle();
    }

    // Pre-allocate the shared gradient and momentum accumulators so the parallel workers
    // only ever add into existing matrices (guarded by a lock in Parallel_BackPropForExample).
    for (int layerIndex = passedParams.network.Layers.Count - 1; layerIndex >= 1; layerIndex--)
    {
        if (!weightsUpdates.ContainsKey(layerIndex - 1))
        {
            weightsUpdates.Add(layerIndex - 1, CreateMatrix.Dense(passedParams.network.Weights[layerIndex - 1].RowCount, passedParams.network.Weights[layerIndex - 1].ColumnCount, 0.0));
        }
        if (!momentumUpdate.ContainsKey(layerIndex - 1))
        {
            momentumUpdate.Add(layerIndex - 1, CreateMatrix.Dense(passedParams.network.Weights[layerIndex - 1].RowCount, passedParams.network.Weights[layerIndex - 1].ColumnCount, 0.0));
        }
    }

    // One loss slot per example in the batch.
    double[] examplesLosses = new double[batchElementsIndices.Count];
    Parallel.For(0, batchElementsIndices.Count, threadIdx =>
    {
        Parallel_PerformExampleComputations(passedParams, weightsUpdates, batchElementsIndices[threadIdx], examplesLosses, threadIdx);
    }); // per example in the batch

    batchLoss = examplesLosses.Sum();

    if (passedParams.debug)
    {
        Console.WriteLine("batch end");
    }

    UpdateWeights(passedParams, previousWeightsUpdate, PreviousUpdateSigns, epoch, momentumUpdate, weightsUpdates, batchElementsIndices.Count);
    previousWeightsUpdate = ClonePrevWeightsUpdates(previousWeightsUpdate, weightsUpdates);

    epochLoss += batchLoss / numberOfBatchExamples;

    if (passedParams.network.Debug)
    {
        Console.WriteLine("Batch: {0} Error: {1}", batchIdx, batchLoss);
    }
}
public void ScreenGD(AA1_MLP.Entities.DataSet wholeSet, int k, List<double> momentums, List<double> learningRates, List<double> regularizationRates, List<int> humberOfHiddenNeurons, GradientDescentParams passedParams, int numOfEpochs)
{
    // Grid search over momentum, learning rate, regularization rate, and hidden layer
    // size; each combination is evaluated with k-fold cross validation.
    string reportsDirectory = "80SGDKFoldsReportsnonestrov";
    if (Directory.Exists(reportsDirectory))
    {
        Directory.Delete(reportsDirectory, true);
    }
    Directory.CreateDirectory(reportsDirectory);

    // Example screening values:
    // momentums: { 0, 0.5 }, learningRates: { 0.005, 0.01 }, regularizationRates: { 0, 0.001 },
    // humberOfHiddenNeurons: { 100, 90, 80, 70, 60, 50, 40, 30, 20, 10 }

    IOptimizer trainer = new Gradientdescent();
    // Alternatively: AdamParams passedParams = new AdamParams(); IOptimizer trainer = new Adam();

    passedParams.numberOfEpochs = numOfEpochs;
    passedParams.batchSize = 10;

    for (int idxnh = 0; idxnh < humberOfHiddenNeurons.Count; idxnh++)
    {
        for (int idxmo = 0; idxmo < momentums.Count; idxmo++)
        {
            for (int idxLR = 0; idxLR < learningRates.Count; idxLR++)
            {
                for (int idxReg = 0; idxReg < regularizationRates.Count; idxReg++)
                {
                    int nh = humberOfHiddenNeurons[idxnh];
                    passedParams.learningRate = learningRates[idxLR];
                    passedParams.regularization = Regularizations.L2;
                    passedParams.regularizationRate = regularizationRates[idxReg];
                    passedParams.momentum = momentums[idxmo];
                    passedParams.NumberOfHiddenUnits = nh;

                    RunKFoldWithSetOfParams(wholeSet, k, passedParams, trainer, reportsDirectory);
                }
            }
        }
    }
}
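// A minimal usage sketch for the screening above, using the example values from the
// comment (2 x 2 x 2 x 10 = 80 configurations, each cross-validated over k folds).
// The fold count and epoch budget below are hypothetical; "wholeSet" is assumed to be
// a DataSet loaded e.g. via CupDataManager.LoadData.
private void RunScreeningExample(AA1_MLP.Entities.DataSet wholeSet)
{
    ScreenGD(wholeSet,
        k: 5, // hypothetical fold count
        momentums: new List<double> { 0, 0.5 },
        learningRates: new List<double> { 0.005, 0.01 },
        regularizationRates: new List<double> { 0, 0.001 },
        humberOfHiddenNeurons: new List<int> { 100, 90, 80, 70, 60, 50, 40, 30, 20, 10 },
        passedParams: new GradientDescentParams(),
        numOfEpochs: 1000); // hypothetical epoch budget
}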