public static void Train(NeuralNetwork network, DataSet trainingSet, DataSet validationSet) { // Initialize parameters or load them if (trainingMode == "new" || trainingMode == "New") { // Setup miniBatchSize network.Set("MiniBatchSize", miniBatchSize); network.InitializeParameters("random"); } else if (trainingMode == "resume" || trainingMode == "Resume") { network.InitializeParameters("load"); } else { throw new InvalidOperationException("Please set TrainingMode to either ''New'' or ''Resume''."); } // Set dropout network.Set("DropoutFC", dropoutFC); network.Set("DropoutConv", dropoutConv); network.Set("DropoutInput", dropoutInput); Sequence indicesSequence = new Sequence(trainingSet.DataContainer.Count); int[] miniBatch = new int[miniBatchSize]; // Timers Stopwatch stopwatch = Stopwatch.StartNew(); Stopwatch stopwatchFwd = Stopwatch.StartNew(); Stopwatch stopwatchGrad = Stopwatch.StartNew(); Stopwatch stopwatchBwd = Stopwatch.StartNew(); int epoch = 0; int nBadEpochs = 0; int consecutiveAnnealingCounter = 0; bool stopFlag = false; int epochsRemainingToOutput = (evaluateBeforeTraining == true) ? 0 : consoleOutputLag; while (!stopFlag) // begin loop over training epochs { if (epochsRemainingToOutput == 0) { /************** * Evaluation * **************/ // Pre inference (for batch-norm) //network.Set("PreInference", true); //Console.WriteLine("Re-computing batch-norm means and variances..."); //NetworkEvaluator.PreEvaluateNetwork(network, trainingSet); // Evaluate on training set... network.Set("Inference", true); Console.WriteLine("Evaluating on TRAINING set..."); stopwatch.Restart(); NetworkEvaluator.EvaluateNetwork(network, trainingSet, out lossTraining, out errorTraining); Console.WriteLine("\tLoss = {0}\n\tError = {1}\n\tEval runtime = {2}ms\n", lossTraining, errorTraining, stopwatch.ElapsedMilliseconds); // ...and save loss and error to file using (System.IO.StreamWriter trainingEpochOutputFile = new System.IO.StreamWriter(trainingEpochSavePath, true)) { trainingEpochOutputFile.WriteLine(lossTraining.ToString() + "\t" + errorTraining.ToString()); } // Evaluate on validation set... if (validationSet != null) { Console.WriteLine("Evaluating on VALIDATION set..."); stopwatch.Restart(); NetworkEvaluator.EvaluateNetwork(network, validationSet, out newLossValidation, out newErrorValidation); Console.WriteLine("\tLoss = {0}\n\tError = {1}\n\tEval runtime = {2}ms\n", newLossValidation, newErrorValidation, stopwatch.ElapsedMilliseconds); // ...save loss and error to file using (System.IO.StreamWriter validationEpochOutputFile = new System.IO.StreamWriter(validationEpochSavePath, true)) { validationEpochOutputFile.WriteLine(newLossValidation.ToString() + "\t" + newErrorValidation.ToString()); } if (newLossValidation < minLossValidation) { // nice, validation loss is decreasing! minLossValidation = newLossValidation; errorValidation = newErrorValidation; // Save network to file Utils.SaveNetworkToFile(network, networkOutputFilePath); // and keep training nBadEpochs = 0; consecutiveAnnealingCounter = 0; } else { nBadEpochs++; Console.WriteLine("Loss on the validation set has been increasing for {0} epoch(s)...", nBadEpochs); if (patience - nBadEpochs > 0) { Console.WriteLine("...I'll be patient for {0} more epoch(s)!", patience - nBadEpochs); // keep training } else { //Console.WriteLine("...and I've run out of patience! Training ends here."); //stopFlag = true; //break; // Decrease learning rate Console.WriteLine("...and I've run out of patience!"); if (consecutiveAnnealingCounter > maxConsecutiveAnnealings) { Console.WriteLine("\nReached the numner of maximum consecutive annealings without progress. \nTraining ends here."); break; } Console.WriteLine("\nI'm annealing the learning rate:\n\tWas {0}\n\tSetting it to {1}.", learningRate, learningRate / learningRateDecayFactor); learningRate /= learningRateDecayFactor; consecutiveAnnealingCounter++; Console.WriteLine("\nAnd I'm loading the network saved {0} epochs ago and resume the training from there.", patience); string networkName = network.Name; network = null; // this is BAD PRACTICE GC.Collect(); // this is BAD PRACTICE network = Utils.LoadNetworkFromFile("../../../../Results/Networks/", networkName); network.Set("MiniBatchSize", miniBatchSize); network.InitializeParameters("load"); nBadEpochs = 0; } } } // Restore dropout network.Set("DropoutFC", dropoutFC); network.Set("DropoutConv", dropoutConv); network.Set("DropoutInput", dropoutInput); epochsRemainingToOutput = consoleOutputLag; } epochsRemainingToOutput--; epoch++; if (epoch > maxTrainingEpochs) { break; } /************ * Training * ************/ network.Set("Training", true); network.Set("EpochBeginning", true); Console.WriteLine("\nEpoch {0}...", epoch); stopwatch.Restart(); stopwatchFwd.Reset(); stopwatchGrad.Reset(); stopwatchBwd.Reset(); indicesSequence.Shuffle(); // shuffle examples order at every epoch int iMiniBatch = 0; // Run over mini-batches for (int iStartMiniBatch = 0; iStartMiniBatch < trainingSet.DataContainer.Count; iStartMiniBatch += miniBatchSize) { // Feed a mini-batch to the network miniBatch = indicesSequence.GetMiniBatchIndices(iStartMiniBatch, miniBatchSize); network.InputLayer.FeedData(trainingSet, miniBatch); // Forward pass stopwatchFwd.Start(); network.ForwardPass("beginning", "end"); stopwatchFwd.Stop(); // Compute gradient and backpropagate stopwatchGrad.Start(); network.CrossEntropyGradient(trainingSet, miniBatch); stopwatchGrad.Stop(); // Backpropagate gradient and update parameters stopwatchBwd.Start(); network.BackwardPass(learningRate, momentumCoefficient, weightDecayCoeff, weightMaxNorm); stopwatchBwd.Stop(); iMiniBatch++; CheckForKeyPress(ref network, ref stopFlag); if (stopFlag) { break; } } // end of training epoch Console.Write(" Training runtime = {0}ms\n", stopwatch.ElapsedMilliseconds); Console.WriteLine("Forward: {0}ms - Gradient: {1}ms - Backward: {2}ms\n", stopwatchFwd.ElapsedMilliseconds, stopwatchGrad.ElapsedMilliseconds, stopwatchBwd.ElapsedMilliseconds); #if TIMING_LAYERS Console.WriteLine("\n Detailed runtimes::"); Console.WriteLine("\nCONV: \n\tForward: {0}ms \n\tBackprop: {1}ms \n\tUpdateSpeeds: {2}ms \n\tUpdateParameters: {3}ms \n\tPadUnpad: {4}ms", Utils.ConvForwardTimer.ElapsedMilliseconds, Utils.ConvBackpropTimer.ElapsedMilliseconds, Utils.ConvUpdateSpeedsTimer.ElapsedMilliseconds, Utils.ConvUpdateParametersTimer.ElapsedMilliseconds, Utils.ConvPadUnpadTimer.ElapsedMilliseconds); Console.WriteLine("\nPOOLING: \n\tForward: {0}ms \n\tBackprop: {1}ms", Utils.PoolingForwardTimer.ElapsedMilliseconds, Utils.PoolingBackpropTimer.ElapsedMilliseconds); Console.WriteLine("\nNONLINEARITIES: \n\tForward: {0}ms \n\tBackprop: {1}ms", Utils.NonlinearityForwardTimer.ElapsedMilliseconds, Utils.NonlinearityBackpropTimer.ElapsedMilliseconds); Console.WriteLine("\nFULLY CONNECTED: \n\tForward: {0}ms \n\tBackprop: {1}ms \n\tUpdateSpeeds: {2}ms \n\tUpdateParameters: {3}ms", Utils.FCForwardTimer.ElapsedMilliseconds, Utils.FCBackpropTimer.ElapsedMilliseconds, Utils.FCUpdateSpeedsTimer.ElapsedMilliseconds, Utils.FCUpdateParametersTimer.ElapsedMilliseconds); Console.WriteLine("\nBATCHNORM FC \n\tForward: {0}ms \n\tBackprop: {1}ms \n\tUpdateSpeeds: {2}ms \n\tUpdateParameters: {3}ms", Utils.BNFCForwardTimer.ElapsedMilliseconds, Utils.BNFCBackpropTimer.ElapsedMilliseconds, Utils.BNFCUpdateSpeedsTimer.ElapsedMilliseconds, Utils.BNFCUpdateParametersTimer.ElapsedMilliseconds); Console.WriteLine("\nBATCHNORM CONV \n\tForward: {0}ms \n\tBackprop: {1}ms \n\tUpdateSpeeds: {2}ms \n\tUpdateParameters: {3}ms", Utils.BNConvForwardTimer.ElapsedMilliseconds, Utils.BNConvBackpropTimer.ElapsedMilliseconds, Utils.BNConvUpdateSpeedsTimer.ElapsedMilliseconds, Utils.BNConvUpdateParametersTimer.ElapsedMilliseconds); Console.WriteLine("\nSOFTMAX \n\tForward: {0}ms", Utils.SoftmaxTimer.ElapsedMilliseconds); Utils.ResetTimers(); #endif } stopwatch.Stop(); }
public static void Check(NeuralNetwork network, DataSet dataSet) { // Setup network network.Set("MiniBatchSize", miniBatchSize); network.InitializeParameters("random"); network.Set("DropoutFC", 1.0); network.Set("Training", true); network.Set("EpochBeginning", true); // Get a mini-batch of data Sequence indicesSequence = new Sequence(dataSet.DataContainer.Count); indicesSequence.Shuffle(); int[] miniBatch = indicesSequence.GetMiniBatchIndices(0, miniBatchSize); // Run network forward and backward network.InputLayer.FeedData(dataSet, miniBatch); network.ForwardPass("beginning", "end"); List <int> trueLabels = new List <int>(); for (int m = 0; m < miniBatchSize; m++) { trueLabels.Add(dataSet.DataContainer[miniBatch[m]].Label); } network.CrossEntropyGradient(dataSet, miniBatch); network.BackwardPass(0.0, 0.0, 0.0, 1e10); // no momentum, no learning rate, no weight decay // Re-forward pass (in case there are batch-norm layer) network.Set("PreInference", true); network.ForwardPass("beginning", "end"); network.Set("Inference", true); for (int iLayer = 1; iLayer < network.NumberOfLayers; iLayer++) { //if (network.Layers[iLayer].Type != "Input" && network.Layers[iLayer].Type != "MaxPooling" && network.Layers[iLayer].Type != "ReLU" && // network.Layers[iLayer].Type != "SoftMax" && network.Layers[iLayer].Type != "Convolutional" && network.Layers[iLayer].Type != "FullyConnected" // && network.Layers[iLayer].Type != "ELU") if (network.Layers[iLayer].Type == typeToCheck) { Console.WriteLine("\nChecking gradients in layer {0} ({1})...", iLayer, network.Layers[iLayer].Type); int nChecks = 0; int nErrors = 0; double cumulativeError = 0.0; double[] parametersBackup = network.Layers[iLayer].GetParameters(); double[] parameterGradients = network.Layers[iLayer].GetParameterGradients(); int nParameters = parametersBackup.Length; // First parameters Console.WriteLine("\n...with respect to PARAMETERS"); for (int j = 0; j < nParameters; j++) { // decrease jth parameter by EPSILON double[] parametersMinus = new double[nParameters]; Array.Copy(parametersBackup, parametersMinus, nParameters); parametersMinus[j] -= EPSILON; network.Layers[iLayer].SetParameters(parametersMinus); // then run network forward and compute loss network.ForwardPass(iLayer, "end"); List <double[]> outputClassScoresMinus = network.OutputLayer.OutputClassScores; double lossMinus = 0; for (int m = 0; m < miniBatchSize; m++) { int trueLabel = trueLabels[m]; lossMinus -= Math.Log(outputClassScoresMinus[m][trueLabel]); // score of true class in example m } lossMinus /= miniBatchSize; // increse jth parameter by EPSILON double[] parametersPlus = new double[nParameters]; Array.Copy(parametersBackup, parametersPlus, nParameters); parametersPlus[j] += EPSILON; network.Layers[iLayer].SetParameters(parametersPlus); // then run network forward and compute loss network.ForwardPass(iLayer, "end"); List <double[]> outputClassScoresPlus = network.OutputLayer.OutputClassScores; double lossPlus = 0; for (int m = 0; m < miniBatchSize; m++) { int trueLabel = trueLabels[m]; lossPlus -= Math.Log(outputClassScoresPlus[m][trueLabel]); // score of true class in example m } lossPlus /= miniBatchSize; // compute gradient numerically, trying to limit loss of significance! //double orderOfMagnitude = Math.Floor(Math.Log10(lossPlus)); //lossPlus *= Math.Pow(10, -orderOfMagnitude); //lossMinus *= Math.Pow(10, -orderOfMagnitude); double gradientNumerical = (lossPlus - lossMinus) / (2 * EPSILON); //gradientNumerical *= Math.Pow(10, orderOfMagnitude); // retrieve gradient computed with backprop double gradientBackprop = parameterGradients[j]; //if (Math.Abs(gradientNumerical) > EPSILON || Math.Abs(gradientBackprop) > EPSILON) // when the gradient is very small, finite arithmetics effects are too large => don't check //{ nChecks++; // compare the gradients, again trying to limit loss of significance! //orderOfMagnitude = Math.Floor(Math.Log10(Math.Abs(gradientNumerical))); //double gradientNumericalRescaled = gradientNumerical * Math.Pow(10, -orderOfMagnitude); //double gradientBackpropRescaled = gradientBackprop * Math.Pow(10, -orderOfMagnitude); //double error = Math.Abs(gradientNumericalRescaled - gradientBackpropRescaled) * Math.Pow(10, orderOfMagnitude); double error = Math.Abs(gradientNumerical - gradientBackprop); double relativeError = error / Math.Max(Math.Abs(gradientNumerical), Math.Abs(gradientBackprop)); if (relativeError > MAX_RELATIVE_ERROR) { Console.Write("\nGradient check failed for parameter {0}\n", j); Console.WriteLine("\tBackpropagation gradient: {0}", gradientBackprop); Console.WriteLine("\tFinite difference gradient: {0}", gradientNumerical); Console.WriteLine("\tRelative error: {0}", relativeError); nErrors++; } cumulativeError = (relativeError + (nChecks - 1) * cumulativeError) / nChecks; //} // restore original weights before checking next gradient network.Layers[iLayer].SetParameters(parametersBackup); } if (nChecks == 0) { Console.Write("\nAll gradients are zero... Something is probably wrong!"); } else if (nErrors == 0) { Console.Write("\nGradient check 100% passed!"); Console.Write("\nAverage error = {0}", cumulativeError); } else { Console.Write("\n{0} errors out of {1} checks.", nErrors, nChecks); Console.Write("\nAverage error = {0}", cumulativeError); } Console.Write("\n\n"); Console.Write("Press any key to continue..."); Console.Write("\n\n"); Console.ReadKey(); // Now inputs nChecks = 0; nErrors = 0; cumulativeError = 0.0; double[] inputBackup = network.Layers[iLayer].GetInput(); double[] inputGradients = network.Layers[iLayer].GetInputGradients(); int inputArraySize = inputBackup.Length; Console.WriteLine("\n...with respect to INPUT"); for (int j = 0; j < inputArraySize; j++) { // decrease jth parameter by EPSILON double[] inputMinus = new double[inputArraySize]; Array.Copy(inputBackup, inputMinus, inputArraySize); inputMinus[j] -= EPSILON; network.Layers[iLayer].SetInput(inputMinus); // then run network forward and compute loss network.ForwardPass(iLayer, "end"); List <double[]> outputClassScoresMinus = network.OutputLayer.OutputClassScores; double lossMinus = 0; for (int m = 0; m < miniBatchSize; m++) { int trueLabel = trueLabels[m]; lossMinus -= Math.Log(outputClassScoresMinus[m][trueLabel]); // score of true class in example m } lossMinus /= miniBatchSize; // increse jth parameter by EPSILON double[] inputPlus = new double[inputArraySize]; Array.Copy(inputBackup, inputPlus, inputArraySize); inputPlus[j] += EPSILON; network.Layers[iLayer].SetInput(inputPlus); // then run network forward and compute loss network.ForwardPass(iLayer, "end"); List <double[]> outputClassScoresPlus = network.OutputLayer.OutputClassScores; double lossPlus = 0; for (int m = 0; m < miniBatchSize; m++) { int trueLabel = trueLabels[m]; lossPlus -= Math.Log(outputClassScoresPlus[m][trueLabel]); // score of true class in example m } lossPlus /= miniBatchSize; // compute gradient numerically double gradientNumerical = (lossPlus - lossMinus) / (2 * EPSILON); // retrieve gradient computed with backprop double gradientBackprop = inputGradients[j] / miniBatchSize; // NOTE: it is divided by miniBatchSize because HERE the loss is defined as Loss / miniBatchSize //if (Math.Abs(gradientNumerical) > EPSILON || Math.Abs(gradientBackprop) > EPSILON) // when the gradient is very small, finite arithmetics effects are too large => don't check //{ nChecks++; // compare the gradients double relativeError = Math.Abs(gradientNumerical - gradientBackprop) / Math.Max(Math.Abs(gradientNumerical), Math.Abs(gradientBackprop)); if (relativeError > MAX_RELATIVE_ERROR) { Console.Write("\nGradient check failed for input {0}\n", j); Console.WriteLine("\tBackpropagation gradient: {0}", gradientBackprop); Console.WriteLine("\tFinite difference gradient: {0}", gradientNumerical); Console.WriteLine("\tRelative error: {0}", relativeError); nErrors++; } cumulativeError = (relativeError + (nChecks - 1) * cumulativeError) / nChecks; //} // restore original input before checking next gradient network.Layers[iLayer].SetInput(inputBackup); } if (nChecks == 0) { Console.Write("\nAll gradients are zero... Something is probably wrong!"); } else if (nErrors == 0) { Console.Write("\nGradient check 100% passed!"); Console.Write("\nAverage error = {0}", cumulativeError); } else { Console.Write("\n{0} errors out of {1} checks.", nErrors, nChecks); Console.Write("\nAverage error = {0}", cumulativeError); } Console.Write("\n\n"); Console.Write("Press any key to continue..."); Console.Write("\n\n"); Console.ReadKey(); } } }