/*
 * [Obsolete("Deprecated. Use cross-entropy cost instead.")]
 * static double QuadraticCost(float[] targetValues, float[] networkOutputs, out float[] gradient)
 * {
 *     if (targetValues.Length != networkOutputs.Length)
 *         throw new System.InvalidOperationException("Mismatch between length of output array and target (label) array.");
 *
 *     gradient = targetValues.Zip(networkOutputs, (x, y) => y - x).ToArray();
 *     var squaredErrors = gradient.Select(x => Math.Pow(x, 2));
 *
 *     return squaredErrors.Sum() / squaredErrors.Count();
 * }
 *
 * [Obsolete("Deprecated. Use cross-entropy cost instead.")]
 * static double QuadraticCost(NeuralNetwork network, DataSet dataSet)
 * {
 *     float[] dummy;
 *     double totalCost = 0;
 *
 *     for (int i = 0; i < dataSet.Size; i++)
 *     {
 *         network.Layers[0].Input.SetHost(dataSet.GetDataPoint(i));
 *
 *         // Run forward
 *         for (int l = 0; l < network.Layers.Count; l++)
 *         {
 *             network.Layers[l].FeedForward();
 *         }
 *
 *         // Compute cost
 *         totalCost += QuadraticCost(new float[] { (float)dataSet.GetLabel(i) }, network.Layers.Last().Output.GetHost(), out dummy);
 *     }
 *
 *     return totalCost / (2 * dataSet.Size);
 * }
 */

#endregion

#region Junk

#if TOY
/// <summary>
/// Plain SGD training loop over a toy 2D data set, used to smoke-test the network
/// structure (fully connected layers, tanh, softmax and their backprop).
/// Data points are fed one at a time; the cross-entropy gradient is written directly
/// into the last layer's INPUT delta, after which a backward pass updates parameters.
/// NOTE(review): relies on class-level hyperparameter fields (miniBatchSize,
/// learningRate, momentumMultiplier, errorTolerance, consoleOutputLag,
/// maxTrainingEpochs) declared outside this view — confirm against the full class.
/// </summary>
/// <param name="network">Network to train; its layers are fed and updated in place.</param>
/// <param name="trainingSet">Labelled training data.</param>
public static void TrainSimpleTest(NeuralNetwork network, DataSet trainingSet)
{
    // Initializations
    int nLayers = network.NumberOfLayers;
    int[] randomIntSequence = new int[trainingSet.Size];
    int iDataPoint;
    bool stopFlag = false;
    double errorEpoch;
    bool isOutputEpoch = true;          // evaluate immediately on the first epoch
    int epochsRemainingToOutput = 0;
    List <int[]> miniBatchList = new List <int[]>();        // NOTE(review): never used below
    int nMiniBatches = trainingSet.Size / miniBatchSize;    // NOTE(review): never used below
    float[] outputScores = new float[trainingSet.NumberOfClasses];
    float[] labelArray = new float[trainingSet.NumberOfClasses];

#if OPENCL_ENABLED
    int inputBufferBytesSize = sizeof(float) * trainingSet.GetDataPoint(0).Length;

    // Global and local work group size for gradient kernel:
    // one work-group of NumberOfClasses items per mini-batch element.
    IntPtr[] gradientGlobalWorkSizePtr = new IntPtr[] { (IntPtr)(miniBatchSize * trainingSet.NumberOfClasses) };
    IntPtr[] gradientLocalWorkSizePtr = new IntPtr[] { (IntPtr)(trainingSet.NumberOfClasses) };
#endif

    int epoch = 0;
    do // loop over training epochs
    {
        // Reshuffle the visiting order of the data points.
        randomIntSequence = Utils.GenerateRandomPermutation(trainingSet.Size); // new every epoch

        // Run over mini-batches
        for (int iStartMiniBatch = 0; iStartMiniBatch < trainingSet.Size; iStartMiniBatch += miniBatchSize)
        {
            // Run over a mini-batch
            for (int iWithinMiniBatch = 0; iWithinMiniBatch < miniBatchSize; iWithinMiniBatch++)
            {
                iDataPoint = randomIntSequence[iStartMiniBatch + iWithinMiniBatch];

                // FEED INPUT DATA
#if OPENCL_ENABLED
                // Feed by reference (no device-to-device copy)
                network.Layers[0].Input.ActivationsGPU = trainingSet.DataGPU(iDataPoint);

                // Copy data point in input buffer of the first layer
                /*
                Cl.EnqueueCopyBuffer(CL.Queue,
                                     trainingSet.DataGPU(iDataPoint),        // source
                                     network.Layers[0].Input.ActivationsGPU, // destination
                                     (IntPtr)null,
                                     (IntPtr)null,
                                     (IntPtr)inputBufferBytesSize,
                                     0,
                                     null,
                                     out CL.Event);
                CL.CheckErr(CL.Error, "NetworkTrainer.TrainSimpleTest: Cl.EnqueueCopyBuffer inputData");
                */
#else
                network.Layers[0].Input.SetHost(trainingSet.GetDataPoint(iDataPoint));
#endif

                // FORWARD PASS
                network.ForwardPass();

                // COMPUTE ERROR AND GRADIENT
#if OPENCL_ENABLED
                // Set kernel arguments: (0) destination delta, (1) softmax output, (2) one-hot label
                CL.Error = Cl.SetKernelArg(CL.CrossEntropyGradient, 0, network.Layers[nLayers - 1].Input.DeltaGPU);
                CL.Error |= Cl.SetKernelArg(CL.CrossEntropyGradient, 1, network.Layers[nLayers - 1].Output.ActivationsGPU);
                CL.Error |= Cl.SetKernelArg(CL.CrossEntropyGradient, 2, trainingSet.LabelArraysGPU(iDataPoint));
                CL.CheckErr(CL.Error, "TrainSimpleTest.CrossEntropyGradient: Cl.SetKernelArg");

                // Run kernel
                CL.Error = Cl.EnqueueNDRangeKernel(CL.Queue,
                                                   CL.CrossEntropyGradient,
                                                   1,
                                                   null,
                                                   gradientGlobalWorkSizePtr,
                                                   gradientLocalWorkSizePtr,
                                                   0,
                                                   null,
                                                   out CL.Event);
                CL.CheckErr(CL.Error, "TrainSimpleTest.CrossEntropyGradient: Cl.EnqueueNDRangeKernel");
#else
                outputScores = network.Layers.Last().Output.GetHost();
                labelArray = trainingSet.GetLabelArray(iDataPoint);

                // Gradient of cross-entropy cost (directly write in INPUT delta)
                // For softmax + cross-entropy the gradient wrt pre-softmax input is (output - label).
                network.Layers.Last().Input.DeltaHost = outputScores.Zip(labelArray, (x, y) => (x - y)).ToArray();
#endif

#if DEBUGGING_STEPBYSTEP
                /* ------------------------- DEBUGGING --------------------------------------------- */

                // Display output activation
#if OPENCL_ENABLED
                float[] outputScoresGPU = new float[network.Layers[nLayers - 1].Output.NumberOfUnits];
                CL.Error = Cl.EnqueueReadBuffer(CL.Queue,
                                                network.Layers[nLayers - 1].Output.ActivationsGPU, // source
                                                Bool.True,
                                                (IntPtr)0,
                                                (IntPtr)(network.Layers[nLayers - 1].Output.NumberOfUnits * sizeof(float)),
                                                outputScoresGPU, // destination
                                                0,
                                                null,
                                                out CL.Event);
                CL.CheckErr(CL.Error, "NetworkTrainer Cl.clEnqueueReadBuffer outputScoresGPU");

                Console.WriteLine("\nOutput scores:");
                for (int j = 0; j < outputScoresGPU.Length; j++)
                {
                    Console.Write("{0} ", outputScoresGPU[j]);
                }
                Console.WriteLine();
#else
                Console.WriteLine("\nOutput scores:");
                for (int j = 0; j < outputScores.Length; j++)
                {
                    Console.Write("{0} ", outputScores[j]);
                }
                Console.WriteLine();
#endif
                /* ------------------------- END --------------------------------------------- */
#endif

#if DEBUGGING_STEPBYSTEP
                /* ------------------------- DEBUGGING --------------------------------------------- */

                // Display true data label CPU
                float[] labelArrayHost = new float[trainingSet.NumberOfClasses];
                labelArrayHost = trainingSet.GetLabelArray(iDataPoint);
                Console.WriteLine("\nData label array on HOST:");
                for (int j = 0; j < labelArrayHost.Length; j++)
                {
                    Console.Write("{0} ", labelArrayHost[j]);
                }
                Console.WriteLine();
                /* ------------------------- END --------------------------------------------- */
#endif

#if DEBUGGING_STEPBYSTEP
                /* ------------------------- DEBUGGING --------------------------------------------- */

                // Display true data label
#if OPENCL_ENABLED
                float[] labelArrayGPU = new float[trainingSet.NumberOfClasses];
                CL.Error = Cl.EnqueueReadBuffer(CL.Queue,
                                                trainingSet.LabelArraysGPU(iDataPoint), // source
                                                Bool.True,
                                                (IntPtr)0,
                                                (IntPtr)(trainingSet.NumberOfClasses * sizeof(float)),
                                                labelArrayGPU, // destination
                                                0,
                                                null,
                                                out CL.Event);
                CL.CheckErr(CL.Error, "NetworkTrainer Cl.clEnqueueReadBuffer labelArrayGPU");

                Console.WriteLine("\nData label array on DEVICE:");
                for (int j = 0; j < labelArrayGPU.Length; j++)
                {
                    Console.Write("{0} ", labelArrayGPU[j]);
                }
                Console.WriteLine();
#endif
                /* ------------------------- END --------------------------------------------- */
#endif

#if DEBUGGING_STEPBYSTEP
                /* ------------------------- DEBUGGING --------------------------------------------- */

                // Display gradient
                float[] gradient = new float[network.Layers[nLayers - 1].Input.NumberOfUnits];
#if OPENCL_ENABLED
                CL.Error = Cl.EnqueueReadBuffer(CL.Queue,
                                                network.Layers[nLayers - 1].Input.DeltaGPU, // source
                                                Bool.True,
                                                (IntPtr)0,
                                                (IntPtr)(network.Layers[nLayers - 1].Input.NumberOfUnits * sizeof(float)),
                                                gradient, // destination
                                                0,
                                                null,
                                                out CL.Event);
                CL.CheckErr(CL.Error, "NetworkTrainer Cl.clEnqueueReadBuffer gradient");
#else
                gradient = network.Layers.Last().Input.DeltaHost;
#endif
                Console.WriteLine("\nGradient written to final layer:");
                for (int j = 0; j < gradient.Length; j++)
                {
                    Console.Write("{0} ", gradient[j]);
                }
                Console.WriteLine();
                Console.ReadKey();
                /*------------------------- END DEBUGGING --------------------------------------------- */
#endif

                // BACKWARD PASS (includes parameter updating)
                network.BackwardPass(learningRate, momentumMultiplier);

                // TEST: try cleaning stuff

            } // end loop over mini-batches
        }

        // Periodically (every consoleOutputLag epochs) report the classification error
        // and check the stopping criterion.
        if (isOutputEpoch)
        {
            //costEpoch = QuadraticCost(network, trainingSet);
            errorEpoch = NetworkEvaluator.ComputeClassificationError(network, trainingSet);
            Console.WriteLine("Epoch {0}: classification error = {1}", epoch, errorEpoch);

            if (errorEpoch < errorTolerance)
            {
                stopFlag = true;
            }

            epochsRemainingToOutput = consoleOutputLag;
            isOutputEpoch = false;
        }
        epochsRemainingToOutput--;
        isOutputEpoch = epochsRemainingToOutput == 0;

        // TO-DO: also implement early stopping (stop if validation error starts increasing)

        epoch++;
    } while (epoch < maxTrainingEpochs && !stopFlag);
}
/// <summary>
/// Entry point: sets up the OpenCL environment, imports the preprocessed GTSRB
/// test sets (one greyscale, one RGB), loads four trained networks from disk and
/// evaluates them as an ensemble, printing error and accuracy to the console.
/// </summary>
static void Main(string[] args)
{
    // Root folder holding kernels, preprocessed data and saved networks.
    string rootDir = "C:/Users/jacopo/Dropbox/Chalmers/MSc thesis";

    /*****************************************************
     * (0) Setup OpenCL
     ****************************************************/
    Console.WriteLine("\n=========================================");
    Console.WriteLine(" OpenCL setup");
    Console.WriteLine("=========================================\n");

    OpenCLSpace.SetupSpace(4);
    OpenCLSpace.KernelsPath = rootDir + "/ConvDotNet/Kernels";
    OpenCLSpace.LoadKernels();

    /*****************************************************
     * (1) Load data
     ******************************************************/
    Console.WriteLine("\n=========================================");
    Console.WriteLine(" Importing data");
    Console.WriteLine("=========================================\n");

    // Both test sets use the same 43-class label file; only the image encoding differs.
    string labelsPath = rootDir + "/GTSRB/Preprocessed/test_labels_full.dat";

    // GTSRB greyscale test set 1
    string greyscaleImagesPath = rootDir + "/GTSRB/Preprocessed/14_test_images.dat";
    DataSet greyscaleTestSet = new DataSet(43);
    Console.WriteLine("Importing test set (grayscale 1)...");
    greyscaleTestSet.ReadData(greyscaleImagesPath, labelsPath);

    // GTSRB RGB test set 1
    string rgbImagesPath = rootDir + "/GTSRB/Preprocessed/16_test_images.dat";
    DataSet rgbTestSet = new DataSet(43);
    Console.WriteLine("Importing test set (RGB 1)...");
    rgbTestSet.ReadData(rgbImagesPath, labelsPath);

    /*****************************************************
     * (2) Evaluate ensemble of networks
     *****************************************************/
    // Two LeNet-style and two VGG-style networks, each trained on one modality.
    string networksDir = rootDir + "/Results/Networks/";
    List<NeuralNetwork> ensemble = new List<NeuralNetwork>();
    ensemble.Add(Utils.LoadNetworkFromFile(networksDir, "FIXED_LeNet_GS_DropoutFC"));
    ensemble.Add(Utils.LoadNetworkFromFile(networksDir, "FIXED_LeNet_RGB_DropoutFC"));
    ensemble.Add(Utils.LoadNetworkFromFile(networksDir, "FIXED_VGGv2_GS_DropoutFC"));
    ensemble.Add(Utils.LoadNetworkFromFile(networksDir, "FIXED_VGGv2_RGB_DropoutFC"));

    double error = 0.0;
    Console.WriteLine("\nEvaluating an ensemble of {0} networks...", ensemble.Count);
    NetworkEvaluator.EvaluateEnsemble(ensemble, greyscaleTestSet, rgbTestSet, 64, out error);
    Console.WriteLine("\n\tTest set error = {0}\n\tAccuracy = {1}", error, 100 * (1 - error));
}
/// <summary>
/// Main training loop. Periodically (every <c>consoleOutputLag</c> epochs) evaluates the
/// network on the training set (and on <paramref name="validationSet"/>, if given), appends
/// loss/error to the epoch log files, then runs one or more mini-batch SGD epochs with
/// cross-entropy gradients. Validation drives a patience scheme: when validation loss has
/// risen for <c>patience</c> epochs, the learning rate is divided by
/// <c>learningRateDecayFactor</c> and the best saved network is reloaded; training stops
/// after <c>maxConsecutiveAnnealings</c> annealings without progress, after
/// <c>maxTrainingEpochs</c> epochs, or when a key press sets the stop flag.
/// NOTE(review): reads/writes many class-level fields (trainingMode, miniBatchSize,
/// dropout*, learningRate, lossTraining/errorTraining, minLossValidation, patience,
/// save paths, ...) declared outside this view — confirm against the full class.
/// </summary>
/// <param name="network">Network to train; the local reference is replaced when annealing reloads a snapshot.</param>
/// <param name="trainingSet">Training data.</param>
/// <param name="validationSet">Validation data; pass null to train without validation-based annealing.</param>
public static void Train(NeuralNetwork network, DataSet trainingSet, DataSet validationSet)
{
    // Initialize parameters or load them
    if (trainingMode == "new" || trainingMode == "New")
    {
        // Setup miniBatchSize
        network.Set("MiniBatchSize", miniBatchSize);
        network.InitializeParameters("random");
    }
    else if (trainingMode == "resume" || trainingMode == "Resume")
    {
        network.InitializeParameters("load");
    }
    else
    {
        throw new InvalidOperationException("Please set TrainingMode to either ''New'' or ''Resume''.");
    }

    // Set dropout
    network.Set("DropoutFC", dropoutFC);
    network.Set("DropoutConv", dropoutConv);
    network.Set("DropoutInput", dropoutInput);

    Sequence indicesSequence = new Sequence(trainingSet.DataContainer.Count);
    int[] miniBatch = new int[miniBatchSize];

    // Timers
    Stopwatch stopwatch = Stopwatch.StartNew();
    Stopwatch stopwatchFwd = Stopwatch.StartNew();
    Stopwatch stopwatchGrad = Stopwatch.StartNew();
    Stopwatch stopwatchBwd = Stopwatch.StartNew();

    int epoch = 0;
    int nBadEpochs = 0;
    int consecutiveAnnealingCounter = 0;
    bool stopFlag = false;
    // If requested, evaluate before the first training epoch (countdown starts at 0).
    int epochsRemainingToOutput = evaluateBeforeTraining ? 0 : consoleOutputLag;

    while (!stopFlag) // begin loop over training epochs
    {
        if (epochsRemainingToOutput == 0)
        {
            /**************
             * Evaluation *
             **************/

            // Pre inference (for batch-norm)
            //network.Set("PreInference", true);
            //Console.WriteLine("Re-computing batch-norm means and variances...");
            //NetworkEvaluator.PreEvaluateNetwork(network, trainingSet);

            // Evaluate on training set...
            network.Set("Inference", true);
            Console.WriteLine("Evaluating on TRAINING set...");
            stopwatch.Restart();
            NetworkEvaluator.EvaluateNetwork(network, trainingSet, out lossTraining, out errorTraining);
            Console.WriteLine("\tLoss = {0}\n\tError = {1}\n\tEval runtime = {2}ms\n", lossTraining, errorTraining, stopwatch.ElapsedMilliseconds);

            // ...and save loss and error to file (append mode)
            using (System.IO.StreamWriter trainingEpochOutputFile = new System.IO.StreamWriter(trainingEpochSavePath, true))
            {
                trainingEpochOutputFile.WriteLine(lossTraining.ToString() + "\t" + errorTraining.ToString());
            }

            // Evaluate on validation set...
            if (validationSet != null)
            {
                Console.WriteLine("Evaluating on VALIDATION set...");
                stopwatch.Restart();
                NetworkEvaluator.EvaluateNetwork(network, validationSet, out newLossValidation, out newErrorValidation);
                Console.WriteLine("\tLoss = {0}\n\tError = {1}\n\tEval runtime = {2}ms\n", newLossValidation, newErrorValidation, stopwatch.ElapsedMilliseconds);

                // ...save loss and error to file (append mode)
                using (System.IO.StreamWriter validationEpochOutputFile = new System.IO.StreamWriter(validationEpochSavePath, true))
                {
                    validationEpochOutputFile.WriteLine(newLossValidation.ToString() + "\t" + newErrorValidation.ToString());
                }

                if (newLossValidation < minLossValidation)
                {
                    // nice, validation loss is decreasing!
                    minLossValidation = newLossValidation;
                    errorValidation = newErrorValidation;

                    // Save network to file
                    Utils.SaveNetworkToFile(network, networkOutputFilePath);

                    // and keep training
                    nBadEpochs = 0;
                    consecutiveAnnealingCounter = 0;
                }
                else
                {
                    nBadEpochs++;
                    Console.WriteLine("Loss on the validation set has been increasing for {0} epoch(s)...", nBadEpochs);
                    if (patience - nBadEpochs > 0)
                    {
                        Console.WriteLine("...I'll be patient for {0} more epoch(s)!", patience - nBadEpochs); // keep training
                    }
                    else
                    {
                        //Console.WriteLine("...and I've run out of patience! Training ends here.");
                        //stopFlag = true;
                        //break;

                        // Decrease learning rate
                        Console.WriteLine("...and I've run out of patience!");

                        if (consecutiveAnnealingCounter > maxConsecutiveAnnealings)
                        {
                            // FIX: corrected "numner" typo in the console message.
                            Console.WriteLine("\nReached the number of maximum consecutive annealings without progress. \nTraining ends here.");
                            break;
                        }

                        Console.WriteLine("\nI'm annealing the learning rate:\n\tWas {0}\n\tSetting it to {1}.", learningRate, learningRate / learningRateDecayFactor);
                        learningRate /= learningRateDecayFactor;
                        consecutiveAnnealingCounter++;

                        Console.WriteLine("\nAnd I'm loading the network saved {0} epochs ago and resume the training from there.", patience);
                        string networkName = network.Name;
                        network = null; // this is BAD PRACTICE
                        GC.Collect(); // this is BAD PRACTICE
                        network = Utils.LoadNetworkFromFile("../../../../Results/Networks/", networkName);
                        network.Set("MiniBatchSize", miniBatchSize);
                        network.InitializeParameters("load");
                        nBadEpochs = 0;
                    }
                }
            }

            // Restore dropout (evaluation above switched the network to inference mode)
            network.Set("DropoutFC", dropoutFC);
            network.Set("DropoutConv", dropoutConv);
            network.Set("DropoutInput", dropoutInput);

            epochsRemainingToOutput = consoleOutputLag;
        }
        epochsRemainingToOutput--;

        epoch++;
        if (epoch > maxTrainingEpochs)
        {
            break;
        }

        /************
         * Training *
         ************/

        network.Set("Training", true);
        network.Set("EpochBeginning", true);

        Console.WriteLine("\nEpoch {0}...", epoch);

        stopwatch.Restart();
        stopwatchFwd.Reset();
        stopwatchGrad.Reset();
        stopwatchBwd.Reset();

        indicesSequence.Shuffle(); // shuffle examples order at every epoch

        // Run over mini-batches (removed unused iMiniBatch counter)
        for (int iStartMiniBatch = 0; iStartMiniBatch < trainingSet.DataContainer.Count; iStartMiniBatch += miniBatchSize)
        {
            // Feed a mini-batch to the network
            miniBatch = indicesSequence.GetMiniBatchIndices(iStartMiniBatch, miniBatchSize);
            network.InputLayer.FeedData(trainingSet, miniBatch);

            // Forward pass
            stopwatchFwd.Start();
            network.ForwardPass("beginning", "end");
            stopwatchFwd.Stop();

            // Compute gradient and backpropagate
            stopwatchGrad.Start();
            network.CrossEntropyGradient(trainingSet, miniBatch);
            stopwatchGrad.Stop();

            // Backpropagate gradient and update parameters
            stopwatchBwd.Start();
            network.BackwardPass(learningRate, momentumCoefficient, weightDecayCoeff, weightMaxNorm);
            stopwatchBwd.Stop();

            // Allow the user to interrupt training from the keyboard.
            CheckForKeyPress(ref network, ref stopFlag);
            if (stopFlag)
            {
                break;
            }
        } // end of training epoch

        Console.Write(" Training runtime = {0}ms\n", stopwatch.ElapsedMilliseconds);
        Console.WriteLine("Forward: {0}ms - Gradient: {1}ms - Backward: {2}ms\n", stopwatchFwd.ElapsedMilliseconds, stopwatchGrad.ElapsedMilliseconds, stopwatchBwd.ElapsedMilliseconds);

#if TIMING_LAYERS
        // FIX: corrected stray double colon in the header message.
        Console.WriteLine("\n Detailed runtimes:");

        Console.WriteLine("\nCONV: \n\tForward: {0}ms \n\tBackprop: {1}ms \n\tUpdateSpeeds: {2}ms \n\tUpdateParameters: {3}ms \n\tPadUnpad: {4}ms",
            Utils.ConvForwardTimer.ElapsedMilliseconds, Utils.ConvBackpropTimer.ElapsedMilliseconds,
            Utils.ConvUpdateSpeedsTimer.ElapsedMilliseconds, Utils.ConvUpdateParametersTimer.ElapsedMilliseconds,
            Utils.ConvPadUnpadTimer.ElapsedMilliseconds);

        Console.WriteLine("\nPOOLING: \n\tForward: {0}ms \n\tBackprop: {1}ms",
            Utils.PoolingForwardTimer.ElapsedMilliseconds, Utils.PoolingBackpropTimer.ElapsedMilliseconds);

        Console.WriteLine("\nNONLINEARITIES: \n\tForward: {0}ms \n\tBackprop: {1}ms",
            Utils.NonlinearityForwardTimer.ElapsedMilliseconds, Utils.NonlinearityBackpropTimer.ElapsedMilliseconds);

        Console.WriteLine("\nFULLY CONNECTED: \n\tForward: {0}ms \n\tBackprop: {1}ms \n\tUpdateSpeeds: {2}ms \n\tUpdateParameters: {3}ms",
            Utils.FCForwardTimer.ElapsedMilliseconds, Utils.FCBackpropTimer.ElapsedMilliseconds,
            Utils.FCUpdateSpeedsTimer.ElapsedMilliseconds, Utils.FCUpdateParametersTimer.ElapsedMilliseconds);

        Console.WriteLine("\nBATCHNORM FC \n\tForward: {0}ms \n\tBackprop: {1}ms \n\tUpdateSpeeds: {2}ms \n\tUpdateParameters: {3}ms",
            Utils.BNFCForwardTimer.ElapsedMilliseconds, Utils.BNFCBackpropTimer.ElapsedMilliseconds,
            Utils.BNFCUpdateSpeedsTimer.ElapsedMilliseconds, Utils.BNFCUpdateParametersTimer.ElapsedMilliseconds);

        Console.WriteLine("\nBATCHNORM CONV \n\tForward: {0}ms \n\tBackprop: {1}ms \n\tUpdateSpeeds: {2}ms \n\tUpdateParameters: {3}ms",
            Utils.BNConvForwardTimer.ElapsedMilliseconds, Utils.BNConvBackpropTimer.ElapsedMilliseconds,
            Utils.BNConvUpdateSpeedsTimer.ElapsedMilliseconds, Utils.BNConvUpdateParametersTimer.ElapsedMilliseconds);

        Console.WriteLine("\nSOFTMAX \n\tForward: {0}ms", Utils.SoftmaxTimer.ElapsedMilliseconds);

        Utils.ResetTimers();
#endif
    }

    stopwatch.Stop();
}