Ejemplo n.º 1
0
        /// <summary>
        /// Pre-inference pass. Run this before evaluation, passing the TRAINING set as second argument:
        /// the whole data set is pushed forward through the network, mini-batch by mini-batch and in order,
        /// with the "PreInference" flag set, so that BatchNormConv layers (if any) can compute the
        /// cumulative averages they need for inference. No loss or error is computed.
        /// </summary>
        /// <param name="network">Network to run forward. Its "PreInference" flag is set and its three dropout settings are set to 1.0.</param>
        /// <param name="dataSet">Data set fed through the network (the training set).</param>
        public static void PreEvaluateNetwork(NeuralNetwork network, DataSet dataSet)
        {
            // Put the network in pre-inference mode (matters for BatchNorm layers)
            network.Set("PreInference", true);

            // Switch dropout off everywhere
            foreach (string dropoutSetting in new[] { "DropoutFC", "DropoutConv", "DropoutInput" })
            {
                network.Set(dropoutSetting, 1.0);
            }

            int batchSize    = network.Layers[0].OutputNeurons.MiniBatchSize;
            int exampleCount = dataSet.DataContainer.Count;

            // Examples are visited in their natural order (the sequence is never shuffled here)
            Sequence orderedIndices = new Sequence(exampleCount);

            int batchStart = 0;
            while (batchStart < exampleCount)
            {
                // Load the next mini-batch into the input layer...
                int[] batchIndices = orderedIndices.GetMiniBatchIndices(batchStart, batchSize);
                network.InputLayer.FeedData(dataSet, batchIndices);

                // ...and propagate it through the whole network; outputs are deliberately ignored
                network.ForwardPass("beginning", "end");

                batchStart += batchSize;
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Runs the network in inference mode over the whole data set (in order, no shuffling) and writes
        /// the misclassified examples to a text file, one per line, as
        /// "index of the example in the data set", a TAB, "label wrongly assigned by the network".
        /// A confirmation message is printed to the console once the list has been written.
        /// </summary>
        /// <param name="network">Network to evaluate. Its "Inference" flag is set and its three dropout settings are set to 1.0.</param>
        /// <param name="dataSet">Labelled data set to classify.</param>
        /// <param name="outputFilePath">Path of the text file to create (passed as-is to <see cref="System.IO.StreamWriter"/>).</param>
        public static void SaveMisclassifiedExamples(NeuralNetwork network, DataSet dataSet, string outputFilePath)
        {
            // Key = index of the misclassified example, Value = label the network assigned to it
            List<KeyValuePair<int, int>> mistakes = new List<KeyValuePair<int, int>>();

            // Put the network in inference mode (matters for BatchNorm layers)
            network.Set("Inference", true);

            // Switch dropout off everywhere
            foreach (string dropoutSetting in new[] { "DropoutFC", "DropoutConv", "DropoutInput" })
            {
                network.Set(dropoutSetting, 1.0);
            }

            int batchSize    = network.Layers[0].OutputNeurons.MiniBatchSize;
            int exampleCount = dataSet.DataContainer.Count;

            Sequence orderedIndices = new Sequence(exampleCount);

            int batchStart = 0;
            while (batchStart < exampleCount)
            {
                // Load the next mini-batch and run it forward
                int[] batchIndices = orderedIndices.GetMiniBatchIndices(batchStart, batchSize);
                network.InputLayer.FeedData(dataSet, batchIndices);
                network.ForwardPass("beginning", "end");

                // When the data set size is not a multiple of the mini-batch size, the last
                // mini-batch contains copies: only the genuine examples are inspected.
                int genuineExamples = Math.Min(batchSize, exampleCount - batchStart);
                for (int slot = 0; slot < genuineExamples; slot++)
                {
                    double[] classScores = network.OutputLayer.OutputClassScores[slot];

                    int predicted = Utils.IndexOfMax(classScores);
                    int expected  = dataSet.DataContainer[batchIndices[slot]].Label;

                    if (predicted == expected)
                    {
                        continue;
                    }

                    mistakes.Add(new KeyValuePair<int, int>(batchIndices[slot], predicted));
                }

                batchStart += batchSize;
            }

            // Dump the collected mistakes to file
            using (System.IO.StreamWriter writer = new System.IO.StreamWriter(outputFilePath))
            {
                foreach (KeyValuePair<int, int> mistake in mistakes)
                {
                    writer.WriteLine(mistake.Key.ToString() + "\t" + mistake.Value.ToString());
                }
                Console.WriteLine("Misclassified examples saved in file " + outputFilePath);
            }
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Evaluates the network on a labelled data set, in inference mode and with dropout switched off.
        /// Examples are visited in order, one mini-batch at a time.
        /// </summary>
        /// <param name="network">Network to evaluate. Its "Inference" flag is set and its three dropout settings are set to 1.0.</param>
        /// <param name="dataSet">Labelled data set to evaluate on.</param>
        /// <param name="loss">Receives the average over the data set of -log(score assigned to the true class).</param>
        /// <param name="error">Receives the fraction of examples whose highest-scoring class differs from the true label.</param>
        public static void EvaluateNetwork(NeuralNetwork network, DataSet dataSet, out double loss, out double error)
        {
            // Put the network in inference mode (matters for BatchNorm layers)
            network.Set("Inference", true);

            loss  = 0.0;
            error = 0.0;

            // Switch dropout off everywhere
            foreach (string dropoutSetting in new[] { "DropoutFC", "DropoutConv", "DropoutInput" })
            {
                network.Set(dropoutSetting, 1.0);
            }

            int batchSize    = network.Layers[0].OutputNeurons.MiniBatchSize;
            int exampleCount = dataSet.DataContainer.Count;

            Sequence orderedIndices = new Sequence(exampleCount);

            int batchStart = 0;
            while (batchStart < exampleCount)
            {
                // Load the next mini-batch and run it forward
                int[] batchIndices = orderedIndices.GetMiniBatchIndices(batchStart, batchSize);
                network.InputLayer.FeedData(dataSet, batchIndices);
                network.ForwardPass("beginning", "end");

                // When the data set size is not a multiple of the mini-batch size, the last
                // mini-batch contains copies: they must not be counted a second time.
                int genuineExamples = Math.Min(batchSize, exampleCount - batchStart);
                for (int slot = 0; slot < genuineExamples; slot++)
                {
                    double[] classScores = network.OutputLayer.OutputClassScores[slot];

                    int predicted = Utils.IndexOfMax(classScores);
                    int expected  = dataSet.DataContainer[batchIndices[slot]].Label;

                    // Accumulate the (un-normalised) loss and the number of mistakes
                    loss -= Math.Log(classScores[expected]);
                    if (predicted != expected)
                    {
                        error += 1;
                    }
                }

                batchStart += batchSize;
            }

            // Turn the running sums into averages over the data set
            error /= exampleCount;
            loss  /= exampleCount;
        }
Ejemplo n.º 4
0
        /*
         *
         * [Obsolete("Deprecated. Use cross-entropy cost instead.")]
         * static double QuadraticCost(float[] targetValues, float[] networkOutputs, out float[] gradient)
         * {
         *  if (targetValues.Length != networkOutputs.Length)
         *      throw new System.InvalidOperationException("Mismatch between length of output array and target (label) array.");
         *
         *  gradient = targetValues.Zip(networkOutputs, (x, y) => y - x).ToArray();
         *  var squaredErrors = gradient.Select(x => Math.Pow(x, 2));
         *
         *  return squaredErrors.Sum() / squaredErrors.Count();
         * }
         *
         *
         * [Obsolete("Deprecated. Use cross-entropy cost instead.")]
         * static double QuadraticCost(NeuralNetwork network, DataSet dataSet)
         * {
         *  float[] dummy;
         *  double totalCost = 0;
         *
         *  for (int i = 0; i < dataSet.Size; i++)
         *  {
         *      network.Layers[0].Input.SetHost(dataSet.GetDataPoint(i));
         *
         *      // Run forward
         *      for (int l = 0; l < network.Layers.Count; l++)
         *      {
         *          network.Layers[l].FeedForward();
         *      }
         *
         *      // Compute cost
         *      totalCost += QuadraticCost(new float[] { (float)dataSet.GetLabel(i) }, network.Layers.Last().Output.GetHost(), out dummy);
         *  }
         *
         *  return totalCost / (2 * dataSet.Size);
         * }
         * */

        #endregion


        #region Junk
#if TOY
        /// <summary>
        /// Training on toy 2D data set (to test network structure, fully connected, tanh, softmax and respective backprops).
        /// Every epoch visits the examples in a fresh random order. Each example is fed to the network on its own:
        /// forward pass, cross-entropy gradient written into the input delta of the last layer, then backward pass.
        /// After the first epoch, and then every consoleOutputLag epochs, the classification error on the training
        /// set is computed and printed; training stops when such an error is below errorTolerance or when
        /// maxTrainingEpochs epochs have been run.
        /// </summary>
        /// <remarks>
        /// miniBatchSize, learningRate, momentumMultiplier, errorTolerance, consoleOutputLag and maxTrainingEpochs
        /// are not declared in this method - presumably they are settings of the enclosing class (TODO confirm).
        /// This method is only compiled when the TOY symbol is defined.
        /// </remarks>
        /// <param name="network">Network to train; its parameters are updated by the backward passes.</param>
        /// <param name="trainingSet">Data set used both for training and for the periodic error evaluation.</param>
        public static void TrainSimpleTest(NeuralNetwork network, DataSet trainingSet)
        {
            // Initializations
            int nLayers = network.NumberOfLayers;

            // Visiting order of the examples; replaced by a new permutation at the start of every epoch
            int[] randomIntSequence = new int[trainingSet.Size];
            int   iDataPoint;
            bool  stopFlag = false;

            double       errorEpoch;
            // The error is evaluated after the very first epoch, then again every consoleOutputLag epochs
            bool         isOutputEpoch           = true;
            int          epochsRemainingToOutput = 0;
            // NOTE(review): miniBatchList and nMiniBatches are never referenced again in this method
            List <int[]> miniBatchList           = new List <int[]>();
            int          nMiniBatches            = trainingSet.Size / miniBatchSize;

            // Only used on the host (non-OpenCL) code path
            float[] outputScores = new float[trainingSet.NumberOfClasses];
            float[] labelArray   = new float[trainingSet.NumberOfClasses];

#if OPENCL_ENABLED
            // NOTE(review): only referenced by the commented-out Cl.EnqueueCopyBuffer call further down
            int inputBufferBytesSize = sizeof(float) * trainingSet.GetDataPoint(0).Length;

            // Global and local work group size for gradient kernel
            IntPtr[] gradientGlobalWorkSizePtr = new IntPtr[] { (IntPtr)(miniBatchSize * trainingSet.NumberOfClasses) };
            IntPtr[] gradientLocalWorkSizePtr  = new IntPtr[] { (IntPtr)(trainingSet.NumberOfClasses) };
#endif

            int epoch = 0;
            do                                                                         // loop over training epochs
            {
                randomIntSequence = Utils.GenerateRandomPermutation(trainingSet.Size); // new every epoch

                // Run over mini-batches
                for (int iStartMiniBatch = 0; iStartMiniBatch < trainingSet.Size; iStartMiniBatch += miniBatchSize)
                {
                    // Run over a mini-batch
                    // NOTE(review): when trainingSet.Size is not a multiple of miniBatchSize, the index
                    // iStartMiniBatch + iWithinMiniBatch used below goes past trainingSet.Size in the last
                    // mini-batch. Presumably randomIntSequence has exactly trainingSet.Size entries, in which
                    // case this access is out of range - confirm and bound the loop if so.
                    for (int iWithinMiniBatch = 0; iWithinMiniBatch < miniBatchSize; iWithinMiniBatch++)
                    {
                        iDataPoint = randomIntSequence[iStartMiniBatch + iWithinMiniBatch];

                        // FEED INPUT DATA
#if OPENCL_ENABLED
                        // Feed by reference
                        network.Layers[0].Input.ActivationsGPU = trainingSet.DataGPU(iDataPoint);

                        // Copy data point in input buffer of the first layer

                        /*
                         * Cl.EnqueueCopyBuffer(CL.Queue,
                         *                      trainingSet.DataGPU(iDataPoint),        // source
                         *                      network.Layers[0].Input.ActivationsGPU, // destination
                         *                      (IntPtr)null,
                         *                      (IntPtr)null,
                         *                      (IntPtr)inputBufferBytesSize,
                         *                      0,
                         *                      null,
                         *                      out CL.Event);
                         * CL.CheckErr(CL.Error, "NetworkTrainer.TrainSimpleTest: Cl.EnqueueCopyBuffer inputData");
                         */
#else
                        network.Layers[0].Input.SetHost(trainingSet.GetDataPoint(iDataPoint));
#endif



                        // FORWARD PASS
                        network.ForwardPass();

                        // COMPUTE ERROR AND GRADIENT
#if OPENCL_ENABLED
                        // Set kernel arguments
                        CL.Error  = Cl.SetKernelArg(CL.CrossEntropyGradient, 0, network.Layers[nLayers - 1].Input.DeltaGPU);
                        CL.Error |= Cl.SetKernelArg(CL.CrossEntropyGradient, 1, network.Layers[nLayers - 1].Output.ActivationsGPU);
                        CL.Error |= Cl.SetKernelArg(CL.CrossEntropyGradient, 2, trainingSet.LabelArraysGPU(iDataPoint));
                        CL.CheckErr(CL.Error, "TrainSimpleTest.CrossEntropyGradient: Cl.SetKernelArg");

                        // Run kernel
                        CL.Error = Cl.EnqueueNDRangeKernel(CL.Queue,
                                                           CL.CrossEntropyGradient,
                                                           1,
                                                           null,
                                                           gradientGlobalWorkSizePtr,
                                                           gradientLocalWorkSizePtr,
                                                           0,
                                                           null,
                                                           out CL.Event);
                        CL.CheckErr(CL.Error, "TrainSimpleTest.CrossEntropyGradient: Cl.EnqueueNDRangeKernel");
#else
                        outputScores = network.Layers.Last().Output.GetHost();
                        labelArray   = trainingSet.GetLabelArray(iDataPoint);

                        // Gradient of cross-entropy cost (directly write in INPUT delta)
                        // i.e. element-wise (output score - label array entry)
                        network.Layers.Last().Input.DeltaHost = outputScores.Zip(labelArray, (x, y) => (x - y)).ToArray();
#endif

#if DEBUGGING_STEPBYSTEP
                        /* ------------------------- DEBUGGING --------------------------------------------- */
                        // Display output activation
#if OPENCL_ENABLED
                        float[] outputScoresGPU = new float[network.Layers[nLayers - 1].Output.NumberOfUnits];
                        CL.Error = Cl.EnqueueReadBuffer(CL.Queue,
                                                        network.Layers[nLayers - 1].Output.ActivationsGPU, // source
                                                        Bool.True,
                                                        (IntPtr)0,
                                                        (IntPtr)(network.Layers[nLayers - 1].Output.NumberOfUnits * sizeof(float)),
                                                        outputScoresGPU,  // destination
                                                        0,
                                                        null,
                                                        out CL.Event);
                        CL.CheckErr(CL.Error, "NetworkTrainer Cl.clEnqueueReadBuffer outputScoresGPU");

                        Console.WriteLine("\nOutput scores:");
                        for (int j = 0; j < outputScoresGPU.Length; j++)
                        {
                            Console.Write("{0}  ", outputScoresGPU[j]);
                        }
                        Console.WriteLine();
#else
                        Console.WriteLine("\nOutput scores:");
                        for (int j = 0; j < outputScores.Length; j++)
                        {
                            Console.Write("{0}  ", outputScores[j]);
                        }
                        Console.WriteLine();
#endif
                        /* ------------------------- END --------------------------------------------- */
#endif


#if DEBUGGING_STEPBYSTEP
                        /* ------------------------- DEBUGGING --------------------------------------------- */

                        // Display true data label CPU

                        float[] labelArrayHost = new float[trainingSet.NumberOfClasses];
                        labelArrayHost = trainingSet.GetLabelArray(iDataPoint);

                        Console.WriteLine("\nData label array on HOST:");
                        for (int j = 0; j < labelArrayHost.Length; j++)
                        {
                            Console.Write("{0}  ", labelArrayHost[j]);
                        }
                        Console.WriteLine();
                        /* ------------------------- END --------------------------------------------- */
#endif

#if DEBUGGING_STEPBYSTEP
                        /* ------------------------- DEBUGGING --------------------------------------------- */
                        // Display true data label
#if OPENCL_ENABLED
                        float[] labelArrayGPU = new float[trainingSet.NumberOfClasses];
                        CL.Error = Cl.EnqueueReadBuffer(CL.Queue,
                                                        trainingSet.LabelArraysGPU(iDataPoint), // source
                                                        Bool.True,
                                                        (IntPtr)0,
                                                        (IntPtr)(trainingSet.NumberOfClasses * sizeof(float)),
                                                        labelArrayGPU,  // destination
                                                        0,
                                                        null,
                                                        out CL.Event);
                        CL.CheckErr(CL.Error, "NetworkTrainer Cl.clEnqueueReadBuffer labelArrayGPU");

                        Console.WriteLine("\nData label array on DEVICE:");
                        for (int j = 0; j < labelArrayGPU.Length; j++)
                        {
                            Console.Write("{0}  ", labelArrayGPU[j]);
                        }
                        Console.WriteLine();
#endif
                        /* ------------------------- END --------------------------------------------- */
#endif

#if DEBUGGING_STEPBYSTEP
                        /* ------------------------- DEBUGGING --------------------------------------------- */
                        // Display gradient

                        float[] gradient = new float[network.Layers[nLayers - 1].Input.NumberOfUnits];
#if OPENCL_ENABLED
                        CL.Error = Cl.EnqueueReadBuffer(CL.Queue,
                                                        network.Layers[nLayers - 1].Input.DeltaGPU, // source
                                                        Bool.True,
                                                        (IntPtr)0,
                                                        (IntPtr)(network.Layers[nLayers - 1].Input.NumberOfUnits * sizeof(float)),
                                                        gradient,  // destination
                                                        0,
                                                        null,
                                                        out CL.Event);
                        CL.CheckErr(CL.Error, "NetworkTrainer Cl.clEnqueueReadBuffer gradient");
#else
                        gradient = network.Layers.Last().Input.DeltaHost;
#endif
                        Console.WriteLine("\nGradient written to final layer:");
                        for (int j = 0; j < gradient.Length; j++)
                        {
                            Console.Write("{0}  ", gradient[j]);
                        }
                        Console.WriteLine();
                        // Step-by-step mode: wait for a key press before processing the next example
                        Console.ReadKey();


                        /*------------------------- END DEBUGGING --------------------------------------------- */
#endif


                        // BACKWARD PASS (includes parameter updating)
                        // Note that this runs once per example, not once per mini-batch

                        network.BackwardPass(learningRate, momentumMultiplier);

                        // TEST: try cleaning stuff
                    } // end loop within a mini-batch
                } // end loop over mini-batches


                // Periodic evaluation of the classification error on the training set
                if (isOutputEpoch)
                {
                    //costEpoch = QuadraticCost(network, trainingSet);
                    errorEpoch = NetworkEvaluator.ComputeClassificationError(network, trainingSet);
                    Console.WriteLine("Epoch {0}: classification error = {1}", epoch, errorEpoch);

                    // Good enough: leave the epoch loop at the end of this iteration
                    if (errorEpoch < errorTolerance)
                    {
                        stopFlag = true;
                    }

                    // Restart the countdown to the next evaluation
                    epochsRemainingToOutput = consoleOutputLag;
                    isOutputEpoch           = false;
                }
                epochsRemainingToOutput--;
                isOutputEpoch = epochsRemainingToOutput == 0;


                // TO-DO: also implement early stopping (stop if validation error starts increasing)
                epoch++;
            } while (epoch < maxTrainingEpochs && !stopFlag);
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Trains <paramref name="network"/> on <paramref name="trainingSet"/>, one shuffled mini-batch at a time
        /// (forward pass, cross-entropy gradient, backward pass with parameter update), with periodic evaluation.
        /// </summary>
        /// <remarks>
        /// <para>
        /// Every consoleOutputLag epochs (and before the first epoch when evaluateBeforeTraining is set) the network
        /// is evaluated on the training set and, if provided, on the validation set; loss and error are printed and
        /// appended to trainingEpochSavePath / validationEpochSavePath.
        /// </para>
        /// <para>
        /// When the validation loss reaches a new minimum the network is saved to file. Otherwise a "bad epoch"
        /// is counted; once patience is exhausted the learning rate is divided by learningRateDecayFactor and the
        /// network is reloaded from file, unless more than maxConsecutiveAnnealings annealings in a row have
        /// already been done without progress, in which case training ends.
        /// </para>
        /// <para>
        /// Training also ends after maxTrainingEpochs epochs, or when CheckForKeyPress raises the stop flag.
        /// The hyper-parameters named above are not declared in this method - presumably they are settings of
        /// the enclosing class (TODO confirm).
        /// </para>
        /// </remarks>
        /// <param name="network">Network to train. Note that when the network is reloaded from file the new
        /// instance only replaces this method's local reference; the caller keeps the instance it passed in.</param>
        /// <param name="trainingSet">Data set used for training and for the periodic training evaluation.</param>
        /// <param name="validationSet">Optional validation set; pass null to skip validation (in which case the
        /// save / patience / annealing logic never runs).</param>
        /// <exception cref="ArgumentNullException"><paramref name="network"/> or <paramref name="trainingSet"/> is null.</exception>
        /// <exception cref="InvalidOperationException">The training mode is neither "New" nor "Resume" (any casing).</exception>
        public static void Train(NeuralNetwork network, DataSet trainingSet, DataSet validationSet)
        {
            if (network == null)
            {
                throw new ArgumentNullException("network");
            }
            if (trainingSet == null)
            {
                throw new ArgumentNullException("trainingSet");
            }

            // Initialize parameters or load them (the mode name is matched case-insensitively)
            if (string.Equals(trainingMode, "new", StringComparison.OrdinalIgnoreCase))
            {
                // Setup miniBatchSize
                network.Set("MiniBatchSize", miniBatchSize);
                network.InitializeParameters("random");
            }
            else if (string.Equals(trainingMode, "resume", StringComparison.OrdinalIgnoreCase))
            {
                network.InitializeParameters("load");
            }
            else
            {
                throw new InvalidOperationException("Please set TrainingMode to either ''New'' or ''Resume''.");
            }

            // Set dropout
            network.Set("DropoutFC", dropoutFC);
            network.Set("DropoutConv", dropoutConv);
            network.Set("DropoutInput", dropoutInput);

            Sequence indicesSequence = new Sequence(trainingSet.DataContainer.Count);

            // Timers: one for whole evaluations / epochs, three accumulating the time spent in each phase
            // of an epoch (the phase timers are reset at the beginning of every epoch, so they start stopped).
            Stopwatch stopwatch     = Stopwatch.StartNew();
            Stopwatch stopwatchFwd  = new Stopwatch();
            Stopwatch stopwatchGrad = new Stopwatch();
            Stopwatch stopwatchBwd  = new Stopwatch();

            int  epoch      = 0;
            int  nBadEpochs = 0;
            int  consecutiveAnnealingCounter = 0;
            bool stopFlag = false;
            int  epochsRemainingToOutput = (evaluateBeforeTraining == true) ? 0 : consoleOutputLag;


            while (!stopFlag) // begin loop over training epochs
            {
                if (epochsRemainingToOutput == 0)
                {
                    /**************
                    * Evaluation *
                    **************/

                    // Pre inference (for batch-norm) - currently disabled
                    //network.Set("PreInference", true);
                    //Console.WriteLine("Re-computing batch-norm means and variances...");
                    //NetworkEvaluator.PreEvaluateNetwork(network, trainingSet);

                    // Evaluate on training set...
                    network.Set("Inference", true);
                    Console.WriteLine("Evaluating on TRAINING set...");
                    stopwatch.Restart();
                    NetworkEvaluator.EvaluateNetwork(network, trainingSet, out lossTraining, out errorTraining);
                    Console.WriteLine("\tLoss = {0}\n\tError = {1}\n\tEval runtime = {2}ms\n",
                                      lossTraining, errorTraining, stopwatch.ElapsedMilliseconds);
                    // ...and append loss and error to file
                    using (System.IO.StreamWriter trainingEpochOutputFile = new System.IO.StreamWriter(trainingEpochSavePath, true))
                    {
                        trainingEpochOutputFile.WriteLine(lossTraining.ToString() + "\t" + errorTraining.ToString());
                    }

                    // Evaluate on validation set...
                    if (validationSet != null)
                    {
                        Console.WriteLine("Evaluating on VALIDATION set...");
                        stopwatch.Restart();
                        NetworkEvaluator.EvaluateNetwork(network, validationSet, out newLossValidation, out newErrorValidation);
                        Console.WriteLine("\tLoss = {0}\n\tError = {1}\n\tEval runtime = {2}ms\n",
                                          newLossValidation, newErrorValidation, stopwatch.ElapsedMilliseconds);
                        // ...append loss and error to file
                        using (System.IO.StreamWriter validationEpochOutputFile = new System.IO.StreamWriter(validationEpochSavePath, true))
                        {
                            validationEpochOutputFile.WriteLine(newLossValidation.ToString() + "\t" + newErrorValidation.ToString());
                        }

                        if (newLossValidation < minLossValidation)
                        {
                            // nice, validation loss is decreasing!
                            minLossValidation = newLossValidation;
                            errorValidation   = newErrorValidation;

                            // Save network to file
                            Utils.SaveNetworkToFile(network, networkOutputFilePath);

                            // and keep training
                            nBadEpochs = 0;
                            consecutiveAnnealingCounter = 0;
                        }
                        else
                        {
                            nBadEpochs++;
                            Console.WriteLine("Loss on the validation set has been increasing for {0} epoch(s)...", nBadEpochs);
                            if (patience - nBadEpochs > 0)
                            {
                                Console.WriteLine("...I'll be patient for {0} more epoch(s)!", patience - nBadEpochs); // keep training
                            }
                            else
                            {
                                // Out of patience: anneal the learning rate and restart from the last saved
                                // network, unless that has already been tried too many times in a row.
                                Console.WriteLine("...and I've run out of patience!");

                                if (consecutiveAnnealingCounter > maxConsecutiveAnnealings)
                                {
                                    Console.WriteLine("\nReached the maximum number of consecutive annealings without progress. \nTraining ends here.");
                                    break; // leaves the loop over training epochs
                                }

                                Console.WriteLine("\nI'm annealing the learning rate:\n\tWas {0}\n\tSetting it to {1}.", learningRate, learningRate / learningRateDecayFactor);
                                learningRate /= learningRateDecayFactor;
                                consecutiveAnnealingCounter++;

                                Console.WriteLine("\nAnd I'm loading the network saved {0} epochs ago and resume the training from there.", patience);

                                // The original author flagged the explicit release + forced collection below as
                                // bad practice; it is kept as-is because the intent appears to be dropping the
                                // current network before the saved one is loaded.
                                // NOTE(review): the network is reloaded from a hard-coded folder, whereas it is
                                // saved to networkOutputFilePath - confirm that the two locations match.
                                string networkName = network.Name;
                                network = null; // this is BAD PRACTICE
                                GC.Collect();   // this is BAD PRACTICE
                                network = Utils.LoadNetworkFromFile("../../../../Results/Networks/", networkName);
                                network.Set("MiniBatchSize", miniBatchSize);
                                network.InitializeParameters("load");


                                nBadEpochs = 0;
                            }
                        }
                    }

                    // Restore dropout (evaluation switches it off; this also configures a freshly reloaded network)
                    network.Set("DropoutFC", dropoutFC);
                    network.Set("DropoutConv", dropoutConv);
                    network.Set("DropoutInput", dropoutInput);

                    epochsRemainingToOutput = consoleOutputLag;
                }
                epochsRemainingToOutput--;

                epoch++;

                if (epoch > maxTrainingEpochs)
                {
                    break;
                }

                /************
                * Training *
                ************/

                network.Set("Training", true);
                network.Set("EpochBeginning", true);

                Console.WriteLine("\nEpoch {0}...", epoch);


                stopwatch.Restart();
                stopwatchFwd.Reset();
                stopwatchGrad.Reset();
                stopwatchBwd.Reset();

                indicesSequence.Shuffle(); // shuffle examples order at every epoch

                // Run over mini-batches
                for (int iStartMiniBatch = 0; iStartMiniBatch < trainingSet.DataContainer.Count; iStartMiniBatch += miniBatchSize)
                {
                    // Feed a mini-batch to the network
                    int[] miniBatch = indicesSequence.GetMiniBatchIndices(iStartMiniBatch, miniBatchSize);
                    network.InputLayer.FeedData(trainingSet, miniBatch);

                    // Forward pass
                    stopwatchFwd.Start();
                    network.ForwardPass("beginning", "end");
                    stopwatchFwd.Stop();

                    // Compute the gradient of the cross-entropy loss
                    stopwatchGrad.Start();
                    network.CrossEntropyGradient(trainingSet, miniBatch);
                    stopwatchGrad.Stop();

                    // Backpropagate gradient and update parameters
                    stopwatchBwd.Start();
                    network.BackwardPass(learningRate, momentumCoefficient, weightDecayCoeff, weightMaxNorm);
                    stopwatchBwd.Stop();

                    // Give the user a chance to interrupt; a raised stop flag ends both loops
                    CheckForKeyPress(ref network, ref stopFlag);
                    if (stopFlag)
                    {
                        break;
                    }
                } // end of training epoch

                Console.Write(" Training runtime = {0}ms\n", stopwatch.ElapsedMilliseconds);

                Console.WriteLine("Forward: {0}ms - Gradient: {1}ms - Backward: {2}ms\n",
                                  stopwatchFwd.ElapsedMilliseconds, stopwatchGrad.ElapsedMilliseconds, stopwatchBwd.ElapsedMilliseconds);

#if TIMING_LAYERS
                Console.WriteLine("\n Detailed runtimes::");

                Console.WriteLine("\nCONV: \n\tForward: {0}ms \n\tBackprop: {1}ms \n\tUpdateSpeeds: {2}ms \n\tUpdateParameters: {3}ms \n\tPadUnpad: {4}ms",
                                  Utils.ConvForwardTimer.ElapsedMilliseconds, Utils.ConvBackpropTimer.ElapsedMilliseconds,
                                  Utils.ConvUpdateSpeedsTimer.ElapsedMilliseconds, Utils.ConvUpdateParametersTimer.ElapsedMilliseconds, Utils.ConvPadUnpadTimer.ElapsedMilliseconds);

                Console.WriteLine("\nPOOLING: \n\tForward: {0}ms \n\tBackprop: {1}ms",
                                  Utils.PoolingForwardTimer.ElapsedMilliseconds, Utils.PoolingBackpropTimer.ElapsedMilliseconds);

                Console.WriteLine("\nNONLINEARITIES: \n\tForward: {0}ms \n\tBackprop: {1}ms",
                                  Utils.NonlinearityForwardTimer.ElapsedMilliseconds, Utils.NonlinearityBackpropTimer.ElapsedMilliseconds);

                Console.WriteLine("\nFULLY CONNECTED: \n\tForward: {0}ms \n\tBackprop: {1}ms \n\tUpdateSpeeds: {2}ms \n\tUpdateParameters: {3}ms",
                                  Utils.FCForwardTimer.ElapsedMilliseconds, Utils.FCBackpropTimer.ElapsedMilliseconds,
                                  Utils.FCUpdateSpeedsTimer.ElapsedMilliseconds, Utils.FCUpdateParametersTimer.ElapsedMilliseconds);

                Console.WriteLine("\nBATCHNORM FC \n\tForward: {0}ms \n\tBackprop: {1}ms \n\tUpdateSpeeds: {2}ms \n\tUpdateParameters: {3}ms",
                                  Utils.BNFCForwardTimer.ElapsedMilliseconds, Utils.BNFCBackpropTimer.ElapsedMilliseconds,
                                  Utils.BNFCUpdateSpeedsTimer.ElapsedMilliseconds, Utils.BNFCUpdateParametersTimer.ElapsedMilliseconds);

                Console.WriteLine("\nBATCHNORM CONV \n\tForward: {0}ms \n\tBackprop: {1}ms \n\tUpdateSpeeds: {2}ms \n\tUpdateParameters: {3}ms",
                                  Utils.BNConvForwardTimer.ElapsedMilliseconds, Utils.BNConvBackpropTimer.ElapsedMilliseconds,
                                  Utils.BNConvUpdateSpeedsTimer.ElapsedMilliseconds, Utils.BNConvUpdateParametersTimer.ElapsedMilliseconds);

                Console.WriteLine("\nSOFTMAX \n\tForward: {0}ms", Utils.SoftmaxTimer.ElapsedMilliseconds);

                Utils.ResetTimers();
#endif
            }

            stopwatch.Stop();
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Compares the gradients computed by backpropagation against central finite-difference
        /// gradients, for every layer of <paramref name="network"/> whose Type equals the class-level
        /// <c>typeToCheck</c>. For each such layer, first the gradients with respect to the layer's
        /// parameters and then those with respect to the layer's input are checked. Results are
        /// written to the console, and the method waits for a key press after each section.
        /// </summary>
        /// <param name="network">
        /// Network to check. Its mini-batch size is set to the class-level <c>miniBatchSize</c>, its
        /// parameters are re-initialized with "random", and it is left with "Inference" set to true.
        /// </param>
        /// <param name="dataSet">Data set from which a single shuffled mini-batch is drawn.</param>
        public static void Check(NeuralNetwork network, DataSet dataSet)
        {
            // Setup network
            network.Set("MiniBatchSize", miniBatchSize);
            network.InitializeParameters("random");
            network.Set("DropoutFC", 1.0);
            network.Set("Training", true);
            network.Set("EpochBeginning", true);

            // Get a mini-batch of data
            Sequence indicesSequence = new Sequence(dataSet.DataContainer.Count);
            indicesSequence.Shuffle();
            int[] miniBatch = indicesSequence.GetMiniBatchIndices(0, miniBatchSize);

            // Run network forward and backward
            network.InputLayer.FeedData(dataSet, miniBatch);
            network.ForwardPass("beginning", "end");

            List<int> trueLabels = new List<int>();
            for (int m = 0; m < miniBatchSize; m++)
            {
                trueLabels.Add(dataSet.DataContainer[miniBatch[m]].Label);
            }

            network.CrossEntropyGradient(dataSet, miniBatch);
            network.BackwardPass(0.0, 0.0, 0.0, 1e10); // no momentum, no learning rate, no weight decay

            // Re-forward pass (in case there are batch-norm layers), then switch to inference mode
            network.Set("PreInference", true);
            network.ForwardPass("beginning", "end");
            network.Set("Inference", true);

            for (int iLayer = 1; iLayer < network.NumberOfLayers; iLayer++)
            {
                if (network.Layers[iLayer].Type == typeToCheck)
                {
                    Console.WriteLine("\nChecking gradients in layer {0} ({1})...", iLayer, network.Layers[iLayer].Type);
                    int    nChecks         = 0;
                    int    nErrors         = 0;
                    double cumulativeError = 0.0;

                    double[] parametersBackup   = network.Layers[iLayer].GetParameters();
                    double[] parameterGradients = network.Layers[iLayer].GetParameterGradients();
                    int      nParameters        = parametersBackup.Length;

                    // First parameters

                    Console.WriteLine("\n...with respect to PARAMETERS");
                    for (int j = 0; j < nParameters; j++)
                    {
                        // decrease jth parameter by EPSILON, then run network forward and compute loss
                        double[] parametersMinus = new double[nParameters];
                        Array.Copy(parametersBackup, parametersMinus, nParameters);
                        parametersMinus[j] -= EPSILON;
                        network.Layers[iLayer].SetParameters(parametersMinus);
                        double lossMinus = ForwardAndComputeLoss(network, iLayer, trueLabels);

                        // increase jth parameter by EPSILON, then run network forward and compute loss
                        double[] parametersPlus = new double[nParameters];
                        Array.Copy(parametersBackup, parametersPlus, nParameters);
                        parametersPlus[j] += EPSILON;
                        network.Layers[iLayer].SetParameters(parametersPlus);
                        double lossPlus = ForwardAndComputeLoss(network, iLayer, trueLabels);

                        // compute gradient numerically (central difference)
                        double gradientNumerical = (lossPlus - lossMinus) / (2 * EPSILON);

                        // retrieve gradient computed with backprop
                        double gradientBackprop = parameterGradients[j];

                        nChecks++;

                        // compare the gradients
                        double relativeError = ComputeRelativeError(gradientNumerical, gradientBackprop);
                        if (relativeError > MAX_RELATIVE_ERROR)
                        {
                            Console.Write("\nGradient check failed for parameter {0}\n", j);
                            Console.WriteLine("\tBackpropagation gradient: {0}", gradientBackprop);
                            Console.WriteLine("\tFinite difference gradient: {0}", gradientNumerical);
                            Console.WriteLine("\tRelative error: {0}", relativeError);

                            nErrors++;
                        }

                        // running mean of the relative error over all checks so far
                        cumulativeError = (relativeError + (nChecks - 1) * cumulativeError) / nChecks;

                        // restore original weights before checking next gradient
                        network.Layers[iLayer].SetParameters(parametersBackup);
                    }

                    ReportCheckResultsAndPause(nChecks, nErrors, cumulativeError);

                    // Now inputs

                    nChecks         = 0;
                    nErrors         = 0;
                    cumulativeError = 0.0;

                    double[] inputBackup    = network.Layers[iLayer].GetInput();
                    double[] inputGradients = network.Layers[iLayer].GetInputGradients();
                    int      inputArraySize = inputBackup.Length;

                    Console.WriteLine("\n...with respect to INPUT");
                    for (int j = 0; j < inputArraySize; j++)
                    {
                        // decrease jth input by EPSILON, then run network forward and compute loss
                        double[] inputMinus = new double[inputArraySize];
                        Array.Copy(inputBackup, inputMinus, inputArraySize);
                        inputMinus[j] -= EPSILON;
                        network.Layers[iLayer].SetInput(inputMinus);
                        double lossMinus = ForwardAndComputeLoss(network, iLayer, trueLabels);

                        // increase jth input by EPSILON, then run network forward and compute loss
                        double[] inputPlus = new double[inputArraySize];
                        Array.Copy(inputBackup, inputPlus, inputArraySize);
                        inputPlus[j] += EPSILON;
                        network.Layers[iLayer].SetInput(inputPlus);
                        double lossPlus = ForwardAndComputeLoss(network, iLayer, trueLabels);

                        // compute gradient numerically (central difference)
                        double gradientNumerical = (lossPlus - lossMinus) / (2 * EPSILON);

                        // retrieve gradient computed with backprop
                        // NOTE: it is divided by miniBatchSize because HERE the loss is defined as Loss / miniBatchSize
                        double gradientBackprop = inputGradients[j] / miniBatchSize;

                        nChecks++;

                        // compare the gradients
                        double relativeError = ComputeRelativeError(gradientNumerical, gradientBackprop);
                        if (relativeError > MAX_RELATIVE_ERROR)
                        {
                            Console.Write("\nGradient check failed for input {0}\n", j);
                            Console.WriteLine("\tBackpropagation gradient: {0}", gradientBackprop);
                            Console.WriteLine("\tFinite difference gradient: {0}", gradientNumerical);
                            Console.WriteLine("\tRelative error: {0}", relativeError);

                            nErrors++;
                        }

                        // running mean of the relative error over all checks so far
                        cumulativeError = (relativeError + (nChecks - 1) * cumulativeError) / nChecks;

                        // restore original input before checking next gradient
                        network.Layers[iLayer].SetInput(inputBackup);
                    }

                    ReportCheckResultsAndPause(nChecks, nErrors, cumulativeError);
                }
            }
        }

        /// <summary>
        /// Runs the network forward from layer <paramref name="iLayer"/> to the end and returns the
        /// mean, over the mini-batch, of the negative log of the output score of each example's true class.
        /// </summary>
        /// <param name="network">Network to run; its output layer's class scores are read after the pass.</param>
        /// <param name="iLayer">Index of the layer from which the forward pass starts.</param>
        /// <param name="trueLabels">True label of each example in the mini-batch, in mini-batch order.</param>
        /// <returns>The summed negative log-score divided by the class-level <c>miniBatchSize</c>.</returns>
        private static double ForwardAndComputeLoss(NeuralNetwork network, int iLayer, List<int> trueLabels)
        {
            network.ForwardPass(iLayer, "end");
            List<double[]> outputClassScores = network.OutputLayer.OutputClassScores;

            double loss = 0;
            for (int m = 0; m < miniBatchSize; m++)
            {
                int trueLabel = trueLabels[m];
                loss -= Math.Log(outputClassScores[m][trueLabel]); // score of true class in example m
            }
            loss /= miniBatchSize;

            return loss;
        }

        /// <summary>
        /// Returns |numerical - backprop| / max(|numerical|, |backprop|), or 0 when both gradients are
        /// exactly zero. Without the zero guard the quotient would be 0/0 = NaN, which is never greater
        /// than the error threshold (so it is not flagged) yet turns the running average into NaN.
        /// </summary>
        /// <param name="gradientNumerical">Finite-difference gradient.</param>
        /// <param name="gradientBackprop">Gradient computed by backpropagation.</param>
        /// <returns>The relative error between the two gradients.</returns>
        private static double ComputeRelativeError(double gradientNumerical, double gradientBackprop)
        {
            double scale = Math.Max(Math.Abs(gradientNumerical), Math.Abs(gradientBackprop));
            if (scale == 0.0)
            {
                // Both gradients are exactly zero: they agree perfectly.
                return 0.0;
            }

            return Math.Abs(gradientNumerical - gradientBackprop) / scale;
        }

        /// <summary>
        /// Writes the summary of one gradient-check section to the console and blocks until a key is pressed.
        /// </summary>
        /// <param name="nChecks">Number of gradient entries compared.</param>
        /// <param name="nErrors">Number of entries whose relative error exceeded the threshold.</param>
        /// <param name="cumulativeError">Mean relative error over all compared entries.</param>
        private static void ReportCheckResultsAndPause(int nChecks, int nErrors, double cumulativeError)
        {
            if (nChecks == 0)
            {
                Console.Write("\nAll gradients are zero... Something is probably wrong!");
            }
            else if (nErrors == 0)
            {
                Console.Write("\nGradient check 100% passed!");
                Console.Write("\nAverage error = {0}", cumulativeError);
            }
            else
            {
                Console.Write("\n{0} errors out of {1} checks.", nErrors, nChecks);
                Console.Write("\nAverage error = {0}", cumulativeError);
            }
            Console.Write("\n\n");
            Console.Write("Press any key to continue...");
            Console.Write("\n\n");
            Console.ReadKey();
        }