Ejemplo n.º 1
0
        /// <summary>
        /// Allocates per-mini-batch storage for activations and deltas:
        /// on the GPU, two zeroed float buffers of NumberOfUnits * MiniBatchSize elements;
        /// on the CPU, one double array per mini-batch example for each quantity.
        /// </summary>
        /// <param name="MiniBatchSize">Number of examples processed together.</param>
        public void SetupBuffers(int MiniBatchSize)
        {
            this.miniBatchSize = MiniBatchSize;


#if OPENCL_ENABLED
            this.activationsGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                                       MemFlags.ReadWrite | MemFlags.AllocHostPtr,
                                                       (IntPtr)(sizeof(float) * NumberOfUnits * MiniBatchSize),
                                                       out OpenCLSpace.ClError);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.CreateBuffer Neurons.activationsGPU");
            OpenCLSpace.WipeBuffer(activationsGPU, NumberOfUnits * MiniBatchSize, typeof(float));

            this.deltaGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                                 MemFlags.ReadWrite | MemFlags.AllocHostPtr,
                                                 (IntPtr)(sizeof(float) * NumberOfUnits * MiniBatchSize),
                                                 out OpenCLSpace.ClError);
            // Check the CreateBuffer error BEFORE WipeBuffer, which overwrites OpenCLSpace.ClError.
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.CreateBuffer Neurons.deltaGPU");
            // BUG FIX: the original wiped activationsGPU a second time here, leaving deltaGPU uninitialised.
            OpenCLSpace.WipeBuffer(deltaGPU, NumberOfUnits * MiniBatchSize, typeof(float));
#else
            // CPU path: one activation array and one delta array per mini-batch example.
            for (int m = 0; m < MiniBatchSize; m++)
            {
                this.activations.Add(new double[nUnits]);
                this.delta.Add(new double[nUnits]);
            }
#endif
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Forward pass of the fully connected layer over the whole mini-batch.
        /// GPU path: a single FCForward kernel launch; the kernel also receives the
        /// dropout parameter, a fresh random seed and the dropout mask buffer, so
        /// dropout is presumably applied inside the kernel (TODO confirm in kernel source).
        /// CPU path: per-example matrix-vector product plus biases; dropout is NOT
        /// applied on the CPU yet (see TODO below).
        /// </summary>
        public override void FeedForward()
        {
#if TIMING_LAYERS
            Utils.FCForwardTimer.Start();
#endif

#if OPENCL_ENABLED
            // Set kernel arguments: destination activations, source activations,
            // parameters, geometry, then the dropout inputs.
            OpenCLSpace.ClError  = Cl.SetKernelArg(OpenCLSpace.FCForward, 0, outputNeurons.ActivationsGPU);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.FCForward, 1, inputNeurons.ActivationsGPU);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.FCForward, 2, weightsGPU);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.FCForward, 3, biasesGPU);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.FCForward, 4, (IntPtr)sizeof(int), nInputUnits);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.FCForward, 5, (IntPtr)sizeof(int), nOutputUnits);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.FCForward, 6, (IntPtr)sizeof(int), inputNeurons.MiniBatchSize);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.FCForward, 7, (IntPtr)sizeof(float), (float)dropoutParameter);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.FCForward, 8, (IntPtr)sizeof(ulong), (ulong)Guid.NewGuid().GetHashCode()); // this should be quite a good random seed
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.FCForward, 9, dropoutMaskGPU);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "FullyConnected.FeedForward(): Cl.SetKernelArg");

            // Run kernel as a 2D NDRange; work sizes are precomputed fields.
            OpenCLSpace.ClError = Cl.EnqueueNDRangeKernel(OpenCLSpace.Queue,
                                                          OpenCLSpace.FCForward,
                                                          2,
                                                          null,
                                                          forwardGlobalWorkSizePtr,
                                                          forwardLocalWorkSizePtr,
                                                          0,
                                                          null,
                                                          out OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "FullyConnected.FeedForward(): Cl.EnqueueNDRangeKernel");

            OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

            // Block until the queue has drained (synchronous execution model).
            OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");
#else
            // TODO: add dropout CPU
            // Generate dropout mask. NOTE(review): the mask is filled here but never
            // read by the computation below, so CPU dropout is currently a no-op
            // (exact keep/drop semantics of the mask are defined by its consumer).
            if (dropoutParameter < 1)
            {
                for (int iUnit = 0; iUnit < nOutputUnits * inputNeurons.MiniBatchSize; ++iUnit)
                {
                    dropoutMask[iUnit] = Global.RandomDouble() < dropoutParameter;
                }
            }

            // output[m] = W * input[m] + b, for each example m in the mini-batch.
            for (int m = 0; m < inputNeurons.MiniBatchSize; m++)
            {
                double[] unbiasedOutput = Utils.MultiplyMatrixByVector(weights, inputNeurons.GetHost()[m]);
                this.outputNeurons.SetHost(m, unbiasedOutput.Zip(biases, (x, y) => x + y).ToArray());
            }
#endif


#if TIMING_LAYERS
            Utils.FCForwardTimer.Stop();
#endif
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Backward pass of the tanh layer: runs the TanhBackward kernel, which
        /// computes the input-side delta from the output-side delta and the output
        /// activations (the tanh derivative can be expressed via the activations).
        /// NOTE(review): unlike sibling layers this method has no #if OPENCL_ENABLED
        /// guard and no CPU fallback — confirm it is only compiled for OpenCL builds.
        /// </summary>
        public override void BackPropagate()
        {
            // Set kernel arguments: destination delta, source delta, activations,
            // the scalar beta parameter (semantics defined in the kernel), geometry.
            OpenCLSpace.ClError  = Cl.SetKernelArg(OpenCLSpace.TanhBackward, 0, inputNeurons.DeltaGPU);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.TanhBackward, 1, outputNeurons.DeltaGPU);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.TanhBackward, 2, outputNeurons.ActivationsGPU);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.TanhBackward, 3, (IntPtr)sizeof(float), beta);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.TanhBackward, 4, (IntPtr)sizeof(int), nInputUnits);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.TanhBackward, 5, (IntPtr)sizeof(int), inputNeurons.MiniBatchSize);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Tanh.BackPropagate(): Cl.SetKernelArg");

            // Run kernel (1D NDRange; work sizes are precomputed fields).
            OpenCLSpace.ClError = Cl.EnqueueNDRangeKernel(OpenCLSpace.Queue,
                                                          OpenCLSpace.TanhBackward,
                                                          1,
                                                          null,
                                                          globalWorkSizePtr,
                                                          localWorkSizePtr,
                                                          0,
                                                          null,
                                                          out OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Tanh.BackPropagate(): Cl.EnqueueNDRangeKernel");

            OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

            // Block until the kernel has completed.
            OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Reads the input-side error signal (delta) back from the device and
        /// returns it as a double array of length nInputUnits * MiniBatchSize.
        /// </summary>
        /// <returns>Flattened input gradients for the whole mini-batch.</returns>
        public virtual double[] GetInputGradients()
        {
            int bufferLength = nInputUnits * inputNeurons.MiniBatchSize;

            // Blocking read of the device-side delta buffer into a host float array.
            float[] hostBuffer = new float[bufferLength];
            OpenCLSpace.ClError = Cl.EnqueueReadBuffer(OpenCLSpace.Queue,
                                                       inputNeurons.DeltaGPU,     // source
                                                       Bool.True,                 // blocking read
                                                       (IntPtr)0,
                                                       (IntPtr)(sizeof(float) * bufferLength),
                                                       hostBuffer,                // destination
                                                       0,
                                                       null,
                                                       out OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "clEnqueueReadBuffer");

            OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

            OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");

            // Widen each float to double before handing the gradients back.
            double[] inputGradients = Array.ConvertAll(hostBuffer, v => (double)v);
            return inputGradients;
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Forward pass of the average-pooling layer: runs the AveragePoolingForward
        /// kernel, writing pooled output activations from the input activations.
        /// NOTE(review): no #if OPENCL_ENABLED guard or CPU fallback here — confirm
        /// this layer is only compiled for OpenCL builds.
        /// </summary>
        public override void FeedForward()
        {
#if TIMING_LAYERS
            // TODO: add timer
#endif

            // Arguments: output activations, input activations, then geometry
            // (units, area per feature map, depth) and the mini-batch size.
            OpenCLSpace.ClError  = Cl.SetKernelArg(OpenCLSpace.AveragePoolingForward, 0, outputNeurons.ActivationsGPU);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.AveragePoolingForward, 1, inputNeurons.ActivationsGPU);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.AveragePoolingForward, 2, (IntPtr)sizeof(int), nInputUnits);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.AveragePoolingForward, 3, (IntPtr)sizeof(int), inputArea);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.AveragePoolingForward, 4, (IntPtr)sizeof(int), inputDepth);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.AveragePoolingForward, 5, (IntPtr)sizeof(int), inputNeurons.MiniBatchSize);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.SetKernelArg");

            // 2D NDRange launch; work sizes are precomputed fields.
            OpenCLSpace.ClError = Cl.EnqueueNDRangeKernel(OpenCLSpace.Queue,
                                                          OpenCLSpace.AveragePoolingForward,
                                                          2,
                                                          null,
                                                          fwdGlobalWorkSizePtr,
                                                          fwdLocalWorkSizePtr,
                                                          0,
                                                          null,
                                                          out OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.EnqueueNDRangeKernel");

            OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

            // Block until the kernel has completed.
            OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");

#if TIMING_LAYERS
            // TODO: add timer
#endif
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Narrows the given flattened input (one whole mini-batch) to float and
        /// writes it into the input layer's activations buffer on the device.
        /// </summary>
        /// <param name="NewInput">Input values; at least nInputUnits * MiniBatchSize elements.</param>
        public virtual void SetInput(double[] NewInput)
        {
            int nValues = nInputUnits * inputNeurons.MiniBatchSize;

            // Device buffers hold floats, so narrow each double first.
            float[] hostInput = new float[nValues];
            for (int i = 0; i < nValues; ++i)
            {
                hostInput[i] = (float)NewInput[i];
            }

            // Blocking write into the device-side activations buffer.
            OpenCLSpace.ClError = Cl.EnqueueWriteBuffer(OpenCLSpace.Queue,
                                                        inputNeurons.ActivationsGPU,
                                                        OpenCL.Net.Bool.True,      // blocking write
                                                        (IntPtr)0,
                                                        (IntPtr)(sizeof(float) * nValues),
                                                        hostInput,
                                                        0,
                                                        null,
                                                        out OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.EnqueueWriteBuffer");

            OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

            OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Copies this layer's trainable parameter buffers (gamma and beta, one value
        /// per depth slice) from the device back into the host-side arrays
        /// gammaHost / betaHost. Update speeds are deliberately not copied.
        /// </summary>
        public override void CopyBuffersToHost()
        {
            // Blocking read of gamma (inputDepth floats).
            OpenCLSpace.ClError = Cl.EnqueueReadBuffer(OpenCLSpace.Queue,
                                                       gammaGPU,  // source
                                                       Bool.True,
                                                       (IntPtr)0,
                                                       (IntPtr)(sizeof(float) * inputDepth),
                                                       gammaHost,   // destination
                                                       0,
                                                       null,
                                                       out OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "clEnqueueReadBuffer gammaGPU");

            OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

            // Blocking read of beta (inputDepth floats).
            OpenCLSpace.ClError = Cl.EnqueueReadBuffer(OpenCLSpace.Queue,
                                                       betaGPU,  // source
                                                       Bool.True,
                                                       (IntPtr)0,
                                                       (IntPtr)(sizeof(float) * inputDepth),
                                                       betaHost,   // destination
                                                       0,
                                                       null,
                                                       out OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "clEnqueueReadBuffer betaGPU");

            OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

            OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");

            // Speeds are not saved.
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Backward pass of the skip-connection (residual) layer: runs the
        /// SkipBackward kernel, which accumulates the gradient arriving through the
        /// skip path into the input-side delta buffer.
        /// </summary>
        public override void BackPropagate()
        {
            // Errors have already been backpropagated to input of first convolutional layer (see method UpdateSpeeds)
            // Now just cumulate the gradients coming from the skip connection

            OpenCLSpace.ClError  = Cl.SetKernelArg(OpenCLSpace.SkipBackward, 0, inputNeurons.DeltaGPU);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.SkipBackward, 1, outputNeurons.DeltaGPU);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.SkipBackward, 2, (IntPtr)sizeof(int), nInputUnits);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.SkipBackward, 3, (IntPtr)sizeof(int), inputNeurons.MiniBatchSize);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.SetKernelArg");

            // Run kernel (1D NDRange; work sizes are precomputed fields)
            OpenCLSpace.ClError = Cl.EnqueueNDRangeKernel(OpenCLSpace.Queue,
                                                          OpenCLSpace.SkipBackward,
                                                          1,
                                                          null,
                                                          globalWorkSizePtr,
                                                          localWorkSizePtr,
                                                          0,
                                                          null,
                                                          out OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.EnqueueNDRangeKernel");

            OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

            // Block until the kernel has completed.
            OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Sets up the output geometry (identical to the input — this layer preserves
        /// shape) and allocates the OpenCL buffers used by the layer: per-depth-slice
        /// mean/variance, their cumulative (running) counterparts, and a buffer for the
        /// normalized input needed during backpropagation.
        /// </summary>
        public override void SetupOutput()
        {
            this.outputWidth  = inputWidth;
            this.outputHeight = inputHeight;
            this.outputDepth  = inputDepth;
            this.inputArea    = inputHeight * inputWidth;

            this.nOutputUnits  = nInputUnits;
            this.outputNeurons = new Neurons(nOutputUnits);

            // Initialize OpenCL buffers

            // 1. mean, variance and their cumulative averages (one float per depth slice)

            // FIX: error messages previously said "InitializeParameters()" (a different
            // method) and did not name the failing buffer; each now identifies both.
            this.meanGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                                MemFlags.ReadWrite,
                                                (IntPtr)(sizeof(float) * inputDepth),
                                                out OpenCLSpace.ClError);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "SetupOutput(): Cl.CreateBuffer meanGPU");
            OpenCLSpace.WipeBuffer(meanGPU, inputDepth, typeof(float));

            this.varianceGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                                    MemFlags.ReadWrite,
                                                    (IntPtr)(sizeof(float) * inputDepth),
                                                    out OpenCLSpace.ClError);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "SetupOutput(): Cl.CreateBuffer varianceGPU");
            OpenCLSpace.WipeBuffer(varianceGPU, inputDepth, typeof(float));

            // (Initialize cumulative means to zero...)
            this.cumulativeMeanGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                                          MemFlags.ReadWrite,
                                                          (IntPtr)(sizeof(float) * inputDepth),
                                                          out OpenCLSpace.ClError);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "SetupOutput(): Cl.CreateBuffer cumulativeMeanGPU");
            OpenCLSpace.WipeBuffer(cumulativeMeanGPU, inputDepth, typeof(float));

            // (...and variances to one, copied from a host array at creation time.)
            float[] ones = new float[inputDepth];
            for (int i = 0; i < inputDepth; ++i)
            {
                ones[i] = 1.0f;
            }
            this.cumulativeVarianceGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                                              MemFlags.ReadWrite | MemFlags.CopyHostPtr,
                                                              (IntPtr)(sizeof(float) * inputDepth),
                                                              ones,
                                                              out OpenCLSpace.ClError);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "SetupOutput(): Cl.CreateBuffer cumulativeVarianceGPU");

            // OpenCL buffer for normalized input values (needed for backprop)

            this.normalizedInputGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                                           MemFlags.ReadWrite,
                                                           (IntPtr)(sizeof(float) * nInputUnits * inputNeurons.MiniBatchSize),
                                                           out OpenCLSpace.ClError);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "SetupOutput(): Cl.CreateBuffer normalizedInputGPU");
            OpenCLSpace.WipeBuffer(normalizedInputGPU, nInputUnits * inputNeurons.MiniBatchSize, typeof(float));
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Run network backwards, propagating the gradient backwards and also updating parameters.
        /// Requires that gradient has ALREADY BEEN WRITTEN in network.Layers[nLayers-1].InputNeurons.Delta
        /// </summary>
        /// <param name="learningRate">Step size passed to each layer's UpdateSpeeds.</param>
        /// <param name="momentumMultiplier">Momentum coefficient passed to UpdateSpeeds.</param>
        /// <param name="weightDecayCoeff">Weight-decay coefficient passed to UpdateSpeeds.</param>
        /// <param name="weightMaxNorm">Max-norm constraint passed to UpdateParameters.</param>
        public void BackwardPass(double learningRate, double momentumMultiplier, double weightDecayCoeff, double weightMaxNorm)
        {
            for (int l = nLayers - 2; l > 0; l--) // propagate error signal backwards (layers L-2 to 1, i.e. second last to second)
            {
                // 1. Update layer's parameters' change speed using gradient
                layers[l].UpdateSpeeds(learningRate, momentumMultiplier, weightDecayCoeff);

                // 2. Backpropagate errors to previous layer (no need to do it for layer 1)
                if (l > 1)
                {
                    layers[l].BackPropagate();
                }


#if DEBUGGING_STEPBYSTEP
                /* ------------------------- DEBUGGING --------------------------------------------- */

                // Display input delta  layer-by-layer

                int miniBatchSize = layers[0].OutputNeurons.MiniBatchSize;
#if OPENCL_ENABLED
                // NOTE(review): deltaInputAll is declared only inside this
                // #if OPENCL_ENABLED block but is used below it (Array.Copy), so the
                // debug code will not compile when DEBUGGING_STEPBYSTEP is defined
                // without OPENCL_ENABLED — confirm and restructure if CPU-mode
                // debugging is ever needed.
                float[] deltaInputAll = new float[layers[l].InputNeurons.NumberOfUnits * miniBatchSize];
                OpenCLSpace.ClError = Cl.EnqueueReadBuffer(OpenCLSpace.Queue,
                                                           layers[l].InputNeurons.DeltaGPU,  // source
                                                           Bool.True,
                                                           (IntPtr)0,
                                                           (IntPtr)(layers[l].InputNeurons.NumberOfUnits * miniBatchSize * sizeof(float)),
                                                           deltaInputAll,   // destination
                                                           0,
                                                           null,
                                                           out OpenCLSpace.ClEvent);
                OpenCLSpace.CheckErr(OpenCLSpace.ClError, "NeuralNetwork.ForwardPass Cl.clEnqueueReadBuffer deltaInputAll");
#endif
                Console.WriteLine("\nLayer {0} ({1}) backpropagated delta:", l, layers[l].Type);
                for (int m = 0; m < miniBatchSize; m++)
                {
                    // Slice out one example's delta and print it, pausing for a key press.
                    float[] deltaInput = new float[layers[l].InputNeurons.NumberOfUnits];
                    Array.Copy(deltaInputAll, m * layers[l].InputNeurons.NumberOfUnits, deltaInput, 0, layers[l].InputNeurons.NumberOfUnits);

                    Console.WriteLine("\n --- Mini-batch item {0} -----", m);
                    for (int j = 0; j < deltaInput.Length; j++)
                    {
                        Console.Write("{0}  ", deltaInput[j]);
                    }
                    Console.WriteLine();
                    Console.ReadKey();
                }

                /* ------------------------- END DEBUGGING --------------------------------------------- */
#endif

                // 3. Update layer's parameters
                layers[l].UpdateParameters(weightMaxNorm);
            }
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Reads the weight and bias gradient buffers back from the device and returns
        /// them as a single double array laid out as
        /// [nInputUnits*nOutputUnits weight gradients, then nOutputUnits bias gradients].
        /// </summary>
        public override double[] GetParameterGradients()
        {
            int nParameters = nInputUnits * nOutputUnits + nOutputUnits;

            double[] parameterGradients = new double[nParameters];

            // Copy weights and biases gradients buffers to host (blocking reads)
            float[] tmpWeightsGrad = new float[nInputUnits * nOutputUnits];
            OpenCLSpace.ClError = Cl.EnqueueReadBuffer(OpenCLSpace.Queue,
                                                       weightsGradientsGPU,  // source
                                                       Bool.True,
                                                       (IntPtr)0,
                                                       (IntPtr)(sizeof(float) * nInputUnits * nOutputUnits),
                                                       tmpWeightsGrad,   // destination
                                                       0,
                                                       null,
                                                       out OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "clEnqueueReadBuffer");

            OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

            float[] tmpBiasesGrad = new float[nOutputUnits];
            OpenCLSpace.ClError = Cl.EnqueueReadBuffer(OpenCLSpace.Queue,
                                                       biasesGradientsGPU,  // source
                                                       Bool.True,
                                                       (IntPtr)0,
                                                       (IntPtr)(sizeof(float) * nOutputUnits),
                                                       tmpBiasesGrad,   // destination
                                                       0,
                                                       null,
                                                       out OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "clEnqueueReadBuffer");

            OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

            OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");

            // Convert to double and write into parameterGradients:
            // weights first, then biases appended after them.
            //Console.WriteLine("Weight gradients:\n");
            for (int i = 0; i < nInputUnits * nOutputUnits; ++i)
            {
                parameterGradients[i] = (double)tmpWeightsGrad[i];
                //Console.Write(" {0}", tmpWeightsGrad[i]);
            }
            //Console.ReadKey();
            for (int i = 0; i < nOutputUnits; ++i)
            {
                parameterGradients[nInputUnits * nOutputUnits + i] = (double)tmpBiasesGrad[i];
            }

            return(parameterGradients);
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Forward pass of the ReLU layer: output = input for positive inputs,
        /// zero otherwise, applied elementwise over the whole mini-batch.
        /// </summary>
        public override void FeedForward()
        {
#if TIMING_LAYERS
            Utils.NonlinearityForwardTimer.Start();
#endif

#if OPENCL_ENABLED
            // Bind kernel arguments: output buffer, input buffer, total element count.
            OpenCLSpace.ClError  = Cl.SetKernelArg(OpenCLSpace.ReLUForward, 0, OutputNeurons.ActivationsGPU);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.ReLUForward, 1, InputNeurons.ActivationsGPU);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.ReLUForward, 2, (IntPtr)sizeof(int), OutputNeurons.NumberOfUnits * inputNeurons.MiniBatchSize);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "ReLU.FeedForward(): Cl.SetKernelArg");

            // Launch as a 1D NDRange (work sizes are precomputed fields).
            OpenCLSpace.ClError = Cl.EnqueueNDRangeKernel(OpenCLSpace.Queue,
                                                          OpenCLSpace.ReLUForward,
                                                          1,
                                                          null,
                                                          globalWorkSizePtr,
                                                          localWorkSizePtr,
                                                          0,
                                                          null,
                                                          out OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "ReLU.FeedForward(): Cl.EnqueueNDRangeKernel");

            OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

            // Block until the kernel has completed.
            OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");
#else
            // CPU path: rectify each example's activations one by one.
            for (int m = 0; m < inputNeurons.MiniBatchSize; m++)
            {
                double[] inputActivations = this.inputNeurons.GetHost()[m];
                double[] rectified = new double[this.nOutputUnits];
                for (int i = 0; i < this.nOutputUnits; i++)
                {
                    double x = inputActivations[i];
                    rectified[i] = x > 0 ? x : 0.0;
                }
                this.outputNeurons.SetHost(m, rectified);
            }
#endif

#if TIMING_LAYERS
            Utils.NonlinearityForwardTimer.Stop();
#endif
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Decodes one image into a flattened, channel-planar array
        /// (all red values, then all green, then all blue — reading BGR byte order
        /// from bitmap memory) and appends it, with its label, to DataContainer.
        /// In OpenCL builds the pixel data is uploaded to a device buffer instead of
        /// being kept on the host.
        /// NOTE(review): reading currentLine[x + 1] and [x + 2] assumes at least
        /// 3 bytes per pixel (24/32bpp formats); an 8bpp bitmap would read past the
        /// row — confirm callers only supply RGB(A) images.
        /// </summary>
        /// <param name="input">Source image.</param>
        /// <param name="label">Class label stored alongside the data point.</param>
        public void ReadImage(Image input, int label)
        {
            unsafe
            {
                using (Bitmap bmp = new Bitmap(input))
                {
                    // offSet = pixels per channel plane; 3 planes (R, G, B) in total.
                    int offSet = bmp.Width * bmp.Height;
                    DataDimension = offSet * 3;
#if OPENCL_ENABLED
                    float[] dataPoint = new float[DataDimension];
#else
                    double[] dataPoint = new double[DataDimension];
#endif
                    #region Copy RGB values directly from memory to the array
                    BitmapData bitmapData     = bmp.LockBits(new Rectangle(0, 0, bmp.Width, bmp.Height), ImageLockMode.ReadOnly, bmp.PixelFormat);
                    int        bytesPerPixel  = Image.GetPixelFormatSize(bmp.PixelFormat) / 8;
                    int        heightInPixels = bitmapData.Height;
                    int        widthInBytes   = bitmapData.Width * bytesPerPixel;
                    byte *     ptrFirstPixel  = (byte *)bitmapData.Scan0;

                    // Walk row by row (Stride accounts for row padding), one pixel at a time.
                    int index = 0;
                    for (int y = 0; y < heightInPixels; y++)
                    {
                        byte *currentLine = ptrFirstPixel + (y * bitmapData.Stride);
                        for (int x = 0; x < widthInBytes; x = x + bytesPerPixel)
                        {
                            dataPoint[index]                   = currentLine[x + 2]; // Red
                            dataPoint[index + offSet]          = currentLine[x + 1]; // Green
                            dataPoint[index + offSet + offSet] = currentLine[x];     // Blue
                            index++;
                        }
                    }

                    bmp.UnlockBits(bitmapData);
                    #endregion

#if OPENCL_ENABLED
                    // Upload the data point to a read-only device buffer, copied from host memory.
                    int datumBytesSize = sizeof(float) * dataPoint.Length;
                    Mem tmpBuffer      = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                                              MemFlags.ReadOnly | MemFlags.CopyHostPtr | MemFlags.AllocHostPtr,
                                                              (IntPtr)datumBytesSize,
                                                              dataPoint,
                                                              out OpenCLSpace.ClError);
                    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "DataSet(): Cl.CreateBuffer tmpBuffer");

                    DataContainer.Add(new DataItem(tmpBuffer, label));
#else
                    DataContainer.Add(new DataItem(dataPoint, label));
#endif
                }
            }
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Overwrites an entire device buffer with zeros by running the wipe kernel
        /// that matches the buffer's element type.
        /// </summary>
        /// <param name="buffer">Device buffer to clear.</param>
        /// <param name="nElementsInBuffer">Number of elements the buffer holds.</param>
        /// <param name="type">Element type; must be float, int, or bool.</param>
        public static void WipeBuffer(Mem buffer, int nElementsInBuffer, Type type)
        {
            // Pick the kernel specialised for this element type.
            Kernel wipeKernel;
            if (type == typeof(float))
            {
                wipeKernel = WipeBufferFloatKernel;
            }
            else if (type == typeof(int))
            {
                wipeKernel = WipeBufferIntKernel;
            }
            else if (type == typeof(bool))
            {
                wipeKernel = WipeBufferBoolKernel;
            }
            else
            {
                throw new ArgumentException("Type not supported. Use either float, int, or bool.");
            }

            // Bind the buffer and its element count to the kernel.
            ClError  = Cl.SetKernelArg(wipeKernel, 0, buffer);
            ClError |= Cl.SetKernelArg(wipeKernel, 1, (IntPtr)sizeof(int), nElementsInBuffer);
            CheckErr(ClError, "Cl.SetKernelArg WipeBufferKernel");

            // One work-item per element; global size rounded up to a whole number of groups.
            int nGroups = (nElementsInBuffer + OPTIMAL_GROUP_SIZE - 1) / OPTIMAL_GROUP_SIZE;
            IntPtr[] localSizePtr  = { (IntPtr)OPTIMAL_GROUP_SIZE };
            IntPtr[] globalSizePtr = { (IntPtr)(OPTIMAL_GROUP_SIZE * nGroups) };

            // Launch, release the completion event, then block until the queue drains.
            ClError = Cl.EnqueueNDRangeKernel(queue,
                                              wipeKernel,
                                              1,
                                              null,
                                              globalSizePtr,
                                              localSizePtr,
                                              0,
                                              null,
                                              out ClEvent);
            CheckErr(ClError, "Cl.EnqueueNDRangeKernel ZeroUnpadBatch");

            ClError = Cl.ReleaseEvent(ClEvent);
            CheckErr(ClError, "Cl.ReleaseEvent");

            ClError = Cl.Finish(queue);
            CheckErr(ClError, "Cl.Finish");
        }
Ejemplo n.º 15
0
        /// <summary>
        /// Backward pass of the ELU nonlinearity: computes the input-side deltas from the
        /// output-side deltas by running the ELUBackward kernel on the device.
        /// The kernel receives the input activations because the ELU derivative depends on
        /// the pre-activation value (and on <c>alpha</c> for the negative branch).
        /// </summary>
        public override void BackPropagate()
        {

#if TIMING_LAYERS
            Utils.NonlinearityBackpropTimer.Start();
#endif

#if OPENCL_ENABLED
            // Set kernel arguments
            // Arg 4 is the total number of elements to process (units * mini-batch size).
            OpenCLSpace.ClError  = Cl.SetKernelArg(OpenCLSpace.ELUBackward, 0, inputNeurons.DeltaGPU);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.ELUBackward, 1, outputNeurons.DeltaGPU);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.ELUBackward, 2, inputNeurons.ActivationsGPU);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.ELUBackward, 3, (IntPtr)sizeof(float), alpha);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.ELUBackward, 4, (IntPtr)sizeof(int), nInputUnits * inputNeurons.MiniBatchSize);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "ELU.BackPropagate(): Cl.SetKernelArg");

            // Run kernel
            // globalWorkSizePtr / localWorkSizePtr are precomputed fields (set up elsewhere in the layer).
            OpenCLSpace.ClError = Cl.EnqueueNDRangeKernel(OpenCLSpace.Queue,
                                                            OpenCLSpace.ELUBackward,
                                                            1,
                                                            null,
                                                            globalWorkSizePtr,
                                                            localWorkSizePtr,
                                                            0,
                                                            null,
                                                            out OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "ELU.BackPropagate(): Cl.EnqueueNDRangeKernel");

            OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

            // Blocking finish: the backward pass is synchronous with respect to the host.
            OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");
#else
            // NOTE(review): the code below the throw is dead (and its inner loop body is
            // entirely commented out) — the CPU path was never completed.
            throw new NotImplementedException("CPU code for ELUs not implemented yet.");
            for (int m = 0; m < inputNeurons.MiniBatchSize; m++)
            {
                for (int i = 0; i < nOutputUnits; i++)
                    //inputNeurons.DeltaHost[m][i] = inputNeurons.GetHost()[m][i] > 0 ? outputNeurons.DeltaHost[m][i] : 0.0;

            }
#endif

#if TIMING_LAYERS
            Utils.NonlinearityBackpropTimer.Stop();
#endif
        }
Ejemplo n.º 16
0
        /// <summary>
        /// Reads the batch-norm parameter gradients (dGamma, dBeta) back from the device and
        /// returns them as a single double array of length 2 * nInputUnits, laid out as
        /// [gamma gradients (nInputUnits) | beta gradients (nInputUnits)].
        /// Both reads are blocking (Bool.True), so the returned values are consistent.
        /// </summary>
        public override double[] GetParameterGradients()
        {
            double[] parameterGradients = new double[2 * nInputUnits];

            // Copy gamma and beta gradients buffers to host
            float[] tmpGammaGrad = new float[nInputUnits];
            OpenCLSpace.ClError = Cl.EnqueueReadBuffer(OpenCLSpace.Queue,
                                                       deltaGammaGPU,  // source
                                                       Bool.True,
                                                       (IntPtr)0,
                                                       (IntPtr)(sizeof(float) * nInputUnits),
                                                       tmpGammaGrad,   // destination
                                                       0,
                                                       null,
                                                       out OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "clEnqueueReadBuffer");

            OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

            float[] tmpBetaGrad = new float[nInputUnits];
            OpenCLSpace.ClError = Cl.EnqueueReadBuffer(OpenCLSpace.Queue,
                                                       deltaBetaGPU,  // source
                                                       Bool.True,
                                                       (IntPtr)0,
                                                       (IntPtr)(sizeof(float) * nInputUnits),
                                                       tmpBetaGrad,   // destination
                                                       0,
                                                       null,
                                                       out OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "clEnqueueReadBuffer");

            OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

            OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");

            // Convert to double and write into public fields
            // (device buffers are single precision; the trainer API works in double).
            for (int i = 0; i < nInputUnits; ++i)
            {
                parameterGradients[i] = (double)tmpGammaGrad[i];
                parameterGradients[nInputUnits + i] = (double)tmpBetaGrad[i];
            }

            return(parameterGradients);
        }
Ejemplo n.º 17
0
        /// <summary>
        /// Uploads new parameters to the device. The input vector is laid out as
        /// [weights (nInputUnits * nOutputUnits) | biases (nOutputUnits)], in double
        /// precision; values are downcast to float before the blocking device writes.
        /// </summary>
        public override void SetParameters(double[] NewParameters)
        {
            int nWeights = nInputUnits * nOutputUnits;

            // Convert to single precision into temporary host arrays.
            float[] weightsFloat = new float[nWeights];
            for (int j = 0; j < nWeights; ++j)
                weightsFloat[j] = (float)NewParameters[j];

            float[] biasesFloat = new float[nOutputUnits];
            for (int j = 0; j < nOutputUnits; ++j)
                biasesFloat[j] = (float)NewParameters[nWeights + j];

            // Blocking writes into the device-side parameter buffers.
            OpenCLSpace.ClError = Cl.EnqueueWriteBuffer(OpenCLSpace.Queue,
                                                        weightsGPU,
                                                        OpenCL.Net.Bool.True,
                                                        (IntPtr)0,
                                                        (IntPtr)(sizeof(float) * nWeights),
                                                        weightsFloat,
                                                        0,
                                                        null,
                                                        out OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.EnqueueWriteBuffer");
            OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

            OpenCLSpace.ClError = Cl.EnqueueWriteBuffer(OpenCLSpace.Queue,
                                                        biasesGPU,
                                                        OpenCL.Net.Bool.True,
                                                        (IntPtr)0,
                                                        (IntPtr)(sizeof(float) * nOutputUnits),
                                                        biasesFloat,
                                                        0,
                                                        null,
                                                        out OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.EnqueueWriteBuffer");
            OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

            OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");
        }
Ejemplo n.º 18
0
        /// <summary>
        /// Sets up the batch-norm layer's output: same spatial dimensions and unit count as
        /// the input, plus the device buffers holding the per-unit mean, variance, their
        /// cumulative (running) averages, and the normalized input activations.
        /// Fix: error messages previously said "InitializeParameters()" although this method
        /// is SetupOutput(), which made CheckErr failures point at the wrong method.
        /// </summary>
        public override void SetupOutput()
        {
            // Batch normalization is shape-preserving.
            this.outputWidth  = inputWidth;
            this.outputHeight = inputHeight;
            this.outputDepth  = inputDepth;

            this.nOutputUnits  = nInputUnits;
            this.outputNeurons = new Neurons(nOutputUnits);

            // Also initialize OpenCL buffers for mean, variance, their cumulative averages,
            // and normalized input activations.
            this.meanGPU               = CreateZeroedFloatBuffer(nInputUnits);
            this.varianceGPU           = CreateZeroedFloatBuffer(nInputUnits);
            this.cumulativeMeanGPU     = CreateZeroedFloatBuffer(nInputUnits);
            this.cumulativeVarianceGPU = CreateZeroedFloatBuffer(nInputUnits);

            // Normalized activations are stored per mini-batch item.
            this.normalizedInputGPU = CreateZeroedFloatBuffer(nInputUnits * inputNeurons.MiniBatchSize);
        }

        /// <summary>
        /// Allocates a read-write float buffer of <paramref name="nElements"/> elements on the
        /// device and zero-fills it.
        /// </summary>
        private static Mem CreateZeroedFloatBuffer(int nElements)
        {
            Mem buffer = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                              MemFlags.ReadWrite,
                                              (IntPtr)(sizeof(float) * nElements),
                                              out OpenCLSpace.ClError);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "SetupOutput(): Cl.CreateBuffer");
            OpenCLSpace.WipeBuffer(buffer, nElements, typeof(float));
            return buffer;
        }
Ejemplo n.º 19
0
        /// <summary>
        /// Sets up the fully-connected layer's output: a flat vector of nOutputUnits
        /// (depth = nOutputUnits, height = width = 1) and, on the GPU path, the dropout
        /// mask buffer (one flag per output unit per mini-batch item).
        /// Fix: the CheckErr message previously said "InitializeParameters()" although this
        /// method is SetupOutput(), which misdirected failure diagnostics.
        /// </summary>
        public override void SetupOutput()
        {
            this.outputDepth  = nOutputUnits;
            this.outputHeight = 1;
            this.outputWidth  = 1;

            this.outputNeurons = new Neurons(this.nOutputUnits);

#if OPENCL_ENABLED
            // NOTE(review): sizeof(bool) is 1 in C#; the kernel-side element type must match
            // (e.g. uchar/char) — confirm against the dropout kernel source.
            this.dropoutMaskGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                                       MemFlags.ReadWrite,
                                                       (IntPtr)(sizeof(bool) * nOutputUnits * inputNeurons.MiniBatchSize),
                                                       out OpenCLSpace.ClError);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "SetupOutput(): Cl.CreateBuffer dropoutMaskGPU");
            OpenCLSpace.WipeBuffer(dropoutMaskGPU, nOutputUnits * inputNeurons.MiniBatchSize, typeof(bool));
#endif
        }
Ejemplo n.º 20
0
        /// <summary>
        /// Backward pass of the fully-connected layer: propagates output-side deltas to the
        /// input neurons (delta_in = W^T * delta_out, masked by dropout on the GPU path).
        /// Fix: the first SetKernelArg used "|=" on a stale OpenCLSpace.ClError value instead
        /// of "=" — an error left over from a previous call could mask or fake failures here.
        /// Every sibling method initializes the accumulator with "=" on the first call.
        /// </summary>
        public override void BackPropagate()
        {
#if TIMING_LAYERS
            Utils.FCBackpropTimer.Start();
#endif

#if OPENCL_ENABLED
            // Set kernel arguments ("=" initializes the error accumulator, "|=" folds in the rest).
            OpenCLSpace.ClError  = Cl.SetKernelArg(OpenCLSpace.FCBackward, 0, inputNeurons.DeltaGPU);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.FCBackward, 1, outputNeurons.DeltaGPU);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.FCBackward, 2, weightsGPU);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.FCBackward, 3, dropoutMaskGPU);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.FCBackward, 4, (IntPtr)sizeof(int), nInputUnits);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.FCBackward, 5, (IntPtr)sizeof(int), nOutputUnits);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.FCBackward, 6, (IntPtr)sizeof(int), inputNeurons.MiniBatchSize);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "FullyConnected.BackPropagate(): Cl.SetKernelArg");

            // Run kernel (2D NDRange; work sizes are precomputed fields of the layer).
            OpenCLSpace.ClError = Cl.EnqueueNDRangeKernel(OpenCLSpace.Queue,
                                                          OpenCLSpace.FCBackward,
                                                          2,
                                                          null,
                                                          backwardGlobalWorkSizePtr,
                                                          backwardLocalWorkSizePtr,
                                                          0,
                                                          null,
                                                          out OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "FullyConnected.BackPropagate(): Cl.EnqueueNDRangeKernel");

            OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

            OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");
#else
            // CPU path: delta_in = W^T * delta_out per mini-batch item (no dropout mask here).
            for (int m = 0; m < inputNeurons.MiniBatchSize; m++)
            {
                inputNeurons.DeltaHost[m] = Utils.MultiplyMatrixTranspByVector(weights, outputNeurons.DeltaHost[m]);
            }
#endif

#if TIMING_LAYERS
            Utils.FCBackpropTimer.Stop();
#endif
        }
Ejemplo n.º 21
0
        /// <summary>
        /// Uploads new batch-norm parameters to the device. The input vector is laid out as
        /// [gamma (inputDepth) | beta (inputDepth)], in double precision; values are downcast
        /// to float before the blocking device writes.
        /// </summary>
        public override void SetParameters(double[] NewParameters)
        {
            // Convert to float and write into tmp arrays

            float[] tmpGamma = new float[inputDepth];
            float[] tmpBeta  = new float[inputDepth];
            for (int i = 0; i < inputDepth; ++i)
            {
                tmpGamma[i] = (float)NewParameters[i];
                tmpBeta[i]  = (float)NewParameters[inputDepth + i];
            }

            // Write arrays into buffers on device (blocking writes)

            OpenCLSpace.ClError = Cl.EnqueueWriteBuffer(OpenCLSpace.Queue,
                                                        gammaGPU,
                                                        OpenCL.Net.Bool.True,
                                                        (IntPtr)0,
                                                        (IntPtr)(sizeof(float) * inputDepth),
                                                        tmpGamma,
                                                        0,
                                                        null,
                                                        out OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.EnqueueWriteBuffer");
            OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

            OpenCLSpace.ClError = Cl.EnqueueWriteBuffer(OpenCLSpace.Queue,
                                                        betaGPU,
                                                        OpenCL.Net.Bool.True,
                                                        (IntPtr)0,
                                                        (IntPtr)(sizeof(float) * inputDepth),
                                                        tmpBeta,
                                                        0,
                                                        null,
                                                        out OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.EnqueueWriteBuffer");
            OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

            OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");
        }
Ejemplo n.º 22
0
        /// <summary>
        /// Loads a data set from two text files: one tab-separated row of features per data
        /// point, and one integer label per line. Each data point is stored in DataContainer
        /// (as a device buffer on the GPU path, as a double array otherwise).
        /// Fix: a mismatch between data and label counts previously threw the bare
        /// <see cref="Exception"/> base type; <see cref="ArgumentException"/> is more specific
        /// and remains catchable by existing callers.
        /// </summary>
        /// <param name="dataPath">Path to the tab-separated feature file.</param>
        /// <param name="labelsPath">Path to the label file (one integer per line).</param>
        /// <exception cref="ArgumentException">Data and label files have different line counts.</exception>
        public void ReadData(string dataPath, string labelsPath)
        {
            string[] dataArray   = File.ReadAllLines(dataPath);
            string[] labelsArray = File.ReadAllLines(labelsPath);

            if (dataArray.Length != labelsArray.Length)
            {
                throw new ArgumentException("The amount of data does not match the amount of labels");
            }

            // Read images and their labels
            for (int index = 0; index < dataArray.Length; index++)
            {
                string[] columns = dataArray[index].Split('\t');

                // Feature count of the last parsed row (assumes all rows have equal length).
                DataDimension = columns.Length;

#if OPENCL_ENABLED
                // Parse the row into floats and copy it straight into a device buffer.
                float[] dataPoint = new float[columns.Length];
                for (int i = 0; i < columns.Length; i++)
                {
                    dataPoint[i] = float.Parse(columns[i], CultureInfo.InvariantCulture.NumberFormat);
                }

                int datumBytesSize = sizeof(float) * dataPoint.Length;
                Mem tmpBuffer      = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                                          MemFlags.ReadOnly | MemFlags.CopyHostPtr | MemFlags.AllocHostPtr,
                                                          (IntPtr)datumBytesSize,
                                                          dataPoint,
                                                          out OpenCLSpace.ClError);
                OpenCLSpace.CheckErr(OpenCLSpace.ClError, "DataSet(): Cl.CreateBuffer tmpBuffer");
#else
                double[] tmpBuffer = new double[columns.Length];
                for (int i = 0; i < columns.Length; i++)
                {
                    tmpBuffer[i] = double.Parse(columns[i], CultureInfo.InvariantCulture.NumberFormat);
                }
#endif

                // Convert.ToInt32 throws FormatException on a malformed label line.
                DataContainer.Add(new DataItem(tmpBuffer, Convert.ToInt32(labelsArray[index])));
            }
        }
Ejemplo n.º 23
0
        /// <summary>
        /// Backward pass of the max-pooling layer: routes each output-side delta back to the
        /// input position that produced the max during the forward pass, using the switches
        /// buffer (argmax indices) and the precomputed pooling table.
        /// </summary>
        public override void BackPropagate()
        {
#if TIMING_LAYERS
            Utils.PoolingBackpropTimer.Start();
#endif

#if OPENCL_ENABLED
            // Args 5 and 7 pass the spatial areas (width squared — the layer only supports square input).
            OpenCLSpace.ClError  = Cl.SetKernelArg(OpenCLSpace.MaxPoolingBackward, 0, inputNeurons.DeltaGPU);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.MaxPoolingBackward, 1, outputNeurons.DeltaGPU);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.MaxPoolingBackward, 2, switchesGPU);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.MaxPoolingBackward, 3, poolingTableGPU);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.MaxPoolingBackward, 4, (IntPtr)sizeof(int), nInputUnits);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.MaxPoolingBackward, 5, (IntPtr)sizeof(int), inputWidth * inputWidth);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.MaxPoolingBackward, 6, (IntPtr)sizeof(int), nOutputUnits);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.MaxPoolingBackward, 7, (IntPtr)sizeof(int), outputWidth * outputWidth);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.MaxPoolingBackward, 8, (IntPtr)sizeof(int), inputNeurons.MiniBatchSize);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.SetKernelArg PoolingBackward");

            OpenCLSpace.ClError = Cl.EnqueueNDRangeKernel(OpenCLSpace.Queue,
                                                          OpenCLSpace.MaxPoolingBackward,
                                                          1,
                                                          null,
                                                          globalWorkSizePtr,
                                                          localWorkSizePtr,
                                                          0,
                                                          null,
                                                          out OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.EnqueueNDRangeKernel PoolingBackward");

            OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

            OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");
#else
            //TODO: CPU code
#endif

#if TIMING_LAYERS
            Utils.PoolingBackpropTimer.Stop();
#endif
        }
Ejemplo n.º 24
0
        /// <summary>
        /// Computes the cross-entropy gradient of the mini-batch at the softmax input
        /// (gradient = class scores, minus one at the true class) and writes it, as floats,
        /// into the delta buffer of the last layer's input neurons on the device.
        /// </summary>
        /// <param name="DataSet">Data set providing the true labels and class count.</param>
        /// <param name="iMiniBatch">Indices of the data points in the current mini-batch.</param>
        public void CrossEntropyGradient(DataSet DataSet, int[] iMiniBatch)
        {
            int nClasses = DataSet.NumberOfClasses;
            float[] gradientBatch = new float[iMiniBatch.Length * nClasses];

            for (int m = 0; m < iMiniBatch.Length; m++)
            {
                int iDataPoint = iMiniBatch[m];
                int trueLabel  = DataSet.DataContainer[iDataPoint].Label;

                // Softmax output minus one-hot target.
                double[] gradient = new double[nClasses];
                Array.Copy(outputLayer.OutputClassScores[m], gradient, nClasses);
                gradient[trueLabel] -= 1.0;

                // Downcast to float into the flattened batch buffer.
                for (int c = 0; c < nClasses; c++)
                    gradientBatch[m * nClasses + c] = (float)gradient[c];
            }

            // Blocking write of the gradient into the input neurons of the softmax layer
            // (i.e. the output neurons of the classifier).
            OpenCLSpace.ClError = Cl.EnqueueWriteBuffer(OpenCLSpace.Queue,
                                                        layers.Last().InputNeurons.DeltaGPU,
                                                        Bool.True,
                                                        (IntPtr)0,
                                                        (IntPtr)(sizeof(float) * gradientBatch.Length),
                                                        gradientBatch,
                                                        0,
                                                        null,
                                                        out OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "NetworkTrainer.CrossEntropyGradient(): Cl.EnqueueWriteBuffer");

            OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

            OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");
        }
Ejemplo n.º 25
0
        /// <summary>
        /// Backward pass of convolutional batch normalization: computes the input-side deltas
        /// and accumulates the gamma/beta parameter gradients in one kernel launch.
        /// NOTE(review): unlike the sibling methods, the OpenCL calls here are not wrapped in
        /// an OPENCL_ENABLED region — confirm this class is only compiled on the GPU path.
        /// </summary>
        public override void BackPropagate()
        {
#if TIMING_LAYERS
            Utils.BNConvBackpropTimer.Start();
#endif

            // Set kernel arguments: deltas, saved normalized inputs, parameters, variance,
            // and the two parameter-gradient accumulators.
            OpenCLSpace.ClError  = Cl.SetKernelArg(OpenCLSpace.BNConvBackPropagate, 0, inputNeurons.DeltaGPU);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNConvBackPropagate, 1, outputNeurons.DeltaGPU);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNConvBackPropagate, 2, normalizedInputGPU);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNConvBackPropagate, 3, gammaGPU);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNConvBackPropagate, 4, varianceGPU);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNConvBackPropagate, 5, deltaGammaGPU);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNConvBackPropagate, 6, deltaBetaGPU);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNConvBackPropagate, 7, (IntPtr)sizeof(int), inputArea);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNConvBackPropagate, 8, (IntPtr)sizeof(int), nInputUnits);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNConvBackPropagate, 9, (IntPtr)sizeof(int), inputNeurons.MiniBatchSize);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.SetKernelArg");

            OpenCLSpace.ClError = Cl.EnqueueNDRangeKernel(OpenCLSpace.Queue,
                                                          OpenCLSpace.BNConvBackPropagate,
                                                          1,
                                                          null,
                                                          nActivationsGlobalWorkSizePtr,
                                                          optimalLocalWorkSizePtr,
                                                          0,
                                                          null,
                                                          out OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.EnqueueNDRangeKernel");

            OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

            OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");

#if TIMING_LAYERS
            Utils.BNConvBackpropTimer.Stop();
#endif
        }
Ejemplo n.º 26
0
        /// <summary>
        /// Copies the layer's weights and biases from the device buffers into the host-side
        /// arrays (weightsHost, biasesHost), e.g. for saving the network. Both reads are
        /// blocking, and a final Finish ensures the queue is drained before returning.
        /// </summary>
        public override void CopyBuffersToHost()
        {
            OpenCLSpace.ClError = Cl.EnqueueReadBuffer(OpenCLSpace.Queue,
                                                       weightsGPU,  // source
                                                       Bool.True,
                                                       (IntPtr)0,
                                                       (IntPtr)(sizeof(float) * nInputUnits * nOutputUnits),
                                                       weightsHost,   // destination
                                                       0,
                                                       null,
                                                       out OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "clEnqueueReadBuffer weightsGPU");

            OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

            OpenCLSpace.ClError = Cl.EnqueueReadBuffer(OpenCLSpace.Queue,
                                                       biasesGPU,  // source
                                                       Bool.True,
                                                       (IntPtr)0,
                                                       (IntPtr)(sizeof(float) * nOutputUnits),
                                                       biasesHost,   // destination
                                                       0,
                                                       null,
                                                       out OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "clEnqueueReadBuffer biasesGPU");

            OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");


            OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");

            // Speeds are not saved.
        }
Ejemplo n.º 27
0
        /// <summary>
        /// Forward pass of the residual (skip) block: conv1 -> nonlinearity -> conv2, then
        /// adds the block's input activations onto its output activations (identity shortcut)
        /// via the SkipForward kernel.
        /// Cleanup: removed ~90 lines of commented-out device-readback debug dumps that
        /// obscured the actual control flow; the executed logic is unchanged.
        /// </summary>
        public override void FeedForward()
        {
            convolutionalLayer1.FeedForward();

            // Apply the configured nonlinearity between the two convolutions.
            // NOTE(review): an unrecognized nonlinearityType silently skips this step —
            // confirm whether that is intended or should throw.
            if (nonlinearityType == "ReLU")
            {
                nonlinearityReLU.FeedForward();
            }
            else if (nonlinearityType == "ELU")
            {
                nonlinearityELU.FeedForward();
            }

            convolutionalLayer2.FeedForward();

            // Additionally, cumulate inputs onto outputs (the skip connection).
            OpenCLSpace.ClError  = Cl.SetKernelArg(OpenCLSpace.SkipForward, 0, outputNeurons.ActivationsGPU);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.SkipForward, 1, inputNeurons.ActivationsGPU);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.SkipForward, 2, (IntPtr)sizeof(int), nInputUnits);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.SkipForward, 3, (IntPtr)sizeof(int), inputNeurons.MiniBatchSize);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.SetKernelArg");

            // Run kernel
            OpenCLSpace.ClError = Cl.EnqueueNDRangeKernel(OpenCLSpace.Queue,
                                                          OpenCLSpace.SkipForward,
                                                          1,
                                                          null,
                                                          globalWorkSizePtr,
                                                          localWorkSizePtr,
                                                          0,
                                                          null,
                                                          out OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.EnqueueNDRangeKernel");

            OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

            OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");
        }
Ejemplo n.º 28
0
        /// <summary>
        /// Validates the layer's input geometry and allocates the output structures of a
        /// max-pooling layer. On the GPU path this also builds a per-window "pooling table"
        /// (4 ints per output location) by running the CreateMaxPoolingTable kernel once,
        /// and allocates the boolean "switches" buffer (one flag per input unit per example,
        /// presumably recording which input won each max — TODO confirm against the kernel).
        /// </summary>
        public override void SetupOutput()
        {
            // Check arguments _______________________________________________________________________________________

            // Only square spatial inputs are supported by this implementation.
            if (inputHeight != inputWidth)
            {
                throw new ArgumentException("MaxPooling currently only supports spatially square input.");
            }

            // NOTE(review): this check only verifies divisibility by poolWidth, even though
            // the message also mentions the stride. For stride != poolWidth (overlapping or
            // strided pooling) windows may still fall off the edge — confirm intended usage.
            if (inputWidth % poolWidth != 0)
            {
                throw new ArgumentException("Cannot apply max pooling to input: pooling width and stride do not fit input width!");
            }


            // Setup output __________________________________________________________________________________________

            // Standard output-size formula for a pooling window of side poolWidth moved by `stride`
            // (no padding): floor((in - pool) / stride) + 1. Depth is unchanged by pooling.
            this.outputWidth  = (inputWidth - poolWidth) / stride + 1;
            this.outputHeight = (inputHeight - poolWidth) / stride + 1;
            this.outputDepth  = inputDepth;

            this.nOutputUnits  = outputWidth * outputHeight * outputDepth;
            this.outputNeurons = new Neurons(nOutputUnits);

            // Initialize and create auxiliary structures ____________________________________________________________
#if OPENCL_ENABLED
            // Pooling table

            // 4 ints per output spatial location (presumably the 4 input indices of each 2x2-style
            // window, or window corner coordinates — TODO confirm against the kernel source).
            this.poolingTableGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                                        MemFlags.ReadWrite,
                                                        (IntPtr)(sizeof(int) * 4 * outputHeight * outputWidth),
                                                        out OpenCLSpace.ClError);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.CreateBuffer poolingTableGPU");
            OpenCLSpace.WipeBuffer(poolingTableGPU, 4 * outputHeight * outputWidth, typeof(int));

            OpenCLSpace.ClError  = Cl.SetKernelArg(OpenCLSpace.CreateMaxPoolingTable, 0, poolingTableGPU);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.CreateMaxPoolingTable, 1, (IntPtr)sizeof(int), stride);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.CreateMaxPoolingTable, 2, (IntPtr)sizeof(int), inputWidth);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.CreateMaxPoolingTable, 3, (IntPtr)sizeof(int), outputWidth);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.SetKernelArg CreatePoolingTable");

            // Global size = nOutputUnits * MiniBatchSize rounded up to a multiple of the local size (32).
            // NOTE(review): the table itself only has outputHeight*outputWidth entries, so this launch
            // appears oversized (depth and mini-batch factors included); the kernel presumably guards
            // out-of-range work-items — verify against the kernel source.
            OpenCLSpace.ClError = Cl.EnqueueNDRangeKernel(OpenCLSpace.Queue,
                                                          OpenCLSpace.CreateMaxPoolingTable,
                                                          1,
                                                          null,
                                                          new IntPtr[] { (IntPtr)(32 * Math.Ceiling((double)(nOutputUnits * inputNeurons.MiniBatchSize) / (double)32)) },
                                                          new IntPtr[] { (IntPtr)32 },
                                                          0,
                                                          null,
                                                          out OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.EnqueueNDRangeKernel CreatePoolingTable");

            OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");


            // Switches

            // One bool per input unit per mini-batch example.
            this.switchesGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                                    MemFlags.ReadWrite,
                                                    (IntPtr)(sizeof(bool) * nInputUnits * inputNeurons.MiniBatchSize),
                                                    out OpenCLSpace.ClError);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.CreateBuffer switchesGPU");
            OpenCLSpace.WipeBuffer(switchesGPU, nInputUnits * inputNeurons.MiniBatchSize, typeof(bool));
#else
            //TODO: create poolingTable and switches on cpu
#endif
        }
Ejemplo n.º 29
0
        /// <summary>
        /// Loads a mini-batch of examples into this input layer's output activations.
        /// On the GPU path each selected example is copied device-to-device into its slot
        /// of the activations buffer; if input dropout is enabled (dropoutParameter &lt; 1.0),
        /// the dropout kernel is then applied ONCE over the whole mini-batch buffer.
        /// </summary>
        /// <param name="dataSet">Data set holding the example buffers (GPU) or arrays (CPU).</param>
        /// <param name="iExamples">Indices of the examples to load; must contain at least
        /// MiniBatchSize entries.</param>
        public void FeedData(DataSet dataSet, int[] iExamples)
        {
#if TIMING_LAYERS
            Utils.InputFeedTimer.Start();
#endif
            int dataPointSize = dataSet.DataDimension;

            for (int m = 0; m < outputNeurons.MiniBatchSize; m++)
            {
#if OPENCL_ENABLED
                int iDataPoint = iExamples[m];

                OpenCLSpace.ClError = Cl.EnqueueCopyBuffer(OpenCLSpace.Queue,
                                                           dataSet.DataContainer[iDataPoint].Data,      // source
                                                           outputNeurons.ActivationsGPU,                // destination
                                                           (IntPtr)0,                                   // source offset (in bytes)
                                                           (IntPtr)(sizeof(float) * m * dataPointSize), // destination offset (in bytes)
                                                           (IntPtr)(sizeof(float) * dataPointSize),     // size of buffer to copy
                                                           0,
                                                           null,
                                                           out OpenCLSpace.ClEvent);
                OpenCLSpace.CheckErr(OpenCLSpace.ClError, "InputLayer.FeedData Cl.EnqueueCopyBuffer inputData");

                OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
                OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");
#else
                outputNeurons.SetHost(m, dataSet.Data[iExamples[m]]);
#endif
            }

#if OPENCL_ENABLED
            // Dropout!
            //
            // BUG FIX: this kernel was previously enqueued INSIDE the per-example loop above,
            // but its size argument covers the ENTIRE mini-batch buffer
            // (nOutputUnits * MiniBatchSize), so dropout was being applied MiniBatchSize
            // times in a row (with a fresh seed each time), compounding the drop rate.
            // It must run exactly once, after all examples have been copied in.
            if (dropoutParameter < 1.0)
            {
                // Set kernel arguments
                OpenCLSpace.ClError  = Cl.SetKernelArg(OpenCLSpace.InputDropout, 0, outputNeurons.ActivationsGPU);
                OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.InputDropout, 1, (IntPtr)sizeof(int), nOutputUnits * outputNeurons.MiniBatchSize);
                OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.InputDropout, 2, (IntPtr)sizeof(float), (float)dropoutParameter);
                // Seed for the kernel's RNG; Guid hash gives a new seed per call.
                OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.InputDropout, 3, (IntPtr)sizeof(ulong), (ulong)Guid.NewGuid().GetHashCode());
                OpenCLSpace.CheckErr(OpenCLSpace.ClError, "InputDropout: Cl.SetKernelArg");

                // Run kernel
                OpenCLSpace.ClError = Cl.EnqueueNDRangeKernel(OpenCLSpace.Queue,
                                                              OpenCLSpace.InputDropout,
                                                              1,
                                                              null,
                                                              dropoutGlobalWorkSizePtr,
                                                              dropoutLocalWorkSizePtr,
                                                              0,
                                                              null,
                                                              out OpenCLSpace.ClEvent);
                OpenCLSpace.CheckErr(OpenCLSpace.ClError, "InputDropout: Cl.EnqueueNDRangeKernel");

                OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
                OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");
            }

            // Block until all copies (and dropout, if any) have completed.
            OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");
#endif

#if TIMING_LAYERS
            Utils.InputFeedTimer.Stop();
#endif
        }
Ejemplo n.º 30
0
        /// <summary>
        /// Applies one parameter-update step to this batch-normalization layer's learnable
        /// scale (gamma) and shift (beta) vectors by launching the BNFCUpdateParameters
        /// kernel, then blocks until the update has completed on the device.
        /// </summary>
        /// <param name="weightDecayCoeff">Weight-decay coefficient (not forwarded to this
        /// kernel; accepted to satisfy the base-class signature).</param>
        public override void UpdateParameters(double weightDecayCoeff)
        {
#if TIMING_LAYERS
            Utils.BNFCUpdateParametersTimer.Start();
#endif

            // Bind parameter buffers, their momentum/speed buffers, and the vector length.
            OpenCLSpace.ClError  = Cl.SetKernelArg(OpenCLSpace.BNFCUpdateParameters, 0, gammaGPU);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCUpdateParameters, 1, betaGPU);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCUpdateParameters, 2, gammaSpeedGPU);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCUpdateParameters, 3, betaSpeedGPU);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCUpdateParameters, 4, (IntPtr)sizeof(int), nInputUnits);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.SetKernelArg");

            // Launch one work-item per unit (1D range sized elsewhere in nUnitsGlobalWorkSizePtr).
            OpenCLSpace.ClError = Cl.EnqueueNDRangeKernel(
                OpenCLSpace.Queue, OpenCLSpace.BNFCUpdateParameters,
                1, null,
                nUnitsGlobalWorkSizePtr, optimalLocalWorkSizePtr,
                0, null, out OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.EnqueueNDRangeKernel");

            // Release the completion event, then synchronize so the update is visible
            // before the next forward/backward pass reads gamma and beta.
            OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

            OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");

#if TIMING_LAYERS
            Utils.BNFCUpdateParametersTimer.Stop();
#endif
        }