/// <summary>
/// Allocates storage for this neuron set's activations and error signals (deltas),
/// sized NumberOfUnits * MiniBatchSize. On the GPU path both buffers are created and
/// zero-initialized; on the CPU path one array per mini-batch item is allocated.
/// </summary>
/// <param name="MiniBatchSize">Number of examples processed together; saved in this.miniBatchSize.</param>
public void SetupBuffers(int MiniBatchSize)
{
    this.miniBatchSize = MiniBatchSize;
#if OPENCL_ENABLED
    // Activations buffer (read-write, host-accessible)
    this.activationsGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                               MemFlags.ReadWrite | MemFlags.AllocHostPtr,
                                               (IntPtr)(sizeof(float) * NumberOfUnits * MiniBatchSize),
                                               out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.CreateBuffer Neurons.activationsGPU");
    OpenCLSpace.WipeBuffer(activationsGPU, NumberOfUnits * MiniBatchSize, typeof(float));

    // Delta buffer (backpropagated error signal)
    this.deltaGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                         MemFlags.ReadWrite | MemFlags.AllocHostPtr,
                                         (IntPtr)(sizeof(float) * NumberOfUnits * MiniBatchSize),
                                         out OpenCLSpace.ClError);
    // BUGFIX: check the creation error BEFORE using the buffer, and wipe deltaGPU.
    // The original code wiped activationsGPU a second time, leaving deltaGPU uninitialized.
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.CreateBuffer Neurons.deltaGPU");
    OpenCLSpace.WipeBuffer(deltaGPU, NumberOfUnits * MiniBatchSize, typeof(float));
#else
    // CPU path: one activation / delta array per mini-batch item.
    for (int m = 0; m < MiniBatchSize; m++)
    {
        this.activations.Add(new double[nUnits]);
        this.delta.Add(new double[nUnits]);
    }
#endif
}
/// <summary>
/// Fully-connected layer forward pass: output = W * input + b, computed for the whole
/// mini-batch. GPU path runs the FCForward kernel (which also receives a dropout
/// parameter, a random seed, and a dropout mask buffer); CPU path does the matrix-vector
/// products on the host.
/// </summary>
public override void FeedForward()
{
#if TIMING_LAYERS
    Utils.FCForwardTimer.Start();
#endif
#if OPENCL_ENABLED
    // Set kernel arguments
    OpenCLSpace.ClError = Cl.SetKernelArg(OpenCLSpace.FCForward, 0, outputNeurons.ActivationsGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.FCForward, 1, inputNeurons.ActivationsGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.FCForward, 2, weightsGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.FCForward, 3, biasesGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.FCForward, 4, (IntPtr)sizeof(int), nInputUnits);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.FCForward, 5, (IntPtr)sizeof(int), nOutputUnits);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.FCForward, 6, (IntPtr)sizeof(int), inputNeurons.MiniBatchSize);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.FCForward, 7, (IntPtr)sizeof(float), (float)dropoutParameter);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.FCForward, 8, (IntPtr)sizeof(ulong), (ulong)Guid.NewGuid().GetHashCode()); // this should be quite a good random seed
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.FCForward, 9, dropoutMaskGPU);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "FullyConnected.FeedForward(): Cl.SetKernelArg");

    // Run kernel (2D NDRange; blocking via Cl.Finish below)
    OpenCLSpace.ClError = Cl.EnqueueNDRangeKernel(OpenCLSpace.Queue,
                                                  OpenCLSpace.FCForward,
                                                  2,
                                                  null,
                                                  forwardGlobalWorkSizePtr,
                                                  forwardLocalWorkSizePtr,
                                                  0,
                                                  null,
                                                  out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "FullyConnected.FeedForward(): Cl.EnqueueNDRangeKernel");

    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");
#else
    // TODO: add dropout CPU
    // Generate dropout mask
    // NOTE(review): the mask is generated here but never applied to the outputs below —
    // CPU dropout appears incomplete; confirm against the GPU kernel's semantics.
    if (dropoutParameter < 1)
    {
        for (int iUnit = 0; iUnit < nOutputUnits * inputNeurons.MiniBatchSize; ++iUnit)
        {
            dropoutMask[iUnit] = Global.RandomDouble() < dropoutParameter;
        }
    }

    // output[m] = W * input[m] + b, for each mini-batch item m
    for (int m = 0; m < inputNeurons.MiniBatchSize; m++)
    {
        double[] unbiasedOutput = Utils.MultiplyMatrixByVector(weights, inputNeurons.GetHost()[m]);
        this.outputNeurons.SetHost(m, unbiasedOutput.Zip(biases, (x, y) => x + y).ToArray());
    }
#endif
#if TIMING_LAYERS
    Utils.FCForwardTimer.Stop();
#endif
}
/// <summary>
/// Tanh layer backward pass: runs the TanhBackward kernel, which writes the input deltas
/// from the output deltas and output activations (the tanh derivative can be expressed in
/// terms of the activations). The steepness parameter beta is passed to the kernel.
/// NOTE(review): unlike sibling layers, this method has no #if OPENCL_ENABLED / CPU
/// fallback — confirm this layer is GPU-only.
/// </summary>
public override void BackPropagate()
{
    // Set kernel arguments
    OpenCLSpace.ClError = Cl.SetKernelArg(OpenCLSpace.TanhBackward, 0, inputNeurons.DeltaGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.TanhBackward, 1, outputNeurons.DeltaGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.TanhBackward, 2, outputNeurons.ActivationsGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.TanhBackward, 3, (IntPtr)sizeof(float), beta);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.TanhBackward, 4, (IntPtr)sizeof(int), nInputUnits);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.TanhBackward, 5, (IntPtr)sizeof(int), inputNeurons.MiniBatchSize);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Tanh.BackPropagate(): Cl.SetKernelArg");

    // Run kernel (1D NDRange), then release the event and block until done.
    OpenCLSpace.ClError = Cl.EnqueueNDRangeKernel(OpenCLSpace.Queue,
                                                  OpenCLSpace.TanhBackward,
                                                  1,
                                                  null,
                                                  globalWorkSizePtr,
                                                  localWorkSizePtr,
                                                  0,
                                                  null,
                                                  out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Tanh.BackPropagate(): Cl.EnqueueNDRangeKernel");

    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");
}
/// <summary>
/// Reads this layer's input-gradient buffer (inputNeurons.DeltaGPU) back from the device
/// and returns it as a double[] of length nInputUnits * MiniBatchSize.
/// The device stores floats; values are widened to double on the host.
/// </summary>
/// <returns>Flattened input gradients for the whole mini-batch.</returns>
public virtual double[] GetInputGradients()
{
    int inputArraySize = nInputUnits * inputNeurons.MiniBatchSize;
    double[] inputGradients = new double[inputArraySize];

    // Copy device buffer to host (blocking read: Bool.True)
    float[] tmpInputGradients = new float[inputArraySize];
    OpenCLSpace.ClError = Cl.EnqueueReadBuffer(OpenCLSpace.Queue,
                                               inputNeurons.DeltaGPU, // source
                                               Bool.True,
                                               (IntPtr)0,
                                               (IntPtr)(sizeof(float) * inputArraySize),
                                               tmpInputGradients, // destination
                                               0,
                                               null,
                                               out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "clEnqueueReadBuffer");

    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");

    // Convert to double and write into public fields
    for (int i = 0; i < inputArraySize; ++i)
    {
        inputGradients[i] = (double)tmpInputGradients[i];
    }

    return(inputGradients);
}
/// <summary>
/// Average-pooling layer forward pass: runs the AveragePoolingForward kernel, which reads
/// the input activations and writes the pooled output activations. The kernel receives the
/// input size, the per-channel area, the channel count, and the mini-batch size.
/// </summary>
public override void FeedForward()
{
#if TIMING_LAYERS
    // TODO: add timer
#endif
    // Set kernel arguments
    OpenCLSpace.ClError = Cl.SetKernelArg(OpenCLSpace.AveragePoolingForward, 0, outputNeurons.ActivationsGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.AveragePoolingForward, 1, inputNeurons.ActivationsGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.AveragePoolingForward, 2, (IntPtr)sizeof(int), nInputUnits);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.AveragePoolingForward, 3, (IntPtr)sizeof(int), inputArea);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.AveragePoolingForward, 4, (IntPtr)sizeof(int), inputDepth);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.AveragePoolingForward, 5, (IntPtr)sizeof(int), inputNeurons.MiniBatchSize);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.SetKernelArg");

    // Run kernel (2D NDRange), release the event, and block until the queue drains.
    OpenCLSpace.ClError = Cl.EnqueueNDRangeKernel(OpenCLSpace.Queue,
                                                  OpenCLSpace.AveragePoolingForward,
                                                  2,
                                                  null,
                                                  fwdGlobalWorkSizePtr,
                                                  fwdLocalWorkSizePtr,
                                                  0,
                                                  null,
                                                  out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.EnqueueNDRangeKernel");

    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");
#if TIMING_LAYERS
    // TODO: add timer
#endif
}
/// <summary>
/// Writes a new input (flattened over the whole mini-batch) into the layer's input
/// activations buffer on the device. Values are narrowed from double to float before the
/// (blocking) write.
/// </summary>
/// <param name="NewInput">Flattened input; must hold at least nInputUnits * MiniBatchSize values.</param>
public virtual void SetInput(double[] NewInput)
{
    // Convert to float and write into tmp arrays
    int inputArraySize = nInputUnits * inputNeurons.MiniBatchSize;
    float[] tmpInput = new float[inputArraySize];
    for (int i = 0; i < inputArraySize; ++i)
    {
        tmpInput[i] = (float)NewInput[i];
    }

    // Write arrays into buffers on device (blocking write: Bool.True)
    OpenCLSpace.ClError = Cl.EnqueueWriteBuffer(OpenCLSpace.Queue,
                                                inputNeurons.ActivationsGPU,
                                                OpenCL.Net.Bool.True,
                                                (IntPtr)0,
                                                (IntPtr)(sizeof(float) * inputArraySize),
                                                tmpInput,
                                                0,
                                                null,
                                                out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.EnqueueWriteBuffer");

    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");
}
/// <summary>
/// Copies the batch-norm scale (gamma) and shift (beta) parameters from the device buffers
/// into their host mirrors (gammaHost / betaHost), one float per channel (inputDepth).
/// Update-speed buffers are intentionally not copied.
/// </summary>
public override void CopyBuffersToHost()
{
    // Blocking read of gamma
    OpenCLSpace.ClError = Cl.EnqueueReadBuffer(OpenCLSpace.Queue,
                                               gammaGPU, // source
                                               Bool.True,
                                               (IntPtr)0,
                                               (IntPtr)(sizeof(float) * inputDepth),
                                               gammaHost, // destination
                                               0,
                                               null,
                                               out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "clEnqueueReadBuffer gammaGPU");

    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    // Blocking read of beta
    OpenCLSpace.ClError = Cl.EnqueueReadBuffer(OpenCLSpace.Queue,
                                               betaGPU, // source
                                               Bool.True,
                                               (IntPtr)0,
                                               (IntPtr)(sizeof(float) * inputDepth),
                                               betaHost, // destination
                                               0,
                                               null,
                                               out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "clEnqueueReadBuffer betaGPU");

    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");

    // Speeds are not saved.
}
/// <summary>
/// Residual (skip-connection) layer backward pass. The error signal has already been
/// backpropagated through the convolutional branch (see UpdateSpeeds); here the SkipBackward
/// kernel accumulates the gradient arriving through the identity/skip path into the input deltas.
/// </summary>
public override void BackPropagate()
{
    // Errors have already been backpropagated to input of first convolutional layer (see method UpdateSpeeds)
    // Now just cumulate the gradients coming from the skip connection
    OpenCLSpace.ClError = Cl.SetKernelArg(OpenCLSpace.SkipBackward, 0, inputNeurons.DeltaGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.SkipBackward, 1, outputNeurons.DeltaGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.SkipBackward, 2, (IntPtr)sizeof(int), nInputUnits);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.SkipBackward, 3, (IntPtr)sizeof(int), inputNeurons.MiniBatchSize);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.SetKernelArg");

    // Run kernel (1D NDRange), release the event, and block until completion.
    OpenCLSpace.ClError = Cl.EnqueueNDRangeKernel(OpenCLSpace.Queue,
                                                  OpenCLSpace.SkipBackward,
                                                  1,
                                                  null,
                                                  globalWorkSizePtr,
                                                  localWorkSizePtr,
                                                  0,
                                                  null,
                                                  out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.EnqueueNDRangeKernel");

    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");
}
/// <summary>
/// Batch-norm (convolutional) output setup. Output geometry equals input geometry
/// (batch norm does not change shape). Allocates per-channel device buffers for the
/// batch mean/variance and their cumulative (running) averages, plus a buffer holding
/// the normalized input activations needed by backprop. Means start at zero,
/// cumulative variances start at one.
/// </summary>
public override void SetupOutput()
{
    // Output shape = input shape
    this.outputWidth = inputWidth;
    this.outputHeight = inputHeight;
    this.outputDepth = inputDepth;
    this.inputArea = inputHeight * inputWidth;
    this.nOutputUnits = nInputUnits;
    this.outputNeurons = new Neurons(nOutputUnits);

    // Initialize OpenCL buffers
    // 1. mean, variance and their cumulative averages (one float per channel)
    this.meanGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                        MemFlags.ReadWrite,
                                        (IntPtr)(sizeof(float) * inputDepth),
                                        out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "InitializeParameters(): Cl.CreateBuffer");
    OpenCLSpace.WipeBuffer(meanGPU, inputDepth, typeof(float));

    this.varianceGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                            MemFlags.ReadWrite,
                                            (IntPtr)(sizeof(float) * inputDepth),
                                            out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "InitializeParameters(): Cl.CreateBuffer");
    OpenCLSpace.WipeBuffer(varianceGPU, inputDepth, typeof(float));

    // (Initialize cumulative means to zero...)
    this.cumulativeMeanGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                                  MemFlags.ReadWrite,
                                                  (IntPtr)(sizeof(float) * inputDepth),
                                                  out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "InitializeParameters(): Cl.CreateBuffer");
    OpenCLSpace.WipeBuffer(cumulativeMeanGPU, inputDepth, typeof(float));

    // (...and variances to one.)
    float[] ones = new float[inputDepth];
    for (int i = 0; i < inputDepth; ++i)
    {
        ones[i] = 1.0f;
    }
    this.cumulativeVarianceGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                                      MemFlags.ReadWrite | MemFlags.CopyHostPtr,
                                                      (IntPtr)(sizeof(float) * inputDepth),
                                                      ones,
                                                      out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "InitializeParameters(): Cl.CreateBuffer");

    // OpenCL buffer for normalized input values (needed for backprop)
    this.normalizedInputGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                                   MemFlags.ReadWrite,
                                                   (IntPtr)(sizeof(float) * nInputUnits * inputNeurons.MiniBatchSize),
                                                   out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "InitializeParameters(): Cl.CreateBuffer");
    OpenCLSpace.WipeBuffer(normalizedInputGPU, nInputUnits * inputNeurons.MiniBatchSize, typeof(float));
}
/// <summary>
/// Run network backwards, propagating the gradient backwards and also updating parameters.
/// Requires that gradient has ALREADY BEEN WRITTEN in network.Layers[nLayers-1].InputNeurons.Delta
/// </summary>
/// <param name="learningRate">Step size for the parameter update.</param>
/// <param name="momentumMultiplier">Momentum coefficient applied to the update speeds.</param>
/// <param name="weightDecayCoeff">L2 weight-decay coefficient.</param>
/// <param name="weightMaxNorm">Max-norm constraint passed to UpdateParameters.</param>
public void BackwardPass(double learningRate, double momentumMultiplier, double weightDecayCoeff, double weightMaxNorm)
{
    // Propagate error signal backwards (layers L-2 to 1, i.e. second last to second).
    for (int l = nLayers - 2; l > 0; l--)
    {
        // 1. Update layer's parameters' change speed using gradient
        layers[l].UpdateSpeeds(learningRate, momentumMultiplier, weightDecayCoeff);

        // 2. Backpropagate errors to previous layer (no need to do it for layer 1)
        if (l > 1)
        {
            layers[l].BackPropagate();
        }

#if DEBUGGING_STEPBYSTEP
        /* ------------------------- DEBUGGING --------------------------------------------- */

        // Display input delta layer-by-layer
        int miniBatchSize = layers[0].OutputNeurons.MiniBatchSize;
#if OPENCL_ENABLED
        float[] deltaInputAll = new float[layers[l].InputNeurons.NumberOfUnits * miniBatchSize];
        OpenCLSpace.ClError = Cl.EnqueueReadBuffer(OpenCLSpace.Queue,
                                                   layers[l].InputNeurons.DeltaGPU, // source
                                                   Bool.True,
                                                   (IntPtr)0,
                                                   (IntPtr)(layers[l].InputNeurons.NumberOfUnits * miniBatchSize * sizeof(float)),
                                                   deltaInputAll, // destination
                                                   0,
                                                   null,
                                                   out OpenCLSpace.ClEvent);
        // BUGFIX: diagnostic message used to say "ForwardPass"; this is the backward pass.
        OpenCLSpace.CheckErr(OpenCLSpace.ClError, "NeuralNetwork.BackwardPass Cl.clEnqueueReadBuffer deltaInputAll");
#endif
        Console.WriteLine("\nLayer {0} ({1}) backpropagated delta:", l, layers[l].Type);
        for (int m = 0; m < miniBatchSize; m++)
        {
            float[] deltaInput = new float[layers[l].InputNeurons.NumberOfUnits];
            Array.Copy(deltaInputAll, m * layers[l].InputNeurons.NumberOfUnits, deltaInput, 0, layers[l].InputNeurons.NumberOfUnits);
            Console.WriteLine("\n --- Mini-batch item {0} -----", m);
            for (int j = 0; j < deltaInput.Length; j++)
            {
                Console.Write("{0} ", deltaInput[j]);
            }
            Console.WriteLine();
            Console.ReadKey();
        }

        /* ------------------------- END DEBUGGING --------------------------------------------- */
#endif

        // 3. Update layer's parameters
        layers[l].UpdateParameters(weightMaxNorm);
    }
}
/// <summary>
/// Reads the fully-connected layer's weight and bias gradients back from the device and
/// returns them as one double[]: first nInputUnits*nOutputUnits weight gradients
/// (row-major as stored on device), then nOutputUnits bias gradients.
/// </summary>
/// <returns>Concatenated [weight gradients | bias gradients] as doubles.</returns>
public override double[] GetParameterGradients()
{
    int nParameters = nInputUnits * nOutputUnits + nOutputUnits;
    double[] parameterGradients = new double[nParameters];

    // Copy weights and biases gradients buffers to host (blocking reads)
    float[] tmpWeightsGrad = new float[nInputUnits * nOutputUnits];
    OpenCLSpace.ClError = Cl.EnqueueReadBuffer(OpenCLSpace.Queue,
                                               weightsGradientsGPU, // source
                                               Bool.True,
                                               (IntPtr)0,
                                               (IntPtr)(sizeof(float) * nInputUnits * nOutputUnits),
                                               tmpWeightsGrad, // destination
                                               0,
                                               null,
                                               out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "clEnqueueReadBuffer");

    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    float[] tmpBiasesGrad = new float[nOutputUnits];
    OpenCLSpace.ClError = Cl.EnqueueReadBuffer(OpenCLSpace.Queue,
                                               biasesGradientsGPU, // source
                                               Bool.True,
                                               (IntPtr)0,
                                               (IntPtr)(sizeof(float) * nOutputUnits),
                                               tmpBiasesGrad, // destination
                                               0,
                                               null,
                                               out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "clEnqueueReadBuffer");

    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");

    // Convert to double and write into parameterGradients: weights first, then biases.
    for (int i = 0; i < nInputUnits * nOutputUnits; ++i)
    {
        parameterGradients[i] = (double)tmpWeightsGrad[i];
    }
    for (int i = 0; i < nOutputUnits; ++i)
    {
        parameterGradients[nInputUnits * nOutputUnits + i] = (double)tmpBiasesGrad[i];
    }

    return(parameterGradients);
}
/// <summary>
/// ReLU layer forward pass: output = max(0, input), element-wise over the whole mini-batch.
/// GPU path runs the ReLUForward kernel on nOutputUnits * MiniBatchSize elements; CPU path
/// applies the rectifier on the host.
/// </summary>
public override void FeedForward()
{
#if TIMING_LAYERS
    Utils.NonlinearityForwardTimer.Start();
#endif
#if OPENCL_ENABLED
    // Set kernel arguments (arg 2 = total number of elements to process)
    OpenCLSpace.ClError = Cl.SetKernelArg(OpenCLSpace.ReLUForward, 0, OutputNeurons.ActivationsGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.ReLUForward, 1, InputNeurons.ActivationsGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.ReLUForward, 2, (IntPtr)sizeof(int), OutputNeurons.NumberOfUnits * inputNeurons.MiniBatchSize);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "ReLU.FeedForward(): Cl.SetKernelArg");

    // Run kernel (1D NDRange), release event, block until done.
    OpenCLSpace.ClError = Cl.EnqueueNDRangeKernel(OpenCLSpace.Queue,
                                                  OpenCLSpace.ReLUForward,
                                                  1,
                                                  null,
                                                  globalWorkSizePtr,
                                                  localWorkSizePtr,
                                                  0,
                                                  null,
                                                  out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "ReLU.FeedForward(): Cl.EnqueueNDRangeKernel");

    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");
#else
    // CPU path: rectify each unit of each mini-batch item.
    for (int m = 0; m < inputNeurons.MiniBatchSize; m++)
    {
        double[] tmpOutput = new double[this.nOutputUnits];
        for (int i = 0; i < this.nOutputUnits; i++)
        {
            if (this.inputNeurons.GetHost()[m][i] > 0)
            {
                tmpOutput[i] = this.inputNeurons.GetHost()[m][i];
            }
            else
            {
                tmpOutput[i] = 0.0;
            }
        }
        this.outputNeurons.SetHost(m, tmpOutput);
    }
#endif
#if TIMING_LAYERS
    Utils.NonlinearityForwardTimer.Stop();
#endif
}
/// <summary>
/// Converts a GDI+ image into a planar RGB data point (all red values, then all green,
/// then all blue) and appends it, with its label, to DataContainer. Pixels are read
/// directly from locked bitmap memory for speed (requires unsafe context).
/// GPU path uploads the data into a read-only OpenCL buffer; CPU path stores a double[].
/// </summary>
/// <param name="input">Source image; converted to a Bitmap for pixel access.</param>
/// <param name="label">Class label associated with this image.</param>
public void ReadImage(Image input, int label)
{
    unsafe
    {
        using (Bitmap bmp = new Bitmap(input))
        {
            int offSet = bmp.Width * bmp.Height; // number of pixels = size of one color plane
            DataDimension = offSet * 3;          // 3 planes: R, G, B
#if OPENCL_ENABLED
            float[] dataPoint = new float[DataDimension];
#else
            double[] dataPoint = new double[DataDimension];
#endif
            #region Copy RGB values directly from memory to the array
            BitmapData bitmapData = bmp.LockBits(new Rectangle(0, 0, bmp.Width, bmp.Height), ImageLockMode.ReadOnly, bmp.PixelFormat);

            int bytesPerPixel = Image.GetPixelFormatSize(bmp.PixelFormat) / 8;
            int heightInPixels = bitmapData.Height;
            int widthInBytes = bitmapData.Width * bytesPerPixel;
            byte * ptrFirstPixel = (byte *)bitmapData.Scan0;

            // Rows may be padded, so advance by Stride per row, not by widthInBytes.
            // Bitmap memory is BGR(A) ordered, hence x+2 = Red, x+1 = Green, x = Blue.
            int index = 0;
            for (int y = 0; y < heightInPixels; y++)
            {
                byte *currentLine = ptrFirstPixel + (y * bitmapData.Stride);
                for (int x = 0; x < widthInBytes; x = x + bytesPerPixel)
                {
                    dataPoint[index] = currentLine[x + 2]; // Red
                    dataPoint[index + offSet] = currentLine[x + 1]; // Green
                    dataPoint[index + offSet + offSet] = currentLine[x]; // Blue
                    index++;
                }
            }
            bmp.UnlockBits(bitmapData);
            #endregion
#if OPENCL_ENABLED
            // Upload the data point into a device buffer owned by the DataItem.
            int datumBytesSize = sizeof(float) * dataPoint.Length;
            Mem tmpBuffer = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                                 MemFlags.ReadOnly | MemFlags.CopyHostPtr | MemFlags.AllocHostPtr,
                                                 (IntPtr)datumBytesSize,
                                                 dataPoint,
                                                 out OpenCLSpace.ClError);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "DataSet(): Cl.CreateBuffer tmpBuffer");
            DataContainer.Add(new DataItem(tmpBuffer, label));
#else
            DataContainer.Add(new DataItem(dataPoint, label));
#endif
        }
    }
}
/// <summary>
/// Zero-fills an OpenCL buffer by launching the type-appropriate wipe kernel.
/// Supports float, int, and bool element types.
/// </summary>
/// <param name="buffer">Device buffer to clear.</param>
/// <param name="nElementsInBuffer">Number of elements (not bytes) in the buffer.</param>
/// <param name="type">Element type: typeof(float), typeof(int), or typeof(bool).</param>
/// <exception cref="ArgumentException">Thrown for any other element type.</exception>
public static void WipeBuffer(Mem buffer, int nElementsInBuffer, Type type)
{
    // Pick the kernel matching the element type.
    Kernel WipeKernel;
    if (type == typeof(float))
    {
        WipeKernel = WipeBufferFloatKernel;
    }
    else if (type == typeof(int))
    {
        WipeKernel = WipeBufferIntKernel;
    }
    else if (type == typeof(bool))
    {
        WipeKernel = WipeBufferBoolKernel;
    }
    else
    {
        throw new ArgumentException("Type not supported. Use either float, int, or bool.");
    }

    // Set kernel arguments
    OpenCLSpace.ClError = Cl.SetKernelArg(WipeKernel, 0, buffer);
    OpenCLSpace.ClError |= Cl.SetKernelArg(WipeKernel, 1, (IntPtr)sizeof(int), nElementsInBuffer);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.SetKernelArg WipeBufferKernel");

    // Work sizes: global size rounded up to a multiple of the optimal group size.
    IntPtr[] localWorkSizePtr = { (IntPtr)OPTIMAL_GROUP_SIZE };
    IntPtr[] globalWorkSizePtr = { (IntPtr)(OPTIMAL_GROUP_SIZE * Math.Ceiling((double)(nElementsInBuffer) / (double)OPTIMAL_GROUP_SIZE)) };

    // Run kernel
    ClError = Cl.EnqueueNDRangeKernel(queue,
                                      WipeKernel,
                                      1,
                                      null,
                                      globalWorkSizePtr,
                                      localWorkSizePtr,
                                      0,
                                      null,
                                      out ClEvent);
    // BUGFIX: diagnostic used to say "ZeroUnpadBatch" (copy-pasted from another kernel).
    CheckErr(ClError, "Cl.EnqueueNDRangeKernel WipeBuffer");

    ClError = Cl.ReleaseEvent(ClEvent);
    CheckErr(ClError, "Cl.ReleaseEvent");

    ClError = Cl.Finish(queue);
    CheckErr(ClError, "Cl.Finish");
}
/// <summary>
/// ELU layer backward pass. GPU path runs the ELUBackward kernel, which computes the input
/// deltas from the output deltas and the input activations, using the ELU parameter alpha.
/// The CPU path is not implemented and throws.
/// </summary>
/// <exception cref="NotImplementedException">On the CPU (non-OpenCL) build.</exception>
public override void BackPropagate()
{
#if TIMING_LAYERS
    Utils.NonlinearityBackpropTimer.Start();
#endif
#if OPENCL_ENABLED
    // Set kernel arguments (arg 4 = total number of elements to process)
    OpenCLSpace.ClError = Cl.SetKernelArg(OpenCLSpace.ELUBackward, 0, inputNeurons.DeltaGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.ELUBackward, 1, outputNeurons.DeltaGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.ELUBackward, 2, inputNeurons.ActivationsGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.ELUBackward, 3, (IntPtr)sizeof(float), alpha);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.ELUBackward, 4, (IntPtr)sizeof(int), nInputUnits * inputNeurons.MiniBatchSize);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "ELU.BackPropagate(): Cl.SetKernelArg");

    // Run kernel (1D NDRange), release event, block until done.
    OpenCLSpace.ClError = Cl.EnqueueNDRangeKernel(OpenCLSpace.Queue,
                                                  OpenCLSpace.ELUBackward,
                                                  1,
                                                  null,
                                                  globalWorkSizePtr,
                                                  localWorkSizePtr,
                                                  0,
                                                  null,
                                                  out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "ELU.BackPropagate(): Cl.EnqueueNDRangeKernel");

    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");
#else
    // BUGFIX: the original CPU branch contained an unreachable loop after the throw whose
    // body had been commented out, leaving a bare `for (...)` that made the closing brace
    // its body and broke compilation when OPENCL_ENABLED was undefined. Removed.
    throw new NotImplementedException("CPU code for ELUs not implemented yet.");
#endif
#if TIMING_LAYERS
    Utils.NonlinearityBackpropTimer.Stop();
#endif
}
/// <summary>
/// Reads the batch-norm gamma and beta gradients back from the device and returns them as
/// one double[] of length 2*nInputUnits: gamma gradients first, then beta gradients.
/// </summary>
/// <returns>Concatenated [gamma gradients | beta gradients] as doubles.</returns>
public override double[] GetParameterGradients()
{
    double[] parameterGradients = new double[2 * nInputUnits];

    // Copy gamma and beta gradients buffers to host (blocking reads)
    float[] tmpGammaGrad = new float[nInputUnits];
    OpenCLSpace.ClError = Cl.EnqueueReadBuffer(OpenCLSpace.Queue,
                                               deltaGammaGPU, // source
                                               Bool.True,
                                               (IntPtr)0,
                                               (IntPtr)(sizeof(float) * nInputUnits),
                                               tmpGammaGrad, // destination
                                               0,
                                               null,
                                               out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "clEnqueueReadBuffer");

    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    float[] tmpBetaGrad = new float[nInputUnits];
    OpenCLSpace.ClError = Cl.EnqueueReadBuffer(OpenCLSpace.Queue,
                                               deltaBetaGPU, // source
                                               Bool.True,
                                               (IntPtr)0,
                                               (IntPtr)(sizeof(float) * nInputUnits),
                                               tmpBetaGrad, // destination
                                               0,
                                               null,
                                               out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "clEnqueueReadBuffer");

    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");

    // Convert to double and write into public fields
    for (int i = 0; i < nInputUnits; ++i)
    {
        parameterGradients[i] = (double)tmpGammaGrad[i];
        parameterGradients[nInputUnits + i] = (double)tmpBetaGrad[i];
    }

    return(parameterGradients);
}
/// <summary>
/// Overwrites the fully-connected layer's weights and biases on the device.
/// Expects NewParameters laid out as [nInputUnits*nOutputUnits weights | nOutputUnits biases],
/// matching GetParameterGradients. Values are narrowed to float before the blocking writes.
/// </summary>
/// <param name="NewParameters">New parameter values, weights first then biases.</param>
public override void SetParameters(double[] NewParameters)
{
    // Convert to float and write into tmp arrays
    float[] tmpWeights = new float[nInputUnits * nOutputUnits];
    float[] tmpBiases = new float[nOutputUnits];
    for (int i = 0; i < nInputUnits * nOutputUnits; ++i)
    {
        tmpWeights[i] = (float)NewParameters[i];
    }
    for (int i = 0; i < nOutputUnits; ++i)
    {
        tmpBiases[i] = (float)NewParameters[nInputUnits * nOutputUnits + i];
    }

    // Write arrays into buffers on device (blocking writes)
    OpenCLSpace.ClError = Cl.EnqueueWriteBuffer(OpenCLSpace.Queue,
                                                weightsGPU,
                                                OpenCL.Net.Bool.True,
                                                (IntPtr)0,
                                                (IntPtr)(sizeof(float) * nInputUnits * nOutputUnits),
                                                tmpWeights,
                                                0,
                                                null,
                                                out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.EnqueueWriteBuffer");

    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    OpenCLSpace.ClError = Cl.EnqueueWriteBuffer(OpenCLSpace.Queue,
                                                biasesGPU,
                                                OpenCL.Net.Bool.True,
                                                (IntPtr)0,
                                                (IntPtr)(sizeof(float) * nOutputUnits),
                                                tmpBiases,
                                                0,
                                                null,
                                                out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.EnqueueWriteBuffer");

    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");
}
/// <summary>
/// Batch-norm (fully-connected) output setup. Output geometry equals input geometry.
/// Allocates per-unit device buffers (one float per input unit, unlike the convolutional
/// variant which is per-channel) for mean, variance, their cumulative averages, and a
/// buffer for the normalized inputs needed by backprop. All buffers are zero-initialized,
/// including the cumulative variance.
/// </summary>
public override void SetupOutput()
{
    // Output shape = input shape
    this.outputWidth = inputWidth;
    this.outputHeight = inputHeight;
    this.outputDepth = inputDepth;
    this.nOutputUnits = nInputUnits;
    this.outputNeurons = new Neurons(nOutputUnits);

    // Also initialize OpenCL buffers for mean, variance, their cumulative averages, and normalized input activations
    this.meanGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                        MemFlags.ReadWrite,
                                        (IntPtr)(sizeof(float) * nInputUnits),
                                        out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "InitializeParameters(): Cl.CreateBuffer");
    OpenCLSpace.WipeBuffer(meanGPU, nInputUnits, typeof(float));

    this.varianceGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                            MemFlags.ReadWrite,
                                            (IntPtr)(sizeof(float) * nInputUnits),
                                            out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "InitializeParameters(): Cl.CreateBuffer");
    OpenCLSpace.WipeBuffer(varianceGPU, nInputUnits, typeof(float));

    this.cumulativeMeanGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                                  MemFlags.ReadWrite,
                                                  (IntPtr)(sizeof(float) * nInputUnits),
                                                  out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "InitializeParameters(): Cl.CreateBuffer");
    OpenCLSpace.WipeBuffer(cumulativeMeanGPU, nInputUnits, typeof(float));

    // NOTE(review): cumulative variance is wiped to zero here, whereas the convolutional
    // batch-norm variant initializes it to one — confirm which initialization is intended.
    this.cumulativeVarianceGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                                      MemFlags.ReadWrite,
                                                      (IntPtr)(sizeof(float) * nInputUnits),
                                                      out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "InitializeParameters(): Cl.CreateBuffer");
    OpenCLSpace.WipeBuffer(cumulativeVarianceGPU, nInputUnits, typeof(float));

    // Normalized input activations (needed for backprop)
    this.normalizedInputGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                                   MemFlags.ReadWrite,
                                                   (IntPtr)(sizeof(float) * nInputUnits * inputNeurons.MiniBatchSize),
                                                   out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "InitializeParameters(): Cl.CreateBuffer");
    OpenCLSpace.WipeBuffer(normalizedInputGPU, nInputUnits * inputNeurons.MiniBatchSize, typeof(float));
}
/// <summary>
/// Fully-connected layer output setup: the output is a flat vector (depth = nOutputUnits,
/// height = width = 1). On the GPU path, also allocates and zeroes the per-unit,
/// per-mini-batch-item dropout mask buffer used by FeedForward/BackPropagate.
/// </summary>
public override void SetupOutput()
{
    // Flat output: nOutputUnits x 1 x 1
    this.outputDepth = nOutputUnits;
    this.outputHeight = 1;
    this.outputWidth = 1;
    this.outputNeurons = new Neurons(this.nOutputUnits);
#if OPENCL_ENABLED
    // Dropout mask: one bool per output unit per mini-batch item.
    this.dropoutMaskGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                               MemFlags.ReadWrite,
                                               (IntPtr)(sizeof(bool) * nOutputUnits * inputNeurons.MiniBatchSize),
                                               out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "InitializeParameters(): Cl.CreateBuffer");
    OpenCLSpace.WipeBuffer(dropoutMaskGPU, nOutputUnits * inputNeurons.MiniBatchSize, typeof(bool));
#endif
}
/// <summary>
/// Fully-connected layer backward pass: computes input deltas = W^T * output deltas for the
/// whole mini-batch. GPU path runs the FCBackward kernel (which also receives the dropout
/// mask); CPU path multiplies by the transposed weight matrix on the host.
/// </summary>
public override void BackPropagate()
{
#if TIMING_LAYERS
    Utils.FCBackpropTimer.Start();
#endif
#if OPENCL_ENABLED
    // Set kernel arguments.
    // BUGFIX: the first assignment used |= on ClError without resetting it, so a stale
    // error code left over from a previous OpenCL call could falsely trip CheckErr here.
    OpenCLSpace.ClError = Cl.SetKernelArg(OpenCLSpace.FCBackward, 0, inputNeurons.DeltaGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.FCBackward, 1, outputNeurons.DeltaGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.FCBackward, 2, weightsGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.FCBackward, 3, dropoutMaskGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.FCBackward, 4, (IntPtr)sizeof(int), nInputUnits);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.FCBackward, 5, (IntPtr)sizeof(int), nOutputUnits);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.FCBackward, 6, (IntPtr)sizeof(int), inputNeurons.MiniBatchSize);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "FullyConnected.BackPropagate(): Cl.SetKernelArg");

    // Run kernel (2D NDRange), release event, block until done.
    OpenCLSpace.ClError = Cl.EnqueueNDRangeKernel(OpenCLSpace.Queue,
                                                  OpenCLSpace.FCBackward,
                                                  2,
                                                  null,
                                                  backwardGlobalWorkSizePtr,
                                                  backwardLocalWorkSizePtr,
                                                  0,
                                                  null,
                                                  out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "FullyConnected.BackPropagate(): Cl.EnqueueNDRangeKernel");

    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");
#else
    // CPU path: delta_in[m] = W^T * delta_out[m]
    for (int m = 0; m < inputNeurons.MiniBatchSize; m++)
    {
        inputNeurons.DeltaHost[m] = Utils.MultiplyMatrixTranspByVector(weights, outputNeurons.DeltaHost[m]);
    }
#endif
#if TIMING_LAYERS
    Utils.FCBackpropTimer.Stop();
#endif
}
/// <summary>
/// Overwrites the batch-norm layer's learnable parameters on the device.
/// Expects NewParameters laid out as [inputDepth gamma values | inputDepth beta values],
/// matching the ordering used by GetParameterGradients. Values are narrowed to float
/// before the blocking writes.
/// </summary>
/// <param name="NewParameters">New parameter values: gammas first, then betas.</param>
public override void SetParameters(double[] NewParameters)
{
    // Convert to float and write into tmp arrays
    float[] tmpGamma = new float[inputDepth];
    float[] tmpBeta = new float[inputDepth];
    for (int i = 0; i < inputDepth; ++i)
    {
        tmpGamma[i] = (float)NewParameters[i];
        tmpBeta[i] = (float)NewParameters[inputDepth + i];
    }

    // Write arrays into buffers on device (blocking writes)
    OpenCLSpace.ClError = Cl.EnqueueWriteBuffer(OpenCLSpace.Queue,
                                                gammaGPU,
                                                OpenCL.Net.Bool.True,
                                                (IntPtr)0,
                                                (IntPtr)(sizeof(float) * inputDepth),
                                                tmpGamma,
                                                0,
                                                null,
                                                out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.EnqueueWriteBuffer");

    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    OpenCLSpace.ClError = Cl.EnqueueWriteBuffer(OpenCLSpace.Queue,
                                                betaGPU,
                                                OpenCL.Net.Bool.True,
                                                (IntPtr)0,
                                                (IntPtr)(sizeof(float) * inputDepth),
                                                tmpBeta,
                                                0,
                                                null,
                                                out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.EnqueueWriteBuffer");

    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");
}
/// <summary>
/// Loads a dataset from two parallel text files: one tab-separated data row per line in
/// dataPath, and one integer label per line in labelsPath. Each row becomes a DataItem in
/// DataContainer (an OpenCL buffer on the GPU path, a double[] on the CPU path).
/// DataDimension is set from the column count of the rows.
/// </summary>
/// <param name="dataPath">Path to the tab-separated data file.</param>
/// <param name="labelsPath">Path to the labels file (one label per line).</param>
/// <exception cref="Exception">If the two files have different line counts.</exception>
public void ReadData(string dataPath, string labelsPath)
{
    string[] dataArray = File.ReadAllLines(dataPath);
    string[] labelsArray = File.ReadAllLines(labelsPath);
    if (dataArray.Length != labelsArray.Length)
    {
        throw new Exception("The amount of data does not match the amount of labels");
    }

    // Read images and their labels
    for (int index = 0; index < dataArray.Length; index++)
    {
        string[] columns = dataArray[index].Split('\t');
        DataDimension = columns.Length;
#if OPENCL_ENABLED
        // Parse the row as floats (invariant culture: data files are machine-generated)
        // and upload it into a read-only device buffer.
        float[] dataPoint = new float[columns.Length];
        for (int i = 0; i < columns.Length; i++)
        {
            dataPoint[i] = float.Parse(columns[i], CultureInfo.InvariantCulture.NumberFormat);
        }
        int datumBytesSize = sizeof(float) * dataPoint.Length;
        Mem tmpBuffer = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                             MemFlags.ReadOnly | MemFlags.CopyHostPtr | MemFlags.AllocHostPtr,
                                             (IntPtr)datumBytesSize,
                                             dataPoint,
                                             out OpenCLSpace.ClError);
        OpenCLSpace.CheckErr(OpenCLSpace.ClError, "DataSet(): Cl.CreateBuffer tmpBuffer");
#else
        // CPU path: keep the row on the host as doubles.
        double[] tmpBuffer = new double[columns.Length];
        for (int i = 0; i < columns.Length; i++)
        {
            tmpBuffer[i] = double.Parse(columns[i], CultureInfo.InvariantCulture.NumberFormat);
        }
#endif
        DataContainer.Add(new DataItem(tmpBuffer, Convert.ToInt32(labelsArray[index])));
    }
}
/// <summary>
/// Max-pooling layer backward pass: runs the MaxPoolingBackward kernel, which routes each
/// output delta back to the input position recorded in the switches buffer during the
/// forward pass, using the pooling lookup table.
/// </summary>
public override void BackPropagate()
{
#if TIMING_LAYERS
    Utils.PoolingBackpropTimer.Start();
#endif
#if OPENCL_ENABLED
    // Set kernel arguments.
    // NOTE(review): args 5 and 7 pass inputWidth*inputWidth and outputWidth*outputWidth as
    // the per-channel areas — this assumes square feature maps (width == height); confirm.
    OpenCLSpace.ClError = Cl.SetKernelArg(OpenCLSpace.MaxPoolingBackward, 0, inputNeurons.DeltaGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.MaxPoolingBackward, 1, outputNeurons.DeltaGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.MaxPoolingBackward, 2, switchesGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.MaxPoolingBackward, 3, poolingTableGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.MaxPoolingBackward, 4, (IntPtr)sizeof(int), nInputUnits);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.MaxPoolingBackward, 5, (IntPtr)sizeof(int), inputWidth * inputWidth);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.MaxPoolingBackward, 6, (IntPtr)sizeof(int), nOutputUnits);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.MaxPoolingBackward, 7, (IntPtr)sizeof(int), outputWidth * outputWidth);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.MaxPoolingBackward, 8, (IntPtr)sizeof(int), inputNeurons.MiniBatchSize);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.SetKernelArg PoolingBackward");

    // Run kernel (1D NDRange), release event, block until done.
    OpenCLSpace.ClError = Cl.EnqueueNDRangeKernel(OpenCLSpace.Queue,
                                                  OpenCLSpace.MaxPoolingBackward,
                                                  1,
                                                  null,
                                                  globalWorkSizePtr,
                                                  localWorkSizePtr,
                                                  0,
                                                  null,
                                                  out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.EnqueueNDRangeKernel PoolingBackward");

    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");
#else
    //TODO: CPU code
#endif
#if TIMING_LAYERS
    Utils.PoolingBackpropTimer.Stop();
#endif
}
/// <summary>
/// Computes the cross-entropy gradient (class scores minus one-hot true label) for every
/// example of the mini-batch and writes the flattened result into the delta buffer of the
/// softmax layer's input neurons (i.e. the classifier's output neurons) on the device.
/// </summary>
/// <param name="DataSet">Data set providing the true labels of the mini-batch examples.</param>
/// <param name="iMiniBatch">Indices (into the data set) of the examples in this mini-batch.</param>
public void CrossEntropyGradient(DataSet DataSet, int[] iMiniBatch)
{
    int nClasses = DataSet.NumberOfClasses;
    float[] gradientBatch = new float[iMiniBatch.Length * nClasses];

    for (int m = 0; m < iMiniBatch.Length; m++)
    {
        int labelOfExample = DataSet.DataContainer[iMiniBatch[m]].Label;

        // Gradient of cross-entropy w.r.t. pre-softmax outputs: scores - one_hot(label)
        double[] gradient = new double[nClasses];
        Array.Copy(outputLayer.OutputClassScores[m], gradient, nClasses);
        gradient[labelOfExample] -= 1.0;

        // Flatten into the batch array (row-major: example index, then class index).
        int offset = m * nClasses;
        for (int c = 0; c < nClasses; c++)
        {
            gradientBatch[offset + c] = (float)gradient[c];
        }
    }

    // Now write gradient to input neurons of softmax layer (i.e. to output neurons of classifier)
    OpenCLSpace.ClError = Cl.EnqueueWriteBuffer(OpenCLSpace.Queue,
                                                layers.Last().InputNeurons.DeltaGPU,
                                                Bool.True,
                                                (IntPtr)0,
                                                (IntPtr)(sizeof(float) * gradientBatch.Length),
                                                gradientBatch,
                                                0,
                                                null,
                                                out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "NetworkTrainer.CrossEntropyGradient(): Cl.EnqueueWriteBuffer");

    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");
}
/// <summary>
/// Backward pass of the batch-normalization (convolutional) layer: launches the
/// BNConvBackPropagate kernel, which writes the input deltas and fills the
/// deltaGamma/deltaBeta gradient buffers on the device.
/// </summary>
public override void BackPropagate()
{
#if TIMING_LAYERS
    Utils.BNConvBackpropTimer.Start();
#endif
    // NOTE(review): unlike sibling layers, this body has no #if OPENCL_ENABLED guard,
    // so it unconditionally requires OpenCL — confirm a CPU path is intentionally unsupported.
    OpenCLSpace.ClError = Cl.SetKernelArg(OpenCLSpace.BNConvBackPropagate, 0, inputNeurons.DeltaGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNConvBackPropagate, 1, outputNeurons.DeltaGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNConvBackPropagate, 2, normalizedInputGPU); // x-hat saved in the forward pass
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNConvBackPropagate, 3, gammaGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNConvBackPropagate, 4, varianceGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNConvBackPropagate, 5, deltaGammaGPU); // gradient outputs
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNConvBackPropagate, 6, deltaBetaGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNConvBackPropagate, 7, (IntPtr)sizeof(int), inputArea);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNConvBackPropagate, 8, (IntPtr)sizeof(int), nInputUnits);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNConvBackPropagate, 9, (IntPtr)sizeof(int), inputNeurons.MiniBatchSize);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.SetKernelArg");

    // Run the kernel, then block until the queue drains.
    OpenCLSpace.ClError = Cl.EnqueueNDRangeKernel(OpenCLSpace.Queue,
                                                  OpenCLSpace.BNConvBackPropagate,
                                                  1,
                                                  null,
                                                  nActivationsGlobalWorkSizePtr,
                                                  optimalLocalWorkSizePtr,
                                                  0,
                                                  null,
                                                  out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.EnqueueNDRangeKernel");

    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");

#if TIMING_LAYERS
    Utils.BNConvBackpropTimer.Stop();
#endif
}
/// <summary>
/// Copies this layer's weights and biases from the device buffers back into the
/// host-side arrays (weightsHost, biasesHost), e.g. before serializing the network.
/// Both reads are blocking; the momentum/speed buffers are intentionally not copied.
/// </summary>
public override void CopyBuffersToHost()
{
    // Blocking read of the full weight matrix (nOutputUnits x nInputUnits floats).
    OpenCLSpace.ClError = Cl.EnqueueReadBuffer(OpenCLSpace.Queue,
                                               weightsGPU, // source
                                               Bool.True,
                                               (IntPtr)0,
                                               (IntPtr)(sizeof(float) * nInputUnits * nOutputUnits),
                                               weightsHost, // destination
                                               0,
                                               null,
                                               out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "clEnqueueReadBuffer weightsGPU");
    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    // Blocking read of the bias vector (nOutputUnits floats).
    OpenCLSpace.ClError = Cl.EnqueueReadBuffer(OpenCLSpace.Queue,
                                               biasesGPU, // source
                                               Bool.True,
                                               (IntPtr)0,
                                               (IntPtr)(sizeof(float) * nOutputUnits),
                                               biasesHost, // destination
                                               0,
                                               null,
                                               out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "clEnqueueReadBuffer biasesGPU");
    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");

    // Speeds are not saved.
}
/// <summary>
/// Forward pass of the residual ("skip") block: conv1 -> nonlinearity -> conv2, then the
/// SkipForward kernel accumulates the block's input activations onto its output
/// activations on the device (the skip connection).
/// Large commented-out debug-dump blocks (buffer read-backs + console prints) were removed.
/// </summary>
public override void FeedForward()
{
    convolutionalLayer1.FeedForward();

    // NOTE(review): if nonlinearityType is neither "ReLU" nor "ELU", no nonlinearity is
    // applied and no error is raised — confirm that silent pass-through is intended.
    if (nonlinearityType == "ReLU")
    {
        nonlinearityReLU.FeedForward();
    }
    else if (nonlinearityType == "ELU")
    {
        nonlinearityELU.FeedForward();
    }

    convolutionalLayer2.FeedForward();

    // Additionally, cumulate inputs onto outputs (output += input, element-wise).
    OpenCLSpace.ClError = Cl.SetKernelArg(OpenCLSpace.SkipForward, 0, outputNeurons.ActivationsGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.SkipForward, 1, inputNeurons.ActivationsGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.SkipForward, 2, (IntPtr)sizeof(int), nInputUnits);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.SkipForward, 3, (IntPtr)sizeof(int), inputNeurons.MiniBatchSize);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.SetKernelArg");

    // Run kernel, then block until the queue drains.
    OpenCLSpace.ClError = Cl.EnqueueNDRangeKernel(OpenCLSpace.Queue,
                                                  OpenCLSpace.SkipForward,
                                                  1,
                                                  null,
                                                  globalWorkSizePtr,
                                                  localWorkSizePtr,
                                                  0,
                                                  null,
                                                  out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.EnqueueNDRangeKernel");

    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");
}
/// <summary>
/// Validates the input geometry, computes the output dimensions of the max-pooling layer,
/// allocates the output neurons, and (GPU path) builds the pooling-table and switches
/// buffers used by the forward/backward kernels.
/// </summary>
/// <exception cref="ArgumentException">If the input is not spatially square, or the
/// pooling width does not divide the input width.</exception>
public override void SetupOutput()
{
    // Check arguments _______________________________________________________________________________________
    if (inputHeight != inputWidth)
    {
        throw new ArgumentException("MaxPooling currently only supports spatially square input.");
    }
    if (inputWidth % poolWidth != 0)
    {
        throw new ArgumentException("Cannot apply max pooling to input: pooling width and stride do not fit input width!");
    }

    // Setup output __________________________________________________________________________________________
    // Standard pooling output size: (input - pool) / stride + 1 (same formula for both
    // dimensions, valid because the input is square).
    this.outputWidth = (inputWidth - poolWidth) / stride + 1;
    this.outputHeight = (inputHeight - poolWidth) / stride + 1;
    this.outputDepth = inputDepth; // pooling preserves the number of channels
    this.nOutputUnits = outputWidth * outputHeight * outputDepth;
    this.outputNeurons = new Neurons(nOutputUnits);

    // Initialize and create auxiliary structures ____________________________________________________________
#if OPENCL_ENABLED
    // Pooling table: 4 ints per output spatial location (batch-independent geometry).
    this.poolingTableGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                                MemFlags.ReadWrite,
                                                (IntPtr)(sizeof(int) * 4 * outputHeight * outputWidth),
                                                out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.CreateBuffer poolingTableGPU");
    OpenCLSpace.WipeBuffer(poolingTableGPU, 4 * outputHeight * outputWidth, typeof(int));

    OpenCLSpace.ClError = Cl.SetKernelArg(OpenCLSpace.CreateMaxPoolingTable, 0, poolingTableGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.CreateMaxPoolingTable, 1, (IntPtr)sizeof(int), stride);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.CreateMaxPoolingTable, 2, (IntPtr)sizeof(int), inputWidth);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.CreateMaxPoolingTable, 3, (IntPtr)sizeof(int), outputWidth);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.SetKernelArg CreatePoolingTable");

    // Global work size: nOutputUnits * MiniBatchSize rounded up to a multiple of 32.
    // NOTE(review): the table has only 4 * outputHeight * outputWidth entries and is
    // batch-independent, so this launch looks oversized — presumably the kernel
    // bounds-checks its global id; confirm against the kernel source.
    OpenCLSpace.ClError = Cl.EnqueueNDRangeKernel(OpenCLSpace.Queue,
                                                  OpenCLSpace.CreateMaxPoolingTable,
                                                  1,
                                                  null,
                                                  new IntPtr[] { (IntPtr)(32 * Math.Ceiling((double)(nOutputUnits * inputNeurons.MiniBatchSize) / (double)32)) },
                                                  new IntPtr[] { (IntPtr)32 },
                                                  0,
                                                  null,
                                                  out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.EnqueueNDRangeKernel CreatePoolingTable");

    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    // Switches: one bool per input unit per mini-batch item, recording which input
    // won each max during the forward pass (consumed by BackPropagate).
    this.switchesGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                            MemFlags.ReadWrite,
                                            (IntPtr)(sizeof(bool) * nInputUnits * inputNeurons.MiniBatchSize),
                                            out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.CreateBuffer switchesGPU");
    OpenCLSpace.WipeBuffer(switchesGPU, nInputUnits * inputNeurons.MiniBatchSize, typeof(bool));
#else
    //TODO: create poolingTable and switches on cpu
#endif
}
/// <summary>
/// Loads one mini-batch of examples into this input layer's output activations.
/// GPU path: device-to-device copy of each selected data point into its slot of the
/// activations buffer, then input dropout applied ONCE over the whole mini-batch buffer.
/// CPU path: host-side copy per example.
/// </summary>
/// <param name="dataSet">Data set to draw examples from.</param>
/// <param name="iExamples">Indices of the examples forming the mini-batch
/// (at least MiniBatchSize entries are read).</param>
public void FeedData(DataSet dataSet, int[] iExamples)
{
#if TIMING_LAYERS
    Utils.InputFeedTimer.Start();
#endif
    int dataPointSize = dataSet.DataDimension;

    for (int m = 0; m < outputNeurons.MiniBatchSize; m++)
    {
#if OPENCL_ENABLED
        int iDataPoint = iExamples[m];

        OpenCLSpace.ClError = Cl.EnqueueCopyBuffer(OpenCLSpace.Queue,
                                                   dataSet.DataContainer[iDataPoint].Data, // source
                                                   outputNeurons.ActivationsGPU, // destination
                                                   (IntPtr)0, // source offset (in bytes)
                                                   (IntPtr)(sizeof(float) * m * dataPointSize), // destination offset (in bytes)
                                                   (IntPtr)(sizeof(float) * dataPointSize), // size of buffer to copy
                                                   0,
                                                   null,
                                                   out OpenCLSpace.ClEvent);
        OpenCLSpace.CheckErr(OpenCLSpace.ClError, "InputLayer.FeedData Cl.EnqueueCopyBuffer inputData");

        OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
        OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");
#else
        outputNeurons.SetHost(m, dataSet.Data[iExamples[m]]);
#endif
    }

#if OPENCL_ENABLED
    // Dropout!
    // BUGFIX: this kernel previously ran INSIDE the per-example loop above while covering
    // the ENTIRE mini-batch buffer (nOutputUnits * MiniBatchSize elements), so example 0
    // had dropout applied MiniBatchSize times and later slots were touched before being
    // filled. It now runs exactly once, after all examples have been copied in.
    if (dropoutParameter < 1.0)
    {
        // Set kernel arguments
        OpenCLSpace.ClError = Cl.SetKernelArg(OpenCLSpace.InputDropout, 0, outputNeurons.ActivationsGPU);
        OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.InputDropout, 1, (IntPtr)sizeof(int), nOutputUnits * outputNeurons.MiniBatchSize);
        OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.InputDropout, 2, (IntPtr)sizeof(float), (float)dropoutParameter);
        OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.InputDropout, 3, (IntPtr)sizeof(ulong), (ulong)Guid.NewGuid().GetHashCode()); // per-call pseudo-random seed
        OpenCLSpace.CheckErr(OpenCLSpace.ClError, "InputDropout: Cl.SetKernelArg");

        // Run kernel
        OpenCLSpace.ClError = Cl.EnqueueNDRangeKernel(OpenCLSpace.Queue,
                                                      OpenCLSpace.InputDropout,
                                                      1,
                                                      null,
                                                      dropoutGlobalWorkSizePtr,
                                                      dropoutLocalWorkSizePtr,
                                                      0,
                                                      null,
                                                      out OpenCLSpace.ClEvent);
        OpenCLSpace.CheckErr(OpenCLSpace.ClError, "InputDropout: Cl.EnqueueNDRangeKernel");

        OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
        OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");
    }

    OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");
#endif

#if TIMING_LAYERS
    Utils.InputFeedTimer.Stop();
#endif
}
/// <summary>
/// Updates the batch-normalization scale (gamma) and shift (beta) parameters on the
/// device from their speed buffers, via the BNFCUpdateParameters kernel.
/// The large commented-out debug dump of gamma/beta (which contained a stray nested
/// comment opener) was removed.
/// </summary>
/// <param name="weightDecayCoeff">Unused by this layer — gamma/beta are not passed to
/// the kernel with a decay term. Kept for interface compatibility with other layers.
/// NOTE(review): confirm that skipping weight decay here is intentional.</param>
public override void UpdateParameters(double weightDecayCoeff)
{
#if TIMING_LAYERS
    Utils.BNFCUpdateParametersTimer.Start();
#endif
    // Kernel arguments: parameters, their update speeds, and the per-unit count.
    OpenCLSpace.ClError = Cl.SetKernelArg(OpenCLSpace.BNFCUpdateParameters, 0, gammaGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCUpdateParameters, 1, betaGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCUpdateParameters, 2, gammaSpeedGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCUpdateParameters, 3, betaSpeedGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCUpdateParameters, 4, (IntPtr)sizeof(int), nInputUnits);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.SetKernelArg");

    // Run the kernel, then block until the queue drains.
    OpenCLSpace.ClError = Cl.EnqueueNDRangeKernel(OpenCLSpace.Queue,
                                                  OpenCLSpace.BNFCUpdateParameters,
                                                  1,
                                                  null,
                                                  nUnitsGlobalWorkSizePtr,
                                                  optimalLocalWorkSizePtr,
                                                  0,
                                                  null,
                                                  out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.EnqueueNDRangeKernel");

    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");

#if TIMING_LAYERS
    Utils.BNFCUpdateParametersTimer.Stop();
#endif
}