/// <summary>
/// Allocates the activation and delta (error-signal) storage for this set of neurons,
/// sized for a mini-batch of <paramref name="MiniBatchSize"/> examples.
/// </summary>
/// <param name="MiniBatchSize">Number of examples processed simultaneously.</param>
public void SetupBuffers(int MiniBatchSize)
{
    this.miniBatchSize = MiniBatchSize;
#if OPENCL_ENABLED
    // Activations: one float per unit per example, zero-initialized on device.
    this.activationsGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                               MemFlags.ReadWrite | MemFlags.AllocHostPtr,
                                               (IntPtr)(sizeof(float) * NumberOfUnits * MiniBatchSize),
                                               out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.CreateBuffer Neurons.activationsGPU");
    OpenCLSpace.WipeBuffer(activationsGPU, NumberOfUnits * MiniBatchSize, typeof(float));

    // Deltas: same layout as activations.
    this.deltaGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                         MemFlags.ReadWrite | MemFlags.AllocHostPtr,
                                         (IntPtr)(sizeof(float) * NumberOfUnits * MiniBatchSize),
                                         out OpenCLSpace.ClError);
    // BUG FIX: the original checked the creation error only AFTER wiping, and wiped
    // activationsGPU a second time instead of deltaGPU (leaving deltaGPU uninitialized).
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.CreateBuffer Neurons.deltaGPU");
    OpenCLSpace.WipeBuffer(deltaGPU, NumberOfUnits * MiniBatchSize, typeof(float));
#else
    // CPU path: one zero-initialized array per example in the mini-batch.
    for (int m = 0; m < MiniBatchSize; m++)
    {
        this.activations.Add(new double[nUnits]);
        this.delta.Add(new double[nUnits]);
    }
#endif
}
/// <summary>
/// Sets up the output geometry of this batch-norm layer (a shape-preserving pass-through)
/// and allocates the OpenCL buffers for per-feature-map statistics and normalized inputs.
/// </summary>
public override void SetupOutput()
{
    // Batch normalization does not change the tensor shape: output mirrors input.
    this.outputWidth = inputWidth;
    this.outputHeight = inputHeight;
    this.outputDepth = inputDepth;
    this.inputArea = inputHeight * inputWidth;
    this.nOutputUnits = nInputUnits;
    this.outputNeurons = new Neurons(nOutputUnits);

    // Initialize OpenCL buffers.
    // 1. Mean, variance, and their cumulative averages — one value per feature map (inputDepth).
    //    (BUG FIX: CheckErr context strings previously said "InitializeParameters()",
    //    which made device-allocation failures here point at the wrong method.)
    this.meanGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                        MemFlags.ReadWrite,
                                        (IntPtr)(sizeof(float) * inputDepth),
                                        out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "SetupOutput(): Cl.CreateBuffer meanGPU");
    OpenCLSpace.WipeBuffer(meanGPU, inputDepth, typeof(float));

    this.varianceGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                            MemFlags.ReadWrite,
                                            (IntPtr)(sizeof(float) * inputDepth),
                                            out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "SetupOutput(): Cl.CreateBuffer varianceGPU");
    OpenCLSpace.WipeBuffer(varianceGPU, inputDepth, typeof(float));

    // Cumulative means start at zero...
    this.cumulativeMeanGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                                  MemFlags.ReadWrite,
                                                  (IntPtr)(sizeof(float) * inputDepth),
                                                  out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "SetupOutput(): Cl.CreateBuffer cumulativeMeanGPU");
    OpenCLSpace.WipeBuffer(cumulativeMeanGPU, inputDepth, typeof(float));

    // ...and cumulative variances at one (copied from a host array of ones).
    float[] ones = new float[inputDepth];
    for (int i = 0; i < inputDepth; ++i)
    {
        ones[i] = 1.0f;
    }
    this.cumulativeVarianceGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                                      MemFlags.ReadWrite | MemFlags.CopyHostPtr,
                                                      (IntPtr)(sizeof(float) * inputDepth),
                                                      ones,
                                                      out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "SetupOutput(): Cl.CreateBuffer cumulativeVarianceGPU");

    // 2. Normalized input values, kept around because backprop needs them.
    this.normalizedInputGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                                   MemFlags.ReadWrite,
                                                   (IntPtr)(sizeof(float) * nInputUnits * inputNeurons.MiniBatchSize),
                                                   out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "SetupOutput(): Cl.CreateBuffer normalizedInputGPU");
    OpenCLSpace.WipeBuffer(normalizedInputGPU, nInputUnits * inputNeurons.MiniBatchSize, typeof(float));
}
/// <summary>
/// Sets up the output geometry of this fully-connected batch-norm layer (shape-preserving)
/// and allocates the OpenCL buffers for per-unit statistics and normalized inputs.
/// </summary>
public override void SetupOutput()
{
    // Batch normalization does not change the tensor shape: output mirrors input.
    this.outputWidth = inputWidth;
    this.outputHeight = inputHeight;
    this.outputDepth = inputDepth;
    this.nOutputUnits = nInputUnits;
    this.outputNeurons = new Neurons(nOutputUnits);

    // Initialize OpenCL buffers: per-unit mean, variance, their cumulative averages,
    // and the normalized input activations (the latter are needed for backprop).
    // (BUG FIX: CheckErr context strings previously said "InitializeParameters()",
    // which made device-allocation failures here point at the wrong method.)
    this.meanGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                        MemFlags.ReadWrite,
                                        (IntPtr)(sizeof(float) * nInputUnits),
                                        out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "SetupOutput(): Cl.CreateBuffer meanGPU");
    OpenCLSpace.WipeBuffer(meanGPU, nInputUnits, typeof(float));

    this.varianceGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                            MemFlags.ReadWrite,
                                            (IntPtr)(sizeof(float) * nInputUnits),
                                            out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "SetupOutput(): Cl.CreateBuffer varianceGPU");
    OpenCLSpace.WipeBuffer(varianceGPU, nInputUnits, typeof(float));

    this.cumulativeMeanGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                                  MemFlags.ReadWrite,
                                                  (IntPtr)(sizeof(float) * nInputUnits),
                                                  out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "SetupOutput(): Cl.CreateBuffer cumulativeMeanGPU");
    OpenCLSpace.WipeBuffer(cumulativeMeanGPU, nInputUnits, typeof(float));

    this.cumulativeVarianceGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                                      MemFlags.ReadWrite,
                                                      (IntPtr)(sizeof(float) * nInputUnits),
                                                      out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "SetupOutput(): Cl.CreateBuffer cumulativeVarianceGPU");
    OpenCLSpace.WipeBuffer(cumulativeVarianceGPU, nInputUnits, typeof(float));

    this.normalizedInputGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                                   MemFlags.ReadWrite,
                                                   (IntPtr)(sizeof(float) * nInputUnits * inputNeurons.MiniBatchSize),
                                                   out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "SetupOutput(): Cl.CreateBuffer normalizedInputGPU");
    OpenCLSpace.WipeBuffer(normalizedInputGPU, nInputUnits * inputNeurons.MiniBatchSize, typeof(float));
}
/// <summary>
/// Sets up the (flat) output geometry of this layer and, when OpenCL is enabled,
/// allocates the boolean dropout-mask buffer for the whole mini-batch.
/// </summary>
public override void SetupOutput()
{
    // Output is a flat vector: depth carries the units, spatial dims collapse to 1x1.
    this.outputDepth = nOutputUnits;
    this.outputHeight = 1;
    this.outputWidth = 1;
    this.outputNeurons = new Neurons(this.nOutputUnits);
#if OPENCL_ENABLED
    // One mask bit per output unit per example; wiped so all units start "kept off" deterministically.
    // (BUG FIX: the CheckErr context string previously said "InitializeParameters()",
    // which made allocation failures here point at the wrong method.)
    this.dropoutMaskGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                               MemFlags.ReadWrite,
                                               (IntPtr)(sizeof(bool) * nOutputUnits * inputNeurons.MiniBatchSize),
                                               out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "SetupOutput(): Cl.CreateBuffer dropoutMaskGPU");
    OpenCLSpace.WipeBuffer(dropoutMaskGPU, nOutputUnits * inputNeurons.MiniBatchSize, typeof(bool));
#endif
}
// Sets up the output geometry of the max-pooling layer, then builds the device-side
// pooling lookup table and the switches buffer used by backprop.
public override void SetupOutput()
{
    // Check arguments _______________________________________________________________________________________
    if (inputHeight != inputWidth)
    {
        throw new ArgumentException("MaxPooling currently only supports spatially square input.");
    }
    // NOTE(review): this check involves only poolWidth, not stride — when stride != poolWidth
    // a misfit may go undetected; confirm intended constraint.
    if (inputWidth % poolWidth != 0)
    {
        throw new ArgumentException("Cannot apply max pooling to input: pooling width and stride do not fit input width!");
    }

    // Setup output __________________________________________________________________________________________
    // Standard pooling output size: (input - window) / stride + 1; depth is unchanged.
    this.outputWidth = (inputWidth - poolWidth) / stride + 1;
    this.outputHeight = (inputHeight - poolWidth) / stride + 1;
    this.outputDepth = inputDepth;
    this.nOutputUnits = outputWidth * outputHeight * outputDepth;
    this.outputNeurons = new Neurons(nOutputUnits);

    // Initialize and create auxiliary structures ____________________________________________________________
#if OPENCL_ENABLED
    // Pooling table: 4 ints per output spatial location (presumably the input indices of
    // each pooling window's corners — TODO confirm against the CreateMaxPoolingTable kernel).
    this.poolingTableGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                                MemFlags.ReadWrite,
                                                (IntPtr)(sizeof(int) * 4 * outputHeight * outputWidth),
                                                out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.CreateBuffer poolingTableGPU");
    OpenCLSpace.WipeBuffer(poolingTableGPU, 4 * outputHeight * outputWidth, typeof(int));

    // Fill the table on the device. Argument order must match the kernel signature exactly.
    OpenCLSpace.ClError = Cl.SetKernelArg(OpenCLSpace.CreateMaxPoolingTable, 0, poolingTableGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.CreateMaxPoolingTable, 1, (IntPtr)sizeof(int), stride);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.CreateMaxPoolingTable, 2, (IntPtr)sizeof(int), inputWidth);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.CreateMaxPoolingTable, 3, (IntPtr)sizeof(int), outputWidth);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.SetKernelArg CreatePoolingTable");

    // Global work size is nOutputUnits * miniBatchSize rounded up to a multiple of 32
    // (the local work size). NOTE(review): the table only has outputHeight*outputWidth
    // entries, so this launch looks oversized — verify the kernel guards its index range.
    OpenCLSpace.ClError = Cl.EnqueueNDRangeKernel(OpenCLSpace.Queue,
                                                  OpenCLSpace.CreateMaxPoolingTable,
                                                  1,
                                                  null,
                                                  new IntPtr[] { (IntPtr)(32 * Math.Ceiling((double)(nOutputUnits * inputNeurons.MiniBatchSize) / (double)32)) },
                                                  new IntPtr[] { (IntPtr)32 },
                                                  0,
                                                  null,
                                                  out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.EnqueueNDRangeKernel CreatePoolingTable");
    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    // Switches: one bool per input unit per example, recording which input "won" each
    // pooling window so backprop can route deltas back to it.
    this.switchesGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                            MemFlags.ReadWrite,
                                            (IntPtr)(sizeof(bool) * nInputUnits * inputNeurons.MiniBatchSize),
                                            out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.CreateBuffer switchesGPU");
    OpenCLSpace.WipeBuffer(switchesGPU, nInputUnits * inputNeurons.MiniBatchSize, typeof(bool));
#else
    //TODO: create poolingTable and switches on cpu
#endif
}
/// <summary>
/// Forward pass of fully-connected batch normalization. During training the input is
/// normalized with the current mini-batch's statistics; during (pre-)inference it is
/// normalized with the cumulative running averages. Output is then scaled by gamma
/// and shifted by beta.
/// </summary>
/// <exception cref="InvalidOperationException">
/// Thrown when the layer is in no recognized mode (not training, pre-inference, or inference).
/// </exception>
public override void FeedForward()
{
#if TIMING_LAYERS
    Utils.BNFCForwardTimer.Start();
#endif
    if (isEpochBeginning)
    {
        iCumulativeAverage = 0;
        // Wipe cumulative means and variances (theoretically, this is redundant)
        OpenCLSpace.WipeBuffer(cumulativeMeanGPU, nInputUnits, typeof(float));
        OpenCLSpace.WipeBuffer(cumulativeVarianceGPU, nInputUnits, typeof(float));
        isEpochBeginning = false;
    }

    // If training (or pre-inference), compute per-unit means and variances over the
    // mini-batch and update the cumulative averages on the device.
    if (isTraining || isPreInference)
    {
        OpenCLSpace.ClError = Cl.SetKernelArg(OpenCLSpace.BNFCComputeMeansVariances, 0, meanGPU);
        OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCComputeMeansVariances, 1, varianceGPU);
        OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCComputeMeansVariances, 2, cumulativeMeanGPU);
        OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCComputeMeansVariances, 3, cumulativeVarianceGPU);
        OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCComputeMeansVariances, 4, inputNeurons.ActivationsGPU);
        OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCComputeMeansVariances, 5, (IntPtr)sizeof(int), nInputUnits);
        OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCComputeMeansVariances, 6, (IntPtr)sizeof(int), inputNeurons.MiniBatchSize);
        OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCComputeMeansVariances, 7, (IntPtr)sizeof(int), Convert.ToInt32(isPreInference));
        OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCComputeMeansVariances, 8, (IntPtr)sizeof(int), iCumulativeAverage);
        OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.SetKernelArg");

        OpenCLSpace.ClError = Cl.EnqueueNDRangeKernel(OpenCLSpace.Queue,
                                                      OpenCLSpace.BNFCComputeMeansVariances,
                                                      1,
                                                      null,
                                                      nUnitsGlobalWorkSizePtr,
                                                      optimalLocalWorkSizePtr,
                                                      0,
                                                      null,
                                                      out OpenCLSpace.ClEvent);
        OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.EnqueueNDRangeKernel");
        OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
        OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

        if (isPreInference)
        {
            iCumulativeAverage++; // increase cumulative average counter
        }
    }

    // Normalize input, then scale by gamma and shift by beta.
    OpenCLSpace.ClError = Cl.SetKernelArg(OpenCLSpace.BNFCForward, 0, outputNeurons.ActivationsGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCForward, 1, normalizedInputGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCForward, 2, inputNeurons.ActivationsGPU);
    if (isTraining)
    {
        // Training: use this mini-batch's statistics.
        OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCForward, 3, meanGPU);
        OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCForward, 4, varianceGPU);
    }
    else if (isPreInference || isInference)
    {
        // (Pre-)inference: use the cumulative running averages instead.
        OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCForward, 3, cumulativeMeanGPU);
        OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCForward, 4, cumulativeVarianceGPU);
    }
    else
    {
        // BUG FIX: the message used to say "BatchNormConv" even though this is the FC variant.
        throw new InvalidOperationException("ERROR: BatchNormFC is currently not in training mode, nor pre-inference, nor inference.");
    }
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCForward, 5, gammaGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCForward, 6, betaGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCForward, 7, (IntPtr)sizeof(int), nInputUnits);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCForward, 8, (IntPtr)sizeof(int), inputNeurons.MiniBatchSize);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.SetKernelArg");

    OpenCLSpace.ClError = Cl.EnqueueNDRangeKernel(OpenCLSpace.Queue,
                                                  OpenCLSpace.BNFCForward,
                                                  1,
                                                  null,
                                                  nActivationsGlobalWorkSizePtr,
                                                  optimalLocalWorkSizePtr,
                                                  0,
                                                  null,
                                                  out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.EnqueueNDRangeKernel");
    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");

#if TIMING_LAYERS
    Utils.BNFCForwardTimer.Stop();
#endif
}
/// <summary>
/// Initializes (or loads) the learnable gamma/beta parameters of this batch-norm layer,
/// resets its mode flags, and allocates device buffers for parameters, gradients, and
/// update speeds.
/// </summary>
/// <param name="Option">"random" to initialize fresh parameters (gamma=1, beta=0),
/// or "load" to transfer previously loaded host values to the device.</param>
/// <exception cref="ArgumentException">Thrown for any other option string.</exception>
public override void InitializeParameters(string Option)
{
    this.iCumulativeAverage = 0;
    this.isEpochBeginning = true;
    this.isTraining = true;
    this.isPreInference = false;
    this.isInference = false;

    if (Option == "random") // initialize parameters on host
    {
        // Gamma = 1 and beta = 0 make the layer start out as the identity transform.
        gammaHost = new float[nInputUnits];
        for (int i = 0; i < nInputUnits; ++i)
        {
            gammaHost[i] = 1.0f;
        }
        betaHost = new float[nInputUnits]; // zero-initialized by the CLR
    }
    else if (Option != "load")
    {
        // BUG FIX: an unrecognized option used to fall through and call CreateBuffer
        // with null host arrays, producing an obscure failure far from the cause.
        throw new ArgumentException("Option must be either \"random\" or \"load\".", "Option");
    }
    // If Option == "load", gammaHost/betaHost were already populated from file.

    // Transfer parameters to device.
    this.gammaGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                         MemFlags.ReadWrite | MemFlags.CopyHostPtr,
                                         (IntPtr)(sizeof(float) * nInputUnits),
                                         gammaHost,
                                         out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "InitializeParameters(): Cl.CreateBuffer");
    this.betaGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                        MemFlags.ReadWrite | MemFlags.CopyHostPtr,
                                        (IntPtr)(sizeof(float) * nInputUnits),
                                        betaHost,
                                        out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "InitializeParameters(): Cl.CreateBuffer");

    // Also create buffers for parameter gradients, zero-initialized.
    this.deltaGammaGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                              MemFlags.ReadWrite,
                                              (IntPtr)(sizeof(float) * nInputUnits),
                                              out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "InitializeParameters(): Cl.CreateBuffer");
    OpenCLSpace.WipeBuffer(deltaGammaGPU, nInputUnits, typeof(float));
    this.deltaBetaGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                             MemFlags.ReadWrite,
                                             (IntPtr)(sizeof(float) * nInputUnits),
                                             out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "InitializeParameters(): Cl.CreateBuffer");
    OpenCLSpace.WipeBuffer(deltaBetaGPU, nInputUnits, typeof(float));

    // And for parameter update speed (momentum terms), zero-initialized.
    this.gammaSpeedGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                              MemFlags.ReadWrite,
                                              (IntPtr)(sizeof(float) * nInputUnits),
                                              out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "InitializeParameters(): Cl.CreateBuffer");
    OpenCLSpace.WipeBuffer(gammaSpeedGPU, nInputUnits, typeof(float));
    this.betaSpeedGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                             MemFlags.ReadWrite,
                                             (IntPtr)(sizeof(float) * nInputUnits),
                                             out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "InitializeParameters(): Cl.CreateBuffer");
    OpenCLSpace.WipeBuffer(betaSpeedGPU, nInputUnits, typeof(float));
}
/// <summary>
/// Initializes (or loads) the weights and biases of this fully-connected layer and
/// allocates device buffers for parameters, their gradients, and their update speeds.
/// Weights are sampled from a zero-mean Gaussian with standard deviation
/// sqrt(2 / (10 * nInputUnits)); biases start at zero.
/// </summary>
/// <param name="Option">"random" to sample fresh parameters; otherwise "load"
/// (host arrays already populated from file) — they are just copied to the device.</param>
public override void InitializeParameters(string Option)
{
    base.InitializeParameters(Option); // makes sure this method is only called AFTER "SetupOutput()"

    if (Option == "random") // sample new parameters
    {
        this.weightsHost = new float[nOutputUnits * nInputUnits];
        this.biasesHost = new float[nOutputUnits];

        double weightsStdDev = Math.Sqrt(2.0 / (10 * nInputUnits));
        double uniformRand1;
        double uniformRand2;
        double tmp;

        for (int iRow = 0; iRow < nOutputUnits; iRow++)
        {
            for (int iCol = 0; iCol < nInputUnits; iCol++)
            {
                // BUG FIX: NextDouble() returns values in [0, 1), so it can be exactly 0,
                // making Math.Log(uniformRand1) = -infinity and yielding infinite/NaN
                // weights. Mapping to 1 - NextDouble() gives (0, 1], which is safe.
                uniformRand1 = 1.0 - Global.rng.NextDouble();
                uniformRand2 = Global.rng.NextDouble();
                // Box-Muller transform: a standard normal sample from two uniforms.
                tmp = Math.Sqrt(-2.0 * Math.Log(uniformRand1)) * Math.Sin(2.0 * Math.PI * uniformRand2);
                tmp = weightsStdDev * tmp; // rescale to the desired std dev

                weightsHost[iRow * nInputUnits + iCol] = (float)tmp;
            }
            biasesHost[iRow] = 0.00f;
        }
    }
    // else Option must be ''load'' => do not sample parameters, just load them from host to device

    int weightBufferSize = sizeof(float) * (outputNeurons.NumberOfUnits * inputNeurons.NumberOfUnits);
    int biasesBufferSize = sizeof(float) * outputNeurons.NumberOfUnits;

    // Transfer parameters to device.
    this.weightsGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                           MemFlags.ReadWrite | MemFlags.CopyHostPtr,
                                           (IntPtr)weightBufferSize,
                                           weightsHost,
                                           out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.CreateBuffer");
    this.biasesGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                          MemFlags.ReadWrite | MemFlags.CopyHostPtr,
                                          (IntPtr)biasesBufferSize,
                                          biasesHost,
                                          out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.CreateBuffer");

    // Also create weightsGradients and biasesGradients buffers and initialize them to zero.
    this.weightsGradientsGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                                    MemFlags.ReadWrite,
                                                    (IntPtr)weightBufferSize,
                                                    out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.CreateBuffer");
    OpenCLSpace.WipeBuffer(weightsGradientsGPU, (nInputUnits * nOutputUnits), typeof(float));
    this.biasesGradientsGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                                   MemFlags.ReadWrite,
                                                   (IntPtr)biasesBufferSize,
                                                   out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.CreateBuffer");
    OpenCLSpace.WipeBuffer(biasesGradientsGPU, nOutputUnits, typeof(float));

    // Also create weightsSpeed and biasesSpeed (momentum) buffers and initialize them to zero.
    this.weightsSpeedGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                                MemFlags.ReadWrite,
                                                (IntPtr)weightBufferSize,
                                                out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.CreateBuffer");
    OpenCLSpace.WipeBuffer(weightsSpeedGPU, (nInputUnits * nOutputUnits), typeof(float));
    this.biasesSpeedGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                               MemFlags.ReadWrite,
                                               (IntPtr)biasesBufferSize,
                                               out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.CreateBuffer");
    OpenCLSpace.WipeBuffer(biasesSpeedGPU, nOutputUnits, typeof(float));
}
/// <summary>
/// Initializes (or loads) the learnable gamma/beta parameters of this convolutional
/// batch-norm layer (one pair per feature map), resets its mode flags, and allocates
/// device buffers for parameters, gradients, and update speeds.
/// </summary>
/// <param name="Option">"random" to initialize fresh parameters (gamma=1, beta=0),
/// or "load" to transfer previously loaded host values to the device.</param>
/// <exception cref="ArgumentException">Thrown for any other option string.</exception>
public override void InitializeParameters(string Option)
{
    this.iCumulativeAverage = 0;
    this.isEpochBeginning = true;
    this.isTraining = true;
    this.isPreInference = false;
    this.isInference = false;

    // Initialize OpenCL buffers for learnable parameters gamma and beta,
    // their gradients, and their update speed.
    if (Option == "random")
    {
        // A new network is being created: gamma = 1 and beta = 0 make the layer
        // start out as the identity transform.
        this.gammaHost = new float[inputDepth];
        for (int i = 0; i < inputDepth; ++i)
        {
            gammaHost[i] = 1.0f;
        }
        this.betaHost = new float[inputDepth]; // zero-initialized by the CLR
    }
    else if (Option != "load")
    {
        // BUG FIX: an unrecognized option used to fall through and call CreateBuffer
        // with null host arrays, producing an obscure failure far from the cause.
        throw new ArgumentException("Option must be either \"random\" or \"load\".", "Option");
    }
    // If Option == "load", gammaHost/betaHost were already populated from file.

    this.gammaGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                         MemFlags.ReadWrite | MemFlags.CopyHostPtr,
                                         (IntPtr)(sizeof(float) * inputDepth),
                                         gammaHost,
                                         out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "InitializeParameters(): Cl.CreateBuffer");
    this.betaGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                        MemFlags.ReadWrite | MemFlags.CopyHostPtr,
                                        (IntPtr)(sizeof(float) * inputDepth),
                                        betaHost,
                                        out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "InitializeParameters(): Cl.CreateBuffer");

    // Per-unit gradient accumulators (nInputUnits entries, i.e. before the depth-wise
    // reduction — presumably reduced into deltaGamma/deltaBeta later; confirm in backprop).
    this.deltaGammaBatchGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                                   MemFlags.ReadWrite,
                                                   (IntPtr)(sizeof(float) * nInputUnits),
                                                   out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "InitializeParameters(): Cl.CreateBuffer");
    OpenCLSpace.WipeBuffer(deltaGammaBatchGPU, nInputUnits, typeof(float));
    this.deltaBetaBatchGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                                  MemFlags.ReadWrite,
                                                  (IntPtr)(sizeof(float) * nInputUnits),
                                                  out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "InitializeParameters(): Cl.CreateBuffer");
    OpenCLSpace.WipeBuffer(deltaBetaBatchGPU, nInputUnits, typeof(float));

    // Per-feature-map gradients, zero-initialized.
    this.deltaGammaGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                              MemFlags.ReadWrite,
                                              (IntPtr)(sizeof(float) * inputDepth),
                                              out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "InitializeParameters(): Cl.CreateBuffer");
    OpenCLSpace.WipeBuffer(deltaGammaGPU, inputDepth, typeof(float));
    this.deltaBetaGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                             MemFlags.ReadWrite,
                                             (IntPtr)(sizeof(float) * inputDepth),
                                             out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "InitializeParameters(): Cl.CreateBuffer");
    OpenCLSpace.WipeBuffer(deltaBetaGPU, inputDepth, typeof(float));

    // Update-speed (momentum) buffers, zero-initialized.
    this.gammaSpeedGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                              MemFlags.ReadWrite,
                                              (IntPtr)(sizeof(float) * inputDepth),
                                              out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "InitializeParameters(): Cl.CreateBuffer");
    OpenCLSpace.WipeBuffer(gammaSpeedGPU, inputDepth, typeof(float));
    this.betaSpeedGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                             MemFlags.ReadWrite,
                                             (IntPtr)(sizeof(float) * inputDepth),
                                             out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "InitializeParameters(): Cl.CreateBuffer");
    OpenCLSpace.WipeBuffer(betaSpeedGPU, inputDepth, typeof(float));
}