/// <summary>
/// Forward pass of the ELU nonlinearity: out = in if in > 0, else alpha * (exp(in) - 1).
/// GPU builds run the ELUForward kernel over every activation of the mini-batch;
/// CPU builds compute the same function element-wise on the host.
/// </summary>
public override void FeedForward()
{
#if TIMING_LAYERS
    Utils.NonlinearityForwardTimer.Start();
#endif

#if OPENCL_ENABLED
    // Set kernel arguments
    OpenCLSpace.ClError = Cl.SetKernelArg(OpenCLSpace.ELUForward, 0, OutputNeurons.ActivationsGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.ELUForward, 1, InputNeurons.ActivationsGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.ELUForward, 2, (IntPtr)sizeof(float), alpha);
    // Total number of activations to process (all units of all mini-batch items).
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.ELUForward, 3, (IntPtr)sizeof(int), OutputNeurons.NumberOfUnits * inputNeurons.MiniBatchSize);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "ELU.FeedForward(): Cl.SetKernelArg");

    // Run kernel
    OpenCLSpace.ClError = Cl.EnqueueNDRangeKernel(OpenCLSpace.Queue,
                                                  OpenCLSpace.ELUForward,
                                                  1,
                                                  null,
                                                  globalWorkSizePtr,
                                                  localWorkSizePtr,
                                                  0,
                                                  null,
                                                  out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "ELU.FeedForward(): Cl.EnqueueNDRangeKernel");

    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    // Block until the kernel has finished before downstream layers read the output.
    OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");
#else
    // Host implementation: apply ELU element-wise, one mini-batch item at a time.
    for (int m = 0; m < inputNeurons.MiniBatchSize; m++)
    {
        double[] tmpOutput = new double[this.nOutputUnits];
        for (int i = 0; i < this.nOutputUnits; i++)
        {
            if (this.inputNeurons.GetHost()[m][i] > 0)
                tmpOutput[i] = this.inputNeurons.GetHost()[m][i];
            else
                tmpOutput[i] = alpha * (Math.Exp(this.inputNeurons.GetHost()[m][i]) - 1.0);
        }
        this.outputNeurons.SetHost(m, tmpOutput);
    }
#endif

#if TIMING_LAYERS
    Utils.NonlinearityForwardTimer.Stop();
#endif
}
/// <summary>
/// Reads the layer's weights and biases back from the device and returns them
/// as one double array laid out as [all weights, then all biases].
/// </summary>
public override double[] GetParameters()
{
    int nWeights = nInputUnits * nOutputUnits;
    double[] parameters = new double[nWeights + nOutputUnits];

    // Blocking read of the weight buffer into a host-side staging array.
    float[] weightsBuffer = new float[nWeights];
    OpenCLSpace.ClError = Cl.EnqueueReadBuffer(OpenCLSpace.Queue,
                                               weightsGPU,      // source
                                               Bool.True,       // blocking read
                                               (IntPtr)0,
                                               (IntPtr)(sizeof(float) * nWeights),
                                               weightsBuffer,   // destination
                                               0,
                                               null,
                                               out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "clEnqueueReadBuffer");
    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    // Blocking read of the bias buffer.
    float[] biasesBuffer = new float[nOutputUnits];
    OpenCLSpace.ClError = Cl.EnqueueReadBuffer(OpenCLSpace.Queue,
                                               biasesGPU,       // source
                                               Bool.True,
                                               (IntPtr)0,
                                               (IntPtr)(sizeof(float) * nOutputUnits),
                                               biasesBuffer,    // destination
                                               0,
                                               null,
                                               out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "clEnqueueReadBuffer");
    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");

    // Widen to double: weights first, biases appended after them.
    for (int w = 0; w < nWeights; w++)
        parameters[w] = (double)weightsBuffer[w];
    for (int b = 0; b < nOutputUnits; b++)
        parameters[nWeights + b] = (double)biasesBuffer[b];

    return parameters;
}
/// <summary>
/// Zeroes an OpenCL device buffer of nElementsInBuffer elements, using the wipe
/// kernel matching the buffer's element type (float, int, or bool).
/// </summary>
/// <param name="buffer">Device buffer to clear.</param>
/// <param name="nElementsInBuffer">Number of elements (not bytes) in the buffer.</param>
/// <param name="type">Element type of the buffer: float, int, or bool.</param>
/// <exception cref="ArgumentException">Thrown for unsupported element types.</exception>
public static void WipeBuffer(Mem buffer, int nElementsInBuffer, Type type)
{
    // Pick the kernel matching the element type.
    Kernel wipeKernel;
    if (type == typeof(float))
        wipeKernel = WipeBufferFloatKernel;
    else if (type == typeof(int))
        wipeKernel = WipeBufferIntKernel;
    else if (type == typeof(bool))
        wipeKernel = WipeBufferBoolKernel;
    else
        throw new ArgumentException("Type not supported. Use either float, int, or bool.");

    // Set kernel arguments.
    // FIX: members are now referenced consistently without the OpenCLSpace prefix
    // (this static method already used the bare names for queue/ClEvent/CheckErr).
    ClError = Cl.SetKernelArg(wipeKernel, 0, buffer);
    ClError |= Cl.SetKernelArg(wipeKernel, 1, (IntPtr)sizeof(int), nElementsInBuffer);
    CheckErr(ClError, "Cl.SetKernelArg WipeBufferKernel");

    // Round the global size up to the next multiple of the work-group size.
    IntPtr[] localWorkSizePtr = { (IntPtr)OPTIMAL_GROUP_SIZE };
    IntPtr[] globalWorkSizePtr = { (IntPtr)(OPTIMAL_GROUP_SIZE * Math.Ceiling((double)(nElementsInBuffer) / (double)OPTIMAL_GROUP_SIZE)) };

    // Run kernel.
    // FIX: the diagnostic string previously said "ZeroUnpadBatch" (copy-paste
    // from another kernel), which made error reports point at the wrong code.
    ClError = Cl.EnqueueNDRangeKernel(queue, wipeKernel, 1, null, globalWorkSizePtr, localWorkSizePtr, 0, null, out ClEvent);
    CheckErr(ClError, "Cl.EnqueueNDRangeKernel WipeBuffer");

    ClError = Cl.ReleaseEvent(ClEvent);
    CheckErr(ClError, "Cl.ReleaseEvent");

    ClError = Cl.Finish(queue);
    CheckErr(ClError, "Cl.Finish");
}
/// <summary>
/// Copies the gamma and beta gradient buffers from the device and returns them
/// as one double array: [gamma gradients (nInputUnits), beta gradients (nInputUnits)].
/// </summary>
public override double[] GetParameterGradients()
{
    float[] gammaGradients = new float[nInputUnits];
    float[] betaGradients = new float[nInputUnits];

    // Blocking read of the gamma gradient buffer.
    OpenCLSpace.ClError = Cl.EnqueueReadBuffer(OpenCLSpace.Queue,
                                               deltaGammaGPU,   // source
                                               Bool.True,
                                               (IntPtr)0,
                                               (IntPtr)(sizeof(float) * nInputUnits),
                                               gammaGradients,  // destination
                                               0,
                                               null,
                                               out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "clEnqueueReadBuffer");
    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    // Blocking read of the beta gradient buffer.
    OpenCLSpace.ClError = Cl.EnqueueReadBuffer(OpenCLSpace.Queue,
                                               deltaBetaGPU,    // source
                                               Bool.True,
                                               (IntPtr)0,
                                               (IntPtr)(sizeof(float) * nInputUnits),
                                               betaGradients,   // destination
                                               0,
                                               null,
                                               out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "clEnqueueReadBuffer");
    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");

    // Widen to double: gamma gradients first, beta gradients after.
    double[] parameterGradients = new double[2 * nInputUnits];
    for (int k = 0; k < nInputUnits; k++)
    {
        parameterGradients[k] = (double)gammaGradients[k];
        parameterGradients[nInputUnits + k] = (double)betaGradients[k];
    }

    return parameterGradients;
}
/// <summary>
/// Backward pass of the ELU nonlinearity: runs the ELUBackward kernel, which is
/// given the output deltas, the saved input activations and alpha, and fills
/// the input-delta buffer. Only the OpenCL path is implemented.
/// </summary>
/// <exception cref="NotImplementedException">Thrown in non-OpenCL builds.</exception>
public override void BackPropagate()
{
#if TIMING_LAYERS
    Utils.NonlinearityBackpropTimer.Start();
#endif

#if OPENCL_ENABLED
    // Set kernel arguments
    OpenCLSpace.ClError = Cl.SetKernelArg(OpenCLSpace.ELUBackward, 0, inputNeurons.DeltaGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.ELUBackward, 1, outputNeurons.DeltaGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.ELUBackward, 2, inputNeurons.ActivationsGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.ELUBackward, 3, (IntPtr)sizeof(float), alpha);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.ELUBackward, 4, (IntPtr)sizeof(int), nInputUnits * inputNeurons.MiniBatchSize);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "ELU.BackPropagate(): Cl.SetKernelArg");

    // Run kernel
    OpenCLSpace.ClError = Cl.EnqueueNDRangeKernel(OpenCLSpace.Queue,
                                                  OpenCLSpace.ELUBackward,
                                                  1,
                                                  null,
                                                  globalWorkSizePtr,
                                                  localWorkSizePtr,
                                                  0,
                                                  null,
                                                  out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "ELU.BackPropagate(): Cl.EnqueueNDRangeKernel");

    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");
#else
    // FIX: removed the loop that followed this throw — it was unreachable, and
    // its inner `for` had its entire body commented out, leaving a bodiless
    // statement that does not compile in CPU builds.
    throw new NotImplementedException("CPU code for ELUs not implemented yet.");
#endif

#if TIMING_LAYERS
    Utils.NonlinearityBackpropTimer.Stop();
#endif
}
/// <summary>
/// Sets the output geometry (identical to the input — batch normalization does
/// not reshape) and allocates the zeroed device buffers for the batch mean and
/// variance, their cumulative counterparts, and the normalized input activations.
/// </summary>
public override void SetupOutput()
{
    this.outputWidth = inputWidth;
    this.outputHeight = inputHeight;
    this.outputDepth = inputDepth;
    this.nOutputUnits = nInputUnits;
    this.outputNeurons = new Neurons(nOutputUnits);

    // Per-unit statistics buffers (one float per input unit each).
    this.meanGPU = CreateZeroedFloatBuffer(nInputUnits);
    this.varianceGPU = CreateZeroedFloatBuffer(nInputUnits);
    this.cumulativeMeanGPU = CreateZeroedFloatBuffer(nInputUnits);
    this.cumulativeVarianceGPU = CreateZeroedFloatBuffer(nInputUnits);

    // Normalized activations for the whole mini-batch.
    this.normalizedInputGPU = CreateZeroedFloatBuffer(nInputUnits * inputNeurons.MiniBatchSize);
}

// Allocates a read-write float device buffer of nElements elements and zeroes it.
// FIX: factors out a create/check/wipe sequence that was duplicated five times,
// and corrects the diagnostic, which previously named InitializeParameters()
// (a different method) and so misdirected error reports.
private Mem CreateZeroedFloatBuffer(int nElements)
{
    Mem buffer = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                      MemFlags.ReadWrite,
                                      (IntPtr)(sizeof(float) * nElements),
                                      out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "SetupOutput(): Cl.CreateBuffer");
    OpenCLSpace.WipeBuffer(buffer, nElements, typeof(float));
    return buffer;
}
/// <summary>
/// Sets the 1x1xN output geometry of the fully connected layer and, in OpenCL
/// builds, allocates and zeroes the dropout mask buffer (one flag per output
/// unit per mini-batch item).
/// </summary>
public override void SetupOutput()
{
    this.outputDepth = nOutputUnits;
    this.outputHeight = 1;
    this.outputWidth = 1;

    this.outputNeurons = new Neurons(this.nOutputUnits);

#if OPENCL_ENABLED
    this.dropoutMaskGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                               MemFlags.ReadWrite,
                                               (IntPtr)(sizeof(bool) * nOutputUnits * inputNeurons.MiniBatchSize),
                                               out OpenCLSpace.ClError);
    // FIX: the diagnostic previously named InitializeParameters(), a different
    // method, which misdirected error reports to the wrong place.
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "SetupOutput(): Cl.CreateBuffer");
    OpenCLSpace.WipeBuffer(dropoutMaskGPU, nOutputUnits * inputNeurons.MiniBatchSize, typeof(bool));
#endif
}
/// <summary>
/// Uploads new weight and bias values into the layer's device buffers.
/// NewParameters is laid out as [all weights, then all biases].
/// </summary>
public override void SetParameters(double[] NewParameters)
{
    int nWeights = nInputUnits * nOutputUnits;

    // Narrow to float in staging arrays before uploading.
    float[] weightsStaging = new float[nWeights];
    float[] biasesStaging = new float[nOutputUnits];
    for (int w = 0; w < nWeights; w++)
        weightsStaging[w] = (float)NewParameters[w];
    for (int b = 0; b < nOutputUnits; b++)
        biasesStaging[b] = (float)NewParameters[nWeights + b];

    // Blocking write of the weights into the device buffer.
    OpenCLSpace.ClError = Cl.EnqueueWriteBuffer(OpenCLSpace.Queue,
                                                weightsGPU,
                                                OpenCL.Net.Bool.True,
                                                (IntPtr)0,
                                                (IntPtr)(sizeof(float) * nWeights),
                                                weightsStaging,
                                                0,
                                                null,
                                                out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.EnqueueWriteBuffer");
    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    // Blocking write of the biases.
    OpenCLSpace.ClError = Cl.EnqueueWriteBuffer(OpenCLSpace.Queue,
                                                biasesGPU,
                                                OpenCL.Net.Bool.True,
                                                (IntPtr)0,
                                                (IntPtr)(sizeof(float) * nOutputUnits),
                                                biasesStaging,
                                                0,
                                                null,
                                                out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.EnqueueWriteBuffer");
    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");
}
/// <summary>
/// Backward pass of the fully connected layer. GPU builds run the FCBackward
/// kernel (which also receives the dropout mask); CPU builds compute
/// delta_in = W^T * delta_out for each mini-batch item.
/// </summary>
public override void BackPropagate()
{
#if TIMING_LAYERS
    Utils.FCBackpropTimer.Start();
#endif

#if OPENCL_ENABLED
    // Set kernel arguments.
    // FIX: the first assignment used |=, which OR-ed the new status into
    // whatever error code a previous OpenCL call had left in ClError; it must
    // reset ClError with = (as every sibling method does).
    OpenCLSpace.ClError = Cl.SetKernelArg(OpenCLSpace.FCBackward, 0, inputNeurons.DeltaGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.FCBackward, 1, outputNeurons.DeltaGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.FCBackward, 2, weightsGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.FCBackward, 3, dropoutMaskGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.FCBackward, 4, (IntPtr)sizeof(int), nInputUnits);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.FCBackward, 5, (IntPtr)sizeof(int), nOutputUnits);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.FCBackward, 6, (IntPtr)sizeof(int), inputNeurons.MiniBatchSize);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "FullyConnected.BackPropagate(): Cl.SetKernelArg");

    // Run kernel (2-dimensional work size).
    OpenCLSpace.ClError = Cl.EnqueueNDRangeKernel(OpenCLSpace.Queue,
                                                  OpenCLSpace.FCBackward,
                                                  2,
                                                  null,
                                                  backwardGlobalWorkSizePtr,
                                                  backwardLocalWorkSizePtr,
                                                  0,
                                                  null,
                                                  out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "FullyConnected.BackPropagate(): Cl.EnqueueNDRangeKernel");

    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");
#else
    // Host path: delta_in = W^T * delta_out per mini-batch item.
    // NOTE(review): unlike the GPU kernel, this path never consults the dropout
    // mask — confirm whether dropout is intended in CPU builds.
    for (int m = 0; m < inputNeurons.MiniBatchSize; m++)
    {
        inputNeurons.DeltaHost[m] = Utils.MultiplyMatrixTranspByVector(weights, outputNeurons.DeltaHost[m]);
    }
#endif

#if TIMING_LAYERS
    Utils.FCBackpropTimer.Stop();
#endif
}
/// <summary>
/// Uploads new gamma and beta values into the device buffers.
/// NewParameters is laid out as [gammas (inputDepth), betas (inputDepth)].
/// </summary>
public override void SetParameters(double[] NewParameters)
{
    // Narrow to float in staging arrays before uploading.
    float[] gammaStaging = new float[inputDepth];
    float[] betaStaging = new float[inputDepth];
    for (int k = 0; k < inputDepth; k++)
    {
        gammaStaging[k] = (float)NewParameters[k];
        betaStaging[k] = (float)NewParameters[inputDepth + k];
    }

    // Blocking write of gamma into its device buffer.
    OpenCLSpace.ClError = Cl.EnqueueWriteBuffer(OpenCLSpace.Queue,
                                                gammaGPU,
                                                OpenCL.Net.Bool.True,
                                                (IntPtr)0,
                                                (IntPtr)(sizeof(float) * inputDepth),
                                                gammaStaging,
                                                0,
                                                null,
                                                out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.EnqueueWriteBuffer");
    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    // Blocking write of beta.
    OpenCLSpace.ClError = Cl.EnqueueWriteBuffer(OpenCLSpace.Queue,
                                                betaGPU,
                                                OpenCL.Net.Bool.True,
                                                (IntPtr)0,
                                                (IntPtr)(sizeof(float) * inputDepth),
                                                betaStaging,
                                                0,
                                                null,
                                                out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.EnqueueWriteBuffer");
    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");
}
/// <summary>
/// Backward pass of the max-pooling layer. Runs the MaxPoolingBackward kernel,
/// which is given the output deltas together with the switches and pooling-table
/// buffers (presumably the max locations and window layout recorded during the
/// forward pass — confirm against the kernel source) and fills the input-delta
/// buffer. OpenCL builds only; no CPU implementation yet.
/// </summary>
public override void BackPropagate()
{
#if TIMING_LAYERS
    Utils.PoolingBackpropTimer.Start();
#endif

#if OPENCL_ENABLED
    OpenCLSpace.ClError = Cl.SetKernelArg(OpenCLSpace.MaxPoolingBackward, 0, inputNeurons.DeltaGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.MaxPoolingBackward, 1, outputNeurons.DeltaGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.MaxPoolingBackward, 2, switchesGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.MaxPoolingBackward, 3, poolingTableGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.MaxPoolingBackward, 4, (IntPtr)sizeof(int), nInputUnits);
    // NOTE(review): inputWidth * inputWidth and outputWidth * outputWidth assume
    // square feature maps — confirm height never differs from width here.
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.MaxPoolingBackward, 5, (IntPtr)sizeof(int), inputWidth * inputWidth);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.MaxPoolingBackward, 6, (IntPtr)sizeof(int), nOutputUnits);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.MaxPoolingBackward, 7, (IntPtr)sizeof(int), outputWidth * outputWidth);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.MaxPoolingBackward, 8, (IntPtr)sizeof(int), inputNeurons.MiniBatchSize);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.SetKernelArg PoolingBackward");

    OpenCLSpace.ClError = Cl.EnqueueNDRangeKernel(OpenCLSpace.Queue,
                                                  OpenCLSpace.MaxPoolingBackward,
                                                  1,
                                                  null,
                                                  globalWorkSizePtr,
                                                  localWorkSizePtr,
                                                  0,
                                                  null,
                                                  out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.EnqueueNDRangeKernel PoolingBackward");

    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");
#else
    //TODO: CPU code
#endif

#if TIMING_LAYERS
    Utils.PoolingBackpropTimer.Stop();
#endif
}
/// <summary>
/// Loads a tab-separated data file and a labels file (one integer label per line)
/// and appends one DataItem per row to DataContainer. In OpenCL builds each row
/// is parsed to float and uploaded into its own read-only device buffer; in CPU
/// builds it is kept as a host-side double array.
/// </summary>
/// <param name="dataPath">Path to the tab-separated data file, one sample per line.</param>
/// <param name="labelsPath">Path to the labels file, one label per line.</param>
/// <exception cref="ArgumentException">
/// Thrown when the two files contain a different number of lines.
/// </exception>
public void ReadData(string dataPath, string labelsPath)
{
    string[] dataArray = File.ReadAllLines(dataPath);
    string[] labelsArray = File.ReadAllLines(labelsPath);

    // FIX: throw a specific exception type instead of the base Exception
    // (still caught by any existing catch (Exception) handlers).
    if (dataArray.Length != labelsArray.Length)
        throw new ArgumentException("The amount of data does not match the amount of labels");

    // Read images and their labels
    for (int index = 0; index < dataArray.Length; index++)
    {
        string[] columns = dataArray[index].Split('\t');
        // NOTE(review): DataDimension is overwritten on every row, so rows with
        // differing column counts go undetected — confirm the input is rectangular.
        DataDimension = columns.Length;

#if OPENCL_ENABLED
        // Parse with the invariant culture so '.' is always the decimal separator.
        float[] dataPoint = new float[columns.Length];
        for (int i = 0; i < columns.Length; i++)
            dataPoint[i] = float.Parse(columns[i], CultureInfo.InvariantCulture.NumberFormat);

        // Copy the parsed row straight into a read-only device buffer.
        int datumBytesSize = sizeof(float) * dataPoint.Length;
        Mem tmpBuffer = (Mem)Cl.CreateBuffer(OpenCLSpace.Context,
                                             MemFlags.ReadOnly | MemFlags.CopyHostPtr | MemFlags.AllocHostPtr,
                                             (IntPtr)datumBytesSize,
                                             dataPoint,
                                             out OpenCLSpace.ClError);
        OpenCLSpace.CheckErr(OpenCLSpace.ClError, "DataSet(): Cl.CreateBuffer tmpBuffer");
#else
        double[] tmpBuffer = new double[columns.Length];
        for (int i = 0; i < columns.Length; i++)
            tmpBuffer[i] = double.Parse(columns[i], CultureInfo.InvariantCulture.NumberFormat);
#endif
        DataContainer.Add(new DataItem(tmpBuffer, Convert.ToInt32(labelsArray[index])));
    }
}
/// <summary>
/// Computes the cross-entropy gradient (output class scores minus the one-hot
/// true label) for every item of the mini-batch and writes the whole batch
/// gradient into the delta buffer of the last layer's input neurons
/// (i.e. the classifier's output neurons).
/// </summary>
public void CrossEntropyGradient(DataSet DataSet, int[] iMiniBatch)
{
    int nClasses = DataSet.NumberOfClasses;
    float[] gradientBatch = new float[iMiniBatch.Length * nClasses];

    for (int m = 0; m < iMiniBatch.Length; m++)
    {
        int trueLabel = DataSet.DataContainer[iMiniBatch[m]].Label;

        // gradient = class scores, with 1 subtracted at the true class.
        double[] gradient = new double[nClasses];
        Array.Copy(outputLayer.OutputClassScores[m], gradient, nClasses);
        gradient[trueLabel] -= 1.0;

        for (int c = 0; c < nClasses; c++)
            gradientBatch[m * nClasses + c] = (float)gradient[c];
    }

    // Upload the whole mini-batch gradient in one blocking write.
    OpenCLSpace.ClError = Cl.EnqueueWriteBuffer(OpenCLSpace.Queue,
                                                layers.Last().InputNeurons.DeltaGPU,
                                                Bool.True,
                                                (IntPtr)0,
                                                (IntPtr)(sizeof(float) * gradientBatch.Length),
                                                gradientBatch,
                                                0,
                                                null,
                                                out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "NetworkTrainer.CrossEntropyGradient(): Cl.EnqueueWriteBuffer");
    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");
}
/// <summary>
/// Backward pass of the convolutional batch-normalization layer. A single
/// kernel (BNConvBackPropagate) receives the output deltas, the saved
/// normalized input, gamma and the batch variance, and fills the input-delta
/// and gamma/beta-gradient buffers.
/// NOTE(review): unlike sibling layers this method has no #if OPENCL_ENABLED
/// guard, so CPU-only builds would fail here — confirm this layer is
/// intentionally GPU-only.
/// </summary>
public override void BackPropagate()
{
#if TIMING_LAYERS
    Utils.BNConvBackpropTimer.Start();
#endif

    OpenCLSpace.ClError = Cl.SetKernelArg(OpenCLSpace.BNConvBackPropagate, 0, inputNeurons.DeltaGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNConvBackPropagate, 1, outputNeurons.DeltaGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNConvBackPropagate, 2, normalizedInputGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNConvBackPropagate, 3, gammaGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNConvBackPropagate, 4, varianceGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNConvBackPropagate, 5, deltaGammaGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNConvBackPropagate, 6, deltaBetaGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNConvBackPropagate, 7, (IntPtr)sizeof(int), inputArea);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNConvBackPropagate, 8, (IntPtr)sizeof(int), nInputUnits);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNConvBackPropagate, 9, (IntPtr)sizeof(int), inputNeurons.MiniBatchSize);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.SetKernelArg");

    OpenCLSpace.ClError = Cl.EnqueueNDRangeKernel(OpenCLSpace.Queue,
                                                  OpenCLSpace.BNConvBackPropagate,
                                                  1,
                                                  null,
                                                  nActivationsGlobalWorkSizePtr,
                                                  optimalLocalWorkSizePtr,
                                                  0,
                                                  null,
                                                  out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.EnqueueNDRangeKernel");

    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");

#if TIMING_LAYERS
    Utils.BNConvBackpropTimer.Stop();
#endif
}
/// <summary>
/// Blocking copy of the weight and bias device buffers into the host-side
/// weightsHost and biasesHost arrays. Speed (momentum) buffers are deliberately
/// not copied — see trailing comment.
/// </summary>
public override void CopyBuffersToHost()
{
    OpenCLSpace.ClError = Cl.EnqueueReadBuffer(OpenCLSpace.Queue,
                                               weightsGPU,  // source
                                               Bool.True,   // blocking read
                                               (IntPtr)0,
                                               (IntPtr)(sizeof(float) * nInputUnits * nOutputUnits),
                                               weightsHost, // destination
                                               0,
                                               null,
                                               out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "clEnqueueReadBuffer weightsGPU");
    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    OpenCLSpace.ClError = Cl.EnqueueReadBuffer(OpenCLSpace.Queue,
                                               biasesGPU,  // source
                                               Bool.True,
                                               (IntPtr)0,
                                               (IntPtr)(sizeof(float) * nOutputUnits),
                                               biasesHost, // destination
                                               0,
                                               null,
                                               out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "clEnqueueReadBuffer biasesGPU");
    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");

    // Speeds are not saved.
}
/// <summary>
/// Blocking copy of the gamma and beta device buffers into the host-side
/// gammaHost and betaHost arrays. Gradient and speed buffers are deliberately
/// not copied — see trailing comment.
/// </summary>
public override void CopyBuffersToHost()
{
    OpenCLSpace.ClError = Cl.EnqueueReadBuffer(OpenCLSpace.Queue,
                                               gammaGPU,  // source
                                               Bool.True, // blocking read
                                               (IntPtr)0,
                                               (IntPtr)(sizeof(float) * nInputUnits),
                                               gammaHost, // destination
                                               0,
                                               null,
                                               out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "clEnqueueReadBuffer gammaGPU");
    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    OpenCLSpace.ClError = Cl.EnqueueReadBuffer(OpenCLSpace.Queue,
                                               betaGPU,  // source
                                               Bool.True,
                                               (IntPtr)0,
                                               (IntPtr)(sizeof(float) * nInputUnits),
                                               betaHost, // destination
                                               0,
                                               null,
                                               out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "clEnqueueReadBuffer betaGPU");
    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");

    // Gradients and speeds are not saved.
}
/// <summary>
/// Backward pass of the average-pooling layer: runs the AveragePoolingBackward
/// kernel, which receives the output deltas plus the input geometry
/// (nInputUnits, inputArea, inputDepth) and fills the input-delta buffer
/// (presumably spreading each output delta over its pooling window — confirm
/// against the kernel source).
/// </summary>
public override void BackPropagate()
{
#if TIMING_LAYERS
    // TODO: add timer
#endif

    OpenCLSpace.ClError = Cl.SetKernelArg(OpenCLSpace.AveragePoolingBackward, 0, inputNeurons.DeltaGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.AveragePoolingBackward, 1, outputNeurons.DeltaGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.AveragePoolingBackward, 2, (IntPtr)sizeof(int), nInputUnits);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.AveragePoolingBackward, 3, (IntPtr)sizeof(int), inputArea);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.AveragePoolingBackward, 4, (IntPtr)sizeof(int), inputDepth);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.AveragePoolingBackward, 5, (IntPtr)sizeof(int), inputNeurons.MiniBatchSize);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.SetKernelArg");

    OpenCLSpace.ClError = Cl.EnqueueNDRangeKernel(OpenCLSpace.Queue,
                                                  OpenCLSpace.AveragePoolingBackward,
                                                  2,
                                                  null,
                                                  bwdGlobalWorkSizePtr,
                                                  bwdLocalWorkSizePtr,
                                                  0,
                                                  null,
                                                  out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.EnqueueNDRangeKernel");

    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");

#if TIMING_LAYERS
    // TODO: add timer
#endif
}
/// <summary>
/// Forward pass of the residual block: conv1 -> nonlinearity (ReLU or ELU,
/// selected by nonlinearityType) -> conv2, after which the SkipForward kernel
/// accumulates the block's input activations onto its output activations
/// (the skip connection).
/// </summary>
public override void FeedForward()
{
    // FIX: large commented-out debug dumps of intermediate activations
    // (blocking reads + Console prints after each stage) were removed.
    convolutionalLayer1.FeedForward();

    // Nonlinearity between the two convolutions.
    if (nonlinearityType == "ReLU")
    {
        nonlinearityReLU.FeedForward();
    }
    else if (nonlinearityType == "ELU")
    {
        nonlinearityELU.FeedForward();
    }

    convolutionalLayer2.FeedForward();

    // Additionally, cumulate inputs onto outputs (skip connection).
    OpenCLSpace.ClError = Cl.SetKernelArg(OpenCLSpace.SkipForward, 0, outputNeurons.ActivationsGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.SkipForward, 1, inputNeurons.ActivationsGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.SkipForward, 2, (IntPtr)sizeof(int), nInputUnits);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.SkipForward, 3, (IntPtr)sizeof(int), inputNeurons.MiniBatchSize);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.SetKernelArg");

    // Run kernel
    OpenCLSpace.ClError = Cl.EnqueueNDRangeKernel(OpenCLSpace.Queue,
                                                  OpenCLSpace.SkipForward,
                                                  1,
                                                  null,
                                                  globalWorkSizePtr,
                                                  localWorkSizePtr,
                                                  0,
                                                  null,
                                                  out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.EnqueueNDRangeKernel");

    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");
}
/// <summary>
/// Applies the parameter update: GPU builds run the FCUpdateParameters kernel
/// over the weight/bias buffers and their speed buffers; CPU builds add the
/// update-speed arrays to weights and biases directly. When weightMaxNorm is
/// finite, the FCConstrainWeightNorm kernel is then run to enforce the
/// max-norm constraint on the weights (GPU path only).
/// </summary>
/// <param name="weightMaxNorm">Maximum allowed weight norm; pass
/// double.PositiveInfinity to disable the constraint.</param>
public override void UpdateParameters(double weightMaxNorm)
{
#if TIMING_LAYERS
    Utils.FCUpdateParametersTimer.Start();
#endif

#if OPENCL_ENABLED
    // Set kernel arguments
    OpenCLSpace.ClError = Cl.SetKernelArg(OpenCLSpace.FCUpdateParameters, 0, weightsGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.FCUpdateParameters, 1, biasesGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.FCUpdateParameters, 2, weightsSpeedGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.FCUpdateParameters, 3, biasesSpeedGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.FCUpdateParameters, 4, (IntPtr)sizeof(int), nInputUnits);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.FCUpdateParameters, 5, (IntPtr)sizeof(int), nOutputUnits);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "FullyConnected.UpdateParameters(): Cl.SetKernelArg");

    // Run kernel
    OpenCLSpace.ClError = Cl.EnqueueNDRangeKernel(OpenCLSpace.Queue,
                                                  OpenCLSpace.FCUpdateParameters,
                                                  2,
                                                  null,
                                                  updateGlobalWorkSizePtr,
                                                  updateLocalWorkSizePtr,
                                                  0,
                                                  null,
                                                  out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "FullyConnected.UpdateParameters(): Cl.EnqueueNDRangeKernel");

    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    // Now constrain norm of each weight vector
    if (!double.IsInfinity(weightMaxNorm))
    {
        // Set kernel arguments
        OpenCLSpace.ClError = Cl.SetKernelArg(OpenCLSpace.FCConstrainWeightNorm, 0, weightsGPU);
        OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.FCConstrainWeightNorm, 1, (IntPtr)sizeof(int), nOutputUnits);
        OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.FCConstrainWeightNorm, 2, (IntPtr)sizeof(int), nInputUnits);
        OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.FCConstrainWeightNorm, 3, (IntPtr)sizeof(float), (float)weightMaxNorm);
        OpenCLSpace.CheckErr(OpenCLSpace.ClError, "FCConstrainWeightNorm(): Cl.SetKernelArg");

        // Run kernel
        OpenCLSpace.ClError = Cl.EnqueueNDRangeKernel(OpenCLSpace.Queue,
                                                      OpenCLSpace.FCConstrainWeightNorm,
                                                      1,
                                                      null,
                                                      constrainNormGlobalWorkSizePtr,
                                                      constrainNormLocalWorkSizePtr,
                                                      0,
                                                      null,
                                                      out OpenCLSpace.ClEvent);
        OpenCLSpace.CheckErr(OpenCLSpace.ClError, "FCConstrainWeightNorm(): Cl.EnqueueNDRangeKernel");

        OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
        OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");
    }

    OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");
#else
    for (int i = 0; i < nOutputUnits; i++)
    {
        // weights update
        for (int j = 0; j < nInputUnits; j++)
        {
            weights[i, j] += weightsUpdateSpeed[i, j];
        }

        // update biases
        biases[i] += biasesUpdateSpeed[i];
    }
#endif

#if TIMING_LAYERS
    Utils.FCUpdateParametersTimer.Stop();
#endif
}
/// <summary>
/// Entry point: sets up OpenCL, imports the GTSRB greyscale and RGB test sets,
/// loads four pre-trained networks, and evaluates them as an ensemble.
/// </summary>
static void Main(string[] args)
{
    // All kernels, preprocessed data and saved networks live under this root folder.
    string baseDir = "C:/Users/jacopo/Dropbox/Chalmers/MSc thesis";

    /*****************************************************
     * (0) Setup OpenCL
     ****************************************************/
    Console.WriteLine("\n=========================================");
    Console.WriteLine(" OpenCL setup");
    Console.WriteLine("=========================================\n");

    OpenCLSpace.SetupSpace(4);
    OpenCLSpace.KernelsPath = baseDir + "/ConvDotNet/Kernels";
    OpenCLSpace.LoadKernels();

    /*****************************************************
     * (1) Load data
     ****************************************************/
    Console.WriteLine("\n=========================================");
    Console.WriteLine(" Importing data");
    Console.WriteLine("=========================================\n");

    // GTSRB test set, greyscale preprocessing variant 1 (43 traffic-sign classes)
    DataSet greyscaleTestSet = new DataSet(43);
    Console.WriteLine("Importing test set (grayscale 1)...");
    greyscaleTestSet.ReadData(
        baseDir + "/GTSRB/Preprocessed/14_test_images.dat",
        baseDir + "/GTSRB/Preprocessed/test_labels_full.dat");

    // GTSRB test set, RGB preprocessing variant 1 (43 traffic-sign classes)
    DataSet rgbTestSet = new DataSet(43);
    Console.WriteLine("Importing test set (RGB 1)...");
    rgbTestSet.ReadData(
        baseDir + "/GTSRB/Preprocessed/16_test_images.dat",
        baseDir + "/GTSRB/Preprocessed/test_labels_full.dat");

    // (Alternative preprocessing variants 18_*/20_* and the *b_Dropout networks
    // were used in earlier experiments and are intentionally not loaded here.)

    /*****************************************************
     * (2) Evaluate ensemble of networks
     ****************************************************/
    string networksDir = baseDir + "/Results/Networks/";
    List<NeuralNetwork> networkEnsemble = new List<NeuralNetwork>
    {
        Utils.LoadNetworkFromFile(networksDir, "FIXED_LeNet_GS_DropoutFC"),
        Utils.LoadNetworkFromFile(networksDir, "FIXED_LeNet_RGB_DropoutFC"),
        Utils.LoadNetworkFromFile(networksDir, "FIXED_VGGv2_GS_DropoutFC"),
        Utils.LoadNetworkFromFile(networksDir, "FIXED_VGGv2_RGB_DropoutFC"),
    };

    double error = 0.0;
    Console.WriteLine("\nEvaluating an ensemble of {0} networks...", networkEnsemble.Count);
    // Mini-batch size 64; greyscale networks read the GS set, RGB networks the RGB set.
    NetworkEvaluator.EvaluateEnsemble(networkEnsemble, greyscaleTestSet, rgbTestSet, 64, out error);
    Console.WriteLine("\n\tTest set error = {0}\n\tAccuracy = {1}", error, 100 * (1 - error));
}
/// <summary>
/// Validates the input geometry, computes this max-pooling layer's output dimensions,
/// and (GPU path) builds the device-side pooling lookup table and the switches buffer
/// that records which input unit won each pooling window (used in backprop).
/// </summary>
/// <exception cref="ArgumentException">If the input is not spatially square, or the
/// pooling width does not evenly divide the input width.</exception>
public override void SetupOutput()
{
    // Check arguments _______________________________________________________________________________________
    if (inputHeight != inputWidth)
    {
        throw new ArgumentException("MaxPooling currently only supports spatially square input.");
    }
    // NOTE(review): the message mentions stride, but only poolWidth divisibility is
    // actually checked here — confirm whether stride should be validated too.
    if (inputWidth % poolWidth != 0)
    {
        throw new ArgumentException("Cannot apply max pooling to input: pooling width and stride do not fit input width!");
    }

    // Setup output __________________________________________________________________________________________
    // Standard pooling output size: (input - window) / stride + 1; depth is unchanged.
    this.outputWidth = (inputWidth - poolWidth) / stride + 1;
    this.outputHeight = (inputHeight - poolWidth) / stride + 1;
    this.outputDepth = inputDepth;
    this.nOutputUnits = outputWidth * outputHeight * outputDepth;
    this.outputNeurons = new Neurons(nOutputUnits);

    // Initialize and create auxiliary structures ____________________________________________________________
#if OPENCL_ENABLED
    // Pooling table: 4 ints per output pixel of one feature map (filled by the kernel below).
    this.poolingTableGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context, MemFlags.ReadWrite, (IntPtr)(sizeof(int) * 4 * outputHeight * outputWidth), out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.CreateBuffer poolingTableGPU");
    OpenCLSpace.WipeBuffer(poolingTableGPU, 4 * outputHeight * outputWidth, typeof(int));

    OpenCLSpace.ClError = Cl.SetKernelArg(OpenCLSpace.CreateMaxPoolingTable, 0, poolingTableGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.CreateMaxPoolingTable, 1, (IntPtr)sizeof(int), stride);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.CreateMaxPoolingTable, 2, (IntPtr)sizeof(int), inputWidth);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.CreateMaxPoolingTable, 3, (IntPtr)sizeof(int), outputWidth);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.SetKernelArg CreatePoolingTable");

    // Global work size is nOutputUnits * MiniBatchSize rounded up to a multiple of 32
    // (the local work size).
    // NOTE(review): the table buffer only holds 4 * outputHeight * outputWidth entries
    // (per-image, not per-batch), yet the launch is sized by the whole mini-batch —
    // presumably the kernel bounds-checks its global id; confirm against the kernel source.
    OpenCLSpace.ClError = Cl.EnqueueNDRangeKernel(OpenCLSpace.Queue, OpenCLSpace.CreateMaxPoolingTable, 1, null, new IntPtr[] { (IntPtr)(32 * Math.Ceiling((double)(nOutputUnits * inputNeurons.MiniBatchSize) / (double)32)) }, new IntPtr[] { (IntPtr)32 }, 0, null, out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.EnqueueNDRangeKernel CreatePoolingTable");

    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    // Switches: one bool per input unit per mini-batch item, marking pooling winners.
    this.switchesGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context, MemFlags.ReadWrite, (IntPtr)(sizeof(bool) * nInputUnits * inputNeurons.MiniBatchSize), out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.CreateBuffer switchesGPU");
    OpenCLSpace.WipeBuffer(switchesGPU, nInputUnits * inputNeurons.MiniBatchSize, typeof(bool));
#else
    //TODO: create poolingTable and switches on cpu
#endif
}
/// <summary>
/// Runs the network forward from layer <paramref name="StartPoint"/> (inclusive) to
/// layer <paramref name="EndPoint"/> (exclusive: the loop condition is l &lt; iEndLayer).
/// </summary>
/// <param name="StartPoint">Either the string "beginning" (maps to layer index 1) or an
/// int layer index.</param>
/// <param name="EndPoint">Either the string "end" (maps to nLayers) or an int layer index.</param>
/// <exception cref="ArgumentException">If either argument is neither the accepted string
/// nor an int.</exception>
public void ForwardPass(object StartPoint, object EndPoint)
{
    int iStartLayer, iEndLayer;

    // Resolve StartPoint: "beginning" => 1, or an explicit layer index.
    if (StartPoint.GetType() == typeof(string))
    {
        if (StartPoint.ToString() == "beginning")
        {
            iStartLayer = 1;
        }
        else
        {
            throw new ArgumentException("First argument: pass either ''beginning'', or an integer corresponding to starting layer.");
        }
    }
    else if (StartPoint.GetType() == typeof(int))
    {
        iStartLayer = (int)StartPoint;
    }
    else
    {
        throw new ArgumentException("First argument <StartPoint> is invalid.");
    }

    // Resolve EndPoint: "end" => nLayers, or an explicit layer index.
    if (EndPoint.GetType() == typeof(string))
    {
        if (EndPoint.ToString() == "end")
        {
            iEndLayer = nLayers;
        }
        else
        {
            throw new ArgumentException("Second argument: pass either ''end'', or an integer corresponding to end layer.");
        }
    }
    else if (EndPoint.GetType() == typeof(int))
    {
        iEndLayer = (int)EndPoint;
    }
    else
    {
        throw new ArgumentException("Second argument <EndPoint> is invalid.");
    }

    // Run network forward
    for (int l = iStartLayer; l < iEndLayer; l++)
    {
#if DEBUGGING_STEPBYSTEP
        /* ------------------------- DEBUGGING ---------------------------------------------*/
        // Read back and print each layer's input activations before feeding forward.
        int miniBatchSize = layers[0].OutputNeurons.MiniBatchSize;
        if (l < nLayers - 1)
        {
            float[] layerInputAll = new float[layers[l].InputNeurons.NumberOfUnits * miniBatchSize];
            OpenCLSpace.ClError = Cl.EnqueueReadBuffer(OpenCLSpace.Queue,
                layers[l].InputNeurons.ActivationsGPU, // source
                Bool.True,
                (IntPtr)0,
                (IntPtr)(layers[l].InputNeurons.NumberOfUnits * miniBatchSize * sizeof(float)),
                layerInputAll, // destination
                0,
                null,
                out OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "NeuralNetwork.ForwardPass Cl.clEnqueueReadBuffer layerInput");

            // Display input layer-by-layer
            Console.WriteLine("\nLayer {0} ({1}) input activations:", l, layers[l].Type);
            for (int m = 0; m < miniBatchSize; m++)
            {
                float[] layerInput = new float[layers[l].InputNeurons.NumberOfUnits];
                Array.Copy(layerInputAll, m * layers[l].InputNeurons.NumberOfUnits, layerInput, 0, layers[l].InputNeurons.NumberOfUnits);
                Console.WriteLine("\n --- Mini-batch item {0} -----", m);
                for (int j = 0; j < layerInput.Length; j++)
                {
                    Console.Write("{0} ", layerInput[j]);
                }
                Console.WriteLine();
                Console.ReadKey();
            }
        }
        /* ------------------------- END DEBUGGING --------------------------------------------- */
#endif

        layers[l].FeedForward();

#if DEBUGGING_STEPBYSTEP
        /* ------------------------- DEBUGGING --------------------------------------------- */
        // Display output layer-by-layer
        //int miniBatchSize = layers[0].OutputNeurons.MiniBatchSize;
        if (l < nLayers - 1)
        {
            float[] layerOutputAll = new float[layers[l].OutputNeurons.NumberOfUnits * miniBatchSize];
            OpenCLSpace.ClError = Cl.EnqueueReadBuffer(OpenCLSpace.Queue,
                layers[l].OutputNeurons.ActivationsGPU, // source
                Bool.True,
                (IntPtr)0,
                (IntPtr)(layers[l].OutputNeurons.NumberOfUnits * miniBatchSize * sizeof(float)),
                layerOutputAll, // destination
                0,
                null,
                out OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "NeuralNetwork.ForwardPass Cl.clEnqueueReadBuffer layerInput");

            Console.WriteLine("\nLayer {0} ({1}) output activations:", l, layers[l].Type);
            for (int m = 0; m < miniBatchSize; m++)
            {
                float[] layerOutput = new float[layers[l].OutputNeurons.NumberOfUnits];
                Array.Copy(layerOutputAll, m * layers[l].OutputNeurons.NumberOfUnits, layerOutput, 0, layers[l].OutputNeurons.NumberOfUnits);
                Console.WriteLine("\n --- Mini-batch item {0} -----", m);
                for (int j = 0; j < layerOutput.Length; j++)
                {
                    Console.Write("{0} ", layerOutput[j]);
                }
                Console.WriteLine();
                Console.ReadKey();
            }
        }
        /* ------------------------- END DEBUGGING --------------------------------------------- */
#endif
    }

    // (Commented-out scaffolding removed here: it appended per-item class scores to
    // C:\Users\jacopo\Desktop\ClassScores_08.txt via a StreamWriter.)

#if DEBUGGING_STEPBYSTEP
    Console.WriteLine("Class scores (softmax activation):");
    for (int m = 0; m < layers[0].OutputNeurons.MiniBatchSize; m++)
    {
        double[] outputScores = outputLayer.OutputClassScores[m];
        Console.WriteLine("\n --- Mini-batch item {0} -----", m);
        for (int j = 0; j < outputScores.Length; j++)
        {
            Console.Write("{0} ", (float)outputScores[j]);
        }
        Console.WriteLine();
        Console.ReadKey();
    }
#endif
}
/// <summary>
/// Initializes this fully-connected layer's weights and biases on the host (when
/// Option == "random") and transfers them to the device, also allocating zeroed
/// gradient and update-speed buffers.
/// </summary>
/// <param name="Option">"random" to sample fresh parameters; otherwise ("load") the
/// existing host arrays are transferred as-is.</param>
public override void InitializeParameters(string Option)
{
    base.InitializeParameters(Option); // makes sure this method is only call AFTER "SetupOutput()"

    if (Option == "random") // sample new parameters
    {
        // WEIGHTS are sampled from a normal distribution with mean 0 and
        // std = sqrt(2 / (10 * nInputUnits)).
        // NOTE(review): plain He initialization would be sqrt(2 / nInputUnits); the
        // extra factor 10 in the denominator shrinks the std by ~3.16x — confirm
        // whether that damping is intentional.
        // BIASES are initialized to zero (0.00f) below.
        this.weightsHost = new float[nOutputUnits * nInputUnits];
        this.biasesHost = new float[nOutputUnits];

        double weightsStdDev = Math.Sqrt(2.0 / (10 * nInputUnits));
        double uniformRand1;
        double uniformRand2;
        double tmp;

        for (int iRow = 0; iRow < nOutputUnits; iRow++)
        {
            for (int iCol = 0; iCol < nInputUnits; iCol++)
            {
                uniformRand1 = Global.rng.NextDouble();
                uniformRand2 = Global.rng.NextDouble();
                // Use a Box-Muller transform to get a random normal(0,1)
                tmp = Math.Sqrt(-2.0 * Math.Log(uniformRand1)) * Math.Sin(2.0 * Math.PI * uniformRand2);
                tmp = weightsStdDev * tmp; // rescale

                weightsHost[iRow * nInputUnits + iCol] = (float)tmp;
            }
            biasesHost[iRow] = 0.00f; // biases start at exactly zero
        }
    }
    // else Option must be ''load'' => do not sample parameters, just load them from host to device

    int weightBufferSize = sizeof(float) * (outputNeurons.NumberOfUnits * inputNeurons.NumberOfUnits);
    int biasesBufferSize = sizeof(float) * outputNeurons.NumberOfUnits;

    // Copy host parameters into freshly created device buffers (CopyHostPtr).
    this.weightsGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context, MemFlags.ReadWrite | MemFlags.CopyHostPtr, (IntPtr)weightBufferSize, weightsHost, out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.CreateBuffer");

    this.biasesGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context, MemFlags.ReadWrite | MemFlags.CopyHostPtr, (IntPtr)biasesBufferSize, biasesHost, out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.CreateBuffer");

    // Also create weightsGradients and biasesGradients buffers and initialize them to zero
    this.weightsGradientsGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context, MemFlags.ReadWrite, (IntPtr)weightBufferSize, out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.CreateBuffer");
    OpenCLSpace.WipeBuffer(weightsGradientsGPU, (nInputUnits * nOutputUnits), typeof(float));

    this.biasesGradientsGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context, MemFlags.ReadWrite, (IntPtr)biasesBufferSize, out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.CreateBuffer");
    OpenCLSpace.WipeBuffer(biasesGradientsGPU, nOutputUnits, typeof(float));

    // Also create weightsSpeed and biasesSpeed buffers and initialize them to zero
    this.weightsSpeedGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context, MemFlags.ReadWrite, (IntPtr)weightBufferSize, out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.CreateBuffer");
    OpenCLSpace.WipeBuffer(weightsSpeedGPU, (nInputUnits * nOutputUnits), typeof(float));

    this.biasesSpeedGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context, MemFlags.ReadWrite, (IntPtr)biasesBufferSize, out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.CreateBuffer");
    OpenCLSpace.WipeBuffer(biasesSpeedGPU, nOutputUnits, typeof(float));
}
/// <summary>
/// Dumps the weights of convolutional and fully-connected layers to text files
/// (one float per line) under <paramref name="outputDirPath"/>. Layers of other
/// types are silently skipped.
/// </summary>
/// <param name="whichLayer">"all" to save every layer, "first" to save only layer 1.</param>
/// <param name="outputDirPath">Directory prefix for the output files.</param>
/// <exception cref="ArgumentException">If <paramref name="whichLayer"/> is neither
/// "first" nor "all".</exception>
public void SaveWeights(string whichLayer, string outputDirPath)
{
    int n;
    if (whichLayer == "all")
    {
        n = nLayers;
    }
    else if (whichLayer == "first")
    {
        n = 1;
    }
    else
    {
        throw new ArgumentException("First argument must be either ''first'' or ''all''");
    }

    // NOTE(review): this loop runs iLayer = 1..n INCLUSIVE while ForwardPass iterates
    // l < nLayers — confirm layers[] really has an entry at index nLayers (i.e. the
    // input layer occupies index 0 and the list holds nLayers + 1 elements).
    for (int iLayer = 1; iLayer <= n; ++iLayer)
    {
        if (layers[iLayer].Type == "Convolutional")
        {
            string outputFilePath = outputDirPath + name + "_layer" + iLayer.ToString() + "_convolutional_filters.txt";

            int nFilters = layers[iLayer].OutputDepth;
            int inputDepth = layers[iLayer].InputDepth;
            int filterSize = layers[iLayer].FilterSize;
            int nParameters = nFilters * inputDepth * filterSize * filterSize;

            SaveWeightBufferToFile(layers[iLayer].WeightsGPU, nParameters, outputFilePath, iLayer, "convolutional");
        }
        else if (layers[iLayer].Type == "FullyConnected")
        {
            string outputFilePath = outputDirPath + name + "_layer" + iLayer.ToString() + "_fullyConnected_weights.txt";

            int nParameters = layers[iLayer].NOutputUnits * layers[iLayer].NInputUnits;

            SaveWeightBufferToFile(layers[iLayer].WeightsGPU, nParameters, outputFilePath, iLayer, "fully connected");
        }
    }
}

// Reads nParameters floats back from a device buffer (blocking) and writes them,
// one per line, to outputFilePath. Shared by both layer-type branches above.
private void SaveWeightBufferToFile(Mem weightsBufferGPU, int nParameters, string outputFilePath, int iLayer, string layerDescription)
{
    float[] values = new float[nParameters];

    OpenCLSpace.ClError = Cl.EnqueueReadBuffer(OpenCLSpace.Queue,
        weightsBufferGPU, // source
        Bool.True, // blocking read: values[] is complete when the call returns
        (IntPtr)0,
        (IntPtr)(sizeof(float) * nParameters),
        values, // destination
        0,
        null,
        out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "clEnqueueReadBuffer weightsGPU");

    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");

    using (System.IO.StreamWriter outputFile = new System.IO.StreamWriter(outputFilePath))
    {
        foreach (float value in values)
        {
            outputFile.WriteLine(value.ToString());
        }
        // Fixed: original message read "...saved to fileC:/..." (missing space).
        Console.WriteLine("Weights of layer " + iLayer.ToString() + " (" + layerDescription + ") saved to file " + outputFilePath);
    }
}
/// <summary>
/// Initializes this batch-normalization layer's learnable parameters (gamma = 1,
/// beta = 0 when Option == "random"), resets the layer's training-state flags, and
/// allocates the device-side parameter, gradient, and update-speed buffers.
/// </summary>
/// <param name="Option">"random" to reset gamma/beta on the host; otherwise ("load")
/// the existing host arrays are transferred as-is.</param>
public override void InitializeParameters(string Option)
{
    // Reset per-epoch running-statistics bookkeeping and mode flags.
    this.iCumulativeAverage = 0;
    this.isEpochBeginning = true;
    this.isTraining = true;
    this.isPreInference = false;
    this.isInference = false;

    if (Option == "random") // initialize parameters on host
    {
        // Gamma parameters are initialized to one
        gammaHost = new float[nInputUnits];
        for (int i = 0; i < nInputUnits; ++i)
        {
            gammaHost[i] = 1.0f;
        }
        // And beta parameters to zero (new float[] is zero-initialized)
        betaHost = new float[nInputUnits];
    }
    // else Option must be ''load'' => do not initialize parameters, just load them from host to device

    // Tranfer parameters to device
    this.gammaGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context, MemFlags.ReadWrite | MemFlags.CopyHostPtr, (IntPtr)(sizeof(float) * nInputUnits), gammaHost, out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "InitializeParameters(): Cl.CreateBuffer");

    this.betaGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context, MemFlags.ReadWrite | MemFlags.CopyHostPtr, (IntPtr)(sizeof(float) * nInputUnits), betaHost, out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "InitializeParameters(): Cl.CreateBuffer");

    // Also create buffers for parameter gradients (zeroed)
    this.deltaGammaGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context, MemFlags.ReadWrite, (IntPtr)(sizeof(float) * nInputUnits), out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "InitializeParameters(): Cl.CreateBuffer");
    OpenCLSpace.WipeBuffer(deltaGammaGPU, nInputUnits, typeof(float));

    this.deltaBetaGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context, MemFlags.ReadWrite, (IntPtr)(sizeof(float) * nInputUnits), out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "InitializeParameters(): Cl.CreateBuffer");
    OpenCLSpace.WipeBuffer(deltaBetaGPU, nInputUnits, typeof(float));

    // And for parameter update speed (momentum terms, zeroed)
    this.gammaSpeedGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context, MemFlags.ReadWrite, (IntPtr)(sizeof(float) * nInputUnits), out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "InitializeParameters(): Cl.CreateBuffer");
    OpenCLSpace.WipeBuffer(gammaSpeedGPU, nInputUnits, typeof(float));

    this.betaSpeedGPU = (Mem)Cl.CreateBuffer(OpenCLSpace.Context, MemFlags.ReadWrite, (IntPtr)(sizeof(float) * nInputUnits), out OpenCLSpace.ClError);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "InitializeParameters(): Cl.CreateBuffer");
    OpenCLSpace.WipeBuffer(betaSpeedGPU, nInputUnits, typeof(float));
}
/// <summary>
/// Copies the selected examples of <paramref name="dataSet"/> into this input layer's
/// output activations, one mini-batch item per slot, and optionally applies input
/// dropout on the GPU.
/// </summary>
/// <param name="dataSet">Source data set; must already reside on the device (GPU path).</param>
/// <param name="iExamples">Indices of the examples to feed; must have at least
/// MiniBatchSize entries.</param>
public void FeedData(DataSet dataSet, int[] iExamples)
{
#if TIMING_LAYERS
    Utils.InputFeedTimer.Start();
#endif

    int dataPointSize = dataSet.DataDimension;

    for (int m = 0; m < outputNeurons.MiniBatchSize; m++)
    {
#if OPENCL_ENABLED
        int iDataPoint = iExamples[m];

        // Device-to-device copy of one example into the m-th slot of the batch buffer.
        OpenCLSpace.ClError = Cl.EnqueueCopyBuffer(OpenCLSpace.Queue,
            dataSet.DataContainer[iDataPoint].Data, // source
            outputNeurons.ActivationsGPU, // destination
            (IntPtr)0, // source offset (in bytes)
            (IntPtr)(sizeof(float) * m * dataPointSize), // destination offset (in bytes)
            (IntPtr)(sizeof(float) * dataPointSize), // size of buffer to copy
            0,
            null,
            out OpenCLSpace.ClEvent);
        OpenCLSpace.CheckErr(OpenCLSpace.ClError, "InputLayer.FeedData Cl.EnqueueCopyBuffer inputData");

        OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
        OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

        // Dropout!
        // NOTE(review): this kernel is launched INSIDE the per-item loop, but arg 1
        // sizes it over the ENTIRE batch (nOutputUnits * MiniBatchSize) with a fresh
        // random seed each iteration — so dropout appears to be re-applied
        // MiniBatchSize times to the whole buffer (effective keep rate p^B), and to
        // slots not yet filled. Confirm against the InputDropout kernel; it likely
        // belongs after the loop instead.
        if (dropoutParameter < 1.0)
        {
            // Set kernel arguments
            OpenCLSpace.ClError = Cl.SetKernelArg(OpenCLSpace.InputDropout, 0, outputNeurons.ActivationsGPU);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.InputDropout, 1, (IntPtr)sizeof(int), nOutputUnits * outputNeurons.MiniBatchSize);
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.InputDropout, 2, (IntPtr)sizeof(float), (float)dropoutParameter);
            // Seed the kernel's RNG from a fresh GUID hash each call.
            OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.InputDropout, 3, (IntPtr)sizeof(ulong), (ulong)Guid.NewGuid().GetHashCode());
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "InputDropout: Cl.SetKernelArg");

            // Run kernel
            OpenCLSpace.ClError = Cl.EnqueueNDRangeKernel(OpenCLSpace.Queue, OpenCLSpace.InputDropout, 1, null, dropoutGlobalWorkSizePtr, dropoutLocalWorkSizePtr, 0, null, out OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "InputDropout: Cl.EnqueueNDRangeKernel");

            OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

            OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
            OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");
        }
#else
        outputNeurons.SetHost(m, dataSet.Data[iExamples[m]]);
#endif
    }

#if OPENCL_ENABLED
    OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");
#endif

#if TIMING_LAYERS
    Utils.InputFeedTimer.Stop();
#endif
}
/// <summary>
/// Applies the previously computed update speeds to this batch-normalization layer's
/// gamma and beta parameters via the BNFCUpdateParameters kernel.
/// </summary>
/// <param name="weightDecayCoeff">NOTE(review): this parameter is never used in the
/// body — no weight decay is applied to gamma/beta here. Confirm whether that is
/// intentional (BN parameters are often excluded from decay) or an omission.</param>
public override void UpdateParameters(double weightDecayCoeff)
{
#if TIMING_LAYERS
    Utils.BNFCUpdateParametersTimer.Start();
#endif

    // Set kernel arguments: parameters, their update speeds, and the unit count.
    OpenCLSpace.ClError = Cl.SetKernelArg(OpenCLSpace.BNFCUpdateParameters, 0, gammaGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCUpdateParameters, 1, betaGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCUpdateParameters, 2, gammaSpeedGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCUpdateParameters, 3, betaSpeedGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCUpdateParameters, 4, (IntPtr)sizeof(int), nInputUnits);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.SetKernelArg");

    OpenCLSpace.ClError = Cl.EnqueueNDRangeKernel(OpenCLSpace.Queue, OpenCLSpace.BNFCUpdateParameters, 1, null, nUnitsGlobalWorkSizePtr, optimalLocalWorkSizePtr, 0, null, out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.EnqueueNDRangeKernel");

    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");

    // (Commented-out debug scaffolding removed here: it read gammaGPU/betaGPU back to
    // the host and printed the updated values to the console.)

#if TIMING_LAYERS
    Utils.BNFCUpdateParametersTimer.Stop();
#endif
}
/// <summary>
/// Updates the momentum "speed" buffers for this batch-normalization layer's gamma
/// and beta parameters from the current output deltas and normalized inputs, via the
/// BNFCUpdateSpeeds kernel (which also writes the raw gradients into
/// deltaGammaGPU/deltaBetaGPU).
/// </summary>
/// <param name="learningRate">Learning rate, cast to float for the kernel.</param>
/// <param name="momentumMultiplier">Momentum coefficient, cast to float for the kernel.</param>
public override void UpdateSpeeds(double learningRate, double momentumMultiplier)
{
#if TIMING_LAYERS
    // Fixed: this previously called .Stop() at method entry (copy-paste slip), so the
    // timer never accumulated — every sibling method calls .Start() here (cf.
    // UpdateParameters, FeedData).
    Utils.BNFCUpdateSpeedsTimer.Start();
#endif

    // Set kernel arguments: speed buffers, inputs to the gradient computation,
    // gradient output buffers, dimensions, and hyperparameters.
    OpenCLSpace.ClError = Cl.SetKernelArg(OpenCLSpace.BNFCUpdateSpeeds, 0, gammaSpeedGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCUpdateSpeeds, 1, betaSpeedGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCUpdateSpeeds, 2, outputNeurons.DeltaGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCUpdateSpeeds, 3, normalizedInputGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCUpdateSpeeds, 4, deltaGammaGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCUpdateSpeeds, 5, deltaBetaGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCUpdateSpeeds, 6, (IntPtr)sizeof(int), nInputUnits);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCUpdateSpeeds, 7, (IntPtr)sizeof(int), inputNeurons.MiniBatchSize);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCUpdateSpeeds, 8, (IntPtr)sizeof(float), (float)momentumMultiplier);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCUpdateSpeeds, 9, (IntPtr)sizeof(float), (float)learningRate);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.SetKernelArg");

    OpenCLSpace.ClError = Cl.EnqueueNDRangeKernel(OpenCLSpace.Queue, OpenCLSpace.BNFCUpdateSpeeds, 1, null, nUnitsGlobalWorkSizePtr, optimalLocalWorkSizePtr, 0, null, out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.EnqueueNDRangeKernel");

    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");

    // (Commented-out debug scaffolding removed here: it read deltaGammaGPU/deltaBetaGPU
    // back to the host and printed the gradients to the console.)

#if TIMING_LAYERS
    Utils.BNFCUpdateSpeedsTimer.Stop();
#endif
}
/// <summary>
/// Entry point: sets up OpenCL, imports the preprocessed GTSRB data sets
/// (43 classes), then sweeps a range of learning rates. For each rate a
/// VGG-style network with ReLU activations is built and trained for one
/// epoch; afterwards the same architecture with ELU activations is built
/// (its training section — like the ResNet ReLU/ELU sweeps, the gradient
/// check, and the final test-set evaluation — was commented out in the
/// original source and is therefore not executed).
/// </summary>
static void Main(string[] args)
{
    // Prints the three-line section banner used throughout this program.
    Action<string> printBanner = title =>
    {
        Console.WriteLine("\n=========================================");
        Console.WriteLine(title);
        Console.WriteLine("=========================================\n");
    };

    // ------------------------------------------------------------------
    // OpenCL setup
    // ------------------------------------------------------------------
    printBanner(" OpenCL setup");

    OpenCLSpace.SetupSpace(4); // hard-coded OpenCL device index
    OpenCLSpace.KernelsPath = "../../../Kernels";
    OpenCLSpace.LoadKernels();

    // ------------------------------------------------------------------
    // Data import: GTSRB training / validation / test sets (grayscale,
    // preprocessed to 32x32, 43 classes)
    // ------------------------------------------------------------------
    printBanner(" Importing data");

    string GTSRBtrainingDataGS = "../../../../GTSRB/Preprocessed/14_training_images.dat";
    string GTSRBtrainingLabelsGS = "../../../../GTSRB/Preprocessed/14_training_classes.dat";

    string GTSRBvalidationDataGS = "../../../../GTSRB/Preprocessed/14_validation_images.dat";
    string GTSRBvalidationLabelsGS = "../../../../GTSRB/Preprocessed/14_validation_classes.dat";

    string GTSRBtestDataGS = "../../../../GTSRB/Preprocessed/14_test_images.dat";
    string GTSRBtestLabelsGS = "../../../../GTSRB/Preprocessed/test_labels_full.dat";

    Console.WriteLine("Importing training set...");
    DataSet trainingSet = new DataSet(43);
    trainingSet.ReadData(GTSRBtrainingDataGS, GTSRBtrainingLabelsGS);

    Console.WriteLine("Importing validation set...");
    DataSet validationSet = new DataSet(43);
    validationSet.ReadData(GTSRBvalidationDataGS, GTSRBvalidationLabelsGS);

    Console.WriteLine("Importing test set...");
    DataSet testSet = new DataSet(43);
    testSet.ReadData(GTSRBtestDataGS, GTSRBtestLabelsGS);

    // Learning rates to sweep, from 1e-2 down to 1e-7.
    double[] eta = { 1e-2, 3e-3, 1e-3, 3e-4, 1e-4, 3e-5, 1e-5, 3e-6, 1e-6, 3e-7, 1e-7 };

    // ------------------------------------------------------------------
    // VGG_ReLU sweep: build, train for one epoch, discard.
    // ------------------------------------------------------------------
    foreach (double learningRate in eta)
    {
        Console.WriteLine("\n\n\n New learning rate = {0}", learningRate);

        printBanner(" Neural network creation");

        NeuralNetwork network = new NeuralNetwork("EtaTest_VGG_ReLU");

        network.AddLayer(new InputLayer(1, 32, 32));

        network.AddLayer(new ConvolutionalLayer(3, 32, 1, 1));
        network.AddLayer(new ReLU());
        network.AddLayer(new ConvolutionalLayer(3, 32, 1, 1));
        network.AddLayer(new ReLU());
        network.AddLayer(new MaxPooling(2, 2));

        network.AddLayer(new ConvolutionalLayer(3, 64, 1, 1));
        network.AddLayer(new ReLU());
        network.AddLayer(new ConvolutionalLayer(3, 64, 1, 1));
        network.AddLayer(new ReLU());
        network.AddLayer(new MaxPooling(2, 2));

        network.AddLayer(new ConvolutionalLayer(3, 128, 1, 1));
        network.AddLayer(new ReLU());
        network.AddLayer(new ConvolutionalLayer(3, 128, 1, 1));
        network.AddLayer(new ReLU());
        network.AddLayer(new MaxPooling(2, 2));

        network.AddLayer(new FullyConnectedLayer(128));
        network.AddLayer(new ReLU());
        network.AddLayer(new FullyConnectedLayer(128));
        network.AddLayer(new ReLU());

        network.AddLayer(new FullyConnectedLayer(43));
        network.AddLayer(new SoftMax());

        NetworkTrainer.TrainingMode = "new";

        printBanner(" Network training");

        // Output paths for the per-epoch loss/error logs and saved networks.
        string trainingSavePath = "../../../../Results/LossError/";
        NetworkTrainer.TrainingEpochSavePath = trainingSavePath + network.Name + "_trainingEpochs.txt";
        NetworkTrainer.ValidationEpochSavePath = trainingSavePath + network.Name + "_validationEpochs.txt";
        NetworkTrainer.NetworkOutputFilePath = "../../../../Results/Networks/";

        NetworkTrainer.MomentumMultiplier = 0.9;
        NetworkTrainer.WeightDecayCoeff = 0.000;
        NetworkTrainer.MaxTrainingEpochs = 1;
        NetworkTrainer.EpochsBeforeRegularization = 0;
        NetworkTrainer.MiniBatchSize = 64;
        NetworkTrainer.ConsoleOutputLag = 1; // 1 = print every epoch, N = print every N epochs
        NetworkTrainer.EvaluateBeforeTraining = true;
        NetworkTrainer.DropoutFullyConnected = 1.0; // 1.0 = dropout disabled
        NetworkTrainer.DropoutConvolutional = 1.0;
        NetworkTrainer.DropoutInput = 1.0;
        NetworkTrainer.Patience = 1000;
        NetworkTrainer.LearningRateDecayFactor = Math.Sqrt(10.0);
        NetworkTrainer.MaxConsecutiveAnnealings = 3;
        NetworkTrainer.WeightMaxNorm = Double.PositiveInfinity; // max-norm constraint disabled

        NetworkTrainer.LearningRate = learningRate;
        NetworkTrainer.Train(network, trainingSet, null);

        // Drop the network and force a collection before the next sweep step.
        network = null;
        GC.Collect();
    }

    // ------------------------------------------------------------------
    // VGG_ELU sweep: identical architecture with ELU(1.0) activations.
    // NOTE: in the original source the training block for this variant was
    // commented out, as were the ResNet_ReLU / ResNet_ELU sweeps (built from
    // ResidualModule + AveragePooling layers), the GradientChecker call, and
    // the final evaluation of the best saved network on the validation and
    // test sets. Only network construction runs here.
    // ------------------------------------------------------------------
    foreach (double learningRate in eta)
    {
        Console.WriteLine("\n\n\n New learning rate = {0}", learningRate);

        printBanner(" Neural network creation");

        NeuralNetwork network = new NeuralNetwork("EtaTest_VGG_ELU");

        network.AddLayer(new InputLayer(1, 32, 32));

        network.AddLayer(new ConvolutionalLayer(3, 32, 1, 1));
        network.AddLayer(new ELU(1.0f));
        network.AddLayer(new ConvolutionalLayer(3, 32, 1, 1));
        network.AddLayer(new ELU(1.0f));
        network.AddLayer(new MaxPooling(2, 2));

        network.AddLayer(new ConvolutionalLayer(3, 64, 1, 1));
        network.AddLayer(new ELU(1.0f));
        network.AddLayer(new ConvolutionalLayer(3, 64, 1, 1));
        network.AddLayer(new ELU(1.0f));
        network.AddLayer(new MaxPooling(2, 2));

        network.AddLayer(new ConvolutionalLayer(3, 128, 1, 1));
        network.AddLayer(new ELU(1.0f));
        network.AddLayer(new ConvolutionalLayer(3, 128, 1, 1));
        network.AddLayer(new ELU(1.0f));
        network.AddLayer(new MaxPooling(2, 2));

        network.AddLayer(new FullyConnectedLayer(128));
        network.AddLayer(new ELU(1.0f));
        network.AddLayer(new FullyConnectedLayer(128));
        network.AddLayer(new ELU(1.0f));

        network.AddLayer(new FullyConnectedLayer(43));
        network.AddLayer(new SoftMax());

        NetworkTrainer.TrainingMode = "new";
    }
}
/// <summary>
/// Forward pass of the fully-connected batch-normalization layer.
/// While training (or pre-inference) it computes per-minibatch means and
/// variances and folds them into the cumulative running averages; it then
/// normalizes the input and applies the learned scale (gamma) and shift
/// (beta), writing the result to the output activations.
/// </summary>
/// <exception cref="InvalidOperationException">
/// Thrown when the layer is in none of the training / pre-inference /
/// inference modes.
/// </exception>
public override void FeedForward()
{
#if TIMING_LAYERS
    Utils.BNFCForwardTimer.Start();
#endif

    // At the start of each epoch, reset the averaging counter and wipe the
    // cumulative statistics (wiping is theoretically redundant, but cheap).
    if (isEpochBeginning)
    {
        iCumulativeAverage = 0;

        OpenCLSpace.WipeBuffer(cumulativeMeanGPU, nInputUnits, typeof(float));
        OpenCLSpace.WipeBuffer(cumulativeVarianceGPU, nInputUnits, typeof(float));

        isEpochBeginning = false;
    }

    // If training (or pre-inference), compute per-minibatch means and
    // variances and update the cumulative averages on the GPU.
    if (isTraining || isPreInference)
    {
        OpenCLSpace.ClError = Cl.SetKernelArg(OpenCLSpace.BNFCComputeMeansVariances, 0, meanGPU);
        OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCComputeMeansVariances, 1, varianceGPU);
        OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCComputeMeansVariances, 2, cumulativeMeanGPU);
        OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCComputeMeansVariances, 3, cumulativeVarianceGPU);
        OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCComputeMeansVariances, 4, inputNeurons.ActivationsGPU);
        OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCComputeMeansVariances, 5, (IntPtr)sizeof(int), nInputUnits);
        OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCComputeMeansVariances, 6, (IntPtr)sizeof(int), inputNeurons.MiniBatchSize);
        OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCComputeMeansVariances, 7, (IntPtr)sizeof(int), Convert.ToInt32(isPreInference));
        OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCComputeMeansVariances, 8, (IntPtr)sizeof(int), iCumulativeAverage);
        OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.SetKernelArg");

        // One-dimensional launch over the units.
        OpenCLSpace.ClError = Cl.EnqueueNDRangeKernel(OpenCLSpace.Queue,
                                                      OpenCLSpace.BNFCComputeMeansVariances,
                                                      1,
                                                      null,
                                                      nUnitsGlobalWorkSizePtr,
                                                      optimalLocalWorkSizePtr,
                                                      0,
                                                      null,
                                                      out OpenCLSpace.ClEvent);
        OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.EnqueueNDRangeKernel");

        OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
        OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

        if (isPreInference)
        {
            iCumulativeAverage++; // one more minibatch folded into the running averages
        }
    }

    // NOTE: a commented-out debugging section used to live here; it read
    // cumulativeMeanGPU and cumulativeVarianceGPU back to the host and printed
    // both to the console. Re-create it with Cl.EnqueueReadBuffer if needed.

    // Normalize the input, then scale by gamma and shift by beta.
    OpenCLSpace.ClError = Cl.SetKernelArg(OpenCLSpace.BNFCForward, 0, outputNeurons.ActivationsGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCForward, 1, normalizedInputGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCForward, 2, inputNeurons.ActivationsGPU);
    if (isTraining)
    {
        // Training: normalize with the statistics of the current minibatch.
        OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCForward, 3, meanGPU);
        OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCForward, 4, varianceGPU);
    }
    else if (isPreInference || isInference)
    {
        // (Pre-)inference: normalize with the cumulative running averages.
        OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCForward, 3, cumulativeMeanGPU);
        OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCForward, 4, cumulativeVarianceGPU);
    }
    else
    {
        // BUG FIX: the message used to say "BatchNormConv" (copy-paste from the
        // convolutional variant); this is the fully-connected BN layer.
        throw new InvalidOperationException("ERROR: BatchNormFC is currently not in training mode, nor pre-inference, nor inference.");
    }
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCForward, 5, gammaGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCForward, 6, betaGPU);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCForward, 7, (IntPtr)sizeof(int), nInputUnits);
    OpenCLSpace.ClError |= Cl.SetKernelArg(OpenCLSpace.BNFCForward, 8, (IntPtr)sizeof(int), inputNeurons.MiniBatchSize);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.SetKernelArg");

    // One-dimensional launch over all activations (units x minibatch).
    OpenCLSpace.ClError = Cl.EnqueueNDRangeKernel(OpenCLSpace.Queue,
                                                  OpenCLSpace.BNFCForward,
                                                  1,
                                                  null,
                                                  nActivationsGlobalWorkSizePtr,
                                                  optimalLocalWorkSizePtr,
                                                  0,
                                                  null,
                                                  out OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.EnqueueNDRangeKernel");

    OpenCLSpace.ClError = Cl.ReleaseEvent(OpenCLSpace.ClEvent);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.ReleaseEvent");

    OpenCLSpace.ClError = Cl.Finish(OpenCLSpace.Queue);
    OpenCLSpace.CheckErr(OpenCLSpace.ClError, "Cl.Finish");

#if TIMING_LAYERS
    Utils.BNFCForwardTimer.Stop();
#endif
}