Code example #1
0
        /// <summary>
        /// Computes the activations of one layer on the OpenCL device:
        /// output = activation(weightMx * prevActivations + bias), one work item per output neuron.
        /// </summary>
        /// <param name="weightMx">Layer weight matrix; rows = neurons in this layer, columns = inputs per neuron.</param>
        /// <param name="bias">Per-neuron bias vector.</param>
        /// <param name="prevActivations">Activations of the previous layer (the kernel's input vector).</param>
        /// <param name="sigmoidFunction">Activation function, passed to the kernel by its OpenCL function id.</param>
        /// <returns>The activation vector of this layer (one element per weight-matrix row).</returns>
        public override unsafe float[] CalculateLayer(float[,] weightMx, float[] bias, float[] prevActivations, IActivationFunction sigmoidFunction)
        {
            int matrixRows    = weightMx.GetLength(0);
            int matrixColumns = weightMx.GetLength(1);

            float[] output = new float[matrixRows];
            // Kernel config block layout: [0]=rows, [1]=columns, [2]=activation function id.
            int[] configParams = new int[] { matrixRows, matrixColumns, sigmoidFunction.GetOpenCLFunctionId() };

            // Pin the managed arrays so the device buffers can be initialized directly from host memory.
            fixed(int *configPtr = configParams)
            {
                fixed(float *weightArrayPtr = weightMx, biasPtr = bias, prevActivationPtr = prevActivations)
                {
                    // Inputs are copied from the pinned host pointers; the output buffer is device-side only.
                    MemoryAllocation mem_param_weightMx       = computeFramework.GetMemoryFor(weightMx.Length * sizeof(float), MemoryFlag.ReadOnly | MemoryFlag.CopyHostPointer, new IntPtr(weightArrayPtr));
                    MemoryAllocation mem_param_bias           = computeFramework.GetMemoryFor(bias.Length * sizeof(float), MemoryFlag.ReadOnly | MemoryFlag.CopyHostPointer, new IntPtr(biasPtr));
                    MemoryAllocation mem_param_prevActivation = computeFramework.GetMemoryFor(prevActivations.Length * sizeof(float), MemoryFlag.ReadOnly | MemoryFlag.CopyHostPointer, new IntPtr(prevActivationPtr));
                    MemoryAllocation mem_param_config         = computeFramework.GetMemoryFor(configParams.Length * sizeof(int), MemoryFlag.ReadOnly | MemoryFlag.CopyHostPointer, new IntPtr(configPtr));
                    MemoryAllocation mem_param_output         = computeFramework.GetMemoryFor(matrixRows * sizeof(float), MemoryFlag.WriteOnly, IntPtr.Zero);

                    computeFramework.SetKernelArg(calcLayerKernel, 0, mem_param_weightMx);
                    computeFramework.SetKernelArg(calcLayerKernel, 1, mem_param_bias);
                    computeFramework.SetKernelArg(calcLayerKernel, 2, mem_param_prevActivation);
                    computeFramework.SetKernelArg(calcLayerKernel, 3, mem_param_config);
                    computeFramework.SetKernelArg(calcLayerKernel, 4, mem_param_output);

                    // Round the global size up to a multiple of the workgroup size; extra work items
                    // are expected to be masked out by the kernel via the row count in the config block.
                    int localWorkgroupSize = 32;
                    int globalWorkSize     = ExtendGlobalWorkSize(matrixRows, localWorkgroupSize);

                    computeFramework.EnqueueKernel(calcLayerKernel, new IntPtr[] { new IntPtr(globalWorkSize) }, new IntPtr[] { new IntPtr(localWorkgroupSize) });

                    fixed(float *outputPtr = output)
                    {
                        // Blocking read: waits for the kernel to finish, then copies the result to the host.
                        computeFramework.ReadBuffer(mem_param_output, true, UIntPtr.Zero, new UIntPtr((uint)matrixRows * sizeof(float)), new IntPtr(outputPtr));
                    }
                }
            }

            computeFramework.UnuseMemoryAllocations();

            return output;
        }
Code example #2
0
        /// <summary>
        /// Runs the forward and backward pass for one minibatch on the OpenCL device and returns the
        /// accumulated weight/bias gradient for every neuron in the network.
        /// The config-buffer layout (indices 0..8 plus per-layer neuron counts) is a contract shared
        /// with the forwardPass/backwardPassKernel OpenCL kernels.
        /// </summary>
        /// <param name="network">The network whose gradient is being computed.</param>
        /// <param name="suite">Training suite providing the samples and the cost function id.</param>
        /// <param name="trainingDataBegin">Index of the first sample of the minibatch (inclusive).</param>
        /// <param name="trainingDataEnd">Index one past the last sample of the minibatch (exclusive).</param>
        /// <returns>Per-layer, per-neuron gradient data in the layout produced by Utils.CreateGradientVector.</returns>
        public override unsafe List <List <NeuronData> > CalculateAccumulatedGradientForMinibatch(Network network, TrainingSuite suite, int trainingDataBegin, int trainingDataEnd)
        {
            int trainingSamples = trainingDataEnd - trainingDataBegin;
            var ret             = Utils.CreateGradientVector(network);

            int[] networkConfigParams     = null;
            int   totalWeightAndBiasCount = 0;
            int   delta_k_vectorSize      = 0;
            int   totalActivationCount    = 0; //Add
            int   inputActivationCount    = network.layers.First().GetWeightsPerNeuron();

            // Build the config buffer consumed by both kernels. Indices 6 and 7 are placeholders
            // that are patched after the per-layer loop below.
            {
                foreach (var item in network.layers)
                {
                    totalActivationCount += item.GetNeuronCount();
                }

                List <int> networkConfigParamsList = new List <int>();
                //0
                networkConfigParamsList.Add(0);                                                          //layer index to be processed
                //1
                networkConfigParamsList.Add(network.layers[0].activationFunction.GetOpenCLFunctionId()); //Activation function
                //2
                networkConfigParamsList.Add(network.layers.Count);                                       //Layer count
                //3
                networkConfigParamsList.Add(trainingSamples);                                            //numTrainingSamples
                //4
                networkConfigParamsList.Add(suite.config.costFunction.GetOpenCLFunctionID());            //Cost function
                //5
                networkConfigParamsList.Add(totalActivationCount);                                       //totalActivationCount
                //6
                networkConfigParamsList.Add(0);                                                          //totalWeightsAndBiases
                //7
                networkConfigParamsList.Add(0);                                                          //widestLayerNeuronCount
                //8
                networkConfigParamsList.Add(network.layers.First().GetWeightsPerNeuron());               //Input count
                // Indices 9..(9 + layerCount - 1): neuron count of each layer, in order.
                for (int i = 0; i < network.layers.Count; i++)
                {
                    networkConfigParamsList.Add(network.layers[i].GetNeuronCount()); //Layer neuron count
                    totalWeightAndBiasCount += network.layers[i].biases.Length;
                    totalWeightAndBiasCount += network.layers[i].weightMx.Length;
                    if (i > 0) //The first layer will not write the delta_k vector, so it shouldn't contribute to its size.
                    {
                        delta_k_vectorSize = Math.Max(network.layers[i].GetNeuronCount(), delta_k_vectorSize);
                    }
                }

                networkConfigParamsList[6] = totalWeightAndBiasCount;
                // NOTE(review): index 7 is labeled "widestLayerNeuronCount" above but receives
                // delta_k_vectorSize (widest layer excluding layer 0) — presumably intentional since
                // the kernels size their delta_k scratch from it; confirm against the kernel source.
                networkConfigParamsList[7] = delta_k_vectorSize;

                networkConfigParams = networkConfigParamsList.ToArray();
            }

            // Host-side staging buffers; sizes are in elements, device sizes below are in bytes (* 4).
            float[] desiredOutputs   = new float[network.layers.Last().GetNeuronCount() * trainingSamples];
            float[] outputGradient   = new float[totalWeightAndBiasCount];//Memory layout is: [weights, biases for trainingsample0, layer0-N][weights, biases for trainingsample1, layer0-N] ...
            float[] inputParameters  = new float[trainingSamples * inputActivationCount];
            float[] weightsAndBiases = new float[totalWeightAndBiasCount];

            // Pin every host array for the duration of the GPU work so device buffers can be
            // created straight from host pointers.
            fixed(int *networkConfigParamsPtr = networkConfigParams)
            {
                fixed(float *outputGradientPtr = outputGradient, desiredOutputsPtr = desiredOutputs, inputParametersPtr = inputParameters, weightsAndBiasesPtr = weightsAndBiases)
                {
                    MemoryAllocation mem_NetworkConfigParams = computeFramework.GetMemoryFor(networkConfigParams.Length * 4, MemoryFlag.ReadOnly | MemoryFlag.CopyHostPointer, new IntPtr(networkConfigParamsPtr));

                    // Pack the minibatch's input activations contiguously, one sample after another.
                    for (int i = 0; i < trainingSamples; ++i)
                    {
                        Buffer.BlockCopy(suite.trainingData[trainingDataBegin + i].input, 0, inputParameters, i * inputActivationCount * 4, inputActivationCount * 4);
                    }
                    MemoryAllocation mem_InputActivations = computeFramework.GetMemoryFor(inputParameters.Length * 4, MemoryFlag.ReadOnly | MemoryFlag.CopyHostPointer, new IntPtr(inputParametersPtr));

                    ///Contains the whole network's activation values, and Z values for each training sample
                    ///Memory layout for one layer is like this: [...input values...][...first layer's activations...][...second layer's activations]...[last layer's activations][first layer's z values][second layer's zvalues]...[last layer's z values]
                    ///After that, the next layer's same values are there
                    MemoryAllocation mem_activationsAndZValues = computeFramework.GetMemoryFor(totalActivationCount * trainingSamples * 2 * 4, MemoryFlag.ReadWrite, IntPtr.Zero);

                    // Flatten all layers' weights then biases into one buffer, layer by layer
                    // (offsets here are in bytes, as required by Buffer.BlockCopy).
                    {
                        int offset = 0;
                        foreach (var layer in network.layers)
                        {
                            Buffer.BlockCopy(layer.weightMx, 0, weightsAndBiases, offset, layer.weightMx.Length * 4);
                            offset += layer.weightMx.Length * 4;
                            Buffer.BlockCopy(layer.biases, 0, weightsAndBiases, offset, layer.biases.Length * 4);
                            offset += layer.biases.Length * 4;
                        }
                    }
                    MemoryAllocation mem_weightsAndBiases = computeFramework.GetMemoryFor(weightsAndBiases.Length * 4, MemoryFlag.ReadOnly | MemoryFlag.CopyHostPointer, new IntPtr(weightsAndBiasesPtr));

                    //delta_k_vector is double buffered (hence the * 2). In a pass, the previous delta_k values are read, and the next ones are written
                    //Memory layout is: [delta_k_vector buffer1 of trainingSample0][delta_k_vector buffer2 of trainingSample0] [delta_k_vector buffer1 of trainingSample1][delta_k_vector buffer2 of trainingSample1] ...
                    // Math.Max(1, ...) guards against a zero-byte allocation for single-layer networks.
                    MemoryAllocation mem_delta_k_vector = computeFramework.GetMemoryFor(Math.Max(1, delta_k_vectorSize * trainingSamples * 2 * 4), MemoryFlag.ReadWrite, IntPtr.Zero);

                    computeFramework.SetKernelArg(forwardPass, 0, mem_NetworkConfigParams);
                    computeFramework.SetKernelArg(forwardPass, 1, mem_activationsAndZValues);
                    computeFramework.SetKernelArg(forwardPass, 2, mem_InputActivations);
                    computeFramework.SetKernelArg(forwardPass, 3, mem_weightsAndBiases);

                    // Per-layer [layer index, activation function id] pairs; a pair is uploaded into
                    // the head of the config buffer before each per-layer kernel launch.
                    int[] dynamic_kernel_arguments = new int[network.layers.Count * 2];

                    for (int i = 0; i < network.layers.Count; ++i)
                    {
                        dynamic_kernel_arguments[i * 2]     = i;                                                          //Layer index
                        dynamic_kernel_arguments[i * 2 + 1] = network.layers[i].activationFunction.GetOpenCLFunctionId(); //Layer's activation function
                    }
                    int num_dynamic_kernel_args = dynamic_kernel_arguments.Length / network.layers.Count;

                    // 2D dispatch: X iterates over neurons of the current layer, Y over training samples.
                    var localWorkGroupSize = new IntPtr[] { new IntPtr(deviceConfig.idealWorkgroupSizeX), new IntPtr(deviceConfig.idealWorkgroupSizeY) };
                    var globalWorkSize     = new IntPtr[] { new IntPtr(0), new IntPtr(ExtendGlobalWorkSize(trainingSamples, localWorkGroupSize[1].ToInt32())) };

                    #region Forward pass
                    for (int i = 0; i < network.layers.Count; ++i)
                    {
                        if (i > 0)
                        {
                            // NOTE(review): appears to patch this layer's [index, activation id] pair into
                            // the config buffer (layer 0's values are already there from construction);
                            // confirm UploadToMemory's destination/source offset semantics.
                            computeFramework.UploadToMemory(mem_NetworkConfigParams, 0, i * num_dynamic_kernel_args, dynamic_kernel_arguments, false, num_dynamic_kernel_args);
                        }

                        globalWorkSize[0] = new IntPtr(ExtendGlobalWorkSize(network.layers[i].GetNeuronCount(), localWorkGroupSize[0].ToInt32()));
                        computeFramework.EnqueueKernel(forwardPass, globalWorkSize, localWorkGroupSize);
                        // todo: run forward pass
                    }
                    #endregion

                    #region backward pass

                    // Pack the minibatch's expected outputs contiguously, one sample after another.
                    int desiredOutputByteSizePerTrainigSample = network.layers.Last().GetNeuronCount() * 4;
                    for (int i = 0; i < trainingSamples; ++i)
                    {
                        Buffer.BlockCopy(suite.trainingData[trainingDataBegin + i].desiredOutput, 0, desiredOutputs, i * desiredOutputByteSizePerTrainigSample, desiredOutputByteSizePerTrainigSample);
                    }
                    var mem_desired_outputs = computeFramework.GetMemoryFor(desiredOutputs.Length * 4, MemoryFlag.ReadOnly | MemoryFlag.CopyHostPointer, new IntPtr(desiredOutputsPtr));

                    var mem_param_gradient = computeFramework.GetMemoryFor(outputGradient.Length * 4, MemoryFlag.ReadWrite | MemoryFlag.CopyHostPointer, new IntPtr(outputGradientPtr));

                    computeFramework.SetKernelArg(backwardPassKernel, 0, mem_NetworkConfigParams);
                    computeFramework.SetKernelArg(backwardPassKernel, 1, mem_activationsAndZValues);
                    computeFramework.SetKernelArg(backwardPassKernel, 2, mem_delta_k_vector);
                    computeFramework.SetKernelArg(backwardPassKernel, 3, mem_param_gradient);
                    computeFramework.SetKernelArg(backwardPassKernel, 4, mem_desired_outputs);
                    computeFramework.SetKernelArg(backwardPassKernel, 5, mem_InputActivations);
                    computeFramework.SetKernelArg(backwardPassKernel, 6, mem_weightsAndBiases);

                    //Run backward pass for all hidden layers
                    // Layers are processed back to front; the output layer (i == Count - 1) uses the
                    // config values left in place by the last forward-pass upload, so it skips the upload.
                    for (int i = network.layers.Count - 1; i >= 0; --i)
                    {
                        globalWorkSize[0] = new IntPtr(ExtendGlobalWorkSize(network.layers[i].GetNeuronCount(), localWorkGroupSize[0].ToInt32()));
                        if (i != network.layers.Count - 1)
                        {
                            computeFramework.UploadToMemory(mem_NetworkConfigParams, 0, i * num_dynamic_kernel_args, dynamic_kernel_arguments, false, num_dynamic_kernel_args);
                        }
                        computeFramework.EnqueueKernel(backwardPassKernel, globalWorkSize, localWorkGroupSize);
                    }
                    #endregion

                    computeFramework.FlushCommandBuffer();

                    // Blocking read of the accumulated gradient back into the pinned host array.
                    computeFramework.ReadBuffer(mem_param_gradient, true, new UIntPtr(0), new UIntPtr(mem_param_gradient.bufferSizeInBytes), new IntPtr(outputGradientPtr));
                }
            }

            computeFramework.UnuseMemoryAllocations();

            // Unpack the flat gradient buffer into the per-neuron structure:
            // for each neuron, its weight gradients are followed by its bias gradient.
            int gradIdx = 0;
            foreach (var layer in ret)
            {
                foreach (var neuron in layer)
                {
                    Buffer.BlockCopy(outputGradient, gradIdx * 4, neuron.weights, 0, neuron.weights.Length * 4);
                    gradIdx    += neuron.weights.Length;
                    neuron.bias = outputGradient[gradIdx];
                    ++gradIdx;
                }
            }

            return(ret);
        }