public override unsafe float[] CalculateLayer(float[,] weightMx, float[] bias, float[] prevActivations, IActivationFunction sigmoidFunction)
        {
            int matrixRows = weightMx.GetLength(0);

            float[] output       = new float[matrixRows];
            int[]   configParams = new int[] { /*rows: */ weightMx.GetLength(0), /*cols: */ weightMx.GetLength(1), /*ApplySigmoid*/ sigmoidFunction.GetOpenCLFunctionId() };

            fixed(int *configPtr = configParams)
            {
                fixed(float *weightArrayPtr = weightMx, biasPtr = bias, prevActivationPtr = prevActivations)
                {
                    MemoryAllocation mem_param_weightMx, mem_param_bias, mem_param_prevActivation, mem_param_config, mem_param_output;

                    mem_param_weightMx       = computeFramework.GetMemoryFor(weightMx.Length * 4, MemoryFlag.ReadOnly | MemoryFlag.CopyHostPointer, new IntPtr(weightArrayPtr));
                    mem_param_bias           = computeFramework.GetMemoryFor(bias.Length * 4, MemoryFlag.ReadOnly | MemoryFlag.CopyHostPointer, new IntPtr(biasPtr));
                    mem_param_prevActivation = computeFramework.GetMemoryFor(prevActivations.Length * 4, MemoryFlag.ReadOnly | MemoryFlag.CopyHostPointer, new IntPtr(prevActivationPtr));
                    mem_param_config         = computeFramework.GetMemoryFor(configParams.Length * 4, MemoryFlag.ReadOnly | MemoryFlag.CopyHostPointer, new IntPtr(configPtr));
                    mem_param_output         = computeFramework.GetMemoryFor(matrixRows * 4, MemoryFlag.WriteOnly, IntPtr.Zero);

                    computeFramework.SetKernelArg(calcLayerKernel, 0, mem_param_weightMx);
                    computeFramework.SetKernelArg(calcLayerKernel, 1, mem_param_bias);
                    computeFramework.SetKernelArg(calcLayerKernel, 2, mem_param_prevActivation);
                    computeFramework.SetKernelArg(calcLayerKernel, 3, mem_param_config);
                    computeFramework.SetKernelArg(calcLayerKernel, 4, mem_param_output);

                    int localWorkgroupSize = 32;
                    int globalWorkSize     = ExtendGlobalWorkSize(matrixRows, localWorkgroupSize);

                    computeFramework.EnqueueKernel(calcLayerKernel, new IntPtr[] { new IntPtr(globalWorkSize) }, new IntPtr[] { new IntPtr(localWorkgroupSize) });


                    fixed(float *outputPtr = output)
                    {
                        computeFramework.ReadBuffer(mem_param_output, true, UIntPtr.Zero, new UIntPtr((uint)matrixRows * 4U), new IntPtr(outputPtr));
                    }
                }
            }

            computeFramework.UnuseMemoryAllocations();

            return(output);
        }
        private unsafe float[] EvaluateNetwork(float[] input, Network network, ref List <float[]> z_values)
        {
            bool z_values_mode = z_values != null;

            int largest_layer_size     = input.Length;
            int largest_weight_mx_size = 0;
            int largest_bias_size      = 0;

            foreach (var layer in network.layers)
            {
                largest_layer_size     = Math.Max(layer.GetNeuronCount(), largest_layer_size);
                largest_weight_mx_size = Math.Max(layer.weightMx.Length, largest_weight_mx_size);
                largest_bias_size      = Math.Max(layer.biases.Length, largest_bias_size);
            }

            float[]          output             = new float[network.layers.Last().GetNeuronCount()];
            int[]            configParams       = new int[3];
            MemoryAllocation mem_param_config   = computeFramework.GetMemoryFor(configParams.Length * 4, MemoryFlag.ReadOnly | MemoryFlag.CopyHostPointer, IntPtr.Zero);
            MemoryAllocation mem_param_weightMx = computeFramework.GetMemoryFor(largest_weight_mx_size * 4, MemoryFlag.ReadOnly | MemoryFlag.CopyHostPointer, IntPtr.Zero);
            MemoryAllocation mem_param_bias     = computeFramework.GetMemoryFor(largest_bias_size * 4, MemoryFlag.ReadOnly | MemoryFlag.CopyHostPointer, IntPtr.Zero);

            fixed(float *input_ptr = input)
            {
                fixed(int *configPtr = configParams)
                {
                    MemoryAllocation mem_param_input  = computeFramework.GetMemoryFor(largest_layer_size * 4, MemoryFlag.ReadWrite | MemoryFlag.CopyHostPointer, new IntPtr(input_ptr), input.Length * 4);
                    MemoryAllocation mem_param_output = computeFramework.GetMemoryFor(largest_layer_size * 4, MemoryFlag.ReadWrite, IntPtr.Zero);

                    float[] layer_args = input;
                    for (int i = 0; i < network.layers.Count; ++i)
                    {
                        var layer      = network.layers[i];
                        int matrixRows = layer.weightMx.GetLength(0);
                        IActivationFunction activation = z_values_mode ? new PasstroughActivation() : layer.activationFunction;

                        fixed(float *weightArrayPtr = layer.weightMx, biasPtr = layer.biases, prevActivationPtr = layer_args)
                        {
                            computeFramework.UploadToMemory(mem_param_weightMx, 0, layer.weightMx.Length * 4, new IntPtr(weightArrayPtr), false);
                            computeFramework.UploadToMemory(mem_param_bias, 0, layer.biases.Length * 4, new IntPtr(biasPtr), false);

                            configParams[0] = layer.weightMx.GetLength(0); //rows
                            configParams[1] = layer.weightMx.GetLength(1); //columns
                            configParams[2] = activation.GetOpenCLFunctionId();
                            computeFramework.UploadToMemory(mem_param_config, 0, configParams.Length * 3, new IntPtr(configPtr), false);

                            computeFramework.SetKernelArg(calcLayerKernel, 0, mem_param_weightMx);
                            computeFramework.SetKernelArg(calcLayerKernel, 1, mem_param_bias);
                            computeFramework.SetKernelArg(calcLayerKernel, 2, mem_param_config);
                            computeFramework.SetKernelArg(calcLayerKernel, 3, mem_param_input);
                            computeFramework.SetKernelArg(calcLayerKernel, 4, mem_param_output);

                            int localWorkgroupSize = 32;
                            int globalWorkSize     = ExtendGlobalWorkSize(matrixRows, localWorkgroupSize);

                            computeFramework.EnqueueKernel(calcLayerKernel, new IntPtr[] { new IntPtr(globalWorkSize) }, new IntPtr[] { new IntPtr(localWorkgroupSize) });

                            if (z_values_mode || i == network.layers.Count - 1)
                            {
                                float[] target_array = z_values_mode ? new float[matrixRows] : output;
                                fixed(float *target_ptr = target_array)
                                {
                                    computeFramework.ReadBuffer(mem_param_output, true, UIntPtr.Zero, new UIntPtr((uint)matrixRows * 4U), new IntPtr(target_ptr));
                                    if (z_values_mode)
                                    {
                                        z_values.Add(target_array);
                                    }
                                }
                            }

                            MemoryAllocation tmp = mem_param_input;

                            mem_param_input  = mem_param_output;
                            mem_param_output = tmp;
                            computeFramework.FlushCommandBuffer();
                        }
                    }
                }
            }

            computeFramework.UnuseMemoryAllocations();

            return(output);
        }