Exemple #1
0
        /// <summary>
        /// Backward pass that runs two OpenCL kernels: <paramref name="backwardgWKernel"/>, whose
        /// output buffer is read back into <c>weight.Grad</c>, and <paramref name="backwardgXKernel"/>,
        /// whose output buffer is read back into a scratch array that is then added element-wise
        /// into <c>x.Grad</c>. When <paramref name="bias"/> is supplied, its gradient is delegated
        /// to <c>LinearFunc.CalcBiasGrad</c> before any kernel is launched.
        /// </summary>
        /// <param name="y">Output array; supplies <c>BatchCount</c> and, when no activation is given, <c>Grad</c> as the incoming gradient.</param>
        /// <param name="x">Input array; <c>Data</c> is uploaded for the weight kernel and <c>Grad</c> receives the accumulated result.</param>
        /// <param name="weight">Weight array; <c>Shape[0]</c> is used as the output count and <c>Shape[1]</c> as the input count.</param>
        /// <param name="bias">Optional bias array; when <c>null</c> the bias-gradient step is skipped.</param>
        /// <param name="backwardgWKernel">Kernel launched over an (input count, output count) global range.</param>
        /// <param name="backwardgXKernel">Kernel launched over an (input count, batch count) global range.</param>
        /// <param name="activation">Optional activation; when non-null, <c>GetActivatedgy(y, x)</c> replaces <c>y.Grad</c> as the incoming gradient.</param>
        public static void SingleOutputBackward(NdArray <Real> y, NdArray <Real> x, NdArray <Real> weight, NdArray <Real> bias, ComputeKernel backwardgWKernel, ComputeKernel backwardgXKernel, ICompressibleActivation <Real> activation)
        {
            int outCount = weight.Shape[0];
            int inCount = weight.Shape[1];

            // Scratch array that receives the input gradient read back from the device.
            Real[] inputGrad = new Real[x.Data.Length];

            // Choose the incoming gradient: the activation's version when one is attached, otherwise y.Grad as-is.
            Real[] upstreamGrad;
            if (activation == null)
            {
                upstreamGrad = y.Grad;
            }
            else
            {
                upstreamGrad = activation.GetActivatedgy(y, x);
            }

            var batchCount = y.BatchCount;

            if (bias != null)
            {
                LinearFunc.CalcBiasGrad(upstreamGrad, batchCount, outCount, bias.Grad);
            }

            // The incoming-gradient buffer is shared by both kernel launches, so it owns the outermost scope.
            using (ComputeBuffer<Real> upstreamBuffer = new ComputeBuffer<Real>(OpenCL.Context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, upstreamGrad))
            {
                // Pass 1: weight gradient.
                using (ComputeBuffer<Real> weightGradBuffer = new ComputeBuffer<Real>(OpenCL.Context, ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.UseHostPointer, weight.Grad))
                {
                    using (ComputeBuffer<Real> inputBuffer = new ComputeBuffer<Real>(OpenCL.Context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, x.Data))
                    {
                        backwardgWKernel.SetMemoryArgument(0, upstreamBuffer);
                        backwardgWKernel.SetMemoryArgument(1, inputBuffer);
                        backwardgWKernel.SetMemoryArgument(2, weightGradBuffer);
                        backwardgWKernel.SetValueArgument(3, batchCount);
                        backwardgWKernel.SetValueArgument(4, outCount);
                        backwardgWKernel.SetValueArgument(5, inCount);

                        long[] weightWorkSize = new long[] { inCount, outCount };
                        OpenCL.CommandQueue.Execute(backwardgWKernel, null, weightWorkSize, null, null);

                        // Drain the queue, then copy the device result into weight.Grad.
                        // NOTE(review): the `true` argument is presumably the blocking-read flag - confirm against the wrapper.
                        OpenCL.CommandQueue.Finish();
                        OpenCL.CommandQueue.ReadFromBuffer(weightGradBuffer, ref weight.Grad, true, null);
                    }
                }

                // Pass 2: input gradient, written to a device-side buffer sized like inputGrad.
                using (ComputeBuffer<Real> inputGradBuffer = new ComputeBuffer<Real>(OpenCL.Context, ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.AllocateHostPointer, inputGrad.Length))
                {
                    using (ComputeBuffer<Real> weightBuffer = new ComputeBuffer<Real>(OpenCL.Context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, weight.Data))
                    {
                        backwardgXKernel.SetMemoryArgument(0, upstreamBuffer);
                        backwardgXKernel.SetMemoryArgument(1, weightBuffer);
                        backwardgXKernel.SetMemoryArgument(2, inputGradBuffer);
                        backwardgXKernel.SetValueArgument(3, batchCount);
                        backwardgXKernel.SetValueArgument(4, outCount);
                        backwardgXKernel.SetValueArgument(5, inCount);

                        long[] inputWorkSize = new long[] { inCount, batchCount };
                        OpenCL.CommandQueue.Execute(backwardgXKernel, null, inputWorkSize, null, null);

                        OpenCL.CommandQueue.Finish();
                        OpenCL.CommandQueue.ReadFromBuffer(inputGradBuffer, ref inputGrad, true, null);
                    }
                }
            }

            // Accumulate (not overwrite) so any gradient already stored in x.Grad is kept.
            Real[] target = x.Grad;
            for (int index = 0; index < target.Length; index++)
            {
                target[index] += inputGrad[index];
            }
        }