/// <summary>
/// Backward pass that dispatches two OpenCL kernels: one that updates <c>weight.Grad</c>
/// and one that produces a per-input gradient which is then added into <c>x.Grad</c>.
/// </summary>
/// <param name="y">Output array; its <c>Grad</c> and <c>BatchCount</c> are read.</param>
/// <param name="x">Input array; its <c>Data</c> is uploaded and its <c>Grad</c> is accumulated into.</param>
/// <param name="weight">Weight array; <c>Shape[0]</c> is used as the output count and <c>Shape[1]</c> as the input count.</param>
/// <param name="bias">Optional bias array; when non-null its <c>Grad</c> is passed to <c>LinearFunc.CalcBiasGrad</c>.</param>
/// <param name="backwardgWKernel">Kernel launched over a global range of (inputCount, outputCount).</param>
/// <param name="backwardgXKernel">Kernel launched over a global range of (inputCount, batchCount).</param>
/// <param name="activation">Optional activation; when non-null the output gradient is obtained via <c>GetActivatedgy(y, x)</c> instead of <c>y.Grad</c>.</param>
public static void SingleOutputBackward(NdArray<Real> y, NdArray<Real> x, NdArray<Real> weight, NdArray<Real> bias, ComputeKernel backwardgWKernel, ComputeKernel backwardgXKernel, ICompressibleActivation<Real> activation)
{
    int outSize = weight.Shape[0];
    int inSize = weight.Shape[1];

    // Host-side staging array that receives the gX kernel result.
    Real[] inputGrad = new Real[x.Data.Length];

    // Use the activation-adjusted gradient when an activation is attached,
    // otherwise the raw output gradient.
    Real[] outputGrad;
    if (activation != null)
    {
        outputGrad = activation.GetActivatedgy(y, x);
    }
    else
    {
        outputGrad = y.Grad;
    }

    var batchCount = y.BatchCount;

    if (bias != null)
    {
        LinearFunc.CalcBiasGrad(outputGrad, batchCount, outSize, bias.Grad);
    }

    ComputeMemoryFlags hostReadOnly = ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer;
    ComputeMemoryFlags hostReadWrite = ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.UseHostPointer;
    ComputeMemoryFlags deviceWriteOnly = ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.AllocateHostPointer;

    // The output-gradient buffer is shared by both kernel launches, so it outlives
    // the per-kernel buffers created inside.
    using (ComputeBuffer<Real> gyBuffer = new ComputeBuffer<Real>(OpenCL.Context, hostReadOnly, outputGrad))
    {
        // --- weight gradient ---
        // NOTE(review): whether the kernel accumulates into or overwrites gW is decided
        // by the kernel source, which is not visible here; the buffer is created
        // read-write over the existing weight.Grad contents.
        using (ComputeBuffer<Real> weightGradBuffer = new ComputeBuffer<Real>(OpenCL.Context, hostReadWrite, weight.Grad))
        {
            using (ComputeBuffer<Real> inputBuffer = new ComputeBuffer<Real>(OpenCL.Context, hostReadOnly, x.Data))
            {
                long[] weightGradRange = new long[] { inSize, outSize };

                backwardgWKernel.SetMemoryArgument(0, gyBuffer);
                backwardgWKernel.SetMemoryArgument(1, inputBuffer);
                backwardgWKernel.SetMemoryArgument(2, weightGradBuffer);
                backwardgWKernel.SetValueArgument(3, batchCount);
                backwardgWKernel.SetValueArgument(4, outSize);
                backwardgWKernel.SetValueArgument(5, inSize);

                OpenCL.CommandQueue.Execute(backwardgWKernel, null, weightGradRange, null, null);
                OpenCL.CommandQueue.Finish();

                // Blocking read of the device result back into weight.Grad.
                OpenCL.CommandQueue.ReadFromBuffer(weightGradBuffer, ref weight.Grad, true, null);
            }
        }

        // --- input gradient ---
        using (ComputeBuffer<Real> inputGradBuffer = new ComputeBuffer<Real>(OpenCL.Context, deviceWriteOnly, inputGrad.Length))
        {
            using (ComputeBuffer<Real> weightBuffer = new ComputeBuffer<Real>(OpenCL.Context, hostReadOnly, weight.Data))
            {
                long[] inputGradRange = new long[] { inSize, batchCount };

                backwardgXKernel.SetMemoryArgument(0, gyBuffer);
                backwardgXKernel.SetMemoryArgument(1, weightBuffer);
                backwardgXKernel.SetMemoryArgument(2, inputGradBuffer);
                backwardgXKernel.SetValueArgument(3, batchCount);
                backwardgXKernel.SetValueArgument(4, outSize);
                backwardgXKernel.SetValueArgument(5, inSize);

                OpenCL.CommandQueue.Execute(backwardgXKernel, null, inputGradRange, null, null);
                OpenCL.CommandQueue.Finish();

                // Blocking read of the device result into the host staging array.
                OpenCL.CommandQueue.ReadFromBuffer(inputGradBuffer, ref inputGrad, true, null);
            }
        }
    }

    // Add the freshly computed input gradient onto whatever x.Grad already holds.
    for (int idx = 0; idx < x.Grad.Length; idx++)
    {
        x.Grad[idx] += inputGrad[idx];
    }
}