/// <summary>
/// CPU backward pass for a fully connected layer: accumulates the weight, input and
/// (when a bias is supplied) bias gradients from the output gradient of <paramref name="y"/>.
/// </summary>
/// <param name="y">Layer output; its gradient is consumed, one row of <c>outputCount</c> values per batch item.</param>
/// <param name="x">Layer input; <c>x.Data</c> is read and <c>x.Grad</c> is accumulated into.</param>
/// <param name="weight">Weights indexed as <c>[output * inputCount + input]</c>; <c>weight.Grad</c> is accumulated into.</param>
/// <param name="bias">Optional bias; when non-null its gradient is filled by <c>CalcBiasGrad</c>.</param>
/// <param name="activation">Optional fused activation; when non-null it supplies the gradient used instead of <c>y.Grad</c>.</param>
public static void SingleOutputBackward(NdArray<Real> y, NdArray<Real> x, NdArray<Real> weight, NdArray<Real> bias, ICompressibleActivation<Real> activation)
{
    int outputCount = weight.Shape[0];
    int inputCount = weight.Shape[1];

    // Pick the gradient source: the fused activation's result when present, otherwise y's own gradient.
    Real[] outputGrad;
    if (activation != null)
    {
        outputGrad = activation.GetActivatedgy(y, x);
    }
    else
    {
        outputGrad = y.Grad;
    }

    if (bias != null)
    {
        CalcBiasGrad(outputGrad, y.BatchCount, outputCount, bias.Grad);
    }

    for (int batch = 0; batch < y.BatchCount; batch++)
    {
        // Start of this batch item's row in the flattened output-gradient and input arrays.
        int gradRowStart = batch * outputCount;
        int inputRowStart = batch * inputCount;

        for (int outIndex = 0; outIndex < outputCount; outIndex++)
        {
            Real gradValue = outputGrad[outIndex + gradRowStart];
            int weightRowStart = outIndex * inputCount;

            for (int inIndex = 0; inIndex < inputCount; inIndex++)
            {
                int weightPos = weightRowStart + inIndex;
                int inputPos = inputRowStart + inIndex;

                weight.Grad[weightPos] += x.Data[inputPos] * gradValue;
                x.Grad[inputPos] += weight.Data[weightPos] * gradValue;
            }
        }
    }
}
/// <summary>
/// CPU backward pass for a strided, padded 2-D sliding-window layer: for every output
/// element, the kernel window is clamped to the input bounds and the weight and input
/// gradients are accumulated over the positions that remain.
/// </summary>
/// <param name="y">Layer output, indexed per batch item as (channel, row, column); its gradient is consumed.</param>
/// <param name="x">Layer input, indexed per batch item as (channel, row, column); <c>x.Grad</c> is accumulated into.</param>
/// <param name="weight">Weights indexed as (output channel, input channel, kernel row, kernel column).</param>
/// <param name="bias">Optional bias; when non-null its gradient is filled by <c>CalcBiasGrad</c>.</param>
/// <param name="strideX">Horizontal step, in input columns, between neighbouring output columns.</param>
/// <param name="strideY">Vertical step, in input rows, between neighbouring output rows.</param>
/// <param name="padX">Horizontal padding subtracted from the window origin.</param>
/// <param name="padY">Vertical padding subtracted from the window origin.</param>
/// <param name="activation">Optional fused activation; when non-null it supplies the gradient used instead of <c>y.Grad</c>.</param>
public static void SingleOutputBackward(NdArray<Real> y, NdArray<Real> x, NdArray<Real> weight, NdArray<Real> bias, int strideX, int strideY, int padX, int padY, ICompressibleActivation<Real> activation)
{
    int inputCount = weight.Shape[1];
    int kernelHeight = weight.Shape[2];
    int kernelWidth = weight.Shape[3];
    int kernelArea = kernelHeight * kernelWidth;

    Real[] outputGrad;
    if (activation != null)
    {
        outputGrad = activation.GetActivatedgy(y, x);
    }
    else
    {
        outputGrad = y.Grad;
    }

    if (bias != null)
    {
        CalcBiasGrad(outputGrad, y.Shape, y.BatchCount, bias.Grad);
    }

    int outChannels = y.Shape[0];
    int yHeight = y.Shape[1];
    int yWidth = y.Shape[2];
    int yPlane = yHeight * yWidth;

    int inChannels = x.Shape[0];
    int xHeight = x.Shape[1];
    int xWidth = x.Shape[2];
    int xPlane = xHeight * xWidth;

    for (int batch = 0; batch < y.BatchCount; batch++)
    {
        int yBatchBase = batch * y.Length;
        int xBatchBase = batch * x.Length;

        for (int outCh = 0; outCh < outChannels; outCh++)
        {
            int wOutChBase = outCh * inputCount * kernelArea;
            int yChBase = yBatchBase + outCh * yPlane;

            // oy / ox walk the output grid in input coordinates (output index * stride).
            for (int oy = 0; oy < yHeight * strideY; oy += strideY)
            {
                // Clamp the vertical extent of the window to the input.
                int iyStart = oy - padY;
                if (iyStart < 0)
                {
                    iyStart = 0;
                }

                int iyLimit = kernelHeight + oy - padY;
                if (iyLimit > xHeight)
                {
                    iyLimit = xHeight;
                }

                int gyRowBase = yChBase + oy / strideY * yWidth;

                for (int ox = 0; ox < yWidth * strideX; ox += strideX)
                {
                    // Clamp the horizontal extent of the window to the input.
                    int ixStart = ox - padX;
                    if (ixStart < 0)
                    {
                        ixStart = 0;
                    }

                    int ixLimit = kernelWidth + ox - padX;
                    if (ixLimit > xWidth)
                    {
                        ixLimit = xWidth;
                    }

                    // One output-gradient value feeds every (input channel, window position) pair below.
                    Real gradValue = outputGrad[gyRowBase + ox / strideX];

                    for (int inCh = 0; inCh < inChannels; inCh++)
                    {
                        int wInChBase = wOutChBase + inCh * kernelArea;
                        int xChBase = xBatchBase + inCh * xPlane;

                        for (int iy = iyStart; iy < iyLimit; iy++)
                        {
                            // (iy - oy + padY) is the kernel row; "- ox + padX" pre-shifts the column term.
                            int wRowBase = wInChBase + (iy - oy + padY) * kernelWidth - ox + padX;
                            int xRowBase = xChBase + iy * xWidth;

                            for (int ix = ixStart; ix < ixLimit; ix++)
                            {
                                int wIndex = wRowBase + ix;
                                int xIndex = xRowBase + ix;

                                weight.Grad[wIndex] += x.Data[xIndex] * gradValue;
                                x.Grad[xIndex] += weight.Data[wIndex] * gradValue;
                            }
                        }
                    }
                }
            }
        }
    }
}
/// <summary>
/// CPU backward pass for a layer whose weights are laid out as
/// (input channel, output channel, kernel row, kernel column) — presumably a transposed
/// (deconvolution-style) 2-D layer. For each output element it visits the input positions
/// whose strided kernel footprint covers that element and accumulates the weight and
/// input gradients.
/// </summary>
/// <param name="y">Layer output, indexed per batch item as (channel, row, column); its gradient is consumed.</param>
/// <param name="x">Layer input, indexed per batch item as (channel, row, column); <c>x.Grad</c> is accumulated into.</param>
/// <param name="weight">Weights; <c>Shape[0]</c> is the input channel count and <c>Shape[1]</c> the output channel count.</param>
/// <param name="bias">Optional bias; when non-null its gradient is filled by <c>CalcBiasGrad</c>.</param>
/// <param name="strideX">Horizontal stride: each input column maps to output column <c>ix * strideX</c> plus a kernel offset.</param>
/// <param name="strideY">Vertical stride: each input row maps to output row <c>iy * strideY</c> plus a kernel offset.</param>
/// <param name="padX">Horizontal padding added to the output column before matching it to inputs.</param>
/// <param name="padY">Vertical padding added to the output row before matching it to inputs.</param>
/// <param name="activation">Optional fused activation; when non-null it supplies the gradient used instead of <c>y.Grad</c>.</param>
public static void SingleOutputBackward(NdArray<Real> y, NdArray<Real> x, NdArray<Real> weight, NdArray<Real> bias, int strideX, int strideY, int padX, int padY, ICompressibleActivation<Real> activation)
{
    int inputCount = weight.Shape[0];
    int outputCount = weight.Shape[1];
    int kernelHeight = weight.Shape[2];
    int kernelWidth = weight.Shape[3];
    int kernelArea = kernelHeight * kernelWidth;

    Real[] outputGrad;
    if (activation != null)
    {
        outputGrad = activation.GetActivatedgy(y, x);
    }
    else
    {
        outputGrad = y.Grad;
    }

    if (bias != null)
    {
        CalcBiasGrad(outputGrad, bias.Grad, y.Shape, y.BatchCount);
    }

    int yHeight = y.Shape[1];
    int yWidth = y.Shape[2];
    int yPlane = yHeight * yWidth;

    int xHeight = x.Shape[1];
    int xWidth = x.Shape[2];
    int xPlane = xHeight * xWidth;

    for (int batch = 0; batch < y.BatchCount; batch++)
    {
        int yBatchBase = batch * y.Length;
        int xBatchBase = batch * x.Length;

        for (int outCh = 0; outCh < outputCount; outCh++)
        {
            int wOutChBase = outCh * kernelArea;
            int yChBase = yBatchBase + outCh * yPlane;

            for (int outRow = 0; outRow < yHeight; outRow++)
            {
                // Output row expressed in padded coordinates.
                int oy = outRow + padY;

                // Last contributing input row (exclusive), clamped to the input height.
                int iyLimit = oy / strideY + 1;
                if (iyLimit > xHeight)
                {
                    iyLimit = xHeight;
                }

                // First contributing input row: the kernel must still reach down to oy.
                int iyStart;
                if (oy - kernelHeight < 0)
                {
                    iyStart = 0;
                }
                else
                {
                    iyStart = (oy - kernelHeight) / strideY + 1;
                }

                for (int outCol = 0; outCol < yWidth; outCol++)
                {
                    // Output column expressed in padded coordinates.
                    int ox = outCol + padX;

                    int ixLimit = ox / strideX + 1;
                    if (ixLimit > xWidth)
                    {
                        ixLimit = xWidth;
                    }

                    int ixStart;
                    if (ox - kernelWidth < 0)
                    {
                        ixStart = 0;
                    }
                    else
                    {
                        ixStart = (ox - kernelWidth) / strideX + 1;
                    }

                    Real gradValue = outputGrad[yChBase + outRow * yWidth + outCol];

                    for (int inCh = 0; inCh < inputCount; inCh++)
                    {
                        int wInChBase = inCh * outputCount * kernelArea + wOutChBase;
                        int xChBase = xBatchBase + inCh * xPlane;

                        for (int iy = iyStart; iy < iyLimit; iy++)
                        {
                            int xRowBase = xChBase + iy * xWidth;
                            // (oy - iy * strideY) is the kernel row linking input row iy to this output row.
                            int wRowBase = wInChBase + (oy - iy * strideY) * kernelWidth;

                            for (int ix = ixStart; ix < ixLimit; ix++)
                            {
                                int xIndex = xRowBase + ix;
                                int wIndex = wRowBase + (ox - ix * strideX);

                                weight.Grad[wIndex] += x.Data[xIndex] * gradValue;
                                x.Grad[xIndex] += weight.Data[wIndex] * gradValue;
                            }
                        }
                    }
                }
            }
        }
    }
}
/// <summary>
/// OpenCL backward pass for the 2-D layer whose bias gradient is handled by
/// <c>Deconvolution2DFunc.CalcBiasGrad</c>. The bias gradient is computed on the host;
/// the weight gradient and the input gradient are each computed by a dedicated kernel.
/// </summary>
/// <param name="y">Layer output; its gradient (or the activation-adjusted gradient) is uploaded for both kernels.</param>
/// <param name="x">Layer input; <c>x.Data</c> feeds the weight-gradient kernel and <c>x.Grad</c> receives the input gradient.</param>
/// <param name="weight">Weights; <c>Shape</c> is read as (input count, output count, kernel height, kernel width).</param>
/// <param name="bias">Optional bias; when non-null its gradient is computed on the host.</param>
/// <param name="strideX">Horizontal stride forwarded to both kernels.</param>
/// <param name="strideY">Vertical stride forwarded to both kernels.</param>
/// <param name="padX">Horizontal padding forwarded to both kernels.</param>
/// <param name="padY">Vertical padding forwarded to both kernels.</param>
/// <param name="backwardgWKernel">Kernel that updates the weight gradient buffer.</param>
/// <param name="backwardgXKernel">Kernel that writes the input gradient buffer.</param>
/// <param name="activation">Optional fused activation; when non-null it supplies the gradient used instead of <c>y.Grad</c>.</param>
public static void SingleOutputBackward(NdArray<Real> y, NdArray<Real> x, NdArray<Real> weight, NdArray<Real> bias, int strideX, int strideY, int padX, int padY, ComputeKernel backwardgWKernel, ComputeKernel backwardgXKernel, ICompressibleActivation<Real> activation)
{
    int inputCount = weight.Shape[0];
    int outputCount = weight.Shape[1];
    int kernelHeight = weight.Shape[2];
    int kernelWidth = weight.Shape[3];

    // Host-side landing area for the input gradient produced by the second kernel.
    Real[] gx = new Real[x.Data.Length];

    Real[] activatedgy;
    if (activation != null)
    {
        activatedgy = activation.GetActivatedgy(y, x);
    }
    else
    {
        activatedgy = y.Grad;
    }

    if (bias != null)
    {
        Deconvolution2DFunc.CalcBiasGrad(activatedgy, bias.Grad, y.Shape, y.BatchCount);
    }

    // gy is shared by both kernels, so its buffer outlives the two inner scopes.
    using (ComputeBuffer<Real> gyBuffer = new ComputeBuffer<Real>(OpenCL.Context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, activatedgy))
    {
        // Pass 1: weight gradient.
        using (ComputeBuffer<Real> gwBuffer = new ComputeBuffer<Real>(OpenCL.Context, ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.UseHostPointer, weight.Grad))
        {
            using (ComputeBuffer<Real> xBuffer = new ComputeBuffer<Real>(OpenCL.Context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, x.Data))
            {
                backwardgWKernel.SetMemoryArgument(0, gyBuffer);
                backwardgWKernel.SetMemoryArgument(1, xBuffer);
                backwardgWKernel.SetMemoryArgument(2, gwBuffer);
                backwardgWKernel.SetValueArgument(3, y.BatchCount);
                backwardgWKernel.SetValueArgument(4, outputCount);
                backwardgWKernel.SetValueArgument(5, y.Length);
                backwardgWKernel.SetValueArgument(6, y.Shape[1]);
                backwardgWKernel.SetValueArgument(7, y.Shape[2]);
                backwardgWKernel.SetValueArgument(8, x.Shape[1]);
                backwardgWKernel.SetValueArgument(9, x.Shape[2]);
                backwardgWKernel.SetValueArgument(10, x.Length);
                backwardgWKernel.SetValueArgument(11, strideX);
                backwardgWKernel.SetValueArgument(12, strideY);
                backwardgWKernel.SetValueArgument(13, padX);
                backwardgWKernel.SetValueArgument(14, padY);
                backwardgWKernel.SetValueArgument(15, kernelHeight);
                backwardgWKernel.SetValueArgument(16, kernelWidth);

                // One work item per weight element: (input * output channels, kernel row, kernel column).
                long[] weightWorkSize = new long[] { inputCount * outputCount, kernelHeight, kernelWidth };

                OpenCL.CommandQueue.Execute(backwardgWKernel, null, weightWorkSize, null, null);
                OpenCL.CommandQueue.Finish();
                OpenCL.CommandQueue.ReadFromBuffer(gwBuffer, ref weight.Grad, true, null);
            }
        }

        // Pass 2: input gradient, written to a device-allocated buffer and copied back into gx.
        using (ComputeBuffer<Real> gxBuffer = new ComputeBuffer<Real>(OpenCL.Context, ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.AllocateHostPointer, gx.Length))
        {
            using (ComputeBuffer<Real> wBuffer = new ComputeBuffer<Real>(OpenCL.Context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, weight.Data))
            {
                backwardgXKernel.SetMemoryArgument(0, gyBuffer);
                backwardgXKernel.SetMemoryArgument(1, wBuffer);
                backwardgXKernel.SetMemoryArgument(2, gxBuffer);
                backwardgXKernel.SetValueArgument(3, outputCount);
                backwardgXKernel.SetValueArgument(4, inputCount);
                backwardgXKernel.SetValueArgument(5, y.Length);
                backwardgXKernel.SetValueArgument(6, y.Shape[1]);
                backwardgXKernel.SetValueArgument(7, y.Shape[2]);
                backwardgXKernel.SetValueArgument(8, x.Shape[1]);
                backwardgXKernel.SetValueArgument(9, x.Shape[2]);
                backwardgXKernel.SetValueArgument(10, x.Length);
                backwardgXKernel.SetValueArgument(11, strideX);
                backwardgXKernel.SetValueArgument(12, strideY);
                backwardgXKernel.SetValueArgument(13, padX);
                backwardgXKernel.SetValueArgument(14, padY);
                backwardgXKernel.SetValueArgument(15, kernelHeight);
                backwardgXKernel.SetValueArgument(16, kernelWidth);

                // One work item per input element: (batch * channels, row, column).
                long[] inputWorkSize = new long[] { x.BatchCount * x.Shape[0], x.Shape[1], x.Shape[2] };

                OpenCL.CommandQueue.Execute(backwardgXKernel, null, inputWorkSize, null, null);
                OpenCL.CommandQueue.Finish();
                OpenCL.CommandQueue.ReadFromBuffer(gxBuffer, ref gx, true, null);
            }
        }
    }

    // Accumulate, rather than overwrite, so gradients already stored on x are preserved.
    for (int index = 0; index < x.Grad.Length; index++)
    {
        x.Grad[index] += gx[index];
    }
}
/// <summary>
/// OpenCL backward pass for the fully connected layer whose bias gradient is handled by
/// <c>LinearFunc.CalcBiasGrad</c>. The bias gradient is computed on the host; the weight
/// gradient and the input gradient are each computed by a dedicated kernel.
/// </summary>
/// <param name="y">Layer output; its gradient (or the activation-adjusted gradient) is uploaded for both kernels.</param>
/// <param name="x">Layer input; <c>x.Data</c> feeds the weight-gradient kernel and <c>x.Grad</c> receives the input gradient.</param>
/// <param name="weight">Weights; <c>Shape</c> is read as (output count, input count).</param>
/// <param name="bias">Optional bias; when non-null its gradient is computed on the host.</param>
/// <param name="backwardgWKernel">Kernel that updates the weight gradient buffer.</param>
/// <param name="backwardgXKernel">Kernel that writes the input gradient buffer.</param>
/// <param name="activation">Optional fused activation; when non-null it supplies the gradient used instead of <c>y.Grad</c>.</param>
public static void SingleOutputBackward(NdArray<Real> y, NdArray<Real> x, NdArray<Real> weight, NdArray<Real> bias, ComputeKernel backwardgWKernel, ComputeKernel backwardgXKernel, ICompressibleActivation<Real> activation)
{
    int outputCount = weight.Shape[0];
    int inputCount = weight.Shape[1];

    // Host-side landing area for the input gradient produced by the second kernel.
    Real[] gx = new Real[x.Data.Length];

    Real[] activatedgy;
    if (activation != null)
    {
        activatedgy = activation.GetActivatedgy(y, x);
    }
    else
    {
        activatedgy = y.Grad;
    }

    if (bias != null)
    {
        LinearFunc.CalcBiasGrad(activatedgy, y.BatchCount, outputCount, bias.Grad);
    }

    // The output-gradient buffer is used by both kernels, so it outlives the two inner scopes.
    using (ComputeBuffer<Real> gyBuffer = new ComputeBuffer<Real>(OpenCL.Context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, activatedgy))
    {
        // Pass 1: weight gradient.
        using (ComputeBuffer<Real> gwBuffer = new ComputeBuffer<Real>(OpenCL.Context, ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.UseHostPointer, weight.Grad))
        {
            using (ComputeBuffer<Real> xBuffer = new ComputeBuffer<Real>(OpenCL.Context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, x.Data))
            {
                backwardgWKernel.SetMemoryArgument(0, gyBuffer);
                backwardgWKernel.SetMemoryArgument(1, xBuffer);
                backwardgWKernel.SetMemoryArgument(2, gwBuffer);
                backwardgWKernel.SetValueArgument(3, y.BatchCount);
                backwardgWKernel.SetValueArgument(4, outputCount);
                backwardgWKernel.SetValueArgument(5, inputCount);

                // One work item per weight element: (input, output).
                long[] weightWorkSize = new long[] { inputCount, outputCount };

                OpenCL.CommandQueue.Execute(backwardgWKernel, null, weightWorkSize, null, null);
                OpenCL.CommandQueue.Finish();
                OpenCL.CommandQueue.ReadFromBuffer(gwBuffer, ref weight.Grad, true, null);
            }
        }

        // Pass 2: input gradient, written to a device-allocated buffer and copied back into gx.
        using (ComputeBuffer<Real> gxBuffer = new ComputeBuffer<Real>(OpenCL.Context, ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.AllocateHostPointer, gx.Length))
        {
            using (ComputeBuffer<Real> wBuffer = new ComputeBuffer<Real>(OpenCL.Context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer, weight.Data))
            {
                backwardgXKernel.SetMemoryArgument(0, gyBuffer);
                backwardgXKernel.SetMemoryArgument(1, wBuffer);
                backwardgXKernel.SetMemoryArgument(2, gxBuffer);
                backwardgXKernel.SetValueArgument(3, y.BatchCount);
                backwardgXKernel.SetValueArgument(4, outputCount);
                backwardgXKernel.SetValueArgument(5, inputCount);

                // One work item per input element: (input, batch item).
                long[] inputWorkSize = new long[] { inputCount, y.BatchCount };

                OpenCL.CommandQueue.Execute(backwardgXKernel, null, inputWorkSize, null, null);
                OpenCL.CommandQueue.Finish();
                OpenCL.CommandQueue.ReadFromBuffer(gxBuffer, ref gx, true, null);
            }
        }
    }

    // Accumulate, rather than overwrite, so gradients already stored on x are preserved.
    for (int index = 0; index < x.Grad.Length; index++)
    {
        x.Grad[index] += gx[index];
    }
}