////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// GPU backward pass. Runs BackwardgWKernel and reads its result buffer back into Weight.Grad,
/// then runs BackwardgXKernel and adds the values it produced, element by element, onto x.Grad.
/// </summary>
///
/// <param name="y"> Output array; its Grad (or GetActivatedgy(y) when an Activator is set) and
/// its BatchCount are read. </param>
/// <param name="x"> Input array; its Data is uploaded to the device and its Grad is
/// accumulated into. </param>
///
/// <seealso cref="M:KelpNet.Common.Functions.CompressibleFunction.NeedPreviousBackwardGpu(NdArray,NdArray)"/>
////////////////////////////////////////////////////////////////////////////////////////////////////
protected override void NeedPreviousBackwardGpu([NotNull] NdArray y, [NotNull] NdArray x)
{
    // Host-side staging array that receives the output of BackwardgXKernel.
    Real[] inputGradHost = new Real[x.Data.Length];

    Real[] outputGrad;
    if (Activator != null)
    {
        outputGrad = GetActivatedgy(y);
    }
    else
    {
        outputGrad = y.Grad;
    }

    if (!NoBias)
    {
        CalcBiasGrad(outputGrad, y.BatchCount);
    }

    ComputeMemoryFlags uploadReadOnly = ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer;
    ComputeMemoryFlags uploadReadWrite = ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer;
    ComputeMemoryFlags deviceWriteOnly = ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.AllocateHostPointer;

    // The output-gradient buffer is shared by both kernel launches below.
    using (ComputeBuffer<Real> gyBuffer = new ComputeBuffer<Real>(Weaver.Context, uploadReadOnly, outputGrad))
    {
        // First launch: BackwardgWKernel, result read back into Weight.Grad.
        using (ComputeBuffer<Real> gwBuffer = new ComputeBuffer<Real>(Weaver.Context, uploadReadWrite, Weight.Grad))
        using (ComputeBuffer<Real> xBuffer = new ComputeBuffer<Real>(Weaver.Context, uploadReadOnly, x.Data))
        {
            BackwardgWKernel.SetMemoryArgument(0, gyBuffer);
            BackwardgWKernel.SetMemoryArgument(1, xBuffer);
            BackwardgWKernel.SetMemoryArgument(2, gwBuffer);
            BackwardgWKernel.SetValueArgument(3, y.BatchCount);
            BackwardgWKernel.SetValueArgument(4, OutputCount);
            BackwardgWKernel.SetValueArgument(5, InputCount);

            long[] weightWorkSize = new long[] { InputCount, OutputCount };
            Weaver.CommandQueue.Execute(BackwardgWKernel, null, weightWorkSize, null, null);
            Weaver.CommandQueue.Finish();
            Weaver.CommandQueue.ReadFromBuffer(gwBuffer, ref Weight.Grad, true, null);
        }

        // Second launch: BackwardgXKernel, result read back into the staging array.
        using (ComputeBuffer<Real> gxBuffer = new ComputeBuffer<Real>(Weaver.Context, deviceWriteOnly, inputGradHost.Length))
        using (ComputeBuffer<Real> wBuffer = new ComputeBuffer<Real>(Weaver.Context, uploadReadOnly, Weight.Data))
        {
            BackwardgXKernel.SetMemoryArgument(0, gyBuffer);
            BackwardgXKernel.SetMemoryArgument(1, wBuffer);
            BackwardgXKernel.SetMemoryArgument(2, gxBuffer);
            BackwardgXKernel.SetValueArgument(3, y.BatchCount);
            BackwardgXKernel.SetValueArgument(4, OutputCount);
            BackwardgXKernel.SetValueArgument(5, InputCount);

            long[] inputWorkSize = new long[] { InputCount, y.BatchCount };
            Weaver.CommandQueue.Execute(BackwardgXKernel, null, inputWorkSize, null, null);
            Weaver.CommandQueue.Finish();
            Weaver.CommandQueue.ReadFromBuffer(gxBuffer, ref inputGradHost, true, null);
        }
    }

    // Accumulate (do not overwrite) the staged values onto the input's gradient.
    for (int index = 0; index < x.Grad.Length; index++)
    {
        x.Grad[index] += inputGradHost[index];
    }
}
////////////////////////////////////////////////////////////////////////////////////////////////////
/// <summary>
/// GPU backward pass for the variant whose kernels take shape, sub-sampling, trim and
/// kernel-size arguments. Runs BackwardgWKernel and reads its result buffer back into
/// Weight.Grad, then runs BackwardgXKernel and adds the values it produced onto x.Grad.
/// </summary>
///
/// <param name="y"> Output array; its Grad (or GetActivatedgy(y) when an Activator is set),
/// Shape, Length and BatchCount are read. </param>
/// <param name="x"> Input array; its Data, Shape and Length are read and its Grad is
/// accumulated into. </param>
///
/// <seealso cref="M:KelpNet.Common.Functions.CompressibleFunction.NeedPreviousBackwardGpu(NdArray,NdArray)"/>
////////////////////////////////////////////////////////////////////////////////////////////////////
protected override void NeedPreviousBackwardGpu([NotNull] NdArray y, [NotNull] NdArray x)
{
    // Host-side staging array that receives the output of BackwardgXKernel.
    Real[] inputGradHost = new Real[x.Data.Length];

    Real[] outputGrad;
    if (Activator != null)
    {
        outputGrad = GetActivatedgy(y);
    }
    else
    {
        outputGrad = y.Grad;
    }

    if (!NoBias)
    {
        CalcBiasGrad(outputGrad, y.Shape, y.BatchCount);
    }

    ComputeMemoryFlags uploadReadOnly = ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer;
    ComputeMemoryFlags uploadReadWrite = ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer;
    ComputeMemoryFlags deviceWriteOnly = ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.AllocateHostPointer;

    // The output-gradient buffer is shared by both kernel launches below.
    using (ComputeBuffer<Real> gyBuffer = new ComputeBuffer<Real>(Weaver.Context, uploadReadOnly, outputGrad))
    {
        // First launch: BackwardgWKernel, result read back into Weight.Grad.
        using (ComputeBuffer<Real> gwBuffer = new ComputeBuffer<Real>(Weaver.Context, uploadReadWrite, Weight.Grad))
        using (ComputeBuffer<Real> xBuffer = new ComputeBuffer<Real>(Weaver.Context, uploadReadOnly, x.Data))
        {
            // Memory arguments.
            BackwardgWKernel.SetMemoryArgument(0, gyBuffer);
            BackwardgWKernel.SetMemoryArgument(1, xBuffer);
            BackwardgWKernel.SetMemoryArgument(2, gwBuffer);

            // Batch / channel counts and array geometry.
            BackwardgWKernel.SetValueArgument(3, y.BatchCount);
            BackwardgWKernel.SetValueArgument(4, InputCount);
            BackwardgWKernel.SetValueArgument(5, y.Length);
            BackwardgWKernel.SetValueArgument(6, y.Shape[1]);
            BackwardgWKernel.SetValueArgument(7, y.Shape[2]);
            BackwardgWKernel.SetValueArgument(8, x.Shape[1]);
            BackwardgWKernel.SetValueArgument(9, x.Shape[2]);
            BackwardgWKernel.SetValueArgument(10, x.Length);

            // Sub-sampling, trim and kernel-size fields of this function.
            BackwardgWKernel.SetValueArgument(11, _subSampleX);
            BackwardgWKernel.SetValueArgument(12, _subSampleY);
            BackwardgWKernel.SetValueArgument(13, _trimX);
            BackwardgWKernel.SetValueArgument(14, _trimY);
            BackwardgWKernel.SetValueArgument(15, _kHeight);
            BackwardgWKernel.SetValueArgument(16, _kWidth);

            long[] weightWorkSize = new long[] { OutputCount * InputCount, _kHeight, _kWidth };
            Weaver.CommandQueue.Execute(BackwardgWKernel, null, weightWorkSize, null, null);
            Weaver.CommandQueue.Finish();
            Weaver.CommandQueue.ReadFromBuffer(gwBuffer, ref Weight.Grad, true, null);
        }

        // Second launch: BackwardgXKernel, result read back into the staging array.
        using (ComputeBuffer<Real> gxBuffer = new ComputeBuffer<Real>(Weaver.Context, deviceWriteOnly, inputGradHost.Length))
        using (ComputeBuffer<Real> wBuffer = new ComputeBuffer<Real>(Weaver.Context, uploadReadOnly, Weight.Data))
        {
            // Memory arguments.
            BackwardgXKernel.SetMemoryArgument(0, gyBuffer);
            BackwardgXKernel.SetMemoryArgument(1, wBuffer);
            BackwardgXKernel.SetMemoryArgument(2, gxBuffer);

            // Channel counts and array geometry.
            BackwardgXKernel.SetValueArgument(3, OutputCount);
            BackwardgXKernel.SetValueArgument(4, InputCount);
            BackwardgXKernel.SetValueArgument(5, y.Length);
            BackwardgXKernel.SetValueArgument(6, y.Shape[1]);
            BackwardgXKernel.SetValueArgument(7, y.Shape[2]);
            BackwardgXKernel.SetValueArgument(8, x.Shape[1]);
            BackwardgXKernel.SetValueArgument(9, x.Shape[2]);
            BackwardgXKernel.SetValueArgument(10, x.Length);

            // Sub-sampling, trim and kernel-size fields of this function.
            BackwardgXKernel.SetValueArgument(11, _subSampleX);
            BackwardgXKernel.SetValueArgument(12, _subSampleY);
            BackwardgXKernel.SetValueArgument(13, _trimX);
            BackwardgXKernel.SetValueArgument(14, _trimY);
            BackwardgXKernel.SetValueArgument(15, _kHeight);
            BackwardgXKernel.SetValueArgument(16, _kWidth);

            long[] inputWorkSize = new long[] { y.BatchCount * x.Shape[0], x.Shape[1], x.Shape[2] };
            Weaver.CommandQueue.Execute(BackwardgXKernel, null, inputWorkSize, null, null);
            Weaver.CommandQueue.Finish();
            Weaver.CommandQueue.ReadFromBuffer(gxBuffer, ref inputGradHost, true, null);
        }
    }

    // Accumulate (do not overwrite) the staged values onto the input's gradient.
    for (int index = 0; index < x.Grad.Length; index++)
    {
        x.Grad[index] += inputGradHost[index];
    }
}
/// <summary>
/// Backward pass. When <c>IsParallel</c> is false the call is forwarded to the base
/// implementation; otherwise BackwardgWKernel is run and its result read back into
/// Weight.Grad, then BackwardgXKernel is run and its result is added onto x.Grad.
/// </summary>
/// <param name="y"> Output array; its Grad (or GetActivatedgy(y) when an Activation is set)
/// and BatchCount are read. </param>
/// <param name="x"> Input array; its Data is uploaded to the device and its Grad is
/// accumulated into. </param>
public override void SingleOutputBackward(NdArray y, NdArray x)
{
    // Flag check: fall back to the base implementation when the parallel path is disabled.
    if (!IsParallel)
    {
        base.SingleOutputBackward(y, x);
        return;
    }

    // Host-side staging array that receives the output of BackwardgXKernel.
    Real[] gx = new Real[x.Data.Length];
    Real[] activatedgy = this.Activation != null ? this.GetActivatedgy(y) : y.Grad;

    if (!NoBias)
    {
        CalcBiasGrad(activatedgy, y.BatchCount);
    }

    // CopyHostPointer is used instead of UseHostPointer for every buffer built from a managed
    // array: UseHostPointer makes the host memory the buffer's backing store, which requires
    // the memory to stay valid and at a fixed address for the buffer's whole lifetime. A
    // managed array gives no such guarantee unless it stays pinned for that long, so the
    // contents are copied at creation and results are fetched with an explicit ReadFromBuffer.
    using (ComputeBuffer<Real> gpugY = new ComputeBuffer<Real>(OpenCL.Context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, activatedgy))
    {
        using (ComputeBuffer<Real> gpugW = new ComputeBuffer<Real>(OpenCL.Context, ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer, this.Weight.Grad))
        using (ComputeBuffer<Real> gpuX = new ComputeBuffer<Real>(OpenCL.Context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, x.Data))
        {
            BackwardgWKernel.SetMemoryArgument(0, gpugY);
            BackwardgWKernel.SetMemoryArgument(1, gpuX);
            BackwardgWKernel.SetMemoryArgument(2, gpugW);
            BackwardgWKernel.SetValueArgument(3, y.BatchCount);
            BackwardgWKernel.SetValueArgument(4, this.OutputCount);
            BackwardgWKernel.SetValueArgument(5, this.InputCount);

            OpenCL.CommandQueue.Execute
            (
                BackwardgWKernel,
                null,
                new long[] { this.InputCount, this.OutputCount },
                null,
                null
            );

            OpenCL.CommandQueue.Finish();

            // Blocking read of the kernel's result back into the weight gradient.
            OpenCL.CommandQueue.ReadFromBuffer(gpugW, ref this.Weight.Grad, true, null);
        }

        using (ComputeBuffer<Real> gpugX = new ComputeBuffer<Real>(OpenCL.Context, ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.AllocateHostPointer, gx.Length))
        using (ComputeBuffer<Real> gpuW = new ComputeBuffer<Real>(OpenCL.Context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, this.Weight.Data))
        {
            BackwardgXKernel.SetMemoryArgument(0, gpugY);
            BackwardgXKernel.SetMemoryArgument(1, gpuW);
            BackwardgXKernel.SetMemoryArgument(2, gpugX);
            BackwardgXKernel.SetValueArgument(3, y.BatchCount);
            BackwardgXKernel.SetValueArgument(4, this.OutputCount);
            BackwardgXKernel.SetValueArgument(5, this.InputCount);

            OpenCL.CommandQueue.Execute
            (
                BackwardgXKernel,
                null,
                new long[] { this.InputCount, y.BatchCount },
                null,
                null
            );

            OpenCL.CommandQueue.Finish();

            // Blocking read of the kernel's result into the staging array.
            OpenCL.CommandQueue.ReadFromBuffer(gpugX, ref gx, true, null);
        }
    }

    // Accumulate (do not overwrite) the staged values onto the input's gradient.
    for (int i = 0; i < x.Grad.Length; i++)
    {
        x.Grad[i] += gx[i];
    }
}
/// <summary>
/// GPU backward pass using device-switchable arrays. Switches the involved arrays to the GPU,
/// runs BackwardgWKernel and BackwardgXKernel on their buffers, then switches x.Grad and the
/// "gx" scratch array to the CPU and adds the scratch values onto x.Grad element by element.
/// </summary>
/// <param name="y"> Output array; its Grad (or GetActivatedgy(y) when an Activator is set)
/// and BatchCount are read. </param>
/// <param name="x"> Input array; its Data is used as kernel input and its Grad is
/// accumulated into. </param>
protected override void NeedPreviousBackwardGpu(NdArray y, NdArray x)
{
    // Scratch array obtained by name; it receives the output of BackwardgXKernel.
    var inputGradScratch = GetArray("gx", x.Data.Length);
    var outputGrad = Activator != null ? GetActivatedgy(y) : y.Grad;

    if (!NoBias)
    {
        CalcBiasGrad(outputGrad, y.BatchCount);
    }

    // Move the operands of the first kernel to the GPU and fetch their buffers.
    Weight.Grad.Switch(Common.ComputeDeviceTypes.Gpu);
    x.Data.Switch(Common.ComputeDeviceTypes.Gpu);
    outputGrad.Switch(Common.ComputeDeviceTypes.Gpu);

    var weightGradBuffer = Weight.Grad.GetBuffer();
    var inputDataBuffer = x.Data.GetBuffer();
    var outputGradBuffer = outputGrad.GetBuffer();

    BackwardgWKernel.SetMemoryArgument(0, outputGradBuffer);
    BackwardgWKernel.SetMemoryArgument(1, inputDataBuffer);
    BackwardgWKernel.SetMemoryArgument(2, weightGradBuffer);
    BackwardgWKernel.SetValueArgument(3, y.BatchCount);
    BackwardgWKernel.SetValueArgument(4, OutputCount);
    BackwardgWKernel.SetValueArgument(5, InputCount);

    Weaver.CommandQueue.Execute(BackwardgWKernel, null, new long[] { InputCount, OutputCount }, null, null);
    Weaver.CommandQueue.Finish();

    // TODO: the weight gradient is not explicitly read back from its buffer here.

    // Move the operands of the second kernel to the GPU and fetch their buffers.
    inputGradScratch.Switch(Common.ComputeDeviceTypes.Gpu);
    Weight.Data.Switch(Common.ComputeDeviceTypes.Gpu);

    var inputGradBuffer = inputGradScratch.GetBuffer();
    var weightDataBuffer = Weight.Data.GetBuffer();

    BackwardgXKernel.SetMemoryArgument(0, outputGradBuffer);
    BackwardgXKernel.SetMemoryArgument(1, weightDataBuffer);
    BackwardgXKernel.SetMemoryArgument(2, inputGradBuffer);
    BackwardgXKernel.SetValueArgument(3, y.BatchCount);
    BackwardgXKernel.SetValueArgument(4, OutputCount);
    BackwardgXKernel.SetValueArgument(5, InputCount);

    Weaver.CommandQueue.Execute(BackwardgXKernel, null, new long[] { InputCount, y.BatchCount }, null, null);
    Weaver.CommandQueue.Finish();

    // Bring both arrays to the CPU for the element-wise accumulation.
    using (x.Grad.Switch(Common.ComputeDeviceTypes.Cpu, true))
    using (inputGradScratch.Switch(Common.ComputeDeviceTypes.Cpu))
    {
        var targetGrad = x.Grad.GetArray();
        var scratchValues = inputGradScratch.GetArray();

        for (int index = 0; index < x.Grad.Length; index++)
        {
            targetGrad[index] += scratchValues[index];
        }
    }
}