예제 #1
0
        /// <summary>
        /// Accumulates gradients for a weight laid out as [output, input]: for every batch item and
        /// every output element, the (optionally activation-adjusted) output gradient is multiplied
        /// with the input data and added to <c>weight.Grad</c>, and multiplied with the weight data
        /// and added to <c>x.Grad</c>.
        /// </summary>
        /// <param name="y">Output array; <c>BatchCount</c> is read, and <c>Grad</c> is used when no activation is given.</param>
        /// <param name="x">Input array; <c>Data</c> is read and <c>Grad</c> is accumulated into.</param>
        /// <param name="weight">Weight array; <c>Shape[0]</c> is taken as the output count and <c>Shape[1]</c> as the input count.</param>
        /// <param name="bias">Optional bias; when non-null its <c>Grad</c> is passed to <c>CalcBiasGrad</c>.</param>
        /// <param name="activation">Optional activation; when non-null the gradient comes from <c>GetActivatedgy(y, x)</c>.</param>
        public static void SingleOutputBackward(NdArray <Real> y, NdArray <Real> x, NdArray <Real> weight, NdArray <Real> bias, ICompressibleActivation <Real> activation)
        {
            int outputCount = weight.Shape[0];
            int inputCount  = weight.Shape[1];

            // Pick the gradient source: the activation's adjusted gradient if present, otherwise y.Grad as-is.
            Real[] activatedgy;
            if (activation != null)
            {
                activatedgy = activation.GetActivatedgy(y, x);
            }
            else
            {
                activatedgy = y.Grad;
            }

            if (bias != null)
            {
                CalcBiasGrad(activatedgy, y.BatchCount, outputCount, bias.Grad);
            }

            for (int batch = 0; batch < y.BatchCount; batch++)
            {
                // Start of this batch item's row in the flat input / output-gradient arrays.
                int xRowOffset  = batch * inputCount;
                int gyRowOffset = batch * outputCount;

                for (int outIndex = 0; outIndex < outputCount; outIndex++)
                {
                    Real gyValue    = activatedgy[outIndex + gyRowOffset];
                    int  wRowOffset = outIndex * inputCount;

                    for (int inIndex = 0; inIndex < inputCount; inIndex++)
                    {
                        int wIndex = wRowOffset + inIndex;
                        int xIndex = xRowOffset + inIndex;

                        weight.Grad[wIndex] += x.Data[xIndex] * gyValue;
                        x.Grad[xIndex]      += weight.Data[wIndex] * gyValue;
                    }
                }
            }
        }
예제 #2
0
        /// <summary>
        /// Accumulates gradients for a strided, padded 2D kernel: every output position's
        /// (optionally activation-adjusted) gradient is distributed over the input window it covers,
        /// adding to <c>weight.Grad</c> and <c>x.Grad</c>.
        /// </summary>
        /// <param name="y">Output array indexed as [channel, row, column] per batch item.</param>
        /// <param name="x">Input array indexed as [channel, row, column] per batch item; <c>Grad</c> is accumulated into.</param>
        /// <param name="weight">Weight array indexed as [output channel, input channel, kernel row, kernel column]; <c>Grad</c> is accumulated into.</param>
        /// <param name="bias">Optional bias; when non-null its <c>Grad</c> is passed to <c>CalcBiasGrad</c>.</param>
        /// <param name="strideX">Horizontal step between consecutive output columns, in input columns.</param>
        /// <param name="strideY">Vertical step between consecutive output rows, in input rows.</param>
        /// <param name="padX">Horizontal padding subtracted from the window origin.</param>
        /// <param name="padY">Vertical padding subtracted from the window origin.</param>
        /// <param name="activation">Optional activation; when non-null the gradient comes from <c>GetActivatedgy(y, x)</c>.</param>
        public static void SingleOutputBackward(NdArray <Real> y, NdArray <Real> x, NdArray <Real> weight, NdArray <Real> bias, int strideX, int strideY, int padX, int padY, ICompressibleActivation <Real> activation)
        {
            int inputCount   = weight.Shape[1];
            int kernelHeight = weight.Shape[2];
            int kernelWidth  = weight.Shape[3];

            Real[] activatedgy;
            if (activation != null)
            {
                activatedgy = activation.GetActivatedgy(y, x);
            }
            else
            {
                activatedgy = y.Grad;
            }

            if (bias != null)
            {
                CalcBiasGrad(activatedgy, y.Shape, y.BatchCount, bias.Grad);
            }

            int outChannels = y.Shape[0];
            int outHeight   = y.Shape[1];
            int outWidth    = y.Shape[2];
            int inChannels  = x.Shape[0];
            int inHeight    = x.Shape[1];
            int inWidth     = x.Shape[2];

            for (int batch = 0; batch < y.BatchCount; batch++)
            {
                int yBatchOffset = batch * y.Length;
                int xBatchOffset = batch * x.Length;

                for (int outCh = 0; outCh < outChannels; outCh++)
                {
                    int wOutChOffset = outCh * inputCount * kernelHeight * kernelWidth;
                    int yPlaneOffset = yBatchOffset + outCh * outHeight * outWidth;

                    // oy / ox walk the output grid in input-space units (multiples of the stride).
                    for (int oy = 0; oy < outHeight * strideY; oy += strideY)
                    {
                        int windowTop = oy - padY;

                        // Clamp the window's rows to the rows that actually exist in the input.
                        int iyStart = windowTop;
                        if (iyStart < 0)
                        {
                            iyStart = 0;
                        }

                        int iyLimit = kernelHeight + windowTop;
                        if (iyLimit > inHeight)
                        {
                            iyLimit = inHeight;
                        }

                        int yRowOffset = yPlaneOffset + oy / strideY * outWidth;

                        for (int ox = 0; ox < outWidth * strideX; ox += strideX)
                        {
                            int windowLeft = ox - padX;

                            int ixStart = windowLeft;
                            if (ixStart < 0)
                            {
                                ixStart = 0;
                            }

                            int ixLimit = kernelWidth + windowLeft;
                            if (ixLimit > inWidth)
                            {
                                ixLimit = inWidth;
                            }

                            int gyIndex = yRowOffset + ox / strideX;

                            for (int inCh = 0; inCh < inChannels; inCh++)
                            {
                                int wInChOffset  = wOutChOffset + inCh * kernelHeight * kernelWidth;
                                int xPlaneOffset = xBatchOffset + inCh * inHeight * inWidth;

                                for (int iy = iyStart; iy < iyLimit; iy++)
                                {
                                    // Kernel row is (iy - windowTop); fold the column origin in so only ix varies below.
                                    int wRowBase = wInChOffset + (iy - oy + padY) * kernelWidth - ox + padX;
                                    int xRowBase = xPlaneOffset + iy * inWidth;

                                    for (int ix = ixStart; ix < ixLimit; ix++)
                                    {
                                        int wIndex = wRowBase + ix;
                                        int xIndex = xRowBase + ix;

                                        weight.Grad[wIndex] += x.Data[xIndex] * activatedgy[gyIndex];
                                        x.Grad[xIndex]      += weight.Data[wIndex] * activatedgy[gyIndex];
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
예제 #3
0
        /// <summary>
        /// Accumulates gradients for a kernel whose weight is indexed as
        /// [input channel, output channel, kernel row, kernel column]: for every output position, the
        /// (optionally activation-adjusted) gradient is combined with each input position whose
        /// stride-scaled coordinate lies within one kernel extent of it, adding to
        /// <c>weight.Grad</c> and <c>x.Grad</c>.
        /// </summary>
        /// <param name="y">Output array indexed as [channel, row, column] per batch item.</param>
        /// <param name="x">Input array indexed as [channel, row, column] per batch item; <c>Grad</c> is accumulated into.</param>
        /// <param name="weight">Weight array; <c>Shape[0]</c> is the input count, <c>Shape[1]</c> the output count, <c>Shape[2]</c>/<c>Shape[3]</c> the kernel height/width.</param>
        /// <param name="bias">Optional bias; when non-null its <c>Grad</c> is passed to <c>CalcBiasGrad</c>.</param>
        /// <param name="strideX">Horizontal multiplier applied to input columns.</param>
        /// <param name="strideY">Vertical multiplier applied to input rows.</param>
        /// <param name="padX">Horizontal offset added to output columns before matching them with input columns.</param>
        /// <param name="padY">Vertical offset added to output rows before matching them with input rows.</param>
        /// <param name="activation">Optional activation; when non-null the gradient comes from <c>GetActivatedgy(y, x)</c>.</param>
        public static void SingleOutputBackward(NdArray <Real> y, NdArray <Real> x, NdArray <Real> weight, NdArray <Real> bias, int strideX, int strideY, int padX, int padY, ICompressibleActivation <Real> activation)
        {
            int inputCount  = weight.Shape[0];
            int outputCount = weight.Shape[1];

            Real[] activatedgy;
            if (activation != null)
            {
                activatedgy = activation.GetActivatedgy(y, x);
            }
            else
            {
                activatedgy = y.Grad;
            }

            if (bias != null)
            {
                CalcBiasGrad(activatedgy, bias.Grad, y.Shape, y.BatchCount);
            }

            int kernelHeight = weight.Shape[2];
            int kernelWidth  = weight.Shape[3];
            int outHeight    = y.Shape[1];
            int outWidth     = y.Shape[2];
            int inHeight     = x.Shape[1];
            int inWidth      = x.Shape[2];

            for (int batch = 0; batch < y.BatchCount; batch++)
            {
                for (int outCh = 0; outCh < outputCount; outCh++)
                {
                    int wOutChOffset = outCh * kernelHeight * kernelWidth;
                    int yPlaneOffset = outCh * outHeight * outWidth;

                    // oy / ox are output coordinates shifted by the padding.
                    for (int oy = padY; oy < outHeight + padY; oy++)
                    {
                        // Last input row (exclusive) with iy * strideY <= oy, capped at the input height.
                        int iyLimit = oy / strideY + 1;
                        if (iyLimit > inHeight)
                        {
                            iyLimit = inHeight;
                        }

                        // First input row with oy - iy * strideY < kernelHeight.
                        int iyStart = 0;
                        if (oy - kernelHeight >= 0)
                        {
                            iyStart = (oy - kernelHeight) / strideY + 1;
                        }

                        for (int ox = padX; ox < outWidth + padX; ox++)
                        {
                            int ixLimit = ox / strideX + 1;
                            if (ixLimit > inWidth)
                            {
                                ixLimit = inWidth;
                            }

                            int ixStart = 0;
                            if (ox - kernelWidth >= 0)
                            {
                                ixStart = (ox - kernelWidth) / strideX + 1;
                            }

                            int gyIndex = batch * y.Length + yPlaneOffset + (oy - padY) * outWidth + ox - padX;

                            for (int inCh = 0; inCh < inputCount; inCh++)
                            {
                                int wInChOffset  = inCh * outputCount * kernelHeight * kernelWidth + wOutChOffset;
                                int xPlaneOffset = batch * x.Length + inCh * inHeight * inWidth;

                                for (int iy = iyStart; iy < iyLimit; iy++)
                                {
                                    int xRowBase = xPlaneOffset + iy * inWidth;
                                    int wRowBase = wInChOffset + (oy - iy * strideY) * kernelWidth + ox;

                                    for (int ix = ixStart; ix < ixLimit; ix++)
                                    {
                                        int xIndex = xRowBase + ix;
                                        int wIndex = wRowBase - ix * strideX;

                                        weight.Grad[wIndex] += x.Data[xIndex] * activatedgy[gyIndex];
                                        x.Grad[xIndex]      += weight.Data[wIndex] * activatedgy[gyIndex];
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
예제 #4
0
        /// <summary>
        /// Runs the two supplied OpenCL kernels for the backward pass: <paramref name="backwardgWKernel"/>
        /// operates on a buffer wrapping <c>weight.Grad</c>, which is read back into <c>weight.Grad</c>, and
        /// <paramref name="backwardgXKernel"/> fills a temporary buffer whose contents are then added to
        /// <c>x.Grad</c>. When a bias is supplied, its gradient is updated on the host through
        /// <c>Deconvolution2DFunc.CalcBiasGrad</c>.
        /// </summary>
        /// <param name="y">Output array; shape, length and batch count are forwarded to the kernels.</param>
        /// <param name="x">Input array; <c>Data</c> is uploaded and <c>Grad</c> is accumulated into.</param>
        /// <param name="weight">Weight array; <c>Shape</c> supplies input count, output count, kernel height and kernel width in that order.</param>
        /// <param name="bias">Optional bias; skipped when null.</param>
        /// <param name="strideX">Horizontal stride forwarded to both kernels.</param>
        /// <param name="strideY">Vertical stride forwarded to both kernels.</param>
        /// <param name="padX">Horizontal padding forwarded to both kernels.</param>
        /// <param name="padY">Vertical padding forwarded to both kernels.</param>
        /// <param name="backwardgWKernel">Kernel launched over (inputCount * outputCount, kernelHeight, kernelWidth).</param>
        /// <param name="backwardgXKernel">Kernel launched over (x.BatchCount * x.Shape[0], x.Shape[1], x.Shape[2]).</param>
        /// <param name="activation">Optional activation; when non-null the gradient comes from <c>GetActivatedgy(y, x)</c>.</param>
        public static void SingleOutputBackward(NdArray <Real> y, NdArray <Real> x, NdArray <Real> weight, NdArray <Real> bias, int strideX, int strideY, int padX, int padY, ComputeKernel backwardgWKernel, ComputeKernel backwardgXKernel, ICompressibleActivation <Real> activation)
        {
            int inputCount   = weight.Shape[0];
            int outputCount  = weight.Shape[1];
            int kernelHeight = weight.Shape[2];
            int kernelWidth  = weight.Shape[3];

            // Host-side landing area for the input gradient produced by the second kernel.
            Real[] inputGrad = new Real[x.Data.Length];

            Real[] activatedgy;
            if (activation != null)
            {
                activatedgy = activation.GetActivatedgy(y, x);
            }
            else
            {
                activatedgy = y.Grad;
            }

            if (bias != null)
            {
                Deconvolution2DFunc.CalcBiasGrad(activatedgy, bias.Grad, y.Shape, y.BatchCount);
            }

            ComputeMemoryFlags hostReadOnly  = ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer;
            ComputeMemoryFlags hostReadWrite = ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.UseHostPointer;
            ComputeMemoryFlags deviceOutput  = ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.AllocateHostPointer;

            // The output-gradient buffer is shared by both kernel launches.
            using (ComputeBuffer <Real> outputGradBuffer = new ComputeBuffer <Real>(OpenCL.Context, hostReadOnly, activatedgy))
            {
                // Pass 1: weight-gradient kernel.
                using (ComputeBuffer <Real> weightGradBuffer = new ComputeBuffer <Real>(OpenCL.Context, hostReadWrite, weight.Grad))
                {
                    using (ComputeBuffer <Real> inputDataBuffer = new ComputeBuffer <Real>(OpenCL.Context, hostReadOnly, x.Data))
                    {
                        backwardgWKernel.SetMemoryArgument(0, outputGradBuffer);
                        backwardgWKernel.SetMemoryArgument(1, inputDataBuffer);
                        backwardgWKernel.SetMemoryArgument(2, weightGradBuffer);
                        backwardgWKernel.SetValueArgument(3, y.BatchCount);
                        backwardgWKernel.SetValueArgument(4, outputCount);
                        backwardgWKernel.SetValueArgument(5, y.Length);
                        backwardgWKernel.SetValueArgument(6, y.Shape[1]);
                        backwardgWKernel.SetValueArgument(7, y.Shape[2]);
                        backwardgWKernel.SetValueArgument(8, x.Shape[1]);
                        backwardgWKernel.SetValueArgument(9, x.Shape[2]);
                        backwardgWKernel.SetValueArgument(10, x.Length);
                        backwardgWKernel.SetValueArgument(11, strideX);
                        backwardgWKernel.SetValueArgument(12, strideY);
                        backwardgWKernel.SetValueArgument(13, padX);
                        backwardgWKernel.SetValueArgument(14, padY);
                        backwardgWKernel.SetValueArgument(15, kernelHeight);
                        backwardgWKernel.SetValueArgument(16, kernelWidth);

                        long[] weightGradWorkSize = new long[] { inputCount * outputCount, kernelHeight, kernelWidth };
                        OpenCL.CommandQueue.Execute(backwardgWKernel, null, weightGradWorkSize, null, null);

                        OpenCL.CommandQueue.Finish();
                        OpenCL.CommandQueue.ReadFromBuffer(weightGradBuffer, ref weight.Grad, true, null);
                    }
                }

                // Pass 2: input-gradient kernel, writing into a device-allocated buffer.
                using (ComputeBuffer <Real> inputGradBuffer = new ComputeBuffer <Real>(OpenCL.Context, deviceOutput, inputGrad.Length))
                {
                    using (ComputeBuffer <Real> weightDataBuffer = new ComputeBuffer <Real>(OpenCL.Context, hostReadOnly, weight.Data))
                    {
                        backwardgXKernel.SetMemoryArgument(0, outputGradBuffer);
                        backwardgXKernel.SetMemoryArgument(1, weightDataBuffer);
                        backwardgXKernel.SetMemoryArgument(2, inputGradBuffer);
                        backwardgXKernel.SetValueArgument(3, outputCount);
                        backwardgXKernel.SetValueArgument(4, inputCount);
                        backwardgXKernel.SetValueArgument(5, y.Length);
                        backwardgXKernel.SetValueArgument(6, y.Shape[1]);
                        backwardgXKernel.SetValueArgument(7, y.Shape[2]);
                        backwardgXKernel.SetValueArgument(8, x.Shape[1]);
                        backwardgXKernel.SetValueArgument(9, x.Shape[2]);
                        backwardgXKernel.SetValueArgument(10, x.Length);
                        backwardgXKernel.SetValueArgument(11, strideX);
                        backwardgXKernel.SetValueArgument(12, strideY);
                        backwardgXKernel.SetValueArgument(13, padX);
                        backwardgXKernel.SetValueArgument(14, padY);
                        backwardgXKernel.SetValueArgument(15, kernelHeight);
                        backwardgXKernel.SetValueArgument(16, kernelWidth);

                        long[] inputGradWorkSize = new long[] { x.BatchCount * x.Shape[0], x.Shape[1], x.Shape[2] };
                        OpenCL.CommandQueue.Execute(backwardgXKernel, null, inputGradWorkSize, null, null);

                        OpenCL.CommandQueue.Finish();
                        OpenCL.CommandQueue.ReadFromBuffer(inputGradBuffer, ref inputGrad, true, null);
                    }
                }
            }

            // Add the freshly computed input gradient to whatever x.Grad already holds.
            for (int index = 0; index < x.Grad.Length; index++)
            {
                x.Grad[index] += inputGrad[index];
            }
        }
예제 #5
0
        /// <summary>
        /// Runs the two supplied OpenCL kernels for the backward pass: <paramref name="backwardgWKernel"/>
        /// operates on a buffer wrapping <c>weight.Grad</c>, which is read back into <c>weight.Grad</c>, and
        /// <paramref name="backwardgXKernel"/> fills a temporary buffer whose contents are then added to
        /// <c>x.Grad</c>. When a bias is supplied, its gradient is updated on the host through
        /// <c>LinearFunc.CalcBiasGrad</c>.
        /// </summary>
        /// <param name="y">Output array; <c>BatchCount</c> is forwarded to the kernels.</param>
        /// <param name="x">Input array; <c>Data</c> is uploaded and <c>Grad</c> is accumulated into.</param>
        /// <param name="weight">Weight array; <c>Shape[0]</c> is taken as the output count and <c>Shape[1]</c> as the input count.</param>
        /// <param name="bias">Optional bias; skipped when null.</param>
        /// <param name="backwardgWKernel">Kernel launched over (inputCount, outputCount).</param>
        /// <param name="backwardgXKernel">Kernel launched over (inputCount, y.BatchCount).</param>
        /// <param name="activation">Optional activation; when non-null the gradient comes from <c>GetActivatedgy(y, x)</c>.</param>
        public static void SingleOutputBackward(NdArray <Real> y, NdArray <Real> x, NdArray <Real> weight, NdArray <Real> bias, ComputeKernel backwardgWKernel, ComputeKernel backwardgXKernel, ICompressibleActivation <Real> activation)
        {
            int outputCount = weight.Shape[0];
            int inputCount  = weight.Shape[1];

            // Host-side landing area for the input gradient produced by the second kernel.
            Real[] inputGrad = new Real[x.Data.Length];

            Real[] activatedgy;
            if (activation != null)
            {
                activatedgy = activation.GetActivatedgy(y, x);
            }
            else
            {
                activatedgy = y.Grad;
            }

            if (bias != null)
            {
                LinearFunc.CalcBiasGrad(activatedgy, y.BatchCount, outputCount, bias.Grad);
            }

            ComputeMemoryFlags hostReadOnly  = ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.UseHostPointer;
            ComputeMemoryFlags hostReadWrite = ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.UseHostPointer;
            ComputeMemoryFlags deviceOutput  = ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.AllocateHostPointer;

            // The output-gradient buffer is shared by both kernel launches.
            using (ComputeBuffer <Real> outputGradBuffer = new ComputeBuffer <Real>(OpenCL.Context, hostReadOnly, activatedgy))
            {
                // Pass 1: weight-gradient kernel.
                using (ComputeBuffer <Real> weightGradBuffer = new ComputeBuffer <Real>(OpenCL.Context, hostReadWrite, weight.Grad))
                {
                    using (ComputeBuffer <Real> inputDataBuffer = new ComputeBuffer <Real>(OpenCL.Context, hostReadOnly, x.Data))
                    {
                        backwardgWKernel.SetMemoryArgument(0, outputGradBuffer);
                        backwardgWKernel.SetMemoryArgument(1, inputDataBuffer);
                        backwardgWKernel.SetMemoryArgument(2, weightGradBuffer);
                        backwardgWKernel.SetValueArgument(3, y.BatchCount);
                        backwardgWKernel.SetValueArgument(4, outputCount);
                        backwardgWKernel.SetValueArgument(5, inputCount);

                        long[] weightGradWorkSize = new long[] { inputCount, outputCount };
                        OpenCL.CommandQueue.Execute(backwardgWKernel, null, weightGradWorkSize, null, null);

                        OpenCL.CommandQueue.Finish();
                        OpenCL.CommandQueue.ReadFromBuffer(weightGradBuffer, ref weight.Grad, true, null);
                    }
                }

                // Pass 2: input-gradient kernel, writing into a device-allocated buffer.
                using (ComputeBuffer <Real> inputGradBuffer = new ComputeBuffer <Real>(OpenCL.Context, deviceOutput, inputGrad.Length))
                {
                    using (ComputeBuffer <Real> weightDataBuffer = new ComputeBuffer <Real>(OpenCL.Context, hostReadOnly, weight.Data))
                    {
                        backwardgXKernel.SetMemoryArgument(0, outputGradBuffer);
                        backwardgXKernel.SetMemoryArgument(1, weightDataBuffer);
                        backwardgXKernel.SetMemoryArgument(2, inputGradBuffer);
                        backwardgXKernel.SetValueArgument(3, y.BatchCount);
                        backwardgXKernel.SetValueArgument(4, outputCount);
                        backwardgXKernel.SetValueArgument(5, inputCount);

                        long[] inputGradWorkSize = new long[] { inputCount, y.BatchCount };
                        OpenCL.CommandQueue.Execute(backwardgXKernel, null, inputGradWorkSize, null, null);

                        OpenCL.CommandQueue.Finish();
                        OpenCL.CommandQueue.ReadFromBuffer(inputGradBuffer, ref inputGrad, true, null);
                    }
                }
            }

            // Add the freshly computed input gradient to whatever x.Grad already holds.
            for (int index = 0; index < x.Grad.Length; index++)
            {
                x.Grad[index] += inputGrad[index];
            }
        }