Exemplo n.º 1
0
        /// <summary>
        /// Runs the forward kernel on the GPU and wraps the buffer that is read back in an <c>NdArray</c>.
        /// </summary>
        /// <param name="x">Input array; its <c>Data</c> is handed to the kernel and its <c>BatchCount</c> sizes the launch.</param>
        /// <returns>The converted output with shape <c>{ OutputCount }</c> and the batch count of <paramref name="x"/>.</returns>
        protected override NdArray NeedPreviousForwardGpu(NdArray x)
        {
            var batchCount = x.BatchCount;

            // Start from a zero-filled buffer when there is no bias, otherwise from the pre-biased values.
            Real[] output;
            if (NoBias)
            {
                output = new Real[OutputCount * batchCount];
            }
            else
            {
                output = GetBiasedValue(batchCount);
            }

            // Global work size: OutputCount x batchCount.
            long[] globalWorkSize = { OutputCount, batchCount };

            using (var inputBuffer = new ComputeBuffer<Real>(Weaver.Context, ComputeMemoryFlags.None, x.Data))
            using (var outputBuffer = new ComputeBuffer<Real>(Weaver.Context, ComputeMemoryFlags.None, output))
            {
                // Kernel arguments: 0 = input, 1 = weights (gpuW is declared outside this method),
                // 2 = output, 3 = OutputCount, 4 = InputCount.
                ForwardKernel.SetMemoryArgument(0, inputBuffer);
                ForwardKernel.SetMemoryArgument(1, gpuW);
                ForwardKernel.SetMemoryArgument(2, outputBuffer);
                ForwardKernel.SetValueArgument(3, OutputCount);
                ForwardKernel.SetValueArgument(4, InputCount);

                Weaver.CommandQueue.Execute(ForwardKernel, null, globalWorkSize, null, null);

                Weaver.CommandQueue.Flush();

                // Sleep between Flush and Finish to reduce CPU use while the kernel runs. Per the
                // original author's measurement, 6.5 is 65% of the computation time (10.4 ms on a 1080 Ti).
                ASleep(6.5);

                Weaver.CommandQueue.Finish();
                Weaver.CommandQueue.ReadFromBuffer(outputBuffer, ref output, true, null);
            }

            return NdArray.Convert(output, new[] { OutputCount }, batchCount, this);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Runs the forward kernel on the GPU: copies the input, the weights and the initial output
        /// into device buffers, launches the kernel and reads the output back.
        /// </summary>
        /// <param name="x">Input array; supplies the data and the batch count.</param>
        /// <returns>The converted output with shape <c>{ OutputCount }</c> and the batch count of <paramref name="x"/>.</returns>
        protected override NdArray NeedPreviousForwardGpu(NdArray x)
        {
            var batchCount = x.BatchCount;

            // Zero-filled when there is no bias, otherwise pre-filled by GetBiasedValue.
            Real[] output;
            if (NoBias)
            {
                output = new Real[OutputCount * batchCount];
            }
            else
            {
                output = GetBiasedValue(batchCount);
            }

            const ComputeMemoryFlags readOnlyCopy = ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer;
            const ComputeMemoryFlags readWriteCopy = ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer;

            // Global work size: OutputCount x batchCount.
            long[] globalWorkSize = { OutputCount, batchCount };

            using (var inputBuffer = new ComputeBuffer<Real>(Weaver.Context, readOnlyCopy, x.Data))
            using (var weightBuffer = new ComputeBuffer<Real>(Weaver.Context, readOnlyCopy, Weight.Data))
            using (var outputBuffer = new ComputeBuffer<Real>(Weaver.Context, readWriteCopy, output))
            {
                // Kernel arguments: 0 = input, 1 = weights, 2 = output, 3 = OutputCount, 4 = InputCount.
                ForwardKernel.SetMemoryArgument(0, inputBuffer);
                ForwardKernel.SetMemoryArgument(1, weightBuffer);
                ForwardKernel.SetMemoryArgument(2, outputBuffer);
                ForwardKernel.SetValueArgument(3, OutputCount);
                ForwardKernel.SetValueArgument(4, InputCount);

                Weaver.CommandQueue.Execute(ForwardKernel, null, globalWorkSize, null, null);

                // Wait for the kernel, then do a blocking read of the result into the host array.
                Weaver.CommandQueue.Finish();
                Weaver.CommandQueue.ReadFromBuffer(outputBuffer, ref output, true, null);
            }

            return NdArray.Convert(output, new[] { OutputCount }, batchCount, this);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Forward pass for a single input. Falls back to the base implementation unless
        /// <c>IsParallel</c> is set; otherwise runs the forward kernel through OpenCL.
        /// </summary>
        /// <param name="x">Input array; supplies the data and the batch count.</param>
        /// <returns>The converted output with shape <c>{ OutputCount }</c> and the batch count of <paramref name="x"/>.</returns>
        public override NdArray SingleInputForward(NdArray x)
        {
            // Flag check: without the parallel flag, use the base implementation.
            if (!IsParallel)
            {
                return base.SingleInputForward(x);
            }

            // Zero-filled when there is no bias, otherwise pre-filled by GetBiasedValue.
            Real[] y = this.NoBias ? new Real[OutputCount * x.BatchCount] : GetBiasedValue(x.BatchCount);

            // The buffers are created with CopyHostPointer rather than UseHostPointer.
            // UseHostPointer requires the host memory to stay valid at the same address for the
            // whole lifetime of the buffer, which nothing in this method guarantees for these
            // managed arrays. Copying at creation removes that requirement; the result is
            // still transferred back explicitly by ReadFromBuffer below.
            using (ComputeBuffer<Real> gpuX = new ComputeBuffer<Real>(OpenCL.Context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, x.Data))
            using (ComputeBuffer<Real> gpuW = new ComputeBuffer<Real>(OpenCL.Context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, this.Weight.Data))
            using (ComputeBuffer<Real> gpuY = new ComputeBuffer<Real>(OpenCL.Context, ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer, y))
            {
                // Kernel arguments: 0 = input, 1 = weights, 2 = output, 3 = OutputCount, 4 = InputCount.
                ForwardKernel.SetMemoryArgument(0, gpuX);
                ForwardKernel.SetMemoryArgument(1, gpuW);
                ForwardKernel.SetMemoryArgument(2, gpuY);
                ForwardKernel.SetValueArgument(3, this.OutputCount);
                ForwardKernel.SetValueArgument(4, this.InputCount);

                // Global work size: OutputCount x BatchCount.
                OpenCL.CommandQueue.Execute
                (
                    ForwardKernel,
                    null,
                    new long[] { OutputCount, x.BatchCount },
                    null,
                    null
                );

                // Wait for the kernel, then do a blocking read of the result into y.
                OpenCL.CommandQueue.Finish();
                OpenCL.CommandQueue.ReadFromBuffer(gpuY, ref y, true, null);
            }

            return NdArray.Convert(y, new[] { OutputCount }, x.BatchCount, this);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Runs the forward kernel on the GPU using arrays obtained from <c>GetArray</c>, and
        /// returns an <c>NdArray</c> built from the output array and a separate "y.Grad" array.
        /// Unlike a host round-trip, this method issues no explicit read-back of the output.
        /// </summary>
        /// <param name="x">Input array; supplies the data and the batch count.</param>
        /// <returns>A new <c>NdArray</c> with shape <c>{ OutputCount }</c> and the batch count of <paramref name="x"/>.</returns>
        protected override NdArray NeedPreviousForwardGpu(NdArray x)
        {
            // Initial output values: zero-filled when there is no bias, otherwise from GetBiasedValue.
            var ytemp = NoBias ? new Real[OutputCount * x.BatchCount] : GetBiasedValue(x.BatchCount);
            // Array named "y" with the same length as the initial values.
            var y     = GetArray("y", ytemp.Length, ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer);

            // Load the initial values into y before it is switched to the GPU.
            y.Write(ytemp);

            // Switch input, weights and output to the GPU device type.
            // NOTE(review): presumably this makes the data available in the device buffers
            // returned by GetBuffer() below; confirm against the array type's implementation.
            x.Data.Switch(Common.ComputeDeviceTypes.Gpu);
            Weight.Data.Switch(Common.ComputeDeviceTypes.Gpu);
            y.Switch(Common.ComputeDeviceTypes.Gpu);

            var gpuX = x.Data.GetBuffer();
            var gpuW = Weight.Data.GetBuffer();
            var gpuY = y.GetBuffer();

            // Kernel arguments: 0 = input, 1 = weights, 2 = output, 3 = OutputCount, 4 = InputCount.
            ForwardKernel.SetMemoryArgument(0, gpuX);
            ForwardKernel.SetMemoryArgument(1, gpuW);
            ForwardKernel.SetMemoryArgument(2, gpuY);
            ForwardKernel.SetValueArgument(3, OutputCount);
            ForwardKernel.SetValueArgument(4, InputCount);

            // Global work size: OutputCount x BatchCount.
            Weaver.CommandQueue.Execute
            (
                ForwardKernel,
                null,
                new long[] { OutputCount, x.BatchCount },
                null,
                null
            );

            // Submit the queued work and wait for it to complete before returning.
            Weaver.CommandQueue.Flush();
            Weaver.CommandQueue.Finish();

            // The result keeps y as its data and gets a "y.Grad" array of the same length.
            return(new NdArray(y, GetArray("y.Grad", y.Length, ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.CopyHostPointer), new[] { OutputCount }, x.BatchCount, this));
        }
Exemplo n.º 5
0
        /// <summary>
        /// Forward pass for a single input. Falls back to the base implementation unless
        /// <c>IsParallel</c> is set; otherwise runs the forward kernel through OpenCL.
        /// </summary>
        /// <param name="input">Input array; <c>Shape[1]</c> and <c>Shape[2]</c> are used as its height and width.</param>
        /// <returns>
        /// The converted output with shape <c>{ OutputCount, outputHeight, outputWidth }</c>
        /// and the batch count of <paramref name="input"/>.
        /// </returns>
        public override NdArray SingleInputForward(NdArray input)
        {
            // Flag check: without the parallel flag, use the base implementation.
            if (!IsParallel)
            {
                return base.SingleInputForward(input);
            }

            // Output size: (in - 1) * stride + kernel - 2 * pad.
            int outputHeight = (input.Shape[1] - 1) * this.StrideY + this.KernelHeight - this.PadY * 2;
            int outputWidth  = (input.Shape[2] - 1) * this.StrideX + this.KernelWidth - this.PadX * 2;

            Real[] result = new Real[input.BatchCount * this.OutputCount * outputWidth * outputHeight];

            // A zero-filled bias is passed when the layer has no bias, so the kernel always gets a bias buffer.
            Real[] bias = this.NoBias ? new Real[OutputCount] : this.Bias.Data;

            // The input buffers are created with CopyHostPointer rather than UseHostPointer.
            // UseHostPointer requires the host memory to stay valid at the same address for the
            // whole lifetime of the buffer, which nothing in this method guarantees for these
            // managed arrays (the zero bias is a temporary). Copying at creation removes that requirement.
            using (ComputeBuffer<Real> gpuX = new ComputeBuffer<Real>(OpenCL.Context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, input.Data))
            using (ComputeBuffer<Real> gpuW = new ComputeBuffer<Real>(OpenCL.Context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, this.Weight.Data))
            using (ComputeBuffer<Real> gpub = new ComputeBuffer<Real>(OpenCL.Context, ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer, bias))
            using (ComputeBuffer<Real> gpuY = new ComputeBuffer<Real>(OpenCL.Context, ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.AllocateHostPointer, result.Length))
            {
                // Buffers: 0 = input, 1 = weights, 2 = bias, 3 = output.
                ForwardKernel.SetMemoryArgument(0, gpuX);
                ForwardKernel.SetMemoryArgument(1, gpuW);
                ForwardKernel.SetMemoryArgument(2, gpub);
                ForwardKernel.SetMemoryArgument(3, gpuY);

                // Scalars: input dimensions and length, output size, stride, padding, kernel size, channel counts.
                ForwardKernel.SetValueArgument(4, input.Shape[1]);
                ForwardKernel.SetValueArgument(5, input.Shape[2]);
                ForwardKernel.SetValueArgument(6, input.Length);
                ForwardKernel.SetValueArgument(7, outputWidth);
                ForwardKernel.SetValueArgument(8, outputHeight);
                ForwardKernel.SetValueArgument(9, this.StrideX);
                ForwardKernel.SetValueArgument(10, this.StrideY);
                ForwardKernel.SetValueArgument(11, this.PadX);
                ForwardKernel.SetValueArgument(12, this.PadY);
                ForwardKernel.SetValueArgument(13, this.KernelHeight);
                ForwardKernel.SetValueArgument(14, this.KernelWidth);
                ForwardKernel.SetValueArgument(15, this.OutputCount);
                ForwardKernel.SetValueArgument(16, this.InputCount);

                // Global work size: (BatchCount * OutputCount) x outputHeight x outputWidth.
                OpenCL.CommandQueue.Execute
                (
                    ForwardKernel,
                    null,
                    new long[] { input.BatchCount * OutputCount, outputHeight, outputWidth },
                    null,
                    null
                );

                // Wait for the kernel, then do a blocking read of the output into result.
                OpenCL.CommandQueue.Finish();
                OpenCL.CommandQueue.ReadFromBuffer(gpuY, ref result, true, null);
            }

            return NdArray.Convert(result, new[] { this.OutputCount, outputHeight, outputWidth }, input.BatchCount, this);
        }
Exemplo n.º 6
0
        /// <summary>
        /// Runs the forward kernel on the GPU and returns the output read back from the device.
        /// </summary>
        /// <param name="input">Input array; <c>Shape[1]</c> and <c>Shape[2]</c> are used as its height and width.</param>
        /// <returns>
        /// The converted output with shape <c>{ OutputCount, outputHeight, outputWidth }</c>
        /// and the batch count of <paramref name="input"/>.
        /// </returns>
        protected override NdArray NeedPreviousForwardGpu(NdArray input)
        {
            // Output size: floor((in - kernel + 2 * pad) / stride) + 1, evaluated in floating point.
            int outHeight = (int)Math.Floor((input.Shape[1] - _kHeight + _padY * 2.0) / _strideY) + 1;
            int outWidth  = (int)Math.Floor((input.Shape[2] - _kWidth + _padX * 2.0) / _strideX) + 1;

            Real[] output = new Real[OutputCount * outHeight * outWidth * input.BatchCount];

            const ComputeMemoryFlags readOnlyCopy = ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer;
            const ComputeMemoryFlags writeOnlyAlloc = ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.AllocateHostPointer;

            using (var inputBuffer = new ComputeBuffer<Real>(Weaver.Context, readOnlyCopy, input.Data))
            using (var weightBuffer = new ComputeBuffer<Real>(Weaver.Context, readOnlyCopy, Weight.Data))
            // TODO (carried over from the original code; the intended follow-up is not stated there)
            // A zero-filled bias is passed when the layer has no bias.
            using (var biasBuffer = new ComputeBuffer<Real>(Weaver.Context, readOnlyCopy, NoBias ? new Real[OutputCount] : (Real[])Bias.Data))
            using (var outputBuffer = new ComputeBuffer<Real>(Weaver.Context, writeOnlyAlloc, output.Length))
            {
                // Buffers: 0 = input, 1 = weights, 2 = bias, 3 = output.
                ForwardKernel.SetMemoryArgument(0, inputBuffer);
                ForwardKernel.SetMemoryArgument(1, weightBuffer);
                ForwardKernel.SetMemoryArgument(2, biasBuffer);
                ForwardKernel.SetMemoryArgument(3, outputBuffer);

                // Scalars: input dimensions and length, output size, stride, padding, kernel size, channel counts.
                ForwardKernel.SetValueArgument(4, input.Shape[1]);
                ForwardKernel.SetValueArgument(5, input.Shape[2]);
                ForwardKernel.SetValueArgument(6, input.Length);
                ForwardKernel.SetValueArgument(7, outWidth);
                ForwardKernel.SetValueArgument(8, outHeight);
                ForwardKernel.SetValueArgument(9, _strideX);
                ForwardKernel.SetValueArgument(10, _strideY);
                ForwardKernel.SetValueArgument(11, _padX);
                ForwardKernel.SetValueArgument(12, _padY);
                ForwardKernel.SetValueArgument(13, _kHeight);
                ForwardKernel.SetValueArgument(14, _kWidth);
                ForwardKernel.SetValueArgument(15, OutputCount);
                ForwardKernel.SetValueArgument(16, InputCount);

                // Global work size: (BatchCount * OutputCount) x outHeight x outWidth.
                long[] globalWorkSize = { input.BatchCount * OutputCount, outHeight, outWidth };
                Weaver.CommandQueue.Execute(ForwardKernel, null, globalWorkSize, null, null);

                // Wait for the kernel, then do a blocking read of the output into the host array.
                Weaver.CommandQueue.Finish();
                Weaver.CommandQueue.ReadFromBuffer(outputBuffer, ref output, true, null);
            }

            return NdArray.Convert(output, new[] { OutputCount, outHeight, outWidth }, input.BatchCount, this);
        }
Exemplo n.º 7
0
        /// <summary>
        /// Runs the forward kernel on the GPU and returns the output read back from the device.
        /// </summary>
        /// <param name="input">Input array; <c>Shape[1]</c> and <c>Shape[2]</c> are used as its height and width.</param>
        /// <returns>
        /// The converted output with shape <c>{ OutputCount, outputHeight, outputWidth }</c>
        /// and the batch count of <paramref name="input"/>.
        /// </returns>
        protected override NdArray NeedPreviousForwardGpu([NotNull] NdArray input)
        {
            // Output size: (in - 1) * subSample + kernel - 2 * trim.
            int outHeight = (input.Shape[1] - 1) * _subSampleY + _kHeight - _trimY * 2;
            int outWidth  = (input.Shape[2] - 1) * _subSampleX + _kWidth - _trimX * 2;

            Real[] output = new Real[input.BatchCount * OutputCount * outWidth * outHeight];

            const ComputeMemoryFlags readOnlyCopy = ComputeMemoryFlags.ReadOnly | ComputeMemoryFlags.CopyHostPointer;
            const ComputeMemoryFlags writeOnlyAlloc = ComputeMemoryFlags.WriteOnly | ComputeMemoryFlags.AllocateHostPointer;

            // Stacked using statements: the buffers are disposed in reverse order of creation,
            // exactly as with explicitly nested blocks.
            using (var inputBuffer = new ComputeBuffer<Real>(Weaver.Context, readOnlyCopy, input.Data))
            using (var weightBuffer = new ComputeBuffer<Real>(Weaver.Context, readOnlyCopy, Weight.Data))
            using (var biasBuffer = new ComputeBuffer<Real>(Weaver.Context, readOnlyCopy, NoBias ? new Real[OutputCount] : Bias.Data))
            using (var outputBuffer = new ComputeBuffer<Real>(Weaver.Context, writeOnlyAlloc, output.Length))
            {
                // Buffers: 0 = input, 1 = weights, 2 = bias (zero-filled when NoBias), 3 = output.
                ForwardKernel.SetMemoryArgument(0, inputBuffer);
                ForwardKernel.SetMemoryArgument(1, weightBuffer);
                ForwardKernel.SetMemoryArgument(2, biasBuffer);
                ForwardKernel.SetMemoryArgument(3, outputBuffer);

                // Scalars: input dimensions and length, output size, sub-sampling, trim, kernel size, channel counts.
                ForwardKernel.SetValueArgument(4, input.Shape[1]);
                ForwardKernel.SetValueArgument(5, input.Shape[2]);
                ForwardKernel.SetValueArgument(6, input.Length);
                ForwardKernel.SetValueArgument(7, outWidth);
                ForwardKernel.SetValueArgument(8, outHeight);
                ForwardKernel.SetValueArgument(9, _subSampleX);
                ForwardKernel.SetValueArgument(10, _subSampleY);
                ForwardKernel.SetValueArgument(11, _trimX);
                ForwardKernel.SetValueArgument(12, _trimY);
                ForwardKernel.SetValueArgument(13, _kHeight);
                ForwardKernel.SetValueArgument(14, _kWidth);
                ForwardKernel.SetValueArgument(15, OutputCount);
                ForwardKernel.SetValueArgument(16, InputCount);

                // Global work size: (BatchCount * OutputCount) x outHeight x outWidth.
                long[] globalWorkSize = { input.BatchCount * OutputCount, outHeight, outWidth };
                Weaver.CommandQueue.Execute
                (
                    ForwardKernel,
                    null,
                    globalWorkSize,
                    null,
                    null
                );

                // Wait for the kernel, then do a blocking read of the output into the host array.
                Weaver.CommandQueue.Finish();
                Weaver.CommandQueue.ReadFromBuffer(outputBuffer, ref output, true, null);
            }

            return NdArray.Convert(output, new[] { OutputCount, outHeight, outWidth }, input.BatchCount, this);
        }