Exemplo n.º 1
0
        /// <summary>
        /// GPU implementation of 2D convolution using cuDNN's forward convolution path.
        /// Computes result = cross-correlation(t, kernels) with the given stride and padding.
        /// All three tensors are moved to device memory before the cuDNN call.
        /// </summary>
        public override void Conv2D(Tensor t, Tensor kernels, int stride, Tensor.PaddingType padding, Tensor result)
        {
            // Only paddingX/paddingY are consumed here; outputWidth/outputHeight are
            // required out-parameters of GetPaddingParams but 'result' is pre-sized by the caller.
            int outputWidth = 0, outputHeight = 0, paddingX = 0, paddingY = 0;

            Tensor.GetPaddingParams(padding, t.Width, t.Height, kernels.Width, kernels.Height, stride, out outputHeight, out outputWidth, out paddingX, out paddingY);

            // Ensure every operand is resident in device memory.
            t.CopyToDevice();
            kernels.CopyToDevice();
            result.CopyToDevice();

            using (var convolutionDesc = new ConvolutionDescriptor())
                using (var tDesc = new TensorDescriptor())
                    using (var kernelsDesc = new FilterDescriptor())
                        using (var resultDesc = new TensorDescriptor())
                        {
                            // CrossCorrelation (not Convolution) matches the CPU reference
                            // implementation, which does not rotate the kernels.
                            convolutionDesc.SetConvolution2dDescriptor(paddingY, paddingX, stride, stride, 1, 1, cudnnConvolutionMode.CrossCorrelation, cudnnDataType.Float);
                            // NOTE(review): Dimensions[3..0] are passed as cuDNN's (n, c, h, w),
                            // so Shape.Dimensions appears ordered [W, H, C, N] — TODO confirm.
                            tDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float, t.Shape.Dimensions[3], t.Shape.Dimensions[2], t.Shape.Dimensions[1], t.Shape.Dimensions[0]);
                            kernelsDesc.SetFilter4dDescriptor(cudnnDataType.Float, cudnnTensorFormat.NCHW, kernels.Shape.Dimensions[3], kernels.Shape.Dimensions[2], kernels.Shape.Dimensions[1], kernels.Shape.Dimensions[0]);
                            resultDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float, result.Shape.Dimensions[3], result.Shape.Dimensions[2], result.Shape.Dimensions[1], result.Shape.Dimensions[0]);

                            // Let cuDNN pick the fastest algorithm with no workspace memory limit.
                            var algo = _CudnnContext.GetConvolutionForwardAlgorithm(tDesc, kernelsDesc, convolutionDesc, resultDesc, cudnnConvolutionFwdPreference.PreferFastest, IntPtr.Zero);

                            // cuDNN may report 0 workspace bytes; allocate at least 1 byte so the
                            // device variable passed to ConvolutionForward is always valid.
                            var workspaceSize = _CudnnContext.GetConvolutionForwardWorkspaceSize(tDesc, kernelsDesc, convolutionDesc, resultDesc, algo);
                            workspaceSize = workspaceSize == 0 ? new SizeT(1) : workspaceSize;

                            // Reuse the cached per-tensor workspace when its size still matches.
                            if (result.GpuData.ConvWorkspace == null || result.GpuData.ConvWorkspace.Size != workspaceSize)
                            {
                                result.GpuData.ConvWorkspace = new CudaDeviceVariable <byte>(workspaceSize);
                            }

                            // result = 1.0 * conv(t, kernels) + 0.0 * result (output overwritten).
                            _CudnnContext.ConvolutionForward(1.0f, tDesc, t.GpuData.DeviceVar, kernelsDesc, kernels.GpuData.DeviceVar, convolutionDesc, algo, result.GpuData.ConvWorkspace, 0.0f, resultDesc, result.GpuData.DeviceVar);
                        }
        }
Exemplo n.º 2
0
        /// <summary>
        /// CPU implementation of 2D convolution (cross-correlation, no kernel rotation).
        /// Writes result[x, y, outD, n] = sum over the kernel window of t * kernels,
        /// treating out-of-bounds input positions as zero via TryGet.
        /// </summary>
        public override void Conv2D(Tensor t, Tensor kernels, int stride, Tensor.PaddingType padding, Tensor result)
        {
            // Operate on host memory; mark the result as host-resident.
            t.CopyToHost();
            kernels.CopyToHost();
            result.CurrentLocation = Tensor.Location.Host;

            int fullW = 0, fullH = 0, padX = 0, padY = 0;
            Tensor.GetPaddingParams(padding, t.Width, t.Height, kernels.Width, kernels.Height, stride, out fullH, out fullW, out padX, out padY);

            // Parallelize over batch images, and over output depth slices within each image;
            // every (n, outD) pair writes a disjoint region of 'result'.
            Parallel.For(0, t.BatchSize, n =>
            {
                Parallel.For(0, kernels.BatchSize, outD =>
                {
                    for (int outH = 0; outH < result.Height; ++outH)
                    {
                        int h = outH * stride - padY;
                        for (int outW = 0; outW < result.Width; ++outW)
                        {
                            int w = outW * stride - padX;
                            float sum = 0;

                            // Correlate the kernel with the (implicitly zero-padded) input window.
                            for (int kd = 0; kd < kernels.Depth; ++kd)
                                for (int kh = 0; kh < kernels.Height; ++kh)
                                    for (int kw = 0; kw < kernels.Width; ++kw)
                                        sum += t.TryGet(0, w + kw, h + kh, kd, n) * kernels[kw, kh, kd, outD];

                            result[outW, outH, outD, n] = sum;
                        }
                    }
                });
            });
        }
Exemplo n.º 3
0
        /// <summary>
        /// CPU implementation of the kernels-gradient pass of Conv2D.
        /// Accumulates (+=) input-window * upstream-gradient products into kernelsGradient.
        /// </summary>
        public override void Conv2DKernelsGradient(Tensor input, Tensor gradient, int stride, Tensor.PaddingType padding, Tensor kernelsGradient)
        {
            input.CopyToHost();
            gradient.CopyToHost();
            kernelsGradient.CopyToHost();

            int fullW = 0, fullH = 0, padX = 0, padY = 0;
            Tensor.GetPaddingParams(padding, input.Width, input.Height, kernelsGradient.Width, kernelsGradient.Height, stride, out fullH, out fullW, out padX, out padY);

            // Batch images are walked sequentially; kernel slices (outD) are independent
            // of each other and can therefore be accumulated in parallel safely.
            for (int n = 0; n < gradient.BatchSize; ++n)
            {
                Parallel.For(0, kernelsGradient.BatchSize, outD =>
                {
                    for (int gy = 0; gy < gradient.Height; ++gy)
                    {
                        int h = gy * stride - padY;
                        for (int gx = 0; gx < gradient.Width; ++gx)
                        {
                            int w = gx * stride - padX;
                            float grad = gradient[gx, gy, outD, n];

                            // TryGet yields 0 outside the input bounds (zero padding).
                            for (int kd = 0; kd < kernelsGradient.Depth; ++kd)
                                for (int kh = 0; kh < kernelsGradient.Height; ++kh)
                                    for (int kw = 0; kw < kernelsGradient.Width; ++kw)
                                        kernelsGradient[kw, kh, kd, outD] += input.TryGet(0, w + kw, h + kh, kd, n) * grad;
                        }
                    }
                });
            }
        }
Exemplo n.º 4
0
        /// <summary>
        /// CPU implementation of the input-gradient pass of Conv2D: a full-padding
        /// correlation of the upstream gradient with kernels rotated 180 degrees.
        /// Accumulates (+=) into inputGradients.
        /// </summary>
        public override void Conv2DInputGradient(Tensor gradient, Tensor kernels, int stride, Tensor.PaddingType padding, Tensor inputGradients)
        {
            gradient.CopyToHost();
            kernels.CopyToHost();
            inputGradients.CopyToHost();

            Tensor rotKernels = kernels.Rotated180();

            // The backward-to-input pass always uses full padding, regardless of the
            // padding mode the forward pass was invoked with.
            padding = Tensor.PaddingType.Full;

            int fullW = 0, fullH = 0, padX = 0, padY = 0;
            Tensor.GetPaddingParams(padding, gradient.Width, gradient.Height, kernels.Width, kernels.Height, stride, out fullH, out fullW, out padX, out padY);

            Parallel.For(0, gradient.BatchSize, n =>
            {
                for (int y = 0; y < inputGradients.Height; ++y)
                {
                    int h = y * stride - padY;
                    for (int x = 0; x < inputGradients.Width; ++x)
                    {
                        int w = x * stride - padX;
                        // Each depth slice of the input gradient is independent.
                        Parallel.For(0, inputGradients.Depth, outD =>
                        {
                            // TryGet yields 0 outside the gradient bounds (zero padding).
                            for (int kn = 0; kn < rotKernels.BatchSize; ++kn)
                                for (int kh = 0; kh < rotKernels.Height; ++kh)
                                    for (int kw = 0; kw < rotKernels.Width; ++kw)
                                        inputGradients[x, y, outD, n] += gradient.TryGet(0, w + kw, h + kh, kn, n) * rotKernels[kw, kh, outD, kn];
                        });
                    }
                }
            });
        }
Exemplo n.º 5
0
        /// <summary>
        /// GPU implementation of the kernels-gradient pass using cuDNN's
        /// backward-filter convolution. Overwrites kernelsGradient on the device.
        /// </summary>
        public override void Conv2DKernelsGradient(Tensor input, Tensor gradient, int stride, Tensor.PaddingType padding, Tensor kernelsGradient)
        {
            // Only paddingX/paddingY are consumed; outputWidth/outputHeight are
            // required out-parameters of GetPaddingParams but are otherwise unused here.
            int outputWidth = 0, outputHeight = 0, paddingX = 0, paddingY = 0;

            Tensor.GetPaddingParams(padding, input.Width, input.Height, kernelsGradient.Width, kernelsGradient.Height, stride, out outputHeight, out outputWidth, out paddingX, out paddingY);

            // Ensure every operand is resident in device memory.
            gradient.CopyToDevice();
            input.CopyToDevice();
            kernelsGradient.CopyToDevice();

            using (var convolutionDesc = new ConvolutionDescriptor())
                using (var gradientDesc = new TensorDescriptor())
                    using (var inputDesc = new TensorDescriptor())
                        using (var kernelsGradientsDesc = new FilterDescriptor())
                        {
                            // Descriptor setup mirrors the forward pass: cross-correlation mode,
                            // same stride/padding, Dimensions[3..0] passed as cuDNN's (n, c, h, w).
                            convolutionDesc.SetConvolution2dDescriptor(paddingY, paddingX, stride, stride, 1, 1, cudnnConvolutionMode.CrossCorrelation, cudnnDataType.Float);
                            gradientDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float, gradient.Shape.Dimensions[3], gradient.Shape.Dimensions[2], gradient.Shape.Dimensions[1], gradient.Shape.Dimensions[0]);
                            inputDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float, input.Shape.Dimensions[3], input.Shape.Dimensions[2], input.Shape.Dimensions[1], input.Shape.Dimensions[0]);
                            kernelsGradientsDesc.SetFilter4dDescriptor(cudnnDataType.Float, cudnnTensorFormat.NCHW, kernelsGradient.Shape.Dimensions[3], kernelsGradient.Shape.Dimensions[2], kernelsGradient.Shape.Dimensions[1], kernelsGradient.Shape.Dimensions[0]);

                            // Fastest algorithm, no memory limit; cuDNN may report 0 workspace
                            // bytes, so allocate at least 1 to keep the device variable valid.
                            var algo          = _CudnnContext.GetConvolutionBackwardFilterAlgorithm(inputDesc, gradientDesc, convolutionDesc, kernelsGradientsDesc, cudnnConvolutionBwdFilterPreference.PreferFastest, IntPtr.Zero);
                            var workspaceSize = _CudnnContext.GetConvolutionBackwardFilterWorkspaceSize(inputDesc, gradientDesc, convolutionDesc, kernelsGradientsDesc, algo);
                            workspaceSize = workspaceSize == 0 ? new SizeT(1) : workspaceSize;

                            // Reuse the cached per-tensor workspace when its size still matches.
                            if (kernelsGradient.GpuData.ConvBackKernelWorkspace == null || kernelsGradient.GpuData.ConvBackKernelWorkspace.Size != workspaceSize)
                            {
                                kernelsGradient.GpuData.ConvBackKernelWorkspace = new CudaDeviceVariable <byte>(workspaceSize);
                            }

                            // kernelsGradient = 1.0 * dL/dK + 0.0 * kernelsGradient (overwritten,
                            // unlike the CPU implementations which accumulate with +=).
                            _CudnnContext.ConvolutionBackwardFilter(1.0f, inputDesc, input.GpuData.DeviceVar, gradientDesc, gradient.GpuData.DeviceVar, convolutionDesc, algo, kernelsGradient.GpuData.ConvBackKernelWorkspace, 0.0f, kernelsGradientsDesc, kernelsGradient.GpuData.DeviceVar);
                        }
        }
Exemplo n.º 6
0
        /// <summary>
        /// Reference (single-threaded CPU) implementation of the kernels-gradient
        /// pass of Conv2D. For every kernel weight it accumulates (+=) the product
        /// of the corresponding input value and the upstream gradient, over all
        /// batch images and all gradient positions.
        /// </summary>
        public virtual void Conv2DKernelsGradient(Tensor input, Tensor gradient, int stride, Tensor.PaddingType padding, Tensor kernelsGradient)
        {
            input.CopyToHost();
            gradient.CopyToHost();
            kernelsGradient.CopyToHost();

            int fullW = 0, fullH = 0, padX = 0, padY = 0;
            Tensor.GetPaddingParams(padding, input.Width, input.Height, kernelsGradient.Width, kernelsGradient.Height, stride, out fullH, out fullW, out padX, out padY);

            // Outer loops enumerate every kernel weight (depth, row, column, kernel index).
            for (int kd = 0; kd < kernelsGradient.Depth; ++kd)
            for (int kh = 0; kh < kernelsGradient.Height; ++kh)
            for (int kw = 0; kw < kernelsGradient.Width; ++kw)
            for (int kn = 0; kn < kernelsGradient.BatchSize; ++kn)
            {
                // Inner loops sweep every gradient position of every batch image.
                for (int n = 0; n < gradient.BatchSize; ++n)
                {
                    for (int gy = 0; gy < gradient.Height; ++gy)
                    {
                        int h = gy * stride - padY;
                        for (int gx = 0; gx < gradient.Width; ++gx)
                        {
                            int w = gx * stride - padX;
                            // TryGet yields 0 outside the input bounds (zero padding).
                            kernelsGradient[kw, kh, kd, kn] += input.TryGet(0, w + kw, h + kh, kd, n) * gradient[gx, gy, kn, n];
                        }
                    }
                }
            }
        }