예제 #1
0
        /// <summary>
        /// Builds a 2D convolution layer: allocates the weight/bias tensors (laid out
        /// for im2col-style matrix-multiply convolution), their gradient buffers, the
        /// input-gradient and activation buffers, and randomly initialises the parameters.
        /// </summary>
        /// <param name="allocator">Device allocator used for every tensor this layer owns.</param>
        /// <param name="seedSource">Seed source for the random parameter initialisation.</param>
        /// <param name="elementType">Element type of all tensors.</param>
        /// <param name="batchSize">Number of samples per batch.</param>
        /// <param name="inputWidth">Input image width.</param>
        /// <param name="inputHeight">Input image height.</param>
        /// <param name="nInputPlane">Number of input channels.</param>
        /// <param name="nOutputPlane">Number of output channels (filters).</param>
        /// <param name="cd">Convolution geometry (kernel, stride, padding).</param>
        public Conv2Layer(IAllocator allocator, SeedSource seedSource, DType elementType, int batchSize, int inputWidth, int inputHeight, int nInputPlane, int nOutputPlane, ConvolutionDesc2d cd)
        {
            this.cd = cd;

            // Weight is 2D for the im2col formulation: one row per output plane,
            // one column per element of an unfolded input patch.
            this.weight = new NDArray(allocator, elementType, nOutputPlane, nInputPlane * cd.kW * cd.kH);
            this.bias = new NDArray(allocator, elementType, nOutputPlane, 1);
            this.gradWeight = new NDArray(allocator, elementType, this.weight.Shape);
            this.gradBias = new NDArray(allocator, elementType, this.bias.Shape);

            // Input layout is { batch, planes, height, width } (NCHW).
            this.inputSizes = new long[] { batchSize, nInputPlane, inputHeight, inputWidth };
            this.gradInput = new NDArray(allocator, elementType, this.inputSizes);

            this.outputSizes = SpatialConvolutionMM.OutputSize(this.inputSizes, this.weight.Shape, cd);
            this.activation = new NDArray(allocator, elementType, this.outputSizes);
            this.OutputSizes = this.outputSizes;

            // Uniform init in [-stdv, stdv] with stdv = 1/sqrt(fan-in).
            var stdv = 1.0f / (float)Math.Sqrt(cd.kW * cd.kH * nInputPlane);
            Ops.RandomUniform(this.weight, seedSource, -stdv, stdv);
            Ops.RandomUniform(this.bias, seedSource, -stdv, stdv);
        }
예제 #2
0
        /// <summary>
        /// Builds a spatial max-pooling layer: computes the pooled output shape and
        /// allocates the activation, argmax-indices and input-gradient buffers.
        /// </summary>
        /// <param name="ceilMode">When true, partial pooling windows still produce an output element.</param>
        public MaxPoolLayer(IAllocator allocator, DType elementType, int batchSize, long nInputPlane, long inputWidth, long inputHeight, ConvolutionDesc2d cd, bool ceilMode = true)
        {
            this.cd = cd;
            this.ceilMode = ceilMode;

            // NOTE(review): width is placed at index 2 and height at index 3 here,
            // whereas Conv2Layer builds { batch, planes, height, width }. For square
            // inputs this is invisible; confirm the ordering CpuMaxPoolingOps.OutputSize
            // expects before relying on non-square shapes.
            var inSizes = new long[] { batchSize, nInputPlane, inputWidth, inputHeight };
            var outSizes = CpuMaxPoolingOps.OutputSize(inSizes, ceilMode, cd);
            this.OutputSizes = outSizes;

            this.activation = new NDArray(allocator, elementType, outSizes);
            this.indices = new NDArray(allocator, elementType, outSizes);
            this.gradInput = new NDArray(allocator, elementType, inSizes);
        }
예제 #3
0
        /// <summary>
        /// Appends one conv -> max-pool -> ReLU stage to <paramref name="model"/> and
        /// returns the stage's output shape.
        /// </summary>
        /// <param name="inputSizes">Input shape as { batch, planes, height, width }.</param>
        /// <param name="nOutputPlane">Number of convolution filters for this stage.</param>
        /// <param name="useCudnn">Selects the cuDNN implementations when running on a CUDA allocator.</param>
        /// <returns>The output shape of the pooling layer (also the ReLU output shape).</returns>
        private static long[] AddCnnLayer(IAllocator allocator, SeedSource seedSource, DType elementType, Sequential model, long[] inputSizes, int nOutputPlane, bool useCudnn)
        {
            // The builder takes (batch, width, height, planes) as separate ints,
            // so unpack the NCHW shape accordingly. 5x5 kernel, stride 1, no padding.
            var conv = LayerBuilder.BuildConvLayer(allocator, seedSource, elementType,
                                                   (int)inputSizes[0], (int)inputSizes[3], (int)inputSizes[2], (int)inputSizes[1],
                                                   nOutputPlane, new ConvolutionDesc2d(5, 5, 1, 1, 0, 0), useCudnn);
            model.Add(conv);

            // 2x2 pooling window with stride 1 (overlapping windows).
            var pool = LayerBuilder.BuildPoolLayer(allocator, elementType, conv.OutputSizes, new ConvolutionDesc2d(2, 2, 1, 1, 0, 0), useCudnn);
            model.Add(pool);

            model.Add(new ReLULayer(allocator, elementType, pool.OutputSizes));

            return pool.OutputSizes;
        }
예제 #4
0
 /// <summary>
 /// Max-pooling layer for the plain-CUDA backend. All buffers and the output
 /// shape are set up by the base constructor; this type adds no extra state here.
 /// </summary>
 public MaxPoolCuda(IAllocator allocator, DType elementType, int batchSize, long nInputPlane, long inputWidth, long inputHeight, ConvolutionDesc2d cd, bool ceilMode = true)
     : base(allocator, elementType, batchSize, nInputPlane, inputWidth, inputHeight, cd, ceilMode)
 {
 }
예제 #5
0
 /// <summary>
 /// Max-pooling layer backed by cuDNN. The base constructor sets up the buffers
 /// and output shape; this constructor additionally builds the cuDNN pooling
 /// descriptor from the pooling geometry.
 /// </summary>
 public MaxPoolCudnn(IAllocator allocator, DType elementType, int batchSize, long nInputPlane, long inputWidth, long inputHeight, ConvolutionDesc2d cd, bool ceilMode = true)
     : base(allocator, elementType, batchSize, nInputPlane, inputWidth, inputHeight, cd, ceilMode)
 {
     // Descriptor arguments are passed (kernel, padding, stride), each in (h, w) order.
     this.poolingDesc = new DNNPoolingDesc(DNNPoolingMode.Max, cd.kH, cd.kW, cd.padH, cd.padW, cd.dH, cd.dW);
 }
예제 #6
0
        /// <summary>
        /// Launches the "MaxPoolBackward" CUDA kernel to back-propagate gradients
        /// through spatial max pooling, using the argmax positions previously
        /// recorded in <paramref name="indices"/>. Tensors are 4D
        /// { batch, planes, height, width }. <paramref name="ceilMode"/> is unused
        /// here: the output geometry is read directly from gradOutput's shape.
        /// </summary>
        public void SpatialMaxPoolingBackward(Tensor input, Tensor gradOutput, Tensor gradInput, Tensor indices, ConvolutionDesc2d cd, bool ceilMode)
        {
            var context = CudaHelpers.TSContextForTensor(gradOutput);
            var cudaContext = context.CudaContextForTensor(gradOutput);

            // NCHW extents of the input and of the pooled output.
            var batches = input.Sizes[0];
            var planes = input.Sizes[1];
            var inHeight = input.Sizes[2];
            var inWidth = input.Sizes[3];
            var outHeight = gradOutput.Sizes[2];
            var outWidth = gradOutput.Sizes[3];

            // The kernel assumes densely-packed gradOutput data.
            using var gradOutputContig = Ops.AsContiguous(gradOutput);
            var gradOutputPtr = CudaHelpers.GetBufferStart(gradOutputContig);
            var indicesPtr = CudaHelpers.GetBufferStart(indices);
            var gradInputPtr = CudaHelpers.GetBufferStart(gradInput);

            // One work item per input element.
            var count = (int)input.ElementCount();

            this.Invoke(context, cudaContext, "MaxPoolBackward", new dim3(NNThreads.NumBlocks(count)), new dim3(NNThreads.NumThreads), 0, CUstream.NullStream,
                        count, gradOutputPtr, indicesPtr, batches, planes, inHeight, inWidth, outHeight, outWidth,
                        cd.kH, cd.kW, cd.dH, cd.dW, cd.padH, cd.padW, gradInputPtr);
        }
예제 #7
0
        /// <summary>
        /// Launches the "MaxPoolForward" CUDA kernel over a 4D
        /// { batch, planes, height, width } input, writing pooled values to
        /// <paramref name="output"/> and argmax positions to <paramref name="indices"/>.
        /// The pooled extent follows the usual ceil/floor convention selected by
        /// <paramref name="ceilMode"/>, with a correction so the last window starts
        /// inside the (padded) image.
        /// </summary>
        public void SpatialMaxPoolingForward(Tensor input, Tensor output, Tensor indices, ConvolutionDesc2d cd, bool ceilMode)
        {
            var context = CudaHelpers.TSContextForTensor(input);
            var cudaContext = context.CudaContextForTensor(input);

            var batchSize = input.Sizes[0];
            var nInputPlane = input.Sizes[1];
            var iheight = input.Sizes[2];
            var iwidth = input.Sizes[3];

            // Pooled extent along one axis; ceilMode rounds partial windows up so
            // they still produce an output element.
            long PooledDim(long size, int kernel, int pad, int stride)
            {
                var steps = (float)(size - kernel + 2 * pad) / stride;
                var rounded = ceilMode ? Math.Ceiling(steps) : Math.Floor(steps);
                return (long)rounded + 1;
            }

            var oheight = PooledDim(iheight, cd.kH, cd.padH, cd.dH);
            var owidth = PooledDim(iwidth, cd.kW, cd.padW, cd.dW);

            if (cd.padW != 0 || cd.padH != 0)
            {
                // Ensure that the last pooling window starts inside the image.
                if ((oheight - 1) * cd.dH >= iheight + cd.padH)
                {
                    --oheight;
                }

                if ((owidth - 1) * cd.dW >= iwidth + cd.padW)
                {
                    --owidth;
                }
            }

            // The kernel assumes densely-packed input data.
            using var inputContig = Ops.AsContiguous(input);
            var inputPtr = CudaHelpers.GetBufferStart(inputContig);
            var outputPtr = CudaHelpers.GetBufferStart(output);
            var indicesPtr = CudaHelpers.GetBufferStart(indices);

            // One work item per output element.
            var count = (int)output.ElementCount();

            this.Invoke(context, cudaContext, "MaxPoolForward", new dim3(NNThreads.NumBlocks(count)), new dim3(NNThreads.NumThreads), 0, CUstream.NullStream,
                        count, inputPtr, batchSize, nInputPlane, iheight, iwidth, oheight, owidth,
                        cd.kH, cd.kW, cd.dH, cd.dW, cd.padH, cd.padW, outputPtr, indicesPtr);
        }
예제 #8
0
 /// <summary>
 /// Creates the max-pooling layer variant that matches the allocator's device:
 /// CPU, plain CUDA, or cuDNN (when <paramref name="useCudnn"/> is set).
 /// </summary>
 /// <param name="inputSizes">Input shape as { batch, planes, height, width }.</param>
 /// <exception cref="NotSupportedException">Thrown for unrecognised allocator types.</exception>
 public static MaxPoolLayer BuildPoolLayer(IAllocator allocator, DType elementType, long[] inputSizes, ConvolutionDesc2d cdPool, bool useCudnn = false)
 {
     var batch = (int)inputSizes[0];
     var planes = inputSizes[1];
     var width = inputSizes[3];
     var height = inputSizes[2];

     return allocator switch
     {
         CpuAllocator _ => new MaxPoolCpu(allocator, elementType, batch, planes, width, height, cdPool),
         CudaAllocator _ when useCudnn => new MaxPoolCudnn(allocator, elementType, batch, planes, width, height, cdPool),
         CudaAllocator _ => new MaxPoolCuda(allocator, elementType, batch, planes, width, height, cdPool),
         _ => throw new NotSupportedException("Allocator type " + allocator.GetType() + " not supported"),
     };
 }
예제 #9
0
 /// <summary>
 /// Creates the 2D convolution layer variant that matches the allocator's device:
 /// CPU, plain CUDA, or cuDNN (when <paramref name="useCudnn"/> is set).
 /// </summary>
 /// <exception cref="NotSupportedException">Thrown for unrecognised allocator types.</exception>
 public static Conv2Layer BuildConvLayer(IAllocator allocator, SeedSource seedSource, DType elementType, int batchSize, int inputWidth, int inputHeight, int nInputPlane, int nOutputPlane, ConvolutionDesc2d cd, bool useCudnn = false)
 {
     return allocator switch
     {
         CpuAllocator _ => new Conv2Cpu(allocator, seedSource, elementType, batchSize, inputWidth, inputHeight, nInputPlane, nOutputPlane, cd),
         CudaAllocator _ when useCudnn => new Conv2Cudnn(allocator, seedSource, elementType, batchSize, inputWidth, inputHeight, nInputPlane, nOutputPlane, cd),
         CudaAllocator _ => new Conv2Cuda(allocator, seedSource, elementType, batchSize, inputWidth, inputHeight, nInputPlane, nOutputPlane, cd),
         _ => throw new NotSupportedException("Allocator type " + allocator.GetType() + " not supported"),
     };
 }
예제 #10
0
        /// <summary>
        /// Computes the weight and bias gradients for a 2D convolution over a batch,
        /// using im2col to express each per-sample convolution as a matrix multiply.
        /// The weight gradient is accumulated across the batch (Addmm with beta = 1).
        /// </summary>
        /// <param name="input">Input activations, { batch, planes, height, width }.</param>
        /// <param name="gradOutput">Gradient w.r.t. the layer output.</param>
        /// <param name="gradWeight">Weight-gradient buffer, accumulated in place.</param>
        /// <param name="gradBias">Bias-gradient buffer, written in place.</param>
        /// <param name="finput">Scratch buffer for the unfolded (im2col) input columns.</param>
        /// <param name="fgradInput">Unused here; kept for signature parity with the other Conv2 methods.</param>
        /// <param name="cd">Convolution geometry (kernel, stride, padding).</param>
        public void Conv2BackwardFilter(Tensor input, Tensor gradOutput, Tensor gradWeight, Tensor gradBias, Tensor finput, Tensor fgradInput, ConvolutionDesc2d cd)
        {
            var batchSize = input.Sizes[0];
            var nInputPlane = input.Sizes[1];
            var inputHeight = input.Sizes[2];
            var inputWidth = input.Sizes[3];

            for (long i = 0; i < batchSize; ++i)
            {
                using var input_i = input.Select(0, i);
                using var gradOutput_i = gradOutput.Select(0, i);

                // Unfold input patches into columns so the filter gradient becomes
                // gradOutput2d x finput^T.
                im2colKernels.Im2Col(input_i, finput, (int)nInputPlane, (int)inputHeight, (int)inputWidth,
                                     cd.kH, cd.kW, cd.padH, cd.padW, cd.dH, cd.dW, 1, 1);

                using var gradOutput2d = gradOutput_i.View(gradOutput_i.Sizes[0], gradOutput_i.Sizes[1] * gradOutput_i.Sizes[2]);
                using var finputT = finput.Transpose();

                // beta = 1: accumulate into gradWeight across batch elements.
                Ops.Addmm(gradWeight, 1, gradWeight, 1, gradOutput2d, finputT);

                // NOTE(review): this writes the per-sample bias gradient on every
                // iteration; if Ops.Sum overwrites its destination rather than
                // accumulating, only the last batch element's contribution survives.
                // Verify Ops.Sum semantics before relying on gradBias for batch > 1.
                Ops.Sum(gradBias, gradOutput2d, 1);
            }
        }
예제 #11
0
        /// <summary>
        /// Computes the gradient w.r.t. the convolution input: for each batch element,
        /// multiplies the transposed weight matrix by the output gradient, then folds
        /// the resulting columns back into image form (col2im).
        /// </summary>
        /// <param name="fgradInput">Scratch buffer holding the column-form input gradient; overwritten each iteration.</param>
        public void Conv2BackwardInput(Tensor input, Tensor gradOutput, Tensor gradInput, Tensor weight, Tensor finput, Tensor fgradInput, ConvolutionDesc2d cd)
        {
            var nOutputPlane = weight.Sizes[0];
            var batchSize = input.Sizes[0];
            var nInputPlane = input.Sizes[1];
            var inputHeight = input.Sizes[2];
            var inputWidth = input.Sizes[3];

            // Spatial extent of the convolution output for this geometry.
            var outputWidth = (inputWidth + 2 * cd.padW - cd.kW) / cd.dW + 1;
            var outputHeight = (inputHeight + 2 * cd.padH - cd.kH) / cd.dH + 1;

            for (long b = 0; b < batchSize; ++b)
            {
                using var gradInputSlice = gradInput.Select(0, b);
                using var gradOutSlice = gradOutput.Select(0, b);
                using var gradOut2d = gradOutSlice.View(nOutputPlane, outputHeight * outputWidth);
                using var weightT = weight.Transpose();

                // beta = 0: fgradInput is pure scratch, fully overwritten here.
                Ops.Addmm(fgradInput, 0, fgradInput, 1, weightT, gradOut2d);

                im2colKernels.Col2Im(fgradInput, gradInputSlice, (int)nInputPlane, (int)inputHeight, (int)inputWidth,
                                     cd.kH, cd.kW, cd.padH, cd.padW, cd.dH, cd.dW, 1, 1);
            }
        }
예제 #12
0
        /// <summary>
        /// Forward pass of a 2D convolution via im2col + matrix multiply:
        /// output = weight x unfolded(input), with the bias (when present) broadcast
        /// across all spatial positions beforehand.
        /// </summary>
        /// <param name="bias">Optional bias; when null the output is zero-filled instead.</param>
        /// <param name="finput">Scratch buffer for the unfolded (im2col) input columns.</param>
        public void Conv2Forward(Tensor input, Tensor output, Tensor weight, Tensor bias, Tensor finput, ConvolutionDesc2d cd)
        {
            var batchSize = input.Sizes[0];
            var nInputPlane = input.Sizes[1];
            var inputHeight = input.Sizes[2];
            var inputWidth = input.Sizes[3];
            var nOutputPlane = weight.Sizes[0];

            // Spatial extent of the convolution output for this geometry.
            var outputWidth = (inputWidth + 2 * cd.padW - cd.kW) / cd.dW + 1;
            var outputHeight = (inputHeight + 2 * cd.padH - cd.kH) / cd.dH + 1;

            for (long b = 0; b < batchSize; ++b)
            {
                using var inputSlice = input.Select(0, b);
                using var outputSlice = output.Select(0, b);
                using var output2d = outputSlice.View(nOutputPlane, outputHeight * outputWidth);

                if (bias != null)
                {
                    // Seed every spatial position of each output plane with its bias value.
                    using var biasExp = bias.Expand(nOutputPlane, output2d.Sizes[1]);
                    Ops.Copy(output2d, biasExp);
                }
                else
                {
                    Ops.Fill(outputSlice, 0);
                }

                im2colKernels.Im2Col(inputSlice, finput, (int)nInputPlane, (int)inputHeight, (int)inputWidth,
                                     cd.kH, cd.kW, cd.padH, cd.padW, cd.dH, cd.dW, 1, 1);

                // beta = 1: add the convolution result on top of the seeded bias values.
                Ops.Addmm(output2d, 1, output2d, 1, weight, finput);
            }
        }
예제 #13
0
 /// <summary>
 /// Shape of the im2col scratch buffer ("finput") for the given input/output
 /// shapes: one row per kernel-patch element, one column per output spatial position.
 /// </summary>
 /// <param name="inputSizes">Input shape; index 1 is the number of input planes.</param>
 /// <param name="outputSizes">Output shape; indices 2 and 3 are the spatial extents.</param>
 public static long[] FInputSize(long[] inputSizes, long[] outputSizes, ConvolutionDesc2d cd)
 {
     var patchElements = cd.kW * cd.kH * inputSizes[1];
     var spatialPositions = outputSizes[2] * outputSizes[3];
     return new long[] { patchElements, spatialPositions };
 }
예제 #14
0
        /// <summary>
        /// cuDNN convolution layer. Re-views the weight/bias tensors (and their
        /// gradient buffers) into the 4D layouts cuDNN expects, then allocates one
        /// shared workspace large enough for the forward, backward-filter and
        /// backward-data algorithms.
        /// </summary>
        public Conv2Cudnn(IAllocator allocator, SeedSource seedSource, DType elementType, int batchSize, int inputWidth, int inputHeight, int nInputPlane, int nOutputPlane, ConvolutionDesc2d cd)
            : base(allocator, seedSource, elementType, batchSize, inputWidth, inputHeight, nInputPlane, nOutputPlane, cd)
        {
            // Reshape weight and bias - CuDNN expects the dimensions to be structured slightly differently
            this.weight = ViewReplace(this.weight, nOutputPlane, nInputPlane, cd.kH, cd.kW);
            this.bias = ViewReplace(this.bias, 1, nOutputPlane, 1, 1);
            this.gradWeight = ViewReplace(this.gradWeight, this.weight.Shape);
            this.gradBias = ViewReplace(this.gradBias, this.bias.Shape);

            // NCHW shape descriptor for the layer input; built fresh for each query.
            TensorShape InputShape() => new TensorShape(elementType, new long[] { batchSize, nInputPlane, inputHeight, inputWidth });

            var fwdSize = DNN.GetConvolutionForwardWorkspaceSize(allocator, fwdAlgo, cd,
                                                                 InputShape(), new TensorShape(weight), new TensorShape(activation));

            var bwdFilterSize = DNN.GetConvolutionBackwardFilterWorkspaceSize(allocator, bwdFilterAlgo, cd,
                                                                              InputShape(), new TensorShape(activation), new TensorShape(weight));

            var bwdDataSize = DNN.GetConvolutionBackwardDataWorkspaceSize(allocator, bwdDataAlgo, cd,
                                                                          new TensorShape(weight), new TensorShape(activation), InputShape());

            // A single workspace sized for the largest requirement serves all three passes.
            var workspaceSize = Math.Max(fwdSize, Math.Max(bwdFilterSize, bwdDataSize));
            this.workspace = (CudaStorage)allocator.Allocate(DType.UInt8, workspaceSize);
        }
예제 #15
0
        /// <summary>
        /// Plain-CUDA convolution layer: on top of the base layer's state, allocates
        /// the im2col scratch buffers used by the forward and backward kernels.
        /// </summary>
        public Conv2Cuda(IAllocator allocator, SeedSource seedSource, DType elementType, int batchSize, int inputWidth, int inputHeight, int nInputPlane, int nOutputPlane, ConvolutionDesc2d cd)
            : base(allocator, seedSource, elementType, batchSize, inputWidth, inputHeight, nInputPlane, nOutputPlane, cd)
        {
            // Shape of the unfolded (im2col) input columns, derived from the
            // input/output shapes computed by the base constructor.
            var scratchShape = TensorSharp.CUDA.SpatialConvolution.FInputSize(inputSizes, outputSizes, cd);

            this.finput = new NDArray(allocator, elementType, scratchShape);
            this.fgradInput = new NDArray(allocator, elementType, scratchShape);
        }