/// <summary>
/// 2D convolution layer. Allocates the weight and bias tensors plus their
/// gradient buffers, the gradInput buffer (NCHW) and the activation (output)
/// buffer, then initialises weight and bias uniformly in [-stdv, stdv] with
/// stdv = 1 / sqrt(kW * kH * nInputPlane).
/// </summary>
/// <param name="allocator">Backend allocator (CPU or CUDA) for all tensors.</param>
/// <param name="seedSource">Seed source consumed by the two RandomUniform calls.</param>
/// <param name="elementType">Element dtype of every allocated tensor.</param>
/// <param name="cd">Kernel size, stride and padding of the convolution.</param>
public Conv2Layer(IAllocator allocator, SeedSource seedSource, DType elementType, int batchSize, int inputWidth, int inputHeight, int nInputPlane, int nOutputPlane, ConvolutionDesc2d cd)
{
    this.cd = cd;

    // Weight is laid out as (nOutputPlane, nInputPlane*kW*kH) — the 2D shape
    // used by the im2col GEMM formulation of the convolution.
    this.weight = new NDArray(allocator, elementType, nOutputPlane, nInputPlane * cd.kW * cd.kH);
    this.bias = new NDArray(allocator, elementType, nOutputPlane, 1);
    this.gradWeight = new NDArray(allocator, elementType, this.weight.Shape);
    this.gradBias = new NDArray(allocator, elementType, this.bias.Shape);

    // Input geometry in NCHW order.
    inputSizes = new long[] { batchSize, nInputPlane, inputHeight, inputWidth };
    this.gradInput = new NDArray(allocator, elementType, inputSizes);

    outputSizes = SpatialConvolutionMM.OutputSize(inputSizes, weight.Shape, cd);
    this.activation = new NDArray(allocator, elementType, outputSizes);
    this.OutputSizes = outputSizes;

    // Uniform fan-in init. Weight is drawn before bias; keep this order so the
    // seed stream is consumed identically to the original implementation.
    var bound = 1.0f / (float)Math.Sqrt(cd.kW * cd.kH * nInputPlane);
    Ops.RandomUniform(weight, seedSource, -bound, bound);
    Ops.RandomUniform(bias, seedSource, -bound, bound);
}
/// <summary>
/// Max-pooling layer. Allocates the activation (output) buffer, the pooling
/// index buffer (arg-max positions used by the backward pass) and the
/// gradInput buffer for a fixed input geometry.
/// </summary>
/// <param name="cd">Pooling window size, stride and padding.</param>
/// <param name="ceilMode">Whether the output size is computed with ceil (true) or floor.</param>
/// <remarks>
/// FIX: the input-size vector is now built in NCHW order
/// {batch, planes, height, width}, matching Conv2Layer and the pooling kernels
/// (which read Sizes[2] as height and Sizes[3] as width). The previous
/// {batch, planes, width, height} order only produced correct shapes for
/// square inputs; for non-square inputs gradInput would not match the actual
/// input tensor's shape.
/// </remarks>
public MaxPoolLayer(IAllocator allocator, DType elementType, int batchSize, long nInputPlane, long inputWidth, long inputHeight, ConvolutionDesc2d cd, bool ceilMode = true)
{
    this.cd = cd;
    this.ceilMode = ceilMode;

    // NCHW, consistent with Conv2Layer.inputSizes and the CUDA pooling kernels.
    var inputSizes = new long[] { batchSize, nInputPlane, inputHeight, inputWidth };
    var outputSizes = CpuMaxPoolingOps.OutputSize(inputSizes, ceilMode, cd);
    this.OutputSizes = outputSizes;

    this.activation = new NDArray(allocator, elementType, outputSizes);
    // One arg-max index per output element, consumed by the backward pass.
    this.indices = new NDArray(allocator, elementType, outputSizes);
    this.gradInput = new NDArray(allocator, elementType, inputSizes);
}
/// <summary>
/// Appends one conv → max-pool → ReLU group to <paramref name="model"/> and
/// returns the group's output sizes (the pooling layer's output shape).
/// </summary>
/// <param name="inputSizes">Input geometry in NCHW order.</param>
/// <param name="nOutputPlane">Number of convolution output planes.</param>
/// <param name="useCudnn">Select the cuDNN-backed variants when on CUDA.</param>
private static long[] AddCnnLayer(IAllocator allocator, SeedSource seedSource, DType elementType, Sequential model, long[] inputSizes, int nOutputPlane, bool useCudnn)
{
    // 5x5 convolution, stride 1, no padding. Width/height args come from
    // NCHW indices 3 and 2 respectively.
    var convDesc = new ConvolutionDesc2d(5, 5, 1, 1, 0, 0);
    var convLayer = LayerBuilder.BuildConvLayer(
        allocator, seedSource, elementType,
        (int)inputSizes[0], (int)inputSizes[3], (int)inputSizes[2], (int)inputSizes[1],
        nOutputPlane, convDesc, useCudnn);
    model.Add(convLayer);

    // 2x2 max pooling with stride 1.
    var poolDesc = new ConvolutionDesc2d(2, 2, 1, 1, 0, 0);
    var pool = LayerBuilder.BuildPoolLayer(allocator, elementType, convLayer.OutputSizes, poolDesc, useCudnn);
    model.Add(pool);

    model.Add(new ReLULayer(allocator, elementType, pool.OutputSizes));
    return pool.OutputSizes;
}
/// <summary>
/// Plain-CUDA max-pooling layer. Pure pass-through to the base MaxPoolLayer
/// constructor; all buffers (activation, indices, gradInput) are allocated by
/// the base class.
/// </summary>
public MaxPoolCuda(IAllocator allocator, DType elementType, int batchSize, long nInputPlane, long inputWidth, long inputHeight, ConvolutionDesc2d cd, bool ceilMode = true) : base(allocator, elementType, batchSize, nInputPlane, inputWidth, inputHeight, cd, ceilMode) { }
/// <summary>
/// cuDNN-backed max-pooling layer. Delegates buffer allocation to the base
/// MaxPoolLayer constructor, then builds the cuDNN pooling descriptor from the
/// window (kH, kW), padding (padH, padW) and stride (dH, dW) in
/// <paramref name="cd"/>, using max-pooling mode.
/// </summary>
public MaxPoolCudnn(IAllocator allocator, DType elementType, int batchSize, long nInputPlane, long inputWidth, long inputHeight, ConvolutionDesc2d cd, bool ceilMode = true) : base(allocator, elementType, batchSize, nInputPlane, inputWidth, inputHeight, cd, ceilMode) { this.poolingDesc = new DNNPoolingDesc(DNNPoolingMode.Max, cd.kH, cd.kW, cd.padH, cd.padW, cd.dH, cd.dW); }
/// <summary>
/// Backward pass of 2D max pooling on CUDA: launches the "MaxPoolBackward"
/// kernel, which routes gradOutput back into gradInput at the arg-max
/// positions recorded in <paramref name="indices"/> during the forward pass.
/// Tensors are NCHW (Sizes[1] = planes, Sizes[2] = height, Sizes[3] = width).
/// gradOutput is made contiguous before its device pointer is taken; indices
/// and gradInput pointers are used as-is. The launch is sized by
/// input.ElementCount() (one thread per input element, blocked by NNThreads)
/// on the null stream.
/// NOTE(review): <paramref name="ceilMode"/> is accepted but never read here —
/// presumably the gradOutput shape already encodes it; confirm against the
/// forward pass.
/// </summary>
public void SpatialMaxPoolingBackward(Tensor input, Tensor gradOutput, Tensor gradInput, Tensor indices, ConvolutionDesc2d cd, bool ceilMode) { var context = CudaHelpers.TSContextForTensor(gradOutput); var cudaContext = context.CudaContextForTensor(gradOutput); var dimw = 3; var dimh = 2; var dimc = 1; var nbatch = input.Sizes[0]; var nslices = input.Sizes[dimc]; var iheight = input.Sizes[dimh]; var iwidth = input.Sizes[dimw]; var owidth = gradOutput.Sizes[dimw]; var oheight = gradOutput.Sizes[dimh]; using var gradOutputContig = Ops.AsContiguous(gradOutput); var gradOutputPtr = CudaHelpers.GetBufferStart(gradOutputContig); var indicesPtr = CudaHelpers.GetBufferStart(indices); var gradInputPtr = CudaHelpers.GetBufferStart(gradInput); var count = (int)input.ElementCount(); this.Invoke(context, cudaContext, "MaxPoolBackward", new dim3(NNThreads.NumBlocks(count)), new dim3(NNThreads.NumThreads), 0, CUstream.NullStream, count, gradOutputPtr, indicesPtr, nbatch, nslices, iheight, iwidth, oheight, owidth, cd.kH, cd.kW, cd.dH, cd.dW, cd.padH, cd.padW, gradInputPtr); }
/// <summary>
/// Forward pass of 2D max pooling on CUDA: launches the "MaxPoolForward"
/// kernel, writing the pooled values into <paramref name="output"/> and the
/// arg-max positions into <paramref name="indices"/> (consumed by the
/// backward pass). Input is NCHW.
/// The output height/width are computed as
///   ((iDim - k + 2*pad) / stride) + 1
/// rounded up when <paramref name="ceilMode"/> is true, down otherwise; with
/// non-zero padding the size is then decremented if the last pooling window
/// would start entirely inside the padding (ensuring it starts inside the
/// image). Input is made contiguous before its device pointer is taken; the
/// launch is sized by output.ElementCount() (one thread per output element)
/// on the null stream.
/// NOTE(review): the computed oheight/owidth are passed to the kernel but are
/// not checked against output.Sizes — callers must allocate output/indices
/// with the matching shape.
/// </summary>
public void SpatialMaxPoolingForward(Tensor input, Tensor output, Tensor indices, ConvolutionDesc2d cd, bool ceilMode) { var context = CudaHelpers.TSContextForTensor(input); var cudaContext = context.CudaContextForTensor(input); var iwidth = input.Sizes[3]; var iheight = input.Sizes[2]; var nInputPlane = input.Sizes[1]; var batchSize = input.Sizes[0]; long owidth; long oheight; if (ceilMode) { // ReSharper disable once ArrangeRedundantParentheses
oheight = (long)(Math.Ceiling((float)(iheight - cd.kH + 2 * cd.padH) / cd.dH)) + 1; // ReSharper disable once ArrangeRedundantParentheses
owidth = (long)(Math.Ceiling((float)(iwidth - cd.kW + 2 * cd.padW) / cd.dW)) + 1; } else { // ReSharper disable once ArrangeRedundantParentheses
oheight = (long)(Math.Floor((float)(iheight - cd.kH + 2 * cd.padH) / cd.dH)) + 1; // ReSharper disable once ArrangeRedundantParentheses
owidth = (long)(Math.Floor((float)(iwidth - cd.kW + 2 * cd.padW) / cd.dW)) + 1; } if (cd.padW != 0 || cd.padH != 0) { // ensure that the last pooling starts inside the image
if ((oheight - 1) * cd.dH >= iheight + cd.padH) { --oheight; } if ((owidth - 1) * cd.dW >= iwidth + cd.padW) { --owidth; } } using var inputContig = Ops.AsContiguous(input); var inputPtr = CudaHelpers.GetBufferStart(inputContig); var outputPtr = CudaHelpers.GetBufferStart(output); var indicesPtr = CudaHelpers.GetBufferStart(indices); var count = (int)output.ElementCount(); this.Invoke(context, cudaContext, "MaxPoolForward", new dim3(NNThreads.NumBlocks(count)), new dim3(NNThreads.NumThreads), 0, CUstream.NullStream, count, inputPtr, batchSize, nInputPlane, iheight, iwidth, oheight, owidth, cd.kH, cd.kW, cd.dH, cd.dW, cd.padH, cd.padW, outputPtr, indicesPtr); }
/// <summary>
/// Factory: builds the max-pooling layer variant matching the allocator —
/// CPU, plain CUDA, or cuDNN-backed CUDA (when <paramref name="useCudnn"/>).
/// </summary>
/// <param name="inputSizes">Input geometry in NCHW order.</param>
/// <exception cref="NotSupportedException">Unknown allocator type.</exception>
public static MaxPoolLayer BuildPoolLayer(IAllocator allocator, DType elementType, long[] inputSizes, ConvolutionDesc2d cdPool, bool useCudnn = false)
{
    var batch = (int)inputSizes[0];
    var planes = inputSizes[1];
    var width = inputSizes[3];
    var height = inputSizes[2];

    switch (allocator)
    {
        case CpuAllocator _:
            return new MaxPoolCpu(allocator, elementType, batch, planes, width, height, cdPool);
        case CudaAllocator _ when useCudnn:
            return new MaxPoolCudnn(allocator, elementType, batch, planes, width, height, cdPool);
        case CudaAllocator _:
            return new MaxPoolCuda(allocator, elementType, batch, planes, width, height, cdPool);
        default:
            throw new NotSupportedException("Allocator type " + allocator.GetType() + " not supported");
    }
}
/// <summary>
/// Factory: builds the 2D convolution layer variant matching the allocator —
/// CPU, plain CUDA, or cuDNN-backed CUDA (when <paramref name="useCudnn"/>).
/// </summary>
/// <exception cref="NotSupportedException">Unknown allocator type.</exception>
public static Conv2Layer BuildConvLayer(IAllocator allocator, SeedSource seedSource, DType elementType, int batchSize, int inputWidth, int inputHeight, int nInputPlane, int nOutputPlane, ConvolutionDesc2d cd, bool useCudnn = false)
{
    switch (allocator)
    {
        case CpuAllocator _:
            return new Conv2Cpu(allocator, seedSource, elementType, batchSize, inputWidth, inputHeight, nInputPlane, nOutputPlane, cd);
        case CudaAllocator _ when useCudnn:
            return new Conv2Cudnn(allocator, seedSource, elementType, batchSize, inputWidth, inputHeight, nInputPlane, nOutputPlane, cd);
        case CudaAllocator _:
            return new Conv2Cuda(allocator, seedSource, elementType, batchSize, inputWidth, inputHeight, nInputPlane, nOutputPlane, cd);
        default:
            throw new NotSupportedException("Allocator type " + allocator.GetType() + " not supported");
    }
}
/// <summary>
/// Computes the filter and bias gradients of a 2D convolution (im2col
/// formulation). For each batch item: Im2Col unfolds the input into the patch
/// matrix <paramref name="finput"/>, then the filter gradient accumulates as
/// gradWeight += gradOutput2d * finput^T (Addmm with beta = 1, so
/// contributions sum across the batch).
/// NOTE(review): gradBias is written with Ops.Sum(gradBias, gradOutput2d, 1)
/// on every iteration, which looks like it overwrites — not accumulates — the
/// per-item bias gradient, leaving only the last batch item's contribution.
/// Confirm Ops.Sum semantics; if it overwrites, the bias gradient is wrong
/// for batchSize > 1.
/// The locals nOutputPlane / outputWidth / outputHeight and the
/// <paramref name="fgradInput"/> parameter are unused here.
/// </summary>
public void Conv2BackwardFilter(Tensor input, Tensor gradOutput, Tensor gradWeight, Tensor gradBias, Tensor finput, Tensor fgradInput, ConvolutionDesc2d cd) { var nOutputPlane = gradWeight.Sizes[0]; var batchSize = input.Sizes[0]; var nInputPlane = input.Sizes[1]; var inputWidth = input.Sizes[3]; var inputHeight = input.Sizes[2]; var outputWidth = (inputWidth + 2 * cd.padW - cd.kW) / cd.dW + 1; var outputHeight = (inputHeight + 2 * cd.padH - cd.kH) / cd.dH + 1; for (long i = 0; i < batchSize; ++i) { using (var input_i = input.Select(0, i)) using (var gradOutput_i = gradOutput.Select(0, i)) { im2colKernels.Im2Col(input_i, finput, (int)nInputPlane, (int)inputHeight, (int)inputWidth, cd.kH, cd.kW, cd.padH, cd.padW, cd.dH, cd.dW, 1, 1); using (var gradOutput2d = gradOutput_i.View(gradOutput_i.Sizes[0], gradOutput_i.Sizes[1] * gradOutput_i.Sizes[2])) using (var finputT = finput.Transpose()) { Ops.Addmm(gradWeight, 1, gradWeight, 1, gradOutput2d, finputT); Ops.Sum(gradBias, gradOutput2d, 1); } } } }
/// <summary>
/// Computes the input gradient of a 2D convolution (im2col formulation). For
/// each batch item: fgradInput = weight^T * gradOutput2d (Addmm with beta = 0
/// overwrites fgradInput each iteration), then Col2Im folds the patch-matrix
/// gradient back into the spatial gradInput slice. Tensors are NCHW; the
/// output height/width are recomputed from the convolution descriptor as
/// ((iDim + 2*pad - k) / stride) + 1 and used to view each gradOutput slice
/// as a (nOutputPlane, oH*oW) matrix.
/// The local nOutputPlane comes from weight.Sizes[0]; <paramref name="finput"/>
/// is unused here (it is the forward-pass scratch buffer).
/// </summary>
public void Conv2BackwardInput(Tensor input, Tensor gradOutput, Tensor gradInput, Tensor weight, Tensor finput, Tensor fgradInput, ConvolutionDesc2d cd) { var nOutputPlane = weight.Sizes[0]; var batchSize = input.Sizes[0]; var nInputPlane = input.Sizes[1]; var inputWidth = input.Sizes[3]; var inputHeight = input.Sizes[2]; var outputWidth = (inputWidth + 2 * cd.padW - cd.kW) / cd.dW + 1; var outputHeight = (inputHeight + 2 * cd.padH - cd.kH) / cd.dH + 1; for (long i = 0; i < batchSize; ++i) { using (var gradInput_i = gradInput.Select(0, i)) using (var gradOutput_i = gradOutput.Select(0, i)) using (var gradOutput_i2d = gradOutput_i.View(nOutputPlane, outputHeight * outputWidth)) using (var weightT = weight.Transpose()) { Ops.Addmm(fgradInput, 0, fgradInput, 1, weightT, gradOutput_i2d); im2colKernels.Col2Im(fgradInput, gradInput_i, (int)nInputPlane, (int)inputHeight, (int)inputWidth, cd.kH, cd.kW, cd.padH, cd.padW, cd.dH, cd.dW, 1, 1); } } }
/// <summary>
/// Forward pass of a 2D convolution (im2col formulation). For each batch
/// item: the output slice is first seeded — with the bias broadcast across
/// all spatial positions via Expand+Copy when <paramref name="bias"/> is
/// non-null, otherwise with zeros — then Im2Col unfolds the input slice into
/// the patch matrix <paramref name="finput"/> and the convolution is computed
/// as output2d += weight * finput (Addmm with beta = 1, accumulating onto the
/// seeded bias/zeros). Tensors are NCHW; output height/width are derived from
/// the descriptor as ((iDim + 2*pad - k) / stride) + 1. Dilation is fixed
/// at 1 (the trailing 1, 1 arguments to Im2Col).
/// </summary>
public void Conv2Forward(Tensor input, Tensor output, Tensor weight, Tensor bias, Tensor finput, ConvolutionDesc2d cd) { var batchSize = input.Sizes[0]; var nInputPlane = input.Sizes[1]; var inputWidth = input.Sizes[3]; var inputHeight = input.Sizes[2]; var nOutputPlane = weight.Sizes[0]; var outputWidth = (inputWidth + 2 * cd.padW - cd.kW) / cd.dW + 1; var outputHeight = (inputHeight + 2 * cd.padH - cd.kH) / cd.dH + 1; for (long i = 0; i < batchSize; ++i) { using (var input_i = input.Select(0, i)) using (var output_i = output.Select(0, i)) { using (var output2d = output_i.View(nOutputPlane, outputHeight * outputWidth)) { if (bias != null) { using (var biasExp = bias.Expand(nOutputPlane, output2d.Sizes[1])) { Ops.Copy(output2d, biasExp); } } else { Ops.Fill(output_i, 0); } im2colKernels.Im2Col(input_i, finput, (int)nInputPlane, (int)inputHeight, (int)inputWidth, cd.kH, cd.kW, cd.padH, cd.padW, cd.dH, cd.dW, 1, 1); Ops.Addmm(output2d, 1, output2d, 1, weight, finput); } } } }
/// <summary>
/// Shape of the im2col scratch buffer ("finput") for one batch item:
/// rows = kW * kH * inputPlanes (one row per patch element),
/// cols = outputHeight * outputWidth (one column per output position).
/// Both size arrays are NCHW.
/// </summary>
public static long[] FInputSize(long[] inputSizes, long[] outputSizes, ConvolutionDesc2d cd)
{
    var patchRows = cd.kW * cd.kH * inputSizes[1];
    var outputArea = outputSizes[2] * outputSizes[3];
    return new long[] { patchRows, outputArea };
}
/// <summary>
/// cuDNN-backed 2D convolution layer. After the base Conv2Layer allocates the
/// tensors, this reshapes them to the layouts cuDNN expects — weight as
/// (nOutputPlane, nInputPlane, kH, kW) and bias as (1, nOutputPlane, 1, 1) —
/// and mirrors those shapes onto gradWeight/gradBias. It then queries the
/// workspace sizes required by the configured forward, backward-filter and
/// backward-data algorithms (fwdAlgo / bwdFilterAlgo / bwdDataAlgo, declared
/// elsewhere in this class) for the NCHW input shape, and allocates a single
/// byte workspace sized to the maximum of the three so it can be shared by
/// all passes.
/// </summary>
public Conv2Cudnn(IAllocator allocator, SeedSource seedSource, DType elementType, int batchSize, int inputWidth, int inputHeight, int nInputPlane, int nOutputPlane, ConvolutionDesc2d cd) : base(allocator, seedSource, elementType, batchSize, inputWidth, inputHeight, nInputPlane, nOutputPlane, cd) { // Reshape weight and bias - CuDNN expects the dimensions to be structured slightly differently
this.weight = ViewReplace(this.weight, nOutputPlane, nInputPlane, cd.kH, cd.kW); this.bias = ViewReplace(this.bias, 1, nOutputPlane, 1, 1); this.gradWeight = ViewReplace(this.gradWeight, this.weight.Shape); this.gradBias = ViewReplace(this.gradBias, this.bias.Shape); var fwdWorkspace = DNN.GetConvolutionForwardWorkspaceSize(allocator, fwdAlgo, cd, new TensorShape(elementType, new long[] { batchSize, nInputPlane, inputHeight, inputWidth }), new TensorShape(weight), new TensorShape(activation)); var bwdFilterWorkspace = DNN.GetConvolutionBackwardFilterWorkspaceSize(allocator, bwdFilterAlgo, cd, new TensorShape(elementType, new long[] { batchSize, nInputPlane, inputHeight, inputWidth }), new TensorShape(activation), new TensorShape(weight)); var bwdFilterInputWorkspace = DNN.GetConvolutionBackwardDataWorkspaceSize(allocator, bwdDataAlgo, cd, new TensorShape(weight), new TensorShape(activation), new TensorShape(elementType, new long[] { batchSize, nInputPlane, inputHeight, inputWidth })); var workspaceSize = Math.Max(Math.Max(fwdWorkspace, bwdFilterWorkspace), bwdFilterInputWorkspace); this.workspace = (CudaStorage)allocator.Allocate(DType.UInt8, workspaceSize); }
/// <summary>
/// Plain-CUDA 2D convolution layer. On top of the buffers allocated by the
/// base Conv2Layer constructor, allocates the im2col scratch tensor (finput)
/// and its backward counterpart (fgradInput), both sized via
/// SpatialConvolution.FInputSize from the base class's inputSizes/outputSizes.
/// </summary>
public Conv2Cuda(IAllocator allocator, SeedSource seedSource, DType elementType, int batchSize, int inputWidth, int inputHeight, int nInputPlane, int nOutputPlane, ConvolutionDesc2d cd)
    : base(allocator, seedSource, elementType, batchSize, inputWidth, inputHeight, nInputPlane, nOutputPlane, cd)
{
    // Both scratch buffers share the same (patchRows, outputArea) shape.
    var scratchShape = TensorSharp.CUDA.SpatialConvolution.FInputSize(inputSizes, outputSizes, cd);
    this.finput = new NDArray(allocator, elementType, scratchShape);
    this.fgradInput = new NDArray(allocator, elementType, scratchShape);
}