// GPU forward 2D convolution via cuDNN.
// NOTE(review): configured for cross-correlation (kernels not flipped), which
// matches the CPU implementation's indexing — confirm against training parity tests.
public override void Conv2D(Tensor t, Tensor kernels, int stride, Tensor.PaddingType padding, Tensor result)
{
    // Resolve zero-padding margins implied by the padding mode. The output-size
    // out-params are required by the call signature but unused here (result is pre-sized).
    int outputWidth = 0, outputHeight = 0, paddingX = 0, paddingY = 0;
    Tensor.GetPaddingParams(padding, t.Width, t.Height, kernels.Width, kernels.Height, stride, out outputHeight, out outputWidth, out paddingX, out paddingY);
    // Ensure all operands are resident on the device before the cuDNN call.
    t.CopyToDevice();
    kernels.CopyToDevice();
    result.CopyToDevice();
    using (var convolutionDesc = new ConvolutionDescriptor())
    using (var tDesc = new TensorDescriptor())
    using (var kernelsDesc = new FilterDescriptor())
    using (var resultDesc = new TensorDescriptor())
    {
        convolutionDesc.SetConvolution2dDescriptor(paddingY, paddingX, stride, stride, 1, 1, cudnnConvolutionMode.CrossCorrelation, cudnnDataType.Float);
        // Shape.Dimensions is stored [W, H, C, N] (see the CPU indexers [w,h,d,n]);
        // cuDNN's NCHW descriptor wants N,C,H,W, hence the reversed [3..0] order.
        tDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float, t.Shape.Dimensions[3], t.Shape.Dimensions[2], t.Shape.Dimensions[1], t.Shape.Dimensions[0]);
        kernelsDesc.SetFilter4dDescriptor(cudnnDataType.Float, cudnnTensorFormat.NCHW, kernels.Shape.Dimensions[3], kernels.Shape.Dimensions[2], kernels.Shape.Dimensions[1], kernels.Shape.Dimensions[0]);
        resultDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float, result.Shape.Dimensions[3], result.Shape.Dimensions[2], result.Shape.Dimensions[1], result.Shape.Dimensions[0]);
        // Ask cuDNN for the fastest algorithm, then size its scratch workspace.
        var algo = _CudnnContext.GetConvolutionForwardAlgorithm(tDesc, kernelsDesc, convolutionDesc, resultDesc, cudnnConvolutionFwdPreference.PreferFastest, IntPtr.Zero);
        var workspaceSize = _CudnnContext.GetConvolutionForwardWorkspaceSize(tDesc, kernelsDesc, convolutionDesc, resultDesc, algo);
        // cuDNN may report 0 bytes; allocate at least 1 so the buffer is never null/empty.
        workspaceSize = workspaceSize == 0 ? new SizeT(1) : workspaceSize;
        // Reuse the cached per-tensor workspace when it already has the right size.
        if (result.GpuData.ConvWorkspace == null || result.GpuData.ConvWorkspace.Size != workspaceSize)
        {
            result.GpuData.ConvWorkspace = new CudaDeviceVariable <byte>(workspaceSize);
        }
        // result = 1.0 * conv(t, kernels) + 0.0 * result (beta = 0 overwrites result).
        _CudnnContext.ConvolutionForward(1.0f, tDesc, t.GpuData.DeviceVar, kernelsDesc, kernels.GpuData.DeviceVar, convolutionDesc, algo, result.GpuData.ConvWorkspace, 0.0f, resultDesc, result.GpuData.DeviceVar);
    }
}
/// <summary>
/// CPU forward 2D convolution (cross-correlation convention: the kernel is not
/// flipped). Every (x, y, filter, sample) cell of <paramref name="result"/> is
/// fully overwritten with the windowed dot product.
/// </summary>
/// <param name="t">Input tensor, indexed [width, height, depth, batch].</param>
/// <param name="kernels">Filter tensor; BatchSize is the number of filters.</param>
/// <param name="stride">Step between consecutive receptive fields.</param>
/// <param name="padding">Padding mode used to derive the zero-padding margins.</param>
/// <param name="result">Pre-sized output tensor receiving the convolution.</param>
public override void Conv2D(Tensor t, Tensor kernels, int stride, Tensor.PaddingType padding, Tensor result)
{
    t.CopyToHost();
    kernels.CopyToHost();
    // Result is fully overwritten below, so just mark it host-resident instead
    // of paying for a device-to-host copy.
    result.CurrentLocation = Tensor.Location.Host;

    int outputWidth = 0, outputHeight = 0, paddingX = 0, paddingY = 0;
    Tensor.GetPaddingParams(padding, t.Width, t.Height, kernels.Width, kernels.Height, stride, out outputHeight, out outputWidth, out paddingX, out paddingY);

    // Parallelize over samples and over filters; each (sample, filter) pair
    // writes a disjoint slice of result, so no synchronization is needed.
    Parallel.For(0, t.BatchSize, sample =>
    {
        Parallel.For(0, kernels.BatchSize, filter =>
        {
            int inY = -paddingY;
            for (int oy = 0; oy < result.Height; inY += stride, ++oy)
            {
                int inX = -paddingX;
                for (int ox = 0; ox < result.Width; inX += stride, ++ox)
                {
                    float sum = 0;
                    // Accumulate over the full receptive field. TryGet presumably
                    // returns the fallback (0) for out-of-range coordinates, which
                    // implements the zero padding — confirm against Tensor.TryGet.
                    for (int kd = 0; kd < kernels.Depth; ++kd)
                    {
                        for (int kh = 0; kh < kernels.Height; ++kh)
                        {
                            for (int kw = 0; kw < kernels.Width; ++kw)
                            {
                                sum += t.TryGet(0, inX + kw, inY + kh, kd, sample) * kernels[kw, kh, kd, filter];
                            }
                        }
                    }
                    result[ox, oy, filter, sample] = sum;
                }
            }
        });
    });
}
/// <summary>
/// CPU convolution filter-gradient pass: accumulates d(loss)/d(kernel) into
/// <paramref name="kernelsGradient"/> (the accumulator is NOT cleared here).
/// </summary>
/// <param name="input">Forward-pass input, indexed [width, height, depth, batch].</param>
/// <param name="gradient">Gradient w.r.t. the convolution output.</param>
/// <param name="stride">Stride used in the forward pass.</param>
/// <param name="padding">Padding mode used in the forward pass.</param>
/// <param name="kernelsGradient">Accumulator; BatchSize is the number of filters.</param>
public override void Conv2DKernelsGradient(Tensor input, Tensor gradient, int stride, Tensor.PaddingType padding, Tensor kernelsGradient)
{
    input.CopyToHost();
    gradient.CopyToHost();
    kernelsGradient.CopyToHost();

    int outputWidth = 0, outputHeight = 0, paddingX = 0, paddingY = 0;
    Tensor.GetPaddingParams(padding, input.Width, input.Height, kernelsGradient.Width, kernelsGradient.Height, stride, out outputHeight, out outputWidth, out paddingX, out paddingY);

    // Samples run sequentially while filters run in parallel: each filter index
    // accumulates into its own slice of kernelsGradient, so there is no race.
    for (int sample = 0; sample < gradient.BatchSize; ++sample)
    {
        Parallel.For(0, kernelsGradient.BatchSize, filter =>
        {
            int inY = -paddingY;
            for (int oy = 0; oy < gradient.Height; inY += stride, ++oy)
            {
                int inX = -paddingX;
                for (int ox = 0; ox < gradient.Width; inX += stride, ++ox)
                {
                    float g = gradient[ox, oy, filter, sample];
                    // TryGet presumably returns 0 outside the input bounds, so the
                    // zero-padding margin contributes nothing — confirm in Tensor.
                    for (int kd = 0; kd < kernelsGradient.Depth; ++kd)
                    {
                        for (int kh = 0; kh < kernelsGradient.Height; ++kh)
                        {
                            for (int kw = 0; kw < kernelsGradient.Width; ++kw)
                            {
                                kernelsGradient[kw, kh, kd, filter] += input.TryGet(0, inX + kw, inY + kh, kd, sample) * g;
                            }
                        }
                    }
                }
            }
        });
    }
}
/// <summary>
/// CPU convolution input-gradient pass: a "full" convolution of the output
/// gradient with the 180°-rotated kernels, accumulated into
/// <paramref name="inputGradients"/> (the accumulator is NOT cleared here).
/// </summary>
/// <param name="gradient">Gradient w.r.t. the convolution output.</param>
/// <param name="kernels">Forward-pass kernels; rotated internally.</param>
/// <param name="stride">Stride used in the forward pass.</param>
/// <param name="padding">Ignored — the backward pass always uses Full padding.</param>
/// <param name="inputGradients">Accumulator shaped like the forward input.</param>
public override void Conv2DInputGradient(Tensor gradient, Tensor kernels, int stride, Tensor.PaddingType padding, Tensor inputGradients)
{
    gradient.CopyToHost();
    kernels.CopyToHost();
    inputGradients.CopyToHost();

    Tensor rotKernels = kernels.Rotated180();
    // The input gradient is computed as a full convolution regardless of the
    // forward padding mode, so the incoming value is deliberately overridden.
    padding = Tensor.PaddingType.Full;

    int outputWidth = 0, outputHeight = 0, paddingX = 0, paddingY = 0;
    Tensor.GetPaddingParams(padding, gradient.Width, gradient.Height, kernels.Width, kernels.Height, stride, out outputHeight, out outputWidth, out paddingX, out paddingY);

    // Parallel over samples, and over input-depth channels at each spatial
    // position; each (sample, channel) pair owns disjoint cells of inputGradients.
    Parallel.For(0, gradient.BatchSize, sample =>
    {
        int srcY = -paddingY;
        for (int oy = 0; oy < inputGradients.Height; srcY += stride, ++oy)
        {
            int srcX = -paddingX;
            for (int ox = 0; ox < inputGradients.Width; srcX += stride, ++ox)
            {
                Parallel.For(0, inputGradients.Depth, channel =>
                {
                    // Sum the contributions of every filter over its window.
                    // TryGet presumably yields 0 out of bounds (zero padding) — confirm.
                    for (int filter = 0; filter < rotKernels.BatchSize; ++filter)
                    {
                        for (int kh = 0; kh < rotKernels.Height; ++kh)
                        {
                            for (int kw = 0; kw < rotKernels.Width; ++kw)
                            {
                                inputGradients[ox, oy, channel, sample] += gradient.TryGet(0, srcX + kw, srcY + kh, filter, sample) * rotKernels[kw, kh, channel, filter];
                            }
                        }
                    }
                });
            }
        }
    });
}
// GPU convolution filter-gradient pass via cuDNN's backward-filter routine.
public override void Conv2DKernelsGradient(Tensor input, Tensor gradient, int stride, Tensor.PaddingType padding, Tensor kernelsGradient)
{
    // Resolve zero-padding margins implied by the padding mode; the output-size
    // out-params are required by the signature but unused here.
    int outputWidth = 0, outputHeight = 0, paddingX = 0, paddingY = 0;
    Tensor.GetPaddingParams(padding, input.Width, input.Height, kernelsGradient.Width, kernelsGradient.Height, stride, out outputHeight, out outputWidth, out paddingX, out paddingY);
    // Ensure all operands are resident on the device before the cuDNN call.
    gradient.CopyToDevice();
    input.CopyToDevice();
    kernelsGradient.CopyToDevice();
    using (var convolutionDesc = new ConvolutionDescriptor())
    using (var gradientDesc = new TensorDescriptor())
    using (var inputDesc = new TensorDescriptor())
    using (var kernelsGradientsDesc = new FilterDescriptor())
    {
        // Must match the forward pass configuration (cross-correlation mode).
        convolutionDesc.SetConvolution2dDescriptor(paddingY, paddingX, stride, stride, 1, 1, cudnnConvolutionMode.CrossCorrelation, cudnnDataType.Float);
        // Shape.Dimensions is stored [W, H, C, N]; NCHW wants the reversed [3..0] order.
        gradientDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float, gradient.Shape.Dimensions[3], gradient.Shape.Dimensions[2], gradient.Shape.Dimensions[1], gradient.Shape.Dimensions[0]);
        inputDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float, input.Shape.Dimensions[3], input.Shape.Dimensions[2], input.Shape.Dimensions[1], input.Shape.Dimensions[0]);
        kernelsGradientsDesc.SetFilter4dDescriptor(cudnnDataType.Float, cudnnTensorFormat.NCHW, kernelsGradient.Shape.Dimensions[3], kernelsGradient.Shape.Dimensions[2], kernelsGradient.Shape.Dimensions[1], kernelsGradient.Shape.Dimensions[0]);
        // Ask cuDNN for the fastest backward-filter algorithm and its workspace size.
        var algo = _CudnnContext.GetConvolutionBackwardFilterAlgorithm(inputDesc, gradientDesc, convolutionDesc, kernelsGradientsDesc, cudnnConvolutionBwdFilterPreference.PreferFastest, IntPtr.Zero);
        var workspaceSize = _CudnnContext.GetConvolutionBackwardFilterWorkspaceSize(inputDesc, gradientDesc, convolutionDesc, kernelsGradientsDesc, algo);
        // cuDNN may report 0 bytes; allocate at least 1 so the buffer is never null/empty.
        workspaceSize = workspaceSize == 0 ? new SizeT(1) : workspaceSize;
        // Reuse the cached per-tensor workspace when it already has the right size.
        if (kernelsGradient.GpuData.ConvBackKernelWorkspace == null || kernelsGradient.GpuData.ConvBackKernelWorkspace.Size != workspaceSize)
        {
            kernelsGradient.GpuData.ConvBackKernelWorkspace = new CudaDeviceVariable <byte>(workspaceSize);
        }
        // kernelsGradient = 1.0 * dW + 0.0 * kernelsGradient (beta = 0 overwrites —
        // NOTE(review): the CPU variants accumulate instead; confirm callers zero/expect this).
        _CudnnContext.ConvolutionBackwardFilter(1.0f, inputDesc, input.GpuData.DeviceVar, gradientDesc, gradient.GpuData.DeviceVar, convolutionDesc, algo, kernelsGradient.GpuData.ConvBackKernelWorkspace, 0.0f, kernelsGradientsDesc, kernelsGradient.GpuData.DeviceVar);
    }
}
/// <summary>
/// Baseline (naive, single-threaded) CPU implementation of the convolution
/// filter-gradient pass. For every kernel weight it walks the whole output
/// gradient and accumulates d(loss)/d(weight) into
/// <paramref name="kernelsGradient"/> — the accumulator is NOT cleared here.
/// Cross-correlation convention: the input window is indexed directly, without
/// flipping the kernel.
/// </summary>
/// <param name="input">Forward-pass input, indexed [width, height, depth, batch].</param>
/// <param name="gradient">Gradient w.r.t. the convolution output.</param>
/// <param name="stride">Stride used in the forward pass.</param>
/// <param name="padding">Padding mode used in the forward pass.</param>
/// <param name="kernelsGradient">Accumulator; BatchSize is the number of filters.</param>
public virtual void Conv2DKernelsGradient(Tensor input, Tensor gradient, int stride, Tensor.PaddingType padding, Tensor kernelsGradient)
{
    input.CopyToHost();
    gradient.CopyToHost();
    kernelsGradient.CopyToHost();

    int outputWidth = 0, outputHeight = 0, paddingX = 0, paddingY = 0;
    Tensor.GetPaddingParams(padding, input.Width, input.Height, kernelsGradient.Width, kernelsGradient.Height, stride, out outputHeight, out outputWidth, out paddingX, out paddingY);

    // Iterate one kernel weight at a time, then sweep every output position in
    // every sample that this weight contributed to.
    for (int kernelD = 0; kernelD < kernelsGradient.Depth; ++kernelD)
    {
        for (int kernelH = 0; kernelH < kernelsGradient.Height; ++kernelH)
        {
            for (int kernelW = 0; kernelW < kernelsGradient.Width; ++kernelW)
            {
                for (int kernelN = 0; kernelN < kernelsGradient.BatchSize; ++kernelN)
                {
                    for (int outN = 0; outN < gradient.BatchSize; ++outN)
                    {
                        for (int h = -paddingY, outH = 0; outH < gradient.Height; h += stride, ++outH)
                        {
                            for (int w = -paddingX, outW = 0; outW < gradient.Width; w += stride, ++outW)
                            {
                                float grad = gradient[outW, outH, kernelN, outN];
                                // TryGet presumably yields 0 for coordinates inside the
                                // zero-padding margin, so padded positions contribute
                                // nothing — confirm against Tensor.TryGet.
                                float kernGradVal = input.TryGet(0, w + kernelW, h + kernelH, kernelD, outN) * grad;
                                kernelsGradient[kernelW, kernelH, kernelD, kernelN] += kernGradVal;
                            }
                        }
                    }
                }
            }
        }
    }
}