/// <summary>
/// Computes the bias gradient from this volume (used here as the output gradient) through
/// cuDNN's ConvolutionBackwardBias and stores it in <paramref name="biasGradient"/>.
/// </summary>
/// <param name="biasGradient">Destination volume; its storage must be a <see cref="VolumeStorage"/>.</param>
/// <exception cref="ArgumentException">
/// Thrown when <paramref name="biasGradient"/> is not backed by a <see cref="VolumeStorage"/>.
/// </exception>
protected override void DoBiasGradient(Volume<float> biasGradient)
{
    var outputGradientStorage = this._volumeStorage;
    var biasGradientStorage = biasGradient.Storage as VolumeStorage;
    if (biasGradientStorage == null)
    {
        // Fail with a descriptive error instead of a NullReferenceException further down.
        throw new ArgumentException($"{nameof(biasGradient)} storage should be VolumeStorage", nameof(biasGradient));
    }

    // Copy to device if not already done
    outputGradientStorage.CopyToDevice();
    biasGradientStorage.CopyToDevice();

    using (var dOutputDesc = new TensorDescriptor())
    using (var dBiasDesc = new TensorDescriptor())
    {
        // Shape dimensions are passed in reverse order (3, 2, 1, 0) to fill the N, C, H, W slots.
        dOutputDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float,
            this.Shape.GetDimension(3),
            this.Shape.GetDimension(2),
            this.Shape.GetDimension(1),
            this.Shape.GetDimension(0));

        dBiasDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float,
            biasGradient.Shape.GetDimension(3),
            biasGradient.Shape.GetDimension(2),
            biasGradient.Shape.GetDimension(1),
            biasGradient.Shape.GetDimension(0));

        // bias: called with scaling factors 1 (source) and 0 (destination)
        this._context.CudnnContext.ConvolutionBackwardBias(
            1.0f, dOutputDesc, outputGradientStorage.DeviceBuffer,
            0.0f, dBiasDesc, biasGradientStorage.DeviceBuffer);
    }
}
/// <summary>
/// Writes this volume multiplied by <paramref name="factor"/> into <paramref name="result"/>.
/// The device data of this volume is copied into the result buffer, which is then scaled in place.
/// </summary>
/// <param name="result">Destination volume; its storage must be a <see cref="VolumeStorage"/>.</param>
/// <param name="factor">Scaling factor handed to cuDNN's ScaleTensor.</param>
/// <exception cref="ArgumentException">Thrown when the result storage is not a VolumeStorage.</exception>
protected override void DoMultiply(Volume<double> result, double factor)
{
    var target = result.Storage as VolumeStorage;
    if (target == null)
    {
        throw new ArgumentException($"{nameof(result)} storage should be VolumeStorage", nameof(result));
    }

    var source = this._volumeStorage;

    // Both buffers have to be present on the device before the raw copy.
    source.CopyToDevice();
    target.CopyToDevice();

    // result = this
    DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpy(
        target.DeviceBuffer.DevicePointer,
        source.DeviceBuffer.DevicePointer,
        this.Shape.TotalLength * sizeof(double));
    target.CopiedToDevice = true;

    // Wait for the default stream before scaling.
    this._context.DefaultStream.Synchronize();

    // Scale the copied data in place.
    using (var tensorDesc = new TensorDescriptor())
    {
        tensorDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Double,
            result.Shape.GetDimension(3),
            result.Shape.GetDimension(2),
            result.Shape.GetDimension(1),
            result.Shape.GetDimension(0));

        this._context.CudnnContext.ScaleTensor(tensorDesc, target.DeviceBuffer, factor);
    }
}
/// <summary>
/// Runs cuDNN's SoftmaxForward (Accurate algorithm, Channel mode) on this volume and writes
/// the result into <paramref name="output"/>.
/// </summary>
/// <param name="output">Destination volume; its storage must be a <see cref="VolumeStorage"/>.</param>
/// <exception cref="ArgumentException">
/// Thrown when <paramref name="output"/> is not backed by a <see cref="VolumeStorage"/>.
/// </exception>
public override void DoSoftmax(Volume<double> output)
{
    var inputStorage = this._volumeStorage;
    var outputStorage = output.Storage as VolumeStorage;
    if (outputStorage == null)
    {
        // Fail with a descriptive error instead of a NullReferenceException further down.
        throw new ArgumentException($"{nameof(output)} storage should be VolumeStorage", nameof(output));
    }

    // Copy to device if not already done
    inputStorage.CopyToDevice();
    outputStorage.CopyToDevice();

    using (var srcDesc = new TensorDescriptor())
    using (var destDesc = new TensorDescriptor())
    {
        var n = this.Shape.GetDimension(3);
        var c = this.Shape.GetDimension(2);
        var h = this.Shape.GetDimension(1);
        var w = this.Shape.GetDimension(0);

        // Both descriptors use the shape of this volume.
        srcDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Double, n, c, h, w);
        destDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Double, n, c, h, w);

        this._context.CudnnContext.SoftmaxForward(
            cudnnSoftmaxAlgorithm.Accurate, cudnnSoftmaxMode.Channel,
            1.0, srcDesc, inputStorage.DeviceBuffer,
            0.0, destDesc, outputStorage.DeviceBuffer);
    }
}
/// <summary>
/// Runs a cuDNN forward 2D cross-correlation of <paramref name="t"/> with <paramref name="kernels"/>
/// and writes the output into <paramref name="result"/>.
/// </summary>
/// <param name="t">Input tensor.</param>
/// <param name="kernels">Filter tensor.</param>
/// <param name="stride">Stride used for both convolution directions.</param>
/// <param name="padding">Padding type resolved into concrete values by <c>Tensor.GetPaddingParams</c>.</param>
/// <param name="result">Output tensor; its <c>GpuData.ConvWorkspace</c> caches the cuDNN workspace.</param>
public override void Conv2D(Tensor t, Tensor kernels, int stride, Tensor.PaddingType padding, Tensor result)
{
    // Only the padding values are used below; the output sizes are required out arguments.
    int outputWidth = 0, outputHeight = 0, paddingX = 0, paddingY = 0;
    Tensor.GetPaddingParams(padding, t.Width, t.Height, kernels.Width, kernels.Height, stride,
        out outputHeight, out outputWidth, out paddingX, out paddingY);

    t.CopyToDevice();
    kernels.CopyToDevice();
    result.CopyToDevice();

    using (var convolutionDesc = new ConvolutionDescriptor())
    using (var tDesc = new TensorDescriptor())
    using (var kernelsDesc = new FilterDescriptor())
    using (var resultDesc = new TensorDescriptor())
    {
        convolutionDesc.SetConvolution2dDescriptor(paddingY, paddingX, stride, stride, 1, 1,
            cudnnConvolutionMode.CrossCorrelation, cudnnDataType.Float);

        // Shape dimensions are passed in reverse order (3, 2, 1, 0) to fill the N, C, H, W slots.
        tDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float,
            t.Shape.Dimensions[3], t.Shape.Dimensions[2], t.Shape.Dimensions[1], t.Shape.Dimensions[0]);
        kernelsDesc.SetFilter4dDescriptor(cudnnDataType.Float, cudnnTensorFormat.NCHW,
            kernels.Shape.Dimensions[3], kernels.Shape.Dimensions[2], kernels.Shape.Dimensions[1], kernels.Shape.Dimensions[0]);
        resultDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float,
            result.Shape.Dimensions[3], result.Shape.Dimensions[2], result.Shape.Dimensions[1], result.Shape.Dimensions[0]);

        var algo = _CudnnContext.GetConvolutionForwardAlgorithm(tDesc, kernelsDesc, convolutionDesc, resultDesc,
            cudnnConvolutionFwdPreference.PreferFastest, IntPtr.Zero);

        var workspaceSize = _CudnnContext.GetConvolutionForwardWorkspaceSize(tDesc, kernelsDesc, convolutionDesc, resultDesc, algo);
        // Always allocate at least one byte so a valid buffer can be handed to cuDNN.
        workspaceSize = workspaceSize == 0 ? new SizeT(1) : workspaceSize;

        var workspace = result.GpuData.ConvWorkspace;
        if (workspace == null || workspace.Size != workspaceSize)
        {
            if (workspace != null)
            {
                // Release the stale buffer explicitly; dropping the reference alone would
                // leave its device memory allocated.
                result.GpuData.ConvWorkspace = null;
                workspace.Dispose();
            }

            result.GpuData.ConvWorkspace = new CudaDeviceVariable<byte>(workspaceSize);
        }

        _CudnnContext.ConvolutionForward(
            1.0f, tDesc, t.GpuData.DeviceVar,
            kernelsDesc, kernels.GpuData.DeviceVar,
            convolutionDesc, algo, result.GpuData.ConvWorkspace,
            0.0f, resultDesc, result.GpuData.DeviceVar);
    }
}
/// <summary>
/// Computes the gradient of a 2D cross-correlation with respect to its kernels through cuDNN's
/// ConvolutionBackwardFilter and writes it into <paramref name="kernelsGradient"/>.
/// </summary>
/// <param name="input">Input tensor of the forward convolution.</param>
/// <param name="gradient">Gradient flowing back from the convolution output.</param>
/// <param name="stride">Stride used for both convolution directions.</param>
/// <param name="padding">Padding type resolved into concrete values by <c>Tensor.GetPaddingParams</c>.</param>
/// <param name="kernelsGradient">
/// Destination tensor; its <c>GpuData.ConvBackKernelWorkspace</c> caches the cuDNN workspace.
/// </param>
public override void Conv2DKernelsGradient(Tensor input, Tensor gradient, int stride, Tensor.PaddingType padding, Tensor kernelsGradient)
{
    // Only the padding values are used below; the output sizes are required out arguments.
    int outputWidth = 0, outputHeight = 0, paddingX = 0, paddingY = 0;
    Tensor.GetPaddingParams(padding, input.Width, input.Height, kernelsGradient.Width, kernelsGradient.Height, stride,
        out outputHeight, out outputWidth, out paddingX, out paddingY);

    gradient.CopyToDevice();
    input.CopyToDevice();
    kernelsGradient.CopyToDevice();

    using (var convolutionDesc = new ConvolutionDescriptor())
    using (var gradientDesc = new TensorDescriptor())
    using (var inputDesc = new TensorDescriptor())
    using (var kernelsGradientsDesc = new FilterDescriptor())
    {
        convolutionDesc.SetConvolution2dDescriptor(paddingY, paddingX, stride, stride, 1, 1,
            cudnnConvolutionMode.CrossCorrelation, cudnnDataType.Float);

        // Shape dimensions are passed in reverse order (3, 2, 1, 0) to fill the N, C, H, W slots.
        gradientDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float,
            gradient.Shape.Dimensions[3], gradient.Shape.Dimensions[2], gradient.Shape.Dimensions[1], gradient.Shape.Dimensions[0]);
        inputDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float,
            input.Shape.Dimensions[3], input.Shape.Dimensions[2], input.Shape.Dimensions[1], input.Shape.Dimensions[0]);
        kernelsGradientsDesc.SetFilter4dDescriptor(cudnnDataType.Float, cudnnTensorFormat.NCHW,
            kernelsGradient.Shape.Dimensions[3], kernelsGradient.Shape.Dimensions[2], kernelsGradient.Shape.Dimensions[1], kernelsGradient.Shape.Dimensions[0]);

        var algo = _CudnnContext.GetConvolutionBackwardFilterAlgorithm(inputDesc, gradientDesc, convolutionDesc, kernelsGradientsDesc,
            cudnnConvolutionBwdFilterPreference.PreferFastest, IntPtr.Zero);

        var workspaceSize = _CudnnContext.GetConvolutionBackwardFilterWorkspaceSize(inputDesc, gradientDesc, convolutionDesc, kernelsGradientsDesc, algo);
        // Always allocate at least one byte so a valid buffer can be handed to cuDNN.
        workspaceSize = workspaceSize == 0 ? new SizeT(1) : workspaceSize;

        var workspace = kernelsGradient.GpuData.ConvBackKernelWorkspace;
        if (workspace == null || workspace.Size != workspaceSize)
        {
            if (workspace != null)
            {
                // Release the stale buffer explicitly; dropping the reference alone would
                // leave its device memory allocated.
                kernelsGradient.GpuData.ConvBackKernelWorkspace = null;
                workspace.Dispose();
            }

            kernelsGradient.GpuData.ConvBackKernelWorkspace = new CudaDeviceVariable<byte>(workspaceSize);
        }

        _CudnnContext.ConvolutionBackwardFilter(
            1.0f, inputDesc, input.GpuData.DeviceVar,
            gradientDesc, gradient.GpuData.DeviceVar,
            convolutionDesc, algo, kernelsGradient.GpuData.ConvBackKernelWorkspace,
            0.0f, kernelsGradientsDesc, kernelsGradient.GpuData.DeviceVar);
    }
}
/// <summary>
/// Stores this volume scaled by <paramref name="factor"/> in <paramref name="result"/>:
/// the result storage first receives a copy of this volume, then is scaled in place by cuDNN.
/// </summary>
/// <param name="factor">Scaling factor handed to ScaleTensor.</param>
/// <param name="result">Destination volume; its storage must be a <see cref="VolumeStorage"/>.</param>
/// <exception cref="ArgumentException">Thrown when the result storage is not a VolumeStorage.</exception>
public override void Multiply(double factor, Volume<double> result)
{
    var target = result.Storage as VolumeStorage;
    if (target == null)
    {
        throw new ArgumentException($"{nameof(result)} storage should be VolumeStorage", nameof(result));
    }

    var source = this._volumeStorage;

    // Upload this volume if needed, then seed the result with its contents.
    source.CopyToDevice();
    target.CopyFrom(source);

    // Scale the result in place.
    using var scaledDesc = new TensorDescriptor();
    var dims = result.Shape.Dimensions;
    var batch = dims[3];
    var channels = dims[2];
    var height = dims[1];
    var width = dims[0];
    scaledDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Double, batch, channels, height, width);

    this._context.CudnnContext.ScaleTensor(scaledDesc, target.DeviceBuffer, factor);
}
/// <summary>
/// Runs cuDNN's SoftmaxBackward (Accurate algorithm, Channel mode) with this volume as the
/// softmax output and writes the result into <paramref name="inputGradient"/>.
/// </summary>
/// <param name="outputGradient">Gradient with respect to the softmax output.</param>
/// <param name="inputGradient">Destination for the gradient with respect to the softmax input.</param>
public override void SoftmaxGradient(Volume<double> outputGradient, Volume<double> inputGradient)
{
    // Direct casts: an unexpected storage type surfaces as InvalidCastException.
    var dxStorage = (VolumeStorage)inputGradient.Storage;
    var dyStorage = (VolumeStorage)outputGradient.Storage;
    var yStorage = this._volumeStorage;

    // Make sure all three buffers are present on the device.
    yStorage.CopyToDevice();
    dyStorage.CopyToDevice();
    dxStorage.CopyToDevice();

    // Wait for the default stream before calling cuDNN.
    this._context.DefaultStream.Synchronize();

    using var yDesc = new TensorDescriptor();
    using var dyDesc = new TensorDescriptor();
    using var dxDesc = new TensorDescriptor();

    var dims = this.Shape.Dimensions;
    var batch = dims[3];
    var channels = dims[2];
    var height = dims[1];
    var width = dims[0];

    // All three tensors are described with the shape of this volume.
    yDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Double, batch, channels, height, width);
    dyDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Double, batch, channels, height, width);
    dxDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Double, batch, channels, height, width);

    this._context.CudnnContext.SoftmaxBackward(
        cudnnSoftmaxAlgorithm.Accurate, cudnnSoftmaxMode.Channel,
        1.0, yDesc, yStorage.DeviceBuffer,
        dyDesc, dyStorage.DeviceBuffer,
        0.0, dxDesc, dxStorage.DeviceBuffer);
}
/// <summary>
/// Computes the bias gradient from this volume (used here as the output gradient) through
/// cuDNN's ConvolutionBackwardBias and stores it in <paramref name="biasGradient"/>.
/// </summary>
/// <param name="biasGradient">Destination volume; its storage must be a <see cref="VolumeStorage"/>.</param>
/// <exception cref="ArgumentException">
/// Thrown when <paramref name="biasGradient"/> is not backed by a <see cref="VolumeStorage"/>.
/// </exception>
public override void BiasGradient(Volume<double> biasGradient)
{
    var outputGradientStorage = this._volumeStorage;
    var biasGradientStorage = biasGradient.Storage as VolumeStorage;
    if (biasGradientStorage == null)
    {
        // Fail with a descriptive error instead of a NullReferenceException further down.
        throw new ArgumentException($"{nameof(biasGradient)} storage should be VolumeStorage", nameof(biasGradient));
    }

    // Copy to device if not already done
    outputGradientStorage.CopyToDevice();
    biasGradientStorage.CopyToDevice();

    using var dOutputDesc = new TensorDescriptor();
    using var dBiasDesc = new TensorDescriptor();

    // Shape dimensions are passed in reverse order (3, 2, 1, 0) to fill the N, C, H, W slots.
    dOutputDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Double,
        this.Shape.Dimensions[3],
        this.Shape.Dimensions[2],
        this.Shape.Dimensions[1],
        this.Shape.Dimensions[0]);

    dBiasDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Double,
        biasGradient.Shape.Dimensions[3],
        biasGradient.Shape.Dimensions[2],
        biasGradient.Shape.Dimensions[1],
        biasGradient.Shape.Dimensions[0]);

    // bias: called with scaling factors 1 (source) and 0 (destination)
    this._context.CudnnContext.ConvolutionBackwardBias(
        1.0, dOutputDesc, outputGradientStorage.DeviceBuffer,
        0.0, dBiasDesc, biasGradientStorage.DeviceBuffer);
}
/// <summary>
/// Applies the cuDNN activation selected by <paramref name="mode"/> to this volume and writes
/// the output into <paramref name="result"/>.
/// </summary>
/// <param name="result">Destination volume; its storage must be a <see cref="VolumeStorage"/>.</param>
/// <param name="mode">Activation mode forwarded to the activation descriptor.</param>
/// <exception cref="ArgumentException">Thrown when the result storage is not a VolumeStorage.</exception>
private void DoActivation(Volume<float> result, cudnnActivationMode mode)
{
    var target = result.Storage as VolumeStorage;
    if (target == null)
    {
        throw new ArgumentException($"{nameof(result)} storage should be VolumeStorage", nameof(result));
    }

    // Make sure both buffers are present on the device.
    this._volumeStorage.CopyToDevice();
    target.CopyToDevice();

    // Wait for the default stream before calling cuDNN.
    this._context.DefaultStream.Synchronize();

    using (var activation = new ActivationDescriptor())
    using (var inputDesc = new TensorDescriptor())
    using (var outputDesc = new TensorDescriptor())
    {
        var batch = result.Shape.GetDimension(3);
        var channels = result.Shape.GetDimension(2);
        var height = result.Shape.GetDimension(1);
        var width = result.Shape.GetDimension(0);

        // Input and output are both described with the shape of the result volume.
        inputDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float, batch, channels, height, width);
        outputDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float, batch, channels, height, width);
        activation.SetActivationDescriptor(mode, cudnnNanPropagation.NotPropagateNan, 0.0);

        this._context.CudnnContext.ActivationForward(
            activation,
            1.0f, inputDesc, this._volumeStorage.DeviceBuffer,
            0.0f, outputDesc, target.DeviceBuffer);
    }
}
/// <summary>
/// Stores this volume scaled by <paramref name="factor"/> in <paramref name="result"/>:
/// the result storage first receives a copy of this volume, then is scaled in place by cuDNN.
/// </summary>
/// <param name="result">Destination volume; its storage must be a <see cref="VolumeStorage"/>.</param>
/// <param name="factor">Scaling factor handed to ScaleTensor.</param>
/// <exception cref="ArgumentException">Thrown when the result storage is not a VolumeStorage.</exception>
public override void DoMultiply(Volume<float> result, float factor)
{
    var target = result.Storage as VolumeStorage;
    if (target == null)
    {
        throw new ArgumentException($"{nameof(result)} storage should be VolumeStorage", nameof(result));
    }

    var source = this._volumeStorage;

    // Upload this volume if needed, then seed the result with its contents.
    source.CopyToDevice();
    target.CopyFrom(source);

    // Scale the result in place.
    using (var scaledDesc = new TensorDescriptor())
    {
        scaledDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float,
            result.Shape.GetDimension(3),
            result.Shape.GetDimension(2),
            result.Shape.GetDimension(1),
            result.Shape.GetDimension(0));

        this._context.CudnnContext.ScaleTensor(scaledDesc, target.DeviceBuffer, factor);
    }
}
/// <summary>
/// Runs cuDNN's SoftmaxBackward (Accurate algorithm, Channel mode) with this volume as the
/// softmax output and writes the result into <paramref name="inputGradient"/>.
/// </summary>
/// <param name="outputGradient">Gradient with respect to the softmax output.</param>
/// <param name="inputGradient">Destination for the gradient with respect to the softmax input.</param>
/// <exception cref="ArgumentException">
/// Thrown when either volume is not backed by a <see cref="VolumeStorage"/>.
/// </exception>
public override void DoSoftmaxGradient(Volume<float> outputGradient, Volume<float> inputGradient)
{
    var inputGradientStorage = inputGradient.Storage as VolumeStorage;
    var outputGradientStorage = outputGradient.Storage as VolumeStorage;
    var outputStorage = this._volumeStorage;

    // Fail with a descriptive error instead of a NullReferenceException further down.
    if (outputGradientStorage == null)
    {
        throw new ArgumentException($"{nameof(outputGradient)} storage should be VolumeStorage", nameof(outputGradient));
    }

    if (inputGradientStorage == null)
    {
        throw new ArgumentException($"{nameof(inputGradient)} storage should be VolumeStorage", nameof(inputGradient));
    }

    // Copy to device if not already done
    outputStorage.CopyToDevice();
    outputGradientStorage.CopyToDevice();
    inputGradientStorage.CopyToDevice();

    // Synchro
    this._context.DefaultStream.Synchronize();

    using (var srcDesc = new TensorDescriptor())
    using (var srcDiffDesc = new TensorDescriptor())
    using (var destDiffDesc = new TensorDescriptor())
    {
        var n = this.Shape.GetDimension(3);
        var c = this.Shape.GetDimension(2);
        var h = this.Shape.GetDimension(1);
        var w = this.Shape.GetDimension(0);

        // All three tensors are described with the shape of this volume.
        srcDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float, n, c, h, w);
        srcDiffDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float, n, c, h, w);
        destDiffDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float, n, c, h, w);

        this._context.CudnnContext.SoftmaxBackward(
            cudnnSoftmaxAlgorithm.Accurate, cudnnSoftmaxMode.Channel,
            1.0f, srcDesc, outputStorage.DeviceBuffer,
            srcDiffDesc, outputGradientStorage.DeviceBuffer,
            0.0f, destDiffDesc, inputGradientStorage.DeviceBuffer);
    }
}
/// <summary>
/// Adds this volume to <paramref name="result"/> in place through cuDNN's AddTensor,
/// called with both scaling factors set to 1.
/// </summary>
/// <remarks>
/// When this volume has extent 1 on dimensions 0, 1 and 2, the result is described to cuDNN as a
/// flat tensor of shape (TotalLength, 1, 1, 1) instead of its real shape.
/// </remarks>
/// <param name="result">Volume that is updated in place; its storage must be a <see cref="VolumeStorage"/>.</param>
/// <exception cref="ArgumentException">
/// Thrown when this volume or <paramref name="result"/> is not backed by a <see cref="VolumeStorage"/>.
/// </exception>
public override void Add(Volume<double> result)
{
    var inputStorage = this.Storage as VolumeStorage;
    var resultStorage = result.Storage as VolumeStorage;

    if (inputStorage == null)
    {
        // The offending storage belongs to this volume, not to the result argument.
        throw new ArgumentException("storage of this volume should be VolumeStorage");
    }

    if (resultStorage == null)
    {
        throw new ArgumentException($"{nameof(result)} storage should be VolumeStorage", nameof(result));
    }

    var resultDim3 = result.Shape.Dimensions[3];
    var resultDim2 = result.Shape.Dimensions[2];
    var resultDim1 = result.Shape.Dimensions[1];
    var resultDim0 = result.Shape.Dimensions[0];

    var dim3 = this.Shape.Dimensions[3];
    var dim2 = this.Shape.Dimensions[2];
    var dim1 = this.Shape.Dimensions[1];
    var dim0 = this.Shape.Dimensions[0];

    if (dim0 == 1 && dim1 == 1 && dim2 == 1)
    {
        // Describe the result as a flat tensor: everything goes into the N slot.
        resultDim3 = (int)result.Shape.TotalLength;
        resultDim0 = 1;
        resultDim1 = 1;
        resultDim2 = 1;
    }

    // Copy to device if not already done
    inputStorage.CopyToDevice();
    resultStorage.CopyToDevice();

    // Add tensors
    using var otherDesc = new TensorDescriptor();
    using var resultDesc = new TensorDescriptor();

    resultDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Double, resultDim3, resultDim2, resultDim1, resultDim0);
    otherDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Double, dim3, dim2, dim1, dim0);

    this._context.CudnnContext.AddTensor(
        1.0, otherDesc, inputStorage.DeviceBuffer,
        1.0, resultDesc, resultStorage.DeviceBuffer);
}
/// <summary>
/// Writes the dimension count and the first three dimensions and strides of
/// <paramref name="desc"/> to the console.
/// </summary>
/// <param name="desc">Descriptor queried through <c>GetND</c>.</param>
public static void Dump(this TensorDescriptor desc)
{
    // Three-slot buffers: only three entries per array are printed below.
    var dims = new int[3];
    var strides = new int[3];

    DataType elementType;
    int dimCount;
    desc.GetND(out elementType, out dimCount, dims, strides);

    Console.WriteLine($"[TensorDescriptor] {dimCount} ({dims[0]},{dims[1]},{dims[2]}) ({strides[0]},{strides[1]},{strides[2]})");
}
/// <summary>
/// Launches the "fill" kernel for the buffer <paramref name="x"/> with <paramref name="value"/>.
/// </summary>
/// <param name="x">Device buffer whose pointer is passed to the kernel.</param>
/// <param name="value">Value passed to the kernel.</param>
/// <param name="desc">Descriptor whose <c>Size</c> sizes the launch; also passed as a kernel argument.</param>
public void Fill(CudaDeviceVariable<float> x, float value, TensorDescriptor desc)
{
    var launchSize = desc.Size;
    var target = x.DevicePointer;

    // NOTE(review): the literal 0 is forwarded unchanged; its meaning is defined by LaunchKernel.
    _kernelManager.LaunchKernel("fill", launchSize, 0, target, value, desc);
}
/// <summary>
/// Launches the "sum" kernel for the buffers <paramref name="a"/> and <paramref name="b"/>.
/// </summary>
/// <param name="a">First device buffer; its pointer is passed to the kernel.</param>
/// <param name="b">Second device buffer; its pointer is passed to the kernel.</param>
/// <param name="desc">Descriptor whose <c>Size</c> sizes the launch; also passed as a kernel argument.</param>
public void Sum(CudaDeviceVariable<float> a, CudaDeviceVariable<float> b, TensorDescriptor desc)
{
    var launchSize = desc.Size;
    var first = a.DevicePointer;
    var second = b.DevicePointer;

    // NOTE(review): the literal 0 is forwarded unchanged; its meaning is defined by LaunchKernel.
    _kernelManager.LaunchKernel("sum", launchSize, 0, first, second, desc);
}
/// <summary>
/// Queries the amount of reserve space needed to run dropout on a tensor described by
/// <paramref name="xDesc"/>. The same reserve space is expected to be passed to both
/// cudnnDropoutForward and cudnnDropoutBackward, and its contents are expected to remain
/// unchanged between those two calls.
/// </summary>
/// <param name="xDesc">Handle to a previously initialized tensor descriptor, describing input to a dropout operation.</param>
/// <returns>The size reported by cudnnDropoutGetReserveSpaceSize.</returns>
/// <exception cref="CudaDNNException">Thrown when the native call does not return Success.</exception>
public SizeT GetDropoutReserveSpaceSize(TensorDescriptor xDesc)
{
    var reserveBytes = new SizeT();
    var status = CudaDNNNativeMethods.cudnnDropoutGetReserveSpaceSize(xDesc.Desc, ref reserveBytes);

    if (status == cudnnStatus.Success)
    {
        return reserveBytes;
    }

    throw new CudaDNNException(status);
}
/// <summary>
/// Creates a tensor descriptor from the element type, dimension count, shape and strides of
/// <paramref name="tensor"/>.
/// </summary>
/// <param name="tensor">The tensor to describe. Shape and stride entries are narrowed to <c>int</c>.</param>
/// <returns>A new <see cref="TensorDescriptor"/>; the caller is responsible for disposing it.</returns>
private static TensorDescriptor GetDescriptor(NDArray tensor)
{
    var result = new TensorDescriptor();
    try
    {
        result.SetTensorNdDescriptor(
            GetDataType(tensor.ElementType),
            tensor.DimensionCount,
            tensor.Shape.Select(x => (int)x).ToArray(),
            tensor.Strides.Select(x => (int)x).ToArray());
    }
    catch
    {
        // Nobody else holds the descriptor yet, so release it before propagating the error.
        result.Dispose();
        throw;
    }

    return result;
}
/// <summary>
/// Creates a tensor descriptor from the element type, dimension count, sizes and strides of
/// <paramref name="shape"/>.
/// </summary>
/// <param name="shape">The shape to describe. Size and stride entries are narrowed to <c>int</c>.</param>
/// <returns>A new <see cref="TensorDescriptor"/>; the caller is responsible for disposing it.</returns>
private static TensorDescriptor GetDescriptor(TensorShape shape)
{
    var result = new TensorDescriptor();
    try
    {
        result.SetTensorNdDescriptor(
            GetDataType(shape.ElementType),
            shape.DimensionCount,
            shape.Sizes.Select(x => (int)x).ToArray(),
            shape.Strides.Select(x => (int)x).ToArray());
    }
    catch
    {
        // Nobody else holds the descriptor yet, so release it before propagating the error.
        result.Dispose();
        throw;
    }

    return result;
}
/// <summary>
/// Computes the max-pooling gradient through cuDNN's PoolingBackward, using this volume as the
/// pooling output, and writes the result into <paramref name="inputGradient"/>.
/// </summary>
/// <param name="input">Input of the forward pooling.</param>
/// <param name="outputGradient">Gradient with respect to the pooling output.</param>
/// <param name="inputGradient">Destination for the gradient with respect to the pooling input.</param>
/// <param name="windowWidth">Pooling window width.</param>
/// <param name="windowHeight">Pooling window height.</param>
/// <param name="horizontalPad">Horizontal padding.</param>
/// <param name="verticalPad">Vertical padding.</param>
/// <param name="horizontalStride">Horizontal stride.</param>
/// <param name="verticalStride">Vertical stride.</param>
/// <exception cref="ArgumentException">
/// Thrown when one of the volumes is not backed by a <see cref="VolumeStorage"/>.
/// </exception>
public override void DoPoolGradient(Volume<double> input, Volume<double> outputGradient, Volume<double> inputGradient,
    int windowWidth, int windowHeight, int horizontalPad, int verticalPad, int horizontalStride, int verticalStride)
{
    var inputStorage = input.Storage as VolumeStorage;
    var inputGradientStorage = inputGradient.Storage as VolumeStorage;
    var outputStorage = this._volumeStorage;
    var outputGradientStorage = outputGradient.Storage as VolumeStorage;

    // Fail with a descriptive error instead of a NullReferenceException further down.
    if (inputStorage == null)
    {
        throw new ArgumentException($"{nameof(input)} storage should be VolumeStorage", nameof(input));
    }

    if (outputGradientStorage == null)
    {
        throw new ArgumentException($"{nameof(outputGradient)} storage should be VolumeStorage", nameof(outputGradient));
    }

    if (inputGradientStorage == null)
    {
        throw new ArgumentException($"{nameof(inputGradient)} storage should be VolumeStorage", nameof(inputGradient));
    }

    // Copy to device if not already done
    // NOTE(review): outputStorage is intentionally left as in the original, which has its
    // CopyToDevice call commented out.
    outputGradientStorage.CopyToDevice();
    inputStorage.CopyToDevice();
    inputGradientStorage.CopyToDevice();

    // Synchro
    this._context.DefaultStream.Synchronize();

    using (var poolingDesc = new PoolingDescriptor())
    using (var srcDesc = new TensorDescriptor())
    using (var srcDiffDesc = new TensorDescriptor())
    using (var destDesc = new TensorDescriptor())
    using (var destDiffDesc = new TensorDescriptor())
    {
        var n = this.Shape.GetDimension(3);
        var c = this.Shape.GetDimension(2);
        var h = this.Shape.GetDimension(1);
        var w = this.Shape.GetDimension(0);

        // Output-side tensors use the shape of this volume.
        srcDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Double, n, c, h, w);
        srcDiffDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Double, n, c, h, w);

        // Input-side tensors use the shape of the input storage.
        destDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Double,
            inputStorage.Shape.GetDimension(3),
            inputStorage.Shape.GetDimension(2),
            inputStorage.Shape.GetDimension(1),
            inputStorage.Shape.GetDimension(0));
        destDiffDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Double,
            inputStorage.Shape.GetDimension(3),
            inputStorage.Shape.GetDimension(2),
            inputStorage.Shape.GetDimension(1),
            inputStorage.Shape.GetDimension(0));

        poolingDesc.SetPooling2dDescriptor(cudnnPoolingMode.Max, cudnnNanPropagation.NotPropagateNan,
            windowHeight, windowWidth,
            verticalPad, horizontalPad,
            verticalStride, horizontalStride);

        this._context.CudnnContext.PoolingBackward(poolingDesc,
            1.0, srcDesc, outputStorage.DeviceBuffer,
            srcDiffDesc, outputGradientStorage.DeviceBuffer,
            destDesc, inputStorage.DeviceBuffer,
            0.0, destDiffDesc, inputGradientStorage.DeviceBuffer);
    }

    // The freshly written gradient now lives on the device.
    inputGradientStorage.CopiedToDevice = true;
}
/// <summary>
/// Reduces this volume into <paramref name="result"/> with the cuDNN reduction <paramref name="op"/>.
/// When both shapes are equal the data is simply copied.
/// </summary>
/// <param name="result">Destination volume; for an actual reduction its storage must be a <see cref="VolumeStorage"/>.</param>
/// <param name="op">Reduction operation forwarded to the reduce-tensor descriptor.</param>
/// <exception cref="ArgumentException">
/// Thrown when a reduction is required and <paramref name="result"/> is not backed by a <see cref="VolumeStorage"/>.
/// </exception>
private void DoReduce(Volume<double> result, cudnnReduceTensorOp op)
{
    if (this.Shape.Equals(result.Shape))
    {
        // Nothing to reduce: plain copy.
        result.Storage.CopyFrom(this.Storage);
        return;
    }

    var aStorage = this._volumeStorage;
    var cStorage = result.Storage as VolumeStorage;
    if (cStorage == null)
    {
        // Fail with a descriptive error instead of a NullReferenceException further down.
        throw new ArgumentException($"{nameof(result)} storage should be VolumeStorage", nameof(result));
    }

    // Copy to device if not already done
    aStorage.CopyToDevice();
    cStorage.CopyToDevice();

    using (var reduceTensorDesc = new ReduceTensorDescriptor())
    using (var aDesc = new TensorDescriptor())
    using (var cDesc = new TensorDescriptor())
    {
        var an = this.Shape.GetDimension(3);
        var ac = this.Shape.GetDimension(2);
        var ah = this.Shape.GetDimension(1);
        var aw = this.Shape.GetDimension(0);

        var cn = result.Shape.GetDimension(3);
        var cc = result.Shape.GetDimension(2);
        var ch = result.Shape.GetDimension(1);
        var cw = result.Shape.GetDimension(0);

        aDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Double, an, ac, ah, aw);
        cDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Double, cn, cc, ch, cw);

        reduceTensorDesc.SetReduceTensorDescriptor(op, cudnnDataType.Double, cudnnNanPropagation.NotPropagateNan,
            cudnnReduceTensorIndices.NoIndices, cudnnIndicesType.Indices32Bit);

        var workspaceSize = this._context.CudnnContext.GetReductionWorkspaceSize(reduceTensorDesc, aDesc, cDesc);
        // Always allocate at least one byte so a valid buffer can be handed to cuDNN.
        workspaceSize = workspaceSize == 0 ? new SizeT(1) : workspaceSize;

        var workspace = aStorage.ReductionStorage;
        if (workspace == null || workspace.Size != workspaceSize)
        {
            if (workspace != null)
            {
                // Release the stale buffer explicitly; dropping the reference alone would
                // leave its device memory allocated.
                aStorage.ReductionStorage = null;
                workspace.Dispose();
            }

            aStorage.ReductionStorage = new CudaDeviceVariable<byte>(workspaceSize);
        }

        this._context.CudnnContext.ReduceTensor(reduceTensorDesc,
            CudaDeviceVariable<uint>.Null,
            aStorage.ReductionStorage,
            aStorage.ReductionStorage.SizeInBytes,
            1.0, aDesc, aStorage.DeviceBuffer,
            0.0, cDesc, cStorage.DeviceBuffer);
    }
}
/// <summary>
/// Queries the amount of reserve space needed to run dropout on a tensor described by
/// <paramref name="xDesc"/>. The same reserve space is expected to be passed to both
/// cudnnDropoutForward and cudnnDropoutBackward, and its contents are expected to remain
/// unchanged between those two calls.
/// </summary>
/// <param name="xDesc">Handle to a previously initialized tensor descriptor, describing input to a dropout operation.</param>
/// <returns>The size reported by cudnnDropoutGetReserveSpaceSize.</returns>
/// <exception cref="CudaDNNException">Thrown when the native call does not return Success.</exception>
public SizeT GetDropoutReserveSpaceSize(TensorDescriptor xDesc)
{
    var reserveBytes = new SizeT();
    var status = CudaDNNNativeMethods.cudnnDropoutGetReserveSpaceSize(xDesc.Desc, ref reserveBytes);

    if (status == cudnnStatus.Success)
    {
        return reserveBytes;
    }

    throw new CudaDNNException(status);
}
/// <summary>
/// Computes <c>result = this + other</c>: this volume's device data is copied into the result
/// buffer, then <paramref name="other"/> is added onto it through cuDNN's AddTensor.
/// </summary>
/// <param name="other">Volume to add; its storage must be a <see cref="VolumeStorage"/>.</param>
/// <param name="result">Destination volume; its storage must be a <see cref="VolumeStorage"/>.</param>
/// <exception cref="ArgumentException">Thrown when either storage is not a VolumeStorage.</exception>
public override void DoAdd(Volume<double> other, Volume<double> result)
{
    var addend = other.Storage as VolumeStorage;
    var target = result.Storage as VolumeStorage;

    if (addend == null)
    {
        throw new ArgumentException($"{nameof(other)} storage should be VolumeStorage", nameof(other));
    }

    if (target == null)
    {
        throw new ArgumentException($"{nameof(result)} storage should be VolumeStorage", nameof(result));
    }

    var source = this._volumeStorage;

    // Make sure all three buffers are present on the device.
    source.CopyToDevice();
    addend.CopyToDevice();
    target.CopyToDevice();

    // result = this
    DriverAPINativeMethods.SynchronousMemcpy_v2.cuMemcpy(
        target.DeviceBuffer.DevicePointer,
        source.DeviceBuffer.DevicePointer,
        this.Shape.TotalLength * sizeof(double));
    target.CopiedToDevice = true;

    // Wait for the default stream before calling cuDNN.
    this._context.DefaultStream.Synchronize();

    // result += other
    using (var addendDesc = new TensorDescriptor())
    using (var targetDesc = new TensorDescriptor())
    {
        // The result buffer is described with the shape of this volume.
        targetDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Double,
            this.Shape.GetDimension(3),
            this.Shape.GetDimension(2),
            this.Shape.GetDimension(1),
            this.Shape.GetDimension(0));

        addendDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Double,
            other.Shape.GetDimension(3),
            other.Shape.GetDimension(2),
            other.Shape.GetDimension(1),
            other.Shape.GetDimension(0));

        this._context.CudnnContext.AddTensor(
            1.0, addendDesc, addend.DeviceBuffer,
            1.0, targetDesc, target.DeviceBuffer);
    }
}
/// <summary>
/// Performs the forward dropout operation over x, returning results in y. If dropout was used as
/// a parameter to cudnnSetDropoutDescriptor, approximately that fraction of x values will be
/// replaced by 0, and the rest will be scaled by 1/(1-dropout). This function should not run
/// concurrently with another cudnnDropoutForward call using the same states.
/// </summary>
/// <param name="dropoutDesc">Handle to a previously created dropout descriptor object.</param>
/// <param name="xDesc">Handle to the previously initialized input tensor descriptor.</param>
/// <param name="x">Data pointer to GPU memory associated with the input tensor descriptor.</param>
/// <param name="yDesc">Handle to the previously initialized output tensor descriptor.</param>
/// <param name="y">Data pointer to GPU memory associated with the output tensor descriptor.</param>
/// <param name="reserveSpace">
/// Data pointer to GPU memory used by this function. Its contents are expected not to change
/// between the cudnnDropoutForward and cudnnDropoutBackward calls.
/// </param>
/// <exception cref="CudaDNNException">Thrown when the native call does not return Success.</exception>
public void DropoutForward(DropoutDescriptor dropoutDesc, TensorDescriptor xDesc, CudaDeviceVariable<float> x,
    TensorDescriptor yDesc, CudaDeviceVariable<float> y, CudaDeviceVariable<byte> reserveSpace)
{
    var status = CudaDNNNativeMethods.cudnnDropoutForward(
        this.Handle,
        dropoutDesc.Desc,
        xDesc.Desc,
        x.DevicePointer,
        yDesc.Desc,
        y.DevicePointer,
        reserveSpace.DevicePointer,
        reserveSpace.SizeInBytes);

    if (status == cudnnStatus.Success)
    {
        return;
    }

    throw new CudaDNNException(status);
}
/// <summary>
/// Applies cuDNN dropout with probability <paramref name="dropProbability"/> to this volume and
/// writes the output into <paramref name="result"/>. The dropout state and reserve-space buffers
/// are cached on the result storage and reallocated when their required size changes.
/// </summary>
/// <param name="dropProbability">Drop probability; narrowed to <c>float</c> for the descriptor.</param>
/// <param name="result">Destination volume; its storage must be a <see cref="VolumeStorage"/>.</param>
/// <exception cref="ArgumentException">Thrown when the result storage is not a VolumeStorage.</exception>
public override void Dropout(double dropProbability, Volume<double> result)
{
    var resultStorage = result.Storage as VolumeStorage;
    if (resultStorage == null)
    {
        throw new ArgumentException($"{nameof(result)} storage should be VolumeStorage", nameof(result));
    }

    // Copy to device if not already done
    this._volumeStorage.CopyToDevice();
    resultStorage.CopyToDevice();

    using var dropoutDesc = new DropoutDescriptor(this._context.CudnnContext);
    using var srcDesc = new TensorDescriptor();
    using var resultDesc = new TensorDescriptor();

    srcDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Double,
        this.Shape.Dimensions[3], this.Shape.Dimensions[2],
        this.Shape.Dimensions[1], this.Shape.Dimensions[0]);

    resultDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Double,
        result.Shape.Dimensions[3], result.Shape.Dimensions[2],
        result.Shape.Dimensions[1], result.Shape.Dimensions[0]);

    var stateSize = this._context.CudnnContext.GetDropoutStateSize();
    var stateBuffer = resultStorage.DropoutStateStorage;
    if (stateBuffer == null || stateBuffer.Size != stateSize)
    {
        if (stateBuffer != null)
        {
            // Release the stale buffer explicitly; dropping the reference alone would
            // leave its device memory allocated.
            resultStorage.DropoutStateStorage = null;
            stateBuffer.Dispose();
        }

        resultStorage.DropoutStateStorage = new CudaDeviceVariable<byte>(stateSize);
    }

    // The last argument (0) is the seed passed to the descriptor.
    dropoutDesc.SetDropoutDescriptor((float)dropProbability, resultStorage.DropoutStateStorage, stateSize, 0);

    var reserveSpace = this._context.CudnnContext.GetDropoutReserveSpaceSize(srcDesc);
    // Always allocate at least one byte so a valid buffer can be handed to cuDNN.
    reserveSpace = reserveSpace == 0 ? new SizeT(1) : reserveSpace;

    var reserveBuffer = resultStorage.DropoutStorage;
    if (reserveBuffer == null || reserveBuffer.Size != reserveSpace)
    {
        if (reserveBuffer != null)
        {
            // Same leak prevention as for the state buffer above.
            resultStorage.DropoutStorage = null;
            reserveBuffer.Dispose();
        }

        resultStorage.DropoutStorage = new CudaDeviceVariable<byte>(reserveSpace);
    }

    this._context.CudnnContext.DropoutForward(dropoutDesc,
        srcDesc, this._volumeStorage.DeviceBuffer,
        resultDesc, resultStorage.DeviceBuffer,
        resultStorage.DropoutStorage);
}
/// <summary>
/// Builds a 2D convolution over <paramref name="data"/> with zero padding and unit stride,
/// creating the weight, bias, output and workspace variables and registering them as
/// inputs, outputs and auxiliary variables.
/// </summary>
/// <param name="data">Rank-4 input variable whose dimensions 1, 2 and 3 must be positive.</param>
/// <param name="kernelH">Kernel height.</param>
/// <param name="kernelW">Kernel width.</param>
/// <param name="numFilter">Number of output filters.</param>
public Convolution2D(Variable<T> data, int kernelH, int kernelW, int numFilter)
{
    Util.EnsureTrue(data.Shape.Rank == 4);
    Util.EnsureTrue(data.Shape[1] > 0);
    Util.EnsureTrue(data.Shape[2] > 0);
    Util.EnsureTrue(data.Shape[3] > 0);

    var inChannels = data.Shape[1];
    var outChannels = numFilter;
    var inHeight = data.Shape[2];
    var inWidth = data.Shape[3];

    // Padding and stride are fixed for now.
    ConvolutionDesc = new ConvolutionDescriptor();
    ConvolutionDesc.Set2D(0, 0, 1, 1, 1, 1, ConvolutionMode.CROSS_CORRELATION);

    using (var dataDesc = new TensorDescriptor())
    using (var weightDesc = new FilterDescriptor())
    {
        var dataType = Dnn.DataTypeOf<T>();

        // Placeholder mini-batch size, used only to query the output dimensions.
        var probeBatch = 100;
        dataDesc.Set4D(dataType, TensorFormat.CUDNN_TENSOR_NCHW, probeBatch, (int)inChannels, (int)inHeight, (int)inWidth);
        weightDesc.Set4D(dataType, TensorFormat.CUDNN_TENSOR_NCHW, outChannels, (int)inChannels, kernelH, kernelW);

        // Ask the descriptor for the output dimensions.
        int outN, outC, outH, outW;
        ConvolutionDesc.Get2DForwardOutputDim(dataDesc, weightDesc, out outN, out outC, out outH, out outW);

        // Weights are drawn from RandomUniform, mapped through (2 * u - 1) and scaled.
        var initScale = Sqrt(3.0.AsScalar<T>() / ((double)(inChannels * kernelH * kernelW)).AsScalar<T>());

        Data = data;
        Weight = Parameter(initScale * (2.0.AsScalar<T>() * RandomUniform<T>(Shape.Create(outChannels, inChannels, kernelH, kernelW), 0UL, 0UL) - 1.0.AsScalar<T>()));
        Bias = Parameter(Fill(Shape.Create(outC), ScalarOps.Conv<T>(0.1)));
        Output = Variable<T>(PartialShape.Create(-1, outC, outH, outW));
        Workspace1 = AuxVariable<byte>();
        Workspace2 = AuxVariable<byte>();

        AddInput(Data);
        AddInput(Weight);
        AddInput(Bias);
        AddOutput(Output);
        AddAuxVar(Workspace1);
        AddAuxVar(Workspace2);
    }
}
/// <summary>
/// Runs cuDNN's PoolingForward on <paramref name="t"/> with a square window and writes the
/// output into <paramref name="result"/>.
/// </summary>
/// <param name="t">Input tensor.</param>
/// <param name="filterSize">Window size used for both height and width.</param>
/// <param name="stride">Stride used for both directions.</param>
/// <param name="type">Pool type, mapped to the cuDNN pooling mode.</param>
/// <param name="paddingX">Horizontal padding.</param>
/// <param name="paddingY">Vertical padding.</param>
/// <param name="result">Output tensor.</param>
public override void Pool(Tensor t, int filterSize, int stride, Tensor.PoolType type, int paddingX, int paddingY, Tensor result)
{
    t.CopyToDevice();
    result.CopyToDevice();

    using (var poolingDesc = new PoolingDescriptor())
    using (var tDesc = new TensorDescriptor())
    using (var resultDesc = new TensorDescriptor())
    {
        // cuDNN takes the vertical (height) value before the horizontal (width) one for window,
        // padding and stride, so paddingY must come before paddingX.
        poolingDesc.SetPooling2dDescriptor(TensorPoolTypeToCuDNNPoolType(type), cudnnNanPropagation.NotPropagateNan,
            filterSize, filterSize,
            paddingY, paddingX,
            stride, stride);

        // Shape dimensions are passed in reverse order (3, 2, 1, 0) to fill the N, C, H, W slots.
        tDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float,
            t.Shape.Dimensions[3], t.Shape.Dimensions[2], t.Shape.Dimensions[1], t.Shape.Dimensions[0]);
        resultDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float,
            result.Shape.Dimensions[3], result.Shape.Dimensions[2], result.Shape.Dimensions[1], result.Shape.Dimensions[0]);

        _CudnnContext.PoolingForward(poolingDesc,
            1.0f, tDesc, t.GpuData.DeviceVar,
            0.0f, resultDesc, result.GpuData.DeviceVar);
    }
}
/// <summary>
/// Runs cuDNN's SoftmaxForward (Accurate algorithm, Channel mode) on <paramref name="input"/>
/// and writes the output into <paramref name="result"/>.
/// </summary>
/// <param name="input">Input tensor.</param>
/// <param name="result">Output tensor; described with the same dimensions as the input.</param>
public override void Softmax(Tensor input, Tensor result)
{
    input.CopyToDevice();
    result.CopyToDevice();

    using (var sourceDesc = new TensorDescriptor())
    using (var targetDesc = new TensorDescriptor())
    {
        // cuDNN expects the values in the channel slot, so the tensor is described with
        // Height in the C position and Depth in the H position (a fake 'reshape').
        int batch = input.BatchSize;
        int channelSlot = input.Height;
        int heightSlot = input.Depth;
        int widthSlot = input.Width;

        sourceDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float, batch, channelSlot, heightSlot, widthSlot);
        targetDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float, batch, channelSlot, heightSlot, widthSlot);

        _CudnnContext.SoftmaxForward(
            cudnnSoftmaxAlgorithm.Accurate, cudnnSoftmaxMode.Channel,
            1.0f, sourceDesc, input.GpuData.DeviceVar,
            0.0f, targetDesc, result.GpuData.DeviceVar);
    }
}
/// <summary>
/// Performs the forward dropout operation over x, returning results in y. If dropout was used as
/// a parameter to cudnnSetDropoutDescriptor, approximately that fraction of x values will be
/// replaced by 0, and the rest will be scaled by 1/(1-dropout). This function should not run
/// concurrently with another cudnnDropoutForward call using the same states.
/// </summary>
/// <param name="dropoutDesc">Handle to a previously created dropout descriptor object.</param>
/// <param name="xDesc">Handle to the previously initialized input tensor descriptor.</param>
/// <param name="x">Data pointer to GPU memory associated with the input tensor descriptor.</param>
/// <param name="yDesc">Handle to the previously initialized output tensor descriptor.</param>
/// <param name="y">Data pointer to GPU memory associated with the output tensor descriptor.</param>
/// <param name="reserveSpace">
/// Data pointer to GPU memory used by this function. Its contents are expected not to change
/// between the cudnnDropoutForward and cudnnDropoutBackward calls.
/// </param>
/// <exception cref="CudaDNNException">Thrown when the native call does not return Success.</exception>
public void DropoutForward(DropoutDescriptor dropoutDesc, TensorDescriptor xDesc, CudaDeviceVariable<double> x,
    TensorDescriptor yDesc, CudaDeviceVariable<double> y, CudaDeviceVariable<byte> reserveSpace)
{
    var status = CudaDNNNativeMethods.cudnnDropoutForward(
        this.Handle,
        dropoutDesc.Desc,
        xDesc.Desc,
        x.DevicePointer,
        yDesc.Desc,
        y.DevicePointer,
        reserveSpace.DevicePointer,
        reserveSpace.SizeInBytes);

    if (status == cudnnStatus.Success)
    {
        return;
    }

    throw new CudaDNNException(status);
}
/// <summary>
/// Computes <c>result = this + other</c>: the result storage receives a copy of this volume,
/// then <paramref name="other"/> is added onto it through cuDNN's AddTensor.
/// </summary>
/// <param name="other">Volume to add; must not be the same instance as <paramref name="result"/>.</param>
/// <param name="result">Destination volume; its storage must be a <see cref="VolumeStorage"/>.</param>
/// <exception cref="NotSupportedException">Thrown when other and result are the same instance.</exception>
/// <exception cref="ArgumentException">Thrown when either storage is not a VolumeStorage.</exception>
public override void DoAdd(Volume<float> other, Volume<float> result)
{
    if (ReferenceEquals(other, result))
    {
        throw new NotSupportedException("other and result should not be the same!");
    }

    var addend = other.Storage as VolumeStorage;
    var target = result.Storage as VolumeStorage;

    if (addend == null)
    {
        throw new ArgumentException($"{nameof(other)} storage should be VolumeStorage", nameof(other));
    }

    if (target == null)
    {
        throw new ArgumentException($"{nameof(result)} storage should be VolumeStorage", nameof(result));
    }

    // Seed the result with this volume, then make sure the addend is on the device.
    target.CopyFrom(this._volumeStorage);
    addend.CopyToDevice();

    // result += other
    using (var addendDesc = new TensorDescriptor())
    using (var targetDesc = new TensorDescriptor())
    {
        targetDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float,
            result.Shape.GetDimension(3),
            result.Shape.GetDimension(2),
            result.Shape.GetDimension(1),
            result.Shape.GetDimension(0));

        addendDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Float,
            other.Shape.GetDimension(3),
            other.Shape.GetDimension(2),
            other.Shape.GetDimension(1),
            other.Shape.GetDimension(0));

        this._context.CudnnContext.AddTensor(
            1.0f, addendDesc, addend.DeviceBuffer,
            1.0f, targetDesc, target.DeviceBuffer);
    }
}
/// <summary>
/// Computes the gradient of the activation selected by <paramref name="mode"/> through cuDNN's
/// ActivationBackward, using this volume as the activation output, and writes the result into
/// <paramref name="inputGradient"/>.
/// </summary>
/// <param name="input">Input of the forward activation.</param>
/// <param name="outputGradient">Gradient with respect to the activation output.</param>
/// <param name="inputGradient">Destination for the gradient with respect to the activation input.</param>
/// <param name="mode">Activation mode forwarded to the activation descriptor.</param>
/// <exception cref="ArgumentException">
/// Thrown when one of the volumes is not backed by a <see cref="VolumeStorage"/>.
/// </exception>
private void DoActivationGradient(Volume<double> input, Volume<double> outputGradient, Volume<double> inputGradient, cudnnActivationMode mode)
{
    var inputStorage = input.Storage as VolumeStorage;
    var inputGradientStorage = inputGradient.Storage as VolumeStorage;
    var outputStorage = this._volumeStorage;
    var outputGradientStorage = outputGradient.Storage as VolumeStorage;

    // Fail with a descriptive error instead of a NullReferenceException further down.
    if (inputStorage == null)
    {
        throw new ArgumentException($"{nameof(input)} storage should be VolumeStorage", nameof(input));
    }

    if (outputGradientStorage == null)
    {
        throw new ArgumentException($"{nameof(outputGradient)} storage should be VolumeStorage", nameof(outputGradient));
    }

    if (inputGradientStorage == null)
    {
        throw new ArgumentException($"{nameof(inputGradient)} storage should be VolumeStorage", nameof(inputGradient));
    }

    // Copy to device if not already done
    outputStorage.CopyToDevice();
    outputGradientStorage.CopyToDevice();
    // The input's device buffer is read by ActivationBackward below, so it must be uploaded too.
    inputStorage.CopyToDevice();
    inputGradientStorage.CopyToDevice();

    // Synchro
    this._context.DefaultStream.Synchronize();

    using (var activationDesc = new ActivationDescriptor())
    using (var srcDesc = new TensorDescriptor())
    using (var srcDiffDesc = new TensorDescriptor())
    using (var destDesc = new TensorDescriptor())
    using (var destDiffDesc = new TensorDescriptor())
    {
        var n = this.Shape.GetDimension(3);
        var c = this.Shape.GetDimension(2);
        var h = this.Shape.GetDimension(1);
        var w = this.Shape.GetDimension(0);

        // All four tensors are described with the shape of this volume.
        srcDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Double, n, c, h, w);
        srcDiffDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Double, n, c, h, w);
        destDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Double, n, c, h, w);
        destDiffDesc.SetTensor4dDescriptor(cudnnTensorFormat.NCHW, cudnnDataType.Double, n, c, h, w);

        activationDesc.SetActivationDescriptor(mode, cudnnNanPropagation.NotPropagateNan, 0.0);

        this._context.CudnnContext.ActivationBackward(activationDesc,
            1.0, srcDesc, outputStorage.DeviceBuffer,
            srcDiffDesc, outputGradientStorage.DeviceBuffer,
            destDesc, inputStorage.DeviceBuffer,
            0.0, destDiffDesc, inputGradientStorage.DeviceBuffer);
    }

    // The freshly written gradient now lives on the device.
    inputGradientStorage.CopiedToDevice = true;
}