Ejemplo n.º 1
0
        public Conv2Cudnn(IAllocator allocator, SeedSource seedSource, DType elementType, int batchSize, int inputWidth, int inputHeight, int nInputPlane, int nOutputPlane, ConvolutionDesc2d cd)
            : base(allocator, seedSource, elementType, batchSize, inputWidth, inputHeight, nInputPlane, nOutputPlane, cd)
        {
            // Reshape weight and bias - CuDNN expects the dimensions to be structured slightly differently
            this.weight     = ViewReplace(this.weight, nOutputPlane, nInputPlane, cd.kH, cd.kW);
            this.bias       = ViewReplace(this.bias, 1, nOutputPlane, 1, 1);
            this.gradWeight = ViewReplace(this.gradWeight, this.weight.Shape);
            this.gradBias   = ViewReplace(this.gradBias, this.bias.Shape);


            var fwdWorkspace = DNN.GetConvolutionForwardWorkspaceSize(allocator, fwdAlgo, cd,
                                                                      new TensorShape(elementType, new long[] { batchSize, nInputPlane, inputHeight, inputWidth }),
                                                                      new TensorShape(weight),
                                                                      new TensorShape(activation));

            var bwdFilterWorkspace = DNN.GetConvolutionBackwardFilterWorkspaceSize(allocator, bwdFilterAlgo, cd,
                                                                                   new TensorShape(elementType, new long[] { batchSize, nInputPlane, inputHeight, inputWidth }),
                                                                                   new TensorShape(activation),
                                                                                   new TensorShape(weight));

            var bwdFilterInputWorkspace = DNN.GetConvolutionBackwardDataWorkspaceSize(allocator, bwdDataAlgo, cd,
                                                                                      new TensorShape(weight),
                                                                                      new TensorShape(activation),
                                                                                      new TensorShape(elementType, new long[] { batchSize, nInputPlane, inputHeight, inputWidth }));

            var workspaceSize = Math.Max(Math.Max(fwdWorkspace, bwdFilterWorkspace), bwdFilterInputWorkspace);

            this.workspace = (CudaStorage)allocator.Allocate(DType.UInt8, workspaceSize);
        }
Ejemplo n.º 2
0
        public void CopyGpu(Tensor result, Tensor src, long totalElements)
        {
            // We assume here that we are using the default stream for both devices.
            TSCudaContext context = CudaHelpers.TSContextForTensor(src);

            CudaStorage resultStorage = (CudaStorage)result.Storage;
            CudaContext resultContext = context.CudaContextForTensor(result);
            CUdeviceptr resultPtr     = resultStorage.DevicePtrAtElement(result.StorageOffset);

            CudaStorage srcStorage = (CudaStorage)src.Storage;
            CudaContext srcContext = context.CudaContextForTensor(src);
            CUdeviceptr srcPtr     = srcStorage.DevicePtrAtElement(src.StorageOffset);


            if (CudaHelpers.GetDeviceId(result) != CudaHelpers.GetDeviceId(src))
            {
                // Cross-device copy. Perform two-way barrier between both devices' default streams.
                resultContext.SetCurrent();
                CudaEvent dstReady = new CudaEvent(CUEventFlags.DisableTiming);
                dstReady.Record();

                srcContext.SetCurrent();
                CUResult res = DriverAPINativeMethods.Streams.cuStreamWaitEvent(CUstream.NullStream, dstReady.Event, 0);
                if (res != CUResult.Success)
                {
                    throw new CudaException(res);
                }

                dstReady.Dispose();
            }
            else
            {
                srcContext.SetCurrent();
            }

            bool canMemcpy = CanMemcpy(result, src, totalElements);

            if (canMemcpy)
            {
                CUResult res = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyAsync(
                    resultPtr, srcPtr, totalElements * src.ElementType.Size(), CUstream.NullStream);
                if (res != CUResult.Success)
                {
                    throw new CudaException(res);
                }
            }
            else
            {
                if (result.ElementType != src.ElementType)
                {
                    CopyGpuConvertTypes(result, src, totalElements);
                }
                else if (context.CanAccessPeer(CudaHelpers.GetDeviceId(src), CudaHelpers.GetDeviceId(result)))
                {
                    CopyGpuDirect(result, src, srcContext);
                }
                else
                {
                    CopyGpuIndirect(result, src, totalElements);
                }
            }
        }