Пример #1
0
        /// <summary>
        /// Copies <paramref name="totalElements"/> elements from <paramref name="src"/> into
        /// <paramref name="result"/>. Both tensors must be backed by <see cref="CudaStorage"/>
        /// (their storages are cast to that type). GPU work issued directly by this method
        /// targets the null (default) stream.
        /// </summary>
        /// <param name="result">Destination tensor; may live on a different device than <paramref name="src"/>.</param>
        /// <param name="src">Source tensor; its context is made current before the copy is dispatched.</param>
        /// <param name="totalElements">Number of elements to copy; multiplied by the source element size for the raw memcpy path.</param>
        /// <exception cref="CudaException">
        /// Thrown when <c>cuStreamWaitEvent</c> or <c>cuMemcpyAsync</c> returns anything other than <c>CUResult.Success</c>.
        /// </exception>
        public void CopyGpu(Tensor result, Tensor src, long totalElements)
        {
            // We assume here that we are using the default stream for both devices.
            TSCudaContext context = CudaHelpers.TSContextForTensor(src);

            CudaStorage resultStorage = (CudaStorage)result.Storage;
            CudaContext resultContext = context.CudaContextForTensor(result);
            CUdeviceptr resultPtr     = resultStorage.DevicePtrAtElement(result.StorageOffset);

            CudaStorage srcStorage = (CudaStorage)src.Storage;
            CudaContext srcContext = context.CudaContextForTensor(src);
            CUdeviceptr srcPtr     = srcStorage.DevicePtrAtElement(src.StorageOffset);

            if (CudaHelpers.GetDeviceId(result) != CudaHelpers.GetDeviceId(src))
            {
                // Cross-device copy: record an event while the destination context is current,
                // then make the source side's null stream wait on it before the copy is enqueued.
                // NOTE(review): only this direction (source waits on destination) is performed here;
                // there is no matching "destination waits on source" step after the copy - confirm
                // whether that is handled elsewhere or intentionally omitted.
                resultContext.SetCurrent();

                // 'using' guarantees the native event is released even when Record() or the
                // wait below throws; previously the event leaked on those paths.
                using (CudaEvent dstReady = new CudaEvent(CUEventFlags.DisableTiming))
                {
                    dstReady.Record();

                    srcContext.SetCurrent();
                    CUResult waitResult = DriverAPINativeMethods.Streams.cuStreamWaitEvent(CUstream.NullStream, dstReady.Event, 0);
                    if (waitResult != CUResult.Success)
                    {
                        throw new CudaException(waitResult);
                    }
                }
            }
            else
            {
                // Same device: just make sure the source context is current for the work below.
                srcContext.SetCurrent();
            }

            if (CanMemcpy(result, src, totalElements))
            {
                // Raw asynchronous byte copy on the null stream; byte count uses the source element size.
                CUResult copyResult = DriverAPINativeMethods.AsynchronousMemcpy_v2.cuMemcpyAsync(
                    resultPtr, srcPtr, totalElements * src.ElementType.Size(), CUstream.NullStream);
                if (copyResult != CUResult.Success)
                {
                    throw new CudaException(copyResult);
                }
            }
            else if (result.ElementType != src.ElementType)
            {
                // Element types differ, so a converting copy is required.
                CopyGpuConvertTypes(result, src, totalElements);
            }
            else if (context.CanAccessPeer(CudaHelpers.GetDeviceId(src), CudaHelpers.GetDeviceId(result)))
            {
                // Same element type and the source device can access the destination device.
                CopyGpuDirect(result, src, srcContext);
            }
            else
            {
                // No peer access between the two devices: fall back to the indirect path.
                CopyGpuIndirect(result, src, totalElements);
            }
        }