Пример #1
0
        /// <summary cref="MemoryBuffer{T, TIndex}.CopyFromView(AcceleratorStream, ArrayView{T}, Index)"/>
        /// <remarks>
        /// Copies the contents of <paramref name="source"/> into this buffer, starting at
        /// <paramref name="targetOffset"/>. A CPU source is copied with a raw block copy;
        /// a Cuda source is copied with a device-to-host transfer issued on
        /// <paramref name="stream"/>.
        /// </remarks>
        /// <param name="stream">The stream that is bound for the duration of the copy.</param>
        /// <param name="source">The view to read from.</param>
        /// <param name="targetOffset">The offset in this buffer to start writing at.</param>
        /// <exception cref="NotSupportedException">
        /// Thrown when the source view belongs to an accelerator type other than CPU or Cuda.
        /// </exception>
        protected internal unsafe override void CopyFromView(
            AcceleratorStream stream,
            ArrayView<T> source,
            Index targetOffset)
        {
            var binding = stream.BindScoped();
            try
            {
                var targetAddress = ComputeEffectiveAddress(targetOffset);

                switch (source.AcceleratorType)
                {
                    case AcceleratorType.CPU:
                        // NOTE(review): the cast assumes LengthInBytes is non-negative and
                        // fits into 32 bits - confirm against the view's length type.
                        Unsafe.CopyBlock(
                            targetAddress,
                            source.LoadEffectiveAddress(),
                            (uint)source.LengthInBytes);
                        break;

                    case AcceleratorType.Cuda:
                        CudaException.ThrowIfFailed(CudaAPI.Current.MemcpyDeviceToHost(
                            new IntPtr(targetAddress),
                            new IntPtr(source.LoadEffectiveAddress()),
                            new IntPtr(source.LengthInBytes),
                            stream));
                        break;

                    default:
                        throw new NotSupportedException(
                            RuntimeErrorMessages.NotSupportedTargetAccelerator);
                }
            }
            finally
            {
                // Always undo the scoped binding, including when the copy throws;
                // otherwise the binding established above would never be recovered.
                binding.Recover();
            }
        }
Пример #2
0
 static void WorkaroundKnownIssue(CudaAccelerator accelerator, CuFFTAPI api)
 {
     // Known issue listed in the CUDA release notes for 11.2 through 11.3
     // (inclusive):
     // - cuFFT planning and plan estimation functions may not restore correct
     //   context affecting CUDA driver API applications.
     //
     // To compensate, the accelerator context is made current again, so that
     // memory buffers are later deallocated on the correct context.
     //
     // The affected CUDA releases ship CuFFT v10.4.x, so the library version
     // is used to decide whether the workaround applies:
     //
     // Release 11.1.1   CuFFT v10.3.0.105
     // Release 11.2     CuFFT v10.4.0.72
     // Release 11.3     CuFFT v10.4.2.58
     // Release 11.4     CuFFT v10.5.0.43
     //
     var majorStatus = api.GetProperty(
         LibraryPropertyType.MAJOR_VERSION,
         out var majorVersion);
     CuFFTException.ThrowIfFailed(majorStatus);

     var minorStatus = api.GetProperty(
         LibraryPropertyType.MINOR_VERSION,
         out var minorVersion);
     CuFFTException.ThrowIfFailed(minorStatus);

     // Any version other than 10.4.x needs no workaround.
     if (majorVersion != 10 || minorVersion != 4)
     {
         return;
     }

     CudaException.ThrowIfFailed(
         CudaAPI.CurrentAPI.SetCurrentContext(accelerator.NativePtr));
 }
Пример #3
0
        /// <summary cref="MemoryBuffer{T, TIndex}.CopyFromViewInternal(ArrayView{T, Index}, AcceleratorType, TIndex, AcceleratorStream)"/>
        /// <remarks>
        /// Copies <paramref name="source"/> into the sub view of this buffer that starts at
        /// <paramref name="targetOffset"/>. A CPU source uses a managed memory copy; a Cuda
        /// source uses a device-to-host transfer on <paramref name="stream"/>. Any other
        /// accelerator type results in a <see cref="NotSupportedException"/>.
        /// </remarks>
        protected internal override unsafe void CopyFromViewInternal(
            ArrayView<T, Index> source,
            AcceleratorType acceleratorType,
            TIndex targetOffset,
            AcceleratorStream stream)
        {
            if (acceleratorType == AcceleratorType.CPU)
            {
                var sourcePtr = source.Pointer.ToPointer();
                var targetPtr = GetSubView(targetOffset).Pointer.ToPointer();
                var byteCount = source.LengthInBytes;

                // The source length is passed both as the destination capacity and
                // as the number of bytes to copy.
                Buffer.MemoryCopy(sourcePtr, targetPtr, byteCount, byteCount);
                return;
            }

            if (acceleratorType == AcceleratorType.Cuda)
            {
                var cudaApi = CudaAPI.Current;
                var targetPtr = GetSubView(targetOffset).Pointer;
                var sourcePtr = source.Pointer;
                var byteCount = new IntPtr(source.LengthInBytes);

                var copyStatus = cudaApi.MemcpyDeviceToHost(
                    targetPtr,
                    sourcePtr,
                    byteCount,
                    stream);
                CudaException.ThrowIfFailed(copyStatus);
                return;
            }

            throw new NotSupportedException(RuntimeErrorMessages.NotSupportedTargetAccelerator);
        }
Пример #4
0
        /// <summary cref="MemoryBuffer{T, TIndex}.CopyToView(
        /// AcceleratorStream, ArrayView{T}, LongIndex1)"/>
        /// <remarks>
        /// Copies data from this buffer, starting at <paramref name="sourceOffset"/>, into
        /// <paramref name="target"/>. The number of bytes copied is the byte length of the
        /// target view. A CPU target is written with a managed memory copy; a Cuda target
        /// is written with a host-to-device transfer issued on <paramref name="stream"/>.
        /// </remarks>
        /// <param name="stream">The stream that is bound for the duration of the copy.</param>
        /// <param name="target">The view to write to.</param>
        /// <param name="sourceOffset">The offset in this buffer to start reading from.</param>
        /// <exception cref="NotSupportedException">
        /// Thrown when the target view belongs to an accelerator type other than CPU or Cuda.
        /// </exception>
        protected internal unsafe override void CopyToView(
            AcceleratorStream stream,
            ArrayView<T> target,
            LongIndex1 sourceOffset)
        {
            var binding = stream.BindScoped();
            try
            {
                var sourceAddress = ComputeEffectiveAddress(sourceOffset);
                var targetAddress = target.LoadEffectiveAddress();

                switch (target.AcceleratorType)
                {
                    case AcceleratorType.CPU:
                        // The target length serves as both the destination capacity
                        // and the number of bytes to copy.
                        Buffer.MemoryCopy(
                            sourceAddress,
                            targetAddress,
                            target.LengthInBytes,
                            target.LengthInBytes);
                        break;

                    case AcceleratorType.Cuda:
                        CudaException.ThrowIfFailed(CudaAPI.Current.MemcpyHostToDevice(
                            new IntPtr(targetAddress),
                            new IntPtr(sourceAddress),
                            new IntPtr(target.LengthInBytes),
                            stream));
                        break;

                    default:
                        throw new NotSupportedException(
                            RuntimeErrorMessages.NotSupportedTargetAccelerator);
                }
            }
            finally
            {
                // Always undo the scoped binding, including when the copy throws;
                // otherwise the binding established above would never be recovered.
                binding.Recover();
            }
        }
Пример #5
0
        /// <summary cref="DirectXBuffer{T}.Dispose(bool)"/>
        /// <remarks>
        /// Runs the base disposal first and then unregisters the Cuda graphics resource,
        /// if one is still held. The handle is cleared afterwards, so a later call does
        /// not unregister it a second time.
        /// </remarks>
        protected override void Dispose(bool disposing)
        {
            base.Dispose(disposing);

            if (cudaGraphicsResource != IntPtr.Zero)
            {
                var unregisterStatus =
                    CudaNativeMethods.cuGraphicsUnregisterResource(cudaGraphicsResource);
                CudaException.ThrowIfFailed(unregisterStatus);

                cudaGraphicsResource = IntPtr.Zero;
            }
        }
Пример #6
0
        /// <summary>
        /// Creates a new page lock scope that registers the given host memory
        /// with the Cuda driver.
        /// </summary>
        /// <param name="accelerator">The associated accelerator.</param>
        /// <param name="hostPtr">The host buffer pointer to page lock.</param>
        /// <param name="numElements">The number of elements in the buffer.</param>
        /// <exception cref="NotSupportedException">
        /// Thrown when the device does not support mapping host memory.
        /// </exception>
        internal CudaPageLockScope(
            CudaAccelerator accelerator,
            IntPtr hostPtr,
            long numElements)
            : base(accelerator, numElements)
        {
            if (!accelerator.Device.SupportsMappingHostMemory)
            {
                throw new NotSupportedException(
                    RuntimeErrorMessages.NotSupportedPageLock);
            }
            HostPtr = hostPtr;

            bool canReuseHostPointer =
                accelerator.Device.SupportsUsingHostPointerForRegisteredMemory;

            // The registration is always portable; a device mapping is requested in
            // addition when the host pointer cannot be used directly.
            var registrationFlags = canReuseHostPointer
                ? MemHostRegisterFlags.CU_MEMHOSTREGISTER_PORTABLE
                : MemHostRegisterFlags.CU_MEMHOSTREGISTER_PORTABLE |
                  MemHostRegisterFlags.CU_MEMHOSTREGISTER_DEVICEMAP;

            // Register the host memory range.
            var registrationStatus = CurrentAPI.MemHostRegister(
                hostPtr,
                new IntPtr(LengthInBytes),
                registrationFlags);
            CudaException.ThrowIfFailed(registrationStatus);

            // Start from the host pointer and replace it with the queried device
            // pointer when the host pointer cannot be reused.
            IntPtr lockedAddress = hostPtr;
            if (!canReuseHostPointer)
            {
                CudaException.ThrowIfFailed(
                    CurrentAPI.MemHostGetDevicePointer(
                        out lockedAddress,
                        hostPtr,
                        0));
            }
            AddrOfLockedObject = lockedAddress;
        }
Пример #7
0
        /// <summary>
        /// Registers a Direct3D 11 resource with Cuda and then applies the map flags
        /// derived from the given view flags to the registered resource.
        /// </summary>
        /// <param name="resource">The resource to register.</param>
        /// <param name="viewFlags">The view flags to use.</param>
        /// <param name="cudaGraphicsResource">The resulting graphics resource.</param>
        internal static void RegisterResource(
            Resource resource,
            DirectXViewFlags viewFlags,
            out IntPtr cudaGraphicsResource)
        {
            // Step 1: register the native resource without any special register flags.
            var registerStatus = CudaNativeMethods.cuGraphicsD3D11RegisterResource(
                out cudaGraphicsResource,
                resource.NativePointer,
                CudaGraphicsRegisterFlags.None);
            CudaException.ThrowIfFailed(registerStatus);

            // Step 2: the view flags are cast directly to the Cuda map flags.
            var mapFlags = (CudaGraphicsMapFlags)viewFlags;
            var mapFlagsStatus = CudaNativeMethods.cuGraphicsResourceSetMapFlags(
                cudaGraphicsResource,
                mapFlags);
            CudaException.ThrowIfFailed(mapFlagsStatus);
        }
        /// <summary cref="DirectXBuffer.OnMap(DeviceContext)"/>
        /// <remarks>
        /// Resolves the mapped Cuda array of the graphics resource, lazily allocates a
        /// device byte buffer sized from the array descriptor and, unless the view is
        /// write-discard, copies the array contents into that buffer. Returns the native
        /// pointer of the buffer.
        /// </remarks>
        protected override unsafe IntPtr OnMap(DeviceContext context)
        {
            Debug.Assert(cudaArray == IntPtr.Zero);

            var mappedArrayStatus = CudaNativeMethods.cuGraphicsSubResourceGetMappedArray(
                out cudaArray,
                cudaGraphicsResource,
                0,
                0);
            CudaException.ThrowIfFailed(mappedArrayStatus);

            Debug.Assert(cudaArray != IntPtr.Zero);

            // First mapping: query the array layout and allocate the backing buffer.
            if (buffer == null)
            {
                var descriptorStatus = CudaNativeMethods.cuArrayGetDescriptor(
                    out desc,
                    cudaArray);
                CudaException.ThrowIfFailed(descriptorStatus);

                pixelByteSize = CudaNativeMethods.GetByteSize(desc.arrayFormat) * desc.numChannels;

                int arrayWidth = desc.width.ToInt32();
                int arrayHeight = desc.height.ToInt32();
                buffer = Accelerator.Allocate<byte>(arrayWidth * arrayHeight * pixelByteSize);
            }

            Debug.Assert(pixelByteSize > 0);

            if (ViewFlags != DirectXViewFlags.WriteDiscard)
            {
                // Transfer the current array contents into the device buffer.
                var copyArgs = new CudaMemcpy2DArgs();
                copyArgs.dstDevice = buffer.NativePtr;
                copyArgs.dstMemoryType = CudaMemoryType.Device;
                copyArgs.srcArray = cudaArray;
                copyArgs.srcMemoryType = CudaMemoryType.Array;
                copyArgs.WidthInBytes = new IntPtr(desc.width.ToInt32() * pixelByteSize);
                copyArgs.Height = desc.height;

                CudaException.ThrowIfFailed(
                    CudaNativeMethods.cuMemcpy2D(ref copyArgs));
            }

            return buffer.NativePtr;
        }
Пример #9
0
        /// <summary cref="DirectXInteropAccelerator.UnmapBuffers(DeviceContext, DirectXBuffer[])"/>
        /// <remarks>
        /// Gathers the Cuda graphics resource handle of every buffer into a stack-allocated
        /// array and unmaps all of them with a single driver call.
        /// </remarks>
        internal protected override unsafe void UnmapBuffers(
            DeviceContext context,
            DirectXBuffer[] buffers)
        {
            int resourceCount = buffers.Length;
            IntPtr* resourceHandles = stackalloc IntPtr[resourceCount];

            for (int index = 0; index < resourceCount; ++index)
            {
                // Every buffer is expected to be a Cuda DirectX buffer; this is only
                // verified by the assertion below.
                var cudaBuffer = buffers[index] as ICudaDirectXBuffer;
                Debug.Assert(cudaBuffer != null, "Invalid Cuda buffer");
                resourceHandles[index] = cudaBuffer.CudaGraphicsResource;
            }

            var unmapStatus = CudaNativeMethods.cuGraphicsUnmapResources(
                resourceCount,
                resourceHandles,
                IntPtr.Zero);
            CudaException.ThrowIfFailed(unmapStatus);
        }
Пример #10
0
        /// <summary>
        /// Creates a new page lock scope that registers the given host memory
        /// with the Cuda driver.
        /// </summary>
        /// <param name="accelerator">The associated accelerator.</param>
        /// <param name="hostPtr">The host buffer pointer to page lock.</param>
        /// <param name="numElements">The number of elements in the buffer.</param>
        /// <exception cref="NotSupportedException">
        /// Thrown when the device does not support mapping host memory.
        /// </exception>
        internal CudaPageLockScope(
            CudaAccelerator accelerator,
            IntPtr hostPtr,
            long numElements)
            : base(accelerator)
        {
            if (!accelerator.Device.SupportsMappingHostMemory)
            {
                throw new NotSupportedException(
                    RuntimeErrorMessages.NotSupportedPageLock);
            }
            HostPtr = hostPtr;
            Length = numElements;

            // The registration is always portable; a device mapping is requested in
            // addition when the host pointer cannot be used directly.
            var registrationFlags =
                accelerator.Device.SupportsUsingHostPointerForRegisteredMemory
                    ? MemHostRegisterFlags.CU_MEMHOSTREGISTER_PORTABLE
                    : MemHostRegisterFlags.CU_MEMHOSTREGISTER_PORTABLE |
                      MemHostRegisterFlags.CU_MEMHOSTREGISTER_DEVICEMAP;

            // Register the host memory range.
            var registrationStatus = CurrentAPI.MemHostRegister(
                hostPtr,
                new IntPtr(LengthInBytes),
                registrationFlags);
            CudaException.ThrowIfFailed(registrationStatus);

            // Either query the device pointer of the registered range or reuse the
            // host pointer directly.
            if (!accelerator.Device.SupportsUsingHostPointerForRegisteredMemory)
            {
                var devicePointerStatus = CurrentAPI.MemHostGetDevicePointer(
                    out IntPtr mappedDevicePtr,
                    hostPtr,
                    0);
                CudaException.ThrowIfFailed(devicePointerStatus);
                AddrOfLockedObject = mappedDevicePtr;
            }
            else
            {
                AddrOfLockedObject = hostPtr;
            }
        }
        /// <summary cref="DirectXBuffer.OnUnmap(DeviceContext)"/>
        /// <remarks>
        /// Unless the view is read-only, copies the device buffer contents back into the
        /// mapped Cuda array. The cached array handle is cleared in all cases in which
        /// the copy does not throw.
        /// </remarks>
        protected override unsafe void OnUnmap(DeviceContext context)
        {
            Debug.Assert(pixelByteSize > 0);

            bool requiresWriteBack = ViewFlags != DirectXViewFlags.ReadOnly;
            if (requiresWriteBack)
            {
                // Transfer the device buffer contents into the mapped array.
                var copyArgs = new CudaMemcpy2DArgs();
                copyArgs.srcDevice = buffer.NativePtr;
                copyArgs.srcMemoryType = CudaMemoryType.Device;
                copyArgs.dstArray = cudaArray;
                copyArgs.dstMemoryType = CudaMemoryType.Array;
                copyArgs.WidthInBytes = new IntPtr(desc.width.ToInt32() * pixelByteSize);
                copyArgs.Height = desc.height;

                var copyStatus = CudaNativeMethods.cuMemcpy2D(ref copyArgs);
                CudaException.ThrowIfFailed(copyStatus);
            }

            // Forget the mapped array handle.
            cudaArray = IntPtr.Zero;
        }
Пример #12
0
 /// <summary>
 /// Frees the host memory referenced by the native pointer and clears the pointer.
 /// </summary>
 /// <param name="disposing">Not read by this implementation.</param>
 protected override void DisposeAcceleratorObject(bool disposing)
 {
     var freeStatus = CudaAPI.CurrentAPI.FreeHostMemory(NativePtr);
     CudaException.ThrowIfFailed(freeStatus);

     // Reached only if the free call did not throw.
     NativePtr = IntPtr.Zero;
 }