/// <summary cref="MemoryBuffer{T, TIndex}.CopyFromView(AcceleratorStream, ArrayView{T}, Index)"/> protected internal unsafe override void CopyFromView( AcceleratorStream stream, ArrayView <T> source, Index targetOffset) { var binding = stream.BindScoped(); var targetAddress = ComputeEffectiveAddress(targetOffset); switch (source.AcceleratorType) { case AcceleratorType.CPU: Unsafe.CopyBlock( targetAddress, source.LoadEffectiveAddress(), (uint)source.LengthInBytes); break; case AcceleratorType.Cuda: CudaException.ThrowIfFailed(CudaAPI.Current.MemcpyDeviceToHost( new IntPtr(targetAddress), new IntPtr(source.LoadEffectiveAddress()), new IntPtr(source.LengthInBytes), stream)); break; default: throw new NotSupportedException(RuntimeErrorMessages.NotSupportedTargetAccelerator); } binding.Recover(); }
static void WorkaroundKnownIssue(CudaAccelerator accelerator, CuFFTAPI api)
{
    // The CUDA release notes for 11.2 to 11.3 (inclusive) contain a known issue:
    // - cuFFT planning and plan estimation functions may not restore the correct
    //   context, affecting CUDA driver API applications.
    //
    // This workaround restores the accelerator context so that deallocation of
    // the memory buffers can be performed on the correct context.
    //
    // Based on the versions of CuFFT released, apply the workaround to
    // CuFFT v10.4.x.
    //
    // Release 11.1.1   CuFFT v10.3.0.105
    // Release 11.2     CuFFT v10.4.0.72
    // Release 11.3     CuFFT v10.4.2.58
    // Release 11.4     CuFFT v10.5.0.43
    //
    CuFFTException.ThrowIfFailed(
        api.GetProperty(LibraryPropertyType.MAJOR_VERSION, out var major));
    CuFFTException.ThrowIfFailed(
        api.GetProperty(LibraryPropertyType.MINOR_VERSION, out var minor));
    if (major == 10 && minor == 4)
    {
        CudaException.ThrowIfFailed(
            CudaAPI.CurrentAPI.SetCurrentContext(accelerator.NativePtr));
    }
}
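// A minimal sketch of a call site, assuming plans are created through this
// API wrapper. Plan1D and its parameters are illustrative only and may not
// match the actual wrapper signature.
static void CreatePlanWithWorkaround(CudaAccelerator accelerator, CuFFTAPI api)
{
    // Plan creation is the operation that can clobber the current context.
    CuFFTException.ThrowIfFailed(
        api.Plan1D(out var plan, nx: 1024, CuFFTType.CUFFT_C2C, batch: 1));

    // Restore the accelerator context immediately afterwards.
    WorkaroundKnownIssue(accelerator, api);
}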
/// <summary cref="MemoryBuffer{T, TIndex}.CopyFromViewInternal(ArrayView{T, Index}, AcceleratorType, TIndex, AcceleratorStream)"/> protected internal override unsafe void CopyFromViewInternal( ArrayView <T, Index> source, AcceleratorType acceleratorType, TIndex targetOffset, AcceleratorStream stream) { switch (acceleratorType) { case AcceleratorType.CPU: Buffer.MemoryCopy( source.Pointer.ToPointer(), GetSubView(targetOffset).Pointer.ToPointer(), source.LengthInBytes, source.LengthInBytes); break; case AcceleratorType.Cuda: CudaException.ThrowIfFailed(CudaAPI.Current.MemcpyDeviceToHost( GetSubView(targetOffset).Pointer, source.Pointer, new IntPtr(source.LengthInBytes), stream)); break; default: throw new NotSupportedException(RuntimeErrorMessages.NotSupportedTargetAccelerator); } }
/// <summary cref="MemoryBuffer{T, TIndex}.CopyToView( /// AcceleratorStream, ArrayView{T}, LongIndex1)"/> protected internal unsafe override void CopyToView( AcceleratorStream stream, ArrayView <T> target, LongIndex1 sourceOffset) { var binding = stream.BindScoped(); var sourceAddress = ComputeEffectiveAddress(sourceOffset); var targetAddress = target.LoadEffectiveAddress(); switch (target.AcceleratorType) { case AcceleratorType.CPU: Buffer.MemoryCopy( sourceAddress, targetAddress, target.LengthInBytes, target.LengthInBytes); break; case AcceleratorType.Cuda: CudaException.ThrowIfFailed(CudaAPI.Current.MemcpyHostToDevice( new IntPtr(targetAddress), new IntPtr(sourceAddress), new IntPtr(target.LengthInBytes), stream)); break; default: throw new NotSupportedException( RuntimeErrorMessages.NotSupportedTargetAccelerator); } binding.Recover(); }
/// <summary cref="DirectXBuffer{T}.Dispose(bool)"/> protected override void Dispose(bool disposing) { base.Dispose(disposing); if (cudaGraphicsResource == IntPtr.Zero) { return; } CudaException.ThrowIfFailed( CudaNativeMethods.cuGraphicsUnregisterResource( cudaGraphicsResource)); cudaGraphicsResource = IntPtr.Zero; }
/// <summary>
/// Constructs a page lock scope for the accelerator.
/// </summary>
/// <param name="accelerator">The associated accelerator.</param>
/// <param name="hostPtr">The host buffer pointer to page lock.</param>
/// <param name="numElements">The number of elements in the buffer.</param>
internal CudaPageLockScope(
    CudaAccelerator accelerator,
    IntPtr hostPtr,
    long numElements)
    : base(accelerator, numElements)
{
    if (!accelerator.Device.SupportsMappingHostMemory)
    {
        throw new NotSupportedException(
            RuntimeErrorMessages.NotSupportedPageLock);
    }
    HostPtr = hostPtr;

    bool supportsHostPointer = accelerator
        .Device
        .SupportsUsingHostPointerForRegisteredMemory;

    // Setup internal memory registration flags.
    var flags = MemHostRegisterFlags.CU_MEMHOSTREGISTER_PORTABLE;
    if (!supportsHostPointer)
    {
        flags |= MemHostRegisterFlags.CU_MEMHOSTREGISTER_DEVICEMAP;
    }

    // Perform the memory registration.
    CudaException.ThrowIfFailed(
        CurrentAPI.MemHostRegister(
            hostPtr,
            new IntPtr(LengthInBytes),
            flags));

    // Check whether we have to determine the actual device pointer or are able
    // to reuse the host pointer for all operations.
    if (supportsHostPointer)
    {
        AddrOfLockedObject = hostPtr;
    }
    else
    {
        CudaException.ThrowIfFailed(
            CurrentAPI.MemHostGetDevicePointer(
                out IntPtr devicePtr,
                hostPtr,
                0));
        AddrOfLockedObject = devicePtr;
    }
}
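// A minimal sketch of the matching cleanup, assuming the scope unregisters
// the host memory in a dispose override; the actual method name and base
// class contract may differ from this excerpt.
protected override void DisposeAcceleratorObject(bool disposing)
{
    // cuMemHostUnregister is the documented counterpart of cuMemHostRegister.
    CudaException.ThrowIfFailed(
        CurrentAPI.MemHostUnregister(HostPtr));
    HostPtr = IntPtr.Zero;
}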
/// <summary>
/// Registers the resource with the given flags in the scope of the Cuda
/// runtime system.
/// </summary>
/// <param name="resource">The resource to register.</param>
/// <param name="viewFlags">The view flags to use.</param>
/// <param name="cudaGraphicsResource">The resulting graphics resource.</param>
internal static void RegisterResource(
    Resource resource,
    DirectXViewFlags viewFlags,
    out IntPtr cudaGraphicsResource)
{
    CudaException.ThrowIfFailed(
        CudaNativeMethods.cuGraphicsD3D11RegisterResource(
            out cudaGraphicsResource,
            resource.NativePointer,
            CudaGraphicsRegisterFlags.None));
    CudaException.ThrowIfFailed(
        CudaNativeMethods.cuGraphicsResourceSetMapFlags(
            cudaGraphicsResource,
            (CudaGraphicsMapFlags)viewFlags));
}
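// Illustrative usage, not part of the original source: "texture" stands for
// any SharpDX Resource, and the unregister call mirrors the Dispose logic
// shown earlier.
internal static void RegisterAndRelease(Resource texture)
{
    RegisterResource(texture, DirectXViewFlags.ReadOnly, out var resource);
    try
    {
        // ... map the resource, run kernels on the mapped buffer, unmap ...
    }
    finally
    {
        CudaException.ThrowIfFailed(
            CudaNativeMethods.cuGraphicsUnregisterResource(resource));
    }
}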
/// <summary cref="DirectXBuffer.OnMap(DeviceContext)"/> protected override unsafe IntPtr OnMap(DeviceContext context) { Debug.Assert(cudaArray == IntPtr.Zero); CudaException.ThrowIfFailed( CudaNativeMethods.cuGraphicsSubResourceGetMappedArray( out cudaArray, cudaGraphicsResource, 0, 0)); Debug.Assert(cudaArray != IntPtr.Zero); if (buffer == null) { CudaException.ThrowIfFailed( CudaNativeMethods.cuArrayGetDescriptor(out desc, cudaArray)); pixelByteSize = CudaNativeMethods.GetByteSize(desc.arrayFormat) * desc.numChannels; buffer = Accelerator.Allocate <byte>( desc.width.ToInt32() * desc.height.ToInt32() * pixelByteSize); } Debug.Assert(pixelByteSize > 0); if (ViewFlags != DirectXViewFlags.WriteDiscard) { // Copy texture data to buffer var args = new CudaMemcpy2DArgs() { dstDevice = buffer.NativePtr, dstMemoryType = CudaMemoryType.Device, srcArray = cudaArray, srcMemoryType = CudaMemoryType.Array, WidthInBytes = new IntPtr(desc.width.ToInt32() * pixelByteSize), Height = desc.height, }; CudaException.ThrowIfFailed( CudaNativeMethods.cuMemcpy2D(ref args)); } return(buffer.NativePtr); }
/// <summary cref="DirectXInteropAccelerator.UnmapBuffers(DeviceContext, DirectXBuffer[])"/> internal protected override unsafe void UnmapBuffers( DeviceContext context, DirectXBuffer[] buffers) { IntPtr *cudaResources = stackalloc IntPtr[buffers.Length]; for (int i = 0, e = buffers.Length; i < e; ++i) { var cudaBuffer = buffers[i] as ICudaDirectXBuffer; Debug.Assert(cudaBuffer != null, "Invalid Cuda buffer"); cudaResources[i] = cudaBuffer.CudaGraphicsResource; } CudaException.ThrowIfFailed( CudaNativeMethods.cuGraphicsUnmapResources( buffers.Length, cudaResources, IntPtr.Zero)); }
/// <summary cref="DirectXBuffer.OnUnmap(DeviceContext)"/> protected override unsafe void OnUnmap(DeviceContext context) { Debug.Assert(pixelByteSize > 0); if (ViewFlags != DirectXViewFlags.ReadOnly) { // Copy buffer data to texture var args = new CudaMemcpy2DArgs() { srcDevice = buffer.NativePtr, srcMemoryType = CudaMemoryType.Device, dstArray = cudaArray, dstMemoryType = CudaMemoryType.Array, WidthInBytes = new IntPtr(desc.width.ToInt32() * pixelByteSize), Height = desc.height, }; CudaException.ThrowIfFailed( CudaNativeMethods.cuMemcpy2D(ref args)); } cudaArray = IntPtr.Zero; }
protected override void DisposeAcceleratorObject(bool disposing)
{
    // Release the page-locked host allocation and clear the native pointer.
    CudaException.ThrowIfFailed(
        CudaAPI.CurrentAPI.FreeHostMemory(NativePtr));
    NativePtr = IntPtr.Zero;
}
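// A minimal sketch of the matching allocation, assuming the CudaAPI wrapper
// exposes cuMemAllocHost as AllocateHostMemory; the exact name and signature
// are assumptions, not taken from this excerpt.
CudaException.ThrowIfFailed(
    CudaAPI.CurrentAPI.AllocateHostMemory(
        out IntPtr hostPtr,
        new IntPtr(lengthInBytes)));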