static void WorkaroundKnownIssue(CudaAccelerator accelerator, CuFFTAPI api)
{
    // The CUDA release notes for 11.2 to 11.3 (inclusive) contain a known issue:
    // - cuFFT planning and plan estimation functions may not restore the correct
    //   context, affecting CUDA driver API applications.
    //
    // This workaround restores the accelerator context so that deallocation of
    // the memory buffers can be performed on the correct context.
    //
    // Based on the versions of cuFFT released, apply the workaround to cuFFT v10.4.x.
    //
    // Release 11.1.1   CuFFT v10.3.0.105
    // Release 11.2     CuFFT v10.4.0.72
    // Release 11.3     CuFFT v10.4.2.58
    // Release 11.4     CuFFT v10.5.0.43
    //
    CuFFTException.ThrowIfFailed(
        api.GetProperty(LibraryPropertyType.MAJOR_VERSION, out var major));
    CuFFTException.ThrowIfFailed(
        api.GetProperty(LibraryPropertyType.MINOR_VERSION, out var minor));
    if (major == 10 && minor == 4)
    {
        CudaException.ThrowIfFailed(
            CudaAPI.CurrentAPI.SetCurrentContext(accelerator.NativePtr));
    }
}
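// The helper above only restores the context; callers still need to invoke it
// immediately after any cuFFT planning call. A minimal usage sketch follows,
// assuming the ILGPU.Algorithms CuFFT wrapper; the Plan1D overload, the
// CuFFTType value, and the disposable plan handle shown here are assumptions
// and may differ between library versions.
static void PlanWithWorkaround(CudaAccelerator accelerator)
{
    var cufft = new CuFFT();
    CuFFTException.ThrowIfFailed(
        cufft.Plan1D(
            out var plan,               // resulting plan handle
            1024,                       // transform length
            CuFFTType.CUFFT_Z2Z,        // double-precision complex-to-complex
            1));                        // batch size
    using (plan)
    {
        // Planning may have switched the current CUDA context (CUDA 11.2/11.3),
        // so restore the accelerator's context before touching its buffers.
        WorkaroundKnownIssue(accelerator, cufft.API);

        // ... execute the plan, copy results back, etc. ...
    }
}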
/// <summary cref="MemoryBuffer{T, TIndex}.CopyFromViewInternal(ArrayView{T, Index}, AcceleratorType, TIndex, AcceleratorStream)"/>
protected internal override unsafe void CopyFromViewInternal(
    ArrayView<T, Index> source,
    AcceleratorType acceleratorType,
    TIndex targetOffset,
    AcceleratorStream stream)
{
    switch (acceleratorType)
    {
        case AcceleratorType.CPU:
            Buffer.MemoryCopy(
                source.Pointer.ToPointer(),
                GetSubView(targetOffset).Pointer.ToPointer(),
                source.LengthInBytes,
                source.LengthInBytes);
            break;
        case AcceleratorType.Cuda:
            CudaException.ThrowIfFailed(CudaAPI.Current.MemcpyDeviceToHost(
                GetSubView(targetOffset).Pointer,
                source.Pointer,
                new IntPtr(source.LengthInBytes),
                stream));
            break;
        default:
            throw new NotSupportedException(
                RuntimeErrorMessages.NotSupportedTargetAccelerator);
    }
}
/// <summary cref="MemoryBuffer{T, TIndex}.CopyToView(
/// AcceleratorStream, ArrayView{T}, LongIndex1)"/>
protected internal unsafe override void CopyToView(
    AcceleratorStream stream,
    ArrayView<T> target,
    LongIndex1 sourceOffset)
{
    var binding = stream.BindScoped();

    var sourceAddress = ComputeEffectiveAddress(sourceOffset);
    var targetAddress = target.LoadEffectiveAddress();
    switch (target.AcceleratorType)
    {
        case AcceleratorType.CPU:
            Buffer.MemoryCopy(
                sourceAddress,
                targetAddress,
                target.LengthInBytes,
                target.LengthInBytes);
            break;
        case AcceleratorType.Cuda:
            CudaException.ThrowIfFailed(CudaAPI.Current.MemcpyHostToDevice(
                new IntPtr(targetAddress),
                new IntPtr(sourceAddress),
                new IntPtr(target.LengthInBytes),
                stream));
            break;
        default:
            throw new NotSupportedException(
                RuntimeErrorMessages.NotSupportedTargetAccelerator);
    }

    binding.Recover();
}
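// The two overrides above select the driver copy direction from the accelerator
// type of the opposite view (MemcpyDeviceToHost / MemcpyHostToDevice), which
// implies the owning buffer lives in host (CPU) memory. A caller-side sketch of
// a round trip that exercises these copy paths, assuming ILGPU's public
// Allocate / CopyFrom / GetAsArray helpers; the exact overloads differ by version.
static int[] RoundTrip(Accelerator accelerator, int[] data)
{
    using (var buffer = accelerator.Allocate<int>(data.Length))
    {
        buffer.CopyFrom(data, 0, 0, data.Length);   // upload the host data
        return buffer.GetAsArray();                 // download it back to the host
    }
}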
/// <summary cref="DirectXBuffer{T}.Dispose(bool)"/> protected override void Dispose(bool disposing) { base.Dispose(disposing); if (cudaGraphicsResource == IntPtr.Zero) { return; } CudaException.ThrowIfFailed( CudaNativeMethods.cuGraphicsUnregisterResource( cudaGraphicsResource)); cudaGraphicsResource = IntPtr.Zero; }
/// <summary>
/// Constructs a page lock scope for the accelerator.
/// </summary>
/// <param name="accelerator">The associated accelerator.</param>
/// <param name="hostPtr">The host buffer pointer to page lock.</param>
/// <param name="numElements">The number of elements in the buffer.</param>
internal CudaPageLockScope(
    CudaAccelerator accelerator,
    IntPtr hostPtr,
    long numElements)
    : base(accelerator)
{
    if (!accelerator.Device.SupportsMappingHostMemory)
    {
        throw new NotSupportedException(
            RuntimeErrorMessages.NotSupportedPageLock);
    }
    HostPtr = hostPtr;
    Length = numElements;

    var flags = MemHostRegisterFlags.CU_MEMHOSTREGISTER_PORTABLE;
    if (!accelerator.Device.SupportsUsingHostPointerForRegisteredMemory)
    {
        flags |= MemHostRegisterFlags.CU_MEMHOSTREGISTER_DEVICEMAP;
    }
    CudaException.ThrowIfFailed(
        CurrentAPI.MemHostRegister(
            hostPtr,
            new IntPtr(LengthInBytes),
            flags));

    if (accelerator.Device.SupportsUsingHostPointerForRegisteredMemory)
    {
        AddrOfLockedObject = hostPtr;
    }
    else
    {
        CudaException.ThrowIfFailed(
            CurrentAPI.MemHostGetDevicePointer(
                out IntPtr devicePtr,
                hostPtr,
                0));
        AddrOfLockedObject = devicePtr;
    }
}
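// A usage sketch for the scope above. CreatePageLockFromPinned is an assumption
// about the accelerator's public surface (the constructor itself is internal);
// the essential point is that the host array must already be pinned (GCHandle,
// System.Runtime.InteropServices) so the address handed to cuMemHostRegister
// stays stable for the lifetime of the scope.
static void WithPageLockedHostMemory(CudaAccelerator accelerator, double[] host)
{
    var handle = GCHandle.Alloc(host, GCHandleType.Pinned);   // prevent GC relocation
    try
    {
        using (var scope = accelerator.CreatePageLockFromPinned(
            handle.AddrOfPinnedObject(),
            host.Length))
        {
            // Copies that go through scope.AddrOfLockedObject can now run
            // asynchronously, since the driver knows the pages are locked.
        }
    }
    finally
    {
        handle.Free();
    }
}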
double[] ExecuteGPU(int size)
{
    // First prepare the variables in main (host) memory.
    List<double> a = new List<double>();
    List<double> b = new List<double>();
    List<double> c = new List<double>();
    for (int i = 0; i < size * size; i++)
    {
        //a.Add(i + 1);
        //b.Add((i + 1) * 10);
        //a.Add(1d * i);
        //b.Add(0.1d * i);
        c.Add(0);
    }
    a.AddRange(new double[] { 1, 3, 2, 4 });
    b.AddRange(new double[] { 5, 7, 6, 8 });

    // Transfer the data to device memory.
    /*DeviceMemory memory = new DeviceMemory();
     * memory.Add<float>("a", a);
     * memory.Add<float>("b", b);
     * memory.Alloc<double>("c", size);
     * memory.Alloc<double>("d", size);
     */
    int elemSize = Marshal.SizeOf(typeof(double));
    int byteSize = elemSize * size * size;

    IntPtr destA = IntPtr.Zero;
    IntPtr destB = IntPtr.Zero;
    IntPtr destC = IntPtr.Zero;

    cudaError result2;
    result2 = Runtime.API.cudaMalloc(ref destA, elemSize * 4);
    CudaException.Check(result2, "Failed to allocate device memory.");
    result2 = Runtime.API.cudaMalloc(ref destB, elemSize * 4);
    CudaException.Check(result2, "Failed to allocate device memory.");
    result2 = Runtime.API.cudaMalloc(ref destC, byteSize);
    CudaException.Check(result2, "Failed to allocate device memory.");

    Console.WriteLine("cuBLAS Test destA: {0}", destA);
    Console.WriteLine("cuBLAS Test destB: {0}", destB);
    Console.WriteLine("cuBLAS Test destC: {0}", destC);

    cuBLAS.SetMatrix(2, 2, a.ToArray(), 2, destA, 2);
    cuBLAS.SetMatrix(2, 2, b.ToArray(), 2, destB, 2);
    cuBLAS.SetMatrix(size, size, c.ToArray(), size, destC, size);

    //float test = cuBLAS.Sdot(2, da, 1, db, 1);
    /*
     * cuBLAS.Dgemm(
     *     cublasOperation.CUBLAS_OP_N,
     *     cublasOperation.CUBLAS_OP_N,
     *     2, 2, 2,
     *     1, destA, 2,
     *     destB, 2,
     *     0, destC, 2
     * );
     */
    /*
     * cuBLAS.Dsymm(
     *     cublasSideMode.CUBLAS_SIDE_RIGHT,
     *     cublasFillMode.CUBLAS_FILL_MODE_LOWER,
     *     2, 2,
     *     1, da, 2,
     *     db, 2,
     *     0, dc, 2
     * );
     *
     * cuBLAS.Dtrmm(
     *     cublasSideMode.CUBLAS_SIDE_LEFT,
     *     cublasFillMode.CUBLAS_FILL_MODE_LOWER,
     *     cublasOperation.CUBLAS_OP_C,
     *     cublasDiagType.CUBLAS_DIAG_NON_UNIT,
     *     2, 2,
     *     1, da, 2,
     *     db, 2,
     *     dc, 2
     * );
     */
    /*
     * double[] rb = cuBLAS.GetMatrixD(2, 2, destB, 2, 2);
     * foreach (double cc in rb) {
     *     Console.WriteLine("cuBLAS Test rb: {0}", cc);
     * }
     *
     * double[] rc = cuBLAS.GetMatrixD(size, size, destC, size, size);
     * foreach (double cc in rc) {
     *     Console.WriteLine("cuBLAS Test: {0}", cc);
     * }
     */

    Runtime.API.cudaFree(destA);
    Runtime.API.cudaFree(destB);
    Runtime.API.cudaFree(destC);

    // Wait until all threads have finished.
    //context.Synchronize();

    // Retrieve the result and print it to the output window.
    double[] result = null; // memory.Read<double>("c", size * size);

    // Release the resources.
    //memory.Dispose();

    return result;
}
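// For reference, the Dgemm call commented out inside ExecuteGPU is the intended
// computation. With the column-major data loaded by SetMatrix, a = {1, 3, 2, 4}
// is the matrix A = [1 2; 3 4] and b = {5, 7, 6, 8} is B = [5 6; 7 8], so
// C = A * B = [19 22; 43 50], i.e. {19, 43, 22, 50} in column-major order.
// A sketch of that call, reusing the same wrapper signature as the commented
// code (alpha = 1, beta = 0), would look like this:
cuBLAS.Dgemm(
    cublasOperation.CUBLAS_OP_N,   // A is not transposed
    cublasOperation.CUBLAS_OP_N,   // B is not transposed
    2, 2, 2,                       // m, n, k
    1, destA, 2,                   // alpha, A, leading dimension of A
    destB, 2,                      // B, leading dimension of B
    0, destC, 2);                  // beta, C, leading dimension of C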