Exemple #1
0
 static void WorkaroundKnownIssue(CudaAccelerator accelerator, CuFFTAPI api)
 {
     // CUDA releases 11.2 through 11.3 (inclusive) document a known issue:
     // the cuFFT planning and plan-estimation entry points may fail to
     // restore the correct context, which affects CUDA driver API users.
     //
     // As a workaround, re-bind the accelerator context so that later
     // deallocation of memory buffers runs against the correct context.
     //
     // Based on the CuFFT versions shipped, the workaround targets v10.4.x:
     //   Release 11.1.1   CuFFT v10.3.0.105
     //   Release 11.2     CuFFT v10.4.0.72
     //   Release 11.3     CuFFT v10.4.2.58
     //   Release 11.4     CuFFT v10.5.0.43
     CuFFTException.ThrowIfFailed(
         api.GetProperty(LibraryPropertyType.MAJOR_VERSION, out var majorVersion));
     CuFFTException.ThrowIfFailed(
         api.GetProperty(LibraryPropertyType.MINOR_VERSION, out var minorVersion));

     var isAffectedRelease = majorVersion == 10 && minorVersion == 4;
     if (isAffectedRelease)
     {
         CudaException.ThrowIfFailed(
             CudaAPI.CurrentAPI.SetCurrentContext(accelerator.NativePtr));
     }
 }
        /// <summary cref="MemoryBuffer{T, TIndex}.CopyFromViewInternal(ArrayView{T, Index}, AcceleratorType, TIndex, AcceleratorStream)"/>
        protected internal override unsafe void CopyFromViewInternal(
            ArrayView <T, Index> source,
            AcceleratorType acceleratorType,
            TIndex targetOffset,
            AcceleratorStream stream)
        {
            if (acceleratorType == AcceleratorType.CPU)
            {
                // Plain host-to-host copy into the sub view at the target offset.
                Buffer.MemoryCopy(
                    source.Pointer.ToPointer(),
                    GetSubView(targetOffset).Pointer.ToPointer(),
                    source.LengthInBytes,
                    source.LengthInBytes);
            }
            else if (acceleratorType == AcceleratorType.Cuda)
            {
                // Device-to-host transfer via the CUDA driver API on the
                // given stream.
                CudaException.ThrowIfFailed(CudaAPI.Current.MemcpyDeviceToHost(
                    GetSubView(targetOffset).Pointer,
                    source.Pointer,
                    new IntPtr(source.LengthInBytes),
                    stream));
            }
            else
            {
                // Any other accelerator type is not a supported copy source.
                throw new NotSupportedException(
                    RuntimeErrorMessages.NotSupportedTargetAccelerator);
            }
        }
Exemple #3
0
        /// <summary cref="MemoryBuffer{T, TIndex}.CopyToView(
        /// AcceleratorStream, ArrayView{T}, LongIndex1)"/>
        protected internal unsafe override void CopyToView(
            AcceleratorStream stream,
            ArrayView <T> target,
            LongIndex1 sourceOffset)
        {
            // Bind the stream for the duration of the copy.
            var scopedBinding = stream.BindScoped();

            var srcAddress = ComputeEffectiveAddress(sourceOffset);
            var dstAddress = target.LoadEffectiveAddress();

            if (target.AcceleratorType == AcceleratorType.CPU)
            {
                // Host-to-host copy of the target view's full byte length.
                Buffer.MemoryCopy(
                    srcAddress,
                    dstAddress,
                    target.LengthInBytes,
                    target.LengthInBytes);
            }
            else if (target.AcceleratorType == AcceleratorType.Cuda)
            {
                // Host-to-device transfer via the CUDA driver API.
                CudaException.ThrowIfFailed(CudaAPI.Current.MemcpyHostToDevice(
                    new IntPtr(dstAddress),
                    new IntPtr(srcAddress),
                    new IntPtr(target.LengthInBytes),
                    stream));
            }
            else
            {
                throw new NotSupportedException(
                          RuntimeErrorMessages.NotSupportedTargetAccelerator);
            }

            // NOTE(review): Recover is skipped if a copy above throws; if the
            // binding must be restored on error as well, this needs a
            // try/finally — confirm against the BindScoped contract.
            scopedBinding.Recover();
        }
        /// <summary cref="DirectXBuffer{T}.Dispose(bool)"/>
        protected override void Dispose(bool disposing)
        {
            base.Dispose(disposing);

            // Nothing to release if the resource was never registered (or was
            // already unregistered by an earlier Dispose call).
            if (cudaGraphicsResource == IntPtr.Zero)
            {
                return;
            }

            var status = CudaNativeMethods.cuGraphicsUnregisterResource(
                cudaGraphicsResource);

            // Clear the handle BEFORE surfacing any failure: the original code
            // reset it only after ThrowIfFailed, so a failed unregister left
            // the stale handle in place and a subsequent Dispose would attempt
            // to unregister it again.
            cudaGraphicsResource = IntPtr.Zero;

            // Only propagate failures on explicit disposal; throwing from a
            // finalizer path (disposing == false) would crash the process.
            if (disposing)
            {
                CudaException.ThrowIfFailed(status);
            }
        }
Exemple #5
0
        /// <summary>
        /// Constructs a page lock scope for the accelerator.
        /// </summary>
        /// <param name="accelerator">The associated accelerator.</param>
        /// <param name="hostPtr">The host buffer pointer to page lock.</param>
        /// <param name="numElements">The number of elements in the buffer.</param>
        internal CudaPageLockScope(
            CudaAccelerator accelerator,
            IntPtr hostPtr,
            long numElements)
            : base(accelerator)
        {
            // Page locking is only available on devices that can map host memory.
            if (!accelerator.Device.SupportsMappingHostMemory)
            {
                throw new NotSupportedException(
                          RuntimeErrorMessages.NotSupportedPageLock);
            }

            HostPtr = hostPtr;
            // Length must be assigned before LengthInBytes is read below.
            Length = numElements;

            // Devices that cannot reuse the host pointer directly also need the
            // registered range mapped into the device address space.
            var canUseHostPointer =
                accelerator.Device.SupportsUsingHostPointerForRegisteredMemory;
            var registerFlags = MemHostRegisterFlags.CU_MEMHOSTREGISTER_PORTABLE;
            if (!canUseHostPointer)
            {
                registerFlags |= MemHostRegisterFlags.CU_MEMHOSTREGISTER_DEVICEMAP;
            }

            CudaException.ThrowIfFailed(
                CurrentAPI.MemHostRegister(
                    hostPtr,
                    new IntPtr(LengthInBytes),
                    registerFlags));

            if (canUseHostPointer)
            {
                // The host pointer is directly usable as the locked address.
                AddrOfLockedObject = hostPtr;
            }
            else
            {
                // Otherwise query the device-side pointer of the mapped range.
                CudaException.ThrowIfFailed(
                    CurrentAPI.MemHostGetDevicePointer(
                        out IntPtr devicePtr,
                        hostPtr,
                        0));
                AddrOfLockedObject = devicePtr;
            }
        }
Exemple #6
0
        /// <summary>
        /// GPU demo path: allocates device buffers, uploads two fixed 2x2
        /// matrices and a zero-initialized size x size matrix through cuBLAS,
        /// and then releases the device memory again.
        /// </summary>
        /// <param name="size">Row/column count of the result matrix.</param>
        /// <returns>
        /// Currently always null — the device-to-host read-back of the result
        /// (formerly a memory.Read call in the prototype) is not implemented.
        /// </returns>
        double[] ExecuteGPU(int size)
        {
            // Prepare the host-side buffers first.
            List<double> a = new List<double>();
            List<double> b = new List<double>();
            List<double> c = new List<double>();

            // Zero-fill the result matrix.
            for (int i = 0; i < size * size; i++)
            {
                c.Add(0);
            }

            // Fixed 2x2 input matrices; presumably column-major per the cuBLAS
            // convention — TODO confirm against cuBLAS.SetMatrix.
            a.AddRange(new double[] { 1, 3, 2, 4 });
            b.AddRange(new double[] { 5, 7, 6, 8 });

            int       elemSize = Marshal.SizeOf(typeof(double));
            int       byteSize = elemSize * size * size;
            IntPtr    destA    = IntPtr.Zero;
            IntPtr    destB    = IntPtr.Zero;
            IntPtr    destC    = IntPtr.Zero;

            try
            {
                // Allocate the device buffers; a and b hold exactly 4 elements.
                cudaError status;
                status = Runtime.API.cudaMalloc(ref destA, elemSize * 4);
                CudaException.Check(status, "デバイスメモリの割り当てに失敗しました。");
                status = Runtime.API.cudaMalloc(ref destB, elemSize * 4);
                CudaException.Check(status, "デバイスメモリの割り当てに失敗しました。");
                status = Runtime.API.cudaMalloc(ref destC, byteSize);
                CudaException.Check(status, "デバイスメモリの割り当てに失敗しました。");

                Console.WriteLine("cuBLAS Test destA: {0}", destA);
                Console.WriteLine("cuBLAS Test destB: {0}", destB);
                Console.WriteLine("cuBLAS Test destC: {0}", destC);

                // Upload the host matrices into the device buffers.
                cuBLAS.SetMatrix(2, 2, a.ToArray(), 2, destA, 2);
                cuBLAS.SetMatrix(2, 2, b.ToArray(), 2, destB, 2);
                cuBLAS.SetMatrix(size, size, c.ToArray(), size, destC, size);
            }
            finally
            {
                // Always release the device allocations, even when an
                // allocation or upload above throws part-way through (the
                // original code leaked every buffer in that case).
                if (destA != IntPtr.Zero)
                {
                    Runtime.API.cudaFree(destA);
                }
                if (destB != IntPtr.Zero)
                {
                    Runtime.API.cudaFree(destB);
                }
                if (destC != IntPtr.Zero)
                {
                    Runtime.API.cudaFree(destC);
                }
            }

            // TODO: launch the intended cuBLAS computation (Dgemm and friends
            // in the original prototype) and read the result back from the
            // device; until then the method yields null.
            double[] result = null;
            return result;
        }