C# (CSharp) CUDA.CopyDeviceToHostAsync示例

        static void Main(string[] args)
        {
            // Create a new instance of CUDA class, select 1st device.
            CUDA cuda = new CUDA(0, true);

            // Prepare parameters.
            int n = 16 * 1024 * 1024;
            uint nbytes = (uint)(n * sizeof(int));
            int value = 26;

            // allocate host memory
            int[] a = new int[n];

            // allocate device memory
            CUdeviceptr d_a = cuda.Allocate<int>(a);
            CUDADriver.cuMemsetD8(d_a, 0xff, nbytes);

            // load module
            cuda.LoadModule(Path.Combine(Environment.CurrentDirectory, "asyncAPI.ptx"));
            CUfunction func = cuda.GetModuleFunction("increment_kernel");

            // set kernel launch configuration
            cuda.SetFunctionBlockShape(func, 512, 1, 1);

            // create cuda event handles
            CUevent start = cuda.CreateEvent();
            CUevent stop = cuda.CreateEvent();

            // asynchronously issue work to the GPU (all to stream 0)
            CUstream stream = new CUstream();
            cuda.RecordEvent(start);
            cuda.CopyHostToDeviceAsync<int>(d_a, a, stream);

            // set parameters for kernel function
            cuda.SetParameter(func, 0, (uint)d_a.Pointer);
            cuda.SetParameter(func, IntPtr.Size, (uint)value);

            cuda.SetParameterSize(func, (uint)(IntPtr.Size + 4));

            // actually launch kernel
            cuda.LaunchAsync(func, n / 512, 1, stream);

            // wait for every thing to finish, then start copy back data
            cuda.CopyDeviceToHostAsync<int>(d_a, a, stream);

            cuda.RecordEvent(stop);

            // print the cpu and gpu times
            Console.WriteLine("time spent executing by the GPU: {0} ms", cuda.ElapsedTime(start, stop));

            // check the output for correctness
            if (CorrectOutput(a, value))
                Console.WriteLine("Test PASSED");
            else
                Console.WriteLine("Test FAILED");

            // release resources
            cuda.DestroyEvent(start);
            cuda.DestroyEvent(stop);
            cuda.Free(d_a);
        }

示例#2

显示文件

文件： Program.cs 项目： uzbekdev1/CUDAfy.NET-1

        static void Main(string[] args)
        {
            // Create a new instance of CUDA class, select 1st device.
            CUDA cuda = new CUDA(0, true);

            // Prepare parameters.
            int  n      = 16 * 1024 * 1024;
            uint nbytes = (uint)(n * sizeof(int));
            int  value  = 26;

            // allocate host memory
            int[] a = new int[n];

            // allocate device memory
            CUdeviceptr d_a = cuda.Allocate <int>(a);

            CUDADriver.cuMemsetD8(d_a, 0xff, nbytes);

            // load module
            cuda.LoadModule(Path.Combine(Environment.CurrentDirectory, "asyncAPI.ptx"));
            CUfunction func = cuda.GetModuleFunction("increment_kernel");

            // set kernel launch configuration
            cuda.SetFunctionBlockShape(func, 512, 1, 1);

            // create cuda event handles
            CUevent start = cuda.CreateEvent();
            CUevent stop  = cuda.CreateEvent();

            // asynchronously issue work to the GPU (all to stream 0)
            CUstream stream = new CUstream();

            cuda.RecordEvent(start);
            cuda.CopyHostToDeviceAsync <int>(d_a, a, stream);

            // set parameters for kernel function
            cuda.SetParameter(func, 0, (uint)d_a.Pointer);
            cuda.SetParameter(func, IntPtr.Size, (uint)value);

            cuda.SetParameterSize(func, (uint)(IntPtr.Size + 4));

            // actually launch kernel
            cuda.LaunchAsync(func, n / 512, 1, stream);

            // wait for every thing to finish, then start copy back data
            cuda.CopyDeviceToHostAsync <int>(d_a, a, stream);

            cuda.RecordEvent(stop);

            // print the cpu and gpu times
            Console.WriteLine("time spent executing by the GPU: {0} ms", cuda.ElapsedTime(start, stop));

            // check the output for correctness
            if (CorrectOutput(a, value))
            {
                Console.WriteLine("Test PASSED");
            }
            else
            {
                Console.WriteLine("Test FAILED");
            }

            // release resources
            cuda.DestroyEvent(start);
            cuda.DestroyEvent(stop);
            cuda.Free(d_a);
        }