C# (CSharp) ManagedCuda.NVRTC CudaRuntimeCompiler 예제들

프로그래밍 언어: C# (CSharp)

네임스페이스/패키지 이름: ManagedCuda.NVRTC

클래스/타입: CudaRuntimeCompiler

hotexamples.com에서의 예제들: 2

C# (CSharp) ManagedCuda.NVRTC CudaRuntimeCompiler - 2개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한, C# (CSharp) ManagedCuda.NVRTC.CudaRuntimeCompiler의 실제 사용 예제 중 평점이 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

Compile(2)

GetPTX(2)

Dispose(1)

GetLogAsString(1)

Cuda runtime compiler

상속: IDisposable

CudaRuntimeCompiler 1 문서

예제 #1

파일 보기

파일: Program.cs 프로젝트: kunzmi/managedCuda

        /// <summary>
        /// Compiles "vectorAdd_kernel.cu" at runtime with CudaRuntimeCompiler, loads the
        /// resulting PTX, runs the "vectorAdd" kernel on two random input vectors and
        /// checks the result against a host-side sum.
        /// </summary>
        /// <param name="args">Command-line arguments; forwarded unchanged to
        /// <c>CudaRuntimeCompiler.Compile</c>.</param>
        static void Main(string[] args)
        {
            string filename = "vectorAdd_kernel.cu"; //we assume the file is in the same folder...
            string fileToCompile = File.ReadAllText(filename);

            // Only the PTX bytes are needed after compilation, so the compiler is
            // scoped tightly and released even if Compile/GetPTX throws.
            byte[] ptx;
            using (CudaRuntimeCompiler rtc = new CudaRuntimeCompiler(fileToCompile, "vectorAdd_kernel"))
            {
                rtc.Compile(args);

                string log = rtc.GetLogAsString();
                Console.WriteLine(log);

                ptx = rtc.GetPTX();
            }

            // The context and every device buffer live in using-scopes so that the
            // early return on verification failure (and any exception) still
            // releases them; previously that path skipped all Dispose() calls.
            using (CudaContext ctx = new CudaContext(0))
            {
                CudaKernel vectorAdd = ctx.LoadKernelPTX(ptx, "vectorAdd");

                // Print the vector length to be used
                int numElements = 50000;
                Console.WriteLine("[Vector addition of {0} elements]", numElements);

                // Host input vectors A and B, and host output vector C
                float[] h_A = new float[numElements];
                float[] h_B = new float[numElements];
                float[] h_C = new float[numElements];

                // Fixed seed: the same inputs are generated on every run
                Random rand = new Random(0);

                // Initialize the host input vectors
                for (int i = 0; i < numElements; ++i)
                {
                    h_A[i] = (float)rand.NextDouble();
                    h_B[i] = (float)rand.NextDouble();
                }

                Console.WriteLine("Allocate and copy input data from the host memory to the CUDA device\n");

                // d_A / d_B: device input vectors created from the host arrays via
                // implicit conversion; d_C: device output vector.
                using (CudaDeviceVariable<float> d_A = h_A)
                using (CudaDeviceVariable<float> d_B = h_B)
                using (CudaDeviceVariable<float> d_C = new CudaDeviceVariable<float>(numElements))
                {
                    // Launch the Vector Add CUDA Kernel
                    int threadsPerBlock = 256;
                    // Ceiling division so that the last, partially filled block is included
                    int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
                    Console.WriteLine("CUDA kernel launch with {0} blocks of {1} threads\n", blocksPerGrid, threadsPerBlock);
                    vectorAdd.BlockDimensions = new dim3(threadsPerBlock, 1, 1);
                    vectorAdd.GridDimensions = new dim3(blocksPerGrid, 1, 1);

                    vectorAdd.Run(d_A.DevicePointer, d_B.DevicePointer, d_C.DevicePointer, numElements);

                    // Copy the device result vector in device memory to the host result vector
                    // in host memory.
                    Console.WriteLine("Copy output data from the CUDA device to the host memory\n");
                    d_C.CopyToHost(h_C);

                    // Verify that the result vector is correct
                    for (int i = 0; i < numElements; ++i)
                    {
                        if (Math.Abs(h_A[i] + h_B[i] - h_C[i]) > 1e-5)
                        {
                            Console.WriteLine("Result verification failed at element {0}!\n", i);
                            return; // using-scopes dispose the device buffers and the context
                        }
                    }

                    Console.WriteLine("Test PASSED\n");
                }
            }

            Console.WriteLine("Done\n");
        }

예제 #2

파일 보기

파일: CUDAVectorPrimitivesProvider.cs 프로젝트: joemoorhouse/vector-accelerator

        /// <summary>
        /// Compiles an inline CUDA "saxpy" kernel with CudaRuntimeCompiler (option "-G"),
        /// loads it from the generated PTX into a fresh CudaContext and sets its launch
        /// dimensions. The tail of the method shows how several kernels can be taken
        /// from a single module. Nothing is launched and nothing is returned.
        /// </summary>
        public void Compile()
        {
            using (var ctx = new CudaContext())
            {
                // with verbatim string @, we only have to double up double quotes: no other escaping
                string source = @"
                extern ""C"" __global__
                void saxpy(float a, float *x, float *y, float *out, size_t n)
                {
                    size_t tid = blockIdx.x * blockDim.x + threadIdx.x;
                    if (tid < n)
                    {
                        out[tid] = a * x[tid] + y[tid];
                    }
                }
                ";

                source += Environment.NewLine;

                var name = "Test";
                var headers = new string[0];
                var includeNames = new string[0];

                // The compiler is IDisposable and only needed until the PTX has been
                // extracted, so it is released here even if compilation throws.
                byte[] ptx;
                using (var compiler = new CudaRuntimeCompiler(source, name, headers, includeNames))
                {
                    // --ptxas-options=-v -keep
                    compiler.Compile(new string[] { "-G" });

                    //var ptxString = compiler.GetPTXAsString(); // for debugging

                    ptx = compiler.GetPTX();
                }

                // The only entry point defined in the source above is the extern "C"
                // function "saxpy"; the former lookup of "kernelName" referred to a
                // function that does not exist in this PTX.
                var kernel = ctx.LoadKernelPTX(ptx, "saxpy");

                //One kernel per cu file:
                //CudaKernel kernel = ctx.LoadKernel(@"path\to\kernel.ptx", "kernelname");
                kernel.GridDimensions = new dim3(1, 1, 1);
                kernel.BlockDimensions = new dim3(16, 16);

                //kernel.Run()

                // Device allocation kept from the original example; it is not used by
                // anything below, so it is disposed right away instead of being leaked.
                using (var a = new CudaDeviceVariable<double>(100))
                {
                    //ManagedCuda.NPP.NPPsExtensions.NPPsExtensionMethods.Sqr()
                }

                //Multiple kernels per cu file:
                // NOTE(review): @"path\to\kernel.ptx", "kernel1" and "kernel2" look like
                // placeholders - presumably this call fails unless such a file exists; confirm.
                CUmodule cumodule = ctx.LoadModule(@"path\to\kernel.ptx");
                CudaKernel kernel1 = new CudaKernel("kernel1", cumodule, ctx)
                {
                    GridDimensions = new dim3(1, 1, 1),
                    BlockDimensions = new dim3(16, 16),
                };
                CudaKernel kernel2 = new CudaKernel("kernel2", cumodule, ctx)
                {
                    GridDimensions = new dim3(1, 1, 1),
                    BlockDimensions = new dim3(16, 16),
                };

            }
        }