Example #1
0
        /// <summary>
        /// Loads a module from an in-memory image string and stores the resulting
        /// handle in <c>module</c>.
        /// </summary>
        /// <param name="image">Module image text; it is marshalled to a temporary unmanaged ANSI string.</param>
        /// <param name="numOptions">Number of options, forwarded unchanged to <c>Driver.ModuleLoadDataEx</c>.</param>
        /// <param name="options">Option argument, forwarded unchanged.</param>
        /// <param name="optionValues">Pointer to the option values, forwarded unchanged.</param>
        public void LoadDataEx(string image, uint numOptions, CUjit_option options, IntPtr optionValues)
        {
            //Dispose();
            IntPtr ptxImage = Marshal.StringToHGlobalAnsi(image);

            try
            {
                module = Driver.ModuleLoadDataEx(
                    ptxImage, numOptions, options, optionValues
                    );
            }
            finally
            {
                // The unmanaged copy is only needed for the duration of the load call
                // (NOTE(review): assumes Driver.ModuleLoadDataEx does not retain the
                // pointer, as is the case for the CUDA driver's cuModuleLoadDataEx).
                // FreeHGlobal(IntPtr.Zero) is a no-op, so a null image is fine here.
                Marshal.FreeHGlobal(ptxImage);
            }
        }
Example #2
0
        /// <summary>
        /// Returns the module loaded from <paramref name="cubin"/>, loading it with
        /// <c>cuModuleLoadDataEx</c> (no JIT options) on first use and caching the
        /// result in <c>cached_modules</c>, keyed by the image pointer.
        /// </summary>
        /// <param name="cubin">Pointer to the module image; also used as the cache key.</param>
        /// <returns>The cached or newly loaded module handle.</returns>
        public static CUmodule InitializeModule(IntPtr cubin)
        {
            if (cached_modules.TryGetValue(cubin, out CUmodule value))
            {
                return(value);
            }

            // No JIT options are supplied; the arrays are empty but still passed so
            // the native call receives valid (non-null) arguments.
            uint    num_ops   = 0;
            var     op        = new CUjit_option[num_ops];
            ulong[] op_values = new ulong[num_ops];

            var op_values_link_handle = GCHandle.Alloc(op_values, GCHandleType.Pinned);

            try
            {
                var op_values_link_intptr = op_values_link_handle.AddrOfPinnedObject();

                CUresult res = Cuda.cuModuleLoadDataEx(out CUmodule module, cubin, num_ops, op, op_values_link_intptr);

                CudaHelpers.CheckCudaError(res);
                cached_modules[cubin] = module;
                return(module);
            }
            finally
            {
                // Release the pin on every path; previously the handle was never
                // freed, leaking one GCHandle per cache miss.
                op_values_link_handle.Free();
            }
        }
Example #3
0
        /// <summary>
        /// Demo: initialises the CUDA driver on device 0, loads the PTX file
        /// <c>stuff.ptx</c> as a module, launches the kernel <c>_Z5helloPc</c> over an
        /// 11-byte buffer and prints the buffer contents after the launch.
        /// </summary>
        /// <exception cref="Exception">A CUDA driver call returned something other than <c>CUDA_SUCCESS</c>.</exception>
        static unsafe void Part1()
        {
            // Number of bytes copied to/from the device and number of threads launched.
            const int messageLength = 11;

            // Turns a failing driver status into an exception that names the call.
            void Check(CUresult status, string call)
            {
                if (status != CUresult.CUDA_SUCCESS)
                {
                    throw new Exception($"{call} failed: {status}");
                }
            }

            Check(Cuda.cuInit(0), "cuInit");
            Check(Cuda.cuDeviceGet(out int device, 0), "cuDeviceGet");
            Check(Cuda.cuDeviceGetPCIBusId(out string pciBusId, 100, device), "cuDeviceGetPCIBusId");
            Check(Cuda.cuDeviceGetName(out string name, 100, device), "cuDeviceGetName");
            Check(Cuda.cuCtxCreate_v2(out CUcontext cuContext, 0, device), "cuCtxCreate_v2");

            // Everything below is released in the finally block, on success or failure.
            IntPtr   ptr                  = IntPtr.Zero;
            GCHandle option_values_handle = default(GCHandle);
            GCHandle handle               = default(GCHandle);
            GCHandle handle2              = default(GCHandle);

            try
            {
                string kernel;
                using (StreamReader sr = new StreamReader(AssemblyDirectory + @"/../../Project2/x64/Debug/stuff.ptx"))
                {
                    kernel = sr.ReadToEnd();
                }
                ptr = Marshal.StringToHGlobalAnsi(kernel);

                // No JIT options: empty arrays, but still valid pointers for the native call.
                int[] option_values = new int[] { };
                option_values_handle = GCHandle.Alloc(option_values, GCHandleType.Pinned);
                IntPtr options_values_ptr = option_values_handle.AddrOfPinnedObject();

                CUjit_option[] options = new CUjit_option[] { };
                Check(Cuda.cuModuleLoadDataEx(out CUmodule cuModule, ptr, (uint)0, options, options_values_ptr), "cuModuleLoadDataEx");
                Check(Cuda.cuModuleGetFunction(out CUfunction helloWorld, cuModule, "_Z5helloPc"), "cuModuleGetFunction");

                // Host buffer. NOTE(review): the bytes look like "Hello world" with every
                // character decremented by one, so the kernel presumably increments each
                // byte - confirm against stuff.ptx.
                byte[] v = { (byte)'G', (byte)'d', (byte)'k', (byte)'k', (byte)'n', 31, (byte)'v', (byte)'n', (byte)'q', (byte)'k', (byte)'c', 0 };
                handle = GCHandle.Alloc(v, GCHandleType.Pinned);
                IntPtr pointer = handle.AddrOfPinnedObject();

                // The device allocation is not freed explicitly; it is released when the
                // context is destroyed in the finally block.
                Check(Cuda.cuMemAlloc_v2(out IntPtr dptr, messageLength * sizeof(byte)), "cuMemAlloc_v2");
                Check(Cuda.cuMemcpyHtoD_v2(dptr, pointer, messageLength * sizeof(byte)), "cuMemcpyHtoD_v2");

                // cuLaunchKernel takes an array of pointers to the argument values:
                // x holds the single argument (the device pointer), kp holds &x[0].
                IntPtr[] x = new IntPtr[] { dptr };
                handle2 = GCHandle.Alloc(x, GCHandleType.Pinned);
                IntPtr   pointer2 = handle2.AddrOfPinnedObject();
                IntPtr[] kp       = new IntPtr[] { pointer2 };

                CUresult res;
                fixed(IntPtr *kernelParams = kp)
                {
                    res = Cuda.cuLaunchKernel(helloWorld,
                                              1, 1, 1,             // grid has one block.
                                              messageLength, 1, 1, // block has 11 threads.
                                              0,                   // no shared memory
                                              default(CUstream),
                                              (IntPtr)kernelParams,
                                              (IntPtr)IntPtr.Zero
                                              );
                }
                Check(res, "cuLaunchKernel");

                Check(Cuda.cuMemcpyDtoH_v2(pointer, dptr, messageLength * sizeof(byte)), "cuMemcpyDtoH_v2");

                for (int i = 0; i < messageLength; ++i)
                {
                    System.Console.Write((char)v[i]);
                }
                System.Console.WriteLine();
            }
            finally
            {
                if (handle2.IsAllocated)
                {
                    handle2.Free();
                }
                if (handle.IsAllocated)
                {
                    handle.Free();
                }
                if (option_values_handle.IsAllocated)
                {
                    option_values_handle.Free();
                }
                if (ptr != IntPtr.Zero)
                {
                    Marshal.FreeHGlobal(ptr);
                }
                Cuda.cuCtxDestroy_v2(cuContext);
            }
        }
Example #4
0
        /// <summary>
        /// Demo: initialises the CUDA driver on device 0, feeds the object file
        /// <c>stuff.cu.obj</c> through the JIT linker (<c>cuLinkCreate_v2</c>,
        /// <c>cuLinkAddData_v2</c>, <c>cuLinkComplete</c>), loads the linked image as a
        /// module, launches the kernel <c>_Z5helloPc</c> over an 11-byte buffer and
        /// prints the buffer contents. The linker info/error logs are written to the
        /// console after the create and add-data steps.
        /// </summary>
        /// <exception cref="Exception">A CUDA driver call returned something other than <c>CUDA_SUCCESS</c>.</exception>
        static unsafe void Part2()
        {
            // Number of bytes copied to/from the device and number of threads launched.
            const int messageLength = 11;
            // Size in bytes of each of the two linker log buffers.
            const int size = 1024 * 100;

            // Turns a failing driver status into an exception that names the call.
            void Check(CUresult status, string call)
            {
                if (status != CUresult.CUDA_SUCCESS)
                {
                    throw new Exception($"{call} failed: {status}");
                }
            }

            // Frees a pin if it was ever allocated (safe on default handles).
            void Release(GCHandle pin)
            {
                if (pin.IsAllocated)
                {
                    pin.Free();
                }
            }

            Check(Cuda.cuInit(0), "cuInit");
            Check(Cuda.cuDeviceGet(out int device, 0), "cuDeviceGet");
            Check(Cuda.cuDeviceGetPCIBusId(out string pciBusId, 100, device), "cuDeviceGetPCIBusId");
            Check(Cuda.cuDeviceGetName(out string name, 100, device), "cuDeviceGetName");
            Check(Cuda.cuCtxCreate_v2(out CUcontext cuContext, 0, device), "cuCtxCreate_v2");

            // Every pin below is released in the finally block, on success or failure.
            GCHandle info_log_buffer_handle  = default(GCHandle);
            GCHandle error_log_buffer_handle = default(GCHandle);
            GCHandle op_values_link_handle   = default(GCHandle);
            GCHandle op_values_handle        = default(GCHandle);
            GCHandle kernel_handle           = default(GCHandle);
            GCHandle handle                  = default(GCHandle);
            GCHandle handle2                 = default(GCHandle);

            try
            {
                // ReadAllBytes reads the whole file (no short reads), opens it
                // read-only, and closes it before returning.
                byte[] gpu_obj = File.ReadAllBytes(AssemblyDirectory + @"/../../Project2/x64/Debug/stuff.cu.obj");

                // Linker options: info/error log buffers plus verbose logging.
                uint    num_ops_link   = 5;
                var     op_link        = new CUjit_option[num_ops_link];
                ulong[] op_values_link = new ulong[num_ops_link];

                op_link[0]        = CUjit_option.CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES;
                op_values_link[0] = (ulong)size;

                op_link[1] = CUjit_option.CU_JIT_INFO_LOG_BUFFER;
                byte[] info_log_buffer = new byte[size];
                info_log_buffer_handle = GCHandle.Alloc(info_log_buffer, GCHandleType.Pinned);
                IntPtr info_log_buffer_intptr = info_log_buffer_handle.AddrOfPinnedObject();
                op_values_link[1] = (ulong)info_log_buffer_intptr;

                op_link[2]        = CUjit_option.CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES;
                op_values_link[2] = (ulong)size;

                op_link[3] = CUjit_option.CU_JIT_ERROR_LOG_BUFFER;
                byte[] error_log_buffer = new byte[size];
                error_log_buffer_handle = GCHandle.Alloc(error_log_buffer, GCHandleType.Pinned);
                IntPtr error_log_buffer_intptr = error_log_buffer_handle.AddrOfPinnedObject();
                op_values_link[3] = (ulong)error_log_buffer_intptr;

                op_link[4]        = CUjit_option.CU_JIT_LOG_VERBOSE;
                op_values_link[4] = (ulong)1;

                op_values_link_handle = GCHandle.Alloc(op_values_link, GCHandleType.Pinned);
                IntPtr op_values_link_intptr = op_values_link_handle.AddrOfPinnedObject();

                // Prints whatever the linker has written to the two log buffers so far.
                void DumpLinkerLogs()
                {
                    string info = Marshal.PtrToStringAnsi(info_log_buffer_intptr);
                    System.Console.WriteLine(info);
                    string error = Marshal.PtrToStringAnsi(error_log_buffer_intptr);
                    System.Console.WriteLine(error);
                }

                // Logs are dumped before the status check so they are visible on failure too.
                CUresult res = Cuda.cuLinkCreate_v2(num_ops_link, op_link, op_values_link_intptr, out CUlinkState linkState);
                DumpLinkerLogs();
                Check(res, "cuLinkCreate_v2");

                // No per-input options: empty arrays, but still valid pointers.
                uint num_ops = 0;

                CUjit_option[] op        = new CUjit_option[0];
                ulong[]        op_values = new ulong[0];
                op_values_handle = GCHandle.Alloc(op_values, GCHandleType.Pinned);
                IntPtr op_values_intptr = op_values_handle.AddrOfPinnedObject();

                kernel_handle = GCHandle.Alloc(gpu_obj, GCHandleType.Pinned);
                IntPtr gpu_bcl_obj_intptr = kernel_handle.AddrOfPinnedObject();

                res = Cuda.cuLinkAddData_v2(linkState, CUjitInputType.CU_JIT_INPUT_OBJECT,
                                            gpu_bcl_obj_intptr, (uint)gpu_obj.Length,
                                            "", num_ops, op, op_values_intptr);
                DumpLinkerLogs();
                Check(res, "cuLinkAddData_v2");

                // NOTE(review): the link state is never destroyed here (the original did
                // not destroy it either); consider cuLinkDestroy once the module is loaded.
                Check(Cuda.cuLinkComplete(linkState, out IntPtr image, out ulong sz), "cuLinkComplete");

                // Zero options are passed, so hand over the matching empty value array
                // rather than the linker's option values.
                Check(Cuda.cuModuleLoadDataEx(out CUmodule module, image, num_ops, op, op_values_intptr), "cuModuleLoadDataEx");
                Check(Cuda.cuModuleGetFunction(out CUfunction helloWorld, module, "_Z5helloPc"), "cuModuleGetFunction");

                // Host buffer. NOTE(review): the bytes look like "Hello world" with every
                // character decremented by one, so the kernel presumably increments each
                // byte - confirm against the kernel source.
                byte[] v = { (byte)'G', (byte)'d', (byte)'k', (byte)'k', (byte)'n', 31, (byte)'v', (byte)'n', (byte)'q', (byte)'k', (byte)'c', 0 };
                handle = GCHandle.Alloc(v, GCHandleType.Pinned);
                IntPtr pointer = handle.AddrOfPinnedObject();

                // The device allocation is not freed explicitly; it is released when the
                // context is destroyed in the finally block.
                Check(Cuda.cuMemAlloc_v2(out IntPtr dptr, messageLength * sizeof(byte)), "cuMemAlloc_v2");
                Check(Cuda.cuMemcpyHtoD_v2(dptr, pointer, messageLength * sizeof(byte)), "cuMemcpyHtoD_v2");

                // cuLaunchKernel takes an array of pointers to the argument values:
                // x holds the single argument (the device pointer), kp holds &x[0].
                IntPtr[] x = new IntPtr[] { dptr };
                handle2 = GCHandle.Alloc(x, GCHandleType.Pinned);
                IntPtr   pointer2 = handle2.AddrOfPinnedObject();
                IntPtr[] kp       = new IntPtr[] { pointer2 };

                fixed(IntPtr *kernelParams = kp)
                {
                    res = Cuda.cuLaunchKernel(helloWorld,
                                              1, 1, 1,             // grid has one block.
                                              messageLength, 1, 1, // block has 11 threads.
                                              0,                   // no shared memory
                                              default(CUstream),
                                              (IntPtr)kernelParams,
                                              (IntPtr)IntPtr.Zero
                                              );
                }
                Check(res, "cuLaunchKernel");

                Check(Cuda.cuMemcpyDtoH_v2(pointer, dptr, messageLength * sizeof(byte)), "cuMemcpyDtoH_v2");

                for (int i = 0; i < messageLength; ++i)
                {
                    System.Console.Write((char)v[i]);
                }
                System.Console.WriteLine();
            }
            finally
            {
                Release(handle2);
                Release(handle);
                Release(kernel_handle);
                Release(op_values_handle);
                Release(op_values_link_handle);
                Release(error_log_buffer_handle);
                Release(info_log_buffer_handle);
                Cuda.cuCtxDestroy_v2(cuContext);
            }
        }
Example #5
0
 // http://developer.download.nvidia.com/compute/cuda/3_1/toolkit/docs/online/group__CUMODULE_gbfbf77eb2a307af8aa81376ecc909bd3.html
 // Externally implemented (extern) module-load entry point; the managed side only declares the signature.
 // NOTE(review): the name and the URL above suggest this binds the CUDA driver's cuModuleLoadDataEx,
 // but the attribute that names the native library/entry point is not visible in this excerpt - confirm.
 //   module       - receives the loaded module handle (out parameter).
 //   image        - pointer to the module image to load.
 //   numOptions   - presumably the number of entries in options/optionValues; verify against the CUDA docs.
 //   options      - array of JIT option identifiers.
 //   optionValues - pointer to the option values; presumably one value per entry in options.
 // Returns a CUresult status code.
 private static extern CUresult nativeModuleLoadDataEx(out CUmodule module, IntPtr image, uint numOptions, CUjit_option[] options, IntPtr optionValues);