Ejemplo n.º 1
0
        public static void Run(int size, double density)
        {
            try
            {
                Console.WriteLine("Running CUDA tests (Double) ... [N = {0}]", size);
                Console.WriteLine();

                var A = Generate.Random(size, size, density);
                var B = Generate.RandomSymmetric(size, density, true);

                // Initialize CUDA device.
                Cuda.Initialize();

                RunCudaTest(A, B);

                Console.WriteLine();
            }
            catch (CudaException e)
            {
                Console.WriteLine(e.Result);
            }
            catch (Exception e)
            {
                Console.WriteLine(e.Message);
            }
        }
Ejemplo n.º 2
0
        private static void SetBaseIndex(int base_index)
        {
            unsafe
            {
                IntPtr[] x1      = new IntPtr[] { new IntPtr(base_index) };
                GCHandle handle2 = GCHandle.Alloc(x1, GCHandleType.Pinned);
                var      parm1   = handle2.AddrOfPinnedObject();

                IntPtr[] kp = new IntPtr[] { parm1 };

                CUmodule module = RUNTIME.RuntimeModule;
                CudaHelpers.CheckCudaError(Cuda.cuModuleGetFunction(out CUfunction function, module, "_Z21set_kernel_base_indexi"));
                Campy.Utils.CudaHelpers.MakeLinearTiling(1,
                                                         out Campy.Utils.CudaHelpers.dim3 tile_size,
                                                         out Campy.Utils.CudaHelpers.dim3 tiles);
                CUresult res;
                fixed(IntPtr *kernelParams = kp)
                {
                    res = Cuda.cuLaunchKernel(
                        function,
                        tiles.x, tiles.y, tiles.z,             // grid has one block.
                        tile_size.x, tile_size.y, tile_size.z, // n threads.
                        0,                                     // no shared memory
                        default(CUstream),
                        (IntPtr)kernelParams,
                        (IntPtr)IntPtr.Zero
                        );
                }

                Utils.CudaHelpers.CheckCudaError(res);
                res = Cuda.cuCtxSynchronize(); // Make sure it's copied back to host.
                Utils.CudaHelpers.CheckCudaError(res);
            }
        }
Ejemplo n.º 3
0
        public Engine(Dictionary <RenderingPaneHwndDescription> renderingPaneHwndDescriptions)
        {
            Console.WriteLine("\nMojo initializing...\n");

            D3D11.Initialize(out mDxgiFactory, out mD3D11Device);
            Cuda.Initialize(mD3D11Device);
            Thrust.Initialize();

            Segmenter = new Segmenter
            {
                Interop = new Interop.Segmenter(mD3D11Device, mD3D11Device.ImmediateContext, Constants.Parameters)
            };

            Viewers = new Dictionary <Viewer>
            {
                {
                    "Segmenter2D", new Viewer
                    {
                        RenderingPane = new RenderingPane(mDxgiFactory,
                                                          mD3D11Device,
                                                          mD3D11Device.ImmediateContext,
                                                          renderingPaneHwndDescriptions.Get("Segmenter2D").Handle,
                                                          renderingPaneHwndDescriptions.Get("Segmenter2D").Width,
                                                          renderingPaneHwndDescriptions.Get("Segmenter2D").Height,
                                                          new Segmenter2D.RenderingStrategy(mD3D11Device,
                                                                                            mD3D11Device.ImmediateContext,
                                                                                            Segmenter)),
                        UserInputHandler = new Segmenter2D.UserInputHandler(Segmenter)
                    }
                }
            };
        }
Ejemplo n.º 4
0
 public static void CheckCudaError(Swigged.Cuda.CUresult res)
 {
     if (res != CUresult.CUDA_SUCCESS)
     {
         Cuda.cuGetErrorString(res, out IntPtr pStr);
         var cuda_error = Marshal.PtrToStringAnsi(pStr);
         throw new Exception("CUDA error: " + cuda_error);
     }
 }
Ejemplo n.º 5
0
        public void Dispose()
        {
            Viewers.Internal.Values.ToList().ForEach(viewer => viewer.Dispose());
            Segmenter.Dispose();

            Thrust.Terminate();
            Cuda.Terminate();
            D3D11.Terminate(ref mDxgiFactory, ref mD3D11Device);

            Console.WriteLine("\nMojo terminating...\n");
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Allocated a GPU managed buffer.
        /// Code based on https://www.codeproject.com/Articles/32125/Unmanaged-Arrays-in-C-No-Problem
        /// </summary>
        public IntPtr New(int bytes)
        {
            if (false)
            {
                // Let's try allocating a block of memory on the host. cuMemHostAlloc allocates bytesize
                // bytes of host memory that is page-locked and accessible to the device.
                // Note: cuMemHostAlloc and cuMemAllocHost seem to be almost identical except for the
                // third parameter to cuMemHostAlloc that is used for the type of memory allocation.
                var res = Cuda.cuMemHostAlloc(out IntPtr p, 10, (uint)Cuda.CU_MEMHOSTALLOC_DEVICEMAP);
                if (res == CUresult.CUDA_SUCCESS)
                {
                    System.Console.WriteLine("Worked.");
                }
                else
                {
                    System.Console.WriteLine("Did not work.");
                }
            }

            if (false)
            {
                // Allocate CPU memory, pin it, then register it with GPU.
                int      f       = new int();
                GCHandle handle  = GCHandle.Alloc(f, GCHandleType.Pinned);
                IntPtr   pointer = (IntPtr)handle;
                var      size    = Marshal.SizeOf(f);
                var      res     = Cuda.cuMemHostRegister_v2(pointer, (uint)size, (uint)Cuda.CU_MEMHOSTALLOC_DEVICEMAP);
                if (res == CUresult.CUDA_SUCCESS)
                {
                    System.Console.WriteLine("Worked.");
                }
                else
                {
                    System.Console.WriteLine("Did not work.");
                }
            }

            {
                // Allocate Unified Memory.
                var size = bytes;
                var res  = Cuda.cuMemAllocManaged(out IntPtr pointer, (uint)size, (uint)Swigged.Cuda.CUmemAttach_flags.CU_MEM_ATTACH_GLOBAL);
                if (res != CUresult.CUDA_SUCCESS)
                {
                    throw new Exception("cuMemAllocManged failed.");
                }
                return(pointer);
            }

            if (false)
            {
                return(Marshal.AllocHGlobal(bytes));
            }
        }
Ejemplo n.º 7
0
        protected override void Factorize(int rows, int columns, int nnz, CuSparseContext <Complex> A)
        {
            var ap = A.ColumnPointers;
            var ai = A.RowIndices;
            var ax = A.Values;

            var desc = A.MatrixDescriptor;

            // Analyze chol(A) to know structure of L.
            Check(NativeMethods.cusolverSpXcsrcholAnalysis(_p, rows, nnz, desc, ap, ai, _info));

            int size_internal = 0, size_chol = 0;

            // Workspace for chol(A).
            Check(NativeMethods.cusolverSpZcsrcholBufferInfo(_p, rows, nnz, desc, ax, ap, ai, _info,
                                                             ref size_internal, ref size_chol));


            Cuda.Malloc(ref _buffer, sizeof(char) * size_chol);

            // Compute A = L*L^T.
            Check(NativeMethods.cusolverSpZcsrcholFactor(_p, rows, nnz, desc, ax, ap, ai, _info, _buffer));
        }
Ejemplo n.º 8
0
        protected override void Factorize(int rows, int columns, int nnz, CuSparseContext <double> A)
        {
            var ap = A.ColumnPointers;
            var ai = A.RowIndices;
            var ax = A.Values;

            var desc = A.MatrixDescriptor;

            // Analyze qr(A) to know structure of L.
            Check(NativeMethods.cusolverSpXcsrqrAnalysis(_p, rows, columns, nnz, desc, ap, ai, _info));

            int size_internal = 0, size_qr = 0;

            // Workspace for qr(A).
            Check(NativeMethods.cusolverSpDcsrqrBufferInfo(_p, rows, columns, nnz, desc, ax, ap, ai, _info,
                                                           ref size_internal, ref size_qr));

            Cuda.Malloc(ref _buffer, sizeof(char) * size_qr);

            Check(NativeMethods.cusolverSpDcsrqrSetup(_p, rows, columns, nnz, desc, ax, ap, ai, 0.0, _info));

            // Compute A = Q*R.
            Check(NativeMethods.cusolverSpDcsrqrFactor(_p, rows, columns, nnz, IntPtr.Zero, IntPtr.Zero, _info, _buffer));
        }
Ejemplo n.º 9
0
        public static void For(int number_of_threads, SimpleKernel simpleKernel)
        {
            if (Campy.Utils.Options.IsOn("import-only"))
            {
                JustImport(simpleKernel);
                return;
            }

            GCHandle handle1 = default(GCHandle);
            GCHandle handle2 = default(GCHandle);

            try
            {
                unsafe
                {
                    System.Reflection.MethodInfo method_info = simpleKernel.Method;
                    String kernel_assembly_file_name         = method_info.DeclaringType.Assembly.Location;
                    Mono.Cecil.ModuleDefinition md           = Campy.Meta.StickyReadMod.StickyReadModule(
                        kernel_assembly_file_name, new ReaderParameters {
                        ReadSymbols = true
                    });
                    MethodReference method_reference = md.ImportReference(method_info);

                    CUfunction ptr_to_kernel = default(CUfunction);
                    CUmodule   module        = default(CUmodule);

                    Campy.Utils.TimePhase.Time("compile     ", () =>
                    {
                        IntPtr image = Singleton._compiler.Compile(method_reference, simpleKernel.Target);
                        module       = Singleton._compiler.SetModule(method_reference, image);
                        Singleton._compiler.StoreJits(module);
                        ptr_to_kernel = Singleton._compiler.GetCudaFunction(method_reference, module);
                    });

                    RUNTIME.BclCheckHeap();

                    BUFFERS buffer = Singleton.Buffer;
                    IntPtr  kernel_target_object = IntPtr.Zero;

                    Campy.Utils.TimePhase.Time("deep copy ", () =>
                    {
                        int count = simpleKernel.Method.GetParameters().Length;
                        var bb    = Singleton._compiler.GetBasicBlock(method_reference);
                        if (bb.HasThis)
                        {
                            count++;
                        }
                        if (!(count == 1 || count == 2))
                        {
                            throw new Exception("Expecting at least one parameter for kernel.");
                        }

                        if (bb.HasThis)
                        {
                            kernel_target_object = buffer.AddDataStructure(simpleKernel.Target);
                        }
                    });

                    Campy.Utils.TimePhase.Time("kernel cctor set up", () =>
                    {
                        // For each cctor, run on GPU.
                        // Construct dependency graph of methods.
                        List <MethodReference> order_list = COMPILER.Singleton.ConstructCctorOrder();

                        // Finally, call cctors.
                        foreach (var bb in order_list)
                        {
                            if (Campy.Utils.Options.IsOn("trace-cctors"))
                            {
                                System.Console.WriteLine("Executing cctor "
                                                         + bb.FullName);
                            }
                            var cctor = Singleton._compiler.GetCudaFunction(bb, module);

                            var res = CUresult.CUDA_SUCCESS;
                            Campy.Utils.CudaHelpers.MakeLinearTiling(1,
                                                                     out Campy.Utils.CudaHelpers.dim3 tile_size, out Campy.Utils.CudaHelpers.dim3 tiles);

                            res = Cuda.cuLaunchKernel(
                                cctor,
                                tiles.x, tiles.y, tiles.z,             // grid has one block.
                                tile_size.x, tile_size.y, tile_size.z, // n threads.
                                0,                                     // no shared memory
                                default(CUstream),
                                (IntPtr)IntPtr.Zero,
                                (IntPtr)IntPtr.Zero
                                );

                            CudaHelpers.CheckCudaError(res);
                            res = Cuda.cuCtxSynchronize(); // Make sure it's copied back to host.
                            CudaHelpers.CheckCudaError(res);
                        }
                    });

                    if (Campy.Utils.Options.IsOn("trace-cctors"))
                    {
                        System.Console.WriteLine("Done with cctors");
                    }

                    Campy.Utils.TimePhase.Time("kernel call ", () =>
                    {
                        IntPtr[] parm1 = new IntPtr[1];
                        IntPtr[] parm2 = new IntPtr[1];

                        parm1[0] = kernel_target_object;
                        parm2[0] = buffer.New(BUFFERS.SizeOf(typeof(int)));

                        IntPtr[] x1     = parm1;
                        handle1         = GCHandle.Alloc(x1, GCHandleType.Pinned);
                        IntPtr pointer1 = handle1.AddrOfPinnedObject();

                        IntPtr[] x2     = parm2;
                        handle2         = GCHandle.Alloc(x2, GCHandleType.Pinned);
                        IntPtr pointer2 = handle2.AddrOfPinnedObject();

                        IntPtr[] kp = new IntPtr[] { pointer1, pointer2 };
                        var res     = CUresult.CUDA_SUCCESS;
                        fixed(IntPtr * kernelParams = kp)
                        {
                            Campy.Utils.CudaHelpers.MakeLinearTiling(number_of_threads,
                                                                     out Campy.Utils.CudaHelpers.dim3 tile_size, out Campy.Utils.CudaHelpers.dim3 tiles);

                            //MakeLinearTiling(1, out dim3 tile_size, out dim3 tiles);

                            res = Cuda.cuLaunchKernel(
                                ptr_to_kernel,
                                tiles.x, tiles.y, tiles.z,             // grid has one block.
                                tile_size.x, tile_size.y, tile_size.z, // n threads.
                                0,                                     // no shared memory
                                default(CUstream),
                                (IntPtr)kernelParams,
                                (IntPtr)IntPtr.Zero
                                );
                        }
Ejemplo n.º 10
0
        public static void For(int number_of_threads, SimpleKernel simpleKernel)
        {
            GCHandle handle1 = default(GCHandle);
            GCHandle handle2 = default(GCHandle);

            try
            {
                unsafe
                {
                    //////// COMPILE KERNEL INTO GPU CODE ///////
                    /////////////////////////////////////////////
                    var stopwatch_cuda_compile = new Stopwatch();
                    stopwatch_cuda_compile.Start();
                    IntPtr     image               = Singleton()._converter.Compile(simpleKernel.Method, simpleKernel.Target);
                    CUfunction ptr_to_kernel       = Singleton()._converter.GetCudaFunction(simpleKernel.Method, image);
                    var        elapse_cuda_compile = stopwatch_cuda_compile.Elapsed;

                    RUNTIME.CheckHeap();

                    //////// COPY DATA INTO GPU /////////////////
                    /////////////////////////////////////////////
                    var stopwatch_deep_copy_to = new Stopwatch();
                    stopwatch_deep_copy_to.Reset();
                    stopwatch_deep_copy_to.Start();
                    BUFFERS buffer = Singleton().Buffer;

                    // Set up parameters.
                    int count = simpleKernel.Method.GetParameters().Length;
                    var bb    = Singleton()._converter.GetBasicBlock(simpleKernel.Method);
                    if (bb.HasThis)
                    {
                        count++;
                    }
                    if (!(count == 1 || count == 2))
                    {
                        throw new Exception("Expecting at least one parameter for kernel.");
                    }

                    IntPtr[] parm1 = new IntPtr[1];
                    IntPtr[] parm2 = new IntPtr[1];
                    IntPtr   ptr   = IntPtr.Zero;

                    // The method really should have a "this" because it's a closure
                    // object.
                    if (bb.HasThis)
                    {
                        RUNTIME.CheckHeap();
                        ptr      = buffer.AddDataStructure(simpleKernel.Target);
                        parm1[0] = ptr;
                    }

                    {
                        Type btype = typeof(int);
                        var  s     = BUFFERS.SizeOf(btype);
                        var  ptr2  = buffer.New(s);
                        // buffer.DeepCopyToImplementation(index, ptr2);
                        parm2[0] = ptr2;
                    }

                    stopwatch_deep_copy_to.Start();
                    var elapse_deep_copy_to = stopwatch_cuda_compile.Elapsed;

                    var stopwatch_call_kernel = new Stopwatch();
                    stopwatch_call_kernel.Reset();
                    stopwatch_call_kernel.Start();

                    IntPtr[] x1 = parm1;
                    handle1 = GCHandle.Alloc(x1, GCHandleType.Pinned);
                    IntPtr pointer1 = handle1.AddrOfPinnedObject();

                    IntPtr[] x2 = parm2;
                    handle2 = GCHandle.Alloc(x2, GCHandleType.Pinned);
                    IntPtr pointer2 = handle2.AddrOfPinnedObject();

                    RUNTIME.CheckHeap();

                    IntPtr[] kp  = new IntPtr[] { pointer1, pointer2 };
                    var      res = CUresult.CUDA_SUCCESS;
                    fixed(IntPtr *kernelParams = kp)
                    {
                        Campy.Utils.CudaHelpers.MakeLinearTiling(number_of_threads, out Campy.Utils.CudaHelpers.dim3 tile_size, out Campy.Utils.CudaHelpers.dim3 tiles);

                        //MakeLinearTiling(1, out dim3 tile_size, out dim3 tiles);

                        res = Cuda.cuLaunchKernel(
                            ptr_to_kernel,
                            tiles.x, tiles.y, tiles.z,             // grid has one block.
                            tile_size.x, tile_size.y, tile_size.z, // n threads.
                            0,                                     // no shared memory
                            default(CUstream),
                            (IntPtr)kernelParams,
                            (IntPtr)IntPtr.Zero
                            );
                    }
                    CudaHelpers.CheckCudaError(res);
                    res = Cuda.cuCtxSynchronize(); // Make sure it's copied back to host.
                    CudaHelpers.CheckCudaError(res);

                    stopwatch_call_kernel.Stop();
                    var elapse_call_kernel = stopwatch_call_kernel.Elapsed;

                    if (Campy.Utils.Options.IsOn("jit_trace"))
                    {
                        System.Console.WriteLine("cuda compile  " + elapse_cuda_compile);
                        System.Console.WriteLine("deep copy in  " + elapse_deep_copy_to);
                        System.Console.WriteLine("cuda kernel   " + elapse_call_kernel);
                    }

                    {
                        var stopwatch_deep_copy_back = new Stopwatch();
                        stopwatch_deep_copy_back.Reset();

                        RUNTIME.CheckHeap();

                        stopwatch_deep_copy_back.Start();

                        buffer.SynchDataStructures();

                        stopwatch_deep_copy_back.Stop();

                        RUNTIME.CheckHeap();

                        var elapse_deep_copy_back = stopwatch_deep_copy_back.Elapsed;
                        if (Campy.Utils.Options.IsOn("jit_trace"))
                        {
                            System.Console.WriteLine("deep copy out " + elapse_deep_copy_back);
                        }
                    }
                }
            }
            catch (Exception e)
            {
                Console.WriteLine(e);
                throw e;
            }
            finally
            {
                if (default(GCHandle) != handle1)
                {
                    handle1.Free();
                }
                if (default(GCHandle) != handle2)
                {
                    handle2.Free();
                }
            }
        }
Ejemplo n.º 11
0
        static unsafe void Part1()
        {
            Cuda.cuInit(0);
            var res = Cuda.cuDeviceGet(out int device, 0);

            if (res != CUresult.CUDA_SUCCESS)
            {
                throw new Exception();
            }
            res = Cuda.cuDeviceGetPCIBusId(out string pciBusId, 100, device);
            if (res != CUresult.CUDA_SUCCESS)
            {
                throw new Exception();
            }
            res = Cuda.cuDeviceGetName(out string name, 100, device);
            if (res != CUresult.CUDA_SUCCESS)
            {
                throw new Exception();
            }
            res = Cuda.cuCtxCreate_v2(out CUcontext cuContext, 0, device);
            if (res != CUresult.CUDA_SUCCESS)
            {
                throw new Exception();
            }
            string       assembly_directory = AssemblyDirectory;
            StreamReader sr     = new StreamReader(assembly_directory + @"/../../Project2/x64/Debug/stuff.ptx");
            string       kernel = sr.ReadToEnd();
            IntPtr       ptr    = Marshal.StringToHGlobalAnsi(kernel);

            int[]    option_values        = new int[] { };
            GCHandle option_values_handle = GCHandle.Alloc(option_values, GCHandleType.Pinned);
            IntPtr   options_values_ptr   = option_values_handle.AddrOfPinnedObject();

            CUjit_option[] options = new CUjit_option[] { };
            res = Cuda.cuModuleLoadDataEx(out CUmodule cuModule, ptr, (uint)0, options, options_values_ptr);
            if (res != CUresult.CUDA_SUCCESS)
            {
                throw new Exception();
            }
            res = Cuda.cuModuleGetFunction(out CUfunction helloWorld, cuModule, "_Z5helloPc");
            if (res != CUresult.CUDA_SUCCESS)
            {
                throw new Exception();
            }
            byte[]   v       = { (byte)'G', (byte)'d', (byte)'k', (byte)'k', (byte)'n', 31, (byte)'v', (byte)'n', (byte)'q', (byte)'k', (byte)'c', 0 };
            GCHandle handle  = GCHandle.Alloc(v, GCHandleType.Pinned);
            IntPtr   pointer = IntPtr.Zero;

            pointer = handle.AddrOfPinnedObject();
            res     = Cuda.cuMemAlloc_v2(out IntPtr dptr, 11 * sizeof(byte));
            if (res != CUresult.CUDA_SUCCESS)
            {
                throw new Exception();
            }
            res = Cuda.cuMemcpyHtoD_v2(dptr, pointer, 11 * sizeof(byte));
            if (res != CUresult.CUDA_SUCCESS)
            {
                throw new Exception();
            }
            IntPtr[] x        = new IntPtr[] { dptr };
            GCHandle handle2  = GCHandle.Alloc(x, GCHandleType.Pinned);
            IntPtr   pointer2 = IntPtr.Zero;

            pointer2 = handle2.AddrOfPinnedObject();
            IntPtr[] kp = new IntPtr[] { pointer2 };
            fixed(IntPtr *kernelParams = kp)
            {
                res = Cuda.cuLaunchKernel(helloWorld,
                                          1, 1, 1,  // grid has one block.
                                          11, 1, 1, // block has 11 threads.
                                          0,        // no shared memory
                                          default(CUstream),
                                          (IntPtr)kernelParams,
                                          (IntPtr)IntPtr.Zero
                                          );
            }

            if (res != CUresult.CUDA_SUCCESS)
            {
                throw new Exception();
            }
            res = Cuda.cuMemcpyDtoH_v2(pointer, dptr, 11 * sizeof(byte));
            if (res != CUresult.CUDA_SUCCESS)
            {
                throw new Exception();
            }
            for (int i = 0; i < 11; ++i)
            {
                System.Console.Write((char)v[i]);
            }
            System.Console.WriteLine();
            Cuda.cuCtxDestroy_v2(cuContext);
        }
Ejemplo n.º 12
0
        static unsafe void Part2()
        {
            Cuda.cuInit(0);
            var res = Cuda.cuDeviceGet(out int device, 0);

            if (res != CUresult.CUDA_SUCCESS)
            {
                throw new Exception();
            }
            res = Cuda.cuDeviceGetPCIBusId(out string pciBusId, 100, device);
            if (res != CUresult.CUDA_SUCCESS)
            {
                throw new Exception();
            }
            res = Cuda.cuDeviceGetName(out string name, 100, device);
            if (res != CUresult.CUDA_SUCCESS)
            {
                throw new Exception();
            }
            res = Cuda.cuCtxCreate_v2(out CUcontext cuContext, 0, device);
            if (res != CUresult.CUDA_SUCCESS)
            {
                throw new Exception();
            }
            string     assembly_directory = AssemblyDirectory;
            FileStream fs      = new FileStream(assembly_directory + @"/../../Project2/x64/Debug/stuff.cu.obj", FileMode.Open);
            var        len     = fs.Length;
            var        gpu_obj = new byte[len];

            fs.Read(gpu_obj, 0, (int)len);

            uint num_ops_link = 5;
            var  op_link      = new CUjit_option[num_ops_link];

            ulong[] op_values_link = new ulong[num_ops_link];

            int size = 1024 * 100;

            op_link[0]        = CUjit_option.CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES;
            op_values_link[0] = (ulong)size;

            op_link[1] = CUjit_option.CU_JIT_INFO_LOG_BUFFER;
            byte[] info_log_buffer        = new byte[size];
            var    info_log_buffer_handle = GCHandle.Alloc(info_log_buffer, GCHandleType.Pinned);
            var    info_log_buffer_intptr = info_log_buffer_handle.AddrOfPinnedObject();

            op_values_link[1] = (ulong)info_log_buffer_intptr;

            op_link[2]        = CUjit_option.CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES;
            op_values_link[2] = (ulong)size;

            op_link[3] = CUjit_option.CU_JIT_ERROR_LOG_BUFFER;
            byte[] error_log_buffer        = new byte[size];
            var    error_log_buffer_handle = GCHandle.Alloc(error_log_buffer, GCHandleType.Pinned);
            var    error_log_buffer_intptr = error_log_buffer_handle.AddrOfPinnedObject();

            op_values_link[3] = (ulong)error_log_buffer_intptr;

            op_link[4]        = CUjit_option.CU_JIT_LOG_VERBOSE;
            op_values_link[4] = (ulong)1;

            var op_values_link_handle = GCHandle.Alloc(op_values_link, GCHandleType.Pinned);
            var op_values_link_intptr = op_values_link_handle.AddrOfPinnedObject();

            res = Cuda.cuLinkCreate_v2(num_ops_link, op_link, op_values_link_intptr, out CUlinkState linkState);
            {
                string info = Marshal.PtrToStringAnsi(info_log_buffer_intptr);
                System.Console.WriteLine(info);
                string error = Marshal.PtrToStringAnsi(error_log_buffer_intptr);
                System.Console.WriteLine(error);
            }

            uint num_ops = 0;

            CUjit_option[] op               = new CUjit_option[0];
            ulong[]        op_values        = new ulong[0];
            var            op_values_handle = GCHandle.Alloc(op_values, GCHandleType.Pinned);
            var            op_values_intptr = op_values_handle.AddrOfPinnedObject();

            var    kernel_handle      = GCHandle.Alloc(gpu_obj, GCHandleType.Pinned);
            IntPtr gpu_bcl_obj_intptr = kernel_handle.AddrOfPinnedObject();

            res = Cuda.cuLinkAddData_v2(linkState, CUjitInputType.CU_JIT_INPUT_OBJECT,
                                        gpu_bcl_obj_intptr, (uint)len,
                                        "", num_ops, op, op_values_intptr);
            {
                string info = Marshal.PtrToStringAnsi(info_log_buffer_intptr);
                System.Console.WriteLine(info);
                string error = Marshal.PtrToStringAnsi(error_log_buffer_intptr);
                System.Console.WriteLine(error);
            }

            IntPtr image;

            res = Cuda.cuLinkComplete(linkState, out image, out ulong sz);
            res = Cuda.cuModuleLoadDataEx(out CUmodule module, image, 0, op, op_values_link_intptr);

            res = Cuda.cuModuleGetFunction(out CUfunction helloWorld, module, "_Z5helloPc");
            if (res != CUresult.CUDA_SUCCESS)
            {
                throw new Exception();
            }
            byte[]   v       = { (byte)'G', (byte)'d', (byte)'k', (byte)'k', (byte)'n', 31, (byte)'v', (byte)'n', (byte)'q', (byte)'k', (byte)'c', 0 };
            GCHandle handle  = GCHandle.Alloc(v, GCHandleType.Pinned);
            IntPtr   pointer = IntPtr.Zero;

            pointer = handle.AddrOfPinnedObject();
            res     = Cuda.cuMemAlloc_v2(out IntPtr dptr, 11 * sizeof(byte));
            if (res != CUresult.CUDA_SUCCESS)
            {
                throw new Exception();
            }
            res = Cuda.cuMemcpyHtoD_v2(dptr, pointer, 11 * sizeof(byte));
            if (res != CUresult.CUDA_SUCCESS)
            {
                throw new Exception();
            }
            IntPtr[] x        = new IntPtr[] { dptr };
            GCHandle handle2  = GCHandle.Alloc(x, GCHandleType.Pinned);
            IntPtr   pointer2 = IntPtr.Zero;

            pointer2 = handle2.AddrOfPinnedObject();
            IntPtr[] kp = new IntPtr[] { pointer2 };
            fixed(IntPtr *kernelParams = kp)
            {
                res = Cuda.cuLaunchKernel(helloWorld,
                                          1, 1, 1,  // grid has one block.
                                          11, 1, 1, // block has 11 threads.
                                          0,        // no shared memory
                                          default(CUstream),
                                          (IntPtr)kernelParams,
                                          (IntPtr)IntPtr.Zero
                                          );
            }

            if (res != CUresult.CUDA_SUCCESS)
            {
                throw new Exception();
            }
            res = Cuda.cuMemcpyDtoH_v2(pointer, dptr, 11 * sizeof(byte));
            if (res != CUresult.CUDA_SUCCESS)
            {
                throw new Exception();
            }
            for (int i = 0; i < 11; ++i)
            {
                System.Console.Write((char)v[i]);
            }
            System.Console.WriteLine();
            Cuda.cuCtxDestroy_v2(cuContext);
        }
Ejemplo n.º 13
0
        static unsafe void Main(string[] args)
        {
            System.Console.WriteLine("1");
            Cuda.cuInit(0);
            System.Console.WriteLine("2");

            // Device api.
            var res = Cuda.cuDeviceGet(out int device, 0);

            if (res != CUresult.CUDA_SUCCESS)
            {
                throw new Exception();
            }
            res = Cuda.cuDeviceGetPCIBusId(out string pciBusId, 100, device);
            if (res != CUresult.CUDA_SUCCESS)
            {
                throw new Exception();
            }
            res = Cuda.cuDeviceGetName(out string name, 100, device);
            if (res != CUresult.CUDA_SUCCESS)
            {
                throw new Exception();
            }

            System.Console.WriteLine("3");

            res = Cuda.cuCtxCreate_v2(out CUcontext cuContext, 0, device);
            if (res != CUresult.CUDA_SUCCESS)
            {
                throw new Exception();
            }
            string cu_kernel      = @"
#include <stdio.h>

__global__
void kern(int * ar)
{
	int i = threadIdx.x;
	if (i < 11)
		ar[i] = ar[i] + 1;
}
";
            string compile_string = @"
nvcc --ptx --gpu-architecture=sm_20 -ccbin ""C:\Program Files(x86)\Microsoft Visual Studio 14.0\VC\bin"" y.cu";

            string kernel = @"
//
// Generated by NVIDIA NVVM Compiler
//
// Compiler Build ID: CL-21373419
// Cuda compilation tools, release 8.0, V8.0.55
// Based on LLVM 3.4svn
//

.version 5.0
.target sm_20
.address_size 64

	// .globl	_Z4kernPi

.visible .entry _Z4kernPi(
	.param .u64 _Z4kernPi_param_0
)
{
	.reg .pred  %p<2>;
	.reg .b32   %r<4>;
	.reg .b64   %rd<5>;


	ld.param.u64    %rd1, [_Z4kernPi_param_0];
	mov.u32     %r1, %tid.x;
	setp.gt.s32	%p1, %r1, 10;
	@%p1 bra    BB0_2;

	cvta.to.global.u64  %rd2, %rd1;
	mul.wide.s32    %rd3, %r1, 4;
	add.s64     %rd4, %rd2, %rd3;
	ld.global.u32   %r2, [%rd4];
	add.s32     %r3, %r2, 1;
	st.global.u32   [%rd4], %r3;

BB0_2:
	ret;
}
";
            IntPtr ptr    = Marshal.StringToHGlobalAnsi(kernel);

            res = Cuda.cuModuleLoadData(out CUmodule cuModule, ptr);
            if (res != CUresult.CUDA_SUCCESS)
            {
                throw new Exception();
            }
            res = Cuda.cuModuleGetFunction(out CUfunction helloWorld, cuModule, "_Z4kernPi");
            if (res != CUresult.CUDA_SUCCESS)
            {
                throw new Exception();
            }
            int[]    v       = { 'G', 'd', 'k', 'k', 'n', (char)31, 'v', 'n', 'q', 'k', 'c' };
            GCHandle handle  = GCHandle.Alloc(v, GCHandleType.Pinned);
            IntPtr   pointer = IntPtr.Zero;

            pointer = handle.AddrOfPinnedObject();
            res     = Cuda.cuMemAlloc_v2(out IntPtr dptr, 11 * sizeof(int));
            if (res != CUresult.CUDA_SUCCESS)
            {
                throw new Exception();
            }
            res = Cuda.cuMemcpyHtoD_v2(dptr, pointer, 11 * sizeof(int));
            if (res != CUresult.CUDA_SUCCESS)
            {
                throw new Exception();
            }

            IntPtr[] x        = new IntPtr[] { dptr };
            GCHandle handle2  = GCHandle.Alloc(x, GCHandleType.Pinned);
            IntPtr   pointer2 = IntPtr.Zero;

            pointer2 = handle2.AddrOfPinnedObject();

            IntPtr[] kp = new IntPtr[] { pointer2 };
            fixed(IntPtr *kernelParams = kp)
            {
                res = Cuda.cuLaunchKernel(helloWorld,
                                          1, 1, 1,  // grid has one block.
                                          11, 1, 1, // block has 11 threads.
                                          0,        // no shared memory
                                          default(CUstream),
                                          (IntPtr)kernelParams,
                                          (IntPtr)IntPtr.Zero
                                          );
            }

            if (res != CUresult.CUDA_SUCCESS)
            {
                throw new Exception();
            }
            res = Cuda.cuMemcpyDtoH_v2(pointer, dptr, 11 * sizeof(int));
            for (int i = 0; i < 11; ++i)
            {
                System.Console.Write(Convert.ToChar(v[i]));
            }
            System.Console.WriteLine();
            if (res != CUresult.CUDA_SUCCESS)
            {
                throw new Exception();
            }
            Cuda.cuCtxDestroy_v2(cuContext);
        }
Ejemplo n.º 14
0
 public Buffers()
 {
     _asm = new Asm();
     Cuda.cuInit(0);
     Cuda.cuCtxCreate_v2(out _pctx, (uint)Swigged.Cuda.CUctx_flags.CU_CTX_MAP_HOST, 0);
 }