Example #1
0
        public static void cuParamSetv(CUfunction hfunc, int offset, Object obj)
        {
            Wrap(() =>
            {
                try
                {
                    obj.AssertNotNull().GetType().IsValueType.AssertTrue();
                    var size = Marshal.SizeOf(obj);
                    var ptr = Marshal.AllocHGlobal(size);

                    try
                    {
                        Marshal.StructureToPtr(obj, ptr, false);
                        var error = nativeParamSetv(hfunc, offset, ptr, (uint)size);
                        if (error != CUresult.CUDA_SUCCESS) throw new CudaException(error);
                    }
                    finally
                    {
                        Marshal.FreeHGlobal(ptr);
                    }
                }
                catch (CudaException)
                {
                    throw;
                }
                catch (DllNotFoundException dnfe)
                {
                    throw new CudaException(CudaError.NoDriver, dnfe);
                }
                catch (Exception e)
                {
                    throw new CudaException(CudaError.Unknown, e);
                }
            });
        }
Example #2
0
        public JittedFunction(CUfunction handle, String name)
        {
            CudaDriver.Ensure();
            Handle = handle.AssertThat(h => h.IsNotNull);
            Name = name ?? "N/A";

            MaxThreadsPerBlock = nvcuda.cuFuncGetAttribute(CUfunction_attribute.MaxThreadsPerBlock, this);
            SharedSizeBytes = nvcuda.cuFuncGetAttribute(CUfunction_attribute.SharedSizeBytes, this);
            ConstSizeBytes = nvcuda.cuFuncGetAttribute(CUfunction_attribute.ConstSizeBytes, this);
            LocalSizeBytes = nvcuda.cuFuncGetAttribute(CUfunction_attribute.LocalSizeBytes, this);
            NumRegs = nvcuda.cuFuncGetAttribute(CUfunction_attribute.NumRegs, this);
            PtxVersion = (HardwareIsa)nvcuda.cuFuncGetAttribute(CUfunction_attribute.PtxVersion, this);
            BinaryVersion = (HardwareIsa)nvcuda.cuFuncGetAttribute(CUfunction_attribute.BinaryVersion, this);
        }
Example #3
0
 public static void cuParamSetf(CUfunction hfunc, int offset, float value)
 {
     Wrap(() =>
     {
         try
         {
             var error = nativeParamSetf(hfunc, offset, value);
             if (error != CUresult.CUDA_SUCCESS) throw new CudaException(error);
         }
         catch (CudaException)
         {
             throw;
         }
         catch (DllNotFoundException dnfe)
         {
             throw new CudaException(CudaError.NoDriver, dnfe);
         }
         catch (Exception e)
         {
             throw new CudaException(CudaError.Unknown, e);
         }
     });
 }
Example #4
0
 public static void cuLaunchGrid(CUfunction f, dim3 dim)
 {
     // wow here we ain't able to specify the Z dimension
     if (dim.Z != 1) throw new CudaException(CudaError.InvalidGridDim);
     cuLaunchGrid(f, dim.X, dim.Y);
 }
Example #5
0
        public static void cuLaunchGrid(CUfunction f, int grid_width, int grid_height)
        {
            Wrap(() =>
            {
                try
                {
                    // if we don't verify this here, we'll get a strange message from the driver
                    var caps = CudaDevice.Current.Caps.GridCaps;
                    var valid_grid_dim = caps.MaxGridDim >= new dim3(grid_width, grid_height, 1);
                    if (!valid_grid_dim) throw new CudaException(CudaError.InvalidGridDim);

                    var error = nativeLaunchGrid(f, grid_width, grid_height);
                    if (error != CUresult.CUDA_SUCCESS) throw new CudaException(error);
                }
                catch (CudaException)
                {
                    throw;
                }
                catch (DllNotFoundException dnfe)
                {
                    throw new CudaException(CudaError.NoDriver, dnfe);
                }
                catch (Exception e)
                {
                    throw new CudaException(CudaError.Unknown, e);
                }
            });
        }
Example #6
0
 // http://developer.download.nvidia.com/compute/cuda/3_1/toolkit/docs/online/group__CUEXEC_g1a99f308b2f1655df734eb62452fafd3.html
 private static extern CUresult nativeLaunchGrid(CUfunction f, int grid_width, int grid_height);
Example #7
0
 public static void cuFuncSetBlockShape(CUfunction hfunc, dim3 dim)
 {
     cuFuncSetBlockShape(hfunc, dim.X, dim.Y, dim.Z);
 }
Example #8
0
        public static void cuFuncSetBlockShape(CUfunction hfunc, int x, int y, int z)
        {
            Wrap(() =>
            {
                try
                {
                    // if we don't verify this here, we'll get a strange message from the driver
                    var caps = CudaDevice.Current.Caps.GridCaps;
                    var valid_block_dim = caps.MaxBlockDim >= new dim3(x, y, z);
                    if (!valid_block_dim) throw new CudaException(CudaError.InvalidBlockDim);

                    var error = nativeFuncSetBlockShape(hfunc, x, y, z);
                    if (error != CUresult.CUDA_SUCCESS) throw new CudaException(error);
                }
                catch (CudaException)
                {
                    throw;
                }
                catch (DllNotFoundException dnfe)
                {
                    throw new CudaException(CudaError.NoDriver, dnfe);
                }
                catch (Exception e)
                {
                    throw new CudaException(CudaError.Unknown, e);
                }
            });
        }
Example #9
0
        public CudaProvider(string cudaKernelPath, bool stochastic, int memoryCacheSize)
        {
            _stochastic = stochastic;
            _cache      = new DeviceMemory(memoryCacheSize);
            _cuda       = new CudaContext();
            _kernel     = new KernelModule(_cuda, cudaKernelPath);
            _blas       = new CudaBlas(AtomicsMode.Allowed);

            _cuda.SetCurrent();

            _pointwiseMultiply = _kernel.LoadFunction("PointwiseMultiply");
            _addInPlace        = _kernel.LoadFunction("AddInPlace");
            _subtractInPlace   = _kernel.LoadFunction("SubtractInPlace");
            _addToEachRow      = _kernel.LoadFunction("AddToEachRow");
            _addToEachColumn   = _kernel.LoadFunction("AddToEachColumn");
            _tanh              = _kernel.LoadFunction("TanH");
            _tanhDerivative    = _kernel.LoadFunction("TanHDerivative");
            _sigmoid           = _kernel.LoadFunction("Sigmoid");
            _sigmoidDerivative = _kernel.LoadFunction("SigmoidDerivative");
            _sumRows           = _kernel.LoadFunction("SumRows");
            _relu              = _kernel.LoadFunction("RELU");
            _reluDerivative    = _kernel.LoadFunction("RELUDerivative");
            _memClear          = _kernel.LoadFunction("MemClear");
            _sumColumns        = _kernel.LoadFunction("SumColumns");
            _pointwiseDivide   = _kernel.LoadFunction("PointwiseDivide");
            _sqrt              = _kernel.LoadFunction("Sqrt");
            _findMinAndMax     = _kernel.LoadFunction("FindMinAndMax");
            _findSum           = _kernel.LoadFunction("FindSum");
            _findStdDev        = _kernel.LoadFunction("FindStdDev");
            _constrain         = _kernel.LoadFunction("Constrain");
            _pow                    = _kernel.LoadFunction("Pow");
            _diagonal               = _kernel.LoadFunction("Diagonal");
            _l1Regularisation       = _kernel.LoadFunction("L1Regularisation");
            _leakyRelu              = _kernel.LoadFunction("LeakyRELU");
            _leakyReluDerivative    = _kernel.LoadFunction("LeakyRELUDerivative");
            _pointwiseDivideRows    = _kernel.LoadFunction("PointwiseDivideRows");
            _pointwiseDivideColumns = _kernel.LoadFunction("PointwiseDivideColumns");
            _splitRows              = _kernel.LoadFunction("SplitRows");
            _splitColumns           = _kernel.LoadFunction("SplitColumns");
            _concatRows             = _kernel.LoadFunction("ConcatRows");
            _concatColumns          = _kernel.LoadFunction("ConcatColumns");
            _euclideanDistance      = _kernel.LoadFunction("EuclideanDistance");
            _manhattanDistance      = _kernel.LoadFunction("ManhattanDistance");
            _abs                    = _kernel.LoadFunction("Abs");
            _normalise              = _kernel.LoadFunction("Normalise");
            _softmaxVector          = _kernel.LoadFunction("SoftmaxVector");
            _multiEuclidean         = _kernel.LoadFunction("MultiEuclideanDistance");
            _multiManhattan         = _kernel.LoadFunction("MultiManhattanDistance");
            _log                    = _kernel.LoadFunction("Log");
            _vectorAdd              = _kernel.LoadFunction("VectorAdd");
            _vectorCopyRandom       = _kernel.LoadFunction("VectorCopyRandom");
            _copyToMatrix           = _kernel.LoadFunction("CopyToMatrix");
            _vectorSplit            = _kernel.LoadFunction("VectorSplit");
            _tensorConvertToVector  = _kernel.LoadFunction("TensorConvertToVector");
            _tensorConvertToMatrix  = _kernel.LoadFunction("TensorConvertToMatrix");
            _tensorAddPadding       = _kernel.LoadFunction("TensorAddPadding");
            _tensorRemovePadding    = _kernel.LoadFunction("TensorRemovePadding");
            _tensorIm2Col           = _kernel.LoadFunction("TensorIm2Col");
            _softmaxDerivative      = _kernel.LoadFunction("SoftmaxDerivative");
            _reverse                = _kernel.LoadFunction("Reverse");
            _rotate                 = _kernel.LoadFunction("Rotate");
            _tensorMaxPool          = _kernel.LoadFunction("TensorMaxPool");
            _tensorReverseMaxPool   = _kernel.LoadFunction("TensorReverseMaxPool");
            _tensorReverseIm2Col    = _kernel.LoadFunction("TensorReverseIm2Col");
        }
Example #10
0
 // http://developer.download.nvidia.com/compute/cuda/3_1/toolkit/docs/online/group__CUEXEC_gf4466f8fe7043de8c97137990bb2e1e9.html
 private static extern CUresult nativeParamSetv(CUfunction hfunc, int offset, IntPtr ptr, uint numbytes);
Example #11
0
 // http://developer.download.nvidia.com/compute/cuda/3_1/toolkit/docs/online/group__CUEXEC_g91d75e10ed90df3fd3ecf2488f2cb27f.html
 private static extern CUresult nativeFuncSetCacheConfig(CUfunction hfunc, CUfunc_cache config);
Example #12
0
 public static void cuFuncSetSharedSize(CUfunction hfunc, uint bytes)
 {
     Wrap(() =>
     {
         try
         {
             var error = nativeFuncSetSharedSize(hfunc, bytes);
             if (error != CUresult.CUDA_SUCCESS) throw new CudaException(error);
         }
         catch (CudaException)
         {
             throw;
         }
         catch (DllNotFoundException dnfe)
         {
             throw new CudaException(CudaError.NoDriver, dnfe);
         }
         catch (Exception e)
         {
             throw new CudaException(CudaError.Unknown, e);
         }
     });
 }
Example #13
0
 // http://developer.download.nvidia.com/compute/cuda/3_1/toolkit/docs/online/group__CUEXEC_g65bc1dcf1127400d8f7ff935edbd333c.html
 private static extern CUresult nativeFuncSetSharedSize(CUfunction hfunc, uint bytes);
Example #14
0
 public static int cuFuncGetAttribute(CUfunction_attribute attrib, CUfunction hfunc)
 {
     return Wrap(() =>
     {
         try
         {
             int i;
             var error = nativeFuncGetAttribute(out i, attrib, hfunc);
             if (error != CUresult.CUDA_SUCCESS) throw new CudaException(error);
             return i;
         }
         catch (CudaException)
         {
             throw;
         }
         catch (DllNotFoundException dnfe)
         {
             throw new CudaException(CudaError.NoDriver, dnfe);
         }
         catch (Exception e)
         {
             throw new CudaException(CudaError.Unknown, e);
         }
     });
 }
Example #15
0
 // http://developer.download.nvidia.com/compute/cuda/3_1/toolkit/docs/online/group__CUEXEC_g7bde0e4a4ce32ce7460348e01a91f45f.html
 private static extern CUresult nativeFuncGetAttribute(out int pi, CUfunction_attribute attrib, CUfunction hfunc);
Example #16
0
 // http://developer.download.nvidia.com/compute/cuda/3_1/toolkit/docs/online/group__CUEXEC_g1946eac8c4d9d74f6aba01b0aab2c3dd.html
 private static extern CUresult nativeParamSetSize(CUfunction hfunc, uint numbytes);
Example #17
0
 // http://developer.download.nvidia.com/compute/cuda/3_1/toolkit/docs/online/group__CUMODULE_ge18a9f0d853ae3a96a38416a0671606b.html
 private static extern CUresult nativeModuleGetFunction(out CUfunction hfunc, CUmodule hmod, String name);
Example #18
0
 // http://developer.download.nvidia.com/compute/cuda/3_1/toolkit/docs/online/group__CUEXEC_g27981d94ac79ac7ebe8590a858785b3b.html
 private static extern CUresult nativeParamSetf(CUfunction hfunc, int offset, float value);
Example #19
0
 public JittedFunction(CUfunction handle)
     : this(handle, null)
 {
 }
Example #20
0
        public static void For(int number_of_threads, SimpleKernel simpleKernel)
        {
            if (Campy.Utils.Options.IsOn("import-only"))
            {
                JustImport(simpleKernel);
                return;
            }

            GCHandle handle1 = default(GCHandle);
            GCHandle handle2 = default(GCHandle);

            try
            {
                unsafe
                {
                    System.Reflection.MethodInfo method_info = simpleKernel.Method;
                    String kernel_assembly_file_name         = method_info.DeclaringType.Assembly.Location;
                    Mono.Cecil.ModuleDefinition md           = Campy.Meta.StickyReadMod.StickyReadModule(
                        kernel_assembly_file_name, new ReaderParameters {
                        ReadSymbols = true
                    });
                    MethodReference method_reference = md.ImportReference(method_info);

                    CUfunction ptr_to_kernel = default(CUfunction);
                    CUmodule   module        = default(CUmodule);

                    Campy.Utils.TimePhase.Time("compile     ", () =>
                    {
                        IntPtr image = Singleton._compiler.Compile(method_reference, simpleKernel.Target);
                        module       = Singleton._compiler.SetModule(method_reference, image);
                        Singleton._compiler.StoreJits(module);
                        ptr_to_kernel = Singleton._compiler.GetCudaFunction(method_reference, module);
                    });

                    RUNTIME.BclCheckHeap();

                    BUFFERS buffer = Singleton.Buffer;
                    IntPtr  kernel_target_object = IntPtr.Zero;

                    Campy.Utils.TimePhase.Time("deep copy ", () =>
                    {
                        int count = simpleKernel.Method.GetParameters().Length;
                        var bb    = Singleton._compiler.GetBasicBlock(method_reference);
                        if (bb.HasThis)
                        {
                            count++;
                        }
                        if (!(count == 1 || count == 2))
                        {
                            throw new Exception("Expecting at least one parameter for kernel.");
                        }

                        if (bb.HasThis)
                        {
                            kernel_target_object = buffer.AddDataStructure(simpleKernel.Target);
                        }
                    });

                    Campy.Utils.TimePhase.Time("kernel cctor set up", () =>
                    {
                        // For each cctor, run on GPU.
                        // Construct dependency graph of methods.
                        List <MethodReference> order_list = COMPILER.Singleton.ConstructCctorOrder();

                        // Finally, call cctors.
                        foreach (var bb in order_list)
                        {
                            if (Campy.Utils.Options.IsOn("trace-cctors"))
                            {
                                System.Console.WriteLine("Executing cctor "
                                                         + bb.FullName);
                            }
                            var cctor = Singleton._compiler.GetCudaFunction(bb, module);

                            var res = new CUresult(cudaError_enum.CUDA_SUCCESS);
                            Campy.Utils.CudaHelpers.MakeLinearTiling(1,
                                                                     out Campy.Utils.CudaHelpers.dim3 tile_size, out Campy.Utils.CudaHelpers.dim3 tiles);

                            res = Functions.cuLaunchKernel(
                                cctor,
                                tiles.x, tiles.y, tiles.z,             // grid has one block.
                                tile_size.x, tile_size.y, tile_size.z, // n threads.
                                0,                                     // no shared memory
                                default(CUstream),
                                (IntPtr)IntPtr.Zero,
                                (IntPtr)IntPtr.Zero
                                );

                            CudaHelpers.CheckCudaError(res);
                            res = Functions.cuCtxSynchronize(); // Make sure it's copied back to host.
                            CudaHelpers.CheckCudaError(res);
                        }
                    });

                    if (Campy.Utils.Options.IsOn("trace-cctors"))
                    {
                        System.Console.WriteLine("Done with cctors");
                    }

                    Campy.Utils.TimePhase.Time("kernel call ", () =>
                    {
                        IntPtr[] parm1 = new IntPtr[1];
                        IntPtr[] parm2 = new IntPtr[1];

                        parm1[0] = kernel_target_object;
                        parm2[0] = buffer.New(BUFFERS.SizeOf(typeof(int)));

                        IntPtr[] x1     = parm1;
                        handle1         = GCHandle.Alloc(x1, GCHandleType.Pinned);
                        IntPtr pointer1 = handle1.AddrOfPinnedObject();

                        IntPtr[] x2     = parm2;
                        handle2         = GCHandle.Alloc(x2, GCHandleType.Pinned);
                        IntPtr pointer2 = handle2.AddrOfPinnedObject();

                        IntPtr[] kp = new IntPtr[] { pointer1, pointer2 };
                        var res     = new CUresult(cudaError_enum.CUDA_SUCCESS);
                        fixed(IntPtr * kernelParams = kp)
                        {
                            Campy.Utils.CudaHelpers.MakeLinearTiling(number_of_threads,
                                                                     out Campy.Utils.CudaHelpers.dim3 tile_size, out Campy.Utils.CudaHelpers.dim3 tiles);

                            //MakeLinearTiling(1, out dim3 tile_size, out dim3 tiles);

                            res = Functions.cuLaunchKernel(
                                ptr_to_kernel,
                                tiles.x, tiles.y, tiles.z,             // grid has one block.
                                tile_size.x, tile_size.y, tile_size.z, // n threads.
                                0,                                     // no shared memory
                                default(CUstream),
                                (IntPtr)kernelParams,
                                (IntPtr)IntPtr.Zero
                                );
                        }
Example #21
0
 public static void cuFuncSetCacheConfig(CUfunction hfunc, CUfunc_cache config)
 {
     Wrap(() =>
     {
         try
         {
             var error = nativeFuncSetCacheConfig(hfunc, config);
             if (error != CUresult.CUDA_SUCCESS) throw new CudaException(error);
         }
         catch (CudaException)
         {
             throw;
         }
         catch (DllNotFoundException dnfe)
         {
             throw new CudaException(CudaError.NoDriver, dnfe);
         }
         catch (Exception e)
         {
             throw new CudaException(CudaError.Unknown, e);
         }
     });
 }
Example #22
0
        public static unsafe void Gpu()
        {
            Cuda.CUcontext ctx = new CUcontext(IntPtr.Zero);
            try
            {
                Cuda.Functions.cuInit(0);

                int count = 0;
                var res   = Cuda.Functions.cuDeviceGetCount(ref count);
                if (res.Value != cudaError_enum.CUDA_SUCCESS)
                {
                    throw new Exception();
                }

                for (int deviceID = 0; deviceID < count; ++deviceID)
                {
                    CUdevice device = default(CUdevice);
                    res = Cuda.Functions.cuDeviceGet(ref device, deviceID);
                    if (res.Value != cudaError_enum.CUDA_SUCCESS)
                    {
                        throw new Exception();
                    }
                    byte[] name = new byte[100];
                    fixed(void *p = name)
                    {
                        res = Cuda.Functions.cuDeviceGetName((IntPtr)p, 100, device);
                    }
                    if (res.Value != cudaError_enum.CUDA_SUCCESS)
                    {
                        throw new Exception();
                    }
                    System.Text.ASCIIEncoding enc = new System.Text.ASCIIEncoding();
                    string    n     = enc.GetString(name).Replace("\0", "");
                    CUdevprop props = default(CUdevprop);
                    res = Cuda.Functions.cuDeviceGetProperties(ref props, device);
                    if (res.Value != cudaError_enum.CUDA_SUCCESS)
                    {
                        throw new Exception();
                    }

                    System.Console.WriteLine("--------");
                    System.Console.WriteLine(Helper.OutProps(props, n));
                }

                {
                    CUdevice device = default(CUdevice);
                    res = Cuda.Functions.cuDeviceGet(ref device, 0);
                    if (res.Value != cudaError_enum.CUDA_SUCCESS)
                    {
                        throw new Exception();
                    }
                    CUcontext cuContext = default(CUcontext);
                    res = Cuda.Functions.cuCtxCreate_v2(ref cuContext, 0, device);
                    if (res.Value != cudaError_enum.CUDA_SUCCESS)
                    {
                        throw new Exception();
                    }
                    string   kernel   = @"
//
// Generated by NVIDIA NVVM Compiler
//
// Compiler Build ID: CL-21373419
// Cuda compilation tools, release 8.0, V8.0.55
// Based on LLVM 3.4svn
//

                    .version 5.0
                    .target sm_20
                    .address_size 64

    // .globl   _Z4kernPi

                    .visible .entry _Z4kernPi(
                .param .u64 _Z4kernPi_param_0
                )
            {
                .reg .pred  %p<2>;
                .reg .b32   %r<4>;
                .reg .b64   %rd<5>;


                ld.param.u64    %rd1, [_Z4kernPi_param_0];
                mov.u32     %r1, %tid.x;
                setp.gt.s32 %p1, %r1, 10;
                @%p1 bra    BB0_2;

                cvta.to.global.u64  %rd2, %rd1;
                mul.wide.s32    %rd3, %r1, 4;
                add.s64     %rd4, %rd2, %rd3;
                ld.global.u32   %r2, [%rd4];
                add.s32     %r3, %r2, 1;
                st.global.u32   [%rd4], %r3;

BB0_2:
                ret;
            }
            ";
                    IntPtr   ptr      = Marshal.StringToHGlobalAnsi(kernel);
                    CUmodule cuModule = default(CUmodule);
                    res = Functions.cuModuleLoadData(ref cuModule, ptr);
                    if (res.Value != cudaError_enum.CUDA_SUCCESS)
                    {
                        throw new Exception();
                    }
                    CUfunction helloWorld = default(CUfunction);
                    var        fun        = Marshal.StringToHGlobalAnsi("_Z4kernPi");
                    res = Functions.cuModuleGetFunction(ref helloWorld, cuModule, fun);
                    if (res.Value != cudaError_enum.CUDA_SUCCESS)
                    {
                        throw new Exception();
                    }
                    int[]    v       = { 'G', 'd', 'k', 'k', 'n', (char)31, 'v', 'n', 'q', 'k', 'c' };
                    GCHandle handle  = GCHandle.Alloc(v, GCHandleType.Pinned);
                    IntPtr   pointer = IntPtr.Zero;
                    pointer = handle.AddrOfPinnedObject();
                    CUdeviceptr dptr = default(CUdeviceptr);
                    res = Functions.cuMemAlloc_v2(ref dptr, 11 * sizeof(int));
                    if (res.Value != cudaError_enum.CUDA_SUCCESS)
                    {
                        throw new Exception();
                    }
                    res = Functions.cuMemcpyHtoD_v2(dptr, pointer, 11 * sizeof(int));
                    if (res.Value != cudaError_enum.CUDA_SUCCESS)
                    {
                        throw new Exception();
                    }

                    IntPtr[] x        = new IntPtr[] { (IntPtr)dptr.Value };
                    GCHandle handle2  = GCHandle.Alloc(x, GCHandleType.Pinned);
                    IntPtr   pointer2 = handle2.AddrOfPinnedObject();
                    IntPtr[] kp       = new IntPtr[] { pointer2 };
                    fixed(IntPtr *kernelParams = kp)
                    {
                        res = Functions.cuLaunchKernel(helloWorld,
                                                       1, 1, 1,  // grid has one block.
                                                       11, 1, 1, // block has 11 threads.
                                                       0,        // no shared memory
                                                       default(CUstream),
                                                       (IntPtr)kernelParams,
                                                       (IntPtr)IntPtr.Zero
                                                       );
                    }
                    if (res.Value != cudaError_enum.CUDA_SUCCESS)
                    {
                        throw new Exception();
                    }
                    res = Functions.cuMemcpyDtoH_v2(pointer, dptr, 11 * sizeof(int));
                    if (res.Value != cudaError_enum.CUDA_SUCCESS)
                    {
                        throw new Exception();
                    }
                    Cuda.Functions.cuCtxDestroy_v2(cuContext);
                    var aofc = v.Select(c => (char)c).ToArray();
                    System.Console.WriteLine("Result = " + new string(aofc));
                }

                {
                    CUdevice device = default(CUdevice);
                    res = Cuda.Functions.cuDeviceGet(ref device, 0);
                    if (res.Value != cudaError_enum.CUDA_SUCCESS)
                    {
                        throw new Exception();
                    }
                    CUcontext cuContext = default(CUcontext);
                    res = Cuda.Functions.cuCtxCreate_v2(ref cuContext, 0, device);
                    if (res.Value != cudaError_enum.CUDA_SUCCESS)
                    {
                        throw new Exception();
                    }
                    string path = Assembly.GetAssembly(typeof(Program)).Location;
                    path = Path.GetDirectoryName(path);
                    path = Path.GetFullPath(path + @"\..\..\..\..");
                    path = path + @"\cuda\x64\Debug\vector-sum.ptx";
                    StreamReader sr     = new StreamReader(path);
                    String       ptx    = sr.ReadToEnd();
                    IntPtr       ptr    = Marshal.StringToHGlobalAnsi(ptx);
                    CUmodule     module = default(CUmodule);
                    res = Cuda.Functions.cuModuleLoadData(ref module, ptr);
                    if (res.Value != cudaError_enum.CUDA_SUCCESS)
                    {
                        throw new Exception();
                    }
                    CUfunction helloWorld = default(CUfunction);
                    var        fun        = Marshal.StringToHGlobalAnsi("VectorSumParallel");
                    res = Cuda.Functions.cuModuleGetFunction(ref helloWorld, module, fun);
                    if (res.Value != cudaError_enum.CUDA_SUCCESS)
                    {
                        throw new Exception();
                    }
                    int         n   = 3;
                    int[]       a   = Enumerable.Range(1, 3).Select(v => v * 3).ToArray();
                    int[]       b   = Enumerable.Range(1, 3).Select(v => v * 2).ToArray();
                    int[]       c   = new int[3];
                    CUdeviceptr d_a = default(CUdeviceptr);
                    CUdeviceptr d_b = default(CUdeviceptr);
                    CUdeviceptr d_c = default(CUdeviceptr);
                    res = Cuda.Functions.cuMemAlloc_v2(ref d_a, n * sizeof(int));
                    if (res.Value != cudaError_enum.CUDA_SUCCESS)
                    {
                        throw new Exception();
                    }
                    res = Cuda.Functions.cuMemAlloc_v2(ref d_b, n * sizeof(int));
                    if (res.Value != cudaError_enum.CUDA_SUCCESS)
                    {
                        throw new Exception();
                    }
                    res = Cuda.Functions.cuMemAlloc_v2(ref d_c, n * sizeof(int));
                    if (res.Value != cudaError_enum.CUDA_SUCCESS)
                    {
                        throw new Exception();
                    }
                    var ha = GCHandle.Alloc(a, GCHandleType.Pinned);
                    var hb = GCHandle.Alloc(b, GCHandleType.Pinned);
                    var hc = GCHandle.Alloc(c, GCHandleType.Pinned);
                    var pa = ha.AddrOfPinnedObject();
                    var pb = hb.AddrOfPinnedObject();
                    var pc = hc.AddrOfPinnedObject();
                    res = Cuda.Functions.cuMemcpyHtoD_v2(d_a, pa, sizeof(int) * n);
                    if (res.Value != cudaError_enum.CUDA_SUCCESS)
                    {
                        throw new Exception();
                    }
                    res = Cuda.Functions.cuMemcpyHtoD_v2(d_b, pb, sizeof(int) * n);
                    if (res.Value != cudaError_enum.CUDA_SUCCESS)
                    {
                        throw new Exception();
                    }

                    IntPtr[] xa       = new IntPtr[] { (IntPtr)d_a.Value };
                    GCHandle handlea  = GCHandle.Alloc(xa, GCHandleType.Pinned);
                    IntPtr   pointera = handlea.AddrOfPinnedObject();

                    IntPtr[] xb       = new IntPtr[] { (IntPtr)d_b.Value };
                    GCHandle handleb  = GCHandle.Alloc(xb, GCHandleType.Pinned);
                    IntPtr   pointerb = handleb.AddrOfPinnedObject();

                    IntPtr[] xc       = new IntPtr[] { (IntPtr)d_c.Value };
                    GCHandle handlec  = GCHandle.Alloc(xc, GCHandleType.Pinned);
                    IntPtr   pointerc = handlec.AddrOfPinnedObject();

                    IntPtr[] xn       = new IntPtr[] { (IntPtr)n };
                    GCHandle handlen  = GCHandle.Alloc(xn, GCHandleType.Pinned);
                    IntPtr   pointern = handlen.AddrOfPinnedObject();

                    IntPtr[] kp = new IntPtr[] { pointera, pointerb, pointerc, pointern };
                    fixed(IntPtr *kernelParams = kp)
                    {
                        res = Cuda.Functions.cuLaunchKernel(
                            helloWorld,
                            1, 1, 1,       // grid has one block.
                            (uint)n, 1, 1, // block has 3 threads.
                            0,             // no shared memory
                            default(CUstream),
                            (IntPtr)kernelParams,
                            (IntPtr)IntPtr.Zero
                            );
                    }
                    if (res.Value != cudaError_enum.CUDA_SUCCESS)
                    {
                        throw new Exception();
                    }
                    res = Cuda.Functions.cuMemcpyDtoH_v2(hc.AddrOfPinnedObject(), d_c, n * sizeof(int));
                    if (res.Value != cudaError_enum.CUDA_SUCCESS)
                    {
                        throw new Exception();
                    }
                    Cuda.Functions.cuCtxDestroy_v2(cuContext);
                    System.Console.WriteLine(String.Join(" ", c));
                }
            }
            finally
            {
            }
        }
Example #23
0
 // http://developer.download.nvidia.com/compute/cuda/3_1/toolkit/docs/online/group__CUEXEC_g3196abfe0d52f6806eced043ea1e1fb4.html
 private static extern CUresult nativeFuncSetBlockShape(CUfunction hfunc, int x, int y, int z);
Example #24
0
        public override void DoLayout()
        {
            CUdeviceptr p1 = new CUdeviceptr();

            CUDADriver.cuMemAlloc(ref p1, 1 << 10);
            byte[] b = new byte[1 << 10];
            CUDADriver.cuMemcpyHtoD(p1, b, (uint)b.Length);

            CUfunction func = new CUfunction();
            CUResult   res;

            int nnodes = (int)Network.VertexCount * 2;
            int blocks = 32;

            if (nnodes < 1024 * blocks)
            {
                nnodes = 1024 * blocks;
            }
            while ((nnodes & (prop.SIMDWidth - 1)) != 0)
            {
                nnodes++;
            }
            nnodes--;

            //float dtime = 0.025f;  float dthf = dtime * 0.5f;
            //float epssq = 0.05f * 0.05f;
            //float itolsq = 1.0f / (0.5f * 0.5f);

            CUDADriver.cuModuleGetFunction(ref func, mod, "dummy");

            // Float4[] data = new Float4[100];
            CUdeviceptr ptr = new CUdeviceptr();

            //CUDADriver.cuMemAlloc(ref ptr, (uint) 100 * System.Runtime.InteropServices.Marshal.SizeOf(Float4));
            CUDADriver.cuParamSeti(func, 0, (uint)ptr.Pointer);
            CUDADriver.cuParamSetSize(func, 4);

            res = CUDADriver.cuLaunch(func);
            if (res != CUResult.Success)
            {
                Logger.AddMessage(LogEntryType.Warning, "CUDA Error in dummy function: " + res.ToString());
            }

            // InitializationKernel<<<1, 1>>>();
            CUDADriver.cuModuleGetFunction(ref func, mod, "InitializationKernel");
            res = CUDADriver.cuLaunch(func);
            if (res != CUResult.Success)
            {
                Logger.AddMessage(LogEntryType.Warning, "CUDA Error in InitializationKernel: " + res.ToString());
            }

            // BoundingBoxKernel<<<blocks * FACTOR1, THREADS1>>>();
            CUDADriver.cuModuleGetFunction(ref func, mod, "BoundingBoxKernel: " + res.ToString());
            CUDADriver.cuLaunch(func);
            if (res != CUResult.Success)
            {
                Logger.AddMessage(LogEntryType.Warning, "CUDA Error in BoundingBoxKernel: " + res.ToString());
            }

            // TreeBuildingKernel<<<blocks * FACTOR2, THREADS2>>>();
            CUDADriver.cuModuleGetFunction(ref func, mod, "TreeBuildingKernel: " + res.ToString());
            CUDADriver.cuLaunch(func);
            if (res != CUResult.Success)
            {
                Logger.AddMessage(LogEntryType.Warning, "CUDA Error in TreeBuildingKernel: " + res.ToString());
            }

            // SummarizationKernel<<<blocks * FACTOR3, THREADS3>>>();
            CUDADriver.cuModuleGetFunction(ref func, mod, "SummarizationKernel: " + res.ToString());
            CUDADriver.cuLaunch(func);
            if (res != CUResult.Success)
            {
                Logger.AddMessage(LogEntryType.Warning, "CUDA Error in SummarizationKernel: " + res.ToString());
            }

            // ForceCalculationKernel<<<blocks * FACTOR5, THREADS5>>>();
            CUDADriver.cuModuleGetFunction(ref func, mod, "ForceCalculationKernel: " + res.ToString());
            CUDADriver.cuLaunch(func);
            if (res != CUResult.Success)
            {
                Logger.AddMessage(LogEntryType.Warning, "CUDA Error in ForceCalculationKernel: " + res.ToString());
            }

            // IntegrationKernel<<<blocks * FACTOR6, THREADS6>>>();
            CUDADriver.cuModuleGetFunction(ref func, mod, "IntegrationKernel");
            CUDADriver.cuLaunch(func);
            if (res != CUResult.Success)
            {
                Logger.AddMessage(LogEntryType.Warning, "CUDA Error in IntegrationKernel: " + res.ToString());
            }
        }