C# (CSharp) CudaContext.GetDeviceComputeCapability Examples

Programming Language: C# (CSharp)

Class/Type: CudaContext

Method/Function: GetDeviceComputeCapability

Examples at hotexamples.com: 4

C# (CSharp) CudaContext.GetDeviceComputeCapability - 4 examples found. These are the top rated real world C# (CSharp) examples of CudaContext.GetDeviceComputeCapability extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

LoadKernelPTX(26)

Dispose(24)

LoadKernel(24)

GetDeviceInfo(24)

GetDeviceCount(19)

GetMaxGflopsDeviceId(15)

LoadModule(14)

LoadModulePTX(14)

GetDeviceName(10)

AllocateMemory(9)

CopyToDevice(8)

FreeMemory(7)

CopyToHost(6)

GetDeviceComputeCapability(4)

GetDirectXDevices(4)

GetFreeDeviceMemorySize(4)

ClearMemory(4)

LoadKernelFatBin(3)

EnablePeerAccess(1)

AllocateMemoryFor(1)

DeviceCanAccessPeer(1)

GetMaxGflopsDevice(1)

CreateOpenGLContext(1)

Example #1

Show file

        // General GPU Device CUDA Initialization
        static int gpuDeviceInit(int devID)
        {
            int deviceCount = CudaContext.GetDeviceCount();

            if (deviceCount == 0)
            {
                Console.Write("gpuDeviceInit() CUDA error: no devices supporting CUDA.\n");
                Environment.Exit(-1);
            }
            if (devID < 0)
            {
                devID = 0;
            }
            if (devID > deviceCount - 1)
            {
                Console.Write("\n");
                Console.Write(">> {0} CUDA capable GPU device(s) detected. <<\n", deviceCount);
                Console.Write(">> gpuDeviceInit (-device={0}) is not a valid GPU device. <<\n", devID);
                Console.Write("\n");
                return(-devID);
            }


            if (CudaContext.GetDeviceComputeCapability(devID).Major < 1)
            {
                Console.Write("gpuDeviceInit(): GPU device does not support CUDA.\n");
                Environment.Exit(-1);
            }
            ctx = new CudaContext(devID);
            Console.Write("> gpuDeviceInit() CUDA device [{0}]: {1}\n", devID, ctx.GetDeviceName());
            return(devID);
        }

Example #2

Show file

File: GPU_Functionality.cs Project: my81656581/icFlow

        public GPU_Functionality(int deviceID = 0)
        {
            ctx     = new CudaContext(deviceID);
            version = ctx.GetDeviceComputeCapability();
            Trace.WriteLine($"cuda compute capability {version.Major}.{version.Minor}");

            CUmodule collision_module = ctx.LoadModulePTX("collision_kernels.ptx");

            kNarrowPhase            = new CudaKernel("kNarrowPhase_new", collision_module, ctx);
            kFindClosestFace        = new CudaKernel("kFindClosestFace", collision_module, ctx);
            kCollisionResponseForce = new CudaKernel("kCollisionResponseForce", collision_module, ctx);
            dim3 block = new dim3(block_size, 1, 1);

            kNarrowPhase.BlockDimensions            = block;
            kFindClosestFace.BlockDimensions        = block;
            kCollisionResponseForce.BlockDimensions = block;

            // cz
            CUmodule module_cz_kernels = ctx.LoadModulePTX("cz_kernels.ptx");

            kczCZForce = new CudaKernel("kczCZForce", module_cz_kernels, ctx);
            kczCZForce.BlockDimensions = block;

            // elem
            CUmodule module_elem_kernels = ctx.LoadModulePTX("elem_kernels.ptx");

            kelElementElasticityForce = new CudaKernel("kelElementElasticityForce", module_elem_kernels, ctx);
            kelElementElasticityForce.BlockDimensions = block;
        }

Example #3

Show file

File: BlurPerformanceTests.cs Project: strict-lang/Strict

        public void CompileKernel()
        {
            //generate as output language obviously from strict code
            var code =
                @"extern ""C"" __global__ void blur(unsigned char* image, unsigned char* output, size_t width, size_t height)
{
  size_t tid = blockIdx.x * blockDim.x + threadIdx.x;
  if (tid > width && tid < width*height-width) {
    output[tid] = image[tid];// (image[tid-2048]+image[tid-1]+image[tid]+image[tid+1]+image[tid+2048])/5;
  }
}";

            using var rtc = new CudaRuntimeCompiler(code, "blur");
            try
            {
                // Use max capabilities on actual hardware we have at runtime
                var computeVersion     = CudaContext.GetDeviceComputeCapability(0);
                var shaderModelVersion = "" + computeVersion.Major + computeVersion.Minor;
                Console.WriteLine("ShaderModelVersion=" + shaderModelVersion);
                // see http://docs.nvidia.com/cuda/nvrtc/index.html for usage and options
                //https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/
                //nvcc .\vectorAdd.cu -use_fast_math -ptx -m 64 -arch compute_61 -code sm_61 -o .\vectorAdd.ptx
                //https://docs.nvidia.com/cuda/nvrtc/index.html#group__options
                rtc.Compile(new[] { "--gpu-architecture=compute_" + shaderModelVersion });
                Console.WriteLine("Cuda compile log: " + rtc.GetLogAsString());
                var deviceID = 0;
                var ctx      = new CudaContext(deviceID);
                kernel = ctx.LoadKernelPTX(rtc.GetPTX(), "blur");
                kernel.GridDimensions  = (Size + 511) / 512;
                kernel.BlockDimensions = 512;
                //unused: float[] copyInput = new float[Size];
                input  = image;
                output = new CudaDeviceVariable <byte>(Size);
            }
            catch (NVRTCException ex)
            {
                Console.WriteLine("Cuda compile log: " + rtc.GetLogAsString());
                throw new Exception(ex.NVRTCError + " " + ex);
            }
        }

Example #4

Show file

File: CudaModule.cs Project: anthrax3/Flame

        private static async Task <CudaModule> CompileAsync(
            IMethod method,
            IEnumerable <ITypeMember> memberRoots,
            IEnumerable <IType> typeRoots,
            int threadIdParamIndex,
            ClrAssembly assembly,
            CudaContext context)
        {
            // Figure out which members we need to compile.
            var desc = await CreateContentDescriptionAsync(method, memberRoots, typeRoots, assembly);

            // Compile those members to LLVM IR. Use an Itanium name mangling scheme.
            var mangler       = new ItaniumMangler(assembly.Resolver.TypeEnvironment);
            var moduleBuilder = LlvmBackend.Compile(desc, assembly.Resolver.TypeEnvironment);
            var module        = moduleBuilder.Module;

            // Generate type metadata for all type roots.
            foreach (var type in typeRoots)
            {
                moduleBuilder.Metadata.GetMetadata(type, moduleBuilder);
            }

            // Get the compiled kernel function.
            var kernelFuncName = mangler.Mangle(method, true);
            var kernelFunc     = LLVM.GetNamedFunction(module, kernelFuncName);

            if (threadIdParamIndex >= 0)
            {
                // If we have a thread ID parameter, then we need to generate a thunk
                // kernel function that calls our actual kernel function. This thunk's
                // responsibility is to determine the thread ID of the kernel.
                var thunkKernelName = "kernel";
                var thunkTargetType = kernelFunc.TypeOf().GetElementType();
                var thunkParamTypes = new List <LLVMTypeRef>(thunkTargetType.GetParamTypes());
                if (threadIdParamIndex < thunkParamTypes.Count)
                {
                    thunkParamTypes.RemoveAt(threadIdParamIndex);
                }
                var thunkKernel = LLVM.AddFunction(
                    module,
                    thunkKernelName,
                    LLVM.FunctionType(
                        thunkTargetType.GetReturnType(),
                        thunkParamTypes.ToArray(),
                        thunkTargetType.IsFunctionVarArg));

                using (var builder = new IRBuilder(moduleBuilder.Context))
                {
                    builder.PositionBuilderAtEnd(thunkKernel.AppendBasicBlock("entry"));
                    var args = new List <LLVMValueRef>(thunkKernel.GetParams());
                    args.Insert(threadIdParamIndex, ComputeUniqueThreadId(builder, module));
                    var call = builder.CreateCall(kernelFunc, args.ToArray(), "");
                    if (call.TypeOf().TypeKind == LLVMTypeKind.LLVMVoidTypeKind)
                    {
                        builder.CreateRetVoid();
                    }
                    else
                    {
                        builder.CreateRet(call);
                    }
                }

                kernelFuncName = thunkKernelName;
                kernelFunc     = thunkKernel;
            }

            // Mark the compiled kernel as a kernel symbol.
            LLVM.AddNamedMetadataOperand(
                module,
                "nvvm.annotations",
                LLVM.MDNode(new LLVMValueRef[]
            {
                kernelFunc,
                MDString("kernel"),
                LLVM.ConstInt(LLVM.Int32TypeInContext(LLVM.GetModuleContext(module)), 1, false)
            }));

            // LLVM.DumpModule(module);

            // Compile that LLVM IR down to PTX.
            LLVMTargetMachineRef machine;
            var ptx = CompileToPtx(module, context.GetDeviceComputeCapability(), out machine);

            // Console.WriteLine(System.Text.Encoding.UTF8.GetString(ptx));

            // Load the PTX kernel.
            return(new CudaModule(assembly, moduleBuilder, machine, context.LoadModulePTX(ptx), kernelFuncName, context));
        }