Ejemplo n.º 1
0
        private static async Task <LLVMModuleRef> CompileAsync(ClrAssembly assembly)
        {
            var desc = await CreateContentDescriptionAsync(assembly);

            return(LlvmBackend.Compile(desc, assembly.Resolver.TypeEnvironment).Module);
        }
Ejemplo n.º 2
0
        private static async Task <CudaModule> CompileAsync(
            IMethod method,
            IEnumerable <ITypeMember> memberRoots,
            IEnumerable <IType> typeRoots,
            int threadIdParamIndex,
            ClrAssembly assembly,
            CudaContext context)
        {
            // Figure out which members we need to compile.
            var desc = await CreateContentDescriptionAsync(method, memberRoots, typeRoots, assembly);

            // Compile those members to LLVM IR. Use an Itanium name mangling scheme.
            var mangler       = new ItaniumMangler(assembly.Resolver.TypeEnvironment);
            var moduleBuilder = LlvmBackend.Compile(desc, assembly.Resolver.TypeEnvironment);
            var module        = moduleBuilder.Module;

            // Generate type metadata for all type roots.
            foreach (var type in typeRoots)
            {
                moduleBuilder.Metadata.GetMetadata(type, moduleBuilder);
            }

            // Get the compiled kernel function.
            var kernelFuncName = mangler.Mangle(method, true);
            var kernelFunc     = LLVM.GetNamedFunction(module, kernelFuncName);

            if (threadIdParamIndex >= 0)
            {
                // If we have a thread ID parameter, then we need to generate a thunk
                // kernel function that calls our actual kernel function. This thunk's
                // responsibility is to determine the thread ID of the kernel.
                var thunkKernelName = "kernel";
                var thunkTargetType = kernelFunc.TypeOf().GetElementType();
                var thunkParamTypes = new List <LLVMTypeRef>(thunkTargetType.GetParamTypes());
                if (threadIdParamIndex < thunkParamTypes.Count)
                {
                    thunkParamTypes.RemoveAt(threadIdParamIndex);
                }
                var thunkKernel = LLVM.AddFunction(
                    module,
                    thunkKernelName,
                    LLVM.FunctionType(
                        thunkTargetType.GetReturnType(),
                        thunkParamTypes.ToArray(),
                        thunkTargetType.IsFunctionVarArg));

                using (var builder = new IRBuilder(moduleBuilder.Context))
                {
                    builder.PositionBuilderAtEnd(thunkKernel.AppendBasicBlock("entry"));
                    var args = new List <LLVMValueRef>(thunkKernel.GetParams());
                    args.Insert(threadIdParamIndex, ComputeUniqueThreadId(builder, module));
                    var call = builder.CreateCall(kernelFunc, args.ToArray(), "");
                    if (call.TypeOf().TypeKind == LLVMTypeKind.LLVMVoidTypeKind)
                    {
                        builder.CreateRetVoid();
                    }
                    else
                    {
                        builder.CreateRet(call);
                    }
                }

                kernelFuncName = thunkKernelName;
                kernelFunc     = thunkKernel;
            }

            // Mark the compiled kernel as a kernel symbol.
            LLVM.AddNamedMetadataOperand(
                module,
                "nvvm.annotations",
                LLVM.MDNode(new LLVMValueRef[]
            {
                kernelFunc,
                MDString("kernel"),
                LLVM.ConstInt(LLVM.Int32TypeInContext(LLVM.GetModuleContext(module)), 1, false)
            }));

            // LLVM.DumpModule(module);

            // Compile that LLVM IR down to PTX.
            LLVMTargetMachineRef machine;
            var ptx = CompileToPtx(module, context.GetDeviceComputeCapability(), out machine);

            // Console.WriteLine(System.Text.Encoding.UTF8.GetString(ptx));

            // Load the PTX kernel.
            return(new CudaModule(assembly, moduleBuilder, machine, context.LoadModulePTX(ptx), kernelFuncName, context));
        }