Example #1
0
        /// <summary cref="LLVMBackend.CreateEntry(CompileUnit, EntryPoint, out string)"/>
        /// <remarks>
        /// Emits (or reuses) the externally visible kernel function that wraps the
        /// virtual entry point of <paramref name="entryPoint"/>:
        /// <list type="bullet">
        /// <item>If the module already contains a function with the computed entry-point
        /// name, its linkage is set to external and it is returned unchanged.</item>
        /// <item>Otherwise a new function with a "Main" and an "Exit" block is created.
        /// For entries that are not grouped-index entries, an additional "Core" block is
        /// guarded by a range check against parameter 0.</item>
        /// <item>Uniform variables and shared-memory variables are forwarded to the
        /// virtual entry point, and the compile unit is optimized afterwards.</item>
        /// </list>
        /// </remarks>
        /// <exception cref="InvalidOperationException">
        /// No PTX device functions are registered for <paramref name="unit"/>.
        /// </exception>
        internal override LLVMValueRef CreateEntry(CompileUnit unit, EntryPoint entryPoint, out string entryPointName)
        {
            if (!ptxDeviceFunctions.TryGetValue(unit, out PTXDeviceFunctions deviceFunctions))
            {
                throw new InvalidOperationException(ErrorMessages.NotSupportedCompileUnit);
            }

            entryPointName = unit.GetLLVMName(entryPoint.MethodInfo, CudaKernelCategory);
            var context = unit.LLVMContext;
            var module  = unit.LLVMModule;

            // Reuse an entry point that has already been emitted into this module
            LLVMValueRef cudaEntryPoint = GetNamedFunction(module, entryPointName);
            if (cudaEntryPoint.Pointer != IntPtr.Zero)
            {
                SetLinkage(cudaEntryPoint, LLVMLinkage.LLVMExternalLinkage);
                return cudaEntryPoint;
            }

            var entryPointType = CreatePTXKernelFunctionType(unit, entryPoint, out int parameterOffset);

            cudaEntryPoint = AddFunction(module, entryPointName, entryPointType);
            SetLinkage(cudaEntryPoint, LLVMLinkage.LLVMExternalLinkage);

            var entryBlock = AppendBasicBlock(cudaEntryPoint, "Main");
            var exitBlock  = AppendBasicBlock(cudaEntryPoint, "Exit");

            // The builder wraps a native LLVM object that is not tracked by the GC.
            // Release it deterministically, even if code generation or the access
            // verification below throws.
            var builder = CreateBuilderInContext(context);
            try
            {
                PositionBuilderAtEnd(builder, entryBlock);

                // Create a proper entry point for the virtual entry point
                var indexValue      = CreateIndexValue(unit, entryPoint, builder, deviceFunctions);
                var groupIndexValue = CreateGroupIndexValue(unit, entryPoint, builder, deviceFunctions);

                if (!entryPoint.IsGroupedIndexEntry)
                {
                    // We have to generate code for an implicitly grouped kernel
                    // -> Compute the actual global idx
                    indexValue = CreateGlobalIndexValue(
                        unit,
                        entryPoint,
                        builder,
                        deviceFunctions,
                        indexValue,
                        groupIndexValue);

                    // Append a new main block that contains the actual body
                    var mainBlock = AppendBasicBlock(cudaEntryPoint, "Core");

                    // Emit the required check (custom dimension size is stored in parameter 0).
                    // This check is required to ensure that the index is always smaller than the
                    // specified user size. Otherwise, the index might be larger due to custom blocking!
                    Debug.Assert(parameterOffset > 0);
                    var rangeComparisonResult = CreateGlobalIndexRangeComparison(
                        unit,
                        entryPoint,
                        builder,
                        deviceFunctions,
                        indexValue,
                        GetParam(cudaEntryPoint, 0));
                    BuildCondBr(builder, rangeComparisonResult, mainBlock, exitBlock);

                    // Move builder to main block to emit the actual kernel body
                    PositionBuilderAtEnd(builder, mainBlock);
                }
                else
                {
                    Debug.Assert(parameterOffset < 1);
                    indexValue = CreateGroupedIndex(
                        unit,
                        entryPoint,
                        builder,
                        deviceFunctions,
                        indexValue,
                        groupIndexValue);
                }

                // Argument 0 of the virtual entry point is the index; the remaining
                // slots are filled via the Index property of each variable.
                LLVMValueRef[] kernelValues = new LLVMValueRef[entryPoint.NumCustomParameters + 1];
                kernelValues[0] = indexValue;

                var kernelParameters = GetParams(cudaEntryPoint);
                var uniformVariables = entryPoint.UniformVariables;

                // Uniform variables start at parameterOffset in the kernel signature
                for (int i = 0, kernelParamIdx = parameterOffset, e = uniformVariables.Length; i < e; ++i, ++kernelParamIdx)
                {
                    var variable    = uniformVariables[i];
                    var kernelParam = kernelParameters[kernelParamIdx];
                    var kernelValue = kernelParam;
                    if (variable.VariableType.IsPassedViaPtr())
                    {
                        // We have to generate a local alloca and store the current parameter value
                        kernelValue = BuildAlloca(builder, TypeOf(kernelParam), string.Empty);
                        BuildStore(builder, kernelParam, kernelValue);
                    }
                    kernelValues[variable.Index] = kernelValue;
                }

                var sharedMemoryVariables = entryPoint.SharedMemoryVariables;

                foreach (var variable in sharedMemoryVariables)
                {
                    // This type can be: ArrayType<T> or VariableType<T>
                    var variableType        = unit.GetType(variable.Type);
                    var variableElementType = unit.GetType(variable.ElementType);
                    var sharedVariable      = GetUndef(variableType);
                    if (variable.IsArray)
                    {
                        // However, ArrayType<T> encapsulates the type ArrayView<T, Index>
                        var genericArrayView = GetUndef(GetStructElementTypes(variableType)[0]);
                        // A missing count results in a zero-length array declaration
                        var arrayType        = ArrayType(variableElementType, variable.Count != null ? variable.Count.Value : 0);
                        var sharedMem        = DeclareSharedMemoryVariable(unit, builder, arrayType);
                        genericArrayView = BuildInsertValue(builder, genericArrayView, sharedMem, 0, string.Empty);

                        LLVMValueRef intIndex;
                        if (variable.Count != null)
                        {
                            // The length is known up front: embed it as a 32-bit constant
                            intIndex = ConstInt(context.Int32Type, variable.Count.Value, false);
                        }
                        else
                        {
                            // Attach the right length information that is given via a parameter
                            Debug.Assert(variable.SharedMemoryIndex >= 0);
                            // NOTE(review): unlike the uniform-variable loop above, this lookup
                            // does not add parameterOffset. That is equivalent as long as the
                            // offset is 0 (asserted for grouped entries) - confirm that length
                            // parameters cannot occur when parameterOffset is greater than 0.
                            intIndex = kernelParameters[uniformVariables.Length + variable.SharedMemoryIndex];
                        }

                        // Assemble the view: element 0 holds the memory, element 1 the length index
                        var indexInstance = GetUndef(unit.GetType(typeof(Index)));
                        indexInstance    = BuildInsertValue(builder, indexInstance, intIndex, 0, string.Empty);
                        genericArrayView = BuildInsertValue(builder, genericArrayView, indexInstance, 1, string.Empty);
                        sharedVariable   = BuildInsertValue(builder, sharedVariable, genericArrayView, 0, string.Empty);
                    }
                    else
                    {
                        var sharedMem = DeclareSharedMemoryVariable(unit, builder, variableElementType);
                        // Insert pointer into variable view
                        sharedVariable = BuildInsertValue(builder, sharedVariable, sharedMem, 0, string.Empty);
                    }

                    // Pass the assembled shared-memory value to the virtual entry point
                    kernelValues[variable.Index] = sharedVariable;
                }

                // Resolve the virtual entry point and call it with the collected arguments
                var virtualEntryPoint = unit.GetMethod(entryPoint.MethodInfo);

                BuildCall(builder, virtualEntryPoint.LLVMFunction, kernelValues);

                // Verify method access in the scope of implicitly-grouped kernels
                if (!entryPoint.IsGroupedIndexEntry)
                {
                    virtualEntryPoint.VisitCalls((instruction, calledMethod) =>
                    {
                        CodeGenerator.VerifyAccessToMethodInImplicitlyGroupedKernel(
                            unit.CompilationContext,
                            calledMethod.MethodBase,
                            entryPoint);
                    });
                }

                // Jump to exit block
                BuildBr(builder, exitBlock);

                // Build exit block
                PositionBuilderAtEnd(builder, exitBlock);
                BuildRetVoid(builder);
            }
            finally
            {
                DisposeBuilder(builder);
            }

            unit.Optimize();

            return cudaEntryPoint;
        }