Beispiel #1
0
        /// <summary>
        /// Builds the LLVM function type of the actual kernel entry point.
        /// </summary>
        /// <remarks>
        /// Parameter layout: an optional leading parameter of the ungrouped index type (only
        /// when the entry point is not a grouped-index entry), then one parameter per uniform
        /// variable, then one <see cref="int"/> parameter per dynamically sized shared-memory
        /// variable.
        /// </remarks>
        /// <param name="unit">The target unit used to resolve LLVM types.</param>
        /// <param name="entryPoint">The target entry point.</param>
        /// <param name="parameterOffset">Receives the index of the first uniform-variable parameter (0 or 1).</param>
        /// <returns>A void-returning function type for the actual kernel entry point.</returns>
        private LLVMTypeRef CreatePTXKernelFunctionType(
            CompileUnit unit,
            EntryPoint entryPoint,
            out int parameterOffset)
        {
            if (entryPoint.IsGroupedIndexEntry)
            {
                parameterOffset = 0;
            }
            else
            {
                parameterOffset = 1;
            }

            var uniformCount       = entryPoint.NumUniformVariables;
            var dynamicSharedCount = entryPoint.NumDynamicallySizedSharedMemoryVariables;
            var parameterTypes     = new LLVMTypeRef[parameterOffset + uniformCount + dynamicSharedCount];

            // Index of the next parameter slot to fill
            var nextSlot = 0;

            // Implicitly grouped kernels receive their custom dispatch size as first parameter
            if (parameterOffset > 0)
            {
                parameterTypes[nextSlot++] = unit.GetType(entryPoint.UngroupedIndexType);
            }

            Debug.Assert(parameterOffset >= 0 && parameterOffset < 2);

            // One parameter per uniform variable
            for (var uniformIdx = 0; uniformIdx < uniformCount; ++uniformIdx)
            {
                var uniformVariable = entryPoint.UniformVariables[uniformIdx];
                parameterTypes[nextSlot++] = unit.GetType(uniformVariable.VariableType);
            }

            // One length parameter per dynamically sized shared-memory variable
            for (var sharedIdx = 0; sharedIdx < dynamicSharedCount; ++sharedIdx)
            {
                parameterTypes[nextSlot++] = unit.GetType(typeof(int));
            }

            return FunctionType(Context.LLVMContext.VoidType, parameterTypes);
        }
        /// <summary>
        /// Emits the computation of a warp-shuffle mask:
        /// <c>(warpSize - width) &lt;&lt; 8</c>, optionally or-ed with <c>warpSize - 1</c>.
        /// </summary>
        /// <param name="unit">The current unit (used to resolve the 32bit integer type).</param>
        /// <param name="builder">The current builder.</param>
        /// <param name="width">The width that was passed by the user.</param>
        /// <param name="addOrMask">True, to or the result with (WarpSize - 1).</param>
        /// <returns>A value that represents the desired warp-shuffle mask.</returns>
        private LLVMValueRef BuildWarpShuffleMask(
            CompileUnit unit,
            LLVMBuilderRef builder,
            LLVMValueRef width,
            bool addOrMask)
        {
            var warpSize = MakeWarpSize(builder);

            // (warpSize - width) << 8
            var widthDelta  = BuildSub(builder, warpSize, width, string.Empty);
            var shiftAmount = ConstInt(unit.GetType(BasicValueType.Int32), 8, false);
            var shiftedMask = BuildShl(builder, widthDelta, shiftAmount, string.Empty);

            if (!addOrMask)
            {
                return shiftedMask;
            }

            // ... | (warpSize - 1)
            var one         = ConstInt(unit.GetType(BasicValueType.Int32), 1, false);
            var lowBitsMask = BuildSub(builder, warpSize, one, string.Empty);
            return BuildOr(builder, shiftedMask, lowBitsMask, string.Empty);
        }
        /// <summary>
        /// Constructs a default LLVM-based ABI specification.
        /// </summary>
        /// <remarks>
        /// Every entry of <c>ManagedAlignments</c> is checked against the ABI alignment that
        /// the LLVM target data layout reports for the corresponding LLVM type. Any mismatch
        /// is rejected with a <see cref="NotSupportedException"/>.
        /// </remarks>
        /// <param name="unit">The compile unit used for ABI generation.</param>
        /// <exception cref="NotSupportedException">
        /// The backend of <paramref name="unit"/> is not an LLVM backend, or a managed
        /// alignment differs from the alignment reported by LLVM.
        /// </exception>
        public DefaultLLVMABI(CompileUnit unit)
            : base(unit)
        {
            if (!(unit.Backend is LLVMBackend llvmBackend))
            {
                throw new NotSupportedException(ErrorMessages.NotSupportedBackend);
            }

            LLVMTargetData = CreateTargetDataLayout(llvmBackend.LLVMTargetMachine);

            foreach (var entry in ManagedAlignments)
            {
                var requestedType = entry.Key;
                var llvmAlignment = ABIAlignmentOfType(LLVMTargetData, unit.GetType(requestedType));

                // 64bit floats that are mapped to 32bit floats have to be compared
                // against the expected alignment of float instead of double.
                var expectationType = unit.Force32BitFloats && requestedType == typeof(double)
                    ? typeof(float)
                    : requestedType;

                if (ManagedAlignments[expectationType] != llvmAlignment)
                {
                    throw new NotSupportedException(string.Format(
                        ErrorMessages.CustomABIImplementationRequired,
                        requestedType));
                }

                Alignments.Add(requestedType, llvmAlignment);
            }

            AddNonBlittableTypes();

            var pointerAlignment = ABIAlignmentOfType(LLVMTargetData, unit.LLVMContext.VoidPtrType);
            AddPtrAlignment(pointerAlignment);
        }
Beispiel #4
0
        /// <summary>
        /// Creates a value of the entry point's ungrouped index type whose components are
        /// filled with the results of the GetThreadIdxX/Y/Z device functions.
        /// </summary>
        /// <remarks>
        /// The X component is always set. The Y and Z components are only set when the
        /// index type of the entry point has at least two or three dimensions; all other
        /// components remain undefined.
        /// </remarks>
        /// <param name="unit">The target unit.</param>
        /// <param name="entryPoint">The entry point.</param>
        /// <param name="builder">The LLVM builder.</param>
        /// <param name="cudaDeviceFunctions">A reference to the cuda device functions.</param>
        /// <returns>The LLVM value containing the current group-thread indices.</returns>
        private static LLVMValueRef CreateGroupIndexValue(
            CompileUnit unit,
            EntryPoint entryPoint,
            LLVMBuilderRef builder,
            PTXDeviceFunctions cudaDeviceFunctions)
        {
            var result = GetUndef(unit.GetType(entryPoint.UngroupedIndexType));

            Debug.Assert(entryPoint.Type >= IndexType.Index1D);

            var grouped = entryPoint.IsGroupedIndexEntry;
            var kind    = entryPoint.Type;

            // Determine which components beyond X are required
            var needsY = (!grouped && kind >= IndexType.Index2D) || kind >= IndexType.GroupedIndex2D;
            var needsZ = (!grouped && kind >= IndexType.Index3D) || kind >= IndexType.GroupedIndex3D;

            var threadIdxX = BuildCall(builder, cudaDeviceFunctions.GetThreadIdxX.Value);
            result = BuildInsertValue(builder, result, threadIdxX, 0, "TIdx1");

            if (needsY)
            {
                var threadIdxY = BuildCall(builder, cudaDeviceFunctions.GetThreadIdxY.Value);
                result = BuildInsertValue(builder, result, threadIdxY, 1, "TIdx2");
            }

            if (needsZ)
            {
                var threadIdxZ = BuildCall(builder, cudaDeviceFunctions.GetThreadIdxZ.Value);
                result = BuildInsertValue(builder, result, threadIdxZ, 2, "TIdx3");
            }

            return result;
        }
Beispiel #5
0
        /// <summary>
        /// Creates a value of the entry point's ungrouped index type whose components are
        /// filled with the results of the GetBlockIdxX/Y/Z device functions.
        /// </summary>
        /// <remarks>
        /// The X component is always set. The Y component is set for 2D and 3D index types
        /// (grouped or not) and the Z component for 3D index types only; all other
        /// components remain undefined.
        /// </remarks>
        /// <param name="unit">The target unit.</param>
        /// <param name="entryPoint">The entry point.</param>
        /// <param name="builder">The LLVM builder.</param>
        /// <param name="cudaDeviceFunctions">A reference to the cuda device functions.</param>
        /// <returns>The LLVM value containing the current grid indices.</returns>
        private static LLVMValueRef CreateIndexValue(
            CompileUnit unit,
            EntryPoint entryPoint,
            LLVMBuilderRef builder,
            PTXDeviceFunctions cudaDeviceFunctions)
        {
            var result = GetUndef(unit.GetType(entryPoint.UngroupedIndexType));

            Debug.Assert(entryPoint.Type >= IndexType.Index1D);

            var kind = entryPoint.Type;

            // Determine which components beyond X are required
            var needsY = (kind >= IndexType.Index2D && kind <= IndexType.Index3D) ||
                         kind >= IndexType.GroupedIndex2D;
            var needsZ = kind == IndexType.Index3D || kind == IndexType.GroupedIndex3D;

            var blockIdxX = BuildCall(builder, cudaDeviceFunctions.GetBlockIdxX.Value);
            result = BuildInsertValue(builder, result, blockIdxX, 0, "Idx1");

            if (needsY)
            {
                var blockIdxY = BuildCall(builder, cudaDeviceFunctions.GetBlockIdxY.Value);
                result = BuildInsertValue(builder, result, blockIdxY, 1, "Idx2");
            }

            if (needsZ)
            {
                var blockIdxZ = BuildCall(builder, cudaDeviceFunctions.GetBlockIdxZ.Value);
                result = BuildInsertValue(builder, result, blockIdxZ, 2, "Idx3");
            }

            return result;
        }
Beispiel #6
0
        /// <summary>
        /// Serializes the compile unit of the current context as XML to the console,
        /// followed by a line break.
        /// </summary>
        public void Run()
        {
            var compileUnit = _context.CompileUnit;
            var serializer  = new XmlSerializer(compileUnit.GetType());

            serializer.Serialize(Console.Out, compileUnit);
            Console.WriteLine();
        }
Beispiel #7
0
        /// <summary>
        /// Serializes the compile unit of the current context as XML to the output writer
        /// and emits a line break on the console afterwards.
        /// </summary>
        public override void Run()
        {
            var compileUnit = Context.CompileUnit;
            var serializer  = new XmlSerializer(compileUnit.GetType());

            serializer.Serialize(OutputWriter, compileUnit);

            // NOTE(review): the line break goes to the console and not to OutputWriter;
            // confirm whether this is intended when the output is redirected.
            Console.WriteLine();
        }
Beispiel #8
0
        /// <summary>
        /// Creates a value of the entry point's ungrouped index type containing the current
        /// global indices. Each component is computed as
        /// <c>indexValue[i] * blockDimension[i] + groupIndexValue[i]</c>.
        /// </summary>
        /// <remarks>
        /// The number of computed components is the integer value of the entry point's
        /// index type, which is asserted to be a non-grouped index type.
        /// </remarks>
        /// <param name="unit">The target unit.</param>
        /// <param name="entryPoint">The entry point.</param>
        /// <param name="builder">The LLVM builder.</param>
        /// <param name="cudaDeviceFunctions">A reference to the cuda device functions.</param>
        /// <param name="indexValue">The current grid-index value.</param>
        /// <param name="groupIndexValue">The current group-thread-index value.</param>
        /// <returns>The LLVM value containing the current global indices.</returns>
        private static LLVMValueRef CreateGlobalIndexValue(
            CompileUnit unit,
            EntryPoint entryPoint,
            LLVMBuilderRef builder,
            PTXDeviceFunctions cudaDeviceFunctions,
            LLVMValueRef indexValue,
            LLVMValueRef groupIndexValue)
        {
            var result = GetUndef(unit.GetType(entryPoint.UngroupedIndexType));

            Debug.Assert(entryPoint.Type >= IndexType.Index1D && entryPoint.Type < IndexType.GroupedIndex1D);

            var blockDimGetters = cudaDeviceFunctions.GetBlockDimensions;
            var numDimensions   = (int)entryPoint.Type;

            for (var dim = 0; dim < numDimensions; ++dim)
            {
                // gridIdx * blockDim
                var gridIdx     = BuildExtractValue(builder, indexValue, dim, "GridIdx_" + dim);
                var blockDim    = BuildCall(builder, blockDimGetters[dim].Value);
                var groupOffset = BuildMul(builder, gridIdx, blockDim, "GlobalGroupOffset_" + dim);

                // ... + groupIdx
                var groupIdx  = BuildExtractValue(builder, groupIndexValue, dim, "GroupIdx_" + dim);
                var globalIdx = BuildAdd(builder, groupOffset, groupIdx, "GlobalIdxVal_" + dim);

                result = BuildInsertValue(builder, result, globalIdx, dim, "GlobalIdx_" + dim);
            }

            return result;
        }
Beispiel #9
0
        /// <summary>
        /// Creates a value of the entry point's kernel index type in the LLVM world by
        /// storing the index value in element 0 and the group-index value in element 1.
        /// </summary>
        /// <param name="unit">The target unit.</param>
        /// <param name="entryPoint">The entry point (asserted to use a grouped index type).</param>
        /// <param name="builder">The LLVM builder.</param>
        /// <param name="cudaDeviceFunctions">A reference to the cuda device functions (not used by this method).</param>
        /// <param name="indexValue">The current index values (first part of a grouped index).</param>
        /// <param name="groupIndexValue">The current group-index values (second part of a grouped index).</param>
        /// <returns>The composed grouped-index value in the LLVM world.</returns>
        private static LLVMValueRef CreateGroupedIndex(
            CompileUnit unit,
            EntryPoint entryPoint,
            LLVMBuilderRef builder,
            PTXDeviceFunctions cudaDeviceFunctions,
            LLVMValueRef indexValue,
            LLVMValueRef groupIndexValue)
        {
            Debug.Assert(entryPoint.Type >= IndexType.GroupedIndex1D);

            // Start from an undefined grouped index and fill in both parts
            var groupedIndexType = unit.GetType(entryPoint.KernelIndexType);
            var emptyIndex       = GetUndef(groupedIndexType);

            var withGridPart = BuildInsertValue(builder, emptyIndex, indexValue, 0, "GridIdx");
            return BuildInsertValue(builder, withGridPart, groupIndexValue, 1, "GroupIdx");
        }
 /// <summary cref="ABISpecification.GetSizeOf(Type)"/>
 /// <remarks>
 /// Maps the managed type to its compile-unit type and forwards to the
 /// GetSizeOf overload that accepts the mapped type.
 /// </remarks>
 public override int GetSizeOf(Type type) =>
     GetSizeOf(CompileUnit.GetType(type));
 /// <summary cref="ABISpecification.GetAlignmentOf(Type)"/>
 /// <remarks>
 /// Maps the managed type to its compile-unit type and queries the ABI alignment
 /// of that type from the LLVM target data.
 /// </remarks>
 public override int GetAlignmentOf(Type type) =>
     ABIAlignmentOfType(LLVMTargetData, CompileUnit.GetType(type));
Beispiel #12
0
        /// <summary cref="LLVMBackend.CreateEntry(CompileUnit, EntryPoint, out string)"/>
        /// <remarks>
        /// Emits a wrapper function ("actual" kernel entry point) that computes the kernel
        /// index, materializes uniform and shared-memory arguments and calls the method of
        /// the given entry point ("virtual" entry point). If a function with the resolved
        /// name already exists in the module, it is returned without emitting new code.
        /// For entry points that are not grouped-index entries, the body is guarded by a
        /// range check against the dispatch size passed in parameter 0.
        /// </remarks>
        /// <exception cref="InvalidOperationException">
        /// No device functions are registered for <paramref name="unit"/>.
        /// </exception>
        internal override LLVMValueRef CreateEntry(CompileUnit unit, EntryPoint entryPoint, out string entryPointName)
        {
            if (!ptxDeviceFunctions.TryGetValue(unit, out PTXDeviceFunctions deviceFunctions))
            {
                throw new InvalidOperationException(ErrorMessages.NotSupportedCompileUnit);
            }

            entryPointName = unit.GetLLVMName(entryPoint.MethodInfo, CudaKernelCategory);
            var context = unit.LLVMContext;
            var module  = unit.LLVMModule;

            // Reuse an entry point of the same name that already exists in the module
            LLVMValueRef cudaEntryPoint = GetNamedFunction(module, entryPointName);

            if (cudaEntryPoint.Pointer != IntPtr.Zero)
            {
                SetLinkage(cudaEntryPoint, LLVMLinkage.LLVMExternalLinkage);
                return cudaEntryPoint;
            }

            var entryPointType = CreatePTXKernelFunctionType(unit, entryPoint, out int parameterOffset);

            cudaEntryPoint = AddFunction(module, entryPointName, entryPointType);
            SetLinkage(cudaEntryPoint, LLVMLinkage.LLVMExternalLinkage);

            var entryBlock = AppendBasicBlock(cudaEntryPoint, "Main");
            var exitBlock  = AppendBasicBlock(cudaEntryPoint, "Exit");

            // NOTE(review): the builder is not released anywhere in this method;
            // confirm whether it has to be disposed explicitly.
            var builder = CreateBuilderInContext(unit.LLVMContext);

            PositionBuilderAtEnd(builder, entryBlock);

            // Create a proper entry point for the virtual entry point
            var indexValue      = CreateIndexValue(unit, entryPoint, builder, deviceFunctions);
            var groupIndexValue = CreateGroupIndexValue(unit, entryPoint, builder, deviceFunctions);

            if (!entryPoint.IsGroupedIndexEntry)
            {
                // We have to generate code for an implicitly grouped kernel
                // -> Compute the actual global idx
                indexValue = CreateGlobalIndexValue(
                    unit,
                    entryPoint,
                    builder,
                    deviceFunctions,
                    indexValue,
                    groupIndexValue);

                // Append a new main block that contains the actual body
                var mainBlock = AppendBasicBlock(cudaEntryPoint, "Core");

                // Emit the required check (custom dimension size is stored in parameter 0).
                // This check is required to ensure that the index is always smaller than the
                // specified user size. Otherwise, the index might be larger due to custom blocking!
                Debug.Assert(parameterOffset > 0);
                var rangeComparisonResult = CreateGlobalIndexRangeComparison(
                    unit,
                    entryPoint,
                    builder,
                    deviceFunctions,
                    indexValue,
                    GetParam(cudaEntryPoint, 0));
                BuildCondBr(builder, rangeComparisonResult, mainBlock, exitBlock);

                // Move builder to main block to emit the actual kernel body
                PositionBuilderAtEnd(builder, mainBlock);
            }
            else
            {
                Debug.Assert(parameterOffset < 1);
                indexValue = CreateGroupedIndex(
                    unit,
                    entryPoint,
                    builder,
                    deviceFunctions,
                    indexValue,
                    groupIndexValue);
            }

            // Collect the arguments of the virtual entry point (slot 0 holds the index)
            LLVMValueRef[] kernelValues = new LLVMValueRef[entryPoint.NumCustomParameters + 1];
            kernelValues[0] = indexValue;

            var kernelParameters = GetParams(cudaEntryPoint);
            var uniformVariables = entryPoint.UniformVariables;

            // Uniform variables start at parameterOffset in the actual kernel signature
            for (int i = 0, e = uniformVariables.Length; i < e; ++i)
            {
                var variable    = uniformVariables[i];
                var kernelParam = kernelParameters[parameterOffset + i];
                var kernelValue = kernelParam;
                if (variable.VariableType.IsPassedViaPtr())
                {
                    // We have to generate a local alloca and store the current parameter value
                    kernelValue = BuildAlloca(builder, TypeOf(kernelParam), string.Empty);
                    BuildStore(builder, kernelParam, kernelValue);
                }
                kernelValues[variable.Index] = kernelValue;
            }

            var sharedMemoryVariables = entryPoint.SharedMemoryVariables;

            foreach (var variable in sharedMemoryVariables)
            {
                // This type can be: ArrayType<T> or VariableType<T>
                var variableType        = unit.GetType(variable.Type);
                var variableElementType = unit.GetType(variable.ElementType);
                var sharedVariable      = GetUndef(variableType);
                if (variable.IsArray)
                {
                    // However, ArrayType<T> encapsulates the type ArrayView<T, Index>
                    var genericArrayView = GetUndef(GetStructElementTypes(variableType)[0]);
                    var arrayType        = ArrayType(variableElementType, variable.Count != null ? variable.Count.Value : 0);
                    var sharedMem        = DeclareSharedMemoryVariable(unit, builder, arrayType);
                    genericArrayView = BuildInsertValue(builder, genericArrayView, sharedMem, 0, string.Empty);
                    LLVMValueRef intIndex;

                    if (variable.Count != null)
                    {
                        intIndex = ConstInt(context.Int32Type, variable.Count.Value, false);
                    }
                    else
                    {
                        // Attach the right length information that is given via a parameter.
                        // The length parameters follow the optional dispatch-size parameter and
                        // all uniform variables (see CreatePTXKernelFunctionType), so the
                        // parameter offset has to be part of the lookup index.
                        Debug.Assert(variable.SharedMemoryIndex >= 0);
                        intIndex = kernelParameters[
                            parameterOffset + uniformVariables.Length + variable.SharedMemoryIndex];
                    }

                    var indexInstance = GetUndef(unit.GetType(typeof(Index)));
                    indexInstance    = BuildInsertValue(builder, indexInstance, intIndex, 0, string.Empty);
                    genericArrayView = BuildInsertValue(builder, genericArrayView, indexInstance, 1, string.Empty);
                    sharedVariable   = BuildInsertValue(builder, sharedVariable, genericArrayView, 0, string.Empty);
                }
                else
                {
                    var sharedMem = DeclareSharedMemoryVariable(unit, builder, variableElementType);
                    // Insert pointer into variable view
                    sharedVariable = BuildInsertValue(builder, sharedVariable, sharedMem, 0, string.Empty);
                }

                // Setup the pointer as generic pointer
                kernelValues[variable.Index] = sharedVariable;
            }

            // Call the virtual entry point
            var virtualEntryPoint = unit.GetMethod(entryPoint.MethodInfo);

            BuildCall(builder, virtualEntryPoint.LLVMFunction, kernelValues);

            // Verify method access in the scope of implicitly-grouped kernels
            if (!entryPoint.IsGroupedIndexEntry)
            {
                virtualEntryPoint.VisitCalls((instruction, calledMethod) =>
                {
                    CodeGenerator.VerifyAccessToMethodInImplicitlyGroupedKernel(
                        unit.CompilationContext,
                        calledMethod.MethodBase,
                        entryPoint);
                });
            }

            // Jump to exit block
            BuildBr(builder, exitBlock);

            // Build exit block
            PositionBuilderAtEnd(builder, exitBlock);
            BuildRetVoid(builder);

            unit.Optimize();

            return cudaEntryPoint;
        }