/// <summary>
/// Builds the LLVM function type of the actual kernel entry point.
/// The parameter list is laid out as: an optional leading index parameter
/// (implicitly grouped kernels only), one parameter per uniform variable and
/// one <see cref="int"/> per dynamically sized shared-memory variable.
/// </summary>
/// <param name="unit">The target unit.</param>
/// <param name="entryPoint">The target entry point.</param>
/// <param name="parameterOffset">
/// Receives the position of the first uniform-variable parameter:
/// 0 for grouped-index entries and 1 otherwise.
/// </param>
/// <returns>A void-returning function type for the actual kernel entry point.</returns>
private LLVMTypeRef CreatePTXKernelFunctionType(
    CompileUnit unit,
    EntryPoint entryPoint,
    out int parameterOffset)
{
    parameterOffset = entryPoint.IsGroupedIndexEntry ? 0 : 1;

    var uniformCount = entryPoint.NumUniformVariables;
    var dynamicSharedCount = entryPoint.NumDynamicallySizedSharedMemoryVariables;
    var signature = new LLVMTypeRef[parameterOffset + uniformCount + dynamicSharedCount];

    // Implicitly grouped kernels receive custom dispatch-size information
    // in the very first parameter slot.
    if (parameterOffset > 0)
    {
        signature[0] = unit.GetType(entryPoint.UngroupedIndexType);
    }
    Debug.Assert(parameterOffset >= 0 && parameterOffset < 2);

    // Uniform variables directly follow the optional index parameter.
    for (var slot = 0; slot < uniformCount; ++slot)
    {
        signature[parameterOffset + slot] = unit.GetType(
            entryPoint.UniformVariables[slot].VariableType);
    }

    // Dynamically sized shared-memory variables get their length passed at
    // runtime via trailing integer parameters.
    var lengthBase = parameterOffset + uniformCount;
    for (var slot = 0; slot < dynamicSharedCount; ++slot)
    {
        signature[lengthBase + slot] = unit.GetType(typeof(int));
    }

    return FunctionType(Context.LLVMContext.VoidType, signature);
}
/// <summary>
/// Emits the instructions that compute a warp-shuffle mask:
/// <c>(WarpSize - width) &lt;&lt; 8</c>, optionally or-ed with <c>(WarpSize - 1)</c>.
/// </summary>
/// <param name="unit">The current unit.</param>
/// <param name="builder">The current builder.</param>
/// <param name="width">The width that was passed by the user.</param>
/// <param name="addOrMask">True, to add an or mask consisting of (WarpSize - 1).</param>
/// <returns>A value that represents the desired warp-shuffle mask.</returns>
private LLVMValueRef BuildWarpShuffleMask(
    CompileUnit unit,
    LLVMBuilderRef builder,
    LLVMValueRef width,
    bool addOrMask)
{
    var laneCount = MakeWarpSize(builder);
    var laneDifference = BuildSub(builder, laneCount, width, string.Empty);

    var shiftAmount = ConstInt(unit.GetType(BasicValueType.Int32), 8, false);
    var shiftedMask = BuildShl(builder, laneDifference, shiftAmount, string.Empty);

    if (!addOrMask)
    {
        return shiftedMask;
    }

    // Combine the shifted part with (WarpSize - 1).
    var one = ConstInt(unit.GetType(BasicValueType.Int32), 1, false);
    var laneMask = BuildSub(builder, laneCount, one, string.Empty);
    return BuildOr(builder, shiftedMask, laneMask, string.Empty);
}
/// <summary>
/// Constructs a default LLVM-based ABI specification and verifies that the
/// alignments reported by the LLVM target data match the managed alignments.
/// </summary>
/// <param name="unit">The compile unit used for ABI generation.</param>
/// <exception cref="NotSupportedException">
/// Thrown if the backend of <paramref name="unit"/> is not an LLVM backend, or
/// if an LLVM alignment differs from the expected managed alignment.
/// </exception>
public DefaultLLVMABI(CompileUnit unit)
    : base(unit)
{
    if (!(unit.Backend is LLVMBackend llvmBackend))
    {
        throw new NotSupportedException(ErrorMessages.NotSupportedBackend);
    }

    LLVMTargetData = CreateTargetDataLayout(llvmBackend.LLVMTargetMachine);

    foreach (var entry in ManagedAlignments)
    {
        var declaredType = entry.Key;
        var nativeType = unit.GetType(declaredType);
        var nativeAlignment = ABIAlignmentOfType(LLVMTargetData, nativeType);

        // We need a special case for the builtin mapping of 64bit floats
        // to 32bit floats since this mapping changes the alignment logic:
        // a double is then compared against the managed alignment of float.
        var comparisonType = unit.Force32BitFloats && declaredType == typeof(double)
            ? typeof(float)
            : declaredType;

        if (ManagedAlignments[comparisonType] != nativeAlignment)
        {
            throw new NotSupportedException(string.Format(
                ErrorMessages.CustomABIImplementationRequired,
                entry.Key));
        }

        Alignments.Add(entry.Key, nativeAlignment);
    }

    AddNonBlittableTypes();

    var pointerAlignment = ABIAlignmentOfType(
        LLVMTargetData,
        unit.LLVMContext.VoidPtrType);
    AddPtrAlignment(pointerAlignment);
}
/// <summary>
/// Creates an <see cref="Index3"/> in the LLVM world containing the current group-thread indices.
/// The X component is always populated; the Y and Z components are populated
/// only if the entry-point index type has the corresponding dimension.
/// </summary>
/// <param name="unit">The target unit.</param>
/// <param name="entryPoint">The entry point.</param>
/// <param name="builder">The LLVM builder.</param>
/// <param name="cudaDeviceFunctions">A reference to the cuda device functions.</param>
/// <returns>An <see cref="Index3"/> in the LLVM world containing the current group-thread indices.</returns>
private static LLVMValueRef CreateGroupIndexValue(
    CompileUnit unit,
    EntryPoint entryPoint,
    LLVMBuilderRef builder,
    PTXDeviceFunctions cudaDeviceFunctions)
{
    var result = GetUndef(unit.GetType(entryPoint.UngroupedIndexType));

    Debug.Assert(entryPoint.Type >= IndexType.Index1D);

    var grouped = entryPoint.IsGroupedIndexEntry;

    // X is present for every supported index type.
    var threadIdxX = BuildCall(builder, cudaDeviceFunctions.GetThreadIdxX.Value);
    result = BuildInsertValue(builder, result, threadIdxX, 0, "TIdx1");

    var needsY =
        (entryPoint.Type >= IndexType.Index2D && !grouped) ||
        entryPoint.Type >= IndexType.GroupedIndex2D;
    if (needsY)
    {
        var threadIdxY = BuildCall(builder, cudaDeviceFunctions.GetThreadIdxY.Value);
        result = BuildInsertValue(builder, result, threadIdxY, 1, "TIdx2");
    }

    var needsZ =
        (entryPoint.Type >= IndexType.Index3D && !grouped) ||
        entryPoint.Type >= IndexType.GroupedIndex3D;
    if (needsZ)
    {
        var threadIdxZ = BuildCall(builder, cudaDeviceFunctions.GetThreadIdxZ.Value);
        result = BuildInsertValue(builder, result, threadIdxZ, 2, "TIdx3");
    }

    return result;
}
/// <summary>
/// Creates an <see cref="Index3"/> in the LLVM world containing the current grid indices.
/// The X component is always read from the block-index function; Y and Z are
/// read only for index types that have the corresponding dimension.
/// </summary>
/// <param name="unit">The target unit.</param>
/// <param name="entryPoint">The entry point.</param>
/// <param name="builder">The LLVM builder.</param>
/// <param name="cudaDeviceFunctions">A reference to the cuda device functions.</param>
/// <returns>An <see cref="Index3"/> in the LLVM world containing the current grid indices.</returns>
private static LLVMValueRef CreateIndexValue(
    CompileUnit unit,
    EntryPoint entryPoint,
    LLVMBuilderRef builder,
    PTXDeviceFunctions cudaDeviceFunctions)
{
    var result = GetUndef(unit.GetType(entryPoint.UngroupedIndexType));

    Debug.Assert(entryPoint.Type >= IndexType.Index1D);

    var blockIdxX = BuildCall(builder, cudaDeviceFunctions.GetBlockIdxX.Value);
    result = BuildInsertValue(builder, result, blockIdxX, 0, "Idx1");

    var needsY =
        (entryPoint.Type >= IndexType.Index2D && entryPoint.Type <= IndexType.Index3D) ||
        entryPoint.Type >= IndexType.GroupedIndex2D;
    if (needsY)
    {
        var blockIdxY = BuildCall(builder, cudaDeviceFunctions.GetBlockIdxY.Value);
        result = BuildInsertValue(builder, result, blockIdxY, 1, "Idx2");
    }

    var needsZ =
        entryPoint.Type == IndexType.Index3D ||
        entryPoint.Type == IndexType.GroupedIndex3D;
    if (needsZ)
    {
        var blockIdxZ = BuildCall(builder, cudaDeviceFunctions.GetBlockIdxZ.Value);
        result = BuildInsertValue(builder, result, blockIdxZ, 2, "Idx3");
    }

    return result;
}
/// <summary>
/// Serializes the compile unit of the current context as XML to the
/// console and terminates the output with a line break.
/// </summary>
public void Run()
{
    CompileUnit compileUnit = _context.CompileUnit;

    // The serializer is created for the runtime type of the compile unit.
    var serializer = new XmlSerializer(compileUnit.GetType());
    serializer.Serialize(Console.Out, compileUnit);

    Console.WriteLine();
}
/// <summary>
/// Serializes the compile unit of the current context as XML to
/// <c>OutputWriter</c> and terminates the output with a line break.
/// </summary>
public override void Run()
{
    CompileUnit compileUnit = Context.CompileUnit;
    var output = OutputWriter;

    // The serializer is created for the runtime type of the compile unit.
    var serializer = new XmlSerializer(compileUnit.GetType());
    serializer.Serialize(output, compileUnit);

    // Terminate the document on the same writer that received the XML.
    // Writing the line break to the console instead would leave a redirected
    // writer without the terminator and emit a stray blank line on stdout.
    output.WriteLine();
}
/// <summary>
/// Creates an <see cref="Index3"/> in the LLVM world containing the current global indices.
/// For every dimension <c>i</c> of the entry-point index type, the component is computed as
/// <c>indexValue[i] * blockDimension[i] + groupIndexValue[i]</c>.
/// </summary>
/// <param name="unit">The target unit.</param>
/// <param name="entryPoint">The entry point (must use an ungrouped index type).</param>
/// <param name="builder">The LLVM builder.</param>
/// <param name="cudaDeviceFunctions">A reference to the cuda device functions.</param>
/// <param name="indexValue">The current grid-index value.</param>
/// <param name="groupIndexValue">The current group-thread-index value.</param>
/// <returns>An <see cref="Index3"/> in the LLVM world containing the current global indices.</returns>
private static LLVMValueRef CreateGlobalIndexValue(
    CompileUnit unit,
    EntryPoint entryPoint,
    LLVMBuilderRef builder,
    PTXDeviceFunctions cudaDeviceFunctions,
    LLVMValueRef indexValue,
    LLVMValueRef groupIndexValue)
{
    var result = GetUndef(unit.GetType(entryPoint.UngroupedIndexType));

    Debug.Assert(entryPoint.Type >= IndexType.Index1D && entryPoint.Type < IndexType.GroupedIndex1D);

    var blockDimensions = cudaDeviceFunctions.GetBlockDimensions;

    // The numeric value of the (ungrouped) index type is used as the number
    // of dimensions to process.
    var dimensionCount = (int)entryPoint.Type;
    for (var dim = 0; dim < dimensionCount; ++dim)
    {
        var gridComponent = BuildExtractValue(builder, indexValue, dim, "GridIdx_" + dim);
        var blockDimension = BuildCall(builder, blockDimensions[dim].Value);
        var groupOffset = BuildMul(
            builder,
            gridComponent,
            blockDimension,
            "GlobalGroupOffset_" + dim);

        var groupComponent = BuildExtractValue(builder, groupIndexValue, dim, "GroupIdx_" + dim);
        var globalComponent = BuildAdd(
            builder,
            groupOffset,
            groupComponent,
            "GlobalIdxVal_" + dim);

        result = BuildInsertValue(builder, result, globalComponent, dim, "GlobalIdx_" + dim);
    }

    return result;
}
/// <summary>
/// Creates an instance of an <see cref="IGroupedIndex{TIndex}"/> in the LLVM world
/// by storing the index value in element 0 and the group-index value in element 1
/// of a value of the kernel index type.
/// </summary>
/// <param name="unit">The target unit.</param>
/// <param name="entryPoint">The entry point.</param>
/// <param name="builder">The LLVM builder.</param>
/// <param name="cudaDeviceFunctions">A reference to the cuda device functions (not used by this method).</param>
/// <param name="indexValue">The current index values (first part of a grouped index).</param>
/// <param name="groupIndexValue">The current group-index values (second part of a grouped index).</param>
/// <returns>An instance of an <see cref="IGroupedIndex{TIndex}"/> in the LLVM world.</returns>
private static LLVMValueRef CreateGroupedIndex(
    CompileUnit unit,
    EntryPoint entryPoint,
    LLVMBuilderRef builder,
    PTXDeviceFunctions cudaDeviceFunctions,
    LLVMValueRef indexValue,
    LLVMValueRef groupIndexValue)
{
    Debug.Assert(entryPoint.Type >= IndexType.GroupedIndex1D);

    // Start from an undefined grouped index and fill in both parts.
    var groupedIndexType = unit.GetType(entryPoint.KernelIndexType);
    var withGridPart = BuildInsertValue(
        builder,
        GetUndef(groupedIndexType),
        indexValue,
        0,
        "GridIdx");

    return BuildInsertValue(builder, withGridPart, groupIndexValue, 1, "GroupIdx");
}
/// <summary cref="ABISpecification.GetSizeOf(Type)"/>
public override int GetSizeOf(Type type)
{
    // Map the managed type to its compile-unit type and delegate to the
    // overload that operates on that type.
    var mappedType = CompileUnit.GetType(type);
    return GetSizeOf(mappedType);
}
/// <summary cref="ABISpecification.GetAlignmentOf(Type)"/>
public override int GetAlignmentOf(Type type)
{
    // Query the ABI alignment of the mapped type from the LLVM target data.
    var targetData = LLVMTargetData;
    var mappedType = CompileUnit.GetType(type);
    return ABIAlignmentOfType(targetData, mappedType);
}
/// <summary cref="LLVMBackend.CreateEntry(CompileUnit, EntryPoint, out string)"/>
/// <remarks>
/// Emits (or reuses) the externally visible kernel function that wraps the
/// virtual entry point. The emitted function computes the kernel index,
/// forwards the uniform variables, sets up the shared-memory variables and
/// finally calls the virtual entry point. For implicitly grouped kernels an
/// additional range check against the user-defined dimension (parameter 0)
/// guards the call.
/// </remarks>
/// <exception cref="InvalidOperationException">
/// Thrown if no PTX device functions are registered for <paramref name="unit"/>.
/// </exception>
internal override LLVMValueRef CreateEntry(CompileUnit unit, EntryPoint entryPoint, out string entryPointName)
{
    if (!ptxDeviceFunctions.TryGetValue(unit, out PTXDeviceFunctions deviceFunctions))
    {
        throw new InvalidOperationException(ErrorMessages.NotSupportedCompileUnit);
    }

    entryPointName = unit.GetLLVMName(entryPoint.MethodInfo, CudaKernelCategory);

    var context = unit.LLVMContext;
    var module = unit.LLVMModule;

    // Reuse an already emitted entry point with the same name.
    LLVMValueRef cudaEntryPoint = GetNamedFunction(module, entryPointName);
    if (cudaEntryPoint.Pointer != IntPtr.Zero)
    {
        SetLinkage(cudaEntryPoint, LLVMLinkage.LLVMExternalLinkage);
        return cudaEntryPoint;
    }

    var entryPointType = CreatePTXKernelFunctionType(unit, entryPoint, out int parameterOffset);
    cudaEntryPoint = AddFunction(module, entryPointName, entryPointType);
    SetLinkage(cudaEntryPoint, LLVMLinkage.LLVMExternalLinkage);

    var entryBlock = AppendBasicBlock(cudaEntryPoint, "Main");
    var exitBlock = AppendBasicBlock(cudaEntryPoint, "Exit");

    // NOTE(review): the builder created here is never explicitly released in
    // this method - confirm whether the LLVM bindings require a dispose call.
    var builder = CreateBuilderInContext(unit.LLVMContext);
    PositionBuilderAtEnd(builder, entryBlock);

    // Create a proper entry point for the virtual entry point
    var indexValue = CreateIndexValue(unit, entryPoint, builder, deviceFunctions);
    var groupIndexValue = CreateGroupIndexValue(unit, entryPoint, builder, deviceFunctions);

    if (!entryPoint.IsGroupedIndexEntry)
    {
        // We have to generate code for an implicitly grouped kernel
        // -> Compute the actual global idx
        indexValue = CreateGlobalIndexValue(
            unit,
            entryPoint,
            builder,
            deviceFunctions,
            indexValue,
            groupIndexValue);

        // Append a new main block that contains the actual body
        var mainBlock = AppendBasicBlock(cudaEntryPoint, "Core");

        // Emit the required check (custom dimension size is stored in parameter 0).
        // This check is required to ensure that the index is always smaller than the
        // specified user size. Otherwise, the index might be larger due to custom blocking!
        Debug.Assert(parameterOffset > 0);
        var rangeComparisonResult = CreateGlobalIndexRangeComparison(
            unit,
            entryPoint,
            builder,
            deviceFunctions,
            indexValue,
            GetParam(cudaEntryPoint, 0));
        BuildCondBr(builder, rangeComparisonResult, mainBlock, exitBlock);

        // Move builder to main block to emit the actual kernel body
        PositionBuilderAtEnd(builder, mainBlock);
    }
    else
    {
        Debug.Assert(parameterOffset < 1);
        indexValue = CreateGroupedIndex(
            unit,
            entryPoint,
            builder,
            deviceFunctions,
            indexValue,
            groupIndexValue);
    }

    // Call the virtual entry point: slot 0 holds the kernel index, the
    // remaining slots are addressed via the variables' own indices.
    LLVMValueRef[] kernelValues = new LLVMValueRef[entryPoint.NumCustomParameters + 1];
    kernelValues[0] = indexValue;

    var kernelParameters = GetParams(cudaEntryPoint);
    var uniformVariables = entryPoint.UniformVariables;
    for (int i = 0, kernelParamIdx = parameterOffset, e = uniformVariables.Length; i < e; ++i, ++kernelParamIdx)
    {
        var variable = uniformVariables[i];
        LLVMValueRef kernelParam;
        var kernelValue = kernelParam = kernelParameters[kernelParamIdx];
        if (variable.VariableType.IsPassedViaPtr())
        {
            // We have to generate a local alloca and store the current parameter value
            kernelValue = BuildAlloca(builder, TypeOf(kernelParam), string.Empty);
            BuildStore(builder, kernelParam, kernelValue);
        }
        kernelValues[variable.Index] = kernelValue;
    }

    var sharedMemoryVariables = entryPoint.SharedMemoryVariables;
    foreach (var variable in sharedMemoryVariables)
    {
        // This type can be: ArrayType<T> or VariableType<T>
        var variableType = unit.GetType(variable.Type);
        var variableElementType = unit.GetType(variable.ElementType);
        var sharedVariable = GetUndef(variableType);
        if (variable.IsArray)
        {
            // However, ArrayType<T> encapsulates the type ArrayView<T, Index>
            var genericArrayView = GetUndef(GetStructElementTypes(variableType)[0]);
            var arrayType = ArrayType(
                variableElementType,
                variable.Count != null ? variable.Count.Value : 0);
            var sharedMem = DeclareSharedMemoryVariable(unit, builder, arrayType);
            genericArrayView = BuildInsertValue(builder, genericArrayView, sharedMem, 0, string.Empty);

            LLVMValueRef intIndex;
            if (variable.Count != null)
            {
                intIndex = ConstInt(context.Int32Type, variable.Count.Value, false);
            }
            else
            {
                // Attach the right length information that is given via a parameter.
                // The length parameters follow the optional leading index parameter
                // and all uniform variables in the kernel signature, so the leading
                // offset has to be part of the parameter position.
                Debug.Assert(variable.SharedMemoryIndex >= 0);
                intIndex = kernelParameters[
                    parameterOffset + uniformVariables.Length + variable.SharedMemoryIndex];
            }

            var indexInstance = GetUndef(unit.GetType(typeof(Index)));
            indexInstance = BuildInsertValue(builder, indexInstance, intIndex, 0, string.Empty);

            genericArrayView = BuildInsertValue(builder, genericArrayView, indexInstance, 1, string.Empty);
            sharedVariable = BuildInsertValue(builder, sharedVariable, genericArrayView, 0, string.Empty);
        }
        else
        {
            var sharedMem = DeclareSharedMemoryVariable(unit, builder, variableElementType);

            // Insert pointer into variable view
            sharedVariable = BuildInsertValue(builder, sharedVariable, sharedMem, 0, string.Empty);
        }

        // Setup the pointer as generic pointer
        kernelValues[variable.Index] = sharedVariable;
    }

    // Declare external entry point
    var virtualEntryPoint = unit.GetMethod(entryPoint.MethodInfo);
    BuildCall(builder, virtualEntryPoint.LLVMFunction, kernelValues);

    // Verify method access in the scope of implicitly-grouped kernels
    if (!entryPoint.IsGroupedIndexEntry)
    {
        virtualEntryPoint.VisitCalls((instruction, calledMethod) =>
        {
            CodeGenerator.VerifyAccessToMethodInImplicitlyGroupedKernel(
                unit.CompilationContext,
                calledMethod.MethodBase,
                entryPoint);
        });
    }

    // Jump to exit block
    BuildBr(builder, exitBlock);

    // Build exit block
    PositionBuilderAtEnd(builder, exitBlock);
    BuildRetVoid(builder);

    unit.Optimize();

    return cudaEntryPoint;
}