/// <summary>
        /// Creates a comparison of the current global index to the custom desired number of user threads.
        /// Every component of the global index is compared (signed less-than) against the matching
        /// component of the user range, and the per-component results are combined with a logical and.
        /// </summary>
        /// <param name="unit">The target unit.</param>
        /// <param name="entryPoint">The entry point.</param>
        /// <param name="builder">The LLVM builder.</param>
        /// <param name="cudaDeviceFunctions">A reference to the cuda device functions (not used by this method).</param>
        /// <param name="globalIndexValue">The current global index values.</param>
        /// <param name="userIndexRange">The user given custom launcher range.</param>
        /// <returns>
        /// An i1 value in the LLVM world that is the conjunction of the per-component
        /// "global index &lt; user range" comparisons.
        /// </returns>
        private static LLVMValueRef CreateGlobalIndexRangeComparison(
            CompileUnit unit,
            EntryPoint entryPoint,
            LLVMBuilderRef builder,
            PTXDeviceFunctions cudaDeviceFunctions,
            LLVMValueRef globalIndexValue,
            LLVMValueRef userIndexRange)
        {
            Debug.Assert(entryPoint.Type >= IndexType.Index1D && entryPoint.Type < IndexType.GroupedIndex1D);

            // Neutral element of the and-chain: a constant 'true' (i1 1).
            LLVMValueRef comparisonValue = ConstInt(unit.LLVMContext.Int1Type, 1, false);

            // (int)entryPoint.Type serves as the exclusive upper bound of the component loop,
            // exactly as in CreateGlobalIndexValue, which builds the global index being checked here.
            for (int i = 0, e = (int)entryPoint.Type; i < e; ++i)
            {
                // Compare the i-th component of both aggregates (not always component 0).
                var compareResult = BuildICmp(
                    builder,
                    LLVMIntPredicate.LLVMIntSLT,
                    BuildExtractValue(builder, globalIndexValue, i, "GlobalIdx_" + i),
                    BuildExtractValue(builder, userIndexRange, i, "UserRange_" + i),
                    "InRange_" + i);

                // Note: the value name says "Or" for historical reasons; the operation is an and.
                comparisonValue = BuildAnd(
                    builder,
                    comparisonValue,
                    compareResult,
                    "RangeOr_" + i);
            }

            return(comparisonValue);
        }
        /// <summary>
        /// Builds a value of the entry point's ungrouped index type in the LLVM world whose
        /// components are filled with the results of the thread-index device functions.
        /// </summary>
        /// <param name="unit">The target unit.</param>
        /// <param name="entryPoint">The entry point.</param>
        /// <param name="builder">The LLVM builder.</param>
        /// <param name="cudaDeviceFunctions">A reference to the cuda device functions.</param>
        /// <returns>An index value in the LLVM world containing the current group-thread indices.</returns>
        private static LLVMValueRef CreateGroupIndexValue(
            CompileUnit unit,
            EntryPoint entryPoint,
            LLVMBuilderRef builder,
            PTXDeviceFunctions cudaDeviceFunctions)
        {
            // Start from an undefined aggregate; components are inserted one by one below.
            var result = GetUndef(unit.GetType(entryPoint.UngroupedIndexType));

            Debug.Assert(entryPoint.Type >= IndexType.Index1D);

            var kind    = entryPoint.Type;
            var grouped = entryPoint.IsGroupedIndexEntry;

            // The second/third component is required either for a non-grouped index of at
            // least that dimension, or for a grouped index of at least that dimension.
            var needsY = (!grouped && kind >= IndexType.Index2D) || kind >= IndexType.GroupedIndex2D;
            var needsZ = (!grouped && kind >= IndexType.Index3D) || kind >= IndexType.GroupedIndex3D;

            // The first component is always present.
            var threadX = BuildCall(builder, cudaDeviceFunctions.GetThreadIdxX.Value);
            result = BuildInsertValue(builder, result, threadX, 0, "TIdx1");

            if (needsY)
            {
                var threadY = BuildCall(builder, cudaDeviceFunctions.GetThreadIdxY.Value);
                result = BuildInsertValue(builder, result, threadY, 1, "TIdx2");
            }

            if (needsZ)
            {
                var threadZ = BuildCall(builder, cudaDeviceFunctions.GetThreadIdxZ.Value);
                result = BuildInsertValue(builder, result, threadZ, 2, "TIdx3");
            }

            return result;
        }
        /// <summary>
        /// Builds a value of the entry point's ungrouped index type in the LLVM world whose
        /// components are filled with the results of the block-index device functions.
        /// </summary>
        /// <param name="unit">The target unit.</param>
        /// <param name="entryPoint">The entry point.</param>
        /// <param name="builder">The LLVM builder.</param>
        /// <param name="cudaDeviceFunctions">A reference to the cuda device functions.</param>
        /// <returns>An index value in the LLVM world containing the current grid indices.</returns>
        private static LLVMValueRef CreateIndexValue(
            CompileUnit unit,
            EntryPoint entryPoint,
            LLVMBuilderRef builder,
            PTXDeviceFunctions cudaDeviceFunctions)
        {
            // Start from an undefined aggregate; components are inserted one by one below.
            var result = GetUndef(unit.GetType(entryPoint.UngroupedIndexType));

            Debug.Assert(entryPoint.Type >= IndexType.Index1D);

            var kind = entryPoint.Type;

            // Second component: non-grouped 2D/3D indices or grouped indices from 2D upwards.
            var needsY =
                (kind >= IndexType.Index2D && kind <= IndexType.Index3D) ||
                kind >= IndexType.GroupedIndex2D;

            // Third component: only the 3D variants (plain or grouped).
            var needsZ = kind == IndexType.Index3D || kind == IndexType.GroupedIndex3D;

            // The first component is always present.
            var blockX = BuildCall(builder, cudaDeviceFunctions.GetBlockIdxX.Value);
            result = BuildInsertValue(builder, result, blockX, 0, "Idx1");

            if (needsY)
            {
                var blockY = BuildCall(builder, cudaDeviceFunctions.GetBlockIdxY.Value);
                result = BuildInsertValue(builder, result, blockY, 1, "Idx2");
            }

            if (needsZ)
            {
                var blockZ = BuildCall(builder, cudaDeviceFunctions.GetBlockIdxZ.Value);
                result = BuildInsertValue(builder, result, blockZ, 2, "Idx3");
            }

            return result;
        }
        /// <summary cref="Backend.TargetUnit(CompileUnit)"/>
        internal override void TargetUnit(CompileUnit unit)
        {
            var targetModule = unit.LLVMModule;
            var layout       = GetLLVMLayout(Platform);
            var triple       = GetLLVMTriple(Platform);

            // Configure the unit's own module for the current platform.
            SetDataLayout(targetModule, layout);
            SetTarget(targetModule, triple);

            // Read the lib-device file into a memory buffer (a true result signals failure).
            var readFailed = CreateMemoryBufferWithContentsOfFile(
                LibDevicePath,
                out LLVMMemoryBufferRef libDeviceBuffer,
                out IntPtr errorMessage);
            if (readFailed)
            {
                var reason = Marshal.PtrToStringAnsi(errorMessage);
                throw new InvalidOperationException(
                    string.Format(ErrorMessages.CouldNotReadLibDevice, reason));
            }

            // Parse the buffer as a bitcode module in the unit's context (a true result signals failure).
            var loadFailed = GetBitcodeModuleInContext(
                unit.LLVMContext,
                libDeviceBuffer,
                out LLVMModuleRef libDeviceModule,
                out errorMessage);
            if (loadFailed)
            {
                var reason = Marshal.PtrToStringAnsi(errorMessage);
                throw new InvalidOperationException(
                    string.Format(ErrorMessages.CouldNotLoadLibDevice, reason));
            }

            // Give the lib-device module the same layout/triple and link it into the unit's module.
            SetDataLayout(libDeviceModule, layout);
            SetTarget(libDeviceModule, triple);
            LinkModules2(targetModule, libDeviceModule);

            // Create the device-function table for this unit and register it.
            var deviceFunctions = new PTXDeviceFunctions(unit);

            ptxDeviceFunctions.Add(unit, deviceFunctions);
            unit.RegisterDeviceFunctions(deviceFunctions);
        }
        /// <summary>
        /// Builds a value of the entry point's ungrouped index type in the LLVM world that holds
        /// the current global indices. Each component is computed as
        /// indexValue[i] * blockDimension[i] + groupIndexValue[i].
        /// </summary>
        /// <param name="unit">The target unit.</param>
        /// <param name="entryPoint">The entry point.</param>
        /// <param name="builder">The LLVM builder.</param>
        /// <param name="cudaDeviceFunctions">A reference to the cuda device functions.</param>
        /// <param name="indexValue">The current grid-index value.</param>
        /// <param name="groupIndexValue">The current group-thread-index value.</param>
        /// <returns>An index value in the LLVM world containing the current global indices.</returns>
        private static LLVMValueRef CreateGlobalIndexValue(
            CompileUnit unit,
            EntryPoint entryPoint,
            LLVMBuilderRef builder,
            PTXDeviceFunctions cudaDeviceFunctions,
            LLVMValueRef indexValue,
            LLVMValueRef groupIndexValue)
        {
            // Start from an undefined aggregate; one component is inserted per iteration.
            var result = GetUndef(unit.GetType(entryPoint.UngroupedIndexType));

            Debug.Assert(entryPoint.Type >= IndexType.Index1D && entryPoint.Type < IndexType.GroupedIndex1D);

            var blockDims      = cudaDeviceFunctions.GetBlockDimensions;
            var dimensionCount = (int)entryPoint.Type;

            for (var dim = 0; dim < dimensionCount; ++dim)
            {
                // Offset of the current group: grid component scaled by the block dimension.
                var gridComponent = BuildExtractValue(builder, indexValue, dim, "GridIdx_" + dim);
                var blockExtent   = BuildCall(builder, blockDims[dim].Value);
                var groupOffset   = BuildMul(builder, gridComponent, blockExtent, "GlobalGroupOffset_" + dim);

                // Add the position inside the group to obtain the global component.
                var groupComponent  = BuildExtractValue(builder, groupIndexValue, dim, "GroupIdx_" + dim);
                var globalComponent = BuildAdd(builder, groupOffset, groupComponent, "GlobalIdxVal_" + dim);

                result = BuildInsertValue(builder, result, globalComponent, dim, "GlobalIdx_" + dim);
            }

            return result;
        }
        /// <summary>
        /// Assembles a value of the entry point's kernel index type in the LLVM world from
        /// its two parts: the index value (slot 0) and the group-index value (slot 1).
        /// </summary>
        /// <param name="unit">The target unit.</param>
        /// <param name="entryPoint">The entry point.</param>
        /// <param name="builder">The LLVM builder.</param>
        /// <param name="cudaDeviceFunctions">A reference to the cuda device functions (not used by this method).</param>
        /// <param name="indexValue">The current index values (first part of a grouped index).</param>
        /// <param name="groupIndexValue">The current group-index values (second part of a grouped index).</param>
        /// <returns>An instance of an <see cref="IGroupedIndex{TIndex}"/> in the LLVM world.</returns>
        private static LLVMValueRef CreateGroupedIndex(
            CompileUnit unit,
            EntryPoint entryPoint,
            LLVMBuilderRef builder,
            PTXDeviceFunctions cudaDeviceFunctions,
            LLVMValueRef indexValue,
            LLVMValueRef groupIndexValue)
        {
            Debug.Assert(entryPoint.Type >= IndexType.GroupedIndex1D);

            // Begin with an undefined aggregate of the grouped (kernel) index type.
            var groupedIndexType = unit.GetType(entryPoint.KernelIndexType);
            var emptyIndex       = GetUndef(groupedIndexType);

            // Slot 0 receives the index part, slot 1 the group-index part.
            var withGridPart = BuildInsertValue(builder, emptyIndex, indexValue, 0, "GridIdx");
            return BuildInsertValue(builder, withGridPart, groupIndexValue, 1, "GroupIdx");
        }