Ejemplo n.º 1
0
        /// <summary cref="KernelAccelerator{TCompiledKernel, TKernel}
        /// .GenerateKernelLauncherMethod(TCompiledKernel, int)"/>
        protected override MethodInfo GenerateKernelLauncherMethod(
            CLCompiledKernel kernel,
            int customGroupSize)
        {
            var entryPoint = kernel.EntryPoint;

            AdjustAndVerifyKernelGroupSize(ref customGroupSize, entryPoint);

            // Add support for by ref parameters
            if (entryPoint.HasByRefParameters)
            {
                throw new NotSupportedException(
                          ErrorMessages.NotSupportedByRefKernelParameters);
            }

            var launcher = entryPoint.CreateLauncherMethod(Context);
            var emitter  = new ILEmitter(launcher.ILGenerator);

            // Load kernel instance
            var kernelLocal = emitter.DeclareLocal(typeof(CLKernel));

            KernelLauncherBuilder.EmitLoadKernelArgument <CLKernel, ILEmitter>(
                Kernel.KernelInstanceParamIdx,
                emitter);
            emitter.Emit(LocalOperation.Store, kernelLocal);

            // Map all kernel arguments
            var argumentMapper = Backend.ArgumentMapper;

            argumentMapper.Map(emitter, kernelLocal, entryPoint);

            // Load stream
            KernelLauncherBuilder.EmitLoadAcceleratorStream <CLStream, ILEmitter>(
                Kernel.KernelStreamParamIdx,
                emitter);

            // Load kernel
            emitter.Emit(LocalOperation.Load, kernelLocal);

            // Load dimensions
            KernelLauncherBuilder.EmitLoadRuntimeKernelConfig(
                entryPoint,
                emitter,
                Kernel.KernelParamDimensionIdx,
                customGroupSize);

            // Dispatch kernel
            emitter.EmitCall(LaunchKernelMethod);

            // Emit ThrowIfFailed
            emitter.EmitCall(ThrowIfFailedMethod);

            emitter.Emit(OpCodes.Ret);
            emitter.Finish();

            return(launcher.Finish());
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Generates a dynamic kernel-launcher method that will be just-in-time compiled
        /// during the first invocation. Using the generated launcher lowers the overhead
        /// for kernel launching dramatically, since unnecessary operations (like boxing)
        /// can be avoided.
        /// </summary>
        /// <param name="kernel">The kernel to generate a launcher for.</param>
        /// <param name="customGroupSize">The custom group size for the launching operation.</param>
        /// <returns>The generated launcher method.</returns>
        private MethodInfo GenerateKernelLauncherMethod(ILCompiledKernel kernel, int customGroupSize)
        {
            var entryPoint = kernel.EntryPoint;

            AdjustAndVerifyKernelGroupSize(ref customGroupSize, entryPoint);

            var launcher = entryPoint.CreateLauncherMethod(Context);
            var emitter  = new ILEmitter(launcher.ILGenerator);

            var cpuKernel = emitter.DeclareLocal(typeof(CPUKernel));

            KernelLauncherBuilder.EmitLoadKernelArgument <CPUKernel, ILEmitter>(
                Kernel.KernelInstanceParamIdx, emitter);
            emitter.Emit(LocalOperation.Store, cpuKernel);

            // Create an instance of the custom task type
            var task = emitter.DeclareLocal(kernel.TaskType);
            {
                var sharedMemSize = KernelLauncherBuilder.EmitSharedMemorySizeComputation(entryPoint, emitter);

                emitter.Emit(LocalOperation.Load, cpuKernel);
                emitter.EmitCall(
                    typeof(CPUKernel).GetProperty(
                        nameof(CPUKernel.KernelExecutionDelegate),
                        BindingFlags.NonPublic | BindingFlags.Public | BindingFlags.Instance).GetGetMethod(true));

                // Load custom user dimension
                KernelLauncherBuilder.EmitLoadDimensions(
                    entryPoint,
                    emitter,
                    Kernel.KernelParamDimensionIdx,
                    () => emitter.EmitNewObject(
                        typeof(Index3).GetConstructor(
                            new Type[] { typeof(int), typeof(int), typeof(int) })));

                // Load dimensions as index3 arguments
                KernelLauncherBuilder.EmitLoadDimensions(
                    entryPoint,
                    emitter,
                    Kernel.KernelParamDimensionIdx,
                    () => emitter.EmitNewObject(
                        typeof(Index3).GetConstructor(
                            new Type[] { typeof(int), typeof(int), typeof(int) })),
                    customGroupSize);

                // Load shared-memory size
                emitter.Emit(LocalOperation.Load, sharedMemSize);

                // Create new task object
                emitter.EmitNewObject(kernel.TaskConstructor);

                // Store task
                emitter.Emit(LocalOperation.Store, task);
            }

            // Assign parameters
            var parameters = entryPoint.Parameters;

            for (int i = 0, e = parameters.NumParameters; i < e; ++i)
            {
                emitter.Emit(LocalOperation.Load, task);
                emitter.Emit(ArgumentOperation.Load, i + Kernel.KernelParameterOffset);
                if (parameters.IsByRef(i))
                {
                    emitter.Emit(OpCodes.Ldobj, parameters[i]);
                }
                emitter.Emit(OpCodes.Stfld, kernel.TaskArgumentMapping[i]);
            }

            // Launch task: ((CPUKernel)kernel).CPUAccelerator.Launch(task);
            emitter.Emit(LocalOperation.Load, cpuKernel);
            emitter.EmitCall(
                typeof(CPUKernel).GetProperty(
                    nameof(CPUKernel.CPUAccelerator)).GetGetMethod(false));
            emitter.Emit(LocalOperation.Load, task);
            emitter.EmitCall(
                typeof(CPUAccelerator).GetMethod(
                    nameof(CPUAccelerator.Launch),
                    BindingFlags.NonPublic | BindingFlags.Instance));

            // End of launch method
            emitter.Emit(OpCodes.Ret);
            emitter.Finish();

            return(launcher.Finish());
        }
Ejemplo n.º 3
0
        /// <summary cref="KernelAccelerator{TCompiledKernel, TKernel}
        /// .GenerateKernelLauncherMethod(TCompiledKernel, int)"/>
        protected override MethodInfo GenerateKernelLauncherMethod(
            CLCompiledKernel kernel,
            int customGroupSize)
        {
            var entryPoint = kernel.EntryPoint;

            AdjustAndVerifyKernelGroupSize(ref customGroupSize, entryPoint);

            // Add support for by ref parameters
            if (entryPoint.HasByRefParameters)
            {
                throw new NotSupportedException(
                          ErrorMessages.NotSupportedByRefKernelParameters);
            }

            using var scopedLock = entryPoint.CreateLauncherMethod(
                      Context.RuntimeSystem,
                      out var launcher);
            var emitter = new ILEmitter(launcher.ILGenerator);

            // Load kernel instance
            var kernelLocal = emitter.DeclareLocal(typeof(CLKernel));

            KernelLauncherBuilder.EmitLoadKernelArgument <CLKernel, ILEmitter>(
                Kernel.KernelInstanceParamIdx,
                emitter);
            emitter.Emit(LocalOperation.Store, kernelLocal);

            // Map all kernel arguments
            var argumentMapper = Backend.ArgumentMapper;

            argumentMapper.Map(
                emitter,
                kernelLocal,
                Context.TypeContext,
                entryPoint);

            // Load current driver API
            emitter.EmitCall(GetCLAPIMethod);

            // Load stream
            KernelLauncherBuilder.EmitLoadAcceleratorStream <CLStream, ILEmitter>(
                Kernel.KernelStreamParamIdx,
                emitter);

            // Load kernel
            emitter.Emit(LocalOperation.Load, kernelLocal);

            // Load dimensions
            KernelLauncherBuilder.EmitLoadRuntimeKernelConfig(
                entryPoint,
                emitter,
                Kernel.KernelParamDimensionIdx,
                MaxGridSize,
                MaxGroupSize,
                customGroupSize);

            // Dispatch kernel
            var launchMethod = GenericLaunchKernelMethod.MakeGenericMethod(
                entryPoint.SharedMemory.HasDynamicMemory
                ? typeof(DynamicSharedMemoryHandler)
                : typeof(DefaultLaunchHandler));

            emitter.EmitCall(launchMethod);

            // Emit ThrowIfFailed
            emitter.EmitCall(ThrowIfFailedMethod);

            emitter.Emit(OpCodes.Ret);
            emitter.Finish();

            return(launcher.Finish());
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Generates a dynamic kernel-launcher method that will be just-in-time compiled
        /// during the first invocation. Using the generated launcher lowers the overhead
        /// for kernel launching dramatically, since unnecessary operations (like boxing)
        /// can be avoided.
        /// </summary>
        /// <param name="kernel">The kernel to generate a launcher for.</param>
        /// <param name="customGroupSize">
        /// The custom group size for the launching operation.
        /// </param>
        /// <returns>The generated launcher method.</returns>
        private MethodInfo GenerateKernelLauncherMethod(
            ILCompiledKernel kernel,
            int customGroupSize)
        {
            var entryPoint = kernel.EntryPoint;

            AdjustAndVerifyKernelGroupSize(ref customGroupSize, entryPoint);

            // Add support for by ref parameters
            if (entryPoint.HasByRefParameters)
            {
                throw new NotSupportedException(
                          ErrorMessages.NotSupportedByRefKernelParameters);
            }

            using var scopedLock = entryPoint.CreateLauncherMethod(
                      Context.RuntimeSystem,
                      out var launcher);
            var emitter = new ILEmitter(launcher.ILGenerator);

            // Pretend to map kernel arguments (like a GPU accelerator would perform).
            var argumentMapper = Backend.ArgumentMapper;

            argumentMapper.Map(entryPoint);

            var cpuKernel = emitter.DeclareLocal(typeof(CPUKernel));

            KernelLauncherBuilder.EmitLoadKernelArgument <CPUKernel, ILEmitter>(
                Kernel.KernelInstanceParamIdx, emitter);
            emitter.Emit(LocalOperation.Store, cpuKernel);

            // Create an instance of the custom task type
            var task = emitter.DeclareLocal(kernel.TaskType);
            {
                emitter.Emit(LocalOperation.Load, cpuKernel);
                emitter.EmitCall(CPUKernel.GetKernelExecutionDelegate);

                // Load custom user dimension
                KernelLauncherBuilder.EmitLoadKernelConfig(
                    entryPoint,
                    emitter,
                    Kernel.KernelParamDimensionIdx,
                    MaxGridSize,
                    MaxGroupSize);

                // Load dimensions
                KernelLauncherBuilder.EmitLoadRuntimeKernelConfig(
                    entryPoint,
                    emitter,
                    Kernel.KernelParamDimensionIdx,
                    MaxGridSize,
                    MaxGroupSize,
                    customGroupSize);

                // Create new task object
                emitter.EmitNewObject(kernel.TaskConstructor);

                // Store task
                emitter.Emit(LocalOperation.Store, task);
            }

            // Assign parameters
            var parameters = entryPoint.Parameters;

            for (int i = 0, e = parameters.Count; i < e; ++i)
            {
                emitter.Emit(LocalOperation.Load, task);
                emitter.Emit(ArgumentOperation.Load, i + Kernel.KernelParameterOffset);
                if (parameters.IsByRef(i))
                {
                    emitter.Emit(OpCodes.Ldobj, parameters[i]);
                }
                emitter.Emit(OpCodes.Stfld, kernel.TaskArgumentMapping[i]);
            }

            // Launch task: ((CPUKernel)kernel).CPUAccelerator.Launch(task);
            emitter.Emit(LocalOperation.Load, cpuKernel);
            emitter.EmitCall(
                typeof(CPUKernel).GetProperty(
                    nameof(CPUKernel.CPUAccelerator)).GetGetMethod(false));
            emitter.Emit(LocalOperation.Load, task);
            emitter.EmitCall(
                typeof(CPUAccelerator).GetMethod(
                    nameof(CPUAccelerator.Launch),
                    BindingFlags.NonPublic | BindingFlags.Instance));

            // End of launch method
            emitter.Emit(OpCodes.Ret);
            emitter.Finish();

            return(launcher.Finish());
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Generates a dynamic kernel-launcher method that will be just-in-time compiled
        /// during the first invocation. Using the generated launcher lowers the overhead
        /// for kernel launching dramatically, since unnecessary operations (like boxing)
        /// can be avoided.
        /// </summary>
        /// <param name="kernel">The kernel to generate a launcher for.</param>
        /// <param name="taskType">The created task.</param>
        /// <param name="taskArgumentMapping">The created task-argument mapping that maps parameter indices of uniforms</param>
        /// <param name="customGroupSize">The custom group size for the launching operation.</param>
        /// <returns>The generated launcher method.</returns>
        private MethodInfo GenerateKernelLauncherMethod(
            CompiledKernel kernel,
            out Type taskType,
            out FieldInfo[] taskArgumentMapping,
            int customGroupSize)
        {
            var entryPoint = kernel.EntryPoint;

            AdjustAndVerifyKernelGroupSize(ref customGroupSize, entryPoint);

            var uniformVariables    = entryPoint.UniformVariables;
            var numUniformVariables = uniformVariables.Length;

            var kernelParamTypes = entryPoint.CreateCustomParameterTypes();
            int numKernelParams  = kernelParamTypes.Length;
            var funcParamTypes   = new Type[numKernelParams + Kernel.KernelParameterOffset];

            GenerateAcceleratorTask(
                kernel,
                kernelParamTypes,
                out taskType,
                out ConstructorInfo taskConstructor,
                out taskArgumentMapping);

            // Launcher(Kernel, AcceleratorStream, [Index], ...)
            funcParamTypes[Kernel.KernelInstanceParamIdx]  = typeof(Kernel);
            funcParamTypes[Kernel.KernelStreamParamIdx]    = typeof(AcceleratorStream);
            funcParamTypes[Kernel.KernelParamDimensionIdx] = entryPoint.KernelIndexType;
            kernelParamTypes.CopyTo(funcParamTypes, Kernel.KernelParameterOffset);

            // Create the actual launcher method
            var func = new DynamicMethod(
                kernel.EntryName,
                typeof(void),
                funcParamTypes,
                typeof(Kernel));
            var funcParams  = func.GetParameters();
            var ilGenerator = func.GetILGenerator();

            var cpuKernel = ilGenerator.DeclareLocal(typeof(CPUKernel));

            KernelLauncherBuilder.EmitLoadKernelArgument <CPUKernel>(Kernel.KernelInstanceParamIdx, ilGenerator);
            ilGenerator.Emit(OpCodes.Stloc, cpuKernel);

            // Create an instance of the custom task type
            var task = ilGenerator.DeclareLocal(taskType);

            {
                var sharedMemSize = KernelLauncherBuilder.EmitSharedMemorySizeComputation(
                    entryPoint,
                    ilGenerator,
                    paramIdx => funcParams[paramIdx + Kernel.KernelParameterOffset]);

                ilGenerator.Emit(OpCodes.Ldloc, cpuKernel);
                ilGenerator.Emit(
                    OpCodes.Call,
                    typeof(CPUKernel).GetProperty(
                        nameof(CPUKernel.KernelExecutionDelegate),
                        BindingFlags.NonPublic | BindingFlags.Public | BindingFlags.Instance).GetGetMethod(true));

                // Load custom user dimension
                KernelLauncherBuilder.EmitLoadDimensions(
                    entryPoint,
                    ilGenerator,
                    Kernel.KernelParamDimensionIdx,
                    () => ilGenerator.Emit(
                        OpCodes.Newobj,
                        typeof(Index3).GetConstructor(
                            new Type[] { typeof(int), typeof(int), typeof(int) })));

                // Load dimensions as index3 arguments
                KernelLauncherBuilder.EmitLoadDimensions(
                    entryPoint,
                    ilGenerator,
                    Kernel.KernelParamDimensionIdx,
                    () => ilGenerator.Emit(
                        OpCodes.Newobj,
                        typeof(Index3).GetConstructor(
                            new Type[] { typeof(int), typeof(int), typeof(int) })),
                    customGroupSize);

                // Load shared-memory size
                ilGenerator.Emit(OpCodes.Ldloc, sharedMemSize);

                // Create new task object
                ilGenerator.Emit(OpCodes.Newobj, taskConstructor);

                // Store task
                ilGenerator.Emit(OpCodes.Stloc, task);
            }

            // Assign parameters
            for (int i = 0; i < numUniformVariables; ++i)
            {
                ilGenerator.Emit(OpCodes.Ldloc, task);
                ilGenerator.Emit(OpCodes.Ldarg, i + Kernel.KernelParameterOffset);
                ilGenerator.Emit(OpCodes.Stfld, taskArgumentMapping[i]);
            }

            // Launch task: ((CPUKernel)kernel).CPUAccelerator.Launch(task);
            ilGenerator.Emit(OpCodes.Ldloc, cpuKernel);
            ilGenerator.Emit(
                OpCodes.Call,
                typeof(CPUKernel).GetProperty(
                    nameof(CPUKernel.CPUAccelerator)).GetGetMethod(false));
            ilGenerator.Emit(OpCodes.Ldloc, task);
            ilGenerator.Emit(
                OpCodes.Call,
                typeof(CPUAccelerator).GetMethod(
                    nameof(CPUAccelerator.Launch),
                    BindingFlags.NonPublic | BindingFlags.Instance));

            // End of launch method
            ilGenerator.Emit(OpCodes.Ret);

            return(func);
        }