/// <summary>
/// Builds the dynamic launcher method for the given kernel. The launcher is
/// just-in-time compiled on its first invocation; launching through it keeps the
/// launch overhead low because unnecessary operations (such as boxing) are
/// avoided.
/// </summary>
/// <param name="kernel">The kernel to generate a launcher for.</param>
/// <param name="customGroupSize">
/// The custom group size for the launching operation. It is passed by reference
/// to <c>AdjustAndVerifyKernelGroupSize</c> before any code is emitted.
/// </param>
/// <returns>The generated launcher method.</returns>
private MethodInfo GenerateKernelLauncherMethod(ILCompiledKernel kernel, int customGroupSize)
{
    var kernelEntry = kernel.EntryPoint;
    AdjustAndVerifyKernelGroupSize(ref customGroupSize, kernelEntry);

    var launcherMethod = kernelEntry.CreateLauncherMethod(Context);
    var il = new ILEmitter(launcherMethod.ILGenerator);

    // Keep the kernel instance argument in a CPUKernel-typed local.
    var kernelLocal = il.DeclareLocal(typeof(CPUKernel));
    KernelLauncherBuilder.EmitLoadKernelArgument<CPUKernel, ILEmitter>(
        Kernel.KernelInstanceParamIdx,
        il);
    il.Emit(LocalOperation.Store, kernelLocal);

    // Local that receives the instance of the custom task type.
    var taskLocal = il.DeclareLocal(kernel.TaskType);

    // The shared-memory size is computed first and loaded again further below.
    var sharedMemoryLocal = KernelLauncherBuilder.EmitSharedMemorySizeComputation(
        kernelEntry,
        il);

    // Load the kernel-execution delegate of the kernel instance.
    var executionDelegateGetter = typeof(CPUKernel)
        .GetProperty(
            nameof(CPUKernel.KernelExecutionDelegate),
            BindingFlags.NonPublic | BindingFlags.Public | BindingFlags.Instance)
        .GetGetMethod(true);
    il.Emit(LocalOperation.Load, kernelLocal);
    il.EmitCall(executionDelegateGetter);

    // Index3(int, int, int) constructor used by both dimension loads.
    var index3Constructor = typeof(Index3).GetConstructor(
        new[] { typeof(int), typeof(int), typeof(int) });

    // Load custom user dimension (no custom group size is passed here).
    KernelLauncherBuilder.EmitLoadDimensions(
        kernelEntry,
        il,
        Kernel.KernelParamDimensionIdx,
        () => il.EmitNewObject(index3Constructor));

    // Load dimensions as Index3 arguments, this time passing the custom group size.
    KernelLauncherBuilder.EmitLoadDimensions(
        kernelEntry,
        il,
        Kernel.KernelParamDimensionIdx,
        () => il.EmitNewObject(index3Constructor),
        customGroupSize);

    // Load shared-memory size, construct the task and store it.
    il.Emit(LocalOperation.Load, sharedMemoryLocal);
    il.EmitNewObject(kernel.TaskConstructor);
    il.Emit(LocalOperation.Store, taskLocal);

    // Copy every kernel parameter into its mapped task field.
    var kernelParameters = kernelEntry.Parameters;
    var parameterCount = kernelParameters.NumParameters;
    for (var index = 0; index < parameterCount; index++)
    {
        il.Emit(LocalOperation.Load, taskLocal);
        il.Emit(ArgumentOperation.Load, index + Kernel.KernelParameterOffset);

        // By-ref parameters are dereferenced before they are stored.
        if (kernelParameters.IsByRef(index))
        {
            il.Emit(OpCodes.Ldobj, kernelParameters[index]);
        }

        il.Emit(OpCodes.Stfld, kernel.TaskArgumentMapping[index]);
    }

    // Launch task: ((CPUKernel)kernel).CPUAccelerator.Launch(task);
    var acceleratorGetter = typeof(CPUKernel)
        .GetProperty(nameof(CPUKernel.CPUAccelerator))
        .GetGetMethod(false);
    il.Emit(LocalOperation.Load, kernelLocal);
    il.EmitCall(acceleratorGetter);

    var launchMethod = typeof(CPUAccelerator).GetMethod(
        nameof(CPUAccelerator.Launch),
        BindingFlags.NonPublic | BindingFlags.Instance);
    il.Emit(LocalOperation.Load, taskLocal);
    il.EmitCall(launchMethod);

    // End of launch method
    il.Emit(OpCodes.Ret);
    il.Finish();

    return launcherMethod.Finish();
}
/// <summary>
/// Builds the dynamic launcher method for the given kernel. The launcher is
/// just-in-time compiled on its first invocation; launching through it keeps the
/// launch overhead low because unnecessary operations (such as boxing) are
/// avoided.
/// </summary>
/// <param name="kernel">The kernel to generate a launcher for.</param>
/// <param name="customGroupSize">
/// The custom group size for the launching operation. It is passed by reference
/// to <c>AdjustAndVerifyKernelGroupSize</c> before any code is emitted.
/// </param>
/// <returns>The generated launcher method.</returns>
/// <exception cref="NotSupportedException">
/// Thrown when the entry point reports by-ref kernel parameters.
/// </exception>
private MethodInfo GenerateKernelLauncherMethod(
    ILCompiledKernel kernel,
    int customGroupSize)
{
    var kernelEntry = kernel.EntryPoint;
    AdjustAndVerifyKernelGroupSize(ref customGroupSize, kernelEntry);

    // Entry points with by-ref parameters are rejected (support is still to be added).
    if (kernelEntry.HasByRefParameters)
    {
        throw new NotSupportedException(
            ErrorMessages.NotSupportedByRefKernelParameters);
    }

    // The scoped lock stays alive until this method returns.
    using var launcherLock = kernelEntry.CreateLauncherMethod(
        Context.RuntimeSystem,
        out var launcherMethod);
    var il = new ILEmitter(launcherMethod.ILGenerator);

    // Pretend to map kernel arguments (like a GPU accelerator would perform).
    Backend.ArgumentMapper.Map(kernelEntry);

    // Keep the kernel instance argument in a CPUKernel-typed local.
    var kernelLocal = il.DeclareLocal(typeof(CPUKernel));
    KernelLauncherBuilder.EmitLoadKernelArgument<CPUKernel, ILEmitter>(
        Kernel.KernelInstanceParamIdx,
        il);
    il.Emit(LocalOperation.Store, kernelLocal);

    // Local that receives the instance of the custom task type.
    var taskLocal = il.DeclareLocal(kernel.TaskType);

    // Load the kernel-execution delegate of the kernel instance.
    il.Emit(LocalOperation.Load, kernelLocal);
    il.EmitCall(CPUKernel.GetKernelExecutionDelegate);

    // Load custom user dimension (no custom group size is passed here).
    KernelLauncherBuilder.EmitLoadKernelConfig(
        kernelEntry,
        il,
        Kernel.KernelParamDimensionIdx,
        MaxGridSize,
        MaxGroupSize);

    // Load dimensions, this time passing the custom group size.
    KernelLauncherBuilder.EmitLoadRuntimeKernelConfig(
        kernelEntry,
        il,
        Kernel.KernelParamDimensionIdx,
        MaxGridSize,
        MaxGroupSize,
        customGroupSize);

    // Construct the task object and store it.
    il.EmitNewObject(kernel.TaskConstructor);
    il.Emit(LocalOperation.Store, taskLocal);

    // Copy every kernel parameter into its mapped task field.
    var kernelParameters = kernelEntry.Parameters;
    var parameterCount = kernelParameters.Count;
    for (var index = 0; index < parameterCount; index++)
    {
        il.Emit(LocalOperation.Load, taskLocal);
        il.Emit(ArgumentOperation.Load, index + Kernel.KernelParameterOffset);

        // By-ref parameters are dereferenced before they are stored.
        if (kernelParameters.IsByRef(index))
        {
            il.Emit(OpCodes.Ldobj, kernelParameters[index]);
        }

        il.Emit(OpCodes.Stfld, kernel.TaskArgumentMapping[index]);
    }

    // Launch task: ((CPUKernel)kernel).CPUAccelerator.Launch(task);
    var acceleratorGetter = typeof(CPUKernel)
        .GetProperty(nameof(CPUKernel.CPUAccelerator))
        .GetGetMethod(false);
    il.Emit(LocalOperation.Load, kernelLocal);
    il.EmitCall(acceleratorGetter);

    var launchMethod = typeof(CPUAccelerator).GetMethod(
        nameof(CPUAccelerator.Launch),
        BindingFlags.NonPublic | BindingFlags.Instance);
    il.Emit(LocalOperation.Load, taskLocal);
    il.EmitCall(launchMethod);

    // End of launch method
    il.Emit(OpCodes.Ret);
    il.Finish();

    return launcherMethod.Finish();
}