/// <summary>
/// Builds the launcher method for the given compiled kernel. The launcher is
/// emitted as IL and gets just-in-time compiled during its first invocation.
/// Launching through it keeps the overhead low, since unnecessary operations
/// (like boxing) can be avoided.
/// </summary>
/// <param name="kernel">The compiled kernel that needs a launcher.</param>
/// <param name="customGroupSize">The custom group size to use for the launch.</param>
/// <returns>The finished launcher method.</returns>
private MethodInfo GenerateKernelLauncherMethod(ILCompiledKernel kernel, int customGroupSize)
{
    var entryPoint = kernel.EntryPoint;
    AdjustAndVerifyKernelGroupSize(ref customGroupSize, entryPoint);

    // Resolve every reflection handle up front so that the emission code below
    // reads as a plain instruction sequence.
    var executionDelegateGetter = typeof(CPUKernel)
        .GetProperty(
            nameof(CPUKernel.KernelExecutionDelegate),
            BindingFlags.NonPublic | BindingFlags.Public | BindingFlags.Instance)
        .GetGetMethod(true);
    var acceleratorGetter = typeof(CPUKernel)
        .GetProperty(nameof(CPUKernel.CPUAccelerator))
        .GetGetMethod(false);
    var launchMethod = typeof(CPUAccelerator).GetMethod(
        nameof(CPUAccelerator.Launch),
        BindingFlags.NonPublic | BindingFlags.Instance);
    var index3Constructor = typeof(Index3).GetConstructor(
        new Type[] { typeof(int), typeof(int), typeof(int) });

    var launcher = entryPoint.CreateLauncherMethod(Context);
    var emitter = new ILEmitter(launcher.ILGenerator);

    // Keep the kernel argument in a local of type CPUKernel
    var kernelLocal = emitter.DeclareLocal(typeof(CPUKernel));
    KernelLauncherBuilder.EmitLoadKernelArgument<CPUKernel, ILEmitter>(
        Kernel.KernelInstanceParamIdx,
        emitter);
    emitter.Emit(LocalOperation.Store, kernelLocal);

    // Construct the custom task object; everything pushed from here up to the
    // constructor call is consumed by that call.
    var taskLocal = emitter.DeclareLocal(kernel.TaskType);
    var sharedMemSizeLocal = KernelLauncherBuilder.EmitSharedMemorySizeComputation(
        entryPoint,
        emitter);

    // kernel.KernelExecutionDelegate
    emitter.Emit(LocalOperation.Load, kernelLocal);
    emitter.EmitCall(executionDelegateGetter);

    // The custom user dimension
    KernelLauncherBuilder.EmitLoadDimensions(
        entryPoint,
        emitter,
        Kernel.KernelParamDimensionIdx,
        () => emitter.EmitNewObject(index3Constructor));

    // The dimensions as index3 arguments (taking the custom group size into account)
    KernelLauncherBuilder.EmitLoadDimensions(
        entryPoint,
        emitter,
        Kernel.KernelParamDimensionIdx,
        () => emitter.EmitNewObject(index3Constructor),
        customGroupSize);

    // The shared-memory size, followed by the constructor call and the task store
    emitter.Emit(LocalOperation.Load, sharedMemSizeLocal);
    emitter.EmitNewObject(kernel.TaskConstructor);
    emitter.Emit(LocalOperation.Store, taskLocal);

    // Copy every kernel parameter into its associated task field
    var parameters = entryPoint.Parameters;
    var taskFields = kernel.TaskArgumentMapping;
    int parameterCount = parameters.NumParameters;
    for (int paramIdx = 0; paramIdx < parameterCount; ++paramIdx)
    {
        emitter.Emit(LocalOperation.Load, taskLocal);
        emitter.Emit(ArgumentOperation.Load, paramIdx + Kernel.KernelParameterOffset);

        // By-ref parameters are dereferenced before they are stored
        if (parameters.IsByRef(paramIdx))
        {
            emitter.Emit(OpCodes.Ldobj, parameters[paramIdx]);
        }

        emitter.Emit(OpCodes.Stfld, taskFields[paramIdx]);
    }

    // Perform the launch: ((CPUKernel)kernel).CPUAccelerator.Launch(task);
    emitter.Emit(LocalOperation.Load, kernelLocal);
    emitter.EmitCall(acceleratorGetter);
    emitter.Emit(LocalOperation.Load, taskLocal);
    emitter.EmitCall(launchMethod);

    // Terminate the launcher body
    emitter.Emit(OpCodes.Ret);
    emitter.Finish();

    return launcher.Finish();
}
/// <summary>
/// Builds a dynamic launcher method for the given compiled kernel. The launcher
/// gets just-in-time compiled during its first invocation. Launching through it
/// keeps the overhead low, since unnecessary operations (like boxing) can be
/// avoided.
/// </summary>
/// <param name="kernel">The compiled kernel that needs a launcher.</param>
/// <param name="taskType">Receives the task type that was created for the kernel.</param>
/// <param name="taskArgumentMapping">
/// Receives the created task-argument mapping. Entry <c>i</c> is the task field
/// that the launcher fills from the i-th uniform launcher argument.
/// </param>
/// <param name="customGroupSize">The custom group size to use for the launch.</param>
/// <returns>The generated launcher method.</returns>
private MethodInfo GenerateKernelLauncherMethod(
    CompiledKernel kernel,
    out Type taskType,
    out FieldInfo[] taskArgumentMapping,
    int customGroupSize)
{
    var entryPoint = kernel.EntryPoint;
    AdjustAndVerifyKernelGroupSize(ref customGroupSize, entryPoint);

    var uniformCount = entryPoint.UniformVariables.Length;

    var kernelParamTypes = entryPoint.CreateCustomParameterTypes();
    var launcherParamTypes = new Type[kernelParamTypes.Length + Kernel.KernelParameterOffset];

    GenerateAcceleratorTask(
        kernel,
        kernelParamTypes,
        out taskType,
        out ConstructorInfo taskConstructor,
        out taskArgumentMapping);

    // Resolve every reflection handle up front so that the emission code below
    // reads as a plain instruction sequence.
    var executionDelegateGetter = typeof(CPUKernel)
        .GetProperty(
            nameof(CPUKernel.KernelExecutionDelegate),
            BindingFlags.NonPublic | BindingFlags.Public | BindingFlags.Instance)
        .GetGetMethod(true);
    var acceleratorGetter = typeof(CPUKernel)
        .GetProperty(nameof(CPUKernel.CPUAccelerator))
        .GetGetMethod(false);
    var launchMethod = typeof(CPUAccelerator).GetMethod(
        nameof(CPUAccelerator.Launch),
        BindingFlags.NonPublic | BindingFlags.Instance);
    var index3Constructor = typeof(Index3).GetConstructor(
        new Type[] { typeof(int), typeof(int), typeof(int) });

    // Signature: Launcher(Kernel, AcceleratorStream, [Index], ...)
    launcherParamTypes[Kernel.KernelInstanceParamIdx] = typeof(Kernel);
    launcherParamTypes[Kernel.KernelStreamParamIdx] = typeof(AcceleratorStream);
    launcherParamTypes[Kernel.KernelParamDimensionIdx] = entryPoint.KernelIndexType;
    kernelParamTypes.CopyTo(launcherParamTypes, Kernel.KernelParameterOffset);

    // Set up the actual launcher method
    var launcher = new DynamicMethod(
        kernel.EntryName,
        typeof(void),
        launcherParamTypes,
        typeof(Kernel));
    var launcherParams = launcher.GetParameters();
    var il = launcher.GetILGenerator();

    // Keep the kernel argument in a local of type CPUKernel
    var kernelLocal = il.DeclareLocal(typeof(CPUKernel));
    KernelLauncherBuilder.EmitLoadKernelArgument<CPUKernel>(
        Kernel.KernelInstanceParamIdx,
        il);
    il.Emit(OpCodes.Stloc, kernelLocal);

    // Construct the custom task object; everything pushed from here up to the
    // constructor call is consumed by that call.
    var taskLocal = il.DeclareLocal(taskType);
    var sharedMemSizeLocal = KernelLauncherBuilder.EmitSharedMemorySizeComputation(
        entryPoint,
        il,
        index => launcherParams[index + Kernel.KernelParameterOffset]);

    // kernel.KernelExecutionDelegate
    il.Emit(OpCodes.Ldloc, kernelLocal);
    il.Emit(OpCodes.Call, executionDelegateGetter);

    // The custom user dimension
    KernelLauncherBuilder.EmitLoadDimensions(
        entryPoint,
        il,
        Kernel.KernelParamDimensionIdx,
        () => il.Emit(OpCodes.Newobj, index3Constructor));

    // The dimensions as index3 arguments (taking the custom group size into account)
    KernelLauncherBuilder.EmitLoadDimensions(
        entryPoint,
        il,
        Kernel.KernelParamDimensionIdx,
        () => il.Emit(OpCodes.Newobj, index3Constructor),
        customGroupSize);

    // The shared-memory size, followed by the constructor call and the task store
    il.Emit(OpCodes.Ldloc, sharedMemSizeLocal);
    il.Emit(OpCodes.Newobj, taskConstructor);
    il.Emit(OpCodes.Stloc, taskLocal);

    // Copy every uniform argument into its associated task field
    for (int uniformIdx = 0; uniformIdx < uniformCount; ++uniformIdx)
    {
        il.Emit(OpCodes.Ldloc, taskLocal);
        il.Emit(OpCodes.Ldarg, uniformIdx + Kernel.KernelParameterOffset);
        il.Emit(OpCodes.Stfld, taskArgumentMapping[uniformIdx]);
    }

    // Perform the launch: ((CPUKernel)kernel).CPUAccelerator.Launch(task);
    il.Emit(OpCodes.Ldloc, kernelLocal);
    il.Emit(OpCodes.Call, acceleratorGetter);
    il.Emit(OpCodes.Ldloc, taskLocal);
    il.Emit(OpCodes.Call, launchMethod);

    // Terminate the launcher body
    il.Emit(OpCodes.Ret);

    return launcher;
}