/// <summary>
/// Creates a kernel key from an entry point description and a kernel
/// specialization.
/// </summary>
/// <param name="entry">The entry point description stored in the key.</param>
/// <param name="specialization">The kernel specialization stored in the key.</param>
public CachedCompiledKernelKey(
    EntryPointDescription entry,
    KernelSpecialization specialization)
{
    (Entry, Specialization) = (entry, specialization);
}
/// <summary>
/// Compiles the given kernel method, loads it into the accelerator and launches
/// it on a freshly created stream, waiting for the stream to finish.
/// </summary>
/// <typeparam name="TIndex">The index type of the launch dimension.</typeparam>
/// <param name="kernel">The kernel method to compile and launch.</param>
/// <param name="dimension">The launch dimension.</param>
/// <param name="arguments">The arguments forwarded to the kernel launch.</param>
public void Execute<TIndex>(
    MethodInfo kernel,
    TIndex dimension,
    params object[] arguments)
    where TIndex : struct, IIndex
{
    using var launchStream = Accelerator.CreateStream();

    // Manual compilation step: the accelerator's backend turns the method
    // into a compiled kernel.
    var compiler = Accelerator.Backend;
    Output.WriteLine($"Compiling '{kernel.Name}'");

    // A KernelConfig dimension selects the explicitly grouped entry point;
    // every other index type uses the implicitly grouped one.
    EntryPointDescription description;
    if (typeof(TIndex) == typeof(KernelConfig))
    {
        description = EntryPointDescription.FromExplicitlyGroupedKernel(kernel);
    }
    else
    {
        description = EntryPointDescription.FromImplicitlyGroupedKernel(kernel);
    }
    var compiledKernel = compiler.Compile(description, new KernelSpecialization());

    // Load the compiled kernel into the accelerator.
    Output.WriteLine($"Loading '{kernel.Name}'");
    using var loadedKernel = Accelerator.LoadKernel(compiledKernel);

    // Launch and block until the stream has completed.
    Output.WriteLine($"Launching '{kernel.Name}'");
    loadedKernel.Launch(launchStream, dimension, arguments);
    launchStream.Synchronize();
}
/// <summary>
/// Compiles the given method as an implicitly grouped kernel, loads it as an
/// auto-grouped kernel on <c>GPUHelper.accelerator</c> and adds it to
/// <c>kernels</c>.
/// </summary>
/// <param name="kernelFunction">The kernel method to compile.</param>
/// <returns>The loaded kernel.</returns>
internal static Kernel CreateKernel(MethodInfo kernelFunction)
{
    var description = EntryPointDescription.FromImplicitlyGroupedKernel(kernelFunction);
    var compiled = GPUHelper.accelerator.Backend.Compile(
        description,
        KernelSpecialization.Empty);
    var loaded = GPUHelper.accelerator.LoadAutoGroupedKernel(compiled);

    kernels.Add(loaded);
    return loaded;
}
/// <summary>
/// Creates a specialization cache and stores the supplied accelerator, kernel
/// method, loader, entry point and specialization.
/// </summary>
/// <param name="accelerator">The parent accelerator.</param>
/// <param name="kernelMethod">The IR kernel method.</param>
/// <param name="loader">The loader instance.</param>
/// <param name="entry">The associated entry point.</param>
/// <param name="specialization">The kernel specialization.</param>
public SpecializationCache(
    Accelerator accelerator,
    Method kernelMethod,
    TLoader loader,
    EntryPointDescription entry,
    KernelSpecialization specialization)
{
    (Accelerator, KernelMethod, Loader) = (accelerator, kernelMethod, loader);
    (Entry, KernelSpecialization) = (entry, specialization);
}
/// <summary>
/// Compiles <c>MyKernel</c> as an implicitly grouped kernel, loads it with an
/// automatically determined group size, launches it over a 1024-element
/// buffer and reports every element that does not equal <c>42 + index</c>.
/// </summary>
/// <param name="accelerator">The accelerator to compile for and launch on.</param>
static void CompileAndLaunchAutoGroupedKernel(Accelerator accelerator)
{
    // The backend of this device performs the compilation.
    var deviceBackend = accelerator.Backend;

    // Look the kernel method up via reflection and compile it.
    var kernelMethod = typeof(Program).GetMethod(
        nameof(MyKernel),
        BindingFlags.NonPublic | BindingFlags.Static);
    var description = EntryPointDescription.FromImplicitlyGroupedKernel(kernelMethod);
    var compiled = deviceBackend.Compile(description, default);

    // Info: on a CudaAccelerator the compiled kernel can be cast to a
    // PTXCompiledKernel in order to extract the PTX assembly code.

    // Load the implicitly grouped kernel with an automatically determined
    // group size. The loaded kernel has to be disposed manually.
    using (var loadedKernel = accelerator.LoadAutoGroupedKernel(compiled))
    {
        var launch = loadedKernel
            .CreateLauncherDelegate<Action<AcceleratorStream, Index1, ArrayView<int>, int>>();

        using (var deviceBuffer = accelerator.Allocate<int>(1024))
        {
            // One thread per buffer element; the typed delegate is used because
            // the generic kernel.Launch method involves boxing.
            launch(
                accelerator.DefaultStream,
                deviceBuffer.Length,
                deviceBuffer.View,
                42);

            // Block until the kernel has finished.
            accelerator.Synchronize();

            // Read the results back and verify them.
            var results = deviceBuffer.GetAsArray();
            for (int index = 0; index < results.Length; ++index)
            {
                var found = results[index];
                if (found == 42 + index)
                {
                    continue;
                }

                Console.WriteLine($"Error at element location {index}: {found} found");
            }
        }

        accelerator.Synchronize();
    }
}
/// <summary>
/// Compiles <c>GroupedKernel</c> as an explicitly grouped kernel, launches it
/// over a 1024-element buffer with the given group size and reports every
/// element that does not equal <c>42 + index</c>.
/// </summary>
/// <param name="accelerator">The accelerator to compile for and launch on.</param>
/// <param name="groupSize">The group size used for the launch configuration.</param>
static void CompileAndLaunchKernel(Accelerator accelerator, int groupSize)
{
    // The backend of this device performs the compilation.
    var deviceBackend = accelerator.Backend;

    // Look the kernel method up via reflection and compile it.
    var kernelMethod = typeof(Program).GetMethod(
        nameof(GroupedKernel),
        BindingFlags.NonPublic | BindingFlags.Static);
    var description = EntryPointDescription.FromExplicitlyGroupedKernel(kernelMethod);
    var compiled = deviceBackend.Compile(description, default);

    // Info: on a CudaAccelerator the compiled kernel can be cast to a
    // PTXCompiledKernel in order to extract the PTX assembly code.

    // Load the explicitly grouped kernel. It has to be disposed manually.
    using (var loadedKernel = accelerator.LoadKernel(compiled))
    {
        var launch = loadedKernel
            .CreateLauncherDelegate<Action<AcceleratorStream, KernelConfig, ArrayView<int>, int>>();

        using (var deviceBuffer = accelerator.Allocate<int>(1024))
        {
            // The typed delegate is used because the generic kernel.Launch
            // method involves boxing.
            launch(
                accelerator.DefaultStream,
                // (number of groups rounded up, group size)
                ((deviceBuffer.Length + groupSize - 1) / groupSize, groupSize),
                deviceBuffer.View,
                42);

            accelerator.Synchronize();

            // Read the results back and verify them.
            var results = deviceBuffer.GetAsArray();
            var position = 0;
            foreach (var value in results)
            {
                if (value != 42 + position)
                {
                    Console.WriteLine($"Error at element location {position}: {value} found");
                }

                ++position;
            }
        }
    }
}
/// <summary>
/// Compiles <c>MyKernel</c> as an implicitly grouped kernel, loads it with a
/// custom group size, launches it over a 1024-element buffer and reports every
/// element that does not equal <c>42 + index</c>.
/// </summary>
/// <param name="accelerator">The accelerator to compile for and launch on.</param>
/// <param name="groupSize">The custom group size used when loading the kernel.</param>
static void CompileAndLaunchImplicitlyGroupedKernel(Accelerator accelerator, int groupSize)
{
    // The backend of this device performs the compilation.
    var deviceBackend = accelerator.GetBackend();

    // Look the kernel method up via reflection and compile it.
    var kernelMethod = typeof(Program).GetMethod(
        nameof(MyKernel),
        BindingFlags.NonPublic | BindingFlags.Static);
    var description = EntryPointDescription.FromImplicitlyGroupedKernel(kernelMethod);
    var compiled = deviceBackend.Compile(description, default);

    // Info: on a CudaAccelerator the compiled kernel can be cast to a
    // PTXCompiledKernel in order to extract the PTX assembly code.

    // Load the implicitly grouped kernel with the custom group size.
    // The loaded kernel has to be disposed manually.
    using var loadedKernel = accelerator.LoadImplicitlyGroupedKernel(compiled, groupSize);
    var launch = loadedKernel
        .CreateLauncherDelegate<Action<AcceleratorStream, Index1D, ArrayView<int>, int>>();

    using var deviceBuffer = accelerator.Allocate1D<int>(1024);

    // One thread per buffer element; the typed delegate is used because the
    // generic kernel.Launch method involves boxing.
    launch(
        accelerator.DefaultStream,
        (int)deviceBuffer.Length,
        deviceBuffer.View,
        42);

    // Copies the GPU buffer into a new CPU array. This implicitly calls
    // accelerator.DefaultStream.Synchronize(), so the kernel and the memory
    // copy are completed first.
    var results = deviceBuffer.GetAsArray1D();
    for (int index = 0; index < results.Length; ++index)
    {
        var found = results[index];
        if (found == 42 + index)
        {
            continue;
        }

        Console.WriteLine($"Error at element location {index}: {found} found");
    }
}
/// <summary>
/// For every non-CPU accelerator: compiles <c>GPU.PixelKernel</c> with an
/// OpenCL backend created for the AMD vendor, loads it as an auto-grouped
/// kernel, allocates the device buffers and, while <c>running</c> is set,
/// copies the image bytes back to the host and hands them to the main form.
/// </summary>
/// <remarks>
/// The kernel launch inside the loop is currently commented out. The loaded
/// kernel and all device buffers are released by <c>using</c> statements, so
/// they are disposed even when the loop exits through an exception.
/// </remarks>
/// <exception cref="System.InvalidOperationException">
/// Thrown when <c>GPU</c> has no public method named <c>PixelKernel</c>.
/// </exception>
public static void Main2()
{
    using (var context = new Context())
    {
        foreach (var acceleratorId in Accelerator.Accelerators)
        {
            if (acceleratorId.AcceleratorType == AcceleratorType.CPU)
            {
                continue;
            }

            using (var accelerator = Accelerator.Create(context, acceleratorId))
            {
                CompiledKernel compiledKernel;
                using (Backend b = new CLBackend(context, ILGPU.Runtime.OpenCL.CLAcceleratorVendor.AMD))
                {
                    MethodInfo methodInfo = typeof(GPU).GetMethod("PixelKernel");
                    if (methodInfo == null)
                    {
                        // GetMethod returns null when no matching public method exists;
                        // fail here with a clear message instead of inside the compiler.
                        throw new System.InvalidOperationException(
                            "Kernel method 'PixelKernel' was not found on type 'GPU'.");
                    }

                    KernelSpecialization spec = KernelSpecialization.Empty;
                    compiledKernel = b.Compile(
                        EntryPointDescription.FromImplicitlyGroupedKernel(methodInfo),
                        spec);
                    // debug: check kernel.Source for source text
                }

                // Alternative loaders kept for reference:
                // var kernel = accelerator.LoadAutoGroupedStreamKernel<Index2, ArrayView2D<FSMUnit>>(MathKernel);
                // kernel = accelerator.LoadAutoGroupedStreamKernel<Index2, ArrayView2D<Color3>, ArrayView<byte>, ArrayView2D<Neuron>>(PixelKernel);
                using (var kernel = accelerator.LoadAutoGroupedKernel(compiledKernel))
                using (MemoryBuffer2D<Color3> buffer = accelerator.Allocate<Color3>(pixelMap.GetLength(0), pixelMap.GetLength(1)))
                using (MemoryBuffer<byte> buffer2 = accelerator.Allocate<byte>(imageBytes.Length))
                using (MemoryBuffer2D<Neuron> buffer3 = accelerator.Allocate<Neuron>(nrn.GetLength(0), nrn.GetLength(1)))
                {
                    // Upload the complete neuron grid to the device.
                    buffer3.CopyFrom(
                        nrn,
                        new LongIndex2(0, 0),
                        new LongIndex2(0, 0),
                        new LongIndex2(nrn.GetLength(0), nrn.GetLength(1)));

                    while (running == true)
                    {
                        Stopwatch sw = new Stopwatch();
                        sw.Start();

                        // Only consumed by the (currently disabled) kernel launch below.
                        Index2 gridSize = new Index2(pixelMap.GetLength(0), pixelMap.GetLength(1));
                        //kernel(gridSize, buffer.View, buffer2.View, buffer3.View);
                        sw.OutputDelta("Kernel");

                        accelerator.Synchronize();
                        sw.OutputDelta("Sync");

                        // imageBytes = buffer2.GetAsArray();
                        buffer2.CopyTo(imageBytes, 0, 0, imageBytes.Length);
                        sw.OutputDelta("Copy ImageBytes");

                        // Alternative pixel-map read-back approaches kept for reference:
                        //   pixelMap = buffer.GetAs2DArray();
                        //   buffer.CopyTo(pixelMap, new LongIndex2(0, 0), new LongIndex2(0, 0), new LongIndex2(pixelMap.GetLength(0), pixelMap.GetLength(1)));
                        //   Color3[] pixelMap1D = buffer.GetAsArray();
                        //   Copy1DTo2DArray(pixelMap1D, pixelMap); // ~36ms, a bit faster
                        //   Array.Copy(pixelMap1D, imageBytes, pixelMap1D.Length); // fails
                        //   Buffer.BlockCopy(pixelMap1D, 0, pixelMap, 0, pixelMap1D.Length * Marshal.SizeOf(typeof(Color3))); // fails
                        //   pixelMap = Make2DArray(pixelMap1D, pixelMap.GetLength(0), pixelMap.GetLength(1)); // still slow
                        //   sw.OutputDelta("Copy PixelMap");
                        //   MainForm.form.DrawPixels(pixelMap);
                        MainForm.form.DrawPixels(imageBytes, pixelMap.GetLength(0), pixelMap.GetLength(1));
                        Application.DoEvents();
                        //Debugger.Break();
                        sw.OutputDelta("DrawPixels");
                    }
                }

                // The kernel and buffers are already released at this point.
                Application.Exit();
                //Debugger.Break();
            }
        }
    }
}