コード例 #1
0
 /// <summary>
 /// Creates a kernel key from an entry point description and a specialization.
 /// </summary>
 /// <param name="entry">The entry point description stored in <c>Entry</c>.</param>
 /// <param name="specialization">
 /// The kernel specialization stored in <c>Specialization</c>.
 /// </param>
 public CachedCompiledKernelKey(
     EntryPointDescription entry,
     KernelSpecialization specialization)
 {
     // Both members are assigned in declaration order, exactly once.
     (Entry, Specialization) = (entry, specialization);
 }
コード例 #2
0
ファイル: TestBase.cs プロジェクト: killop/ILGPU
        /// <summary>
        /// Compiles the given kernel method with the accelerator backend, loads the
        /// result into the accelerator, launches it on a newly created stream and
        /// waits for that stream to finish.
        /// </summary>
        /// <typeparam name="TIndex">
        /// The index type. <c>KernelConfig</c> selects the explicitly grouped entry
        /// point; every other type selects the implicitly grouped one.
        /// </typeparam>
        /// <param name="kernel">The kernel method to compile and run.</param>
        /// <param name="dimension">The launch dimension passed to the kernel.</param>
        /// <param name="arguments">The arguments forwarded to the kernel launch.</param>
        public void Execute<TIndex>(
            MethodInfo kernel,
            TIndex dimension,
            params object[] arguments)
            where TIndex : struct, IIndex
        {
            using var launchStream = Accelerator.CreateStream();

            // Step 1: compile the kernel manually through the accelerator backend.
            var compiler = Accelerator.Backend;

            Output.WriteLine($"Compiling '{kernel.Name}'");
            EntryPointDescription description;
            if (typeof(TIndex) == typeof(KernelConfig))
            {
                description = EntryPointDescription.FromExplicitlyGroupedKernel(kernel);
            }
            else
            {
                description = EntryPointDescription.FromImplicitlyGroupedKernel(kernel);
            }
            var compiledKernel = compiler.Compile(description, new KernelSpecialization());

            // Step 2: load the compiled kernel into the accelerator.
            Output.WriteLine($"Loading '{kernel.Name}'");
            using var loadedKernel = Accelerator.LoadKernel(compiledKernel);

            // Step 3: launch on the dedicated stream and block until it completes.
            Output.WriteLine($"Launching '{kernel.Name}'");
            loadedKernel.Launch(launchStream, dimension, arguments);
            launchStream.Synchronize();
        }
コード例 #3
0
        /// <summary>
        /// Compiles the given method as an implicitly grouped kernel with an empty
        /// specialization, loads it as an auto-grouped kernel on
        /// <c>GPUHelper.accelerator</c> and records it in <c>kernels</c>.
        /// </summary>
        /// <param name="kernelFunction">The kernel method to compile.</param>
        /// <returns>The loaded kernel, which has also been added to <c>kernels</c>.</returns>
        internal static Kernel CreateKernel(MethodInfo kernelFunction)
        {
            var description = EntryPointDescription.FromImplicitlyGroupedKernel(kernelFunction);

            var device = GPUHelper.accelerator;
            var compiled = device.Backend.Compile(description, KernelSpecialization.Empty);
            var loaded = device.LoadAutoGroupedKernel(compiled);

            // Keep a reference so the kernel can be found again via the shared list.
            kernels.Add(loaded);
            return loaded;
        }
コード例 #4
0
ファイル: SpecializationCache.cs プロジェクト: m4rs-mt/ILGPU
 /// <summary>
 /// Creates a specialization cache and stores all given components in the
 /// corresponding members.
 /// </summary>
 /// <param name="accelerator">The parent accelerator.</param>
 /// <param name="kernelMethod">The IR kernel method.</param>
 /// <param name="loader">The loader instance.</param>
 /// <param name="entry">The associated entry point.</param>
 /// <param name="specialization">The kernel specialization.</param>
 public SpecializationCache(
     Accelerator accelerator,
     Method kernelMethod,
     TLoader loader,
     EntryPointDescription entry,
     KernelSpecialization specialization)
 {
     this.Accelerator = accelerator;
     this.KernelMethod = kernelMethod;
     this.Loader = loader;
     this.Entry = entry;
     this.KernelSpecialization = specialization;
 }
コード例 #5
0
ファイル: Program.cs プロジェクト: phoyd/ILGPU.Samples
        /// <summary>
        /// Compiles <c>MyKernel</c> as an implicitly grouped kernel, loads it with an
        /// automatically determined group size, runs it over a 1024-element buffer and
        /// reports every element that differs from <c>42 + index</c>.
        /// </summary>
        /// <param name="accelerator">The accelerator to compile for and launch on.</param>
        static void CompileAndLaunchAutoGroupedKernel(Accelerator accelerator)
        {
            const BindingFlags kernelFlags = BindingFlags.NonPublic | BindingFlags.Static;

            // Look up the kernel method and compile it with the backend of this device.
            var deviceBackend = accelerator.Backend;
            var kernelMethod = typeof(Program).GetMethod(nameof(MyKernel), kernelFlags);
            var description = EntryPointDescription.FromImplicitlyGroupedKernel(kernelMethod);
            var compiled = deviceBackend.Compile(description, default);

            // Info: on a CudaAccelerator the compiled kernel can be cast to a
            // PTXCompiledKernel in order to extract the PTX assembly code.

            // Auto-grouped load: the group size is determined automatically.
            // The loaded kernel has to be disposed manually, hence the using block.
            using (var loadedKernel = accelerator.LoadAutoGroupedKernel(compiled))
            {
                var launch = loadedKernel
                    .CreateLauncherDelegate<Action<AcceleratorStream, Index1, ArrayView<int>, int>>();

                using (var buffer = accelerator.Allocate<int>(1024))
                {
                    // One thread per buffer element; the typed launcher is used because
                    // the generic kernel.Launch method involves boxing.
                    launch(accelerator.DefaultStream, buffer.Length, buffer.View, 42);

                    // Block until the kernel has finished.
                    accelerator.Synchronize();

                    // Copy the results back and check each element.
                    var data = buffer.GetAsArray();
                    for (var i = 0; i < data.Length; i++)
                    {
                        if (data[i] == 42 + i)
                        {
                            continue;
                        }

                        Console.WriteLine($"Error at element location {i}: {data[i]} found");
                    }
                }

                accelerator.Synchronize();
            }
        }
コード例 #6
0
ファイル: Program.cs プロジェクト: phoyd/ILGPU.Samples
        /// <summary>
        /// Compiles <c>GroupedKernel</c> as an explicitly grouped kernel, launches it
        /// over a 1024-element buffer with the given group size and reports every
        /// element that differs from <c>42 + index</c>.
        /// </summary>
        /// <param name="accelerator">The accelerator to compile for and launch on.</param>
        /// <param name="groupSize">The number of threads per group.</param>
        static void CompileAndLaunchKernel(Accelerator accelerator, int groupSize)
        {
            const BindingFlags kernelFlags = BindingFlags.NonPublic | BindingFlags.Static;

            // Look up the kernel method and compile it with the backend of this device.
            var deviceBackend = accelerator.Backend;
            var kernelMethod = typeof(Program).GetMethod(nameof(GroupedKernel), kernelFlags);
            var description = EntryPointDescription.FromExplicitlyGroupedKernel(kernelMethod);
            var compiled = deviceBackend.Compile(description, default);

            // Info: on a CudaAccelerator the compiled kernel can be cast to a
            // PTXCompiledKernel in order to extract the PTX assembly code.

            // Explicitly grouped load; the loaded kernel has to be disposed manually.
            using (var loadedKernel = accelerator.LoadKernel(compiled))
            {
                var launch = loadedKernel
                    .CreateLauncherDelegate<Action<AcceleratorStream, KernelConfig, ArrayView<int>, int>>();

                using (var buffer = accelerator.Allocate<int>(1024))
                {
                    // The typed launcher is used because the generic kernel.Launch
                    // method involves boxing.
                    launch(
                        accelerator.DefaultStream,
                        (
                            (buffer.Length + groupSize - 1) / groupSize, // group count, rounded up
                            groupSize                                    // threads per group
                        ),
                        buffer.View,
                        42);

                    accelerator.Synchronize();

                    // Copy the results back and check each element.
                    var data = buffer.GetAsArray();
                    for (var i = 0; i < data.Length; i++)
                    {
                        if (data[i] == 42 + i)
                        {
                            continue;
                        }

                        Console.WriteLine($"Error at element location {i}: {data[i]} found");
                    }
                }
            }
        }
コード例 #7
0
        /// <summary>
        /// Compiles <c>MyKernel</c> as an implicitly grouped kernel, loads it with a
        /// caller-supplied group size, runs it over a 1024-element buffer and reports
        /// every element that differs from <c>42 + index</c>.
        /// </summary>
        /// <param name="accelerator">The accelerator to compile for and launch on.</param>
        /// <param name="groupSize">The custom group size used when loading the kernel.</param>
        static void CompileAndLaunchImplicitlyGroupedKernel(Accelerator accelerator, int groupSize)
        {
            const BindingFlags kernelFlags = BindingFlags.NonPublic | BindingFlags.Static;

            // Look up the kernel method and compile it with the backend of this device.
            var deviceBackend = accelerator.GetBackend();
            var kernelMethod = typeof(Program).GetMethod(nameof(MyKernel), kernelFlags);
            var description = EntryPointDescription.FromImplicitlyGroupedKernel(kernelMethod);
            var compiled = deviceBackend.Compile(description, default);

            // Info: on a CudaAccelerator the compiled kernel can be cast to a
            // PTXCompiledKernel in order to extract the PTX assembly code.

            // Load with the custom group size; the using declaration takes care of
            // the manual disposal the loaded kernel requires.
            using var loadedKernel = accelerator.LoadImplicitlyGroupedKernel(compiled, groupSize);
            var launch = loadedKernel
                .CreateLauncherDelegate<Action<AcceleratorStream, Index1D, ArrayView<int>, int>>();

            using var buffer = accelerator.Allocate1D<int>(1024);

            // One thread per buffer element; the typed launcher is used because the
            // generic kernel.Launch method involves boxing.
            launch(accelerator.DefaultStream, (int)buffer.Length, buffer.View, 42);

            // Read the GPU buffer into a new CPU array. This implicitly calls
            // accelerator.DefaultStream.Synchronize(), so the kernel and the memory
            // copy are completed first.
            var data = buffer.GetAsArray1D();

            for (var i = 0; i < data.Length; i++)
            {
                if (data[i] == 42 + i)
                {
                    continue;
                }

                Console.WriteLine($"Error at element location {i}: {data[i]} found");
            }
        }
コード例 #8
0
ファイル: GPU.cs プロジェクト: binaryalgorithm/FSMNet
        /// <summary>
        /// For every non-CPU accelerator: compiles <c>PixelKernel</c> with an OpenCL
        /// backend, loads it, allocates the device buffers, uploads <c>nrn</c> and then,
        /// while <c>running</c> is set, synchronizes the accelerator, copies the image
        /// bytes back to <c>imageBytes</c> and draws them on the main form.
        /// </summary>
        /// <remarks>
        /// The loaded kernel and all three buffers are held in <c>using</c> statements so
        /// that they are released even when the loop throws. Previously the kernel was
        /// never disposed and the buffers were only disposed on the non-exception path.
        /// </remarks>
        public static void Main2()
        {
            using (var context = new Context())
            {
                foreach (var acceleratorId in Accelerator.Accelerators)
                {
                    if (acceleratorId.AcceleratorType == AcceleratorType.CPU)
                    {
                        continue;
                    }

                    using (var accelerator = Accelerator.Create(context, acceleratorId))
                    {
                        CompiledKernel compiledKernel;

                        // NOTE(review): the kernel is always compiled with an AMD OpenCL
                        // backend, independent of the accelerator created above - confirm
                        // this is intended for non-OpenCL / non-AMD devices.
                        using (Backend b = new CLBackend(context, ILGPU.Runtime.OpenCL.CLAcceleratorVendor.AMD))
                        {
                            MethodInfo methodInfo = typeof(GPU).GetMethod("PixelKernel");
                            KernelSpecialization spec = KernelSpecialization.Empty;
                            compiledKernel = b.Compile(EntryPointDescription.FromImplicitlyGroupedKernel(methodInfo), spec);
                            // debug: check kernel.Source for source text
                        }

                        // Stacked using statements dispose in reverse order: the buffers
                        // first, then the kernel - all before Application.Exit() below.
                        using (var kernel = accelerator.LoadAutoGroupedKernel(compiledKernel))
                        using (MemoryBuffer2D<Color3> buffer = accelerator.Allocate<Color3>(pixelMap.GetLength(0), pixelMap.GetLength(1)))
                        using (MemoryBuffer<byte> buffer2 = accelerator.Allocate<byte>(imageBytes.Length))
                        using (MemoryBuffer2D<Neuron> buffer3 = accelerator.Allocate<Neuron>(nrn.GetLength(0), nrn.GetLength(1)))
                        {
                            // Upload the complete neuron grid to the device.
                            buffer3.CopyFrom(nrn, new LongIndex2(0, 0), new LongIndex2(0, 0), new LongIndex2(nrn.GetLength(0), nrn.GetLength(1)));

                            while (running == true)
                            {
                                Stopwatch sw = Stopwatch.StartNew();

                                // TODO: the kernel launch is currently disabled. It was
                                // sketched as a call over a
                                // (pixelMap.GetLength(0), pixelMap.GetLength(1)) grid with
                                // buffer.View, buffer2.View and buffer3.View as arguments.

                                sw.OutputDelta("Kernel");

                                accelerator.Synchronize();

                                sw.OutputDelta("Sync");

                                // Copy the rendered bytes back into the existing host array.
                                buffer2.CopyTo(imageBytes, 0, 0, imageBytes.Length);

                                sw.OutputDelta("Copy ImageBytes");

                                MainForm.form.DrawPixels(imageBytes, pixelMap.GetLength(0), pixelMap.GetLength(1));

                                // Pump the message loop so the form stays responsive
                                // while this loop is running.
                                Application.DoEvents();

                                sw.OutputDelta("DrawPixels");
                            }
                        }

                        Application.Exit();
                    }
                }
            }
        }