Example #1
0
 /// <summary>
 /// Creates every kernel defined in the program and registers each one in
 /// <c>_kernels</c>, keyed by the kernel's function name.
 /// </summary>
 /// <remarks>
 /// The last OpenCL status is stored in <c>_error</c> and passed to
 /// <c>CLException.CheckException</c> after every native call.
 /// </remarks>
 public void CreateAllKernels()
 {
     Kernel[] kernels = Cl.CreateKernelsInProgram(_program, out _error);
     CLException.CheckException(_error);

     foreach (Kernel kernel in kernels)
     {
         InfoBuffer name = Cl.GetKernelInfo(kernel, KernelInfo.FunctionName, out _error);

         // Validate the name query before using its result as a key; otherwise a
         // failed query would register the kernel under a meaningless name.
         CLException.CheckException(_error);

         _kernels.Add(name.ToString(), kernel);
     }
 }
Example #2
0
        /// <summary>
        /// Compiles <paramref name="source"/> for <paramref name="device"/>, creates all kernels
        /// contained in the resulting program and opens a command queue on the device.
        /// </summary>
        /// <param name="device">Device the program is built for; also used to create the context.</param>
        /// <param name="source">OpenCL C source code to compile.</param>
        /// <exception cref="Exception">
        /// Thrown when creating the program, building it, creating its kernels or creating the
        /// command queue reports an error code other than <c>ErrorCode.Success</c>.
        /// </exception>
        public OpenClCompiler(Device device, string source)
        {
            _device = device;
            _ctx    = device.CreateContext();

            Source   = source;
            _program = new Program(Cl.CreateProgramWithSource(_ctx, 1, new string[] { source }, null, out ErrorCode error));
            if (error != ErrorCode.Success)
            {
                throw new Exception($"Creating the OpenCL program failed: {error}");
            }

            // A failed build would otherwise only surface later as missing kernels.
            error = Cl.BuildProgram(_program, 1, new[] { _device }, string.Empty, null, IntPtr.Zero);
            if (error != ErrorCode.Success)
            {
                throw new Exception($"Building the OpenCL program failed: {error}");
            }

            KernelCount = _program.NumKernels;
            Methodes    = _program.KernelNames;

            _kernels = new Kernel[KernelCount];

            if ((error = Cl.CreateKernelsInProgram(_program, KernelCount, _kernels, out _)) != ErrorCode.Success)
            {
                throw new Exception($"{error}");
            }

            _queue = Cl.CreateCommandQueue(_ctx, _device, CommandQueueProperties.None, out error);
            if (error != ErrorCode.Success)
            {
                throw new Exception($"Creating the OpenCL command queue failed: {error}");
            }
        }
Example #3
0
        /// <summary>
        /// Records the shared-virtual-memory capabilities of <paramref name="device"/>, compiles
        /// <paramref name="source"/> for it and creates all kernels contained in the program.
        /// </summary>
        /// <param name="device">Device the program is built for; also used to create the context.</param>
        /// <param name="source">OpenCL C source code to compile.</param>
        /// <exception cref="Exception">
        /// Thrown when creating the program, building it or creating its kernels reports an
        /// error code other than <c>ErrorCode.Success</c>.
        /// </exception>
        public OpenClCompiler(Device device, string source)
        {
            _device = device;
            _ctx    = device.CreateContext();

            SVMCapabilities capabilities = _device.SvmCapabilities;

            // Each property is true only when every bit of the corresponding flag is set.
            IsCoarseGrainBufferSupported = (capabilities & SVMCapabilities.SvmCoarseGrainBuffer) == SVMCapabilities.SvmCoarseGrainBuffer;
            IsFineGrainBufferSupported   = (capabilities & SVMCapabilities.SvmFineGrainBuffer) == SVMCapabilities.SvmFineGrainBuffer;
            IsFineGrainSystemSupported   = (capabilities & SVMCapabilities.SvmFineGrainSystem) == SVMCapabilities.SvmFineGrainSystem;
            IsAtomicSupported            = (capabilities & SVMCapabilities.SvmAtomics) == SVMCapabilities.SvmAtomics;

            Source   = source;
            _program = new Program(Cl.CreateProgramWithSource(_ctx, 1, new string[] { source }, null, out ErrorCode error));
            if (error != ErrorCode.Success)
            {
                throw new Exception($"Creating the OpenCL program failed: {error}");
            }

            // A failed build would otherwise only surface later as missing kernels.
            error = Cl.BuildProgram(_program, 1, new[] { _device }, string.Empty, null, IntPtr.Zero);
            if (error != ErrorCode.Success)
            {
                throw new Exception($"Building the OpenCL program failed: {error}");
            }

            KernelCount = _program.NumKernels;
            Methodes    = _program.KernelNames;

            _kernels = new Kernel[KernelCount];

            error = Cl.CreateKernelsInProgram(_program, KernelCount, _kernels, out _);
            if (error != ErrorCode.Success)
            {
                throw new Exception($"Creating the OpenCL kernels failed: {error}");
            }
        }
Example #4
0
        /// <summary>
        /// Creates a <see cref="Kernel"/> wrapper for every kernel contained in <paramref name="program"/>.
        /// </summary>
        /// <param name="program">The program whose kernels are created.</param>
        /// <returns>One <see cref="Kernel"/> per kernel reported by the program; empty when there are none.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="program"/> equals <c>Program.Null</c>.</exception>
        public static Kernel[] CreateKernelsInProgram(Program program)
        {
            if (program == Program.Null)
            {
                // The original reported "context", which is not a parameter of this method.
                throw new ArgumentNullException(nameof(program));
            }

            unsafe
            {
                // First call only asks how many kernels the program contains.
                uint num_kernels = 0;
                ClHelper.GetError(Cl.CreateKernelsInProgram(program.Handle, 0, null, &num_kernels));

                if (num_kernels == 0)
                {
                    return new Kernel[0];
                }

                // Second call fills a stack buffer with the native kernel handles.
                IntPtr *kernel_ptrs = stackalloc IntPtr[(int)num_kernels];
                ClHelper.GetError(Cl.CreateKernelsInProgram(program.Handle, num_kernels, kernel_ptrs, null));

                Kernel[] kernels = new Kernel[(int)num_kernels];
                for (int i = 0; i < kernels.Length; ++i)
                {
                    kernels[i] = new Kernel(kernel_ptrs[i]);
                }

                return kernels;
            }
        }
        /// <summary>
        /// Creates the OpenCL context, builds the program from <c>kernels.cl</c>, picks its first
        /// kernel, creates the command queue and output buffer, and binds kernel arguments 2-4.
        /// </summary>
        /// <remarks>
        /// Every error code is handed to <c>allGood</c>, which is defined elsewhere
        /// (presumably it reports or throws on a non-success code - TODO confirm).
        /// Kernel arguments 0 and 1 are not set here.
        /// </remarks>
        /// <exception cref="Exception">The program's build status is not <c>BuildStatus.Success</c>.</exception>
        private void ready()
        {
            ErrorCode error;

            context = Cl.CreateContext(null, 1, new[] { device }, null, IntPtr.Zero, out error);
            allGood(error);

            string source = System.IO.File.ReadAllText("kernels.cl");

            program = Cl.CreateProgramWithSource(context, 1, new[] { source }, null, out error);
            allGood(error);

            // The build result itself is not checked: the build status queried below
            // produces the more descriptive failure.
            Cl.BuildProgram(program, 1, new[] { device }, string.Empty, null, IntPtr.Zero);
            InfoBuffer buildStatus = Cl.GetProgramBuildInfo(program, device, ProgramBuildInfo.Status, out error);

            // Make sure the status query succeeded before interpreting its buffer.
            allGood(error);

            if (buildStatus.CastTo<BuildStatus>() != BuildStatus.Success)
            {
                throw new Exception($"OpenCL could not build the kernel successfully: {buildStatus.CastTo<BuildStatus>()}");
            }

            Kernel[] kernels = Cl.CreateKernelsInProgram(program, out error);

            // Check the error before indexing the array that the call returned.
            allGood(error);
            kernel = kernels[0];

            queue = Cl.CreateCommandQueue(context, device, CommandQueueProperties.None, out error);
            allGood(error);

            dataOut = Cl.CreateBuffer(context, MemFlags.WriteOnly, (IntPtr)(globalSize * sizeof(int)), out error);
            allGood(error);

            var intSizePtr = new IntPtr(Marshal.SizeOf(typeof(int)));

            // The three results are OR-ed together so one check covers all of them.
            error |= Cl.SetKernelArg(kernel, 2, new IntPtr(Marshal.SizeOf(typeof(IntPtr))), dataOut);
            error |= Cl.SetKernelArg(kernel, 3, intSizePtr, new IntPtr(worldSeed));
            error |= Cl.SetKernelArg(kernel, 4, intSizePtr, new IntPtr(globalSize));
            allGood(error);
        }
Example #6
0
        /// <summary>
        /// Demo entry point: lists the OpenCL platforms, picks the first device of the first
        /// platform, builds three small kernels, runs <c>double_everything</c> on A followed by
        /// <c>add_array</c> on A and B, and prints the resulting array C.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            Console.WriteLine("Hello World!");
            uint      platformCount;
            ErrorCode result = Cl.GetPlatformIDs(0, null, out platformCount);

            Console.WriteLine("{0} platforms found", platformCount);

            if (platformCount == 0)
            {
                // Nothing to run on; the code below indexes platformIds[0].
                return;
            }

            var platformIds = new Platform[platformCount];

            result = Cl.GetPlatformIDs(platformCount, platformIds, out platformCount);
            var platformCounter = 0;

            foreach (var platformId in platformIds)
            {
                // First call retrieves the size of the name, second call the name itself.
                IntPtr paramSize;
                result = Cl.GetPlatformInfo(platformId, PlatformInfo.Name, IntPtr.Zero, InfoBuffer.Empty, out paramSize);

                using (var buffer = new InfoBuffer(paramSize))
                {
                    // Query the platform being iterated (the original always queried platformIds[0]).
                    result = Cl.GetPlatformInfo(platformId, PlatformInfo.Name, paramSize, buffer, out paramSize);

                    Console.WriteLine($"Platform {platformCounter}: {buffer}");
                }
                platformCounter++;
            }

            Console.WriteLine($"Using first platform...");

            uint deviceCount;

            result = Cl.GetDeviceIDs(platformIds[0], DeviceType.All, 0, null, out deviceCount);
            Console.WriteLine("{0} devices found", deviceCount);

            if (deviceCount == 0)
            {
                // Nothing to run on; the code below indexes deviceIds[0].
                return;
            }

            var deviceIds = new Device[deviceCount];

            result = Cl.GetDeviceIDs(platformIds[0], DeviceType.All, deviceCount, deviceIds, out deviceCount);

            var selectedDevice = deviceIds[0];

            var context = Cl.CreateContext(null, 1, new[] { selectedDevice }, null, IntPtr.Zero, out var error);

            const string kernelSrc = @"
            // Simple test; c[i] = a[i] + b[i]
            __kernel void add_array(__global float *a, __global float *b, __global float *c)
            {
                int xid = get_global_id(0);
                c[xid] = a[xid] + b[xid] - 1500;
            }
            
            __kernel void sub_array(__global float *a, __global float *b, __global float *c)
            {
                int xid = get_global_id(0);
                c[xid] = a[xid] - b[xid] - 2000;
            }
                        
            __kernel void double_everything(__global float *a)
            {
                int xid = get_global_id(0);
                a[xid] = a[xid] * 2;
            }

            ";


            var src = kernelSrc;

            Console.WriteLine("=== src ===");
            Console.WriteLine(src);
            Console.WriteLine("============");

            var program = Cl.CreateProgramWithSource(context, 1, new[] { src }, null, out var error2);

            error2 = Cl.BuildProgram(program, 1, new[] { selectedDevice }, string.Empty, null, IntPtr.Zero);

            if (error2 == ErrorCode.BuildProgramFailure)
            {
                Console.Error.WriteLine(Cl.GetProgramBuildInfo(program, selectedDevice, ProgramBuildInfo.Log, out error));
            }

            Console.WriteLine(error2);

            if (error2 != ErrorCode.Success)
            {
                // Without a successful build there are no kernels to run.
                program.Dispose();
                return;
            }

            // Get the kernels.
            var kernels = Cl.CreateKernelsInProgram(program, out error);

            Console.WriteLine($"Program contains {kernels.Length} kernels.");

            // NOTE(review): assumes kernels are returned in declaration order
            // (add_array, sub_array, double_everything) - confirm against the runtime.
            var kernelAdd    = kernels[0];
            var kernelDouble = kernels[2];

            float[] A = new float[1000];
            float[] B = new float[1000];
            float[] C = new float[1000];

            for (var i = 0; i < 1000; i++)
            {
                A[i] = i;
                B[i] = i;
            }

            // A is read by add_array and written by double_everything, so it must be read-write.
            IMem <float> hDeviceMemA = Cl.CreateBuffer(context, MemFlags.CopyHostPtr | MemFlags.ReadWrite, A, out error);
            IMem <float> hDeviceMemB = Cl.CreateBuffer(context, MemFlags.CopyHostPtr | MemFlags.ReadOnly, B, out error);
            // C is only ever written by add_array; a read-only buffer must not be written by a kernel.
            IMem <float> hDeviceMemC = Cl.CreateBuffer(context, MemFlags.CopyHostPtr | MemFlags.WriteOnly, C, out error);

            // Create a command queue.
            var cmdQueue = Cl.CreateCommandQueue(context, selectedDevice, CommandQueueProperties.None, out error);

            int intPtrSize = Marshal.SizeOf(typeof(IntPtr));

            error = Cl.SetKernelArg(kernelDouble, 0, new IntPtr(intPtrSize), hDeviceMemA);

            error = Cl.SetKernelArg(kernelAdd, 0, new IntPtr(intPtrSize), hDeviceMemA);
            error = Cl.SetKernelArg(kernelAdd, 1, new IntPtr(intPtrSize), hDeviceMemB);
            error = Cl.SetKernelArg(kernelAdd, 2, new IntPtr(intPtrSize), hDeviceMemC);

            // write data from host to device
            Event clevent;

            error = Cl.EnqueueWriteBuffer(cmdQueue, hDeviceMemA, Bool.True, IntPtr.Zero,
                                          new IntPtr(1000 * sizeof(float)),
                                          A, 0, null, out clevent);
            error = Cl.EnqueueWriteBuffer(cmdQueue, hDeviceMemB, Bool.True, IntPtr.Zero,
                                          new IntPtr(1000 * sizeof(float)),
                                          B, 1, new [] { clevent }, out clevent);

            // execute kernels; each command waits on the event of the previous one
            error = Cl.EnqueueNDRangeKernel(cmdQueue, kernelDouble, 1, null, new IntPtr[] { new IntPtr(1000) }, null, 1, new [] { clevent }, out clevent);

            error = Cl.EnqueueNDRangeKernel(cmdQueue, kernelAdd, 1, null, new IntPtr[] { new IntPtr(1000) }, null, 1, new [] { clevent }, out clevent);
            Console.WriteLine($"Run result: {error}");

            // The read is non-blocking, so wait for its event before touching C.
            error = Cl.EnqueueReadBuffer(cmdQueue, hDeviceMemC, Bool.False, 0, C.Length, C, 1, new [] { clevent }, out clevent);
            Cl.WaitForEvents(1, new [] { clevent });

            for (var i = 0; i < 1000; i++)
            {
                Console.WriteLine($"[{i}]: {C[i]}");
            }

            program.Dispose();

            foreach (var res in typeof(SourceLoader).Assembly.GetManifestResourceNames())
            {
                Console.WriteLine(res);
            }
        }
Example #7
0
        // Partially from OpenTK demo - Submitted by "mfagerlund"
        /// <summary>
        /// Builds a program with two kernels, runs the first one on two small vectors and
        /// verifies that every output element equals the sum of the matching inputs.
        /// </summary>
        public void AddArrayAddsCorrectly()
        {
            const string correctSource = @"
                // Simple test; c[i] = a[i] + b[i]

                __kernel void add_array(__global float *a, __global float *b, __global float *c)
                {
                    int xid = get_global_id(0);
                    c[xid] = a[xid] + b[xid];
                }
                
                __kernel void sub_array(__global float *a, __global float *b, __global float *c)
                {
                    int xid = get_global_id(0);
                    c[xid] = a[xid] - b[xid];
                }

                ";

            ErrorCode error;

            using (Program program = Cl.CreateProgramWithSource(_context, 1, new[] { correctSource }, null, out error))
            {
                Assert.AreEqual(ErrorCode.Success, error);
                error = Cl.BuildProgram(program, 1, new[] { _device }, string.Empty, null, IntPtr.Zero);
                Assert.AreEqual(ErrorCode.Success, error);
                Assert.AreEqual(BuildStatus.Success, Cl.GetProgramBuildInfo(program, _device, ProgramBuildInfo.Status, out error).CastTo<BuildStatus>());

                Kernel[] kernels = Cl.CreateKernelsInProgram(program, out error);
                Assert.AreEqual(ErrorCode.Success, error);

                // NOTE(review): assumes the first kernel returned is add_array - confirm ordering.
                Kernel kernel = kernels[0];

                const int cnBlockSize = 4;
                const int cnBlocks    = 3;
                IntPtr    cnDimension = new IntPtr(cnBlocks * cnBlockSize);

                // allocate host vectors
                float[] A = new float[cnDimension.ToInt32()];
                float[] B = new float[cnDimension.ToInt32()];
                float[] C = new float[cnDimension.ToInt32()];

                // initialize host memory; fixed seed keeps a failing run reproducible
                Random rand = new Random(12345);
                for (int i = 0; i < A.Length; i++)
                {
                    A[i] = rand.Next() % 256;
                    B[i] = rand.Next() % 256;
                }

                IMem <float> hDeviceMemA = Cl.CreateBuffer(_context, MemFlags.CopyHostPtr | MemFlags.ReadOnly, A, out error);
                Assert.AreEqual(ErrorCode.Success, error);

                IMem hDeviceMemB = Cl.CreateBuffer(_context, MemFlags.CopyHostPtr | MemFlags.ReadOnly, (IntPtr)(sizeof(float) * cnDimension.ToInt32()), B, out error);
                Assert.AreEqual(ErrorCode.Success, error);
                IMem hDeviceMemC = Cl.CreateBuffer(_context, MemFlags.WriteOnly, (IntPtr)(sizeof(float) * cnDimension.ToInt32()), IntPtr.Zero, out error);
                Assert.AreEqual(ErrorCode.Success, error);

                CommandQueue cmdQueue = Cl.CreateCommandQueue(_context, _device, (CommandQueueProperties)0, out error);
                Assert.AreEqual(ErrorCode.Success, error);

                Event clevent;

                int intPtrSize = Marshal.SizeOf(typeof(IntPtr));

                // setup parameter values
                error = Cl.SetKernelArg(kernel, 0, new IntPtr(intPtrSize), hDeviceMemA);
                Assert.AreEqual(ErrorCode.Success, error);
                error = Cl.SetKernelArg(kernel, 1, new IntPtr(intPtrSize), hDeviceMemB);
                Assert.AreEqual(ErrorCode.Success, error);
                error = Cl.SetKernelArg(kernel, 2, new IntPtr(intPtrSize), hDeviceMemC);
                Assert.AreEqual(ErrorCode.Success, error);

                // write data from host to device
                error = Cl.EnqueueWriteBuffer(cmdQueue, hDeviceMemA, Bool.True, IntPtr.Zero,
                                              new IntPtr(cnDimension.ToInt32() * sizeof(float)),
                                              A, 0, null, out clevent);
                Assert.AreEqual(ErrorCode.Success, error);
                error = Cl.EnqueueWriteBuffer(cmdQueue, hDeviceMemB, Bool.True, IntPtr.Zero,
                                              new IntPtr(cnDimension.ToInt32() * sizeof(float)),
                                              B, 0, null, out clevent);
                Assert.AreEqual(ErrorCode.Success, error);

                // execute kernel
                error = Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 1, null, new IntPtr[] { cnDimension }, null, 0, null, out clevent);
                Assert.AreEqual(ErrorCode.Success, error, error.ToString());

                // copy results from device back to host
                error = Cl.EnqueueReadBuffer(cmdQueue, hDeviceMemC, Bool.True, 0, C.Length, C, 0, null, out clevent);
                Assert.AreEqual(ErrorCode.Success, error, error.ToString());

                for (int i = 0; i < A.Length; i++)
                {
                    // actual value first, expected value inside the constraint
                    Assert.That(C[i], Is.EqualTo(A[i] + B[i]));
                }

                Cl.Finish(cmdQueue);

                Cl.ReleaseMemObject(hDeviceMemA);
                Cl.ReleaseMemObject(hDeviceMemB);
                Cl.ReleaseMemObject(hDeviceMemC);
                Cl.ReleaseCommandQueue(cmdQueue);

                foreach (Kernel created in kernels)
                {
                    created.Dispose();
                }
            }
        }
Example #8
0
        /// <summary>
        /// Verifies that a program with a compile error fails to build and reports
        /// <c>BuildStatus.Error</c>, and that a correct program builds, exposes its context,
        /// device and source through <c>GetProgramInfo</c>, and yields its two kernels by name.
        /// </summary>
        public void ProgramAndKernelTests()
        {
            const string correctSource    = @"
                // Simple test; c[i] = a[i] + b[i]

                __kernel void add_array(__global float *a, __global float *b, __global float *c)
                {
                    int xid = get_global_id(0);
                    c[xid] = a[xid] + b[xid];
                }
                
                __kernel void sub_array(__global float *a, __global float *b, __global float *c)
                {
                    int xid = get_global_id(0);
                    c[xid] = a[xid] - b[xid];
                }

                ";
            const string sourceWithErrors = @"
                // Erroneous kernel

                __kernel void add_array(__global float *a, __global float *b, __global float *c)
                {
                    foo(); // <-- Error right here!
                    int xid = get_global_id(0);
                    c[xid] = a[xid] + b[xid];
                }";

            ErrorCode error;

            // Part 1: a broken program must be created successfully but fail to build.
            using (Program program = Cl.CreateProgramWithSource(_context, 1, new[] { sourceWithErrors }, null, out error))
            {
                Assert.AreEqual(ErrorCode.Success, error);

                error = Cl.BuildProgram(program, 1, new[] { _device }, string.Empty, null, IntPtr.Zero);
                Assert.AreNotEqual(ErrorCode.Success, error);

                Assert.AreEqual(BuildStatus.Error, Cl.GetProgramBuildInfo(program, _device, ProgramBuildInfo.Status, out error).CastTo<BuildStatus>());

                Console.WriteLine("There were error(s) compiling the provided kernel");
                Console.WriteLine(Cl.GetProgramBuildInfo(program, _device, ProgramBuildInfo.Log, out error));
            }

            // Part 2: a correct program builds and can be inspected.
            using (Program program = Cl.CreateProgramWithSource(_context, 1, new[] { correctSource }, null, out error))
            {
                Assert.AreEqual(ErrorCode.Success, error);

                error = Cl.BuildProgram(program, 1, new[] { _device }, string.Empty, null, IntPtr.Zero);
                Assert.AreEqual(ErrorCode.Success, error);

                Assert.AreEqual(BuildStatus.Success, Cl.GetProgramBuildInfo(program, _device, ProgramBuildInfo.Status, out error).CastTo<BuildStatus>());

                // Try to get information from the program
                Assert.AreEqual(_context, Cl.GetProgramInfo(program, ProgramInfo.Context, out error).CastTo<Context>());
                Assert.AreEqual(1, Cl.GetProgramInfo(program, ProgramInfo.NumDevices, out error).CastTo<int>());
                Assert.AreEqual(_device, Cl.GetProgramInfo(program, ProgramInfo.Devices, out error).CastTo<Device>(0));

                Console.WriteLine("Program source was:");
                Console.WriteLine(Cl.GetProgramInfo(program, ProgramInfo.Source, out error));

                Kernel kernel = Cl.CreateKernel(program, "add_array", out error);
                Assert.AreEqual(ErrorCode.Success, error);

                kernel.Dispose();

                Kernel[] kernels = Cl.CreateKernelsInProgram(program, out error);
                Assert.AreEqual(ErrorCode.Success, error);
                Assert.AreEqual(2, kernels.Length);
                Assert.AreEqual("add_array", Cl.GetKernelInfo(kernels[0], KernelInfo.FunctionName, out error).ToString());
                Assert.AreEqual("sub_array", Cl.GetKernelInfo(kernels[1], KernelInfo.FunctionName, out error).ToString());

                // Release the kernels created in bulk, mirroring the single kernel above.
                foreach (Kernel created in kernels)
                {
                    created.Dispose();
                }
            }
        }
        /// <summary>
        /// Builds <c>kernels.cl</c> for the first GPU of the "AMD Accelerated Parallel Processing"
        /// platform, binds three output buffers and four integer parameters to its first kernel,
        /// runs it over <c>workItems</c> work items and reads the first output buffer back.
        /// </summary>
        public void Prototype()
        {
            ErrorCode error;
            Device    device = (from d in
                                Cl.GetDeviceIDs(
                                    (from platform in Cl.GetPlatformIDs(out error)
                                     where Cl.GetPlatformInfo(platform, PlatformInfo.Name, out error).ToString() == "AMD Accelerated Parallel Processing" // Use "NVIDIA CUDA" if you don't have amd
                                     select platform).First(), DeviceType.Gpu, out error)
                                select d).First();

            Context context = Cl.CreateContext(null, 1, new[] { device }, null, IntPtr.Zero, out error);
            Assert.AreEqual(ErrorCode.Success, error);

            string source = System.IO.File.ReadAllText("kernels.cl");

            int chunkHalfLength  = 300000000;
            int worldSeed        = 420;
            int workItems        = 3000;
            int outputAllocation = 100;
            int outputLength     = workItems * outputAllocation;

            // Size in bytes of each int output buffer.
            IntPtr outputBytes = (IntPtr)(sizeof(int) * outputLength);

            var xr = new int[outputLength];

            using (Program program = Cl.CreateProgramWithSource(context, 1, new[] { source }, null, out error))
            {
                Assert.AreEqual(ErrorCode.Success, error);
                error = Cl.BuildProgram(program, 1, new[] { device }, "", null, IntPtr.Zero);
                Assert.AreEqual(ErrorCode.Success, error);
                var buildInfo = Cl.GetProgramBuildInfo(program, device, ProgramBuildInfo.Status, out error).CastTo<BuildStatus>();
                Assert.AreEqual(BuildStatus.Success, buildInfo);
                Assert.AreEqual(ErrorCode.Success, error);

                Kernel[] kernels = Cl.CreateKernelsInProgram(program, out error);
                Assert.AreEqual(ErrorCode.Success, error);
                Kernel kernel = kernels[0];

                IMem hDeviceMemXr = Cl.CreateBuffer(context, MemFlags.WriteOnly, outputBytes, IntPtr.Zero, out error);
                Assert.AreEqual(ErrorCode.Success, error);
                IMem hDeviceMemZr = Cl.CreateBuffer(context, MemFlags.WriteOnly, outputBytes, IntPtr.Zero, out error);
                Assert.AreEqual(ErrorCode.Success, error);
                IMem hDeviceMemSc = Cl.CreateBuffer(context, MemFlags.WriteOnly, outputBytes, IntPtr.Zero, out error);
                Assert.AreEqual(ErrorCode.Success, error);

                CommandQueue cmdQueue = Cl.CreateCommandQueue(context, device, (CommandQueueProperties)0, out error);
                Assert.AreEqual(ErrorCode.Success, error);

                int intPtrSize = Marshal.SizeOf(typeof(IntPtr));
                int intSize    = Marshal.SizeOf(typeof(int));

                error = Cl.SetKernelArg(kernel, 0, new IntPtr(intPtrSize), hDeviceMemXr);
                Assert.AreEqual(ErrorCode.Success, error);
                error = Cl.SetKernelArg(kernel, 1, new IntPtr(intPtrSize), hDeviceMemZr);
                Assert.AreEqual(ErrorCode.Success, error);
                error = Cl.SetKernelArg(kernel, 2, new IntPtr(intPtrSize), hDeviceMemSc);
                Assert.AreEqual(ErrorCode.Success, error);
                error = Cl.SetKernelArg(kernel, 3, new IntPtr(intSize), new IntPtr(chunkHalfLength));
                Assert.AreEqual(ErrorCode.Success, error);
                error = Cl.SetKernelArg(kernel, 4, new IntPtr(intSize), new IntPtr(worldSeed));
                Assert.AreEqual(ErrorCode.Success, error);
                error = Cl.SetKernelArg(kernel, 5, new IntPtr(intSize), new IntPtr(workItems));
                Assert.AreEqual(ErrorCode.Success, error);
                error = Cl.SetKernelArg(kernel, 6, new IntPtr(intSize), new IntPtr(outputAllocation));
                Assert.AreEqual(ErrorCode.Success, error);

                // xr holds ints, so the byte count is based on sizeof(int) (the original used sizeof(float)).
                error = Cl.EnqueueWriteBuffer(cmdQueue, hDeviceMemXr, Bool.True, IntPtr.Zero,
                                              new IntPtr(outputLength * sizeof(int)),
                                              xr, 0, null, out Event clevent);
                Assert.AreEqual(ErrorCode.Success, error);

                error = Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 1, null, new IntPtr[] { new IntPtr(workItems) }, null, 0, null, out clevent);
                Assert.AreEqual(ErrorCode.Success, error, error.ToString());

                error = Cl.EnqueueReadBuffer(cmdQueue, hDeviceMemXr, Bool.True, 0, xr.Length, xr, 0, null, out clevent);
                Assert.AreEqual(ErrorCode.Success, error, error.ToString());

                Cl.Finish(cmdQueue);

                // Release everything this method created except the program,
                // which the enclosing using statement disposes.
                Cl.ReleaseKernel(kernel);
                Cl.ReleaseMemObject(hDeviceMemXr);
                Cl.ReleaseMemObject(hDeviceMemZr);
                Cl.ReleaseMemObject(hDeviceMemSc);
                Cl.ReleaseCommandQueue(cmdQueue);
            }

            Cl.ReleaseContext(context);
        }
        /// <summary>
        /// Builds <c>kernels.cl</c>, runs its first kernel twice over a 1024 x 1024 range
        /// (first with arguments 0 and 1 set to 0, then to 16383), and for each run prints
        /// every result greater than 50 followed by that run's elapsed time in milliseconds.
        /// </summary>
        public void TestSlimeFinder()
        {
            const int squareLength = 1024;
            int       globalSize   = squareLength * squareLength;
            var       candidates   = new int[globalSize];

            ErrorCode error;
            Device    device = (from d in
                                Cl.GetDeviceIDs(
                                    (from platform in Cl.GetPlatformIDs(out error)
                                     where Cl.GetPlatformInfo(platform, PlatformInfo.Name, out error).ToString() == "AMD Accelerated Parallel Processing" // Use "NVIDIA CUDA" if you don't have amd
                                     select platform).First(), DeviceType.Gpu, out error)
                                select d).First();

            Context context = Cl.CreateContext(null, 1, new[] { device }, null, IntPtr.Zero, out error);
            Assert.AreEqual(ErrorCode.Success, error);

            string source = System.IO.File.ReadAllText("kernels.cl");

            // The using declaration disposes the program when the method exits,
            // so it must not also be released explicitly at the end.
            using Program program = Cl.CreateProgramWithSource(context, 1, new[] { source }, null, out error);
            Assert.AreEqual(ErrorCode.Success, error);

            error = Cl.BuildProgram(program, 1, new[] { device }, string.Empty, null, IntPtr.Zero);
            InfoBuffer buildStatus = Cl.GetProgramBuildInfo(program, device, ProgramBuildInfo.Status, out error);

            Assert.AreEqual(ErrorCode.Success, error);
            Assert.AreEqual(BuildStatus.Success, buildStatus.CastTo<BuildStatus>());

            Kernel[] kernels = Cl.CreateKernelsInProgram(program, out error);

            // Check the error before indexing the returned array.
            Assert.AreEqual(ErrorCode.Success, error);
            Kernel kernel = kernels[0];

            CommandQueue queue = Cl.CreateCommandQueue(context, device, CommandQueueProperties.None, out error);

            Assert.AreEqual(ErrorCode.Success, error);

            IMem dataOut = Cl.CreateBuffer(context, MemFlags.WriteOnly, (IntPtr)(globalSize * sizeof(int)), out error);

            Assert.AreEqual(ErrorCode.Success, error);

            var intSizePtr = new IntPtr(Marshal.SizeOf(typeof(int)));

            error  = Cl.SetKernelArg(kernel, 0, intSizePtr, new IntPtr(0));
            error |= Cl.SetKernelArg(kernel, 1, intSizePtr, new IntPtr(0));
            error |= Cl.SetKernelArg(kernel, 2, new IntPtr(Marshal.SizeOf(typeof(IntPtr))), dataOut);
            error |= Cl.SetKernelArg(kernel, 3, intSizePtr, new IntPtr(420));
            error |= Cl.SetKernelArg(kernel, 4, intSizePtr, new IntPtr(globalSize));
            Assert.AreEqual(ErrorCode.Success, error);

            // Round the global size up to a whole number of work groups.
            int local_size  = 256;
            int global_size = (int)Math.Ceiling(globalSize / (float)local_size) * local_size;
            var stopW       = new Stopwatch();

            // Runs the kernel with the currently bound arguments, then reads back and prints the results.
            void RunAndReport()
            {
                // Restart (not Start) so each run reports its own time instead of a running total.
                stopW.Restart();
                ErrorCode runError = Cl.EnqueueNDRangeKernel(queue, kernel, 1, null, new IntPtr[] { new IntPtr(global_size) }, new IntPtr[] { new IntPtr(local_size) }, 0, null, out Event clevent);
                Assert.AreEqual(ErrorCode.Success, runError);

                Cl.Finish(queue);
                stopW.Stop();

                runError = Cl.EnqueueReadBuffer(queue, dataOut, Bool.True, IntPtr.Zero, (IntPtr)(globalSize * sizeof(int)), candidates, 0, null, out clevent);
                Assert.AreEqual(ErrorCode.Success, runError);

                candidates.ForEach(c =>
                {
                    if (c > 50)
                    {
                        Console.Write($"{c},");
                    }
                });

                Console.WriteLine($"\n{stopW.ElapsedMilliseconds} ms");
            }

            RunAndReport();

            error  = Cl.SetKernelArg(kernel, 0, intSizePtr, new IntPtr(16383));
            error |= Cl.SetKernelArg(kernel, 1, intSizePtr, new IntPtr(16383));
            Assert.AreEqual(ErrorCode.Success, error);

            RunAndReport();

            Cl.ReleaseKernel(kernel);
            Cl.ReleaseMemObject(dataOut);
            Cl.ReleaseCommandQueue(queue);
            Cl.ReleaseContext(context);
        }
        /// <summary>
        /// Builds <c>squared.cl</c>, uploads an array of 100000 floats, runs the program's first
        /// kernel over it and prints the output array.
        /// </summary>
        /// <remarks>
        /// The work-group size defaults to 256 and is lowered to the limit reported by
        /// <c>KernelWorkGroupInfo.WorkGroupSize</c> when the device reports a smaller value.
        /// </remarks>
        public void SquareArray()
        {
            // Adapted from
            //https://github.com/rsnemmen/OpenCL-examples/blob/master/square_array/square.cl
            int array_size = 100000;
            var bytes      = (IntPtr)(array_size * sizeof(float));

            var hdata   = new float[array_size];
            var houtput = new float[array_size];

            for (int i = 0; i < array_size; i++)
            {
                hdata[i] = 1.0f * i;
            }

            ErrorCode error;
            Device    device = (from d in
                                Cl.GetDeviceIDs(
                                    (from platform in Cl.GetPlatformIDs(out error)
                                     where Cl.GetPlatformInfo(platform, PlatformInfo.Name, out error).ToString() == "AMD Accelerated Parallel Processing" // Use "NVIDIA CUDA" if you don't have amd
                                     select platform).First(), DeviceType.Gpu, out error)
                                select d).First();

            Context context = Cl.CreateContext(null, 1, new[] { device }, null, IntPtr.Zero, out error);
            Assert.AreEqual(ErrorCode.Success, error);

            string source = System.IO.File.ReadAllText("squared.cl");

            using (Program program = Cl.CreateProgramWithSource(context, 1, new[] { source }, null, out error))
            {
                Assert.AreEqual(ErrorCode.Success, error);
                error = Cl.BuildProgram(program, 1, new[] { device }, string.Empty, null, IntPtr.Zero);
                Assert.AreEqual(ErrorCode.Success, error);
                Assert.AreEqual(BuildStatus.Success, Cl.GetProgramBuildInfo(program, device, ProgramBuildInfo.Status, out error).CastTo<BuildStatus>());

                Kernel[] kernels = Cl.CreateKernelsInProgram(program, out error);
                Assert.AreEqual(ErrorCode.Success, error);
                Kernel kernel = kernels[0];

                CommandQueue cmdQueue = Cl.CreateCommandQueue(context, device, (CommandQueueProperties)0, out error);
                Assert.AreEqual(ErrorCode.Success, error);

                IMem ddata   = Cl.CreateBuffer(context, MemFlags.ReadOnly, bytes, null, out error);
                Assert.AreEqual(ErrorCode.Success, error);
                IMem doutput = Cl.CreateBuffer(context, MemFlags.WriteOnly, bytes, null, out error);
                Assert.AreEqual(ErrorCode.Success, error);

                error = Cl.EnqueueWriteBuffer(cmdQueue, ddata, Bool.True, (IntPtr)0, bytes, hdata, 0, null, out Event clevent);
                Assert.AreEqual(ErrorCode.Success, error);

                error  = Cl.SetKernelArg(kernel, 0, new IntPtr(Marshal.SizeOf(typeof(IntPtr))), ddata);
                error |= Cl.SetKernelArg(kernel, 1, new IntPtr(Marshal.SizeOf(typeof(IntPtr))), doutput);
                error |= Cl.SetKernelArg(kernel, 2, new IntPtr(Marshal.SizeOf(typeof(int))), new IntPtr(array_size));
                Assert.AreEqual(ErrorCode.Success, error);

                int local_size = 256;

                // The work-group size is a pointer-sized value, so the buffer must be IntPtr.Size bytes.
                // The trailing out parameter is only the size of the returned data (8 on 64-bit),
                // which the original mistook for the work-group size itself.
                var sizeOfSizeT = new IntPtr(IntPtr.Size);
                using (var workGroupInfo = new InfoBuffer(sizeOfSizeT))
                {
                    error = Cl.GetKernelWorkGroupInfo(kernel, device, KernelWorkGroupInfo.WorkGroupSize, sizeOfSizeT, workGroupInfo, out _);
                    Assert.AreEqual(ErrorCode.Success, error);

                    int maxWorkGroupSize = workGroupInfo.CastTo<IntPtr>().ToInt32();
                    if (maxWorkGroupSize > 0)
                    {
                        local_size = Math.Min(local_size, maxWorkGroupSize);
                    }
                }

                // Round the global size up to a whole number of work groups.
                int global_size = (int)Math.Ceiling(array_size / (float)local_size) * local_size;

                error = Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 1, null, new IntPtr[] { new IntPtr(global_size) }, new IntPtr[] { new IntPtr(local_size) }, 0, null, out clevent);
                Assert.AreEqual(ErrorCode.Success, error);
                Cl.Finish(cmdQueue);

                error = Cl.EnqueueReadBuffer(cmdQueue, doutput, Bool.True, IntPtr.Zero, bytes, houtput, 0, null, out clevent);
                Assert.AreEqual(ErrorCode.Success, error);
                houtput.ForEach(o => Console.Write($"{o}, "));

                // The program is disposed by the enclosing using statement,
                // so it is not released explicitly here.
                Cl.ReleaseKernel(kernel);
                Cl.ReleaseMemObject(ddata);
                Cl.ReleaseMemObject(doutput);
                Cl.ReleaseCommandQueue(cmdQueue);
                Cl.ReleaseContext(context);
            }
        }