Beispiel #1
1
        /// <summary>
        /// Enumerates all OpenCL platforms, collects their GPU devices, selects one of them and
        /// creates the context and the out-of-order command queue used by this object.
        /// </summary>
        /// <exception cref="OpenClError">
        /// Thrown when a platform/device query, the context creation or the queue creation does not
        /// report <c>ErrorCode.Success</c>, or when no GPU device was found at all.
        /// </exception>
        public void initialize()
        {
            ErrorCode errorCode;

            // http://www.codeproject.com/Articles/502829/GPGPU-image-processing-basics-using-OpenCL-NET
            // license  The Code Project Open License (CPOL)
            // snip ===

            OpenCL.Net.Platform[] platforms = Cl.GetPlatformIDs(out errorCode);
            if (errorCode != ErrorCode.Success)
            {
                throw new OpenClError();
            }

            List<OpenCL.Net.Device> devicesList = new List<OpenCL.Net.Device>();

            foreach (OpenCL.Net.Platform platform in platforms)
            {
                string platformName = Cl.GetPlatformInfo(platform, OpenCL.Net.PlatformInfo.Name, out errorCode).ToString();
                if (errorCode != ErrorCode.Success)
                {
                    throw new OpenClError();
                }

                Console.WriteLine("Platform: " + platformName);

                // We will be looking only for GPU devices.
                // NOTE(review): the error code of GetDeviceIDs is only inspected when at least one
                // device is returned; a platform without GPUs is therefore silently skipped.
                foreach (OpenCL.Net.Device device in Cl.GetDeviceIDs(platform, OpenCL.Net.DeviceType.Gpu, out errorCode))
                {
                    if (errorCode != ErrorCode.Success)
                    {
                        throw new OpenClError();
                    }

                    Console.WriteLine("Device: " + device.ToString());
                    devicesList.Add(device);
                }
            }

            if (devicesList.Count <= 0)
            {
                Console.WriteLine("No devices found.");
                throw new OpenClError();
            }

            // The second enumerated GPU is preferred (as before), but a machine with a single GPU
            // used to crash here with an ArgumentOutOfRangeException; fall back to the first one.
            chosenDevice = devicesList.Count > 1 ? devicesList[1] : devicesList[0];
            // end snip ===

            context = Cl.CreateContext(null, 1, new Device[] { chosenDevice }, null, IntPtr.Zero, out errorCode);
            if (errorCode != ErrorCode.Success)
            {
                throw new OpenClError();
            }

            commandQueue = Cl.CreateCommandQueue(context, chosenDevice, CommandQueueProperties.OutOfOrderExecModeEnable, out errorCode);
            if (errorCode != ErrorCode.Success)
            {
                throw new OpenClError();
            }
        }
 /// <summary>
 /// Binds <paramref name="srcImg"/> and <paramref name="dstImg"/> to kernel arguments 0 and 1 and
 /// enqueues the kernel on <paramref name="commandQueue"/>.
 /// </summary>
 /// <param name="commandQueue">Queue the kernel launch is enqueued on.</param>
 /// <param name="srcImg">Memory object bound to kernel argument 0.</param>
 /// <param name="dstImg">Memory object bound to kernel argument 1.</param>
 /// <param name="globalWorkSize0">Global work size, dimension 0.</param>
 /// <param name="globalWorkSize1">Global work size, dimension 1 (defaults to 0).</param>
 /// <param name="globalWorkSize2">Global work size, dimension 2 (defaults to 0).</param>
 /// <param name="localWorkSize0">Local work size, dimension 0 (defaults to 0).</param>
 /// <param name="localWorkSize1">Local work size, dimension 1 (defaults to 0).</param>
 /// <param name="localWorkSize2">Local work size, dimension 2 (defaults to 0).</param>
 /// <param name="waitFor">Events passed as the wait list of the launch; may be empty or null.</param>
 /// <returns>The event produced by the enqueue call.</returns>
 private OpenCL.Net.Event run(OpenCL.Net.CommandQueue commandQueue, OpenCL.Net.IMem srcImg, OpenCL.Net.IMem dstImg, uint globalWorkSize0, uint globalWorkSize1 = 0, uint globalWorkSize2 = 0, uint localWorkSize0 = 0, uint localWorkSize1 = 0, uint localWorkSize2 = 0, params OpenCL.Net.Event[] waitFor)
 {
     // The argument-binding results used to be discarded, so a failed binding would still launch
     // the kernel with whatever arguments were set previously. Route them through the same
     // Cl.Check call that already guards the enqueue result.
     OpenCL.Net.Cl.Check(OpenCL.Net.Cl.SetKernelArg(this.Kernel, 0, srcImg));
     OpenCL.Net.Cl.Check(OpenCL.Net.Cl.SetKernelArg(this.Kernel, 1, dstImg));

     // A params array can still be passed as an explicit null; treat that like an empty wait list.
     bool hasWaitList = waitFor != null && waitFor.Length != 0;

     OpenCL.Net.Event     ev;
     OpenCL.Net.ErrorCode err;
     err = OpenCL.Net.Cl.EnqueueNDRangeKernel(commandQueue, this.Kernel, base.GetWorkDimension(globalWorkSize0, globalWorkSize1, globalWorkSize2), null, base.GetWorkSizes(globalWorkSize0, globalWorkSize1, globalWorkSize2), base.GetWorkSizes(localWorkSize0, localWorkSize1, localWorkSize2), hasWaitList ? (uint)waitFor.Length : 0u, hasWaitList ? waitFor : null, out ev);
     OpenCL.Net.Cl.Check(err);
     return ev;
 }
Beispiel #3
0
 /// <summary>
 /// Binds the four kernel arguments (two buffers, a local-memory size and a scale factor) and
 /// enqueues the kernel on <paramref name="commandQueue"/>.
 /// </summary>
 /// <param name="commandQueue">Queue the kernel launch is enqueued on.</param>
 /// <param name="a">Buffer bound to kernel argument 0.</param>
 /// <param name="b">Buffer bound to kernel argument 1.</param>
 /// <param name="local_length">
 /// Element count for kernel argument 2; the argument is set with a size of
 /// <c>local_length * TypeSize&lt;uchar3&gt;.SizeInt</c> bytes and a null value.
 /// </param>
 /// <param name="scale">Value bound to kernel argument 3.</param>
 /// <param name="globalWorkSize0">Global work size, dimension 0.</param>
 /// <param name="globalWorkSize1">Global work size, dimension 1 (defaults to 0).</param>
 /// <param name="globalWorkSize2">Global work size, dimension 2 (defaults to 0).</param>
 /// <param name="localWorkSize0">Local work size, dimension 0 (defaults to 0).</param>
 /// <param name="localWorkSize1">Local work size, dimension 1 (defaults to 0).</param>
 /// <param name="localWorkSize2">Local work size, dimension 2 (defaults to 0).</param>
 /// <param name="waitFor">Events passed as the wait list of the launch; may be empty or null.</param>
 /// <returns>The event produced by the enqueue call.</returns>
 private OpenCL.Net.Event run(OpenCL.Net.CommandQueue commandQueue, OpenCL.Net.IMem <System.Single> a, OpenCL.Net.IMem <System.Single> b, int local_length, float scale, uint globalWorkSize0, uint globalWorkSize1 = 0, uint globalWorkSize2 = 0, uint localWorkSize0 = 0, uint localWorkSize1 = 0, uint localWorkSize2 = 0, params OpenCL.Net.Event[] waitFor)
 {
     // The argument-binding results used to be discarded, so a failed binding would still launch
     // the kernel with whatever arguments were set previously. Route them through the same
     // Cl.Check call that already guards the enqueue result.
     OpenCL.Net.Cl.Check(OpenCL.Net.Cl.SetKernelArg(this.Kernel, 0, a));
     OpenCL.Net.Cl.Check(OpenCL.Net.Cl.SetKernelArg(this.Kernel, 1, b));
     OpenCL.Net.Cl.Check(OpenCL.Net.Cl.SetKernelArg(this.Kernel, 2, ((System.IntPtr)((local_length * OpenCL.Net.TypeSize <uchar3> .SizeInt))), null));
     OpenCL.Net.Cl.Check(OpenCL.Net.Cl.SetKernelArg(this.Kernel, 3, scale));

     // A params array can still be passed as an explicit null; treat that like an empty wait list.
     bool hasWaitList = waitFor != null && waitFor.Length != 0;

     OpenCL.Net.Event     ev;
     OpenCL.Net.ErrorCode err;
     err = OpenCL.Net.Cl.EnqueueNDRangeKernel(commandQueue, this.Kernel, base.GetWorkDimension(globalWorkSize0, globalWorkSize1, globalWorkSize2), null, base.GetWorkSizes(globalWorkSize0, globalWorkSize1, globalWorkSize2), base.GetWorkSizes(localWorkSize0, localWorkSize1, localWorkSize2), hasWaitList ? (uint)waitFor.Length : 0u, hasWaitList ? waitFor : null, out ev);
     OpenCL.Net.Cl.Check(err);
     return ev;
 }
        /// <summary>
        /// Creates a command queue for <paramref name="device"/> in the context of
        /// <paramref name="provider"/> and stores it in <c>_queue</c>.
        /// </summary>
        /// <param name="provider">Supplies the context the queue is created in.</param>
        /// <param name="device">Device the queue is created for.</param>
        /// <param name="outOfOrderExecution">
        /// When true the queue is created with <c>OutOfOrderExecModeEnable</c>, otherwise with <c>None</c>.
        /// </param>
        /// <exception cref="Cl.Exception">Thrown when queue creation does not report success.</exception>
        public CommandQueue(ComputeProvider provider, ClNet.Device device, bool outOfOrderExecution = false)
        {
            // Pick the queue properties up front so the creation call stays on one line.
            ClNet.CommandQueueProperties properties;
            if (outOfOrderExecution)
            {
                properties = ClNet.CommandQueueProperties.OutOfOrderExecModeEnable;
            }
            else
            {
                properties = ClNet.CommandQueueProperties.None;
            }

            ClNet.ErrorCode status;
            _queue = Cl.CreateCommandQueue(provider.Context, device, properties, out status);

            if (status == ClNet.ErrorCode.Success)
            {
                return;
            }

            throw new Cl.Exception(status);
        }
Beispiel #5
0
        /// <summary>
        /// Builds the underlying OpenCL command queue for the given device inside the provider's
        /// context and keeps it in <c>_queue</c>.
        /// </summary>
        /// <param name="provider">Object whose <c>Context</c> the queue belongs to.</param>
        /// <param name="device">Target device of the queue.</param>
        /// <param name="outOfOrderExecution">Selects out-of-order execution mode when true.</param>
        /// <exception cref="Cl.Exception">Raised with the reported error code if creation fails.</exception>
        public CommandQueue(ComputeProvider provider, ClNet.Device device, bool outOfOrderExecution = false)
        {
            var mode = outOfOrderExecution
                ? ClNet.CommandQueueProperties.OutOfOrderExecModeEnable
                : ClNet.CommandQueueProperties.None;

            ClNet.ErrorCode result;
            _queue = Cl.CreateCommandQueue(provider.Context, device, mode, out result);

            bool failed = result != ClNet.ErrorCode.Success;
            if (failed)
            {
                throw new Cl.Exception(result);
            }
        }
Beispiel #6
0
        /// <summary>
        /// Runs the "square_array" kernel of <c>_program</c> with one work-item per element of
        /// <paramref name="array"/> and reads the device buffer back into the same array.
        /// </summary>
        /// <param name="array">
        /// Host data copied to the device buffer before the launch and overwritten with the buffer
        /// contents afterwards.
        /// </param>
        public void SquareArray(float[] array)
        {
            Cl.ErrorCode error;

            //Create the required kernel (entry function)
            Cl.Kernel kernel = Cl.Cl.CreateKernel(_program, "square_array", out error);
            CheckErr(error, "Cl.CreateKernel");

            // Every native object is released in a finally block so that an exception raised by
            // any later step (including CheckErr, should it throw) cannot leak it.
            try
            {
                int intPtrSize = Marshal.SizeOf(typeof(IntPtr));

                var arrayBuffer = Cl.Cl.CreateBuffer <float>(_context, Cl.MemFlags.CopyHostPtr | Cl.MemFlags.ReadWrite, array, out error);
                CheckErr(error, "Cl.CreateBuffer array");

                try
                {
                    //Pass the memory buffers to our kernel function
                    error = Cl.Cl.SetKernelArg(kernel, 0, (IntPtr)intPtrSize, arrayBuffer);
                    CheckErr(error, "Cl.SetKernelArg");

                    //Create a command queue, where all of the commands for execution will be added
                    Cl.CommandQueue cmdQueue = Cl.Cl.CreateCommandQueue(_context, _device, (Cl.CommandQueueProperties) 0, out error);
                    CheckErr(error, "Cl.CreateCommandQueue");

                    try
                    {
                        Cl.Event clevent;

                        IntPtr[] workGroupSizePtr = new IntPtr[] { (IntPtr)array.Length };
                        //Execute our kernel (OpenCL code)
                        error = Cl.Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 1, null, workGroupSizePtr, null, 0, null, out clevent);
                        CheckErr(error, "Cl.EnqueueNDRangeKernel");
                        if (error == Cl.ErrorCode.Success)
                        {
                            // The event is not used for synchronisation (Finish below does that),
                            // so drop our reference instead of leaking it.
                            Cl.Cl.ReleaseEvent(clevent);
                        }

                        //Wait for completion of all calculations on the GPU.
                        error = Cl.Cl.Finish(cmdQueue);
                        CheckErr(error, "Cl.Finish");

                        // Read the buffer from memory (blocking read)
                        error = Cl.Cl.EnqueueReadBuffer(cmdQueue, arrayBuffer, Cl.Bool.True, array, 0, null, out clevent);
                        CheckErr(error, "Cl.EnqueueReadBuffer");
                        if (error == Cl.ErrorCode.Success)
                        {
                            Cl.Cl.ReleaseEvent(clevent);
                        }
                    }
                    finally
                    {
                        Cl.Cl.ReleaseCommandQueue(cmdQueue);
                    }
                }
                finally
                {
                    Cl.Cl.ReleaseMemObject(arrayBuffer);
                }
            }
            finally
            {
                Cl.Cl.ReleaseKernel(kernel);
            }
        }
 /// <summary>
 /// Enqueues the kernel with a three-dimensional global work size and returns without waiting.
 /// </summary>
 /// <returns>The event returned by the private <c>run</c> helper.</returns>
 public OpenCL.Net.Event EnqueueRun(OpenCL.Net.CommandQueue commandQueue, OpenCL.Net.IMem srcImg, OpenCL.Net.IMem dstImg, uint globalWorkSize0, uint globalWorkSize1, uint globalWorkSize2, uint localWorkSize0 = 0, uint localWorkSize1 = 0, uint localWorkSize2 = 0, params OpenCL.Net.Event[] waitFor)
 {
     OpenCL.Net.Event pending = this.run(
         commandQueue,
         srcImg,
         dstImg,
         globalWorkSize0: globalWorkSize0,
         globalWorkSize1: globalWorkSize1,
         globalWorkSize2: globalWorkSize2,
         localWorkSize0: localWorkSize0,
         localWorkSize1: localWorkSize1,
         localWorkSize2: localWorkSize2,
         waitFor: waitFor);
     return pending;
 }
 /// <summary>
 /// Enqueues the kernel with a three-dimensional global work size and then calls <c>Wait()</c>
 /// on the resulting event.
 /// </summary>
 public void Run(OpenCL.Net.CommandQueue commandQueue, OpenCL.Net.IMem srcImg, OpenCL.Net.IMem dstImg, uint globalWorkSize0, uint globalWorkSize1, uint globalWorkSize2, uint localWorkSize0 = 0, uint localWorkSize1 = 0, uint localWorkSize2 = 0, params OpenCL.Net.Event[] waitFor)
 {
     OpenCL.Net.Event completion = this.run(
         commandQueue,
         srcImg,
         dstImg,
         globalWorkSize0: globalWorkSize0,
         globalWorkSize1: globalWorkSize1,
         globalWorkSize2: globalWorkSize2,
         localWorkSize0: localWorkSize0,
         localWorkSize1: localWorkSize1,
         localWorkSize2: localWorkSize2,
         waitFor: waitFor);

     completion.Wait();
 }
                /// <summary>
                /// Uploads both point sets to the device, launches the kernel with one work-item per
                /// source point and returns the int the kernel wrote for each source point.
                /// </summary>
                /// <param name="source_points">Points as rows; the second dimension must match <paramref name="target_points"/>.</param>
                /// <param name="target_points">Points as rows; the second dimension must match <paramref name="source_points"/>.</param>
                /// <returns>
                /// An array with one entry per row of <paramref name="source_points"/>
                /// (presumably the index of the closest target point; confirm against ClosestPoints.cl).
                /// </returns>
                /// <exception cref="Exception">The kernel source file is missing or the program fails to build.</exception>
                public int[] ClosestPoints(double[,] source_points, double[,] target_points)
                {
                    System.Diagnostics.Trace.Assert(source_points.GetLength(1) == target_points.GetLength(1));
                    int[]        ans = new int[source_points.GetLength(0)];
                    CL.ErrorCode error;
                    byte[]       source_points_byte_array = new byte[Marshal.SizeOf(typeof(double)) * source_points.Length];
                    byte[]       target_points_byte_array = new byte[Marshal.SizeOf(typeof(double)) * target_points.Length];
                    byte[]       dims_byte_array          = new byte[Marshal.SizeOf(typeof(int)) * 3];
                    byte[]       output_byte_array        = new byte[Marshal.SizeOf(typeof(int)) * ans.Length];
                    // dims = { point dimensionality, number of source points, number of target points }
                    int[]        dims = new[] { source_points.GetLength(1), source_points.GetLength(0), target_points.GetLength(0) };
                    MyCL.memcpy(ref source_points, ref source_points_byte_array);
                    MyCL.memcpy(ref target_points, ref target_points_byte_array);
                    MyCL.memcpy(ref dims, ref dims_byte_array);
                    lock (cl_lock)
                    {
                        if (!program_initialized)
                        {
                            string programPath = Path.Combine(Environment.CurrentDirectory, "../../ClosestPoints.cl");
                            if (!File.Exists(programPath))
                            {
                                throw new Exception("Program doesn't exist at path " + programPath);
                            }
                            string programSource = System.IO.File.ReadAllText(programPath);

                            program = CL.Cl.CreateProgramWithSource(MyCL.context, 1, new[] { programSource }, null, out error);
                            MyCL.CheckErr(error, "Cl.CreateProgramWithSource");
                            error = CL.Cl.BuildProgram(program, 1, new[] { MyCL.device }, string.Empty, null, IntPtr.Zero);
                            MyCL.CheckErr(error, "Cl.BuildProgram");
                            if (CL.Cl.GetProgramBuildInfo(program, MyCL.device, CL.ProgramBuildInfo.Status, out error).CastTo <CL.BuildStatus>() != CL.BuildStatus.Success)
                            {
                                MyCL.CheckErr(error, "Cl.GetProgramBuildInfo");
                                throw new Exception($"Cl.GetProgramBuildInfo != Success\r\n{CL.Cl.GetProgramBuildInfo(program, MyCL.device, CL.ProgramBuildInfo.Log, out error)}");
                            }

                            // Only mark the program as initialized once it has actually been loaded
                            // and built. The flag used to be set first, so a missing file or a build
                            // failure made every later call skip this block and use an unbuilt program.
                            program_initialized = true;
                        }
                        // NOTE(review): the kernel name is "weighted_sum" although this method is
                        // called ClosestPoints; confirm it matches the entry point in ClosestPoints.cl.
                        using (CL.Kernel kernel = CL.Cl.CreateKernel(program, "weighted_sum", out error))
                        {
                            MyCL.CheckErr(error, "Cl.CreateKernel");
                            // Device buffers for the two point sets, the dims triple and the output.
                            using (CL.IMem
                                   source_points_buffer = CL.Cl.CreateBuffer(MyCL.context, CL.MemFlags.CopyHostPtr | CL.MemFlags.ReadOnly, source_points_byte_array, out CL.ErrorCode err1),
                                   target_points_buffer = CL.Cl.CreateBuffer(MyCL.context, CL.MemFlags.CopyHostPtr | CL.MemFlags.ReadOnly, target_points_byte_array, out CL.ErrorCode err2),
                                   dims_buffer = CL.Cl.CreateBuffer(MyCL.context, CL.MemFlags.CopyHostPtr | CL.MemFlags.ReadOnly, dims_byte_array, out CL.ErrorCode err3),
                                   output_buffer = CL.Cl.CreateBuffer(MyCL.context, CL.MemFlags.CopyHostPtr | CL.MemFlags.WriteOnly, output_byte_array, out CL.ErrorCode err4))
                            {
                                MyCL.CheckErr(err1, "Cl.CreateBuffer source_points");
                                MyCL.CheckErr(err2, "Cl.CreateBuffer target_points");
                                MyCL.CheckErr(err3, "Cl.CreateBuffer dims");
                                MyCL.CheckErr(err4, "Cl.CreateBuffer output");
                                int intPtrSize = Marshal.SizeOf(typeof(IntPtr));
                                // The four results are OR-ed together so a single check covers all bindings.
                                error =
                                    CL.Cl.SetKernelArg(kernel, 0, (IntPtr)intPtrSize, source_points_buffer) |
                                    CL.Cl.SetKernelArg(kernel, 1, (IntPtr)intPtrSize, target_points_buffer) |
                                    CL.Cl.SetKernelArg(kernel, 2, (IntPtr)intPtrSize, dims_buffer) |
                                    CL.Cl.SetKernelArg(kernel, 3, (IntPtr)intPtrSize, output_buffer);
                                MyCL.CheckErr(error, "Cl.SetKernelArg");

                                //Create a command queue, where all of the commands for execution will be added
                                using (CL.CommandQueue cmdQueue = CL.Cl.CreateCommandQueue(MyCL.context, MyCL.device, (CL.CommandQueueProperties) 0, out error))
                                {
                                    MyCL.CheckErr(error, "Cl.CreateCommandQueue");
                                    CL.Event clevent;
                                    // One work-item per source point.
                                    IntPtr[] workGroupSizePtr = new IntPtr[] { (IntPtr)source_points.GetLength(0) };
                                    error = CL.Cl.EnqueueNDRangeKernel(
                                        cmdQueue,
                                        kernel,
                                        1,
                                        null,//not used
                                        workGroupSizePtr, null, 0, null, out clevent);
                                    CL.Cl.ReleaseEvent(clevent);
                                    MyCL.CheckErr(error, "Cl.EnqueueNDRangeKernel");
                                    error = CL.Cl.Finish(cmdQueue);
                                    MyCL.CheckErr(error, "Cl.Finish");
                                    error = CL.Cl.EnqueueReadBuffer(cmdQueue, output_buffer, CL.Bool.True, 0, Marshal.SizeOf(typeof(byte)) * output_byte_array.Length, output_byte_array, 0, null, out clevent);
                                    CL.Cl.ReleaseEvent(clevent);
                                    MyCL.CheckErr(error, "Cl.EnqueueReadBuffer");
                                    MyCL.memcpy(ref output_byte_array, ref ans);
                                }
                            }
                        }
                    }
                    return(ans);
                }
Beispiel #10
0
 /// <summary>
 /// Queries <paramref name="paramName"/> for <paramref name="commandQueue"/> by handing the
 /// <c>GetCommandQueueInfo</c> method group to the shared <c>GetInfo</c> helper.
 /// </summary>
 /// <param name="commandQueue">Queue to query.</param>
 /// <param name="paramName">Which piece of information to retrieve.</param>
 /// <param name="error">Receives the error code produced by the query.</param>
 /// <returns>The buffer returned by <c>GetInfo</c>.</returns>
 public static InfoBuffer GetCommandQueueInfo(CommandQueue commandQueue, CommandQueueInfo paramName, out ErrorCode error)
 {
     InfoBuffer info = GetInfo(GetCommandQueueInfo, commandQueue, paramName, out error);
     return info;
 }
Beispiel #11
0
 /// <summary>
 /// Enqueues the kernel with a three-dimensional global work size and returns without waiting.
 /// </summary>
 /// <returns>The event returned by the private <c>run</c> helper.</returns>
 public OpenCL.Net.Event EnqueueRun(OpenCL.Net.CommandQueue commandQueue, OpenCL.Net.IMem <System.Single> a, OpenCL.Net.IMem <System.Single> b, int local_length, float scale, uint globalWorkSize0, uint globalWorkSize1, uint globalWorkSize2, uint localWorkSize0 = 0, uint localWorkSize1 = 0, uint localWorkSize2 = 0, params OpenCL.Net.Event[] waitFor)
 {
     OpenCL.Net.Event pending = this.run(
         commandQueue,
         a,
         b,
         local_length,
         scale,
         globalWorkSize0: globalWorkSize0,
         globalWorkSize1: globalWorkSize1,
         globalWorkSize2: globalWorkSize2,
         localWorkSize0: localWorkSize0,
         localWorkSize1: localWorkSize1,
         localWorkSize2: localWorkSize2,
         waitFor: waitFor);
     return pending;
 }
Beispiel #12
0
 /// <summary>
 /// Enqueues the kernel with a two-dimensional global work size and then calls <c>Wait()</c> on
 /// the resulting event.
 /// </summary>
 public void Run(OpenCL.Net.CommandQueue commandQueue, OpenCL.Net.IMem <System.Single> a, OpenCL.Net.IMem <System.Single> b, int local_length, float scale, uint globalWorkSize0, uint globalWorkSize1, uint localWorkSize0 = 0, uint localWorkSize1 = 0, params OpenCL.Net.Event[] waitFor)
 {
     // The third dimension is left to the defaults of the private run helper.
     this.run(
         commandQueue,
         a,
         b,
         local_length,
         scale,
         globalWorkSize0: globalWorkSize0,
         globalWorkSize1: globalWorkSize1,
         localWorkSize0: localWorkSize0,
         localWorkSize1: localWorkSize1,
         waitFor: waitFor).Wait();
 }
Beispiel #13
0
        /// <summary>
        /// Runs the "md5" kernel of <c>program</c> with <c>plaintext_bytes.Length / KEY_LENGTH</c>
        /// work-items and reads the kernel's output buffer into <paramref name="match"/>.
        /// </summary>
        /// <param name="plaintext_bytes">Host data for kernel argument 0.</param>
        /// <param name="plaintext_lengths">Host data for kernel argument 1.</param>
        /// <param name="target">
        /// Host data for kernel argument 2; a buffer of <c>KEY_LENGTH</c> bytes is created from it
        /// (NOTE(review): presumably must hold at least <c>KEY_LENGTH</c> bytes; confirm).
        /// </param>
        /// <param name="match">Receives a new <c>KEY_LENGTH</c>-byte array filled from kernel argument 3.</param>
        private void CrackImpl(byte[] plaintext_bytes, byte[] plaintext_lengths, byte[] target, out byte[] match)
        {
            Cl.ErrorCode error;
            match = null;

            // Each native object registers its release call right after it has been created; the
            // finally block runs them all, so nothing leaks if a later step throws.
            var pendingReleases = new System.Collections.Generic.List<System.Action>();
            try
            {
                //Create the required kernel (entry function)
                Cl.Kernel kernel = Cl.Cl.CreateKernel(program, "md5", out error);
                CheckErr(error, "Cl.CreateKernel");
                pendingReleases.Add(() => Cl.Cl.ReleaseKernel(kernel));

                int intPtrSize = Marshal.SizeOf(typeof(IntPtr));

                var plaintextBytesBuffer = Cl.Cl.CreateBuffer(_context, Cl.MemFlags.CopyHostPtr | Cl.MemFlags.ReadOnly, (IntPtr)plaintext_bytes.Length, plaintext_bytes, out error);
                CheckErr(error, "Cl.CreateBuffer plaintext_bytes");
                pendingReleases.Add(() => Cl.Cl.ReleaseMemObject(plaintextBytesBuffer));

                var plaintextLengthsBuffer = Cl.Cl.CreateBuffer(_context, Cl.MemFlags.CopyHostPtr | Cl.MemFlags.ReadOnly, (IntPtr)plaintext_lengths.Length, plaintext_lengths, out error);
                CheckErr(error, "Cl.CreateBuffer plaintext_lengths");
                pendingReleases.Add(() => Cl.Cl.ReleaseMemObject(plaintextLengthsBuffer));

                var targetBuffer = Cl.Cl.CreateBuffer(_context, Cl.MemFlags.CopyHostPtr | Cl.MemFlags.ReadOnly, (IntPtr)KEY_LENGTH, target, out error);
                CheckErr(error, "Cl.CreateBuffer target");
                pendingReleases.Add(() => Cl.Cl.ReleaseMemObject(targetBuffer));

                match = new byte[KEY_LENGTH];
                var matchBuffer = Cl.Cl.CreateBuffer <byte>(_context, Cl.MemFlags.WriteOnly | Cl.MemFlags.CopyHostPtr, match, out error);
                CheckErr(error, "Cl.CreateBuffer match");
                pendingReleases.Add(() => Cl.Cl.ReleaseMemObject(matchBuffer));

                //Pass the memory buffers to our kernel function
                error  = Cl.Cl.SetKernelArg(kernel, 0, (IntPtr)intPtrSize, plaintextBytesBuffer);
                error |= Cl.Cl.SetKernelArg(kernel, 1, (IntPtr)intPtrSize, plaintextLengthsBuffer);
                error |= Cl.Cl.SetKernelArg(kernel, 2, (IntPtr)intPtrSize, targetBuffer);
                error |= Cl.Cl.SetKernelArg(kernel, 3, (IntPtr)intPtrSize, matchBuffer);
                CheckErr(error, "Cl.SetKernelArg");

                //Create a command queue, where all of the commands for execution will be added
                Cl.CommandQueue cmdQueue = Cl.Cl.CreateCommandQueue(_context, _device, (Cl.CommandQueueProperties) 0, out error);
                CheckErr(error, "Cl.CreateCommandQueue");
                pendingReleases.Add(() => Cl.Cl.ReleaseCommandQueue(cmdQueue));
                Cl.Event clevent;

                IntPtr[] workGroupSizePtr = new IntPtr[] { (IntPtr)(plaintext_bytes.Length / KEY_LENGTH) };
                //Execute our kernel (OpenCL code)
                error = Cl.Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 1, null, workGroupSizePtr, null, 0, null, out clevent);
                CheckErr(error, "Cl.EnqueueNDRangeKernel");
                if (error == Cl.ErrorCode.Success)
                {
                    // Finish() below does the synchronisation; the event itself is not needed and
                    // used to be leaked.
                    Cl.Cl.ReleaseEvent(clevent);
                }

                //Wait for completion of all calculations on the GPU.
                error = Cl.Cl.Finish(cmdQueue);
                CheckErr(error, "Cl.Finish");

                // Blocking read of the kernel output into match.
                error = Cl.Cl.EnqueueReadBuffer(cmdQueue, matchBuffer, Cl.Bool.True, match, 0, null, out clevent);
                CheckErr(error, "Cl.EnqueueReadBuffer");
                if (error == Cl.ErrorCode.Success)
                {
                    Cl.Cl.ReleaseEvent(clevent);
                }
            }
            finally
            {
                //Clean up memory, newest object first
                for (int i = pendingReleases.Count - 1; i >= 0; i--)
                {
                    pendingReleases[i]();
                }
            }
        }
 /// <summary>
 /// Extracts the raw handle of every entry in <paramref name="glObjects"/> and forwards the call
 /// to the native <c>clEnqueueReleaseGLObjects</c> import.
 /// </summary>
 /// <param name="queue">Command queue the release is enqueued on.</param>
 /// <param name="glObjects">Memory objects whose handles are passed to the native call.</param>
 /// <param name="waitListCount">Number of entries of <paramref name="waitList"/> passed on.</param>
 /// <param name="waitList">Events forwarded as the wait list.</param>
 /// <param name="outEvent">Receives the event produced by the native call.</param>
 /// <returns>The error code returned by the native call.</returns>
 public static ErrorCode EnqueueReleaseGLObjects(CommandQueue queue, IMem[] glObjects, uint waitListCount, OpenCL.Net.Event[] waitList, out OpenCL.Net.Event outEvent)
 {
     uint objectCount = (uint)glObjects.Length;
     var handles = glObjects.Select(m => (m as IHandleData).Handle).ToArray();
     return clEnqueueReleaseGLObjects(queue, objectCount, handles, waitListCount, waitList, out outEvent);
 }
 /// <summary>
 /// External declaration taking the raw memory-object handles as an <c>IntPtr</c> array.
 /// NOTE(review): presumably bound to the native OpenCL function of the same name; the
 /// DllImport attribute is not visible in this excerpt, so confirm it is present.
 /// </summary>
 /// <param name="queue">Command queue the operation is enqueued on.</param>
 /// <param name="num_objects">Number of entries in <paramref name="memObjects"/>.</param>
 /// <param name="memObjects">Handles of the memory objects, marshalled as an input array.</param>
 /// <param name="num_waitlist">Number of entries in <paramref name="waitList"/>.</param>
 /// <param name="waitList">Wait-list events, marshalled as an input array.</param>
 /// <param name="ev">Receives the event produced by the call.</param>
 /// <returns>The error code reported by the call.</returns>
 private static extern ErrorCode clEnqueueReleaseGLObjects(CommandQueue queue,
                                                           uint num_objects,
                                                           [In][MarshalAs(UnmanagedType.LPArray)] IntPtr[] memObjects,
                                                           uint num_waitlist,
                                                           [In][MarshalAs(UnmanagedType.LPArray)] OpenCL.Net.Event[] waitList,
                                                           out Event ev);
        /// <summary>
        /// Runs the "sobelEdgeDetect" kernel of <c>program</c> over an RGBA / unsigned 8-bit image
        /// held in <paramref name="inputArray"/> and reads the result image into
        /// <paramref name="outputArray"/>.
        /// </summary>
        /// <param name="inputArray">Host pixel data for the input image.</param>
        /// <param name="inputImgWidth">Image width; also the global work size in dimension 0.</param>
        /// <param name="inputImgHeight">Image height; also the global work size in dimension 1.</param>
        /// <param name="threshold">Converted to float and divided by 250 before being passed as kernel argument 2.</param>
        /// <param name="outputArray">Receives a new array of the same length as <paramref name="inputArray"/> filled from the output image.</param>
        public void Process(int[] inputArray, int inputImgWidth, int inputImgHeight, double threshold, out int[] outputArray)
        {
            Cl.ErrorCode error;
            outputArray = null;

            // Each native object registers its release call right after it has been created; the
            // finally block runs them all, so nothing leaks if a later step throws.
            var pendingReleases = new System.Collections.Generic.List<System.Action>();
            try
            {
                //Create the required kernel (entry function)
                Cl.Kernel kernel = Cl.Cl.CreateKernel(program, "sobelEdgeDetect", out error);
                CheckErr(error, "Cl.CreateKernel");
                pendingReleases.Add(() => Cl.Cl.ReleaseKernel(kernel));

                int intPtrSize = Marshal.SizeOf(typeof(IntPtr));

                //OpenCL image object that will hold the input pixel data.
                Cl.IMem        inputImage2DBuffer;
                Cl.ImageFormat clImageFormat = new Cl.ImageFormat(Cl.ChannelOrder.RGBA, Cl.ChannelType.Unsigned_Int8);

                //Allocate OpenCL image memory buffer, initialised from inputArray
                inputImage2DBuffer = Cl.Cl.CreateImage2D(_context, Cl.MemFlags.CopyHostPtr | Cl.MemFlags.ReadOnly, clImageFormat,
                                                         (IntPtr)inputImgWidth, (IntPtr)inputImgHeight,
                                                         (IntPtr)0, inputArray, out error);
                CheckErr(error, "Cl.CreateImage2D input");
                pendingReleases.Add(() => Cl.Cl.ReleaseMemObject(inputImage2DBuffer));

                //Host array that receives the output image (one int per input element)
                outputArray = new int[inputArray.Length];
                //Allocate OpenCL image memory buffer
                Cl.IMem outputImage2DBuffer = Cl.Cl.CreateImage2D(_context, Cl.MemFlags.CopyHostPtr | Cl.MemFlags.WriteOnly, clImageFormat, (IntPtr)inputImgWidth,
                                                                  (IntPtr)inputImgHeight, (IntPtr)0, outputArray, out error);
                CheckErr(error, "Cl.CreateImage2D output");
                pendingReleases.Add(() => Cl.Cl.ReleaseMemObject(outputImage2DBuffer));

                //Pass the memory buffers to our kernel function
                error  = Cl.Cl.SetKernelArg(kernel, 0, (IntPtr)intPtrSize, inputImage2DBuffer);
                error |= Cl.Cl.SetKernelArg(kernel, 1, (IntPtr)intPtrSize, outputImage2DBuffer);
                error |= Cl.Cl.SetKernelArg(kernel, 2, (float)threshold / 250.0f);
                CheckErr(error, "Cl.SetKernelArg");

                //Create a command queue, where all of the commands for execution will be added
                Cl.CommandQueue cmdQueue = Cl.Cl.CreateCommandQueue(_context, _device, (Cl.CommandQueueProperties) 0, out error);
                CheckErr(error, "Cl.CreateCommandQueue");
                pendingReleases.Add(() => Cl.Cl.ReleaseCommandQueue(cmdQueue));
                Cl.Event clevent;

                //Copy input image from the host to the GPU.
                IntPtr[] originPtr        = new IntPtr[] { (IntPtr)0, (IntPtr)0, (IntPtr)0 };                          //x, y, z
                IntPtr[] regionPtr        = new IntPtr[] { (IntPtr)inputImgWidth, (IntPtr)inputImgHeight, (IntPtr)1 }; //x, y, z
                IntPtr[] workGroupSizePtr = new IntPtr[] { (IntPtr)inputImgWidth, (IntPtr)inputImgHeight, (IntPtr)1 };
                error = Cl.Cl.EnqueueWriteImage(cmdQueue, inputImage2DBuffer, Cl.Bool.True,
                                                originPtr, regionPtr, (IntPtr)0, (IntPtr)0, inputArray, 0, null, out clevent);
                CheckErr(error, "Cl.EnqueueWriteImage");
                if (error == Cl.ErrorCode.Success)
                {
                    // None of the returned events is used for synchronisation (the transfers are
                    // blocking and Finish() covers the kernel), so release each one instead of
                    // overwriting and leaking it.
                    Cl.Cl.ReleaseEvent(clevent);
                }

                //Execute our kernel (OpenCL code); only the first two work sizes are used (work dimension 2)
                error = Cl.Cl.EnqueueNDRangeKernel(cmdQueue, kernel, 2, null, workGroupSizePtr, null, 0, null, out clevent);
                CheckErr(error, "Cl.EnqueueNDRangeKernel");
                if (error == Cl.ErrorCode.Success)
                {
                    Cl.Cl.ReleaseEvent(clevent);
                }

                //Wait for completion of all calculations on the GPU.
                error = Cl.Cl.Finish(cmdQueue);
                CheckErr(error, "Cl.Finish");

                //Read the processed image from the GPU into outputArray
                error = Cl.Cl.EnqueueReadImage(cmdQueue, outputImage2DBuffer, Cl.Bool.True, originPtr, regionPtr,
                                               (IntPtr)0, (IntPtr)0, outputArray, 0, null, out clevent);
                CheckErr(error, "Cl.clEnqueueReadImage");
                if (error == Cl.ErrorCode.Success)
                {
                    Cl.Cl.ReleaseEvent(clevent);
                }
            }
            finally
            {
                //Clean up memory, newest object first
                for (int i = pendingReleases.Count - 1; i >= 0; i--)
                {
                    pendingReleases[i]();
                }
            }
        }