/// <summary>
        /// Extracts one channel of an RGBA byte image into a single-channel byte image.
        /// </summary>
        /// <param name="queue">cloo command queue the kernel is executed on</param>
        /// <param name="source">RGBA image to read from</param>
        /// <param name="dest">single-channel image to write to (at least as large as <paramref name="source"/>)</param>
        /// <param name="offset">channel offset (0..3)</param>
        /// <exception cref="ArgumentNullException">queue, source or dest is null</exception>
        /// <exception cref="ArgumentOutOfRangeException">offset is greater than 3</exception>
        /// <exception cref="ArgumentException">dest is smaller than source in width or height</exception>
        public void ExtractChannel(ClooCommandQueue queue, ClooImage2DByteRgbA source, ClooImage2DByteA dest, byte offset)
        {
            if (queue == null)
            {
                throw new ArgumentNullException("queue");
            }
            if (source == null)
            {
                throw new ArgumentNullException("source");
            }
            if (dest == null)
            {
                throw new ArgumentNullException("dest");
            }
            if (offset > 3)
            {
                string rangeMessage = String.Format("offset must be between 0..3 but was {0}", offset);
                throw new ArgumentOutOfRangeException("offset", rangeMessage);
            }

            // the destination has to cover the whole source area
            bool destCoversSource = (source.Width <= dest.Width) && (source.Height <= dest.Height);
            if (!destCoversSource)
            {
                throw new ArgumentException(
                    "Destination image (" + dest.Width + "x" + dest.Height
                    + ") must have at least the same size as the source image (" + source.Width + "x" + source.Height + ")");
            }

            KernelImageByteExtractChannel.SetArguments(source, dest, offset);

            // the global work size spans the source dimensions
            long[] globalWorkSize = { source.Width, source.Height };
            queue.Execute(KernelImageByteExtractChannel, null, globalWorkSize, null, null);

            // flag the destination as changed by the kernel
            dest.Modified = true;
        }
        /// <summary>
        /// Rebuilds the context, queue, sampler, kernels, face detector and all work images
        /// for the device currently selected in <c>comboBoxDevices</c>.
        /// </summary>
        /// <remarks>
        /// An existing context is disposed first. Any exception is caught and shown in a message box.
        /// </remarks>
        void initializeDevice()
        {
            try
            {
                _selectedDevice = comboBoxDevices.SelectedItem as ClooDevice;

                // tear down what was created for the previously selected device
                if (_context != null)
                {
                    // stop processing before the context goes away
                    _startProcessing = false;

                    _context.Dispose();
                    _kernels = null;
                    _context = null;
                    _sampler = null;
                    _queue = null;

                    image2.Source = null;
                    image3.Source = null;
                    image4.Source = null;
                }

                // nothing selected (or the selected item is not a ClooDevice): leave everything torn down
                if (_selectedDevice == null)
                {
                    return;
                }

                // create context, queue, sampler and kernels
                _context = _selectedDevice.CreateContext();
                _queue = _context.CreateCommandQueue();
                _sampler = new ClooSampler(_context, false, ComputeImageAddressing.ClampToEdge, ComputeImageFiltering.Linear);
                _kernels = ClooProgramViolaJones.Create(_context);

                // face detector configuration
                _haarObjectDetector = ClooHaarObjectDetector.CreateFaceDetector(_context, _queue, 640, 480);
                _haarObjectDetector.ScalingFactor = 1.25f;
                _haarObjectDetector.ScalingMode = ScalingMode.SmallerToLarger;
                _haarObjectDetector.MinSize = new System.Drawing.Size(30, 30);
                _haarObjectDetector.MaxSize = new System.Drawing.Size(100, 100);

                // measure how long allocating and uploading the buffers takes
                Stopwatch stopwatch = Stopwatch.StartNew();

                ComputeMemoryFlags histogramFlags = ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.AllocateHostPointer;
                ComputeMemoryFlags imageFlags = ComputeMemoryFlags.ReadWrite | ComputeMemoryFlags.UseHostPointer;

                // two histograms with 32 * 32 * 32 entries each
                _histogram = new ClooBuffer<uint>(_context, histogramFlags, 32 * 32 * 32);
                _histogram2 = new ClooBuffer<uint>(_context, histogramFlags, 32 * 32 * 32);

                // images created from the original bitmap are written to the device right away
                _clooImageByteOriginal = ClooImage2DByteRgbA.CreateFromBitmap(_context, imageFlags, _bitmapImage1);
                _clooImageByteOriginal.WriteToDevice(_queue);
                _clooImageByteGrayOriginal = ClooImage2DByteA.CreateFromBitmap(_context, imageFlags, _bitmapImage1);
                _clooImageByteGrayOriginal.WriteToDevice(_queue);

                var width = _bitmapImage1.Width;
                var height = _bitmapImage1.Height;

                // work images with the size of the original bitmap
                _clooImageByteResult = ClooImage2DByteRgbA.Create(_context, imageFlags, width, height);
                _clooImageByteResultA = ClooImage2DByteA.Create(_context, imageFlags, width, height);
                _clooImageFloatOriginal = ClooImage2DFloatRgbA.Create(_context, imageFlags, width, height);
                _clooImageFloatGrayOriginal = ClooImage2DFloatA.Create(_context, imageFlags, width, height);
                _clooImageFloatTemp1 = ClooImage2DFloatRgbA.Create(_context, imageFlags, width, height);
                _clooImageFloatTemp2 = ClooImage2DFloatRgbA.Create(_context, imageFlags, width, height);
                _clooImageFloatATemp1 = ClooImage2DFloatA.Create(_context, imageFlags, width, height);
                _clooImageFloatATemp2 = ClooImage2DFloatA.Create(_context, imageFlags, width, height);

                // integral images are one pixel larger in both dimensions
                _clooImageFloatIntegral = ClooImage2DFloatA.Create(_context, imageFlags, width + 1, height + 1);
                _clooImageUIntIntegral = ClooImage2DUIntA.Create(_context, imageFlags, width + 1, height + 1);
                _clooImageUIntIntegralSquare = ClooImage2DUIntA.Create(_context, imageFlags, width + 1, height + 1);

                _queue.Finish();
                label1.Content = stopwatch.ElapsedMilliseconds + " ms - original " + width + "x" + height;

                _startProcessing = true;

                // NOTE(review): Update is deliberately kept as two consecutive calls; the reason is not visible in this method
                Update();
                Update();
            }
            catch (Exception ex)
            {
                // show exception
                MessageBox.Show(ex.Message, ex.GetType().ToString(), MessageBoxButton.OK, MessageBoxImage.Stop);
            }
        }
        /// <summary>
        /// Applies the box blur kernel to a single-channel byte image.
        /// </summary>
        /// <param name="queue">cloo command queue the kernel is executed on</param>
        /// <param name="source">image to read from</param>
        /// <param name="dest">image to write to (at least as large as <paramref name="source"/>)</param>
        /// <param name="sampler">sampler to be used for image reading</param>
        /// <param name="offset">offset value passed to the kernel</param>
        /// <exception cref="ArgumentNullException">queue, source, dest or sampler is null</exception>
        /// <exception cref="ArgumentException">dest is smaller than source in width or height</exception>
        public void BoxBlur(ClooCommandQueue queue, ClooImage2DByteA source, ClooImage2DByteA dest, ClooSampler sampler, int offset)
        {
            if (queue == null)
            {
                throw new ArgumentNullException("queue");
            }
            if (source == null)
            {
                throw new ArgumentNullException("source");
            }
            if (dest == null)
            {
                throw new ArgumentNullException("dest");
            }
            if (sampler == null)
            {
                throw new ArgumentNullException("sampler");
            }

            // the destination has to cover the whole source area
            bool destCoversSource = (source.Width <= dest.Width) && (source.Height <= dest.Height);
            if (!destCoversSource)
            {
                throw new ArgumentException(
                    "Destination image (" + dest.Width + "x" + dest.Height
                    + ") must have at least the same size as the source image (" + source.Width + "x" + source.Height + ")");
            }

            KernelImageByteBoxBlur.SetArguments(source, dest, sampler, offset);

            // the global work size spans the source dimensions
            long[] globalWorkSize = { source.Width, source.Height };
            queue.Execute(KernelImageByteBoxBlur, null, globalWorkSize, null, null);

            dest.Modified = true;
        }
        /// <summary>
        /// Converts a single-channel byte image into a single-channel float image.
        /// </summary>
        /// <remarks>
        /// The destination is flagged as modified and as not normalized afterwards.
        /// </remarks>
        /// <param name="queue">cloo command queue the kernel is executed on</param>
        /// <param name="source">byte image to read from</param>
        /// <param name="dest">float image to write to (at least as large as <paramref name="source"/>)</param>
        /// <exception cref="ArgumentNullException">queue, source or dest is null</exception>
        /// <exception cref="ArgumentException">dest is smaller than source in width or height</exception>
        public void ByteToFloat(ClooCommandQueue queue, ClooImage2DByteA source, ClooImage2DFloatA dest)
        {
            if (queue == null)
            {
                throw new ArgumentNullException("queue");
            }
            if (source == null)
            {
                throw new ArgumentNullException("source");
            }
            if (dest == null)
            {
                throw new ArgumentNullException("dest");
            }

            // the destination has to cover the whole source area
            bool destCoversSource = (source.Width <= dest.Width) && (source.Height <= dest.Height);
            if (!destCoversSource)
            {
                throw new ArgumentException(
                    "Destination image (" + dest.Width + "x" + dest.Height
                    + ") must have at least the same size as the source image (" + source.Width + "x" + source.Height + ")");
            }

            KernelImageByteToFloat.SetArguments(source, dest);

            // the global work size spans the source dimensions
            long[] globalWorkSize = { source.Width, source.Height };
            queue.Execute(KernelImageByteToFloat, null, globalWorkSize, null, null);

            dest.Modified = true;
            dest.Normalized = false;
        }
 /// <summary>
 /// Sets a channel of an RGBA image
 /// </summary>
 /// <remarks>
 /// Convenience overload: forwards to the five-argument <c>SetChannel</c> overload and passes
 /// <paramref name="image"/> as both the second and the fourth argument.
 /// </remarks>
 /// <param name="queue">cloo command queue</param>
 /// <param name="image">RGBA image, forwarded twice to the five-argument overload</param>
 /// <param name="mask">mask image</param>
 /// <param name="offset">offset (0..3)</param>
 public void SetChannel(ClooCommandQueue queue, ClooImage2DByteRgbA image, ClooImage2DByteA mask, byte offset)
 {
     // NOTE(review): presumably the second argument is the source and the fourth the destination,
     // which would make this an in-place update of image - confirm against the five-argument overload
     SetChannel(queue, image, mask, image, offset);
 }
        /// <summary>
        /// Sets a constant value to all cells in an image
        /// </summary>
        /// <param name="queue">cloo command queue the kernel is executed on</param>
        /// <param name="image">image to fill</param>
        /// <param name="value">value to set (passed to the kernel as an unsigned integer)</param>
        /// <exception cref="ArgumentNullException">queue or image is null</exception>
        public void SetValue(ClooCommandQueue queue, ClooImage2DByteA image, byte value)
        {
            if (queue == null) throw new ArgumentNullException("queue");
            // report the real parameter name ("image"); this method has no "dest" parameter
            if (image == null) throw new ArgumentNullException("image");

            KernelImageByteSetValueA.SetArguments(image, (uint)value);
            // the global work size spans the whole image
            queue.Execute(KernelImageByteSetValueA, null, new long[] { image.Width, image.Height }, null, null);
            image.Modified = true;
        }
        /// <summary>
        /// Convert a byte image to a squared integral image
        /// </summary>
        /// <remarks>
        /// We skip the last line to keep the original size (better performance):
        /// step 1 runs over source.Height - 1 work items, step 2 over source.Width - 1 work items.
        /// The destination is cleared to 0 before step 1.
        /// </remarks>
        /// <param name="queue">cloo command queue</param>
        /// <param name="source">image source</param>
        /// <param name="dest">image destination (at least as large as <paramref name="source"/>)</param>
        /// <exception cref="ArgumentNullException">queue, source or dest is null</exception>
        /// <exception cref="ArgumentException">dest is smaller than source in width or height</exception>
        public void IntegralSquare(ClooCommandQueue queue, ClooImage2DByteA source, ClooImage2DUIntA dest)
        {
            if (queue == null) throw new ArgumentNullException("queue");
            if (source == null) throw new ArgumentNullException("source");
            if (dest == null) throw new ArgumentNullException("dest");
            // same wording as the other image operations: destination size first, then the required source size
            if ((source.Width > dest.Width) || (source.Height > dest.Height)) throw new ArgumentException("Destination image (" + dest.Width + "x" + dest.Height + ") must have at least the same size as the source image (" + source.Width + "x" + source.Height + ")");

            // execute step 1 (on a cleared destination)
            SetValue(queue, dest, 0);
            KernelImageUIntIntegralSquareStep1.SetArguments(source, dest);
            queue.Execute(KernelImageUIntIntegralSquareStep1, null, new long[] { source.Height - 1 }, null, null);
            dest.Modified = true;

            // execute step 2 (reads and writes the destination)
            KernelImageUIntIntegralSquare.SetArguments(dest, dest, source.Height);
            queue.Execute(KernelImageUIntIntegralSquare, null, new long[] { source.Width - 1 }, null, null);
            dest.Modified = true;
        }
        /// <summary>
        /// Create histogram backprojection for byte image
        /// </summary>
        /// <param name="queue">cloo command queue</param>
        /// <param name="source">image source</param>
        /// <param name="dest">image destination (for probability map)</param>
        /// <param name="srcHistogram">source histogram uint buffer (must be at least bins^3 in length)</param>
        /// <param name="frameHistogram">frame histogram uint buffer (must be at least bins^3 in length)</param>
        /// <param name="bins">number of bins (at least 2)</param>
        /// <param name="startX">start from X coordinate (must not be negative)</param>
        /// <param name="startY">start from Y coordinate (must not be negative)</param>
        /// <param name="width">width (0 = everything from startX to the right edge of the source)</param>
        /// <param name="height">height (0 = everything from startY to the bottom edge of the source)</param>
        /// <exception cref="ArgumentNullException">queue, source, dest, srcHistogram or frameHistogram is null</exception>
        /// <exception cref="ArgumentException">bins is less than 2 or a histogram buffer is shorter than bins^3</exception>
        /// <exception cref="ArgumentOutOfRangeException">startX, startY, width or height is negative</exception>
        public void HistogramBackprojection(ClooCommandQueue queue, ClooImage2DByteRgbA source, ClooImage2DByteA dest, 
            ClooBuffer<uint> srcHistogram, ClooBuffer<uint> frameHistogram, byte bins, int startX = 0, int startY = 0, 
            int width = 0, int height = 0)
        {
            if (queue == null) throw new ArgumentNullException("queue");
            if (source == null) throw new ArgumentNullException("source");
            if (dest == null) throw new ArgumentNullException("dest");
            if (srcHistogram == null) throw new ArgumentNullException("srcHistogram");
            if (frameHistogram == null) throw new ArgumentNullException("frameHistogram");
            if (bins < 2) throw new ArgumentException("bins must be at least 2", "bins");

            int length = bins * bins * bins;
            if (srcHistogram.Count < length) throw new ArgumentException("Buffer length for histogram must be at least " + length, "srcHistogram");
            if (frameHistogram.Count < length) throw new ArgumentException("Buffer length for histogram must be at least " + length, "frameHistogram");

            // a negative start would silently wrap to a huge value in the (uint) casts below
            if (startX < 0) throw new ArgumentOutOfRangeException("startX");
            if (startY < 0) throw new ArgumentOutOfRangeException("startY");

            // 0 means "up to the edge of the source image"
            if (width == 0) width = source.Width - startX;
            if (height == 0) height = source.Height - startY;
            if (width < 0) throw new ArgumentOutOfRangeException("width");
            if (height < 0) throw new ArgumentOutOfRangeException("height");

            KernelImageByteRgbHistogramByteBP.SetArguments(source, dest, srcHistogram, frameHistogram, bins, (uint)startX, (uint)startY);
            queue.Execute(KernelImageByteRgbHistogramByteBP, null, new long[] { width, height }, null, null);
            dest.Modified = true;
        }
        /// <summary>
        /// Create histogram
        /// </summary>
        /// <remarks>
        /// The first 256 entries of the histogram buffer are reset to 0 before the histogram kernel runs.
        /// </remarks>
        /// <param name="queue">cloo command queue</param>
        /// <param name="source">image source</param>
        /// <param name="histogram">histogram uint buffer (must be at least 256 in length)</param>
        /// <param name="startX">start from X coordinate (must not be negative)</param>
        /// <param name="startY">start from Y coordinate (must not be negative)</param>
        /// <param name="width">width (0 = everything from startX to the right edge of the source)</param>
        /// <param name="height">height (0 = everything from startY to the bottom edge of the source)</param>
        /// <exception cref="ArgumentNullException">queue, source or histogram is null</exception>
        /// <exception cref="ArgumentException">histogram has fewer than 256 entries</exception>
        /// <exception cref="ArgumentOutOfRangeException">startX, startY, width or height is negative</exception>
        public void Histogram256(ClooCommandQueue queue, ClooImage2DByteA source, ClooBuffer<uint> histogram,
            int startX = 0, int startY = 0, int width = 0, int height = 0)
        {
            if (queue == null) throw new ArgumentNullException("queue");
            if (source == null) throw new ArgumentNullException("source");
            if (histogram == null) throw new ArgumentNullException("histogram");

            // the clear kernel below runs 256 work items on a uint buffer, so 256 elements
            // (not 256 bytes) are required
            if (histogram.Count < 256) throw new ArgumentException("Buffer length for histogram must be at least 256", "histogram");

            // a negative start would silently wrap to a huge value in the (uint) casts below
            if (startX < 0) throw new ArgumentOutOfRangeException("startX");
            if (startY < 0) throw new ArgumentOutOfRangeException("startY");

            // 0 means "up to the edge of the source image"
            if (width == 0) width = source.Width - startX;
            if (height == 0) height = source.Height - startY;
            if (width < 0) throw new ArgumentOutOfRangeException("width");
            if (height < 0) throw new ArgumentOutOfRangeException("height");

            // reset the 256 histogram entries
            KernelCoreUIntSetValue.SetArguments(histogram, (uint)0);
            queue.Execute(KernelCoreUIntSetValue, null, new long[] { 256 }, null, null);

            KernelImageByteHistogram256.SetArguments(source, histogram, (uint)startX, (uint)startY);
            queue.Execute(KernelImageByteHistogram256, null, new long[] { width, height }, null, null);
            histogram.Modified = true;
        }
        /// <summary>
        /// Converts a single-channel float image into a single-channel byte image.
        /// </summary>
        /// <remarks>
        /// The kernel is chosen by <c>source.Normalized</c>: the normalized variant when it is set,
        /// the plain variant otherwise.
        /// </remarks>
        /// <param name="queue">cloo command queue the kernel is executed on</param>
        /// <param name="source">float image to read from</param>
        /// <param name="dest">byte image to write to (at least as large as <paramref name="source"/>)</param>
        /// <exception cref="ArgumentNullException">queue, source or dest is null</exception>
        /// <exception cref="ArgumentException">dest is smaller than source in width or height</exception>
        public void FloatToByte(ClooCommandQueue queue, ClooImage2DFloatA source, ClooImage2DByteA dest)
        {
            if (queue == null)
            {
                throw new ArgumentNullException("queue");
            }
            if (source == null)
            {
                throw new ArgumentNullException("source");
            }
            if (dest == null)
            {
                throw new ArgumentNullException("dest");
            }

            // the destination has to cover the whole source area
            bool destCoversSource = (source.Width <= dest.Width) && (source.Height <= dest.Height);
            if (!destCoversSource)
            {
                throw new ArgumentException(
                    "Destination image (" + dest.Width + "x" + dest.Height
                    + ") must have at least the same size as the source image (" + source.Width + "x" + source.Height + ")");
            }

            if (!source.Normalized)
            {
                // source is not normalized: plain conversion kernel
                KernelImageFloatToByte.SetArguments(source, dest);
                long[] globalWorkSize = { source.Width, source.Height };
                queue.Execute(KernelImageFloatToByte, null, globalWorkSize, null, null);
            }
            else
            {
                // source is normalized: normalized conversion kernel
                KernelImageFloatToByteNorm.SetArguments(source, dest);
                long[] globalWorkSize = { source.Width, source.Height };
                queue.Execute(KernelImageFloatToByteNorm, null, globalWorkSize, null, null);
            }

            dest.Modified = true;
        }
        /// <summary>
        /// Flips a single-channel byte image along its Y coordinate into a second image.
        /// </summary>
        /// <param name="queue">cloo command queue the kernel is executed on</param>
        /// <param name="source">image to read from</param>
        /// <param name="dest">image to write to; must be a different image, at least as large as <paramref name="source"/></param>
        /// <exception cref="ArgumentNullException">queue, source or dest is null</exception>
        /// <exception cref="ArgumentException">source and dest are the same image, or dest is smaller than source</exception>
        public void FlipY(ClooCommandQueue queue, ClooImage2DByteA source, ClooImage2DByteA dest)
        {
            if (queue == null)
            {
                throw new ArgumentNullException("queue");
            }
            if (source == null)
            {
                throw new ArgumentNullException("source");
            }
            if (dest == null)
            {
                throw new ArgumentNullException("dest");
            }
            if (source == dest)
            {
                // the kernel cannot flip an image onto itself
                throw new ArgumentException("Flipping kernel is not designed to run inline therefore source and destination must be different images");
            }

            // the destination has to cover the whole source area
            bool destCoversSource = (source.Width <= dest.Width) && (source.Height <= dest.Height);
            if (!destCoversSource)
            {
                throw new ArgumentException(
                    "Destination image (" + dest.Width + "x" + dest.Height
                    + ") must have at least the same size as the source image (" + source.Width + "x" + source.Height + ")");
            }

            KernelImageByteFlipY.SetArguments(source, dest);

            // the global work size spans the source dimensions
            long[] globalWorkSize = { source.Width, source.Height };
            queue.Execute(KernelImageByteFlipY, null, globalWorkSize, null, null);

            dest.Modified = true;
        }