/// <summary>
        /// For every (i, j) pair handed out by <c>Parallel2D.For</c>, copies the square
        /// neighbourhood of <paramref name="input"/> centred on that pixel into a scratch
        /// buffer, runs <c>Bitonic.Sort</c> over it and stores the middle element of the
        /// buffer in <paramref name="output"/> at the same pixel position.
        /// </summary>
        /// <remarks>
        /// The neighbourhood radius (<c>window</c>) and the side length (<c>windowCount</c>)
        /// are declared outside this method. The middle element is the median provided
        /// <c>Bitonic.Sort</c> orders the range it is given -- presumably it does; confirm
        /// against its implementation.
        /// </remarks>
        /// <param name="output">Destination image, indexed as <c>row * width + column</c>.</param>
        /// <param name="input">Source image, indexed as <c>row * width + column</c>.</param>
        /// <param name="width">Number of pixels per image row.</param>
        /// <param name="height">Number of image rows; only used to bound the second loop range.</param>
        public static void ParForGPU(ushort[] output, ushort[] input, int width, int height)
        {
            // NOTE(review): the ranges passed here look like [window, width - window) for i and
            // [window, height - window) for j, which would keep every neighbourhood read inside
            // the image -- confirm the bound semantics of Parallel2D.For.
            Parallel2D.For(window, width - window, window, height - window, (i, j) =>
            {
                // The size expression is kept inline on purpose: it is unclear from here whether
                // StackArray needs it in this exact form.
                var neighbourhood = new StackArray<ushort>(windowCount * windowCount);

                for (int dy = -window; dy <= window; ++dy)
                {
                    // Start of the current neighbourhood row in the scratch buffer and in the image.
                    int scratchRow = (dy + window) * windowCount;
                    int imageRow   = (j + dy) * width;

                    for (int dx = -window; dx <= window; ++dx)
                    {
                        neighbourhood[scratchRow + dx + window] = input[imageRow + (i + dx)];
                    }
                }

                Bitonic.Sort(neighbourhood.data, 0, windowCount * windowCount);

                // Middle slot of the buffer after sorting.
                int middle = (windowCount * windowCount) / 2;
                output[j * width + i] = neighbourhood[middle];
            });
        }
        /// <summary>
        /// Processes the image rows <c>window</c> .. <c>height - window - 1</c> in parallel.
        /// For each pixel in columns <c>window</c> .. <c>width - window - 1</c> of such a row,
        /// the (2 * window + 1)-squared neighbourhood of <paramref name="input"/> is copied
        /// into a scratch buffer, passed to <c>Bitonic.Sort</c>, and the middle element of
        /// the buffer is written to <paramref name="output"/> at the same pixel position.
        /// </summary>
        /// <remarks>
        /// Pixels closer than <paramref name="window"/> to any image edge are never written.
        /// The middle element is the median provided <c>Bitonic.Sort</c> orders the range it
        /// is given -- presumably it does; confirm against its implementation.
        /// </remarks>
        /// <param name="output">Destination image, indexed as <c>row * width + column</c>.</param>
        /// <param name="input">Source image, indexed as <c>row * width + column</c>.</param>
        /// <param name="width">Number of pixels per image row.</param>
        /// <param name="height">Number of image rows.</param>
        /// <param name="window">Neighbourhood radius in pixels.</param>
        public static void ParForGPU(ushort[] output, ushort[] input, int width, int height, int window)
        {
            int windowCount = 2 * window + 1;
            int sampleCount = windowCount * windowCount;
            int middle      = sampleCount / 2;
            int columnEnd   = width - window;

            Parallel.For(window, height - window, j =>
            {
                // One scratch buffer per row; every slot is overwritten for each pixel,
                // so it can be reused across the columns of that row.
                var samples = new ushort[sampleCount];

                for (int i = window; i < columnEnd; ++i)
                {
                    // Slots are filled in row-major order, so a running counter visits the
                    // same indices as (dy + window) * windowCount + dx + window would.
                    int slot = 0;
                    for (int dy = -window; dy <= window; ++dy)
                    {
                        int centre = (j + dy) * width + i;
                        for (int dx = -window; dx <= window; ++dx)
                        {
                            samples[slot++] = input[centre + dx];
                        }
                    }

                    Bitonic.Sort(samples, 0, sampleCount);
                    output[j * width + i] = samples[middle];
                }
            });
        }