Пример #1
0
        /// <summary>
        /// Benchmarks the <c>SimpleMultiplyKernel</c> matrix multiplication through CUDAfy.
        /// </summary>
        /// <remarks>
        /// The complete pipeline runs three times. Each pass prints the wall-clock time of
        /// device acquisition, translation, module loading, the first kernel launch and the
        /// average of 50 further launches, and finally hands the operands and the product
        /// to <c>Util.VerifyResult</c>.
        /// </remarks>
        public static void SimpleMultiply()
        {
            for (var iter = 1; iter <= 3; ++iter)
            {
                Console.WriteLine("====> Test SimpleMultiply with CUDAfy C# (#.{0}) <====", iter);

                var timer = Stopwatch.StartNew();
                var gpu = CudafyHost.GetDevice();
                Console.WriteLine("GPU: {0}", gpu.GetDeviceProperties().Name);
                timer.Stop();
                Console.WriteLine("Step 1) Runtime setup                  {0} ms", timer.Elapsed.TotalMilliseconds);

                timer.Restart();
                var module = CudafyTranslator.Cudafy();
                timer.Stop();
                Console.WriteLine("Step 2) Compile                        {0} ms", timer.Elapsed.TotalMilliseconds);

                timer.Restart();
                gpu.LoadModule(module);
                timer.Stop();
                Console.WriteLine("Step 3) Load module                    {0} ms", timer.Elapsed.TotalMilliseconds);

                // Operand shapes: a is (100*factor x 200*factor), b is (200*factor x 300*factor).
                const int factor = 8;
                var a = Util.RandomMatrix(100 * factor, 200 * factor);
                var b = Util.RandomMatrix(200 * factor, 300 * factor);
                var c = new float[a.GetLength(0), b.GetLength(1)];

                var devA = gpu.CopyToDevice(a);
                var devB = gpu.CopyToDevice(b);
                var devC = gpu.Allocate<float>(a.GetLength(0), b.GetLength(1));

                try
                {
                    // One TileSize x TileSize block per output tile; x follows the columns of b,
                    // y follows the rows of a.
                    // NOTE(review): assumes Util.Divup is a rounding-up integer division - confirm.
                    var gridDim = new dim3(Util.Divup(b.GetLength(1), TileSize), Util.Divup(a.GetLength(0), TileSize));
                    var blockDim = new dim3(TileSize, TileSize);

                    // The first launch is timed on its own; Synchronize() makes the timer
                    // cover the kernel's execution and not just its submission.
                    timer.Restart();
                    gpu.Launch(gridDim, blockDim, SimpleMultiplyKernel, devA, devB, devC);
                    gpu.Synchronize();
                    timer.Stop();
                    Console.WriteLine("Kernel launch first time               {0} ms", timer.Elapsed.TotalMilliseconds);

                    // Queue 50 launches and synchronize once at the end.
                    const int repetitions = 50;
                    timer.Restart();
                    for (var i = 0; i < repetitions; ++i)
                    {
                        gpu.Launch(gridDim, blockDim, SimpleMultiplyKernel, devA, devB, devC);
                    }
                    gpu.Synchronize();
                    timer.Stop();
                    Console.WriteLine("Kernel launch average time             {0} ms", timer.Elapsed.TotalMilliseconds / repetitions);

                    gpu.CopyFromDevice(devC, c);
                }
                finally
                {
                    // Release the device buffers even when a launch, sync or copy-back throws.
                    gpu.Free(devA);
                    gpu.Free(devB);
                    gpu.Free(devC);
                }

                Util.VerifyResult(a, b, c);
            }
        }
Пример #2
0
        /// <summary>
        /// Runs the <c>GPU_MA</c> kernel on the matrices <paramref name="A"/> and
        /// <paramref name="B"/> and copies the device result into <paramref name="C"/>.
        /// </summary>
        /// <param name="A">First host operand; uploaded to the device.</param>
        /// <param name="B">Second host operand; uploaded to the device.</param>
        /// <param name="C">Host array that receives the result; also used to size the device result buffer.</param>
        /// <param name="gpu">Device on which the kernel is launched; its module must already contain <c>GPU_MA</c>.</param>
        /// <param name="maxTheadBlockSize">Upper bound for each side of the square thread block.</param>
        /// <param name="Size">Passed to the kernel as its last argument and used for the launch dimensions.</param>
        /// <returns>Always 1.</returns>
        public static int MA(int[,] A, int[,] B, int[,] C, GPGPU gpu, int maxTheadBlockSize, int Size)
        {
            int[,] GPU_A = null;
            int[,] GPU_B = null;
            int[,] GPU_C = null;

            try
            {
                // Allocate the device buffers.
                GPU_A = gpu.Allocate<int>(A);
                GPU_B = gpu.Allocate<int>(B);
                GPU_C = gpu.Allocate<int>(C);

                // Upload both operands.
                gpu.CopyToDevice(A, GPU_A);
                gpu.CopyToDevice(B, GPU_B);

                // Square thread block whose side is Size, capped at maxTheadBlockSize.
                int threadsPerSide = Size < maxTheadBlockSize ? Size : maxTheadBlockSize;
                dim3 threadsPerBlock = new dim3(threadsPerSide, threadsPerSide);

                // NOTE(review): the grid is always Size x Size blocks, independent of the
                // block size chosen above; whether GPU_MA bounds-checks its indices is not
                // visible here - confirm against the kernel.
                dim3 block = new dim3(Size, Size);

                gpu.Launch(block, threadsPerBlock, "GPU_MA", GPU_A, GPU_B, GPU_C, Size);

                // Download the result.
                gpu.CopyFromDevice(GPU_C, C);
            }
            finally
            {
                // Free whatever was allocated, even when an allocation, copy or launch throws.
                if (GPU_A != null)
                {
                    gpu.Free(GPU_A);
                }
                if (GPU_B != null)
                {
                    gpu.Free(GPU_B);
                }
                if (GPU_C != null)
                {
                    gpu.Free(GPU_C);
                }
            }

            return 1;
        }
Пример #3
0
        /// <summary>
        /// Renders a Mandelbrot image on the GPU.
        /// </summary>
        /// <param name="options">Supplies the image size (Width, Height) and the plane bounds (MinX, MaxX, MinY, MaxY).</param>
        /// <param name="log">Callback that receives a progress message before each stage.</param>
        /// <returns>A new bitmap of the requested size, filled in by <c>Rendering.fastDrawBitmap</c>.</returns>
        public Bitmap Render(Rendering.ExecutionOptions options, Action<string> log)
        {
            try
            {
                var image = new Bitmap(options.Width, options.Height);
                int columns = options.Width;
                int rows = options.Height;

                log("Initializing and copying data to GPU memory");
                // Iteration counts are stored as [row, column].
                int[,] hostIterations = new int[rows, columns];
                var deviceIterations = gpu.CopyToDevice(hostIterations);
                var grid = new dim3(rows, columns);
                var threads = BlockSize;

                // The kernel works in single precision: it gets the left edge, the upper
                // bound on Y and the size of one pixel step along each axis.
                var left = (float)options.MinX;
                var right = (float)options.MaxX;
                var bottom = (float)options.MinY;
                var top = (float)options.MaxY;
                var pixelWidth = (right - left) / columns;
                var pixelHeight = (top - bottom) / rows;

                log("Launching Mandelbrot calculations");
                gpu.Launch(grid, threads, "CalculateMandelbrot", left, top, pixelWidth, pixelHeight, deviceIterations);

                log("Mandelbrot calculations done, fetching results from GPU memory");
                gpu.CopyFromDevice(deviceIterations, hostIterations);

                log("Generating the final image");
                Rendering.fastDrawBitmap(image, hostIterations);
                return image;
            }
            finally
            {
                // Device memory is released whether rendering succeeded or threw.
                gpu.FreeAll();
            }
        }
Пример #4
0
 /// <summary>
 /// Acquires a CUDA device, translates the module for sm_35, loads it and calls
 /// <c>Launch()</c> without arguments.
 /// </summary>
 /// <param name="inputArray">Not read by the current implementation.</param>
 /// <returns>A new one-element array holding the default value 0.</returns>
 /// <remarks>
 /// NOTE(review): this looks like an unfinished stub - the input is ignored, no kernel
 /// is named and the grid below is never used. Confirm the intended behaviour.
 /// </remarks>
 public double[] transpose(double[] inputArray)
 {
     GPGPU device = CudafyHost.GetDevice(eGPUType.Cuda);
     CudafyModule translated = CudafyTranslator.Cudafy(eArchitecture.sm_35);
     device.LoadModule(translated);

     // Built but never passed to a launch.
     dim3 launchGrid = new dim3(1000);
     device.Launch();

     double[] placeholder = new double[1];
     return placeholder;
 }
Пример #5
0
        /// <summary>
        /// Translates and loads the CUDAfy module, allocates the device bitmap buffer and
        /// fixes the launch dimensions used by this instance.
        /// </summary>
        /// <param name="bytes">Number of bytes to allocate on the device for the bitmap.</param>
        public void Initialize(int bytes)
        {
            CudafyModule translatedModule = CudafyTranslator.Cudafy();

            _gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
            _gpu.LoadModule(translatedModule);
            _dev_bitmap = _gpu.Allocate<byte>(bytes);

            // 16 x 16 threads per block, DIM / 16 blocks along each axis.
            const int threadsPerSide = 16;
            _threads = new dim3(threadsPerSide, threadsPerSide);
            _blocks = new dim3(DIM / threadsPerSide, DIM / threadsPerSide);
        }
Пример #6
0
        /// <summary>
        /// Ray-traces a set of randomly generated spheres on the GPU and writes the
        /// resulting image into <paramref name="bitmap"/>.
        /// </summary>
        /// <param name="bitmap">
        /// Host buffer that sizes the device allocation and receives the rendered bytes.
        /// </param>
        public static void Execute(byte[] bitmap)
        {
            // Reuse the cached module when its checksums still match; otherwise
            // translate it again and refresh the cache.
            CudafyModule km = CudafyModule.TryDeserialize();
            if (km == null || !km.TryVerifyChecksums())
            {
                km = CudafyTranslator.Cudafy();
                km.TrySerialize();
            }

            GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
            gpu.LoadModule(km);

            try
            {
                // Capture the start time.
                gpu.StartTimer();

                // Allocate device memory for the bitmap (same size as the host buffer).
                byte[] dev_bitmap = gpu.Allocate(bitmap);

                // Build the spheres on the host, then copy them to constant memory.
                NestedSphere[] temp_s = new NestedSphere[SPHERES];
                for (int i = 0; i < SPHERES; i++)
                {
                    temp_s[i].r = rnd(1.0f);
                    temp_s[i].g = rnd(1.0f);
                    temp_s[i].b = rnd(1.0f);

                    temp_s[i].x = rnd(1000.0f) - 500;
                    temp_s[i].y = rnd(1000.0f) - 500;
                    temp_s[i].z = rnd(1000.0f) - 500;
                    temp_s[i].radius = rnd(100.0f) + 20;
                }

                gpu.CopyToConstantMemory(temp_s, s);

                // Generate the bitmap from the sphere data: 16 x 16 threads per block,
                // ray_gui.DIM / 16 blocks along each axis.
                dim3 grids = new dim3(ray_gui.DIM / 16, ray_gui.DIM / 16);
                dim3 threads = new dim3(16, 16);

                // Strongly typed launch; the dynamic equivalent would be
                // gpu.Launch(grids, threads).kernel(dev_bitmap).
                gpu.Launch(grids, threads, kernel, dev_bitmap);

                // Copy the bitmap back from the GPU for display.
                gpu.CopyFromDevice(dev_bitmap, bitmap);

                // Get the stop time and display the timing result.
                float elapsedTime = gpu.StopTimer();
                Console.WriteLine("Time to generate: {0} ms", elapsedTime);
            }
            finally
            {
                // Release device memory even when allocation, launch or copy-back throws.
                gpu.FreeAll();
            }
        }
Пример #7
0
        /// <summary>
        /// Prepares an OpenCL-backed renderer: translates the module, derives a square
        /// block size from the first OpenCL device and loads the module onto the device.
        /// </summary>
        /// <exception cref="InvalidOperationException">No OpenCL device is available.</exception>
        public GpuRenderer()
        {
            // Materialize once so the device query is not repeated for the emptiness
            // check and for picking the first entry (the sequence may be lazy).
            var availableOpenCLDevices = CudafyHost.GetDeviceProperties(eGPUType.OpenCL).ToList();
            if (availableOpenCLDevices.Count == 0)
            {
                throw new InvalidOperationException("No OpenCL devices found...");
            }

            var device = availableOpenCLDevices[0];
            Module = CudafyTranslator.Cudafy(eArchitecture.OpenCL12);

            // Largest side in 1..15 whose square still fits into one thread block.
            var blockSide =
                Enumerable
                .Range(1, 15)
                .Reverse()
                .First(count => count * count <= device.MaxThreadsPerBlock);
            BlockSize = new dim3(blockSide, blockSide);

            // Initialize the gpu and load the module once (avoids reloading on every render).
            gpu = CudafyHost.GetDevice(eGPUType.OpenCL);
            gpu.LoadModule(Module);
        }
Пример #8
0
        /// <summary>
        /// Runs the <c>Snake</c> ("quick correction") kernel over a 960 x 540 image on the GPU.
        /// </summary>
        /// <remarks>
        /// The method loads (or translates and caches) the CUDAfy module, builds a
        /// 21 x 41 x 45 profile table from the p3700.csv file, uploads the table, a sample
        /// buffer and both inputs, launches the kernel and copies the result back.
        /// Failures while reading or parsing the CSV are written to the console and the
        /// method continues with whatever part of the table was filled.
        /// </remarks>
        /// <param name="foregorungRGB_CPU">Host foreground data; uploaded to the device unchanged.</param>
        /// <param name="BackgroundXYZ_CPU">Host background data; uploaded to the device unchanged.</param>
        /// <returns>
        /// A <c>TestOutput</c> whose <c>output_image</c> is the buffer copied back from the
        /// device and whose <c>timeTaken</c> is the value reported by <c>gpu.StopTimer()</c>.
        /// </returns>
        public static TestOutput CorrectColour(ForeGroundStrucuture[] foregorungRGB_CPU, BackGroundStrucuture[] BackgroundXYZ_CPU)
        {
            const int image_size = 960 * 540;

            // Dimensions of the profile table (L, A and B bins).
            const int binsL = 21;
            const int binsA = 41;
            const int binsB = 45;

            // Reuse the cached module when its checksums still match; otherwise
            // translate the listed types and refresh the cache.
            CudafyModule km = CudafyModule.TryDeserialize();
            if (km == null || !km.TryVerifyChecksums())
            {
                km = CudafyTranslator.Cudafy(typeof(ProfileStrucuture), typeof(ForeGroundStrucuture), typeof(BackGroundStrucuture), typeof(SampleStructure), typeof(snake));
                km.TrySerialize();
            }

            // NOTE(review): this flag is set after the translation above, so it presumably
            // only influences later Cudafy() calls - confirm whether it should come first.
            CudafyTranslator.GenerateDebug = true;

            // Target and device id come from CudafyModes (CUDA or emulator).
            GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
            Console.WriteLine("Running quick correction using {0}", gpu.GetDeviceProperties(false).Name);
            gpu.LoadModule(km);

            ForeGroundStrucuture[] distance_CPU = new ForeGroundStrucuture[image_size];

            // Load the profile CSV. Best effort: on failure the table stays empty and the
            // error is only reported on the console.
            DataTable profile = new DataTable();
            try
            {
                using (GenericParserAdapter parser = new GenericParserAdapter(@"C:\lev\STColorCorrection\Data\PROFILE\p3700.csv"))
                {
                    System.Data.DataSet dsResult = parser.GetDataSet();
                    profile = dsResult.Tables[0];
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex);
            }

            ProfileStrucuture[, ,] profiles_CPU = new ProfileStrucuture[binsL, binsA, binsB];
            SampleStructure[,] samples_CPU = new SampleStructure[image_size, 6];

            // Mark every bin as empty; each bin initially carries its own indices.
            for (int indexL = 0; indexL < binsL; indexL++)
            {
                for (int indexA = 0; indexA < binsA; indexA++)
                {
                    for (int indexB = 0; indexB < binsB; indexB++)
                    {
                        profiles_CPU[indexL, indexA, indexB].L = indexL;
                        profiles_CPU[indexL, indexA, indexB].A = indexA;
                        profiles_CPU[indexL, indexA, indexB].B = indexB;
                        profiles_CPU[indexL, indexA, indexB].Given_R = 0;
                        profiles_CPU[indexL, indexA, indexB].Given_G = 0;
                        profiles_CPU[indexL, indexA, indexB].Given_B = 0;
                        profiles_CPU[indexL, indexA, indexB].ML = 0;
                        profiles_CPU[indexL, indexA, indexB].MA = 0;
                        profiles_CPU[indexL, indexA, indexB].MB = 0;
                        profiles_CPU[indexL, indexA, indexB].MX = 0;
                        profiles_CPU[indexL, indexA, indexB].MY = 0;
                        profiles_CPU[indexL, indexA, indexB].MZ = 0;
                        profiles_CPU[indexL, indexA, indexB].distance = -1.0;
                        profiles_CPU[indexL, indexA, indexB].weight = -1.0;

                        profiles_CPU[indexL, indexA, indexB].isempty = TRUE;
                        profiles_CPU[indexL, indexA, indexB].isMoreAccurateThanOrigin = FALSE;
                    }
                }
            }

            // The CSV is a machine-written data file, so its numbers are parsed with the
            // invariant culture instead of whatever culture the current thread has.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            // Fill the bins that have a measured entry. Columns 0-2 select the bin,
            // 3-5 hold ML/MA/MB, 6-8 hold MX/MY/MZ and 9-11 hold the given R/G/B.
            // NOTE(review): row 0 is skipped - presumably a header row; confirm.
            // NOTE(review): a malformed or out-of-range row throws, is logged, and stops
            // the loading of all remaining rows.
            try
            {
                for (int i = 1; i < profile.Rows.Count; i++)
                {
                    DataRow row = profile.Rows[i];

                    int lvalue = Convert.ToInt32(row[0].ToString(), invariant);
                    int avalue = Convert.ToInt32(row[1].ToString(), invariant);
                    int bvalue = Convert.ToInt32(row[2].ToString(), invariant);

                    // Scale by 0.2 and shift A and B so that the indices are non-negative.
                    lvalue = (int)(lvalue * 0.2);
                    avalue = (int)(avalue * 0.2) + 20;
                    bvalue = (int)(bvalue * 0.2) + 22;

                    profiles_CPU[lvalue, avalue, bvalue].L = lvalue;
                    profiles_CPU[lvalue, avalue, bvalue].A = avalue;
                    profiles_CPU[lvalue, avalue, bvalue].B = bvalue;

                    profiles_CPU[lvalue, avalue, bvalue].Given_R = Convert.ToByte(row[9].ToString(), invariant);
                    profiles_CPU[lvalue, avalue, bvalue].Given_G = Convert.ToByte(row[10].ToString(), invariant);
                    profiles_CPU[lvalue, avalue, bvalue].Given_B = Convert.ToByte(row[11].ToString(), invariant);

                    profiles_CPU[lvalue, avalue, bvalue].ML = Convert.ToDouble(row[3].ToString(), invariant);
                    profiles_CPU[lvalue, avalue, bvalue].MA = Convert.ToDouble(row[4].ToString(), invariant);
                    profiles_CPU[lvalue, avalue, bvalue].MB = Convert.ToDouble(row[5].ToString(), invariant);

                    profiles_CPU[lvalue, avalue, bvalue].MX = Convert.ToDouble(row[6].ToString(), invariant);
                    profiles_CPU[lvalue, avalue, bvalue].MY = Convert.ToDouble(row[7].ToString(), invariant);
                    profiles_CPU[lvalue, avalue, bvalue].MZ = Convert.ToDouble(row[8].ToString(), invariant);

                    profiles_CPU[lvalue, avalue, bvalue].isempty = FALSE;
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex);
            }

            try
            {
                // The lookup data is uploaded before the timer starts.
                ProfileStrucuture[, ,] profile_GPU = gpu.CopyToDevice(profiles_CPU);
                SampleStructure[,] samples_GPU = gpu.CopyToDevice(samples_CPU);

                // Timed section: input upload, kernel, result download.
                gpu.StartTimer();
                ForeGroundStrucuture[] foregorungRGB_GPU = gpu.CopyToDevice(foregorungRGB_CPU);
                BackGroundStrucuture[] BackgroundXYZ_GPU = gpu.CopyToDevice(BackgroundXYZ_CPU);

                // Output buffer.
                ForeGroundStrucuture[] distance_GPU = gpu.Allocate(distance_CPU);

                // 24 x 675 blocks of 8 x 4 threads = 518,400 threads, which equals image_size.
                dim3 grids = new dim3(24, 675);
                dim3 threads = new dim3(8, 4);

                gpu.Launch(grids, threads, ((Action<GThread, ProfileStrucuture[, ,], ForeGroundStrucuture[], BackGroundStrucuture[], ForeGroundStrucuture[], SampleStructure[,]>)Snake), profile_GPU, foregorungRGB_GPU, BackgroundXYZ_GPU, distance_GPU, samples_GPU);

                // Copy the result back from the GPU.
                gpu.CopyFromDevice(distance_GPU, distance_CPU);

                // Get the stop time and report it.
                double elapsedTime = gpu.StopTimer();
                TestOutput to_return = new TestOutput();
                to_return.output_image = distance_CPU;
                to_return.timeTaken = elapsedTime;
                Console.WriteLine("Time to generate: {0} ms", elapsedTime);

                return to_return;
            }
            finally
            {
                // FreeAll releases every buffer allocated above, also when an upload,
                // launch or download throws.
                gpu.FreeAll();
            }
        }
Пример #9
0
 /// <summary>
 /// Creates a <see cref="GGrid"/> whose <c>Dim</c> member holds the given size.
 /// </summary>
 /// <param name="size">The value stored in <c>Dim</c>.</param>
 public GGrid(dim3 size)
 {
     this.Dim = size;
 }
Пример #10
0
        /// <summary>
        /// Ray-traces a set of randomly generated spheres on the GPU using a serialized
        /// CUDAfy module and writes the resulting image into <paramref name="bitmap"/>.
        /// </summary>
        /// <param name="bitmap">
        /// Host buffer that sizes the device allocation and receives the rendered bytes.
        /// </param>
        public static void Execute(byte[] bitmap)
        {
            DateTime loadStarted = DateTime.UtcNow;
            CudafyModule km = CudafyModule.TryDeserialize(csFILENAME);
            // Check the module exists and matches the .NET modules, else make new.
            if (km == null || !km.TryVerifyChecksums())
            {
                Console.WriteLine("There was no cached module available so we make a new one.");
                // NOTE(review): the message says "make a new one", but the code deserializes
                // a module named after the type and re-saves it - confirm this is intended.
                km = CudafyModule.Deserialize(typeof(ray_serialize).Name);
                km.Serialize(csFILENAME);
            }

            GPGPU gpu = CudafyHost.GetGPGPU(CudafyModes.Target, 1);
            gpu.LoadModule(km);

            // TotalMilliseconds is the whole elapsed span; TimeSpan.Milliseconds is only
            // the 0-999 component and under-reports any load that takes a second or more.
            long loadMilliseconds = (long)(DateTime.UtcNow - loadStarted).TotalMilliseconds;
            Console.WriteLine("Time taken to load module: {0}ms", loadMilliseconds);

            try
            {
                // Capture the start time.
                gpu.StartTimer();

                // Allocate device memory for the bitmap (same size as the host buffer).
                byte[] dev_bitmap = gpu.Allocate(bitmap);

                // Build the spheres on the host, then copy them to constant memory.
                Sphere[] temp_s = new Sphere[SPHERES];
                for (int i = 0; i < SPHERES; i++)
                {
                    temp_s[i].r = rnd(1.0f);
                    temp_s[i].g = rnd(1.0f);
                    temp_s[i].b = rnd(1.0f);

                    temp_s[i].x = rnd(1000.0f) - 500;
                    temp_s[i].y = rnd(1000.0f) - 500;
                    temp_s[i].z = rnd(1000.0f) - 500;
                    temp_s[i].radius = rnd(100.0f) + 20;
                }

                gpu.CopyToConstantMemory(temp_s, s);

                // Generate the bitmap from the sphere data: 16 x 16 threads per block,
                // DIM / 16 blocks along each axis.
                dim3 grids = new dim3(DIM / 16, DIM / 16);
                dim3 threads = new dim3(16, 16);
                gpu.Launch(grids, threads, "kernel", dev_bitmap);

                // Copy the bitmap back from the GPU for display.
                gpu.CopyFromDevice(dev_bitmap, bitmap);

                // Get the stop time and display the timing result.
                float elapsedTime = gpu.StopTimer();
                Console.WriteLine("Time to generate: {0} ms", elapsedTime);
            }
            finally
            {
                // Release device memory even when allocation, launch or copy-back throws.
                gpu.DeviceFreeAll();
            }
        }
Пример #11
0
        /// <summary>
        /// Runs the <c>Bruteforce</c> correction kernel over a 960 x 540 image on the GPU.
        /// </summary>
        /// <remarks>
        /// The method loads (or translates and caches) the CUDAfy module, builds a
        /// 21 x 41 x 45 profile table from the p3700.csv file, uploads the table and both
        /// inputs, launches the kernel and copies the result back. Failures while reading
        /// or parsing the CSV are written to the console and the method continues with
        /// whatever part of the table was filled.
        /// </remarks>
        /// <param name="foregorungRGB_CPU">Host foreground data; uploaded to the device unchanged.</param>
        /// <param name="BackgroundXYZ_CPU">Host background data; uploaded to the device unchanged.</param>
        /// <returns>
        /// A <c>TestOutput</c> whose <c>output_image</c> is the buffer copied back from the
        /// device and whose <c>timeTaken</c> is the value reported by <c>gpu.StopTimer()</c>.
        /// </returns>
        public static TestOutput CorrectColour(ForeGroundStrucuture[] foregorungRGB_CPU, BackGroundStrucuture[] BackgroundXYZ_CPU)
        {
            const int image_size = 960 * 540;

            // Dimensions of the profile table (L, A and B bins).
            const int binsL = 21;
            const int binsA = 41;
            const int binsB = 45;

            // Reuse the cached module when its checksums still match; otherwise
            // translate the listed types and refresh the cache.
            CudafyModule km = CudafyModule.TryDeserialize();
            if (km == null || !km.TryVerifyChecksums())
            {
                km = CudafyTranslator.Cudafy(typeof(ProfileStrucuture), typeof(ForeGroundStrucuture), typeof(BackGroundStrucuture), typeof(bf));
                km.TrySerialize();
            }

            // NOTE(review): this flag is set after the translation above, so it presumably
            // only influences later Cudafy() calls - confirm whether it should come first.
            CudafyTranslator.GenerateDebug = true;

            // Target and device id come from CudafyModes (CUDA or emulator).
            GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
            gpu.LoadModule(km);
            Console.WriteLine("Running brute force correction using {0}", gpu.GetDeviceProperties(false).Name);

            ForeGroundStrucuture[] output_image_CPU = new ForeGroundStrucuture[image_size];

            // Load the profile CSV. Best effort: on failure the table stays empty and the
            // error is only reported on the console.
            DataTable profile = new DataTable();
            try
            {
                using (GenericParserAdapter parser = new GenericParserAdapter(@"C:\lev\STColorCorrection\Data\PROFILE\p3700.csv"))
                {
                    System.Data.DataSet dsResult = parser.GetDataSet();
                    profile = dsResult.Tables[0];
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex);
            }

            ProfileStrucuture[, ,] profiles_CPU = new ProfileStrucuture[binsL, binsA, binsB];

            // Mark every bin as empty; each bin initially carries its own indices.
            for (int indexL = 0; indexL < binsL; indexL++)
            {
                for (int indexA = 0; indexA < binsA; indexA++)
                {
                    for (int indexB = 0; indexB < binsB; indexB++)
                    {
                        profiles_CPU[indexL, indexA, indexB].L = indexL;
                        profiles_CPU[indexL, indexA, indexB].A = indexA;
                        profiles_CPU[indexL, indexA, indexB].B = indexB;
                        profiles_CPU[indexL, indexA, indexB].Given_R = 0;
                        profiles_CPU[indexL, indexA, indexB].Given_G = 0;
                        profiles_CPU[indexL, indexA, indexB].Given_B = 0;
                        profiles_CPU[indexL, indexA, indexB].ML = 0;
                        profiles_CPU[indexL, indexA, indexB].MA = 0;
                        profiles_CPU[indexL, indexA, indexB].MB = 0;
                        profiles_CPU[indexL, indexA, indexB].MX = 0;
                        profiles_CPU[indexL, indexA, indexB].MY = 0;
                        profiles_CPU[indexL, indexA, indexB].MZ = 0;

                        profiles_CPU[indexL, indexA, indexB].isempty = TRUE;
                        profiles_CPU[indexL, indexA, indexB].isMoreAccurateThanOrigin = -1;
                    }
                }
            }

            // The CSV is a machine-written data file, so its numbers are parsed with the
            // invariant culture instead of whatever culture the current thread has.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            // Fill the bins that have a measured entry. Columns 0-2 select the bin,
            // 3-5 hold ML/MA/MB, 6-8 hold MX/MY/MZ and 9-11 hold the given R/G/B.
            // NOTE(review): row 0 is skipped - presumably a header row; confirm.
            // NOTE(review): a malformed or out-of-range row throws, is logged, and stops
            // the loading of all remaining rows.
            try
            {
                for (int i = 1; i < profile.Rows.Count; i++)
                {
                    DataRow row = profile.Rows[i];

                    int lvalue = Convert.ToInt32(row[0].ToString(), invariant);
                    int avalue = Convert.ToInt32(row[1].ToString(), invariant);
                    int bvalue = Convert.ToInt32(row[2].ToString(), invariant);

                    // Scale by 0.2 and shift A and B so that the indices are non-negative.
                    lvalue = (int)(lvalue * 0.2);
                    avalue = (int)(avalue * 0.2) + 20;
                    bvalue = (int)(bvalue * 0.2) + 22;

                    profiles_CPU[lvalue, avalue, bvalue].L = lvalue;
                    profiles_CPU[lvalue, avalue, bvalue].A = avalue;
                    profiles_CPU[lvalue, avalue, bvalue].B = bvalue;

                    profiles_CPU[lvalue, avalue, bvalue].Given_R = Convert.ToByte(row[9].ToString(), invariant);
                    profiles_CPU[lvalue, avalue, bvalue].Given_G = Convert.ToByte(row[10].ToString(), invariant);
                    profiles_CPU[lvalue, avalue, bvalue].Given_B = Convert.ToByte(row[11].ToString(), invariant);

                    profiles_CPU[lvalue, avalue, bvalue].ML = Convert.ToDouble(row[3].ToString(), invariant);
                    profiles_CPU[lvalue, avalue, bvalue].MA = Convert.ToDouble(row[4].ToString(), invariant);
                    profiles_CPU[lvalue, avalue, bvalue].MB = Convert.ToDouble(row[5].ToString(), invariant);

                    profiles_CPU[lvalue, avalue, bvalue].MX = Convert.ToDouble(row[6].ToString(), invariant);
                    profiles_CPU[lvalue, avalue, bvalue].MY = Convert.ToDouble(row[7].ToString(), invariant);
                    profiles_CPU[lvalue, avalue, bvalue].MZ = Convert.ToDouble(row[8].ToString(), invariant);

                    profiles_CPU[lvalue, avalue, bvalue].isempty = FALSE;
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex);
            }

            try
            {
                // The lookup table is uploaded before the timer starts.
                ProfileStrucuture[, ,] profile_GPU = gpu.CopyToDevice(profiles_CPU);

                // Timed section: input upload, kernel, result download.
                gpu.StartTimer();
                ForeGroundStrucuture[] foregorungRGB_GPU = gpu.CopyToDevice(foregorungRGB_CPU);
                BackGroundStrucuture[] BackgroundXYZ_GPU = gpu.CopyToDevice(BackgroundXYZ_CPU);

                // Output buffer.
                ForeGroundStrucuture[] distance_GPU = gpu.Allocate(output_image_CPU);

                // 24 x 675 blocks of 8 x 4 threads = 518,400 threads, which equals image_size.
                dim3 grids = new dim3(24, 675);
                dim3 threads = new dim3(8, 4);

                gpu.Launch(grids, threads, ((Action<GThread, ProfileStrucuture[, ,], ForeGroundStrucuture[], BackGroundStrucuture[], ForeGroundStrucuture[]>)Bruteforce), profile_GPU, foregorungRGB_GPU, BackgroundXYZ_GPU, distance_GPU);

                // Copy the result back from the GPU.
                gpu.CopyFromDevice(distance_GPU, output_image_CPU);

                // Get the stop time and report it.
                double elapsedTime = gpu.StopTimer();
                TestOutput to_return = new TestOutput();
                to_return.output_image = output_image_CPU;
                to_return.timeTaken = elapsedTime;
                Console.WriteLine("Time to generate: {0} ms", elapsedTime);

                return to_return;
            }
            finally
            {
                // FreeAll releases every buffer allocated above, also when an upload,
                // launch or download throws.
                gpu.FreeAll();
            }
        }