Пример #1
0
        /// <summary>
        /// Runs the <c>GpuFindPathDistance</c> kernel on the default Cudafy device and
        /// reduces the per-block results to the single smallest distance.
        /// </summary>
        /// <returns>
        /// An <c>Answer</c> carrying the smallest distance, the path number that produced it,
        /// the module load time and the run time (both in milliseconds).
        /// </returns>
        internal override Answer GetAnswer()
        {
            var loadTimer = Stopwatch.StartNew();

            using (var device = CudafyHost.GetDevice())
            {
                // Translation and module loading are timed separately from the run itself.
                device.LoadModule(CudafyTranslator.Cudafy());
                LoadTime = loadTimer.ElapsedMilliseconds;

                var runTimer = Stopwatch.StartNew();

                var deviceLatLong  = device.CopyToDevice(_latLong.ToArray());
                var perBlock       = new AnswerStruct[_blocksPerGrid];
                var devicePerBlock = device.Allocate(perBlock);

                device.SafeLaunch(
                    _blocksPerGrid, _threadsPerBlock,
                    GpuFindPathDistance, (int)_permutations, deviceLatLong, devicePerBlock);
                device.Synchronize();

                device.CopyFromDevice(devicePerBlock, perBlock);
                device.FreeAll();

                // Host-side reduction: only a strictly smaller distance replaces the current best.
                var shortest     = float.MaxValue;
                var shortestPath = 0;
                foreach (var candidate in perBlock)
                {
                    if (candidate.distance < shortest)
                    {
                        shortest     = candidate.distance;
                        shortestPath = candidate.pathNo;
                    }
                }

                var result = new Answer {
                    Distance = shortest,
                    Permutation = shortestPath,
                    msLoadTime = LoadTime,
                    msRunTime = runTimer.ElapsedMilliseconds
                };

                return result;
            }
        }
Пример #2
0
        /// <summary>
        /// Launches the <c>add</c> and <c>sub</c> kernels with the operands 2 and 7
        /// and prints the value each of them leaves in a single device-side Int32.
        /// </summary>
        public static void Execute()
        {
            CudafyModule module = CudafyTranslator.Cudafy();
            GPGPU device = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
            device.LoadModule(module);

            // One Int32 on the device receives the result of each kernel in turn.
            int[] deviceResult = device.Allocate<int>();
            int hostResult;

            device.Launch().add(2, 7, deviceResult);
            device.CopyFromDevice(deviceResult, out hostResult);
            Console.WriteLine("2 + 7 = {0}", hostResult);

            device.Launch().sub(2, 7, deviceResult);
            device.CopyFromDevice(deviceResult, out hostResult);
            Console.WriteLine("2 - 7 = {0}", hostResult);

            device.Free(deviceResult);
        }
Пример #3
0
        /// <summary>
        /// Selects the OpenCL target, acquires device 0, translates and loads the Cudafy
        /// module, and finally calls <c>InitDevicePointers</c>.
        /// </summary>
        /// <param name="passedViewport">Picture box stored in the static <c>viewport</c> member.</param>
        /// <returns>
        /// <c>true</c> when the module was loaded and the device pointers were initialized;
        /// <c>false</c> when no device of the selected target exists or any setup step throws.
        /// </returns>
        public static bool InitGPU(PictureBox passedViewport)
        {
            viewport = passedViewport;

            CudafyModes.Target        = eGPUType.OpenCL; // To use OpenCL, change this enum
            CudafyModes.DeviceId      = 0;
            CudafyTranslator.Language = CudafyModes.Target == eGPUType.OpenCL ? eLanguage.OpenCL : eLanguage.Cuda;

            CudafyModule km = null;

            try
            {
                int deviceCount = CudafyHost.GetDeviceCount(CudafyModes.Target);
                if (deviceCount == 0)
                {
                    Console.WriteLine("No suitable {0} devices found.", CudafyModes.Target);
                    return(false);
                }

                gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
                Console.WriteLine("Device Name: {0}", gpu.GetDeviceProperties(false).Name);

                // Queried for diagnostics only; the returned properties are not used here.
                gpu.GetDeviceProperties(true);

                km = CudafyTranslator.Cudafy();
                gpu.LoadModule(km);
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex);

                // The failure may have occurred before translation produced a module
                // (e.g. while enumerating or opening the device). Dereferencing km then
                // would throw from inside this handler and hide the original error.
                if (km != null)
                {
                    Console.WriteLine(km.SourceCode);
                }

                Debugger.Break();
                return(false);
            }

            InitDevicePointers();

            return(true);
        }
Пример #4
0
        /// <summary>
        /// Translates the module and loads it onto the first device that accepts it,
        /// trying the CUDA target first, then OpenCL, then the emulator.
        /// </summary>
        /// <returns>0 once the module has been loaded onto a device.</returns>
        /// <exception cref="CtkCudafyCannotUseException">
        /// No device was reported for any of the three targets.
        /// </exception>
        /// <exception cref="Exception">
        /// Every device of the selected target failed with a compile exception; the last
        /// such exception is attached as <c>InnerException</c>.
        /// </exception>
        public int Init()
        {
            // NOTE(review): the module is translated before a target is chosen; confirm the
            // translator output is also suitable for the OpenCL / emulator fallbacks.
            this.m_km = CudafyTranslator.Cudafy();

            CudafyModes.Target = eGPUType.Cuda;
            var tgCount = CudafyHost.GetDeviceCount(CudafyModes.Target);

            if (tgCount <= 0)
            {
                CudafyModes.Target = eGPUType.OpenCL;
                tgCount            = CudafyHost.GetDeviceCount(CudafyModes.Target);
            }

            if (tgCount <= 0)
            {
                CudafyModes.Target = eGPUType.Emulator;
                tgCount            = CudafyHost.GetDeviceCount(CudafyModes.Target);
            }

            if (tgCount <= 0)
            {
                throw new CtkCudafyCannotUseException("無法使用Cudafy");
            }

            // Remember why the most recent device was rejected so that the final failure
            // is diagnosable instead of silently discarding every compile error.
            Cudafy.CudafyCompileException lastCompileError = null;

            for (int idx = 0; idx < tgCount; idx++)
            {
                try
                {
                    this.m_gpu = CudafyHost.GetDevice(CudafyModes.Target, idx);
                    this.m_gpu.LoadModule(Km);
                    return(0);
                }
                catch (Cudafy.CudafyCompileException ex)
                {
                    // Best effort: fall through and try the next device.
                    lastCompileError = ex;
                }
            }

            throw new Exception("Cudafy buidling fail.", lastCompileError);
        }
Пример #5
0
        /// <summary>
        /// Runs the <c>add</c> kernel over two N-element integer arrays and prints
        /// each pair of inputs together with the value copied back from the device.
        /// </summary>
        public static void Execute()
        {
            CudafyModule module = CudafyTranslator.Cudafy();
            GPGPU device = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
            device.LoadModule(module);

            int[] left  = new int[N];
            int[] right = new int[N];
            int[] sums  = new int[N];

            // Device-side result buffer, sized after the host 'sums' array.
            int[] deviceSums = device.Allocate<int>(sums);

            // Host inputs: left[k] = -k, right[k] = k * k.
            for (int k = 0; k < N; k++)
            {
                left[k]  = -k;
                right[k] = k * k;
            }

            // Upload both inputs, then launch N blocks of one thread each.
            int[] deviceLeft  = device.CopyToDevice(left);
            int[] deviceRight = device.CopyToDevice(right);
            device.Launch(N, 1).add(deviceLeft, deviceRight, deviceSums);

            // Bring the result back to the host and show it.
            device.CopyFromDevice(deviceSums, sums);
            for (int k = 0; k < N; k++)
            {
                Console.WriteLine("{0} + {1} = {2}", left[k], right[k], sums[k]);
            }

            // Release everything that was allocated on the device.
            device.FreeAll();
        }
    /// <summary>
    /// Stores the device address of an uploaded int array inside a struct, hands that
    /// struct to the <c>kernelTest</c> kernel and prints the struct's <c>value</c> afterwards.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    private unsafe static void Main(string[] args)
    {
        GPGPU        device = CudafyHost.GetDevice(eGPUType.Cuda, 0);
        CudafyModule module = CudafyTranslator.Cudafy();
        device.LoadModule(module);

        TestStruct[] hostStructs = new[] { new TestStruct() };

        // Upload the integers first so that their device address can be read.
        int[]       hostInts     = new[] { 1, 8, 3 };
        int[]       deviceInts   = device.CopyToDevice(hostInts);
        DevicePtrEx deviceMemory = device.GetDeviceMemory(deviceInts);
        IntPtr      address      = deviceMemory.Pointer;

        // The struct carries the raw device address as a 64-bit integer.
        hostStructs[0].dataPointer = address.ToInt64();

        TestStruct[] deviceStructs = device.Allocate(hostStructs);
        device.CopyToDevice(hostStructs, deviceStructs);
        device.Launch().kernelTest(deviceStructs, deviceInts);

        device.CopyFromDevice(deviceStructs, hostStructs);
        Console.WriteLine(hostStructs[0].value);
        Console.ReadKey();
    }
Пример #7
0
        /// <summary>
        ///     Invokes and executes a single elementary function selected by its name.
        /// </summary>
        /// <param name="function">Name of the kernel function passed to <c>Launch</c>.</param>
        public static void Execute(string function)
        {
            CudafyModule module = CudafyTranslator.Cudafy();
            GPGPU device = CudafyHost.GetDevice();
            device.LoadModule(module);

            // Device buffers sized after the corresponding host arrays.
            int[] deviceA = device.Allocate(_a);
            int[] deviceB = device.Allocate(_b);
            int[] deviceC = device.Allocate(_c);
            int[] deviceD = device.Allocate(D);

            // Only the contents of _a are uploaded.
            device.CopyToDevice(_a, deviceA);

            // Two launches of the same function: the configured grid/block with a final
            // argument of 1, followed by a single thread with a final argument of 2.
            device.Launch(_gridSize, _blockSize, function, deviceA, deviceB, deviceC, deviceD, 1);
            device.Launch(1, 1, function, deviceA, deviceB, deviceC, deviceD, 2);

            device.CopyFromDevice(deviceD, D);

            // Release everything that was allocated on the device.
            device.FreeAll();
        }
Пример #8
0
        /// <summary>
        /// Translates the module for OpenCL 1.2, derives a square block size from the
        /// first reported OpenCL device, and loads the module onto the OpenCL device.
        /// </summary>
        /// <exception cref="Exception">No OpenCL device properties were reported.</exception>
        public GpuRenderer()
        {
            var openClDevices = CudafyHost.GetDeviceProperties(eGPUType.OpenCL);
            if (!openClDevices.Any())
            {
                throw new Exception("No OpenCL devices found...");
            }

            var firstDevice = openClDevices.First();

            Module = CudafyTranslator.Cudafy(eArchitecture.OpenCL12);

            // Largest side length in 1..15 whose square does not exceed the per-block thread limit.
            var side = Enumerable
                .Range(1, 15)
                .Last(candidate => candidate * candidate <= firstDevice.MaxThreadsPerBlock);
            BlockSize = new dim3(side, side);

            // Acquire the device and load the module once, up front, so it is not reloaded on every use.
            gpu = CudafyHost.GetDevice(eGPUType.OpenCL);
            gpu.LoadModule(Module);
        }
Пример #9
0
 /// <summary>
 /// Translates the module for <c>ARCH</c>, acquires the configured device and loads
 /// the module onto it, marking the GPU as enabled and not busy on success.
 /// </summary>
 /// <returns>
 /// <c>true</c> on success; <c>false</c> when <c>isCudaAvailable()</c> is false or when any
 /// step throws (the exception text is then stored in <c>errorMessage</c>).
 /// </returns>
 public static bool cudaEnable()
 {
     if (isCudaAvailable())
     {
         try
         {
             CudafyModule module = CudafyTranslator.Cudafy(ARCH);
             Console.WriteLine("Translator OK");

             gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
             Console.WriteLine("GPU OK");

             gpu.LoadModule(module);

             enabled = true;
             busy    = false;
             return(true);
         }
         catch (Exception failure)
         {
             errorMessage = failure.ToString();
         }
     }

     return(false);
 }
Пример #10
0
        /// <summary>
        /// Runs the <c>VectorAdd</c> kernel of <c>OpenCLTestClass</c> on CUDA device 0 and
        /// asserts that every output element equals (input1 + input2) * input3.
        /// </summary>
        static void TempOpenCLVectorAddTest()
        {
            int[]  first   = new int[N];
            int[]  second  = new int[N];
            int[]  factors = new int[N];
            int[]  results = new int[N];
            Random rng     = new Random();

            // Two random operands in [0, 128) per element, and a constant factor of 2.
            for (int k = 0; k < N; k++)
            {
                first[k]   = rng.Next(128);
                second[k]  = rng.Next(128);
                factors[k] = 2;
            }

            GPGPU device = CudafyHost.GetDevice(eGPUType.Cuda, 0);
            Console.WriteLine(device.GetDeviceProperties().Name);

            CudafyTranslator.Language = eLanguage.Cuda;
            var module = CudafyTranslator.Cudafy(CudafyModes.Architecture, typeof(OpenCLTestClass));

            // Dump the generated source before loading the module.
            Console.WriteLine(module.SourceCode);
            device.LoadModule(module);

            int[] deviceFirst  = device.CopyToDevice(first);
            int[] deviceSecond = device.CopyToDevice(second);
            device.CopyToConstantMemory(factors, OpenCLTestClass.ConstantMemory);
            int[] deviceResults = device.Allocate <int>(N);
#warning Work group and local size mess! http://stackoverflow.com/questions/7996537/cl-invalid-work-group-size-error-should-be-solved-though
            device.Launch(2, 512).VectorAdd(deviceFirst, deviceSecond, deviceResults);
            device.CopyFromDevice(deviceResults, 0, results, 0, N);

            for (int k = 0; k < N; k++)
            {
                int expected = (first[k] + second[k]) * factors[k];
                Assert.AreEqual(expected, results[k], string.Format("Error at {0}", k));
            }
        }
        /// <summary>
        /// Launches the <c>GenerateRipples</c> kernel on an 8x8 grid of 64x16-thread blocks
        /// and copies the first device buffer back into the host array.
        /// </summary>
        public void ExeTestKernel()
        {
            GPGPU         device       = CudafyHost.GetDevice(CudafyModes.Target, 0);
            eArchitecture architecture = device.GetArchitecture();
            CudafyModule  module       = CudafyTranslator.Cudafy(architecture);
            device.LoadModule(module);

            int[] hostValues = new int[N];

            // Option 1: reserve a fresh block of device memory for the results.
            var deviceResults = device.Allocate <int>(N);

            // Option 2: fill a host array (k * 3 per element) and upload it.
            for (int k = 0; k < N; k++)
            {
                hostValues[k] = k * 3;
            }
            var deviceFilled = device.CopyToDevice(hostValues);

            // 64 * 16 = 1024 threads per block.
            dim3 blockShape = new dim3(64, 16);

            // 8 * 8 = 64 blocks per grid; 64 blocks * 1024 threads = 65536 threads in total.
            // NOTE(review): presumably N is 65536 so there is one thread per element — confirm.
            dim3 gridShape = new dim3(8, 8);

            device.Launch(gridShape, blockShape, "GenerateRipples", deviceResults, deviceFilled);

            device.CopyFromDevice(deviceResults, hostValues);
        }
Пример #12
0
        /// <summary>
        /// Translates <c>TextInsertion</c> for the CUDA device, launches <c>AHybridMethod</c>
        /// with a single thread and prints "Failed" if the data read back from the device
        /// differs from the host input.
        /// </summary>
        public static void Execute()
        {
            _gpu = CudafyHost.GetDevice(eGPUType.Cuda);

            CudafyModule module = CudafyTranslator.Cudafy(ePlatform.Auto, _gpu.GetArchitecture(), typeof(TextInsertion));
            Console.WriteLine(module.CompilerOutput);
            _gpu.LoadModule(module);

            int[] hostInput     = new int[64];
            int[] deviceInput   = _gpu.CopyToDevice(hostInput);
            int[] deviceScratch = _gpu.Allocate(hostInput);
            int[] readBack      = new int[64];

            _gpu.Launch(1, 1, "AHybridMethod", deviceInput, deviceScratch);

            // NOTE(review): the second kernel argument is allocated and passed in but never
            // read back; the first argument is what gets copied to the host — confirm intent.
            _gpu.CopyFromDevice(deviceInput, readBack);

            // Stop at the first mismatching element, if there is one.
            int index = 0;
            while (index < 64 && hostInput[index] == readBack[index])
            {
                index++;
            }

            if (index < 64)
            {
                Console.WriteLine("Failed");
            }
        }
Пример #13
0
        /// <summary>
        /// Passes a <c>ValueA</c> whose nested <c>ValueB</c> holds 56 to <c>StructTestKernel</c>
        /// and prints PASSED or FAILED depending on whether the kernel output equals 56.
        /// </summary>
        public static void Execute()
        {
            // Reuse a previously serialized module when it deserializes and its checksums verify;
            // otherwise translate the three types again and serialize the result.
            var module = CudafyModule.TryDeserialize();
            bool cacheUsable = module != null && module.TryVerifyChecksums();
            if (!cacheUsable)
            {
                module = CudafyTranslator.Cudafy(
                    ePlatform.Auto,
                    eArchitecture.sm_20,
                    typeof(ValueB),
                    typeof(ValueA),
                    typeof(StructTest));
                module.Serialize();
            }

            GPGPU device = CudafyHost.GetDevice(CudafyModes.Target, 0);
            device.LoadModule(module);

            var argument = new ValueA();
            argument.valueB       = new ValueB();
            argument.valueB.value = 56;

            var deviceOutput = device.Allocate <int>(1);
            device.Launch(1, 1, "StructTestKernel", argument, deviceOutput);

            // Read the single output integer back from the device, then release the buffer.
            int hostOutput;
            device.CopyFromDevice(deviceOutput, out hostOutput);
            device.Free(deviceOutput);

            string verdict = 56 == hostOutput ? "PASSED" : "FAILED";
            Console.WriteLine("Expected: {0} \t{1}", 56, verdict);
        }
        /// <summary>
        /// Builds a 256-bin histogram of a random byte block with <c>histo_kernel</c>,
        /// prints the elapsed device time and the histogram sum, and cross-checks the
        /// bins against a count performed on the host.
        /// </summary>
        /// <returns>
        /// -1 when the device is a <c>CudaGPU</c> with compute capability below 1.2; otherwise 0
        /// (mismatching bins are only reported on the console).
        /// </returns>
        public static int Execute()
        {
            CudafyModule module = CudafyTranslator.Cudafy();
            GPGPU device = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);

            bool isCudaDevice = device is CudaGPU;
            if (isCudaDevice && device.GetDeviceProperties().Capability < new Version(1, 2))
            {
                Console.WriteLine("Compute capability 1.2 or higher required for atomics.");
                return(-1);
            }
            device.LoadModule(module);

            byte[] hostBytes = big_random_block(SIZE);

            // The advanced query needs cudart.dll; fall back to the basic query without it.
            GPGPUProperties properties;
            try
            {
                properties = device.GetDeviceProperties(true);
            }
            catch (DllNotFoundException)
            {
                properties = device.GetDeviceProperties(false);
            }

            // The timer starts before the upload so that the measurement covers every
            // device operation, not just the kernel.
            device.StartTimer();

            byte[] deviceBytes = device.CopyToDevice(hostBytes);
            uint[] deviceBins  = device.Allocate <uint>(256);
            device.Set(deviceBins);

            // Launch twice as many blocks as reported multiprocessors; assume 16 when none are reported.
            int multiprocessors = properties.MultiProcessorCount;
            int blocks = multiprocessors == 0 ? 16 : multiprocessors;
            Console.WriteLine("Processors: {0}", blocks);
            device.Launch(blocks * 2, 256).histo_kernel(deviceBytes, SIZE, deviceBins);

            uint[] bins = new uint[256];
            device.CopyFromDevice(deviceBins, bins);

            float elapsedMs = device.StopTimer();
            Console.WriteLine("Time to generate: {0} ms", elapsedMs);

            long total = 0;
            foreach (uint bin in bins)
            {
                total += bin;
            }
            Console.WriteLine("Histogram Sum:  {0}", total);

            // Host verification: subtract one per input byte; every bin must end at zero.
            for (int k = 0; k < SIZE; k++)
            {
                bins[hostBytes[k]]--;
            }
            for (int binIndex = 0; binIndex < 256; binIndex++)
            {
                if (bins[binIndex] != 0)
                {
                    Console.WriteLine("Failure at {0}!", binIndex);
                }
            }

            device.FreeAll();

            return(0);
        }
Пример #15
0
 /// <summary>
 /// Translates the Cudafy module, acquires the configured device and loads the
 /// module onto it, storing both in the static <c>km</c> and <c>gpu</c> members.
 /// </summary>
 public static void prepareGPU()
 {
     var translated = CudafyTranslator.Cudafy();
     km = translated;

     var device = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
     gpu = device;

     device.LoadModule(translated);
 }
Пример #16
0
        /// <summary>
        /// Lazily runs an OpenCL smoke test on every reported OpenCL device, yielding a
        /// progress message before and after each step (translate, open device, load module,
        /// upload, launch, download, verify).
        /// </summary>
        /// <returns>The sequence of progress messages; work happens as the sequence is enumerated.</returns>
        public static IEnumerable <string> TestOpenCL()
        {
            const int Length = 1024;

            yield return("Attempting to cudafy a kernel function.");

            var module = CudafyTranslator.Cudafy(ePlatform.x64, eArchitecture.OpenCL, null, false, typeof(CUDACheck));
            yield return("Successfully translated to OpenCL C.");

            // The device count is re-queried on every pass, as the loop condition.
            for (int deviceId = 0; deviceId < CudafyHost.GetDeviceCount(eGPUType.OpenCL); deviceId++)
            {
                yield return("Attempting to instantiate OpenCL device object (GPGPU).");

                var device = CudafyHost.GetDevice(eGPUType.OpenCL, deviceId);
                yield return(string.Format("Successfully got OpenCL device {0}.", deviceId));
                yield return("Name: " + device.GetDeviceProperties(false).Name);

                yield return("Attempting to load module.");

                device.LoadModule(module);
                yield return("Successfully loaded module.");

                yield return("Attempting to transfer data to device.");

                int[]  left  = new int[Length];
                int[]  right = new int[Length];
                int[]  sums  = new int[Length];
                Random rng   = new Random();
                for (int k = 0; k < Length; k++)
                {
                    left[k]  = rng.Next(16384);
                    right[k] = rng.Next(16384);
                }
                int[] deviceLeft  = device.CopyToDevice(left);
                int[] deviceRight = device.CopyToDevice(right);
                int[] deviceSums  = device.Allocate(sums);
                yield return("Successfully transferred data to device.");

                yield return("Attempting to launch function on device.");

                device.Launch(4, 256).TestKernelFunction(deviceLeft, deviceRight, deviceSums);
                yield return("Successfully launched function on device.");

                yield return("Attempting to transfer results back from device.");

                device.CopyFromDevice(deviceSums, sums);
                yield return("Successfully transferred results from device.");

                yield return("Testing results.");

                // Count the elements where the device result is not left + right.
                int mismatches = 0;
                for (int k = 0; k < Length; k++)
                {
                    mismatches += (left[k] + right[k] != sums[k]) ? 1 : 0;
                }

                yield return(mismatches == 0
                    ? "Successfully tested results.\r\n\r\n"
                    : "Test failed - results not as expected.\r\n\r\n");
            }
        }
Пример #17
0
        /// <summary>
        /// Builds two lookup tables from <paramref name="keys"/> with <c>StringSearch</c>, uploads them
        /// together with the input text, launches the <c>Dot</c> kernel and returns the
        /// per-character result buffer copied back from the device.
        /// </summary>
        /// <param name="keys">Keys handed to the <c>StringSearch</c> constructor.</param>
        /// <param name="I">Input text; it is converted to a char array and uploaded to the device.</param>
        /// <param name="n">Not used by this method.</param>
        /// <returns>A char array with one element per character of the input text.</returns>
        public static char[] Execute(String[] keys, string I, int n)
        {
            GPGPU         gpu  = CudafyHost.GetDevice(CudafyModes.Target, 0);
            eArchitecture arch = gpu.GetArchitecture();
            CudafyModule  km   = CudafyTranslator.Cudafy(arch);

            gpu.LoadModule(km);

            // Times the host-side table construction; the elapsed value is never read in this method.
            Stopwatch xxxx = new Stopwatch();

            xxxx.Start();
            StringSearch abb      = new StringSearch(keys);
            // Only the length (26) of this string is used below; '*' stands where J, O and U would be.
            string       alphabet = "ABCDEFGHI*KLMN*PQRST*VWXYZ";
            int          alpha    = alphabet.Length;

            // One row per node, one column per alphabet position; every cell starts at -1.
            int[,] table1 = new int[StringSearch.nodeCount, alpha];
            for (int i = 0; i < StringSearch.nodeCount; i++)
            {
                for (int j = 0; j < alpha; j++)
                {
                    table1[i, j] = -1;
                }
            }

            abb.build_table1(table1, abb._root);
            char[] input  = I.ToCharArray();
            int    length = I.Length;

            // Clears the local parameter only; the caller's string is unaffected.
            I = "";
            int[] output_table = new int[StringSearch.nodeCount];
            abb.build_tableO(output_table, abb._root);
            // Replaces the populated instance with a fresh one once both tables are built.
            abb = new StringSearch();
            char[] matched_result = new char[length];
            xxxx.Stop();

            //CudafyModule km = CudafyModule.TryDeserialize();
            //if (km == null || !km.TryVerifyChecksums())
            //{
            //    km = CudafyTranslator.Cudafy();
            //    km.Serialize();
            //    gpu.LoadModule(km);
            //}

            gpu.SetCurrentContext();
            // tempas / tempbab are passed to Allocate only; their contents are never written.
            // NOTE(review): StringSearch.nodeCount is read again here, after `new StringSearch()`;
            // if that constructor changes the static, these buffers would be sized differently
            // from table1 / output_table — confirm.
            int[] tempas = new int[StringSearch.nodeCount];
            int[,] tempbab  = new int[StringSearch.nodeCount, alpha];
            int[,] table1_d = gpu.Allocate <int>(tempbab);
            int[]  output_table_d   = gpu.Allocate <int>(tempas);
            char[] matched_result_d = gpu.Allocate <char>(length);
            char[] input_d          = gpu.Allocate <char>(length);
            int[]  input_length_d   = gpu.Allocate <int>(1);
            // The input length is passed to the kernel through a one-element device array.
            int[]  input_length     = { length };
            gpu.CopyToDevice(table1, table1_d);
            gpu.CopyToDevice(output_table, output_table_d);
            gpu.CopyToDevice(matched_result, matched_result_d);
            gpu.CopyToDevice(input, input_d);
            gpu.CopyToDevice(input_length, input_length_d);
            // Enough blocks of N threads to cover every input character (rounded up).
            int block = (int)Math.Ceiling((double)length / N);

            gpu.Launch(block, N).Dot(table1_d, output_table_d, matched_result_d, input_d, input_length_d);
            gpu.CopyFromDevice(matched_result_d, matched_result);
            gpu.FreeAll();
            return(matched_result);
        }
Пример #18
0
        /// <summary>
        /// Compares host and device execution time of e = 2 * dy - dx over two million
        /// random integers, using the <c>calc_e_v2</c> kernel, and prints both timings.
        /// </summary>
        /// <remarks>
        /// The three device buffers allocated here are released before the method returns,
        /// even when a device call throws.
        /// </remarks>
        public static void Execute()
        {
            int n = 2000000;

            Random r = new Random();

            int[] dx = new int[n];
            int[] dy = new int[n];
            int[] e  = new int[n];
            int[] eh = new int[n];

            // Fill both inputs with random values.
            for (int i = 0; i < n; i++)
            {
                dx[i] = r.Next();
                dy[i] = r.Next();
            }

            // Host reference result and host timing.
            double t2 = MeasureTime(() =>
            {
                for (int i = 0; i < n; i++)
                {
                    eh[i] = 2 * dy[i] - dx[i];
                }
            });

            CudafyModule km = CudafyTranslator.Cudafy(Program.testArchitecture);

            GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, 0);

            gpu.LoadModule(km);

            int[]  dev_dx = null;
            int[]  dev_dy = null;
            int[]  dev_e  = null;
            double t3     = 0;

            try
            {
                dev_dx = gpu.Allocate <int>(dx);
                dev_dy = gpu.Allocate <int>(dy);
                dev_e  = gpu.Allocate <int>(e);

                gpu.CopyToDevice(dx, dev_dx);
                gpu.CopyToDevice(dy, dev_dy);

                // The kernel runs twice; only the timing of the second run is kept.
                for (int x = 0; x < 2; x++)
                {
                    t3 = MeasureTime(() =>
                    {
                        // NOTE(review): n / 512 truncates to 3906 blocks, i.e. 1,999,872 threads,
                        // which is 128 fewer than n. Whether the last elements are still covered
                        // depends on calc_e_v2, which is not visible here — confirm.
                        gpu.Launch(n / 512, 512, "calc_e_v2", n, dev_dx, dev_dy, dev_e);
                        gpu.Synchronize();
                    });
                }

                // Copy the result back; the copy duration is measured but not reported.
                MeasureTime(() =>
                {
                    gpu.CopyFromDevice(dev_e, e);
                });
            }
            finally
            {
                // Free only what this method allocated, leaving any other allocations
                // on the same device untouched.
                if (dev_e != null)
                {
                    gpu.Free(dev_e);
                }
                if (dev_dy != null)
                {
                    gpu.Free(dev_dy);
                }
                if (dev_dx != null)
                {
                    gpu.Free(dev_dx);
                }
            }

            for (int i = 0; i < n; i++)
            {
                Debug.Assert(e[i] == eh[i]);
            }
            Console.WriteLine(string.Format("n = {0}", n));
            Console.WriteLine(string.Format("CPU ::: e = 2 * dy - dx ::: Excecution time: {0} ms", t2 * 1000));
            Console.WriteLine(string.Format("CUDA ::: e = 2 * dy - dx ::: Excecution time: {0} ms", t3 * 1000));
        }
Пример #19
0
        /// <summary>
        ///     Applies the median filter algorithm.
        ///     Usage example:
        ///     lock (CudafyFilter.Semaphore)
        ///     {
        ///         CudafyFilter.SetBitmap(bitmap, 3, 1 &lt;&lt; 12);
        ///         CudafyFilter.MedianFilter();
        ///         bitmap = CudafyFilter.GetBitmap();
        ///     }
        /// </summary>
        /// <param name="gridSize">
        ///     Grid size used for every kernel launch when greater than zero; otherwise each
        ///     launch uses min(15, truncated cube root of its work-item count).
        /// </param>
        /// <param name="blockSize">
        ///     Block size used for every kernel launch when greater than zero; otherwise computed
        ///     the same way as the default grid size.
        /// </param>
        public static void MedianFilter(int gridSize = 0, int blockSize = 0)
        {
            CudafyModule km = CudafyTranslator.Cudafy();

            GPGPU gpu = CudafyHost.GetDevice();

            gpu.LoadModule(km);

            // Two equally sized device buffers; the Merge launches below alternate between them.
            byte[] devbytesA = gpu.Allocate <byte>(_videoMemorySize);
            byte[] devbytesB = gpu.Allocate <byte>(_videoMemorySize);

            byte[] devColor = gpu.Allocate(_color);

            // Launch dimensions for SelectNhBytes: derived from _frameItemsCount.
            int gridSize1 = (gridSize > 0)
                ? gridSize
                : Math.Min(15, (int)Math.Pow(_frameItemsCount, 0.333333333333));
            int blockSize1 = (blockSize > 0)
                ? blockSize
                : Math.Min(15, (int)Math.Pow(_frameItemsCount, 0.333333333333));

            // Launch dimensions for SelectColorBytes: derived from _frameItemsCount * _itemSize.
            int gridSize2 = (gridSize > 0)
                ? gridSize
                : Math.Min(15, (int)Math.Pow(_frameItemsCount * _itemSize, 0.333333333333));
            int blockSize2 = (blockSize > 0)
                ? blockSize
                : Math.Min(15, (int)Math.Pow(_frameItemsCount * _itemSize, 0.333333333333));

            // Launch dimensions for Merge.
            int gridSize4 = (gridSize > 0)
                ? gridSize
                : Math.Min(15,
                           (int)
                           Math.Pow(
                               (_frameItemsCount * ((1 << (_ceilingItemSize - _ceilingMiddleSize)) + _ceilingMiddleSize)),
                               0.333333333333));
            int blockSize4 = (blockSize > 0)
                ? blockSize
                : Math.Min(15,
                           (int)
                           Math.Pow(
                               (_frameItemsCount * ((1 << (_ceilingItemSize - _ceilingMiddleSize)) + _ceilingMiddleSize)),
                               0.333333333333));

            // Loop over the RGB color channels (bytes).
            // A fragment of the image - a frame with margins - is created in video memory,
            // and the loop moves it across the whole image.
            // The frame with margins is copied into video memory
            // (two neighboring frames without margins adjoin each other and overlap by their margins).

            // Each pair maps an input channel array (key) to its output channel array (value).
            foreach (var pair in new Dictionary <byte[], byte[]> {
                { _r0, _r1 }, { _g0, _g1 }, { _b0, _b1 }
            })
            {
                for (int left = 0; left < (_width - _nh); left += _frameWidth - 2 * _nh)
                {
                    for (int top = 0; top < (_height - _nh); top += _frameHeight - 2 * _nh)
                    {
                        // Frames at the right / bottom edge are clipped to the image bounds.
                        int width  = Math.Min(_frameWidth, _width - left);
                        int height = Math.Min(_frameHeight, _height - top);

                        // Number of interior points of the frame (margins of _nh excluded on every side).
                        int count = (width - 2 * _nh) * (height - 2 * _nh);

                        Debug.WriteLine("left:" + left + ",top:" + top + ",width:" + width + ",height:" + height +
                                        ",count:" + count);

                        // Copy a block (frame) of the color layer into video memory

                        for (int i = 0; i < width; i++)
                        {
                            for (int j = 0; j < height; j++)
                            {
                                _color[j * width + i] = pair.Key[(top + j) * _width + (left + i)];
                            }
                        }

                        gpu.CopyToDevice(_color, devColor);

                        // For every interior point of the frame, build a one-dimensional array of its _n*_n neighboring points
                        gpu.Launch(gridSize2, blockSize2).SelectColorBytes(devbytesA, devColor,
                                                                           _itemSize, count,
                                                                           width, height, _n, _nh);

                        // Run the odd-even sort in parallel over all the one-dimensional arrays created above

                        // Step 1 of the odd-even sort
                        // Perform a merge sort
                        // The output is sorted arrays of size up to 1<<(_ceilingItemSize - _ceilingMiddleSize)
                        for (int i = 0; i < _ceilingItemSize - _ceilingMiddleSize; i++)
                        {
                            // The two buffer arguments swap places on every iteration.
                            gpu.Launch(gridSize4, blockSize4)
                            .Merge(
                                ((i & 1) == 0) ? devbytesA : devbytesB,
                                ((i & 1) == 0) ? devbytesB : devbytesA,
                                i, 0, _ceilingItemSize, _itemSize, count);
                        }

                        // Step 2 of the odd-even sort
                        // launch tasks that sort the data in two neighboring blocks,
                        // alternating the neighboring blocks
                        for (int i = 0; i < (1 << _ceilingMiddleSize); i++)
                        {
                            // The buffer order continues the alternation from step 1 (parity of the step-1 iteration count).
                            gpu.Launch(gridSize4, blockSize4)
                            .Merge(
                                ((i & 1) == ((_ceilingItemSize - _ceilingMiddleSize) & 1)) ? devbytesA : devbytesB,
                                ((i & 1) == ((_ceilingItemSize - _ceilingMiddleSize) & 1)) ? devbytesB : devbytesA,
                                _ceilingItemSize - _ceilingMiddleSize, i & 1,
                                _ceilingItemSize, _itemSize, count);
                        }

                        // Pick the middle elements of the arrays and copy them into the output image
                        gpu.Launch(gridSize1, blockSize1).SelectNhBytes(devColor,
                                                                        (((1 << _ceilingMiddleSize) & 1) == ((_ceilingItemSize - _ceilingMiddleSize) & 1))
                                ? devbytesA
                                : devbytesB,
                                                                        _nhIndex,
                                                                        _itemSize, count,
                                                                        width, height, _n, _nh);

                        gpu.CopyFromDevice(devColor, _color);

                        // Only the interior of the frame (margins excluded) is written to the output channel.
                        for (int i = _nh; i < (width - _nh); i++)
                        {
                            for (int j = _nh; j < (height - _nh); j++)
                            {
                                pair.Value[(top + j) * _width + (left + i)] = _color[j * width + i];
                            }
                        }
                    }
                }
            }

            // free the memory allocated on the GPU
            gpu.FreeAll();
        }
Пример #20
0
        /// <summary>
        /// Runs the <c>calGPU</c> kernel over two equally sized bitmaps and returns the kernel
        /// output flattened into six values per pixel.
        /// </summary>
        /// <remarks>
        /// For every pixel the list contains, in order: the two position values written by the
        /// kernel, followed by result bytes 2, 1, 0 and 3 of that pixel.
        /// NOTE(review): presumably this is x, y, R, G, B, A for a BGRA buffer - confirm against calGPU.
        /// NOTE(review): a two-element counter array is passed to the kernel but is not read back;
        /// confirm whether only the first "count" entries are meant to be returned.
        /// </remarks>
        /// <param name="first">First image; its dimensions define the size of the comparison.</param>
        /// <param name="second">Second image; must have the same dimensions as <paramref name="first"/>.</param>
        /// <returns>A list holding six <see cref="short"/> values per pixel.</returns>
        /// <exception cref="ArgumentNullException">Either bitmap is null.</exception>
        /// <exception cref="ArgumentException">The bitmaps differ in size.</exception>
        public List <short> GetDelta(Bitmap first, Bitmap second)
        {
            if (first == null)
            {
                throw new ArgumentNullException("first");
            }
            if (second == null)
            {
                throw new ArgumentNullException("second");
            }
            // Only the dimensions of the first image are handed to the kernel, so a differently
            // sized second image would be read with the wrong layout.
            if (first.Width != second.Width || first.Height != second.Height)
            {
                throw new ArgumentException("Both bitmaps must have the same dimensions.", "second");
            }

            Stopwatch    stopWatch = Stopwatch.StartNew();
            CudafyModule km        = CudafyTranslator.Cudafy();
            GPGPU        gpu       = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);

            gpu.LoadModule(km);

            Console.WriteLine("load Module : " + stopWatch.ElapsedMilliseconds);

            stopWatch.Restart();
            byte[] rgbValues1 = ReadBitmapBytes(first);
            byte[] rgbValues2 = ReadBitmapBytes(second);
            stopWatch.Stop();

            Console.WriteLine("copy images to byte array : " + stopWatch.ElapsedMilliseconds);

            int pixelCount = first.Width * first.Height;

            int[]  count     = new int[2];               // zero-initialised counters handed to the kernel
            int[]  possition = new int[pixelCount * 2];  // two position values per pixel
            byte[] results   = new byte[pixelCount * 4]; // four result bytes per pixel
            int[]  width     = { first.Width, first.Height };

            try
            {
                stopWatch.Restart();

                int[]  imageWidth  = gpu.CopyToDevice <int>(width);
                int[]  dev_count   = gpu.CopyToDevice <int>(count);
                byte[] dev_bitmap1 = gpu.CopyToDevice <byte>(rgbValues1);
                byte[] dev_bitmap2 = gpu.CopyToDevice <byte>(rgbValues2);

                byte[] dev_result    = gpu.Allocate <byte>(results);
                int[]  dev_possition = gpu.CopyToDevice <int>(possition);

                stopWatch.Stop();

                Console.WriteLine("Copy to GPU : " + stopWatch.ElapsedMilliseconds);

                stopWatch.Restart();

                gpu.Launch(128, 1).calGPU(dev_bitmap1, dev_bitmap2, dev_result, imageWidth, dev_count, dev_possition);

                // Bring the kernel output back to the host; without this the host arrays
                // would still hold their initial zeros.
                gpu.CopyFromDevice(dev_result, results);
                gpu.CopyFromDevice(dev_possition, possition);

                stopWatch.Stop();

                Console.WriteLine("func : " + stopWatch.ElapsedMilliseconds);
                stopWatch.Restart();

                // Interleave position and colour data: six output values per pixel.
                short[] delta = new short[pixelCount * 6];
                for (int pixel = 0; pixel < pixelCount; pixel++)
                {
                    int target = 6 * pixel;
                    delta[target + 0] = (short)possition[2 * pixel + 0];
                    delta[target + 1] = (short)possition[2 * pixel + 1];
                    delta[target + 2] = (short)results[4 * pixel + 2];
                    delta[target + 3] = (short)results[4 * pixel + 1];
                    delta[target + 4] = (short)results[4 * pixel + 0];
                    delta[target + 5] = (short)results[4 * pixel + 3];
                }

                stopWatch.Stop();
                Console.WriteLine("Copy to the transmotting array: " + stopWatch.ElapsedMilliseconds);

                return(new List <short>(delta));
            }
            finally
            {
                // Release the device buffers even when a transfer or the launch fails.
                gpu.FreeAll();
            }
        }

        /// <summary>
        /// Copies the raw pixel buffer of a bitmap (locked as 32bpp RGB) into a managed byte array.
        /// </summary>
        private static byte[] ReadBitmapBytes(Bitmap bitmap)
        {
            Rectangle  area = new Rectangle(0, 0, bitmap.Width, bitmap.Height);
            BitmapData data = bitmap.LockBits(area, ImageLockMode.ReadWrite, PixelFormat.Format32bppRgb);

            try
            {
                // Stride may be negative, so take the absolute value for the byte count.
                int    numBytes = Math.Abs(data.Stride) * bitmap.Height;
                byte[] bytes    = new byte[numBytes];
                Marshal.Copy(data.Scan0, bytes, 0, numBytes);
                return(bytes);
            }
            finally
            {
                bitmap.UnlockBits(data);
            }
        }
Пример #21
0
        /// <summary>
        /// Generates candidate post-translational modifications on the GPU from the experimental
        /// spectrum and shortlists them against every candidate protein.
        /// </summary>
        /// <remarks>
        /// Each candidate protein is updated in place: matching modification masses are added to
        /// <c>InsilicoDetails.InsilicoMassLeft</c> and <c>Mw</c>, the accepted sites are appended
        /// to <c>PtmParticulars</c>, and <c>MwScore</c> is recomputed.
        /// </remarks>
        /// <param name="experimentalSpectrum">Experimental peak masses.</param>
        /// <param name="molW">Molecular weight used for the complementary peaks, the tolerance scaling and the score.</param>
        /// <param name="candidateProteinsList">Proteins to annotate; modified in place.</param>
        /// <param name="pepTol">Tolerance for the end-mass difference, in the unit given by <paramref name="pepUnit"/>.</param>
        /// <param name="userHopThreshold">Absolute tolerance when matching a hop start to an in-silico mass.</param>
        /// <param name="pepUnit">"ppm" or "%" scale the difference relative to <paramref name="molW"/>; any other value leaves it absolute.</param>
        public static void BlindPTM(List <double> experimentalSpectrum, double molW, List <ProteinDto> candidateProteinsList,
                                    double pepTol, double userHopThreshold, string pepUnit)
        {
            var stopwatch = new Stopwatch();

            // Data Preperation and Loading GPU Module
            stopwatch.Start();
            var peaks            = new List <double>();
            var aminoAcidList    = new List <string>();
            var modificationList = new List <string>();
            var startList        = new List <double>();
            var endList          = new List <double>();

            foreach (var peak in experimentalSpectrum)
            {
                // Each peak is shifted by 1.00727647 (presumably the proton mass - confirm)
                // and stored together with its complement with respect to molW.
                peaks.Add(peak + 1.00727647);
                peaks.Add(molW - (peak + 1.00727647));
            }
            peaks.Sort();
            GPGPU        gpu = CudafyHost.GetDevice(CudafyModes.Target);
            CudafyModule km  = CudafyModule.TryDeserialize();

            // Re-translate only when no cached module exists or its checksum is stale.
            if (km == null || !km.TryVerifyChecksums())
            {
                km = CudafyTranslator.Cudafy();
                km.Serialize();
            }
            gpu.LoadModule(km);
            stopwatch.Stop();
            Console.WriteLine("Data Preperation: " + stopwatch.Elapsed);

            // GPU Module
            stopwatch.Restart();
            var lengthSquared    = peaks.Count * peaks.Count;
            var peaksArray       = peaks.ToArray();
            var lengthOfPeakList = new int[1];

            lengthOfPeakList[0] = peaks.Count;
            var outputArray = new char[peaks.Count, peaks.Count, 37];
            var errorArray  = new double[peaks.Count, peaks.Count, 37];
            var modMassList = ModificationMass;

            try
            {
                char[,,] outputArrayDevice  = gpu.Allocate(outputArray);
                double[,,] errorArrayDevice = gpu.Allocate(errorArray);
                double[] peaksDevice            = gpu.Allocate <double>(peaksArray.Length);
                int[]    lengthOfPeakListDevice = gpu.Allocate <int>(lengthOfPeakList.Length);
                double[] ptmMassListDevice      = gpu.Allocate <double>(modMassList.Length);
                gpu.CopyToDevice(peaksArray, peaksDevice);
                gpu.CopyToDevice(lengthOfPeakList, lengthOfPeakListDevice);
                gpu.CopyToDevice(ModificationMass, ptmMassListDevice);

                // One thread per (start peak, end peak, modification slot) triple, N threads per block.
                int block = (int)Math.Ceiling((double)lengthSquared * 37 / N);

                gpu.Launch(block, N).PtmExtractor(peaksDevice, lengthOfPeakListDevice, ptmMassListDevice, outputArrayDevice,
                                                  errorArrayDevice);
                gpu.CopyFromDevice(outputArrayDevice, outputArray);
                gpu.CopyFromDevice(errorArrayDevice, errorArray);
            }
            finally
            {
                // Release device memory even if a transfer or the launch fails.
                gpu.FreeAll();
            }

            // Every non-'\0' entry is used as an index into the modification tables.
            for (var i = 0; i < peaks.Count; i++)
            {
                for (var j = 0; j < peaks.Count; j++)
                {
                    for (var k = 0; k < 37; k++)
                    {
                        if (outputArray[i, j, k] == '\0')
                        {
                            continue;
                        }
                        aminoAcidList.Add(ModificationAminoAcids[outputArray[i, j, k]].ToString());
                        modificationList.Add(ModificationName[outputArray[i, j, k]]);
                        startList.Add(peaks[i]);
                        endList.Add(peaks[j]);
                    }
                }
            }
            stopwatch.Stop();
            Console.WriteLine("GPU Generation: " + stopwatch.Elapsed);

            // PTM Shortlisting
            stopwatch.Restart();
            foreach (var protein in candidateProteinsList)
            {
                var sequence                = protein.Sequence.ToCharArray();
                var hopI                    = 0; // index into the generated hop lists
                var thrI                    = 0; // index into the protein's in-silico masses
                var shortlistedAminoAcid    = new List <string>();
                var shortlistedModification = new List <string>();
                var shortlistedEnd          = new List <double>();
                var shortlistedStart        = new List <double>();
                var shortlistedIndex        = new List <int>();

                // With no generated hops there is nothing to walk; the previous unconditional
                // loop never terminated in that case.
                while (startList.Count > 0)
                {
                    try
                    {
                        // Skip hops that start before the end of the last accepted hop.
                        if (shortlistedStart.Count > 0 &&
                            shortlistedEnd[shortlistedEnd.Count - 1] > startList[hopI])
                        {
                            hopI = hopI + 1;
                            if (hopI == startList.Count)
                            {
                                break;
                            }
                            continue;
                        }

                        var diff = startList[hopI] - protein.InsilicoDetails.InsilicoMassLeft[thrI];
                        if (diff <= userHopThreshold && diff >= -userHopThreshold)
                        {
                            if (aminoAcidList[hopI] == sequence[thrI + 2].ToString())
                            {
                                var temproray = modificationList[hopI].Split('_');
                                var modMass   = AminoAcids.ModificationTable(temproray[0]);
                                diff =
                                    Math.Abs(endList[hopI] -
                                             (protein.InsilicoDetails.InsilicoMassLeft[thrI + 1] +
                                              modMass));
                                if (string.Equals(pepUnit, "ppm", StringComparison.Ordinal))
                                {
                                    diff = (diff / molW) * 1000000;
                                }
                                else if (string.Equals(pepUnit, "%", StringComparison.Ordinal))
                                {
                                    diff = (diff / molW) * 100;
                                }
                                if (diff < pepTol)
                                {
                                    // Accepting the modification shifts every following in-silico mass.
                                    for (var i = thrI + 1;
                                         i < protein.InsilicoDetails.InsilicoMassLeft.Count;
                                         i++)
                                    {
                                        protein.InsilicoDetails.InsilicoMassLeft[i] =
                                            protein.InsilicoDetails.InsilicoMassLeft[i] +
                                            modMass;
                                    }
                                    protein.Mw = protein.Mw + modMass;
                                    shortlistedAminoAcid.Add(aminoAcidList[hopI]);
                                    shortlistedModification.Add(modificationList[hopI]);
                                    shortlistedEnd.Add(endList[hopI]);
                                    shortlistedStart.Add(startList[hopI]);
                                    shortlistedIndex.Add(thrI);
                                }
                            }
                        }
                        else if (diff > userHopThreshold)
                        {
                            thrI = thrI + 1;
                            if (thrI == protein.InsilicoDetails.InsilicoMassLeft.Count - 1)
                            {
                                break;
                            }
                            continue;
                        }
                        else if (diff < -userHopThreshold)
                        {
                            hopI = hopI + 1;
                            if (hopI == startList.Count)
                            {
                                break;
                            }
                            continue;
                        }
                        hopI = hopI + 1;
                        if (hopI == startList.Count)
                        {
                            break;
                        }
                    }
                    catch (Exception exception)
                    {
                        Debug.WriteLine(exception.Message);
                        // Neither index has advanced when an exception is raised, so retrying
                        // would throw the same exception forever; stop walking this protein.
                        break;
                    }
                }
                for (var hopIndex = 0; hopIndex < shortlistedStart.Count; hopIndex++)
                {
                    var site = new PostTranslationModificationsSiteDto
                    {
                        Index     = shortlistedIndex[hopIndex],
                        ModName   = shortlistedModification[hopIndex],
                        ModWeight = AminoAcids.ModificationTable(shortlistedModification[hopIndex]),
                        Site      = Convert.ToChar(shortlistedAminoAcid[hopIndex])
                    };
                    protein.PtmParticulars.Add(site);
                }
                var massError = Math.Abs(molW - protein.Mw);
                // NOTE(review): an absolute value is never below zero, so the "1" branch is
                // unreachable and the score is always the square root of the mass error.
                // Confirm the intended condition before changing it.
                protein.MwScore = Math.Abs(massError) < 0 ? 1 : Math.Pow(massError, 0.5);
            }
            stopwatch.Stop();
            Console.WriteLine("Shortlisting :" + stopwatch.Elapsed);
        }
Пример #22
0
        /// <summary>
        /// Walks through a CUDA SDK smoke test and lazily yields a progress message for each step:
        /// locating the SDK, translating and compiling a kernel, and - when a CUDA device is
        /// present - loading the module, running the kernel on 1024 elements and checking the sums.
        /// </summary>
        /// <returns>
        /// A lazily evaluated sequence of status messages; each step only runs when the
        /// corresponding message is enumerated.
        /// </returns>
        public static IEnumerable <string> TestCUDASDK()
        {
            NvccCompilerOptions nvcc = null;

            // Pick compiler options matching the bitness of the current process.
            if (IntPtr.Size == 8)
            {
                nvcc = NvccCompilerOptions.Createx64();
            }
            else
            {
                nvcc = NvccCompilerOptions.Createx86();
            }
            yield return(string.Format("Platform={0}", nvcc.Platform));

            yield return("Checking for CUDA SDK at " + nvcc.CompilerPath);

            if (!nvcc.TryTest())
            {
                yield return("Could not locate CUDA Include directory.");
            }
            else
            {
                yield return(string.Format("CUDA SDK Version={0}", nvcc.Version));

                yield return("Attempting to cudafy a kernel function.");

                var mod = CudafyTranslator.Cudafy(nvcc.Platform, eArchitecture.sm_11, nvcc.Version, false, typeof(CUDACheck));
                yield return("Successfully translated to CUDA C.");

                yield return("Attempting to compile CUDA C code.");

                // The return value is not needed; the call is made for its effect on the module.
                mod.Compile(eGPUCompiler.CudaNvcc, true);
                yield return("Successfully compiled CUDA C into a module.");

                if (CudafyHost.GetDeviceCount(eGPUType.Cuda) > 0)
                {
                    yield return("Attempting to instantiate CUDA device object (GPGPU).");

                    var gpu = CudafyHost.GetDevice(eGPUType.Cuda, 0);
                    yield return("Successfully got CUDA device 0.");

                    yield return("Attempting to load module.");

                    gpu.LoadModule(mod);
                    yield return("Successfully loaded module.");

                    yield return("Attempting to transfer data to GPU.");

                    int[]  a    = new int[1024];
                    int[]  b    = new int[1024];
                    int[]  c    = new int[1024];
                    Random rand = new Random();
                    for (int i = 0; i < 1024; i++)
                    {
                        a[i] = rand.Next(16384);
                        b[i] = rand.Next(16384);
                    }

                    int[] dev_a = null;
                    int[] dev_b = null;
                    int[] dev_c = null;
                    try
                    {
                        dev_a = gpu.CopyToDevice(a);
                        dev_b = gpu.CopyToDevice(b);
                        dev_c = gpu.Allocate(c);
                        yield return("Successfully transferred data to GPU.");

                        yield return("Attempting to launch function on GPU.");

                        gpu.Launch(1, 1024).TestKernelFunction(dev_a, dev_b, dev_c);
                        yield return("Successfully launched function on GPU.");

                        yield return("Attempting to transfer results back from GPU.");

                        gpu.CopyFromDevice(dev_c, c);
                    }
                    finally
                    {
                        // Release only the buffers created here; this also runs when the
                        // caller stops enumerating early or a GPU call throws.
                        if (dev_a != null)
                        {
                            gpu.Free(dev_a);
                        }
                        if (dev_b != null)
                        {
                            gpu.Free(dev_b);
                        }
                        if (dev_c != null)
                        {
                            gpu.Free(dev_c);
                        }
                    }
                    yield return("Successfully transferred results from GPU.");

                    yield return("Testing results.");

                    // The check expects c[i] == a[i] + b[i] for every element.
                    int errors = 0;
                    for (int i = 0; i < 1024; i++)
                    {
                        if (a[i] + b[i] != c[i])
                        {
                            errors++;
                        }
                    }
                    if (errors == 0)
                    {
                        yield return("Successfully tested results.");
                    }
                    else
                    {
                        yield return("Test failed - results not as expected.");
                    }

                    yield return("Checking for math libraries (FFT, BLAS, SPARSE, RAND).");

                    // Only a positive FFT version is reported; nothing is yielded otherwise.
                    var fft     = GPGPUFFT.Create(gpu);
                    int version = fft.GetVersion();
                    if (version > 0)
                    {
                        yield return("Successfully detected.");
                    }
                }
            }
        }
Пример #23
0
        /// <summary>
        /// Runs every "add_&lt;index&gt;" function of the translated module on the same pair of
        /// input vectors and reports on the console whether each produced the element-wise sum.
        /// </summary>
        public static void Execute()
        {
            GPGPU         device       = CudafyHost.GetDevice(CudafyModes.Target, 0);
            eArchitecture architecture = Program.testArchitecture;
            CudafyModule  module       = CudafyTranslator.Cudafy(architecture);

            device.LoadModule(module);

            // Host-side operands and result buffer.
            int[] hostA   = new int[N];
            int[] hostB   = new int[N];
            int[] hostSum = new int[N];

            // Device-side buffers with the same shapes as the host arrays.
            int[] deviceA   = device.Allocate <int>(hostA);
            int[] deviceB   = device.Allocate <int>(hostB);
            int[] deviceSum = device.Allocate <int>(hostSum);

            // Operands: a[i] = i, b[i] = 2 * i.
            for (int index = 0; index < N; index++)
            {
                hostA[index] = index;
                hostB[index] = 2 * index;
            }

            int functionCount = 0;
            while (functionCount < module.Functions.Count)
            {
                string kernelName = "add_" + functionCount.ToString();
                Console.WriteLine(kernelName);

                // Upload the operands, run the kernel by name, and fetch the result.
                device.CopyToDevice(hostA, deviceA);
                device.CopyToDevice(hostB, deviceB);
                device.Launch(128, 1, kernelName, deviceA, deviceB, deviceSum);
                device.CopyFromDevice(deviceSum, hostSum);

                // Locate the first element whose sum does not match, if any.
                int firstMismatch = -1;
                for (int index = 0; index < N && firstMismatch < 0; index++)
                {
                    if ((hostA[index] + hostB[index]) != hostSum[index])
                    {
                        firstMismatch = index;
                    }
                }

                if (firstMismatch >= 0)
                {
                    Console.WriteLine("{0} + {1} != {2}", hostA[firstMismatch], hostB[firstMismatch], hostSum[firstMismatch]);
                }
                else
                {
                    Console.WriteLine("We did it!");
                }

                functionCount++;
            }

            // Release the device buffers; the host arrays are left to the garbage collector.
            device.Free(deviceA);
            device.Free(deviceB);
            device.Free(deviceSum);
        }
Пример #24
0
        /// <summary>
        /// Runs the <c>QuickCorr</c> kernel over a 1024 x 768 image and returns the corrected
        /// foreground colours together with the time reported by the GPU timer.
        /// </summary>
        /// <remarks>
        /// Before launching, the method builds three host tables: a 21 x 41 x 45 profile grid
        /// loaded from a CSV file, an (initially empty) sample table, and a 256 x 256 x 256 lookup
        /// that maps every RGB value to its profile bin. Failures while reading or parsing the
        /// CSV are written to the console and the run continues with whatever was loaded.
        /// </remarks>
        /// <param name="foregorungRGB_CPU">Foreground colours to correct. NOTE(review): presumably one per pixel (1024 * 768) - confirm against callers.</param>
        /// <param name="BackgroundXYZ_CPU">Background colours handed to the kernel.</param>
        /// <returns>The kernel output image and the elapsed GPU time in milliseconds.</returns>
        public static TestOutput CorrectColour(ForeGroundStrucuture[] foregorungRGB_CPU, BackGroundStrucuture[] BackgroundXYZ_CPU)
        {
            const int image_size = 1024 * 768;

            // Reuse the cached module when its checksum is still valid; otherwise translate again.
            CudafyModule km = CudafyModule.TryDeserialize();

            if (km == null || !km.TryVerifyChecksums())
            {
                km = CudafyTranslator.Cudafy((typeof(ProfileStrucuture)), (typeof(ForeGroundStrucuture)), (typeof(BackGroundStrucuture)), (typeof(SampleStructure)), typeof(quick_corr));
                km.TrySerialize();
            }

            CudafyTranslator.GenerateDebug = true;
            // cuda or emulator
            GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);

            Console.WriteLine("Running quick correction using {0}", gpu.GetDeviceProperties(false).Name);
            gpu.LoadModule(km);

            ForeGroundStrucuture[] distance_CPU = new ForeGroundStrucuture[image_size];

            // Load the measured profile from its CSV file.
            #region
            DataTable profile = new DataTable();
            try
            {
                // add the csv bin file
                using (GenericParserAdapter parser = new GenericParserAdapter(@"C:\lev\STColorCorrection\Data\PROFILE\p3700.csv"))
                {
                    System.Data.DataSet dsResult = parser.GetDataSet();
                    profile = dsResult.Tables[0];
                }
            }
            catch (Exception ex)
            { Console.WriteLine(ex); }
            #endregion

            // Host tables; bin ranges: L 0-21, A 0-41, B 0-45.
            ProfileStrucuture[, ,] profiles_CPU  = new ProfileStrucuture[21, 41, 45];
            SampleStructure[,] samples_CPU       = new SampleStructure[image_size, 6];
            FGLookupStructure[, ,] fg_loopup_CPU = new FGLookupStructure[256, 256, 256];

            // Profile initialisation: mark every bin as empty first.
            #region
            for (int indexL = 0; indexL < 21; indexL++)
            {
                for (int indexA = 0; indexA < 41; indexA++)
                {
                    for (int indexB = 0; indexB < 45; indexB++)
                    {
                        profiles_CPU[indexL, indexA, indexB].L = indexL;
                        profiles_CPU[indexL, indexA, indexB].A = indexA;
                        profiles_CPU[indexL, indexA, indexB].B = indexB;
                        profiles_CPU[indexL, indexA, indexB].ML       = 0;
                        profiles_CPU[indexL, indexA, indexB].MA       = 0;
                        profiles_CPU[indexL, indexA, indexB].MB       = 0;
                        profiles_CPU[indexL, indexA, indexB].MX       = 0;
                        profiles_CPU[indexL, indexA, indexB].MY       = 0;
                        profiles_CPU[indexL, indexA, indexB].MZ       = 0;
                        profiles_CPU[indexL, indexA, indexB].distance = -1.0;
                        profiles_CPU[indexL, indexA, indexB].weight   = -1.0;

                        profiles_CPU[indexL, indexA, indexB].isempty = TRUE;
                        profiles_CPU[indexL, indexA, indexB].isMoreAccurateThanOrigin = FALSE;
                    }
                }
            }

            // Fill the bins that have a CSV row. Row 0 is skipped (presumably a header row - confirm).
            int lvalue, avalue, bvalue;
            try
            {
                for (int i = 1; i < profile.Rows.Count; i++)
                {
                    lvalue = Convert.ToInt32(profile.Rows[i][0].ToString());
                    avalue = Convert.ToInt32(profile.Rows[i][1].ToString());
                    bvalue = Convert.ToInt32(profile.Rows[i][2].ToString());

                    // Scale by 0.2 and shift A and B so the values become array indices.
                    lvalue = (int)(lvalue * 0.2);
                    avalue = (int)(avalue * 0.2) + 20;
                    bvalue = (int)(bvalue * 0.2) + 22;

                    profiles_CPU[lvalue, avalue, bvalue].L = lvalue;
                    profiles_CPU[lvalue, avalue, bvalue].A = avalue;
                    profiles_CPU[lvalue, avalue, bvalue].B = bvalue;

                    profiles_CPU[lvalue, avalue, bvalue].ML = (double)Convert.ToDouble(profile.Rows[i][3].ToString());
                    profiles_CPU[lvalue, avalue, bvalue].MA = (double)Convert.ToDouble(profile.Rows[i][4].ToString());
                    profiles_CPU[lvalue, avalue, bvalue].MB = (double)Convert.ToDouble(profile.Rows[i][5].ToString());

                    profiles_CPU[lvalue, avalue, bvalue].MX = (double)Convert.ToDouble(profile.Rows[i][6].ToString());
                    profiles_CPU[lvalue, avalue, bvalue].MY = (double)Convert.ToDouble(profile.Rows[i][7].ToString());
                    profiles_CPU[lvalue, avalue, bvalue].MZ = (double)Convert.ToDouble(profile.Rows[i][8].ToString());

                    profiles_CPU[lvalue, avalue, bvalue].isempty = FALSE;
                }
            }
            catch (Exception ex)
            { Console.WriteLine(ex); }
            #endregion

            // Foreground lookup initialisation: map every RGB value to its profile bin.
            // The bounds are 256 so that channel value 255 is included; with "< 255" every
            // colour containing a 255 channel kept the default (0, 0, 0) bin.
            #region
            for (int r = 0; r < 256; r++)
            {
                for (int g = 0; g < 256; g++)
                {
                    for (int b = 0; b < 256; b++)
                    {
                        Point3D foregroundLAB = ToLAB(new ForeGroundStrucuture((byte)r, (byte)g, (byte)b));

                        // Round each component to the nearest multiple of 5.
                        int binL = ((int)Math.Round(foregroundLAB.X / 5.0)) * 5;
                        int binA = ((int)Math.Round(foregroundLAB.Y / 5.0)) * 5;
                        int binB = ((int)Math.Round(foregroundLAB.Z / 5.0)) * 5;

                        // Clamp the bins to the limits used by the profile grid.
                        if (binL > 100)
                        {
                            binL = 100;
                        }
                        if (binA < -86.17385493791946)
                        {
                            binA = -85;
                        }
                        if (binA > 98.2448002875424)
                        {
                            binA = 100;
                        }
                        if (binB < -107.8619171648283)
                        {
                            binB = -110;
                        }
                        if (binB > 94.47705120353054)
                        {
                            binB = 95;
                        }

                        // Same scale and offsets as used for the profile indices above.
                        fg_loopup_CPU[r, g, b].L = (int)(binL * 0.2) + 0;
                        fg_loopup_CPU[r, g, b].A = (int)(binA * 0.2) + 20;
                        fg_loopup_CPU[r, g, b].B = (int)(binB * 0.2) + 22;
                    }
                }
            }
            #endregion

            try
            {
                // Upload the lookup tables before the timed section.
                ProfileStrucuture[, ,] profile_GPU   = gpu.CopyToDevice(profiles_CPU);
                SampleStructure[,] samples_GPU       = gpu.CopyToDevice(samples_CPU);
                FGLookupStructure[, ,] fg_loopup_GPU = gpu.CopyToDevice(fg_loopup_CPU);

                // Timed section: input upload, kernel launch and result download.
                gpu.StartTimer();
                ForeGroundStrucuture[] foregorungRGB_GPU = gpu.CopyToDevice(foregorungRGB_CPU);
                BackGroundStrucuture[] BackgroundXYZ_GPU = gpu.CopyToDevice(BackgroundXYZ_CPU);

                // Output buffer.
                ForeGroundStrucuture[] distance_GPU = gpu.Allocate(distance_CPU);

                // Image size: 1024 x 768, covered by 16 x 16 thread blocks.
                dim3 grids   = new dim3(1024 / 16, 768 / 16);
                dim3 threads = new dim3(16, 16);

                gpu.Launch(grids, threads, ((Action <GThread, ProfileStrucuture[, , ], ForeGroundStrucuture[], BackGroundStrucuture[], ForeGroundStrucuture[], SampleStructure[, ], FGLookupStructure[, , ]>)QuickCorr), profile_GPU, foregorungRGB_GPU, BackgroundXYZ_GPU, distance_GPU, samples_GPU, fg_loopup_GPU);

                // Copy the result back from the GPU.
                gpu.CopyFromDevice(distance_GPU, distance_CPU);

                // Stop the timer and report the elapsed time.
                double     elapsedTime = gpu.StopTimer();
                TestOutput to_return   = new TestOutput();
                to_return.output_image = distance_CPU;
                to_return.timeTaken    = elapsedTime;
                Console.WriteLine("Time to generate: {0} ms", elapsedTime);

                return(to_return);
            }
            finally
            {
                // Releases every device allocation made above, also when a GPU call throws.
                gpu.FreeAll();
            }
        }
Пример #25
0
        /// <summary>
        /// Walks every device CUDAfy reports for the CUDA, OpenCL and emulator back ends and, for each
        /// target language, translates the <c>Primitives</c> type and loads the resulting module on the device.
        /// </summary>
        /// <remarks>
        /// A <c>CudafyCompileException</c> for one language is ignored and the next language is tried.
        /// A <c>DllNotFoundException</c>, <c>InvalidOperationException</c> or <c>Cloo.ComputeException</c>
        /// abandons the current device type and moves on to the next one.
        /// </remarks>
        private void InitializeGPUs()
        {
            var candidateTypes     = new[] { eGPUType.Cuda, eGPUType.OpenCL, eGPUType.Emulator };
            var candidateLanguages = new[] { eLanguage.Cuda, eLanguage.OpenCL };

            foreach (var deviceType in candidateTypes)
            {
                try
                {
                    var deviceCount = CudafyHost.GetDeviceCount(deviceType);

                    for (var deviceId = 0; deviceId < deviceCount; deviceId++)
                    {
                        var device     = CudafyHost.GetDevice(deviceType, deviceId);
                        var deviceInfo = device.GetDeviceProperties(true);

                        CudafyModes.Target = deviceType;

                        foreach (var targetLanguage in candidateLanguages)
                        {
                            // A random file name avoids clashes on the default temp file when
                            // several threads (unit tests) translate at the same time.
                            var tempSourceFile = Path.GetRandomFileName();

                            try
                            {
                                CudafyTranslator.Language = targetLanguage;

                                var options = CompilerHelper.Create(
                                    ePlatform.Auto,
                                    eArchitecture.Unknown,
                                    eCudafyCompileMode.Default,
                                    CudafyTranslator.WorkingDirectory,
                                    CudafyTranslator.GenerateDebug);
                                options.InputFile = tempSourceFile;

                                // If this call fails under NCrunch/unit tests, a new Cudafy.NET version has
                                // probably been installed and must be registered in the GAC:
                                //     gacutil -i Cudafy.NET.dll
                                var module = CudafyTranslator.Cudafy(options, typeof(Primitives));

                                var alreadyLoaded = device.IsModuleLoaded(module.Name);
                                if (!alreadyLoaded)
                                {
                                    device.LoadModule(module);
                                }

                                device.EnableMultithreading();

                                // Label for this device / back end / language combination. The lookups
                                // that used to be keyed by it are currently disabled, so it is unused.
                                string gpuName = string.Join(
                                    " - ",
                                    deviceInfo.Name.Trim(),
                                    deviceType.ToString(),
                                    targetLanguage.ToString());
                            }
                            catch (CudafyCompileException)
                            {
                                // Language not supported
                            }
                            finally
                            {
                                File.Delete(tempSourceFile);

                                // ncrunch: no coverage start
                            }
                        }
                    }
                }
                catch (DllNotFoundException)
                {
                    // Nothing to do; continue with the next device type.
                }
                catch (InvalidOperationException)
                {
                    // Language not supported
                }
                catch (Cloo.ComputeException)
                {
                    // Language not supported
                } // ncrunch: no coverage end
            }
        }
Пример #26
0
 /// <summary>
 /// Runs <c>CudafyTranslator.Cudafy</c> for the <c>sm_20</c> architecture and keeps the
 /// resulting module in <c>_cm</c>.
 /// </summary>
 public void SetUp()
 {
     var translatedModule = CudafyTranslator.Cudafy(eArchitecture.sm_20);

     _cm = translatedModule;
 }
Пример #27
0
        /// <summary>
        /// Runs the <c>DoubleAllValues</c> kernel first with blocking copies and then with smart copy
        /// enabled over several streams, prints the elapsed milliseconds of each phase, and checks
        /// that the output buffer holds the doubled input.
        /// </summary>
        /// <remarks>The test returns immediately when the device is an <c>OpenCLDevice</c>.</remarks>
        public void Test_smartCopyToDevice()
        {
            if (_gpu is OpenCLDevice)
            {
                Console.WriteLine("Device not supporting smart copy, so skip.");
                return;
            }

            // Reuse the serialized module when its checksums verify; otherwise translate and cache it.
            var module = CudafyModule.TryDeserialize();
            bool cacheUsable = module != null && module.TryVerifyChecksums();
            if (!cacheUsable)
            {
                module = CudafyTranslator.Cudafy(CudafyModes.Architecture);
                module.Serialize();
            }

            _gpu.LoadModule(module);
            _gpuuintBufferIn  = _gpu.Allocate <uint>(N);
            _gpuuintBufferOut = _gpu.Allocate <uint>(N);

            const int batchSize = 8;
            const int loops     = 6;

            // Phase 1: blocking copy / launch / copy, repeated loops * batchSize times.
            Stopwatch timer = Stopwatch.StartNew();
            int blockingPasses = loops * batchSize;
            for (int pass = 0; pass < blockingPasses; pass++)
            {
                _gpu.CopyToDevice(_uintBufferIn, 0, _gpuuintBufferIn, 0, N);
                _gpu.Launch(N / 512, 512, "DoubleAllValues", _gpuuintBufferIn, _gpuuintBufferOut);
                _gpu.CopyFromDevice(_gpuuintBufferOut, 0, _uintBufferOut, 0, N);
            }
            long elapsedMs = timer.ElapsedMilliseconds;
            Console.WriteLine(elapsedMs);

            // One input and one output staging buffer per stream.
            IntPtr[] stagingIn  = new IntPtr[batchSize];
            IntPtr[] stagingOut = new IntPtr[batchSize];
            for (int slot = 0; slot < batchSize; slot++)
            {
                stagingIn[slot]  = _gpu.HostAllocate <uint>(N);
                stagingOut[slot] = _gpu.HostAllocate <uint>(N);
            }

            // Phase 2: the same work issued asynchronously on streams 1..batchSize.
            _gpu.EnableSmartCopy();
            timer.Restart();
            for (int round = 0; round < loops; round++)
            {
                for (int stream = 1; stream <= batchSize; stream++)
                {
                    _gpu.CopyToDeviceAsync(_uintBufferIn, 0, _gpuuintBufferIn, 0, N, stream, stagingIn[stream - 1]);
                }

                for (int stream = 1; stream <= batchSize; stream++)
                {
                    _gpu.LaunchAsync(N / 256, 256, stream, "DoubleAllValues", _gpuuintBufferIn, _gpuuintBufferOut);
                }

                for (int stream = 1; stream <= batchSize; stream++)
                {
                    _gpu.CopyFromDeviceAsync(_gpuuintBufferOut, 0, _uintBufferOut, 0, N, stream, stagingOut[stream - 1]);
                }

                // Every stream is synchronised twice per round, exactly as before.
                for (int syncPass = 0; syncPass < 2; syncPass++)
                {
                    for (int stream = 1; stream <= batchSize; stream++)
                    {
                        _gpu.SynchronizeStream(stream);
                    }
                }
            }

            elapsedMs = timer.ElapsedMilliseconds;
            Console.WriteLine(elapsedMs);
            _gpu.DisableSmartCopy();

            // Double the input in place so it can serve as the expected output.
            int index = 0;
            while (index < N)
            {
                _uintBufferIn[index] *= 2;
                index++;
            }

            Assert.IsTrue(Compare(_uintBufferIn, _uintBufferOut));

            ClearOutputsAndGPU();
        }
Пример #28
0
        /// <summary>
        /// Builds the node and weight arrays for the network described by <c>m_lCount</c>, then trains on the
        /// GPU one epoch at a time until the validation error stops improving.
        /// </summary>
        /// <remarks>
        /// The first 75% of the rows are used for training and the remaining rows for validation.
        /// Training stops when the validation MSE reaches zero, after more than 10000 epochs, or after
        /// 20 consecutive epochs in the "check done" state. If the best validation MSE was seen in an
        /// earlier epoch than the last one, the weights saved at that epoch are restored.
        /// When <c>OutputFileName</c> is set, progress is appended to that file; the file is closed before
        /// this method returns, including when training throws.
        /// </remarks>
        /// <param name="features">Feature matrix; one network input per column.</param>
        /// <param name="labels">Label matrix; a column with fewer than two distinct values is treated as
        /// continuous (one output node), otherwise one output node is created per value.</param>
        /// <param name="colMin">Not used by this method.</param>
        /// <param name="colMax">Not used by this method.</param>
        public override void VTrain(VMatrix features, VMatrix labels, double[] colMin, double[] colMax)
        {
            if ((m_lCount == null) || (m_lCount.Length < 3))
            {
                // default topology: a single hidden layer with twice as many nodes as there are features
                m_lCount = new int[3] {
                    0, features.Cols() * 2, 0
                };
            }

            List <Node> nodes = new List <Node>();

            // add the input nodes
            m_lCount[0] = features.Cols();
            for (var n = 0; n < m_lCount[0]; n++)
            {
                nodes.Add(new Node(-1, -1, 0, 0, 0));
            }

            // every node owns one weight per node in the previous layer, plus one extra
            int numWeights = m_lCount[0] + 1;
            int wBegIdx    = 0;

            // add the nodes for the hidden layers
            for (var layer = 1; layer < m_lCount.Length - 1; layer++)
            {
                for (var n = 0; n < m_lCount[layer]; n++)
                {
                    nodes.Add(new Node(wBegIdx, wBegIdx + numWeights - 1, 0, 0, 0));
                    wBegIdx += numWeights;
                }

                numWeights = m_lCount[layer] + 1;
            }

            // figure out how many outputs we need
            int oCount = 0;

            for (var col = 0; col < labels.Cols(); col++)
            {
                var labelValueCount = labels.ValueCount(col);

                if (labelValueCount < 2)
                {
                    // continuous
                    oCount++;
                }
                else
                {
                    oCount += labelValueCount;
                }
            }

            // update the layer arrays; element 0 keeps the zero it gets from array creation
            m_lCount[m_lCount.Length - 1] = oCount;
            m_lBegIdx = new int[m_lCount.Length];
            for (var i = 1; i < m_lCount.Length; i++)
            {
                m_lBegIdx[i] = m_lBegIdx[i - 1] + m_lCount[i - 1];
            }

            // add the output nodes
            for (var col = 0; col < labels.Cols(); col++)
            {
                var labelValueCount = labels.ValueCount(col);

                if (labelValueCount < 2)
                {
                    // continuous
                    nodes.Add(new Node(wBegIdx, wBegIdx + numWeights - 1, 1, col, -1));
                    wBegIdx += numWeights;
                }
                else
                {
                    for (var n = 0; n < labelValueCount; n++)
                    {
                        nodes.Add(new Node(wBegIdx, wBegIdx + numWeights - 1, 0, col, n));
                        wBegIdx += numWeights;
                    }
                }
            }

            m_nodes = nodes.ToArray();

            // create the weights, drawn from NextDouble() so that each lies in (-0.1, 0.1]
            m_weights     = new double[wBegIdx];
            m_bestWeights = new double[wBegIdx];
            m_deltas      = new double[wBegIdx];
            for (var i = 0; i < wBegIdx; i++)
            {
                m_weights[i]     = 0.1 - (m_rand.NextDouble() * 0.2);
                m_bestWeights[i] = m_weights[i];
                m_deltas[i]      = 0;
            }

            if (!string.IsNullOrEmpty(OutputFileName))
            {
                m_outputFile = File.AppendText(OutputFileName);
            }

            try
            {
                int trainSize = (int)(0.75 * features.Rows());

                // flatten the training rows into plain arrays so they can be copied to the device
                double[,] trainFeatures = new double[trainSize, features.Cols()];
                for (int r = 0; r < trainSize; r++)
                {
                    for (int c = 0; c < features.Cols(); c++)
                    {
                        trainFeatures[r, c] = features.Get(r, c);
                    }
                }

                double[,] trainLabels = new double[trainSize, labels.Cols()];
                for (int r = 0; r < trainSize; r++)
                {
                    for (int c = 0; c < labels.Cols(); c++)
                    {
                        trainLabels[r, c] = labels.Get(r, c);
                    }
                }

                // row visiting order; shuffled before every epoch
                int[] fIdx = new int[trainSize];
                for (int i = 0; i < fIdx.Length; i++)
                {
                    fIdx[i] = i;
                }

                VMatrix validationFeatures = new VMatrix(features, trainSize, 0, features.Rows() - trainSize, features.Cols());
                VMatrix validationLabels   = new VMatrix(labels, trainSize, 0, labels.Rows() - trainSize, labels.Cols());

                int    epoch        = 0;                            // current epoch number
                int    bestEpoch    = 0;                            // epoch number of best MSE
                int    eCount       = 0;                            // number of epochs since the best MSE
                bool   checkDone    = false;                        // if true, check to see if we're done
                double bestMSE      = double.MaxValue;              // best validation MSE so far
                double bestAccuracy = double.MaxValue;              // validation accuracy at the best MSE

                Console.WriteLine("Epoch\tMSE (validation)\taccuracy (validation)");
                if (m_outputFile != null)
                {
                    m_outputFile.Write("Layers: ");
                    for (var l = 0; l < m_lCount.Length - 1; l++)
                    {
                        m_outputFile.Write(m_lCount[l]);
                        m_outputFile.Write('x');
                    }
                    m_outputFile.WriteLine(m_lCount[m_lCount.Length - 1]);
                    m_outputFile.WriteLine("Momentum: " + m_momentum);
                    m_outputFile.WriteLine();
                    m_outputFile.WriteLine("Weights");
                    PrintWeights();
                    m_outputFile.WriteLine("Epoch\tMSE (validation)\taccuracy (validation)");
                }

                CudafyModule km = CudafyTranslator.Cudafy();

                GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);

                gpu.LoadModule(km);

                for (; ;)
                {
                    // Advance the epoch counter. Without this the "epoch == 1" and "epoch > 10000"
                    // tests below can never fire, bestEpoch stays 0, and the best weights are never
                    // restored after the loop.
                    epoch++;

                    // shuffle the training set
                    Shuffle(ref fIdx, m_rand);

                    double[,] g_trainFeatures = gpu.CopyToDevice(trainFeatures);
                    double[,] g_trainLabels   = gpu.CopyToDevice(trainLabels);
                    int[]    g_fIdx    = gpu.CopyToDevice(fIdx);
                    int[]    g_lCount  = gpu.CopyToDevice(m_lCount);
                    int[]    g_lBegIdx = gpu.CopyToDevice(m_lBegIdx);
                    Node[]   g_nodes   = gpu.CopyToDevice(m_nodes);
                    double[] g_weights = gpu.CopyToDevice(m_weights);
                    double[] g_deltas  = gpu.CopyToDevice(m_deltas);

                    // Launch trainSize / 256 blocks of 256 threads each.
                    // NOTE(review): the integer division drops any remainder and gives zero blocks when
                    // trainSize < 256 - confirm against the TrainEpoch kernel before changing it.
                    gpu.Launch(trainSize / 256, 256).TrainEpoch(g_trainFeatures, g_trainLabels, g_fIdx, g_lCount, g_lBegIdx, g_nodes, g_weights, g_deltas, m_rate, m_momentum);

                    // copy the arrays back from the GPU to the CPU
                    gpu.CopyFromDevice(g_weights, m_weights);
                    gpu.CopyFromDevice(g_deltas, m_deltas);
                    gpu.CopyFromDevice(g_fIdx, fIdx);

                    // free the memory allocated on the GPU
                    gpu.FreeAll();

                    // check the MSE after this epoch
                    double mse = VGetMSE(validationFeatures, validationLabels);

                    // check the validation accuracy after this epoch
                    double accuracy = VMeasureAccuracy(validationFeatures, validationLabels, null);

                    Console.WriteLine(string.Format("{0}-{1}\t{2}\t{3}", epoch, eCount, mse, accuracy));
                    if (m_outputFile != null)
                    {
                        m_outputFile.WriteLine(string.Format("{0}-{1}\t{2}\t{3}", epoch, eCount, mse, accuracy));
                        m_outputFile.Flush();
                    }

                    if ((mse == 0.0) || (epoch > 10000))
                    {
                        break;
                    }
                    else if ((epoch == 1) || (mse < bestMSE))
                    {
                        if (epoch == 1)
                        {
                            // save the initial MSE
                            bestMSE = mse;
                        }
                        else if ((mse / bestMSE) > 0.99)
                        {
                            // improved by less than 1%: start counting down, but keep a running count
                            if (!checkDone)
                            {
                                checkDone = true;
                                eCount    = 0;
                            }
                        }
                        else
                        {
                            // a real improvement: cancel any countdown
                            checkDone = false;
                            eCount    = 0;
                        }

                        // save the best for later
                        bestMSE      = mse;
                        bestAccuracy = accuracy;
                        bestEpoch    = epoch;
                        SaveBestWeights();
                    }
                    else if (!checkDone)
                    {
                        checkDone = true;
                        eCount    = 0;
                    }

                    if (checkDone)
                    {
                        // check to see if we're done
                        eCount++;
                        if (eCount >= 20)
                        {
                            break;
                        }
                    }
                }

                if (m_outputFile != null)
                {
                    m_outputFile.WriteLine();
                    m_outputFile.WriteLine("Weights");
                    PrintWeights();
                }

                if ((bestEpoch > 0) && (bestEpoch != epoch))
                {
                    RestoreBestWeights();
                    if (m_outputFile != null)
                    {
                        m_outputFile.WriteLine();
                        m_outputFile.WriteLine(string.Format("Best Weights (from Epoch {0}, valMSE={1}, valAcc={2})", bestEpoch, bestMSE, bestAccuracy));
                        PrintWeights();
                    }
                }
            }
            finally
            {
                // close the log even when translation, the GPU or validation throws
                if (m_outputFile != null)
                {
                    m_outputFile.Close();
                }
            }
        }
Пример #29
0
        /// <summary>
        /// Loads the p3700 colour profile, then runs the brute-force, quick-correct, snake and half-step
        /// colour corrections on 500 random foreground/background pairs and writes the results to two
        /// CSV files (distances with timings, and distances with the corrected RGB values).
        /// </summary>
        /// <remarks>
        /// Any exception is printed to the console rather than propagated. The CSV writers are closed
        /// whether or not the run succeeds, and the program waits for a key press before exiting.
        /// </remarks>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            try
            {
                // Reuse the cached module when its checksums still verify; otherwise translate and cache it.
                CudafyModule km = CudafyModule.TryDeserialize();
                if (km == null || !km.TryVerifyChecksums())
                {
                    km = CudafyTranslator.Cudafy();
                    km.TrySerialize();
                }

                CudafyTranslator.GenerateDebug = true;
                // cuda or emulator
                GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
                gpu.LoadModule(km);

                // set up the colour profile so that measured LAB values can be looked up
                Matrix3D navigationMatrix = new Matrix3D();
                navigationMatrix.Translate(new Vector3D(0, 100, 110));
                navigationMatrix.Scale(new Vector3D(1.0 / 5, 1.0 / 5, 1.0 / 5));

                // load the profile into a three-dimensional array of bins
                Bin[, ,] p3700 = new Bin[RANGEL, RANGEA, RANGEB];
                for (int l = 0; l < RANGEL; l++)
                {
                    for (int a = 0; a < RANGEA; a++)
                    {
                        for (int b = 0; b < RANGEB; b++)
                        {
                            p3700[l, a, b] = new Bin(l, a, b);
                        }
                    }
                }

                try
                {
                    // read the csv bin file
                    using (GenericParserAdapter parser = new GenericParserAdapter(@"C:\lev\STColorCorrection\Data\PROFILE\p3700.csv"))
                    {
                        System.Data.DataSet dsResult = parser.GetDataSet();
                        profile = dsResult.Tables[0];
                    }
                }
                catch (Exception loadError)
                {
                    // Report the reason instead of hiding it; execution continues as before with
                    // whatever value 'profile' already holds.
                    Console.WriteLine(loadError);
                }

                // row 0 is skipped; every other row fills in one bin
                for (int i = 1; i < profile.Rows.Count; i++)
                {
                    var row = profile.Rows[i];

                    // LAB value as read from the profile (columns 0-2)
                    Point3D labBin = new Point3D(
                        Convert.ToDouble(row[0].ToString()),
                        Convert.ToDouble(row[1].ToString()),
                        Convert.ToDouble(row[2].ToString()));

                    // transformed into bin coordinates
                    Point3D labCoordinate = navigationMatrix.Transform(labBin);
                    if (labCoordinate.X == 20 && labCoordinate.Y == 20 && labCoordinate.Z == 22)
                    {
                        Console.WriteLine("empty");
                    }

                    // the bin to fill in
                    Bin actualBin = GetProfileBin(p3700, labCoordinate);

                    // bin RGB value (columns 9-11)
                    actualBin.binRGB.X = Convert.ToByte(row[9].ToString());
                    actualBin.binRGB.Y = Convert.ToByte(row[10].ToString());
                    actualBin.binRGB.Z = Convert.ToByte(row[11].ToString());

                    // measured LAB values (columns 3-5)
                    actualBin.measuredLAB.X = Convert.ToDouble(row[3].ToString());
                    actualBin.measuredLAB.Y = Convert.ToDouble(row[4].ToString());
                    actualBin.measuredLAB.Z = Convert.ToDouble(row[5].ToString());

                    // measured XYZ values (columns 6-8)
                    actualBin.measuredXYZ.X = Convert.ToDouble(row[6].ToString());
                    actualBin.measuredXYZ.Y = Convert.ToDouble(row[7].ToString());
                    actualBin.measuredXYZ.Z = Convert.ToDouble(row[8].ToString());

                    actualBin.isEmpty = false;
                }

                // column names of the timing file and of the corrected-colour file
                string[] headerColumnsFile1 =
                {
                    "R_fg_in", "G_fg_in", "B_fg_in",
                    "L_fg_in", "A_fg_in", "B_fg_in",
                    "X_bg_in", "Y_bg_in", "Z_bg_in",
                    "BF_Dist", "Cuda BF time",
                    "QC_Dist", "Cuda QC time",
                    "Snake_Dist", "Cuda Snake time",
                    "DecreaseStep_DS", "Cuda DS time"
                };
                string[] headerColumnsFile2 =
                {
                    "R_fg_in", "G_fg_in", "B_fg_in",
                    "L_fg_in", "A_fg_in", "B_fg_in",
                    "X_bg_in", "Y_bg_in", "Z_bg_in",
                    "BF_Dist", "R_fg_out", "G_fg_out", "B_fg_out",
                    "QC_Dist", "R_fg_out", "G_fg_out", "B_fg_out",
                    "Snake_Dist", "R_fg_out", "G_fg_out", "B_fg_out",
                    "DecreaseStep_DS", "R_fg_out", "G_fg_out", "B_fg_out"
                };

                CsvFileWriter output_file_1 = null;
                CsvFileWriter output_file_2 = null;
                try
                {
                    // create the CSV files
                    output_file_1 = new CsvFileWriter(@"C:\lev\STColorCorrection\Data\CUDA performance analysis\out_file1.csv");
                    output_file_2 = new CsvFileWriter(@"C:\lev\STColorCorrection\Data\CUDA performance analysis\out_file2.csv");

                    // write the headers
                    CsvRow header = new CsvRow();
                    foreach (string column in headerColumnsFile1)
                    {
                        header.Add(column);
                    }
                    output_file_1.WriteRow(header);

                    header = new CsvRow();
                    foreach (string column in headerColumnsFile2)
                    {
                        header.Add(column);
                    }
                    output_file_2.WriteRow(header);

                    Random randomGenerater = new Random();
                    for (int num_colors = 0; num_colors < 500; num_colors++)
                    {
                        CsvRow new_row_file_1 = new CsvRow();
                        CsvRow new_row_file_2 = new CsvRow();

                        // random foreground colour
                        Byte[] rgb = new Byte[3];
                        randomGenerater.NextBytes(rgb);
                        System.Drawing.Color foreground = System.Drawing.Color.FromArgb(rgb[0], rgb[1], rgb[2]);

                        // random background, each component scaled by its own upper bound
                        Point3D background = new Point3D(
                            randomGenerater.NextDouble() * 0.9504,
                            randomGenerater.NextDouble() * 1.0000,
                            randomGenerater.NextDouble() * 1.0888);

                        Bin foregroundBin = FindForegroundBin(p3700, navigationMatrix, foreground);
                        PerceptionLib.Color foregroundLAB = new PerceptionLib.Color();

                        foregroundLAB.LA = foregroundBin.measuredLAB.X;
                        foregroundLAB.A  = foregroundBin.measuredLAB.Y;
                        foregroundLAB.B  = foregroundBin.measuredLAB.Z;

                        // the input colours go into both files unchanged
                        string[] inputColumns =
                        {
                            foreground.R.ToString(),
                            foreground.G.ToString(),
                            foreground.B.ToString(),
                            foregroundLAB.LA.ToString(),
                            foregroundLAB.A.ToString(),
                            foregroundLAB.B.ToString(),
                            background.X.ToString(),
                            background.Y.ToString(),
                            background.Z.ToString()
                        };
                        foreach (string value in inputColumns)
                        {
                            new_row_file_1.Add(value);
                            new_row_file_2.Add(value);
                        }

                        // brute force
                        Color.TestingStructure[] results_brute_force = Color.CorrectColour(foreground, background.X, background.Y, background.Z);

                        new_row_file_1.Add(results_brute_force[0].distance.ToString());
                        new_row_file_1.Add(results_brute_force[0].execution_time.ToString());

                        Bin actualBin = GetProfileBin(p3700, new Point3D(results_brute_force[0].Given_R, results_brute_force[0].Given_G, results_brute_force[0].Given_B));

                        new_row_file_2.Add(results_brute_force[0].distance.ToString());
                        new_row_file_2.Add(actualBin.binRGB.X.ToString());
                        new_row_file_2.Add(actualBin.binRGB.Y.ToString());
                        new_row_file_2.Add(actualBin.binRGB.Z.ToString());

                        // quick correct
                        quick_corr.TestingStructure[] results_quick_corr = quick_corr.CorrectColour(foreground, background.X, background.Y, background.Z);

                        new_row_file_1.Add(results_quick_corr[0].distance.ToString());
                        new_row_file_1.Add(results_quick_corr[0].execution_time.ToString());

                        actualBin = GetProfileBin(p3700, new Point3D(results_quick_corr[0].Given_R, results_quick_corr[0].Given_G, results_quick_corr[0].Given_B));

                        new_row_file_2.Add(results_quick_corr[0].distance.ToString());
                        new_row_file_2.Add(actualBin.binRGB.X.ToString());
                        new_row_file_2.Add(actualBin.binRGB.Y.ToString());
                        new_row_file_2.Add(actualBin.binRGB.Z.ToString());

                        // snake
                        snake.TestingStructure[] results_snake = snake.CorrectColour(foreground, background.X, background.Y, background.Z);

                        new_row_file_1.Add(results_snake[0].distance.ToString());
                        new_row_file_1.Add(results_snake[0].execution_time.ToString());

                        actualBin = GetProfileBin(p3700, new Point3D(results_snake[0].Given_R, results_snake[0].Given_G, results_snake[0].Given_B));

                        new_row_file_2.Add(results_snake[0].distance.ToString());
                        new_row_file_2.Add(actualBin.binRGB.X.ToString());
                        new_row_file_2.Add(actualBin.binRGB.Y.ToString());
                        new_row_file_2.Add(actualBin.binRGB.Z.ToString());

                        // half step (written under the "DecreaseStep_DS" columns)
                        half_step.TestingStructure[] results_half_step = half_step.CorrectColour(foreground, background.X, background.Y, background.Z);

                        new_row_file_1.Add(results_half_step[0].distance.ToString());
                        new_row_file_1.Add(results_half_step[0].execution_time.ToString());

                        actualBin = GetProfileBin(p3700, new Point3D(results_half_step[0].Given_R, results_half_step[0].Given_G, results_half_step[0].Given_B));

                        new_row_file_2.Add(results_half_step[0].distance.ToString());
                        new_row_file_2.Add(actualBin.binRGB.X.ToString());
                        new_row_file_2.Add(actualBin.binRGB.Y.ToString());
                        new_row_file_2.Add(actualBin.binRGB.Z.ToString());

                        // write the results
                        output_file_1.WriteRow(new_row_file_1);
                        output_file_2.WriteRow(new_row_file_2);
                    }
                }
                finally
                {
                    // close both CSV files even when a correction throws part-way through the run
                    try
                    {
                        if (output_file_1 != null)
                        {
                            output_file_1.Close();
                        }
                    }
                    finally
                    {
                        if (output_file_2 != null)
                        {
                            output_file_2.Close();
                        }
                    }
                }

                Console.WriteLine("Done!");
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex);
            }

            Console.ReadKey();
        }
Пример #30
0
        /// <summary>
        /// Runs the brute-force colour correction kernel on the GPU for one
        /// foreground/background image pair.
        /// </summary>
        /// <remarks>
        /// Steps: load (or translate and cache) the Cudafy module, read the display profile CSV
        /// into a 21 x 41 x 45 lookup grid, copy the grid and both input images to the device,
        /// launch <c>Bruteforce</c>, and copy the result back. The GPU timer covers the two image
        /// uploads, the output allocation, the kernel launch and the result download; the profile
        /// upload happens before the timer starts. All device memory is released before the
        /// method exits, including when an exception is thrown.
        /// </remarks>
        /// <param name="foregorungRGB_CPU">Foreground pixels in host memory (presumably one element
        /// per pixel of a 960 x 540 image - confirm against the kernel).</param>
        /// <param name="BackgroundXYZ_CPU">Background pixels in host memory (presumably same layout
        /// as the foreground - confirm against the kernel).</param>
        /// <returns>The 960 * 540 element output image and the elapsed time in milliseconds
        /// reported by the GPU timer.</returns>
        /// <exception cref="ArgumentNullException">Either input array is null.</exception>
        public static TestOutput CorrectColour(ForeGroundStrucuture[] foregorungRGB_CPU, BackGroundStrucuture[] BackgroundXYZ_CPU)
        {
            // Fail fast, before any module translation, CSV parsing or device allocation.
            if (foregorungRGB_CPU == null)
            {
                throw new ArgumentNullException("foregorungRGB_CPU");
            }
            if (BackgroundXYZ_CPU == null)
            {
                throw new ArgumentNullException("BackgroundXYZ_CPU");
            }

            const int image_size = 960 * 540;

            // Reuse the cached module when present and still valid; otherwise translate and cache it.
            CudafyModule km = CudafyModule.TryDeserialize();
            if (km == null || !km.TryVerifyChecksums())
            {
                km = CudafyTranslator.Cudafy(typeof(ProfileStrucuture), typeof(ForeGroundStrucuture), typeof(BackGroundStrucuture), typeof(bf));
                km.TrySerialize();
            }

            // NOTE(review): this is assigned after the Cudafy() call above, so it presumably only
            // affects later translations - confirm whether it should be set before translating.
            CudafyTranslator.GenerateDebug = true;

            // CUDA device or emulator, depending on CudafyModes.Target.
            GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
            gpu.LoadModule(km);
            Console.WriteLine("Running brute force correction using {0}", gpu.GetDeviceProperties(false).Name);

            ForeGroundStrucuture[] output_image_CPU = new ForeGroundStrucuture[image_size];

            DataTable profile = ReadProfileCsv(@"C:\lev\STColorCorrection\Data\PROFILE\p3700.csv");
            ProfileStrucuture[, ,] profiles_CPU = BuildProfileLookup(profile);

            try
            {
                ProfileStrucuture[, ,] profile_GPU = gpu.CopyToDevice(profiles_CPU);

                // capture the start time
                gpu.StartTimer();
                ForeGroundStrucuture[] foregorungRGB_GPU = gpu.CopyToDevice(foregorungRGB_CPU);
                BackGroundStrucuture[] BackgroundXYZ_GPU = gpu.CopyToDevice(BackgroundXYZ_CPU);

                // device buffer for the output image, sized like output_image_CPU
                ForeGroundStrucuture[] distance_GPU = gpu.Allocate(output_image_CPU);

                // (24 * 8) x (675 * 4) = 192 x 2700 = 518,400 threads in total, i.e. image_size.
                dim3 grids   = new dim3(24, 675);
                dim3 threads = new dim3(8, 4);

                gpu.Launch(grids, threads, ((Action <GThread, ProfileStrucuture[, , ], ForeGroundStrucuture[], BackGroundStrucuture[], ForeGroundStrucuture[]>)Bruteforce), profile_GPU, foregorungRGB_GPU, BackgroundXYZ_GPU, distance_GPU);

                // copy the result back from the GPU
                gpu.CopyFromDevice(distance_GPU, output_image_CPU);

                // get stop time, and display the timing results
                double elapsedTime = gpu.StopTimer();

                TestOutput to_return   = new TestOutput();
                to_return.output_image = output_image_CPU;
                to_return.timeTaken    = elapsedTime;

                Console.WriteLine("Time to generate: {0} ms", elapsedTime);
                return to_return;
            }
            finally
            {
                // Releases every device allocation made above, on success and on failure alike.
                gpu.FreeAll();
            }
        }

        // Reads the profile CSV into a DataTable; on any failure logs the exception and returns an empty table.
        private static DataTable ReadProfileCsv(string path)
        {
            try
            {
                using (GenericParserAdapter parser = new GenericParserAdapter(path))
                {
                    System.Data.DataSet dsResult = parser.GetDataSet();
                    return dsResult.Tables[0];
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex);
                return new DataTable();
            }
        }

        // Builds the 21 x 41 x 45 (L, A, B) profile grid: every cell starts empty, then cells named by CSV rows are filled.
        private static ProfileStrucuture[, ,] BuildProfileLookup(DataTable profile)
        {
            // L 0-21 A 0-41 B 0-45
            const int lBins = 21;
            const int aBins = 41;
            const int bBins = 45;

            ProfileStrucuture[, ,] grid = new ProfileStrucuture[lBins, aBins, bBins];

            for (int indexL = 0; indexL < lBins; indexL++)
            {
                for (int indexA = 0; indexA < aBins; indexA++)
                {
                    for (int indexB = 0; indexB < bBins; indexB++)
                    {
                        grid[indexL, indexA, indexB].L       = indexL;
                        grid[indexL, indexA, indexB].A       = indexA;
                        grid[indexL, indexA, indexB].B       = indexB;
                        grid[indexL, indexA, indexB].Given_R = 0;
                        grid[indexL, indexA, indexB].Given_G = 0;
                        grid[indexL, indexA, indexB].Given_B = 0;
                        grid[indexL, indexA, indexB].ML      = 0;
                        grid[indexL, indexA, indexB].MA      = 0;
                        grid[indexL, indexA, indexB].MB      = 0;
                        grid[indexL, indexA, indexB].MX      = 0;
                        grid[indexL, indexA, indexB].MY      = 0;
                        grid[indexL, indexA, indexB].MZ      = 0;

                        grid[indexL, indexA, indexB].isempty = TRUE;
                        grid[indexL, indexA, indexB].isMoreAccurateThanOrigin = -1;
                    }
                }
            }

            // The CSV is machine-written data, so parse it culture-independently; with the thread
            // culture, a comma-decimal locale would reject values such as "0.6138".
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

            try
            {
                // Row 0 is skipped (presumably the header line - confirm against the CSV).
                for (int i = 1; i < profile.Rows.Count; i++)
                {
                    var row = profile.Rows[i];

                    // Columns 0-2 hold L, A, B; scale by 0.2 and offset A/B to get non-negative bin indices.
                    int lvalue = (int)(Convert.ToInt32(row[0].ToString(), invariant) * 0.2);
                    int avalue = (int)(Convert.ToInt32(row[1].ToString(), invariant) * 0.2) + 20;
                    int bvalue = (int)(Convert.ToInt32(row[2].ToString(), invariant) * 0.2) + 22;

                    grid[lvalue, avalue, bvalue].L = lvalue;
                    grid[lvalue, avalue, bvalue].A = avalue;
                    grid[lvalue, avalue, bvalue].B = bvalue;

                    // Columns 9-11: given R, G, B.
                    grid[lvalue, avalue, bvalue].Given_R = Convert.ToByte(row[9].ToString(), invariant);
                    grid[lvalue, avalue, bvalue].Given_G = Convert.ToByte(row[10].ToString(), invariant);
                    grid[lvalue, avalue, bvalue].Given_B = Convert.ToByte(row[11].ToString(), invariant);

                    // Columns 3-5: ML, MA, MB.
                    grid[lvalue, avalue, bvalue].ML = Convert.ToDouble(row[3].ToString(), invariant);
                    grid[lvalue, avalue, bvalue].MA = Convert.ToDouble(row[4].ToString(), invariant);
                    grid[lvalue, avalue, bvalue].MB = Convert.ToDouble(row[5].ToString(), invariant);

                    // Columns 6-8: MX, MY, MZ.
                    grid[lvalue, avalue, bvalue].MX = Convert.ToDouble(row[6].ToString(), invariant);
                    grid[lvalue, avalue, bvalue].MY = Convert.ToDouble(row[7].ToString(), invariant);
                    grid[lvalue, avalue, bvalue].MZ = Convert.ToDouble(row[8].ToString(), invariant);

                    grid[lvalue, avalue, bvalue].isempty = FALSE;
                }
            }
            catch (Exception ex)
            {
                // Best effort, as before: a malformed or out-of-range row stops the import, and
                // the rows already processed stay in the grid.
                Console.WriteLine(ex);
            }

            return grid;
        }