/// <summary>
/// Launches <c>GpuFindPathDistance</c> on the device returned by
/// <c>CudafyHost.GetDevice()</c> and reduces the per-block results on the host.
/// </summary>
/// <returns>
/// An <see cref="Answer"/> carrying the smallest distance found, the permutation
/// number that produced it, the module load time and the run time (milliseconds).
/// </returns>
internal override Answer GetAnswer()
{
    Stopwatch loadTimer = Stopwatch.StartNew();

    using (var gpu = CudafyHost.GetDevice())
    {
        gpu.LoadModule(CudafyTranslator.Cudafy());
        LoadTime = loadTimer.ElapsedMilliseconds;

        // Everything from here on (upload, launch, download, host reduction) counts as run time.
        Stopwatch runTimer = Stopwatch.StartNew();

        var deviceLatLong = gpu.CopyToDevice(_latLong.ToArray());

        // One result slot per block; the kernel fills them, the host picks the winner.
        var blockResults = new AnswerStruct[_blocksPerGrid];
        var deviceResults = gpu.Allocate(blockResults);

        gpu.SafeLaunch(_blocksPerGrid, _threadsPerBlock, GpuFindPathDistance, (int)_permutations, deviceLatLong, deviceResults);
        gpu.Synchronize();
        gpu.CopyFromDevice(deviceResults, blockResults);
        gpu.FreeAll();

        var shortest = float.MaxValue;
        var winner = 0;
        foreach (var candidate in blockResults)
        {
            if (candidate.distance < shortest)
            {
                shortest = candidate.distance;
                winner = candidate.pathNo;
            }
        }

        return new Answer
        {
            Distance = shortest,
            Permutation = winner,
            msLoadTime = LoadTime,
            msRunTime = runTimer.ElapsedMilliseconds
        };
    }
}
/// <summary>
/// Minimal kernel-launch sample: runs the <c>add</c> and <c>sub</c> kernels with the
/// arguments 2 and 7 and prints each result to the console.
/// </summary>
public static void Execute()
{
    CudafyModule module = CudafyTranslator.Cudafy();
    GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
    gpu.LoadModule(module);

    // Device-side storage for a single Int32 result (cudaMalloc one Int32).
    int[] deviceResult = gpu.Allocate<int>();
    int hostResult;

    // Alternative spelling noted by the original author: gpu.Launch(1, 1, "add", 2, 7, deviceResult);
    gpu.Launch().add(2, 7, deviceResult);
    gpu.CopyFromDevice(deviceResult, out hostResult);
    Console.WriteLine("2 + 7 = {0}", hostResult);

    gpu.Launch().sub(2, 7, deviceResult);
    gpu.CopyFromDevice(deviceResult, out hostResult);
    Console.WriteLine("2 - 7 = {0}", hostResult);

    gpu.Free(deviceResult);
}
/// <summary>
/// Selects OpenCL device 0, translates the kernels and loads the resulting module,
/// then initialises the device pointers.
/// </summary>
/// <param name="passedViewport">Picture box stored in the static <c>viewport</c> field.</param>
/// <returns>
/// <c>true</c> when the module was loaded and <c>InitDevicePointers()</c> was called;
/// <c>false</c> when no device is available or any step throws.
/// </returns>
public static bool InitGPU(PictureBox passedViewport)
{
    viewport = passedViewport;

    CudafyModes.Target = eGPUType.OpenCL; // To use OpenCL, change this enum
    CudafyModes.DeviceId = 0;
    CudafyTranslator.Language = CudafyModes.Target == eGPUType.OpenCL ? eLanguage.OpenCL : eLanguage.Cuda;

    CudafyModule km = null;
    try
    {
        int deviceCount = CudafyHost.GetDeviceCount(CudafyModes.Target);
        if (deviceCount == 0)
        {
            Console.WriteLine("No suitable {0} devices found.", CudafyModes.Target);
            return false;
        }

        gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
        Console.WriteLine("Device Name: {0}", gpu.GetDeviceProperties(false).Name);

        var result = gpu.GetDeviceProperties(true); // diagnostic data

        km = CudafyTranslator.Cudafy();
        gpu.LoadModule(km);
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex);

        // km is still null when the failure happened before translation (device
        // enumeration / GetDevice); dereferencing it here used to throw a
        // NullReferenceException out of the handler and hide the real error.
        if (km != null)
        {
            Console.WriteLine(km.SourceCode);
        }

        // Only break into the debugger when one is actually attached.
        if (Debugger.IsAttached)
        {
            Debugger.Break();
        }

        return false;
    }

    InitDevicePointers();
    return true;
}
/// <summary>
/// Translates the kernels, picks the first back end that reports at least one device
/// (CUDA, then OpenCL, then the emulator) and loads the module on the first device
/// of that back end that accepts it.
/// </summary>
/// <returns>0 once a device has loaded the module.</returns>
/// <exception cref="CtkCudafyCannotUseException">No back end reports any device.</exception>
/// <exception cref="Exception">
/// Every device of the chosen back end failed with a compile error; the last
/// <c>CudafyCompileException</c> is attached as the inner exception.
/// </exception>
public int Init()
{
    // NOTE(review): translation happens before the target is chosen, so the module is
    // built with whatever translator settings are current at this point — confirm
    // that is intended when the OpenCL fallback is taken.
    this.m_km = CudafyTranslator.Cudafy();

    var tgCount = 0;
    foreach (var candidate in new[] { eGPUType.Cuda, eGPUType.OpenCL, eGPUType.Emulator })
    {
        CudafyModes.Target = candidate;
        tgCount = CudafyHost.GetDeviceCount(CudafyModes.Target);
        if (tgCount > 0)
        {
            break;
        }
    }

    if (tgCount <= 0)
    {
        throw new CtkCudafyCannotUseException("無法使用Cudafy");
    }

    Cudafy.CudafyCompileException lastCompileError = null;
    for (int idx = 0; idx < tgCount; idx++)
    {
        try
        {
            this.m_gpu = CudafyHost.GetDevice(CudafyModes.Target, idx);
            this.m_gpu.LoadModule(Km);
            return 0;
        }
        catch (Cudafy.CudafyCompileException ex)
        {
            // This device could not build the module: remember why and try the next one.
            lastCompileError = ex;
        }
    }

    // Keep the root cause instead of discarding it with the swallowed exceptions.
    throw new Exception("Cudafy buidling fail.", lastCompileError);
}
/// <summary>
/// Vector-add sample: fills two host arrays of length <c>N</c>, adds them with the
/// <c>add</c> kernel (one block per element) and prints every sum.
/// </summary>
public static void Execute()
{
    CudafyModule module = CudafyTranslator.Cudafy();
    GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
    gpu.LoadModule(module);

    var a = new int[N];
    var b = new int[N];
    var c = new int[N];

    // Device buffer for the result.
    int[] deviceC = gpu.Allocate<int>(c);

    // Host-side inputs: a[i] = -i, b[i] = i squared.
    for (int index = 0; index < N; index++)
    {
        a[index] = -index;
        b[index] = index * index;
    }

    // Upload both inputs.
    int[] deviceA = gpu.CopyToDevice(a);
    int[] deviceB = gpu.CopyToDevice(b);

    gpu.Launch(N, 1).add(deviceA, deviceB, deviceC);

    // Download the sums and show them.
    gpu.CopyFromDevice(deviceC, c);
    for (int index = 0; index < N; index++)
    {
        Console.WriteLine("{0} + {1} = {2}", a[index], b[index], c[index]);
    }

    // Release everything allocated on the GPU.
    gpu.FreeAll();
}
/// <summary>
/// Sample entry point: uploads an int array, stores its raw device address inside a
/// <c>TestStruct</c>, runs <c>kernelTest</c> and prints the struct's <c>value</c> field.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
private unsafe static void Main(string[] args)
{
    GPGPU gpuCuda = CudafyHost.GetDevice(eGPUType.Cuda, 0);
    CudafyModule module = CudafyTranslator.Cudafy();
    gpuCuda.LoadModule(module);

    TestStruct[] hostStructs = new TestStruct[1];
    hostStructs[0] = new TestStruct();

    int[] hostInts = { 1, 8, 3 };
    int[] deviceInts = gpuCuda.CopyToDevice(hostInts);

    // Hand the device address of the int array to the struct as a 64-bit integer.
    DevicePtrEx deviceMemory = gpuCuda.GetDeviceMemory(deviceInts);
    hostStructs[0].dataPointer = deviceMemory.Pointer.ToInt64();

    TestStruct[] deviceStructs = gpuCuda.Allocate(hostStructs);
    gpuCuda.CopyToDevice(hostStructs, deviceStructs);

    gpuCuda.Launch().kernelTest(deviceStructs, deviceInts);

    gpuCuda.CopyFromDevice(deviceStructs, hostStructs);
    Console.WriteLine(hostStructs[0].value);
    Console.ReadKey();
}
/// <summary>
/// Invokes and executes a single elementary kernel function by its name.
/// </summary>
/// <param name="function">Name of the kernel function to launch.</param>
public static void Execute(string function)
{
    CudafyModule module = CudafyTranslator.Cudafy();
    GPGPU gpu = CudafyHost.GetDevice();
    gpu.LoadModule(module);

    // Device buffers sized like the host arrays; only _a is uploaded.
    int[] deviceA = gpu.Allocate(_a);
    int[] deviceB = gpu.Allocate(_b);
    int[] deviceC = gpu.Allocate(_c);
    int[] deviceD = gpu.Allocate(D);
    gpu.CopyToDevice(_a, deviceA);

    // Two launches of the same kernel: the full grid with trailing argument 1,
    // then a single thread with trailing argument 2.
    gpu.Launch(_gridSize, _blockSize, function, deviceA, deviceB, deviceC, deviceD, 1);
    gpu.Launch(1, 1, function, deviceA, deviceB, deviceC, deviceD, 2);

    gpu.CopyFromDevice(deviceD, D);

    // Release everything allocated on the GPU.
    gpu.FreeAll();
}
/// <summary>
/// Creates the renderer: translates the kernels for OpenCL 1.2, derives a square
/// block size from the first OpenCL device's thread limit and loads the module once.
/// </summary>
/// <exception cref="Exception">No OpenCL device is reported by the host.</exception>
public GpuRenderer()
{
    // Take the first device in a single pass. The previous Any() + First() pair
    // enumerated the device-property sequence twice.
    var device = CudafyHost.GetDeviceProperties(eGPUType.OpenCL).FirstOrDefault();
    if (device == null)
    {
        throw new Exception("No OpenCL devices found...");
    }

    Module = CudafyTranslator.Cudafy(eArchitecture.OpenCL12);

    // Largest side in 15..1 whose square still fits into one block.
    var blockSide = Enumerable
        .Range(1, 15)
        .Reverse()
        .First(count => count * count <= device.MaxThreadsPerBlock);
    BlockSize = new dim3(blockSide, blockSide);

    // Initialize gpu and load the module (avoids reloading every time)
    gpu = CudafyHost.GetDevice(eGPUType.OpenCL);
    gpu.LoadModule(Module);
}
/// <summary>
/// Translates the kernels for <c>ARCH</c>, acquires the configured device and loads
/// the module, updating the static <c>enabled</c>/<c>busy</c> flags on success.
/// </summary>
/// <returns>
/// <c>true</c> when the module was loaded; <c>false</c> when <c>isCudaAvailable()</c>
/// reports false or any step throws (the exception text is stored in <c>errorMessage</c>).
/// </returns>
public static bool cudaEnable()
{
    if (isCudaAvailable())
    {
        try
        {
            CudafyModule module = CudafyTranslator.Cudafy(ARCH);
            Console.WriteLine("Translator OK");

            gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
            Console.WriteLine("GPU OK");

            gpu.LoadModule(module);

            enabled = true;
            busy = false;
            return true;
        }
        catch (Exception failure)
        {
            errorMessage = failure.ToString();
        }
    }

    return false;
}
/// <summary>
/// Checks the <c>VectorAdd</c> kernel: the device result must equal
/// (input1 + input2) * constant for every one of the <c>N</c> elements.
/// </summary>
static void TempOpenCLVectorAddTest()
{
    var first = new int[N];
    var second = new int[N];
    var factors = new int[N];
    var actual = new int[N];

    // Random operands in [0, 128); the constant-memory factor is always 2.
    Random rand = new Random();
    for (int index = 0; index < N; index++)
    {
        first[index] = rand.Next(128);
        second[index] = rand.Next(128);
        factors[index] = 2;
    }

    GPGPU gpu = CudafyHost.GetDevice(eGPUType.Cuda, 0);
    Console.WriteLine(gpu.GetDeviceProperties().Name);

    CudafyTranslator.Language = eLanguage.Cuda;
    var module = CudafyTranslator.Cudafy(CudafyModes.Architecture, typeof(OpenCLTestClass));
    //mod.CudaSourceCode
    Console.WriteLine(module.SourceCode);
    gpu.LoadModule(module);

    int[] deviceFirst = gpu.CopyToDevice(first);
    int[] deviceSecond = gpu.CopyToDevice(second);
    gpu.CopyToConstantMemory(factors, OpenCLTestClass.ConstantMemory);
    int[] deviceResult = gpu.Allocate<int>(N);

#warning Work group and local size mess! http://stackoverflow.com/questions/7996537/cl-invalid-work-group-size-error-should-be-solved-though
    gpu.Launch(2, 512).VectorAdd(deviceFirst, deviceSecond, deviceResult);
    gpu.CopyFromDevice(deviceResult, 0, actual, 0, N);

    for (int index = 0; index < N; index++)
    {
        int expected = (first[index] + second[index]) * factors[index];
        Assert.AreEqual(expected, actual[index], string.Format("Error at {0}", index));
    }
}
/// <summary>
/// Translates the kernels for the device's own architecture and launches
/// <c>GenerateRipples</c> on an 8x8 grid of 64x16-thread blocks, then copies the
/// result buffer back into a host array.
/// </summary>
public void ExeTestKernel()
{
    GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, 0);
    eArchitecture architecture = gpu.GetArchitecture();
    CudafyModule module = CudafyTranslator.Cudafy(architecture);
    gpu.LoadModule(module);

    int[] hostResults = new int[N];

    // Option 1: reserve an empty block of device memory for the results.
    var deviceResults = gpu.Allocate<int>(N);

    // Option 2: fill a host array first ...
    for (int index = 0; index < N; index++)
    {
        hostResults[index] = index * 3;
    }

    // ... and upload it, which also allocates the device copy.
    var deviceFilledResults = gpu.CopyToDevice(hostResults);

    // 64*16 = 1024 threads per block (which is max for sm_30)
    var threadsPerBlock = new dim3(64, 16);

    // 8*8 = 64 blocks per grid, just for show so you get varying numbers.
    // 64 blocks * 1024 threads = 65536.
    // It is useful to align the number of threads with the amount of data
    // (notice int[65536], i.e. 1 thread per int in the array).
    var blocksPerGrid = new dim3(8, 8);

    //var threadsPerBlock = 1024; // this will only give you blockDim.x = 1024, .y = 0, .z = 0
    //var blocksPerGrid = 1;      // just for show

    gpu.Launch(blocksPerGrid, threadsPerBlock, "GenerateRipples", deviceResults, deviceFilledResults);
    gpu.CopyFromDevice(deviceResults, hostResults);
}
/// <summary>
/// Translates <c>TextInsertion</c> for the CUDA device's architecture, launches
/// <c>AHybridMethod</c> with a single thread and prints "Failed" if the buffer read
/// back from the device differs from the host input.
/// </summary>
public static void Execute()
{
    _gpu = CudafyHost.GetDevice(eGPUType.Cuda);

    CudafyModule module = CudafyTranslator.Cudafy(ePlatform.Auto, _gpu.GetArchitecture(), typeof(TextInsertion));
    Console.WriteLine(module.CompilerOutput);
    _gpu.LoadModule(module);

    var input = new int[64];
    int[] deviceInput = _gpu.CopyToDevice(input);
    int[] deviceOutput = _gpu.Allocate(input);
    var readBack = new int[64];

    _gpu.Launch(1, 1, "AHybridMethod", deviceInput, deviceOutput);

    // NOTE(review): the input buffer, not deviceOutput, is what gets copied back
    // and compared — confirm this is the intended check.
    _gpu.CopyFromDevice(deviceInput, readBack);

    for (int index = 0; index < 64; index++)
    {
        if (input[index] == readBack[index])
        {
            continue;
        }

        Console.WriteLine("Failed");
        break;
    }
}
/// <summary>
/// Passes a nested struct (<c>ValueA</c> containing <c>ValueB</c>) to
/// <c>StructTestKernel</c> and reports whether the kernel returned the value 56.
/// The translated module is cached on disk and reused while its checksums verify.
/// </summary>
public static void Execute()
{
    // Reuse the serialized module when it exists and still matches; otherwise rebuild and cache it.
    var module = CudafyModule.TryDeserialize();
    bool cacheUsable = module != null && module.TryVerifyChecksums();
    if (!cacheUsable)
    {
        module = CudafyTranslator.Cudafy(ePlatform.Auto, eArchitecture.sm_20, typeof(ValueB), typeof(ValueA), typeof(StructTest));
        module.Serialize();
    }

    GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, 0);
    gpu.LoadModule(module);

    var value = new ValueA();
    value.valueB = new ValueB();
    value.valueB.value = 56;

    var deviceOutput = gpu.Allocate<int>(1);
    gpu.Launch(1, 1, "StructTestKernel", value, deviceOutput);

    // Bring the single result back from the GPU, then release its buffer.
    int output;
    gpu.CopyFromDevice(deviceOutput, out output);
    gpu.Free(deviceOutput);

    string verdict = 56 == output ? "PASSED" : "FAILED";
    Console.WriteLine("Expected: {0} \t{1}", 56, verdict);
}
/// <summary>
/// Histogram sample: counts byte values of a random buffer with <c>histo_kernel</c>,
/// prints the GPU timing and the histogram sum, and verifies the counts on the CPU.
/// </summary>
/// <returns>
/// 0 after the run; -1 when the device is a <c>CudaGPU</c> whose compute capability is
/// below 1.2.
/// </returns>
public static int Execute()
{
    CudafyModule module = CudafyTranslator.Cudafy();
    GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);

    if (gpu is CudaGPU && gpu.GetDeviceProperties().Capability < new Version(1, 2))
    {
        Console.WriteLine("Compute capability 1.2 or higher required for atomics.");
        return -1;
    }

    gpu.LoadModule(module);

    byte[] buffer = big_random_block(SIZE);

    // cudart.dll must be accessible! Fall back to the basic query when it is not.
    GPGPUProperties properties = null;
    try
    {
        properties = gpu.GetDeviceProperties(true);
    }
    catch (DllNotFoundException)
    {
        properties = gpu.GetDeviceProperties(false);
    }

    // Capture the start time here so that the cost of all GPU operations is
    // included. Original author's measurement: with the data already on the GPU
    // and only the kernel timed, the figure would drop from 74 ms to 15 ms.
    gpu.StartTimer();

    // Upload the data and create a zeroed 256-bin histogram on the GPU.
    byte[] deviceBuffer = gpu.CopyToDevice(buffer);
    uint[] deviceHistogram = gpu.Allocate<uint>(256);
    gpu.Set(deviceHistogram);

    // Kernel launch - 2x the number of multiprocessors gave the best timing.
    int blocks = properties.MultiProcessorCount;
    if (blocks == 0)
    {
        blocks = 16;
    }
    Console.WriteLine("Processors: {0}", blocks);

    gpu.Launch(blocks * 2, 256).histo_kernel(deviceBuffer, SIZE, deviceHistogram);

    uint[] histogram = new uint[256];
    gpu.CopyFromDevice(deviceHistogram, histogram);

    // Stop the timer and display the timing results.
    float elapsedTime = gpu.StopTimer();
    Console.WriteLine("Time to generate: {0} ms", elapsedTime);

    long total = 0;
    foreach (uint bin in histogram)
    {
        total += bin;
    }
    Console.WriteLine("Histogram Sum: {0}", total);

    // CPU verification: subtracting one per input byte must leave every bin at zero.
    for (int index = 0; index < SIZE; index++)
    {
        histogram[buffer[index]]--;
    }

    for (int bin = 0; bin < 256; bin++)
    {
        if (histogram[bin] != 0)
        {
            Console.WriteLine("Failure at {0}!", bin);
        }
    }

    gpu.FreeAll();
    return 0;
}
/// <summary>
/// Translates the kernels into the static <c>km</c> module, acquires the configured
/// device into the static <c>gpu</c> field and loads the module on it.
/// </summary>
public static void prepareGPU()
{
    // Translate first, then bind to the device selected through CudafyModes.
    km = CudafyTranslator.Cudafy();

    gpu = CudafyHost.GetDevice(
        CudafyModes.Target,
        CudafyModes.DeviceId);

    gpu.LoadModule(km);
}
/// <summary>
/// Step-by-step OpenCL self-test. Translates <c>CUDACheck</c> once, then for every
/// OpenCL device loads the module, adds two random 1024-element vectors with
/// <c>TestKernelFunction</c> and verifies the sums on the host.
/// </summary>
/// <returns>
/// A lazily produced sequence of progress messages; each step runs only when the
/// caller advances the enumeration.
/// </returns>
public static IEnumerable<string> TestOpenCL()
{
    const int Length = 1024;

    yield return "Attempting to cudafy a kernel function.";
    //CudafyTranslator.Language = eLanguage.OpenCL;
    var module = CudafyTranslator.Cudafy(ePlatform.x64, eArchitecture.OpenCL, null, false, typeof(CUDACheck));
    yield return "Successfully translated to OpenCL C.";

    for (int id = 0; id < CudafyHost.GetDeviceCount(eGPUType.OpenCL); id++)
    {
        yield return "Attempting to instantiate OpenCL device object (GPGPU).";
        var gpu = CudafyHost.GetDevice(eGPUType.OpenCL, id);
        yield return string.Format("Successfully got OpenCL device {0}.", id);
        yield return "Name: " + gpu.GetDeviceProperties(false).Name;

        yield return "Attempting to load module.";
        gpu.LoadModule(module);
        yield return "Successfully loaded module.";

        yield return "Attempting to transfer data to device.";
        var a = new int[Length];
        var b = new int[Length];
        var c = new int[Length];
        Random rand = new Random();
        for (int index = 0; index < Length; index++)
        {
            a[index] = rand.Next(16384);
            b[index] = rand.Next(16384);
        }

        int[] deviceA = gpu.CopyToDevice(a);
        int[] deviceB = gpu.CopyToDevice(b);
        int[] deviceC = gpu.Allocate(c);
        yield return "Successfully transferred data to device.";

        yield return "Attempting to launch function on device.";
        gpu.Launch(4, 256).TestKernelFunction(deviceA, deviceB, deviceC);
        yield return "Successfully launched function on device.";

        yield return "Attempting to transfer results back from device.";
        gpu.CopyFromDevice(deviceC, c);
        yield return "Successfully transferred results from device.";

        yield return "Testing results.";
        int mismatches = 0;
        for (int index = 0; index < Length; index++)
        {
            if (a[index] + b[index] != c[index])
            {
                mismatches++;
            }
        }

        yield return mismatches == 0
            ? "Successfully tested results.\r\n\r\n"
            : "Test failed - results not as expected.\r\n\r\n";
    }
}
/// <summary>
/// Builds the transition and output tables of a <c>StringSearch</c> over
/// <paramref name="keys"/> on the host, uploads them together with the input text and
/// runs the <c>Dot</c> kernel over the text.
/// </summary>
/// <param name="keys">Keys handed to the <c>StringSearch</c> constructor.</param>
/// <param name="I">Input text to scan.</param>
/// <param name="n">Not referenced by this method.</param>
/// <returns>The per-character result array copied back from the device.</returns>
public static char[] Execute(String[] keys, string I, int n)
{
    GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, 0);
    eArchitecture architecture = gpu.GetArchitecture();
    CudafyModule module = CudafyTranslator.Cudafy(architecture);
    gpu.LoadModule(module);

    // Times the host-side table construction; the elapsed value is never read.
    Stopwatch setupTimer = new Stopwatch();
    setupTimer.Start();

    StringSearch search = new StringSearch(keys);
    string alphabet = "ABCDEFGHI*KLMN*PQRST*VWXYZ";
    int alphabetSize = alphabet.Length;

    // Transition table, every entry initialised to -1 before build_table1 fills it.
    int[,] transitions = new int[StringSearch.nodeCount, alphabetSize];
    for (int node = 0; node < StringSearch.nodeCount; node++)
    {
        for (int symbol = 0; symbol < alphabetSize; symbol++)
        {
            transitions[node, symbol] = -1;
        }
    }
    search.build_table1(transitions, search._root);

    char[] input = I.ToCharArray();
    int length = I.Length;
    I = "";

    int[] outputTable = new int[StringSearch.nodeCount];
    search.build_tableO(outputTable, search._root);
    search = new StringSearch();

    char[] matchedResult = new char[length];
    setupTimer.Stop();

    //CudafyModule km = CudafyModule.TryDeserialize();
    //if (km == null || !km.TryVerifyChecksums())
    //{
    //    km = CudafyTranslator.Cudafy();
    //    km.Serialize();
    //    gpu.LoadModule(km);
    //}

    gpu.SetCurrentContext();

    // Shape templates for the device allocations. StringSearch.nodeCount is read
    // again here, after the StringSearch instance above was replaced.
    int[] outputTableShape = new int[StringSearch.nodeCount];
    int[,] transitionsShape = new int[StringSearch.nodeCount, alphabetSize];

    int[,] deviceTransitions = gpu.Allocate<int>(transitionsShape);
    int[] deviceOutputTable = gpu.Allocate<int>(outputTableShape);
    char[] deviceMatchedResult = gpu.Allocate<char>(length);
    char[] deviceInput = gpu.Allocate<char>(length);
    int[] deviceInputLength = gpu.Allocate<int>(1);
    int[] inputLength = { length };

    gpu.CopyToDevice(transitions, deviceTransitions);
    gpu.CopyToDevice(outputTable, deviceOutputTable);
    gpu.CopyToDevice(matchedResult, deviceMatchedResult);
    gpu.CopyToDevice(input, deviceInput);
    gpu.CopyToDevice(inputLength, deviceInputLength);

    // Enough blocks of N threads to cover every input character.
    int blocks = (int)Math.Ceiling((double)length / N);
    gpu.Launch(blocks, N).Dot(deviceTransitions, deviceOutputTable, deviceMatchedResult, deviceInput, deviceInputLength);

    gpu.CopyFromDevice(deviceMatchedResult, matchedResult);
    gpu.FreeAll();

    return matchedResult;
}
/// <summary>
/// Benchmark for e = 2 * dy - dx over two million random ints: times the CPU loop,
/// times the <c>calc_e_v2</c> kernel (two runs, the second timing is kept), asserts
/// that both results agree and prints the timings.
/// </summary>
public static void Execute()
{
    int n = 2000000;
    Random random = new Random();

    int[] dx = new int[n];
    int[] dy = new int[n];
    int[] gpuResult = new int[n];
    int[] cpuResult = new int[n];

    // Fill the inputs with random values.
    for (int index = 0; index < n; index++)
    {
        dx[index] = random.Next();
        dy[index] = random.Next();
    }

    // CPU reference.
    double cpuSeconds = MeasureTime(() =>
    {
        for (int index = 0; index < n; index++)
        {
            cpuResult[index] = 2 * dy[index] - dx[index];
        }
    });

    CudafyModule module = CudafyTranslator.Cudafy(Program.testArchitecture);
    GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, 0);
    gpu.LoadModule(module);

    int[] deviceDx = gpu.Allocate<int>(dx);
    int[] deviceDy = gpu.Allocate<int>(dy);
    int[] deviceResult = gpu.Allocate<int>(gpuResult);

    double gpuSeconds = 0;
    gpu.CopyToDevice(dx, deviceDx);
    gpu.CopyToDevice(dy, deviceDy);

    // The kernel is timed twice; only the second measurement survives.
    for (int run = 0; run < 2; run++)
    {
        gpuSeconds = MeasureTime(() =>
        {
            //gpu.Launch(1, 1, "calc_e", n, dev_dx, dev_dy, dev_e);
            //gpu.CopyToDevice(dx, dev_dx);
            //gpu.CopyToDevice(dy, dev_dy);
            // NOTE(review): n / 512 truncates (3906 blocks = 1,999,872 threads), so the
            // last 128 elements rely on how calc_e_v2 iterates — confirm against the kernel.
            gpu.Launch(n / 512, 512, "calc_e_v2", n, deviceDx, deviceDy, deviceResult);
            gpu.Synchronize();
            //gpu.CopyFromDevice(dev_e, e);
        });
    }

    // Download is timed as well, but the figure is currently not printed.
    double copyBackSeconds = MeasureTime(() =>
    {
        gpu.CopyFromDevice(deviceResult, gpuResult);
    });

    for (int index = 0; index < n; index++)
    {
        Debug.Assert(gpuResult[index] == cpuResult[index]);
    }

    Console.WriteLine(string.Format("n = {0}", n));
    Console.WriteLine(string.Format("CPU ::: e = 2 * dy - dx ::: Excecution time: {0} ms", cpuSeconds * 1000));
    Console.WriteLine(string.Format("CUDA ::: e = 2 * dy - dx ::: Excecution time: {0} ms", gpuSeconds * 1000));
    //Console.WriteLine(string.Format("CUDA copy to host {0} ms", t4 * 1000));
    //Console.ReadKey();
}
/// <summary>
/// Applies the median filter algorithm to the bitmap previously supplied to the class.
/// Usage example:
/// <code>
/// lock (CudafyFilter.Semaphore)
/// {
///     CudafyFilter.SetBitmap(bitmap, 3, 1 &lt;&lt; 12);
///     CudafyFilter.MedianFilter();
///     bitmap = CudafyFilter.GetBitmap();
/// }
/// </code>
/// </summary>
/// <param name="gridSize">Grid size for every launch; values &lt;= 0 select a size derived from the work-item count (capped at 15).</param>
/// <param name="blockSize">Block size for every launch; values &lt;= 0 select a size derived from the work-item count (capped at 15).</param>
public static void MedianFilter(int gridSize = 0, int blockSize = 0)
{
    CudafyModule module = CudafyTranslator.Cudafy();
    GPGPU gpu = CudafyHost.GetDevice();
    gpu.LoadModule(module);

    byte[] devbytesA = gpu.Allocate<byte>(_videoMemorySize);
    byte[] devbytesB = gpu.Allocate<byte>(_videoMemorySize);
    byte[] devColor = gpu.Allocate(_color);

    // Default launch dimension: truncated cube root of the work-item count, at most 15.
    // Evaluated only when the caller did not supply an explicit size.
    Func<double, int> cubeRootCapped = items => Math.Min(15, (int)Math.Pow(items, 0.333333333333));

    int gridSize1 = (gridSize > 0) ? gridSize : cubeRootCapped(_frameItemsCount);
    int blockSize1 = (blockSize > 0) ? blockSize : cubeRootCapped(_frameItemsCount);
    int gridSize2 = (gridSize > 0) ? gridSize : cubeRootCapped(_frameItemsCount * _itemSize);
    int blockSize2 = (blockSize > 0) ? blockSize : cubeRootCapped(_frameItemsCount * _itemSize);
    int gridSize4 = (gridSize > 0)
        ? gridSize
        : cubeRootCapped(_frameItemsCount * ((1 << (_ceilingItemSize - _ceilingMiddleSize)) + _ceilingMiddleSize));
    int blockSize4 = (blockSize > 0)
        ? blockSize
        : cubeRootCapped(_frameItemsCount * ((1 << (_ceilingItemSize - _ceilingMiddleSize)) + _ceilingMiddleSize));

    // Loop over the RGB colour channels (bytes).
    // A fragment of the image - a frame with margins - is created in video memory
    // and moved across the whole image in a loop; the frame with its margins is
    // copied to video memory each time.
    // (Two neighbouring frames, margins excluded, abut each other and overlap by their margins.)
    var channels = new Dictionary<byte[], byte[]> { { _r0, _r1 }, { _g0, _g1 }, { _b0, _b1 } };
    foreach (var channel in channels)
    {
        for (int left = 0; left < (_width - _nh); left += _frameWidth - 2 * _nh)
        {
            for (int top = 0; top < (_height - _nh); top += _frameHeight - 2 * _nh)
            {
                int width = Math.Min(_frameWidth, _width - left);
                int height = Math.Min(_frameHeight, _height - top);
                int count = (width - 2 * _nh) * (height - 2 * _nh);
                Debug.WriteLine("left:" + left + ",top:" + top + ",width:" + width + ",height:" + height + ",count:" + count);

                // Copy the block (frame) of the colour layer to video memory.
                for (int x = 0; x < width; x++)
                {
                    for (int y = 0; y < height; y++)
                    {
                        _color[y * width + x] = channel.Key[(top + y) * _width + (left + x)];
                    }
                }
                gpu.CopyToDevice(_color, devColor);

                // For every interior point of the frame, build a one-dimensional
                // array of its _n*_n neighbouring points.
                gpu.Launch(gridSize2, blockSize2).SelectColorBytes(devbytesA, devColor, _itemSize, count, width, height, _n, _nh);

                // Odd-even sort, run in parallel over all the arrays created above.
                // Step 1: merge sort. Output: sorted arrays of size up to
                // 1 << (_ceilingItemSize - _ceilingMiddleSize). The two device
                // buffers swap source/destination roles on every pass.
                for (int pass = 0; pass < _ceilingItemSize - _ceilingMiddleSize; pass++)
                {
                    bool readFromA = (pass & 1) == 0;
                    gpu.Launch(gridSize4, blockSize4)
                        .Merge(
                            readFromA ? devbytesA : devbytesB,
                            readFromA ? devbytesB : devbytesA,
                            pass, 0,
                            _ceilingItemSize, _itemSize,
                            count);
                }

                // Step 2: launch sorting of the data in two neighbouring blocks,
                // alternating between neighbouring blocks.
                for (int pass = 0; pass < (1 << _ceilingMiddleSize); pass++)
                {
                    bool readFromA = (pass & 1) == ((_ceilingItemSize - _ceilingMiddleSize) & 1);
                    gpu.Launch(gridSize4, blockSize4)
                        .Merge(
                            readFromA ? devbytesA : devbytesB,
                            readFromA ? devbytesB : devbytesA,
                            _ceilingItemSize - _ceilingMiddleSize, pass & 1,
                            _ceilingItemSize, _itemSize,
                            count);
                }

                // Pick the middle elements of the arrays and copy them to the output image.
                bool resultInA = ((1 << _ceilingMiddleSize) & 1) == ((_ceilingItemSize - _ceilingMiddleSize) & 1);
                gpu.Launch(gridSize1, blockSize1).SelectNhBytes(devColor,
                    resultInA ? devbytesA : devbytesB,
                    _nhIndex, _itemSize, count, width, height, _n, _nh);
                gpu.CopyFromDevice(devColor, _color);

                for (int x = _nh; x < (width - _nh); x++)
                {
                    for (int y = _nh; y < (height - _nh); y++)
                    {
                        channel.Value[(top + y) * _width + (left + x)] = _color[y * width + x];
                    }
                }
            }
        }
    }

    // Free the memory allocated on the GPU.
    gpu.FreeAll();
}
/// <summary>
/// Runs the <c>calGPU</c> kernel over two bitmaps and flattens its output into a list
/// of six shorts per pixel: the two position values followed by result bytes 2, 1, 0, 3.
/// </summary>
/// <remarks>
/// Fixes over the previous version, which always threw for any real image:
/// the output lists were empty and written through the indexer; the loop bounds ran
/// past the end of both <c>possition</c> and <c>results</c>; the kernel output was never
/// copied back from the device; and the bitmaps stayed locked / device memory stayed
/// allocated when anything threw. The four <c>Task…Wait()</c> wrappers ran strictly
/// one after another and have been replaced by a single loop.
/// </remarks>
/// <param name="first">First image; its width and height define the pixel count.</param>
/// <param name="second">Second image. NOTE(review): presumably must have the same dimensions as <paramref name="first"/> — confirm against the kernel.</param>
/// <returns>Six shorts per pixel of <paramref name="first"/>, in pixel order.</returns>
/// <exception cref="ArgumentNullException">Either bitmap is null.</exception>
public List<short> GetDelta(Bitmap first, Bitmap second) //getting the 2 images
{
    if (first == null)
    {
        throw new ArgumentNullException("first");
    }
    if (second == null)
    {
        throw new ArgumentNullException("second");
    }

    Stopwatch stopWatch = Stopwatch.StartNew();
    CudafyModule km = CudafyTranslator.Cudafy();
    GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
    gpu.LoadModule(km);
    Console.WriteLine("load Module : " + stopWatch.ElapsedMilliseconds);

    stopWatch.Restart();
    byte[] rgbValues1 = CopyLockedBitmapBytes(first);
    byte[] rgbValues2 = CopyLockedBitmapBytes(second);
    stopWatch.Stop();
    Console.WriteLine("copy images to byte array : " + stopWatch.ElapsedMilliseconds);

    int pixelCount = first.Width * first.Height;
    int[] count = new int[2];                    // zero-initialised counters handed to the kernel
    int[] possition = new int[pixelCount * 2];   // two ints per pixel
    byte[] results = new byte[pixelCount * 4];   // four bytes per pixel
    int[] width = { first.Width, first.Height };

    try
    {
        stopWatch.Restart();
        int[] imageWidth = gpu.CopyToDevice<int>(width);
        int[] dev_count = gpu.CopyToDevice<int>(count);
        byte[] dev_bitmap1 = gpu.CopyToDevice<byte>(rgbValues1);
        byte[] dev_bitmap2 = gpu.CopyToDevice<byte>(rgbValues2);
        byte[] dev_result = gpu.Allocate<byte>(results);
        int[] dev_possition = gpu.CopyToDevice<int>(possition);
        stopWatch.Stop();
        Console.WriteLine("Copy to GPU : " + stopWatch.ElapsedMilliseconds);

        stopWatch.Restart();
        gpu.Launch(128, 1).calGPU(dev_bitmap1, dev_bitmap2, dev_result, imageWidth, dev_count, dev_possition);

        // Bring the kernel output back; without this the host arrays stay all-zero.
        gpu.CopyFromDevice(dev_result, results);
        gpu.CopyFromDevice(dev_possition, possition);
        stopWatch.Stop();
        Console.WriteLine("func : " + stopWatch.ElapsedMilliseconds);
    }
    finally
    {
        // Release device memory even when an upload, the launch or a download fails.
        gpu.FreeAll();
    }

    stopWatch.Restart();

    // NOTE(review): every pixel is emitted, as the original loops attempted; if the
    // kernel reports the number of valid entries through dev_count, trim accordingly.
    List<short> deltaList = new List<short>(pixelCount * 6);
    for (int cnt = 0; cnt < pixelCount; cnt++)
    {
        deltaList.Add((short)possition[2 * cnt + 0]);
        deltaList.Add((short)possition[2 * cnt + 1]);
        deltaList.Add((short)results[4 * cnt + 2]);
        deltaList.Add((short)results[4 * cnt + 1]);
        deltaList.Add((short)results[4 * cnt + 0]);
        deltaList.Add((short)results[4 * cnt + 3]);
    }

    stopWatch.Stop();
    Console.WriteLine("Copy to the transmotting array: " + stopWatch.ElapsedMilliseconds);

    return deltaList;
}

/// <summary>
/// Locks the whole bitmap as 32bpp RGB, copies its raw bytes into a managed array and
/// always unlocks it again.
/// </summary>
private static byte[] CopyLockedBitmapBytes(Bitmap bitmap)
{
    Rectangle area = new Rectangle(0, 0, bitmap.Width, bitmap.Height);
    BitmapData data = bitmap.LockBits(area, ImageLockMode.ReadWrite, PixelFormat.Format32bppRgb);
    try
    {
        // Math.Abs guards the array size against a negative stride.
        // NOTE(review): the single forward copy assumes a top-down layout — confirm.
        int numBytes = Math.Abs(data.Stride) * bitmap.Height;
        byte[] bytes = new byte[numBytes];
        Marshal.Copy(data.Scan0, bytes, 0, numBytes);
        return bytes;
    }
    finally
    {
        bitmap.UnlockBits(data);
    }
}
/// <summary>
/// Blind PTM search: builds a sorted peak list from the spectrum, lets the
/// <c>PtmExtractor</c> kernel propose modifications between peak pairs, then walks each
/// candidate protein to shortlist the proposals that fit, updating the protein's
/// in-silico masses, <c>Mw</c>, <c>PtmParticulars</c> and <c>MwScore</c> in place.
/// </summary>
/// <remarks>
/// The shortlisting loop used to spin forever in two situations: when the GPU stage
/// produced no candidates (nothing inside the loop could reach a <c>break</c>), and when
/// any statement in the loop body threw (the exception was logged and the same
/// iteration retried with unchanged indices). Both now terminate.
/// </remarks>
/// <param name="experimentalSpectrum">Experimental peak masses.</param>
/// <param name="molW">Molecular weight used for the complementary peaks and for tolerance scaling.</param>
/// <param name="candidateProteinsList">Proteins to annotate; modified in place.</param>
/// <param name="pepTol">Tolerance, interpreted according to <paramref name="pepUnit"/>.</param>
/// <param name="userHopThreshold">Maximum absolute difference between a candidate start peak and an in-silico mass.</param>
/// <param name="pepUnit">"ppm" or "%" to scale the error relative to <paramref name="molW"/>; any other value leaves it absolute.</param>
public static void BlindPTM(List<double> experimentalSpectrum, double molW, List<ProteinDto> candidateProteinsList, double pepTol, double userHopThreshold, string pepUnit)
{
    var stopwatch = new Stopwatch();

    // Data Preperation and Loading GPU Module
    stopwatch.Start();
    var peaks = new List<double>();
    var aminoAcidList = new List<string>();
    var modificationList = new List<string>();
    var startList = new List<double>();
    var endList = new List<double>();

    // Each experimental peak contributes itself (shifted by 1.00727647) and its
    // complement with respect to molW.
    foreach (var peak in experimentalSpectrum)
    {
        peaks.Add(peak + 1.00727647);
        peaks.Add(molW - (peak + 1.00727647));
        //peaks.Add(peak);
        //peaks.Add(molW - (peak));
    }
    peaks.Sort();

    GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target);
    CudafyModule km = CudafyModule.TryDeserialize();
    if (km == null || !km.TryVerifyChecksums())
    {
        km = CudafyTranslator.Cudafy();
        km.Serialize();
    }
    gpu.LoadModule(km);
    stopwatch.Stop();
    Console.WriteLine("Data Preperation: " + stopwatch.Elapsed);

    // GPU Module
    stopwatch.Restart();
    var lengthSquared = peaks.Count * peaks.Count;
    var peaksArray = peaks.ToArray();
    var lengthOfPeakList = new int[1];
    lengthOfPeakList[0] = peaks.Count;
    var outputArray = new char[peaks.Count, peaks.Count, 37];
    var errorArray = new double[peaks.Count, peaks.Count, 37];
    var modMassList = ModificationMass;

    char[,,] outputArrayDevice = gpu.Allocate(outputArray);
    double[,,] errorArrayDevice = gpu.Allocate(errorArray);
    double[] peaksDevice = gpu.Allocate<double>(peaksArray.Length);
    int[] lengthOfPeakListDevice = gpu.Allocate<int>(lengthOfPeakList.Length);
    double[] ptmMassListDevice = gpu.Allocate<double>(modMassList.Length);

    gpu.CopyToDevice(peaksArray, peaksDevice);
    gpu.CopyToDevice(lengthOfPeakList, lengthOfPeakListDevice);
    gpu.CopyToDevice(ModificationMass, ptmMassListDevice);

    int block = (int)Math.Ceiling((double)lengthSquared * 37 / N);
    gpu.Launch(block, N).PtmExtractor(peaksDevice, lengthOfPeakListDevice, ptmMassListDevice, outputArrayDevice, errorArrayDevice);

    gpu.CopyFromDevice(outputArrayDevice, outputArray);
    gpu.CopyFromDevice(errorArrayDevice, errorArray);
    gpu.FreeAll();

    // Every non-zero cell is a proposed modification between peak i and peak j; the
    // cell value indexes the modification lookup tables.
    for (var i = 0; i < peaks.Count; i++)
    {
        for (var j = 0; j < peaks.Count; j++)
        {
            for (var k = 0; k < 37; k++)
            {
                if (outputArray[i, j, k] == '\0')
                {
                    continue;
                }

                aminoAcidList.Add(ModificationAminoAcids[outputArray[i, j, k]].ToString());
                modificationList.Add(ModificationName[outputArray[i, j, k]]);
                startList.Add(peaks[i]);
                endList.Add(peaks[j]);
            }
        }
    }
    stopwatch.Stop();
    Console.WriteLine("GPU Generation: " + stopwatch.Elapsed);

    // PTM Shortlisting
    stopwatch.Restart();
    foreach (var protein in candidateProteinsList)
    {
        var sequence = protein.Sequence.ToCharArray();
        var hopI = 0;   // index into the candidate lists
        var thrI = 0;   // index into the protein's in-silico left masses
        var shortlistedAminoAcid = new List<string>();
        var shortlistedModification = new List<string>();
        var shortlistedEnd = new List<double>();
        var shortlistedStart = new List<double>();
        var shortlistedIndex = new List<int>();

        // With no candidates there is nothing to walk; the loop is skipped instead
        // of spinning forever.
        while (startList.Count > 0)
        {
            try
            {
                // Skip candidates that start before the last accepted one ended.
                if (shortlistedStart.Count > 0)
                {
                    if (shortlistedEnd[shortlistedEnd.Count - 1] > startList[hopI])
                    {
                        hopI = hopI + 1;
                        if (hopI == startList.Count)
                        {
                            break;
                        }
                        continue;
                    }
                }

                var diff = startList[hopI] - protein.InsilicoDetails.InsilicoMassLeft[thrI];
                if (diff <= userHopThreshold && diff >= -userHopThreshold)
                {
                    if (aminoAcidList[hopI] == sequence[thrI + 2].ToString())
                    {
                        var temproray = modificationList[hopI].Split('_');
                        var modMass = AminoAcids.ModificationTable(temproray[0]);
                        //var modMass = AminoAcids.ModTable(modificationList[hopI]);
                        diff = Math.Abs(endList[hopI] - (protein.InsilicoDetails.InsilicoMassLeft[thrI + 1] + modMass));

                        if (string.Compare(pepUnit, "ppm", StringComparison.Ordinal) == 0)
                        {
                            diff = (diff / molW) * 1000000;
                        }
                        else if (string.Compare(pepUnit, "%", StringComparison.Ordinal) == 0)
                        {
                            diff = (diff / molW) * 100;
                        }

                        if (diff < pepTol)
                        {
                            // Accept: shift all downstream in-silico masses and the
                            // protein weight by the modification mass.
                            for (var i = thrI + 1; i < protein.InsilicoDetails.InsilicoMassLeft.Count; i++)
                            {
                                protein.InsilicoDetails.InsilicoMassLeft[i] = protein.InsilicoDetails.InsilicoMassLeft[i] + modMass;
                            }
                            protein.Mw = protein.Mw + modMass;
                            shortlistedAminoAcid.Add(aminoAcidList[hopI]);
                            shortlistedModification.Add(modificationList[hopI]);
                            shortlistedEnd.Add(endList[hopI]);
                            shortlistedStart.Add(startList[hopI]);
                            shortlistedIndex.Add(thrI);
                        }
                    }
                }
                else if (diff > userHopThreshold)
                {
                    thrI = thrI + 1;
                    if (thrI == protein.InsilicoDetails.InsilicoMassLeft.Count - 1)
                    {
                        break;
                    }
                    continue;
                }
                else if (diff < -userHopThreshold)
                {
                    hopI = hopI + 1;
                    if (hopI == startList.Count)
                    {
                        break;
                    }
                    continue;
                }

                hopI = hopI + 1;
                if (hopI == startList.Count)
                {
                    break;
                }
            }
            catch (Exception exception)
            {
                Debug.WriteLine(exception.Message);

                // hopI/thrI are unchanged at this point, so retrying would throw the
                // same exception forever; stop shortlisting for this protein.
                break;
            }
        }

        for (var hopIndex = 0; hopIndex < shortlistedStart.Count; hopIndex++)
        {
            var site = new PostTranslationModificationsSiteDto
            {
                Index = shortlistedIndex[hopIndex],
                ModName = shortlistedModification[hopIndex],
                ModWeight = AminoAcids.ModificationTable(shortlistedModification[hopIndex]),
                Site = Convert.ToChar(shortlistedAminoAcid[hopIndex])
            };
            protein.PtmParticulars.Add(site);
        }

        var massError = Math.Abs(molW - protein.Mw);
        // NOTE(review): Math.Abs(...) < 0 can never be true, so the score is always
        // sqrt(massError); left unchanged because the intended special case is unclear.
        protein.MwScore = Math.Abs(massError) < 0 ? 1 : Math.Pow(massError, 0.5);
    }
    stopwatch.Stop();
    Console.WriteLine("Shortlisting :" + stopwatch.Elapsed);
}
/// <summary>
/// Walks through a CUDA SDK smoke test one step at a time, yielding a progress message
/// before and after each step.
/// </summary>
/// <remarks>
/// This is an iterator: a step only executes when the caller requests the next message, so an
/// exception thrown by a step surfaces from the enumeration right after the matching
/// "Attempting ..." message. Device memory allocated for the kernel test is released when the
/// enumeration completes, fails, or is disposed early.
/// </remarks>
/// <returns>Progress messages in the order the steps run.</returns>
public static IEnumerable<string> TestCUDASDK()
{
    const int elementCount = 1024;

    // Choose compiler options matching the pointer size of the current process.
    NvccCompilerOptions nvcc = null;
    if (IntPtr.Size == 8)
    {
        nvcc = NvccCompilerOptions.Createx64();
    }
    else
    {
        nvcc = NvccCompilerOptions.Createx86();
    }

    yield return string.Format("Platform={0}", nvcc.Platform);
    yield return "Checking for CUDA SDK at " + nvcc.CompilerPath;

    if (!nvcc.TryTest())
    {
        yield return "Could not locate CUDA Include directory.";
        yield break;
    }

    yield return string.Format("CUDA SDK Version={0}", nvcc.Version);

    yield return "Attempting to cudafy a kernel function.";
    var mod = CudafyTranslator.Cudafy(nvcc.Platform, eArchitecture.sm_11, nvcc.Version, false, typeof(CUDACheck));
    yield return "Successfully translated to CUDA C.";

    yield return "Attempting to compile CUDA C code.";
    // Only success/failure matters here; the string returned by Compile is not needed.
    mod.Compile(eGPUCompiler.CudaNvcc, true);
    yield return "Successfully compiled CUDA C into a module.";

    if (CudafyHost.GetDeviceCount(eGPUType.Cuda) <= 0)
    {
        // No CUDA device present: the translate/compile checks above are all that can be tested.
        yield break;
    }

    yield return "Attempting to instantiate CUDA device object (GPGPU).";
    var gpu = CudafyHost.GetDevice(eGPUType.Cuda, 0);
    yield return "Successfully got CUDA device 0.";

    try
    {
        yield return "Attempting to load module.";
        gpu.LoadModule(mod);
        yield return "Successfully loaded module.";

        yield return "Attempting to transfer data to GPU.";
        int[] a = new int[elementCount];
        int[] b = new int[elementCount];
        int[] c = new int[elementCount];
        Random rand = new Random();
        for (int i = 0; i < elementCount; i++)
        {
            a[i] = rand.Next(16384);
            b[i] = rand.Next(16384);
        }

        int[] dev_a = gpu.CopyToDevice(a);
        int[] dev_b = gpu.CopyToDevice(b);
        int[] dev_c = gpu.Allocate(c);
        yield return "Successfully transferred data to GPU.";

        yield return "Attempting to launch function on GPU.";
        gpu.Launch(1, elementCount).TestKernelFunction(dev_a, dev_b, dev_c);
        yield return "Successfully launched function on GPU.";

        yield return "Attempting to transfer results back from GPU.";
        gpu.CopyFromDevice(dev_c, c);
        yield return "Successfully transferred results from GPU.";

        yield return "Testing results.";
        // The check below expects c[i] == a[i] + b[i] for every element.
        int errors = 0;
        for (int i = 0; i < elementCount; i++)
        {
            if (a[i] + b[i] != c[i])
            {
                errors++;
            }
        }

        if (errors == 0)
        {
            yield return "Successfully tested results.";
        }
        else
        {
            yield return "Test failed - results not as expected.";
        }

        yield return "Checking for math libraries (FFT, BLAS, SPARSE, RAND).";
        var fft = GPGPUFFT.Create(gpu);
        int version = fft.GetVersion();
        if (version > 0)
        {
            yield return "Successfully detected.";
        }
    }
    finally
    {
        // Release the device buffers even if a step throws or the caller stops enumerating.
        gpu.FreeAll();
    }
}
/// <summary>
/// Translates the module for the configured test architecture, then runs every kernel named
/// "add_0", "add_1", ... (one per function in the module) and checks that it produced
/// <c>c[i] == a[i] + b[i]</c> for all <c>N</c> elements.
/// </summary>
public static void Execute()
{
    GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, 0);
    eArchitecture arch = Program.testArchitecture;
    CudafyModule km = CudafyTranslator.Cudafy(arch);
    gpu.LoadModule(km);

    int[] a = new int[N];
    int[] b = new int[N];
    int[] c = new int[N];

    // fill the arrays 'a' and 'b' on the CPU
    for (int i = 0; i < N; i++)
    {
        a[i] = i;
        b[i] = 2 * i;
    }

    int[] dev_a = null;
    int[] dev_b = null;
    int[] dev_c = null;
    try
    {
        // allocate the memory on the GPU
        dev_a = gpu.Allocate<int>(a);
        dev_b = gpu.Allocate<int>(b);
        dev_c = gpu.Allocate<int>(c);

        for (int l = 0; l < km.Functions.Count; l++)
        {
            string function = "add_" + l.ToString();
            Console.WriteLine(function);

            // Reset the result buffers first: otherwise a kernel that writes nothing would
            // still "pass" on the sums left behind by the previous kernel.
            Array.Clear(c, 0, c.Length);

            // copy the inputs (and the zeroed result buffer) to the GPU
            gpu.CopyToDevice(a, dev_a);
            gpu.CopyToDevice(b, dev_b);
            gpu.CopyToDevice(c, dev_c);

            gpu.Launch(128, 1, function, dev_a, dev_b, dev_c);

            // copy the array 'c' back from the GPU to the CPU
            gpu.CopyFromDevice(dev_c, c);

            // verify that the GPU did the work we requested; report only the first mismatch
            bool success = true;
            for (int i = 0; i < N; i++)
            {
                if ((a[i] + b[i]) != c[i])
                {
                    Console.WriteLine("{0} + {1} != {2}", a[i], b[i], c[i]);
                    success = false;
                    break;
                }
            }

            if (success)
            {
                Console.WriteLine("We did it!");
            }
        }
    }
    finally
    {
        // free whatever was allocated on the GPU, even when a step above threw
        if (dev_a != null)
        {
            gpu.Free(dev_a);
        }

        if (dev_b != null)
        {
            gpu.Free(dev_b);
        }

        if (dev_c != null)
        {
            gpu.Free(dev_c);
        }
    }
}
/// <summary>
/// Runs the <c>QuickCorr</c> kernel over a 1024 x 768 image and returns its output together
/// with the elapsed time reported by the GPU timer.
/// </summary>
/// <remarks>
/// Before launching, the method builds three host-side tables and copies them to the device:
/// the 21 x 41 x 45 profile table (filled from the p3700.csv profile), an uninitialised
/// per-pixel sample table, and a 256 x 256 x 256 lookup from RGB to profile bin indices.
/// Failures while reading or parsing the profile are written to the console and processing
/// continues with whatever part of the table was filled.
/// </remarks>
/// <param name="foregorungRGB_CPU">Foreground colours; copied to the device as kernel input.</param>
/// <param name="BackgroundXYZ_CPU">Background values; copied to the device as kernel input.</param>
/// <returns>The kernel output array and the timer reading in a <c>TestOutput</c>.</returns>
public static TestOutput CorrectColour(ForeGroundStrucuture[] foregorungRGB_CPU, BackGroundStrucuture[] BackgroundXYZ_CPU)
{
    const int image_size = 1024 * 768;
    const int channelLevels = 256; // one lookup entry per possible byte value of R, G and B

    // Reuse the serialized module when its checksums still match; otherwise translate and cache it.
    CudafyModule km = CudafyModule.TryDeserialize();
    if (km == null || !km.TryVerifyChecksums())
    {
        km = CudafyTranslator.Cudafy(typeof(ProfileStrucuture), typeof(ForeGroundStrucuture), typeof(BackGroundStrucuture), typeof(SampleStructure), typeof(quick_corr));
        km.TrySerialize();
    }

    CudafyTranslator.GenerateDebug = true;

    // cuda or emulator, depending on CudafyModes
    GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
    Console.WriteLine("Running quick correction using {0}", gpu.GetDeviceProperties(false).Name);
    gpu.LoadModule(km);

    ForeGroundStrucuture[] distance_CPU = new ForeGroundStrucuture[image_size];

    // Load the profile CSV; on failure the empty table is kept and the error is only reported.
    DataTable profile = new DataTable();
    try
    {
        using (GenericParserAdapter parser = new GenericParserAdapter(@"C:\lev\STColorCorrection\Data\PROFILE\p3700.csv"))
        {
            System.Data.DataSet dsResult = parser.GetDataSet();
            profile = dsResult.Tables[0];
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex);
    }

    // L 0-21 A 0-41 B 0-45
    ProfileStrucuture[, ,] profiles_CPU = new ProfileStrucuture[21, 41, 45];
    SampleStructure[,] samples_CPU = new SampleStructure[image_size, 6];
    FGLookupStructure[, ,] fg_loopup_CPU = new FGLookupStructure[channelLevels, channelLevels, channelLevels];

    // Mark every profile bin as empty until the CSV says otherwise.
    for (int indexL = 0; indexL < 21; indexL++)
    {
        for (int indexA = 0; indexA < 41; indexA++)
        {
            for (int indexB = 0; indexB < 45; indexB++)
            {
                profiles_CPU[indexL, indexA, indexB].L = indexL;
                profiles_CPU[indexL, indexA, indexB].A = indexA;
                profiles_CPU[indexL, indexA, indexB].B = indexB;
                profiles_CPU[indexL, indexA, indexB].ML = 0;
                profiles_CPU[indexL, indexA, indexB].MA = 0;
                profiles_CPU[indexL, indexA, indexB].MB = 0;
                profiles_CPU[indexL, indexA, indexB].MX = 0;
                profiles_CPU[indexL, indexA, indexB].MY = 0;
                profiles_CPU[indexL, indexA, indexB].MZ = 0;
                profiles_CPU[indexL, indexA, indexB].distance = -1.0;
                profiles_CPU[indexL, indexA, indexB].weight = -1.0;
                profiles_CPU[indexL, indexA, indexB].isempty = TRUE;
                profiles_CPU[indexL, indexA, indexB].isMoreAccurateThanOrigin = FALSE;
            }
        }
    }

    // Fill the bins listed in the CSV. Row 0 is skipped (presumably a header row - confirm).
    // Columns 0-2 are scaled by 0.2 and offset (A by 20, B by 22) to obtain array indices;
    // columns 3-5 go to ML/MA/MB and columns 6-8 to MX/MY/MZ.
    int lvalue, avalue, bvalue;
    try
    {
        for (int i = 1; i < profile.Rows.Count; i++)
        {
            lvalue = Convert.ToInt32(profile.Rows[i][0].ToString());
            avalue = Convert.ToInt32(profile.Rows[i][1].ToString());
            bvalue = Convert.ToInt32(profile.Rows[i][2].ToString());

            lvalue = (int)(lvalue * 0.2);
            avalue = (int)(avalue * 0.2) + 20;
            bvalue = (int)(bvalue * 0.2) + 22;

            profiles_CPU[lvalue, avalue, bvalue].L = lvalue;
            profiles_CPU[lvalue, avalue, bvalue].A = avalue;
            profiles_CPU[lvalue, avalue, bvalue].B = bvalue;

            profiles_CPU[lvalue, avalue, bvalue].ML = (double)Convert.ToDouble(profile.Rows[i][3].ToString());
            profiles_CPU[lvalue, avalue, bvalue].MA = (double)Convert.ToDouble(profile.Rows[i][4].ToString());
            profiles_CPU[lvalue, avalue, bvalue].MB = (double)Convert.ToDouble(profile.Rows[i][5].ToString());

            profiles_CPU[lvalue, avalue, bvalue].MX = (double)Convert.ToDouble(profile.Rows[i][6].ToString());
            profiles_CPU[lvalue, avalue, bvalue].MY = (double)Convert.ToDouble(profile.Rows[i][7].ToString());
            profiles_CPU[lvalue, avalue, bvalue].MZ = (double)Convert.ToDouble(profile.Rows[i][8].ToString());

            profiles_CPU[lvalue, avalue, bvalue].isempty = FALSE;
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex);
    }

    // Build the RGB -> profile-bin lookup. The loops must include 255: the previous "< 255"
    // bounds left every entry with a channel value of 255 (e.g. pure white) at its default.
    for (int r = 0; r < channelLevels; r++)
    {
        for (int g = 0; g < channelLevels; g++)
        {
            for (int b = 0; b < channelLevels; b++)
            {
                Point3D foregroundLAB = ToLAB(new ForeGroundStrucuture((byte)r, (byte)g, (byte)b));

                // Round each LAB component to the nearest multiple of 5.
                int binL = ((int)Math.Round(foregroundLAB.X / 5.0)) * 5;
                int binA = ((int)Math.Round(foregroundLAB.Y / 5.0)) * 5;
                int binB = ((int)Math.Round(foregroundLAB.Z / 5.0)) * 5;

                // Clamp to fixed limits before converting to array indices.
                if (binL > 100)
                {
                    binL = 100;
                }

                if (binA < -86.17385493791946)
                {
                    binA = -85;
                }

                if (binA > 98.2448002875424)
                {
                    binA = 100;
                }

                if (binB < -107.8619171648283)
                {
                    binB = -110;
                }

                if (binB > 94.47705120353054)
                {
                    binB = 95;
                }

                // Same scale/offset as used for the profile table indices above.
                fg_loopup_CPU[r, g, b].L = (int)(binL * 0.2) + 0;
                fg_loopup_CPU[r, g, b].A = (int)(binA * 0.2) + 20;
                fg_loopup_CPU[r, g, b].B = (int)(binB * 0.2) + 22;
            }
        }
    }

    try
    {
        // Lookup tables are uploaded before the timer starts, so they are not part of the timing.
        ProfileStrucuture[, ,] profile_GPU = gpu.CopyToDevice(profiles_CPU);
        SampleStructure[,] samples_GPU = gpu.CopyToDevice(samples_CPU);
        FGLookupStructure[, ,] fg_loopup_GPU = gpu.CopyToDevice(fg_loopup_CPU);

        // capture the start time
        gpu.StartTimer();

        ForeGroundStrucuture[] foregorungRGB_GPU = gpu.CopyToDevice(foregorungRGB_CPU);
        BackGroundStrucuture[] BackgroundXYZ_GPU = gpu.CopyToDevice(BackgroundXYZ_CPU);

        // output buffer
        ForeGroundStrucuture[] distance_GPU = gpu.Allocate(distance_CPU);

        // Image size: 1024 x 768, covered by 16 x 16 thread blocks.
        dim3 grids = new dim3(1024 / 16, 768 / 16);
        dim3 threads = new dim3(16, 16);

        gpu.Launch(grids, threads, ((Action<GThread, ProfileStrucuture[, ,], ForeGroundStrucuture[], BackGroundStrucuture[], ForeGroundStrucuture[], SampleStructure[,], FGLookupStructure[, ,]>)QuickCorr), profile_GPU, foregorungRGB_GPU, BackgroundXYZ_GPU, distance_GPU, samples_GPU, fg_loopup_GPU);

        // copy the result back from the GPU
        gpu.CopyFromDevice(distance_GPU, distance_CPU);

        // get stop time, and display the timing results
        double elapsedTime = gpu.StopTimer();

        TestOutput to_return = new TestOutput();
        to_return.output_image = distance_CPU;
        to_return.timeTaken = elapsedTime;
        Console.WriteLine("Time to generate: {0} ms", elapsedTime);

        return to_return;
    }
    finally
    {
        // Releases every device allocation made above, including when a copy or the launch throws.
        gpu.FreeAll();
    }
}
/// <summary>
/// Visits every device of each GPU type (Cuda, OpenCL, Emulator) and, for each language
/// (Cuda, OpenCL), translates the <c>Primitives</c> type and loads the resulting module onto
/// the device unless a module of that name is already loaded.
/// </summary>
/// <remarks>
/// A compile failure for one language is ignored so the next language can be tried. Missing
/// native libraries, invalid operations and Cloo compute errors abandon the current GPU type.
/// </remarks>
private void InitializeGPUs()
{
    eGPUType[] candidateTypes = { eGPUType.Cuda, eGPUType.OpenCL, eGPUType.Emulator };
    eLanguage[] candidateLanguages = { eLanguage.Cuda, eLanguage.OpenCL };

    for (int typeIndex = 0; typeIndex < candidateTypes.Length; typeIndex++)
    {
        eGPUType gpuType = candidateTypes[typeIndex];

        try
        {
            int deviceTotal = CudafyHost.GetDeviceCount(gpuType);

            for (int deviceNumber = 0; deviceNumber < deviceTotal; deviceNumber++)
            {
                GPGPU device = CudafyHost.GetDevice(gpuType, deviceNumber);
                GPGPUProperties deviceProperties = device.GetDeviceProperties(true);
                CudafyModes.Target = gpuType;

                for (int languageIndex = 0; languageIndex < candidateLanguages.Length; languageIndex++)
                {
                    eLanguage language = candidateLanguages[languageIndex];

                    // A random input filename avoids clashes on the default temp file when
                    // several threads (unit tests) translate at the same time.
                    string tempSourceFile = Path.GetRandomFileName();

                    try
                    {
                        CudafyTranslator.Language = language;

                        CompileProperties options = CompilerHelper.Create(
                            ePlatform.Auto,
                            eArchitecture.Unknown,
                            eCudafyCompileMode.Default,
                            CudafyTranslator.WorkingDirectory,
                            CudafyTranslator.GenerateDebug);
                        options.InputFile = tempSourceFile;

                        // If this line fails with NCrunch/Unit tests, there probably is a new version of Cudafy.NET
                        // and it needs to be registered in the GAC like this: gacutil -i Cudafy.NET.dll
                        CudafyModule translatedModule = CudafyTranslator.Cudafy(options, typeof(Primitives));

                        bool alreadyLoaded = device.IsModuleLoaded(translatedModule.Name);
                        if (!alreadyLoaded)
                        {
                            device.LoadModule(translatedModule);
                        }

                        device.EnableMultithreading();

                        // Display name for the device/type/language combination; the code that
                        // registered it is currently disabled, so the value is not stored.
                        string gpuName = string.Concat(
                            deviceProperties.Name.Trim(),
                            " - ",
                            gpuType.ToString(),
                            " - ",
                            language.ToString());
                    }
                    catch (CudafyCompileException)
                    {
                        // Language not supported
                    }
                    finally
                    {
                        File.Delete(tempSourceFile);

                        // ncrunch: no coverage start
                    }
                }
            }
        }
        catch (DllNotFoundException)
        {
        }
        catch (InvalidOperationException)
        {
            // Language not supported
        }
        catch (Cloo.ComputeException)
        {
            // Language not supported
        }

        // ncrunch: no coverage end
    }
}
/// <summary>
/// Translates the module for the <c>sm_20</c> architecture and keeps it in <c>_cm</c>.
/// </summary>
public void SetUp()
{
    const eArchitecture targetArchitecture = eArchitecture.sm_20;

    _cm = CudafyTranslator.Cudafy(targetArchitecture);
}
/// <summary>
/// Runs the "DoubleAllValues" kernel first with blocking copies and then with smart-copy
/// asynchronous transfers on separate streams, prints the elapsed milliseconds of each phase,
/// and asserts that the output buffer holds the doubled input.
/// </summary>
/// <remarks>
/// The test returns immediately on OpenCL devices. The input buffer is doubled in place at the
/// end to serve as the expected value for the comparison.
/// </remarks>
public void Test_smartCopyToDevice()
{
    if (_gpu is OpenCLDevice)
    {
        Console.WriteLine("Device not supporting smart copy, so skip.");
        return;
    }

    // Use the cached module when its checksums are still valid; rebuild and cache it otherwise.
    CudafyModule module = CudafyModule.TryDeserialize();
    bool cacheUsable = module != null && module.TryVerifyChecksums();
    if (!cacheUsable)
    {
        module = CudafyTranslator.Cudafy(CudafyModes.Architecture);
        module.Serialize();
    }

    _gpu.LoadModule(module);

    _gpuuintBufferIn = _gpu.Allocate<uint>(N);
    _gpuuintBufferOut = _gpu.Allocate<uint>(N);

    const int batchSize = 8;
    const int repetitions = 6;

    // Phase 1: blocking copy in, launch, blocking copy out.
    Stopwatch timer = Stopwatch.StartNew();
    for (int pass = 0; pass < repetitions; pass++)
    {
        for (int item = 0; item < batchSize; item++)
        {
            _gpu.CopyToDevice(_uintBufferIn, 0, _gpuuintBufferIn, 0, N);
            _gpu.Launch(N / 512, 512, "DoubleAllValues", _gpuuintBufferIn, _gpuuintBufferOut);
            _gpu.CopyFromDevice(_gpuuintBufferOut, 0, _uintBufferOut, 0, N);
        }
    }

    Console.WriteLine(timer.ElapsedMilliseconds);

    // One host staging buffer per stream and direction for the asynchronous phase.
    IntPtr[] stagingPostIn = new IntPtr[batchSize];
    IntPtr[] stagingPostOut = new IntPtr[batchSize];
    for (int slot = 0; slot < batchSize; slot++)
    {
        stagingPostIn[slot] = _gpu.HostAllocate<uint>(N);
        stagingPostOut[slot] = _gpu.HostAllocate<uint>(N);
    }

    // Phase 2: the same work issued asynchronously; stream ids run from 1 to batchSize.
    _gpu.EnableSmartCopy();
    timer.Restart();
    for (int pass = 0; pass < repetitions; pass++)
    {
        for (int streamId = 1; streamId <= batchSize; streamId++)
        {
            _gpu.CopyToDeviceAsync(_uintBufferIn, 0, _gpuuintBufferIn, 0, N, streamId, stagingPostIn[streamId - 1]);
        }

        for (int streamId = 1; streamId <= batchSize; streamId++)
        {
            _gpu.LaunchAsync(N / 256, 256, streamId, "DoubleAllValues", _gpuuintBufferIn, _gpuuintBufferOut);
        }

        for (int streamId = 1; streamId <= batchSize; streamId++)
        {
            _gpu.CopyFromDeviceAsync(_gpuuintBufferOut, 0, _uintBufferOut, 0, N, streamId, stagingPostOut[streamId - 1]);
        }

        // Every stream is synchronized twice per pass, exactly as in the original sequence.
        for (int streamId = 1; streamId <= batchSize; streamId++)
        {
            _gpu.SynchronizeStream(streamId);
        }

        for (int streamId = 1; streamId <= batchSize; streamId++)
        {
            _gpu.SynchronizeStream(streamId);
        }
    }

    Console.WriteLine(timer.ElapsedMilliseconds);
    _gpu.DisableSmartCopy();

    // Turn the input into the expected output, then compare with what came back.
    for (int index = 0; index < N; index++)
    {
        _uintBufferIn[index] *= 2;
    }

    Assert.IsTrue(Compare(_uintBufferIn, _uintBufferOut));
    ClearOutputsAndGPU();
}
/// <summary>
/// Builds the network (nodes, layer offsets, randomly initialised weights) from the shape of
/// <paramref name="features"/> and <paramref name="labels"/>, then trains it on the GPU one
/// epoch at a time until a stopping condition is met.
/// </summary>
/// <remarks>
/// The first 75% of the rows are used for training and the remainder for validation. Training
/// stops when the validation MSE reaches zero, after more than 10000 epochs, or after 20
/// consecutive epochs counted while no significant improvement is seen. If the last epoch was
/// not the best one, the best weights are restored. When <c>OutputFileName</c> is set, progress
/// and weights are appended to that file, which is closed even if training throws.
/// </remarks>
/// <param name="features">Feature matrix; its column count defines the input layer size.</param>
/// <param name="labels">Label matrix; each column contributes one output node when it has
/// fewer than two distinct values, otherwise one output node per value.</param>
/// <param name="colMin">Not used by this implementation.</param>
/// <param name="colMax">Not used by this implementation.</param>
public override void VTrain(VMatrix features, VMatrix labels, double[] colMin, double[] colMax)
{
    // Default topology: a single hidden layer with twice as many nodes as there are features.
    if ((m_lCount == null) || (m_lCount.Length < 3))
    {
        m_lCount = new int[3] { 0, features.Cols() * 2, 0 };
    }

    List<Node> nodes = new List<Node>();

    // add the input nodes
    m_lCount[0] = features.Cols();
    for (var n = 0; n < m_lCount[0]; n++)
    {
        nodes.Add(new Node(-1, -1, 0, 0, 0));
    }

    // Each node owns a contiguous weight range: one weight per node of the previous layer plus one.
    int numWeights = m_lCount[0] + 1;
    int wBegIdx = 0;

    // add the nodes for the hidden layers
    for (var layer = 1; layer < m_lCount.Length - 1; layer++)
    {
        for (var n = 0; n < m_lCount[layer]; n++)
        {
            nodes.Add(new Node(wBegIdx, wBegIdx + numWeights - 1, 0, 0, 0));
            wBegIdx += numWeights;
        }

        numWeights = m_lCount[layer] + 1;
    }

    // figure out how many outputs we need
    int oCount = 0;
    for (var col = 0; col < labels.Cols(); col++)
    {
        var labelValueCount = labels.ValueCount(col);
        if (labelValueCount < 2)
        {
            // continuous
            oCount++;
        }
        else
        {
            oCount += labelValueCount;
        }
    }

    // update the layer arrays: m_lBegIdx[i] is the index of the first node of layer i
    m_lCount[m_lCount.Length - 1] = oCount;
    m_lBegIdx = new int[m_lCount.Length];
    m_lBegIdx[0] = 0;
    for (var i = 1; i < m_lCount.Length; i++)
    {
        m_lBegIdx[i] = m_lBegIdx[i - 1] + m_lCount[i - 1];
    }

    // add the output nodes
    for (var col = 0; col < labels.Cols(); col++)
    {
        var labelValueCount = labels.ValueCount(col);
        if (labelValueCount < 2)
        {
            // continuous
            nodes.Add(new Node(wBegIdx, wBegIdx + numWeights - 1, 1, col, -1));
            wBegIdx += numWeights;
        }
        else
        {
            for (var n = 0; n < labelValueCount; n++)
            {
                nodes.Add(new Node(wBegIdx, wBegIdx + numWeights - 1, 0, col, n));
                wBegIdx += numWeights;
            }
        }
    }

    m_nodes = nodes.ToArray();

    // create the weights, each computed as 0.1 - (random * 0.2)
    m_weights = new double[wBegIdx];
    m_bestWeights = new double[wBegIdx];
    m_deltas = new double[wBegIdx];
    for (var i = 0; i < wBegIdx; i++)
    {
        m_weights[i] = (double)(0.1 - (m_rand.NextDouble() * 0.2));
        m_bestWeights[i] = m_weights[i];
        m_deltas[i] = 0;
    }

    if (!string.IsNullOrEmpty(OutputFileName))
    {
        m_outputFile = File.AppendText(OutputFileName);
    }

    try
    {
        // The first 75% of the rows form the training set, copied into plain arrays for the GPU.
        int trainSize = (int)(0.75 * features.Rows());

        double[,] trainFeatures = new double[trainSize, features.Cols()];
        for (int r = 0; r < trainSize; r++)
        {
            for (int c = 0; c < features.Cols(); c++)
            {
                trainFeatures[r, c] = features.Get(r, c);
            }
        }

        double[,] trainLabels = new double[trainSize, labels.Cols()];
        for (int r = 0; r < trainSize; r++)
        {
            for (int c = 0; c < labels.Cols(); c++)
            {
                trainLabels[r, c] = labels.Get(r, c);
            }
        }

        // Row order is shuffled through this index array rather than by moving the rows.
        int[] fIdx = new int[trainSize];
        for (int i = 0; i < fIdx.Length; i++)
        {
            fIdx[i] = i;
        }

        // The remaining rows form the validation set.
        VMatrix validationFeatures = new VMatrix(features, trainSize, 0, features.Rows() - trainSize, features.Cols());
        VMatrix validationLabels = new VMatrix(labels, trainSize, 0, labels.Rows() - trainSize, labels.Cols());

        int epoch = 0;                          // current epoch number
        int bestEpoch = 0;                      // epoch number of best MSE
        int eCount = 0;                         // number of epochs since the best MSE
        bool checkDone = false;                 // if true, check to see if we're done
        double bestMSE = double.MaxValue;       // best validation MSE so far
        double bestAccuracy = double.MaxValue;  // validation accuracy at the best MSE

        Console.WriteLine("Epoch\tMSE (validation)\taccuracy (validation)");
        if (m_outputFile != null)
        {
            m_outputFile.Write("Layers: ");
            for (var l = 0; l < m_lCount.Length - 1; l++)
            {
                m_outputFile.Write(m_lCount[l]);
                m_outputFile.Write('x');
            }

            m_outputFile.WriteLine(m_lCount[m_lCount.Length - 1]);
            m_outputFile.WriteLine("Momentum: " + m_momentum);
            m_outputFile.WriteLine();
            m_outputFile.WriteLine("Weights");
            PrintWeights();
            m_outputFile.WriteLine("Epoch\tMSE (validation)\taccuracy (validation)");
        }

        CudafyModule km = CudafyTranslator.Cudafy();
        GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
        gpu.LoadModule(km);

        for (; ; )
        {
            // Count the epoch before training it. Without this increment the counter stayed at
            // 0, so the "epoch == 1" branch and the 10000-epoch cap below never fired and the
            // best weights could never be restored after the loop.
            epoch++;

            // shuffle the training set
            Shuffle(ref fIdx, m_rand);

            try
            {
                double[,] g_trainFeatures = gpu.CopyToDevice(trainFeatures);
                double[,] g_trainLabels = gpu.CopyToDevice(trainLabels);
                int[] g_fIdx = gpu.CopyToDevice(fIdx);
                int[] g_lCount = gpu.CopyToDevice(m_lCount);
                int[] g_lBegIdx = gpu.CopyToDevice(m_lBegIdx);
                Node[] g_nodes = gpu.CopyToDevice(m_nodes);
                double[] g_weights = gpu.CopyToDevice(m_weights);
                double[] g_deltas = gpu.CopyToDevice(m_deltas);

                // Launch trainSize / 256 blocks of 256 threads each.
                // NOTE(review): integer division yields zero blocks when trainSize < 256 and
                // fewer than trainSize threads when it is not a multiple of 256 - confirm
                // against the kernel how rows are assigned to threads.
                gpu.Launch(trainSize / 256, 256).TrainEpoch(g_trainFeatures, g_trainLabels, g_fIdx, g_lCount, g_lBegIdx, g_nodes, g_weights, g_deltas, m_rate, m_momentum);

                // copy the arrays back from the GPU to the CPU
                gpu.CopyFromDevice(g_weights, m_weights);
                gpu.CopyFromDevice(g_deltas, m_deltas);
                gpu.CopyFromDevice(g_fIdx, fIdx);
            }
            finally
            {
                // free the memory allocated on the GPU, also when a copy or the launch fails
                gpu.FreeAll();
            }

            // check the MSE after this epoch
            double mse = VGetMSE(validationFeatures, validationLabels);

            // check the validation accuracy after this epoch
            double accuracy = VMeasureAccuracy(validationFeatures, validationLabels, null);

            Console.WriteLine(string.Format("{0}-{1}\t{2}\t{3}", epoch, eCount, mse, accuracy));
            if (m_outputFile != null)
            {
                m_outputFile.WriteLine(string.Format("{0}-{1}\t{2}\t{3}", epoch, eCount, mse, accuracy));
                m_outputFile.Flush();
            }

            if ((mse == 0.0) || (epoch > 10000))
            {
                break;
            }
            else if ((epoch == 1) || (mse < bestMSE))
            {
                if (epoch == 1)
                {
                    // save the initial MSE
                    bestMSE = mse;
                }
                else if ((mse / bestMSE) > 0.99)
                {
                    // Improved by less than 1%: start counting towards the stop, unless already counting.
                    if (!checkDone)
                    {
                        checkDone = true;
                        eCount = 0;
                    }
                }
                else
                {
                    // Significant improvement: stop counting.
                    checkDone = false;
                    eCount = 0;
                }

                // save the best for later
                bestMSE = mse;
                bestAccuracy = accuracy;
                bestEpoch = epoch;
                SaveBestWeights();
            }
            else if (!checkDone)
            {
                checkDone = true;
                eCount = 0;
            }

            if (checkDone)
            {
                // check to see if we're done
                eCount++;
                if (eCount >= 20)
                {
                    break;
                }
            }
        }

        if (m_outputFile != null)
        {
            m_outputFile.WriteLine();
            m_outputFile.WriteLine("Weights");
            PrintWeights();
        }

        // Fall back to the best weights seen when the final epoch was not the best one.
        if ((bestEpoch > 0) && (bestEpoch != epoch))
        {
            RestoreBestWeights();
            if (m_outputFile != null)
            {
                m_outputFile.WriteLine();
                m_outputFile.WriteLine(string.Format("Best Weights (from Epoch {0}, valMSE={1}, valAcc={2})", bestEpoch, bestMSE, bestAccuracy));
                PrintWeights();
            }
        }
    }
    finally
    {
        if (m_outputFile != null)
        {
            m_outputFile.Close();
        }
    }
}
/// <summary>
/// Benchmark driver: loads the colour profile, then for 500 random foreground/background
/// pairs runs the brute-force, quick-correction, snake and half-step correctors and writes
/// their distances, timings and resulting bin colours to two CSV files.
/// </summary>
/// <remarks>
/// Any exception is written to the console. The CSV files are closed even when an iteration
/// fails, and the program waits for a key press before exiting.
/// </remarks>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    try
    {
        // Reuse the serialized module when its checksums still match; otherwise translate and cache it.
        CudafyModule km = CudafyModule.TryDeserialize();
        if (km == null || !km.TryVerifyChecksums())
        {
            km = CudafyTranslator.Cudafy();
            km.TrySerialize();
        }

        CudafyTranslator.GenerateDebug = true;

        // cuda or emulator
        GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
        gpu.LoadModule(km);

        // Transform applied to profile LAB values before the bin lookup: translate by
        // (0, 100, 110), then scale every axis by 1/5.
        Matrix3D navigationMatrix = new Matrix3D();
        navigationMatrix.Translate(new Vector3D(0, 100, 110));
        navigationMatrix.Scale(new Vector3D((double)1 / 5, (double)1 / 5, (double)1 / 5));

        // Load the profile in a three dimensional array
        Bin[, ,] p3700 = new Bin[RANGEL, RANGEA, RANGEB];
        for (int l = 0; l < RANGEL; l++)
        {
            for (int a = 0; a < RANGEA; a++)
            {
                for (int b = 0; b < RANGEB; b++)
                {
                    p3700[l, a, b] = new Bin(l, a, b);
                }
            }
        }

        try
        {
            // add the csv bin file
            using (GenericParserAdapter parser = new GenericParserAdapter(@"C:\lev\STColorCorrection\Data\PROFILE\p3700.csv"))
            {
                System.Data.DataSet dsResult = parser.GetDataSet();
                profile = dsResult.Tables[0];
            }
        }
        catch (Exception loadError)
        {
            // Previously swallowed silently; report it so a missing or unreadable profile is visible.
            Console.WriteLine(loadError);
        }

        // Row 0 is skipped (presumably a header row - confirm against the CSV).
        for (int i = 1; i < profile.Rows.Count; i++)
        {
            // lab value as read from the profile
            Point3D labBin = new Point3D();
            labBin.X = Convert.ToDouble(profile.Rows[i][0].ToString());
            labBin.Y = Convert.ToDouble(profile.Rows[i][1].ToString());
            labBin.Z = Convert.ToDouble(profile.Rows[i][2].ToString());

            // transformed point
            Point3D labCoordinate = navigationMatrix.Transform(labBin);
            if (labCoordinate.X == 20 && labCoordinate.Y == 20 && labCoordinate.Z == 22)
            {
                Console.WriteLine("empty");
            }

            // gets the bin to fill up
            Bin actualBin = GetProfileBin(p3700, labCoordinate);

            // bin RGB value
            actualBin.binRGB.X = Convert.ToByte(profile.Rows[i][9].ToString());
            actualBin.binRGB.Y = Convert.ToByte(profile.Rows[i][10].ToString());
            actualBin.binRGB.Z = Convert.ToByte(profile.Rows[i][11].ToString());

            // measured LAB values
            actualBin.measuredLAB.X = Convert.ToDouble(profile.Rows[i][3].ToString());
            actualBin.measuredLAB.Y = Convert.ToDouble(profile.Rows[i][4].ToString());
            actualBin.measuredLAB.Z = Convert.ToDouble(profile.Rows[i][5].ToString());

            // measured XYZ values
            actualBin.measuredXYZ.X = Convert.ToDouble(profile.Rows[i][6].ToString());
            actualBin.measuredXYZ.Y = Convert.ToDouble(profile.Rows[i][7].ToString());
            actualBin.measuredXYZ.Z = Convert.ToDouble(profile.Rows[i][8].ToString());

            actualBin.isEmpty = false;
        }

        // Column names of the two output files.
        // NOTE(review): "B_fg_in" appears twice (RGB blue and LAB b) - confirm whether the
        // second one should carry a distinct name before changing the file format.
        string[] timingColumns =
        {
            "R_fg_in", "G_fg_in", "B_fg_in",
            "L_fg_in", "A_fg_in", "B_fg_in",
            "X_bg_in", "Y_bg_in", "Z_bg_in",
            "BF_Dist", "Cuda BF time",
            "QC_Dist", "Cuda QC time",
            "Snake_Dist", "Cuda Snake time",
            "DecreaseStep_DS", "Cuda DS time"
        };
        string[] colourColumns =
        {
            "R_fg_in", "G_fg_in", "B_fg_in",
            "L_fg_in", "A_fg_in", "B_fg_in",
            "X_bg_in", "Y_bg_in", "Z_bg_in",
            "BF_Dist", "R_fg_out", "G_fg_out", "B_fg_out",
            "QC_Dist", "R_fg_out", "G_fg_out", "B_fg_out",
            "Snake_Dist", "R_fg_out", "G_fg_out", "B_fg_out",
            "DecreaseStep_DS", "R_fg_out", "G_fg_out", "B_fg_out"
        };

        CsvFileWriter output_file_1 = null;
        CsvFileWriter output_file_2 = null;
        try
        {
            // create the CSV files
            output_file_1 = new CsvFileWriter(@"C:\lev\STColorCorrection\Data\CUDA performance analysis\out_file1.csv");
            output_file_2 = new CsvFileWriter(@"C:\lev\STColorCorrection\Data\CUDA performance analysis\out_file2.csv");

            // write the headers
            CsvRow header = new CsvRow();
            foreach (string column in timingColumns)
            {
                header.Add(column);
            }

            output_file_1.WriteRow(header);

            header = new CsvRow();
            foreach (string column in colourColumns)
            {
                header.Add(column);
            }

            output_file_2.WriteRow(header);

            Random randomGenerater = new Random();
            for (int num_colors = 0; num_colors < 500; num_colors++)
            {
                CsvRow new_row_file_1 = new CsvRow();
                CsvRow new_row_file_2 = new CsvRow();

                // random foreground colour
                Byte[] rgb = new Byte[3];
                randomGenerater.NextBytes(rgb);
                System.Drawing.Color foreground = System.Drawing.Color.FromArgb(rgb[0], rgb[1], rgb[2]);

                // random background, each component scaled by a fixed upper bound
                Point3D backgroundCIEXYZ = new Point3D(0, 0, 0);
                backgroundCIEXYZ.X = randomGenerater.NextDouble() * 0.9504;
                backgroundCIEXYZ.Y = randomGenerater.NextDouble() * 1.0000;
                backgroundCIEXYZ.Z = randomGenerater.NextDouble() * 1.0888;

                Point3D background = new Point3D(backgroundCIEXYZ.X, backgroundCIEXYZ.Y, backgroundCIEXYZ.Z);

                Bin foregroundBin = FindForegroundBin(p3700, navigationMatrix, foreground);
                PerceptionLib.Color foregroundLAB = new PerceptionLib.Color();
                foregroundLAB.LA = foregroundBin.measuredLAB.X;
                foregroundLAB.A = foregroundBin.measuredLAB.Y;
                foregroundLAB.B = foregroundBin.measuredLAB.Z;

                // write the input colours, identically, to both rows
                string[] inputColumns =
                {
                    foreground.R.ToString(), foreground.G.ToString(), foreground.B.ToString(),
                    foregroundLAB.LA.ToString(), foregroundLAB.A.ToString(), foregroundLAB.B.ToString(),
                    background.X.ToString(), background.Y.ToString(), background.Z.ToString()
                };
                foreach (string value in inputColumns)
                {
                    new_row_file_1.Add(value);
                    new_row_file_2.Add(value);
                }

                // brute force
                Color.TestingStructure[] results_brute_force = Color.CorrectColour(foreground, background.X, background.Y, background.Z);
                new_row_file_1.Add(results_brute_force[0].distance.ToString());
                new_row_file_1.Add(results_brute_force[0].execution_time.ToString());

                Point3D labBin = new Point3D();
                labBin.X = results_brute_force[0].Given_R;
                labBin.Y = results_brute_force[0].Given_G;
                labBin.Z = results_brute_force[0].Given_B;
                Bin actualBin = GetProfileBin(p3700, labBin);

                new_row_file_2.Add(results_brute_force[0].distance.ToString());
                new_row_file_2.Add(actualBin.binRGB.X.ToString());
                new_row_file_2.Add(actualBin.binRGB.Y.ToString());
                new_row_file_2.Add(actualBin.binRGB.Z.ToString());

                // quick correction
                quick_corr.TestingStructure[] results_quick_corr = quick_corr.CorrectColour(foreground, background.X, background.Y, background.Z);
                new_row_file_1.Add(results_quick_corr[0].distance.ToString());
                new_row_file_1.Add(results_quick_corr[0].execution_time.ToString());

                labBin = new Point3D();
                labBin.X = results_quick_corr[0].Given_R;
                labBin.Y = results_quick_corr[0].Given_G;
                labBin.Z = results_quick_corr[0].Given_B;
                actualBin = GetProfileBin(p3700, labBin);

                new_row_file_2.Add(results_quick_corr[0].distance.ToString());
                new_row_file_2.Add(actualBin.binRGB.X.ToString());
                new_row_file_2.Add(actualBin.binRGB.Y.ToString());
                new_row_file_2.Add(actualBin.binRGB.Z.ToString());

                // snake
                snake.TestingStructure[] results_snake = snake.CorrectColour(foreground, background.X, background.Y, background.Z);
                new_row_file_1.Add(results_snake[0].distance.ToString());
                new_row_file_1.Add(results_snake[0].execution_time.ToString());

                labBin = new Point3D();
                labBin.X = results_snake[0].Given_R;
                labBin.Y = results_snake[0].Given_G;
                labBin.Z = results_snake[0].Given_B;
                actualBin = GetProfileBin(p3700, labBin);

                new_row_file_2.Add(results_snake[0].distance.ToString());
                new_row_file_2.Add(actualBin.binRGB.X.ToString());
                new_row_file_2.Add(actualBin.binRGB.Y.ToString());
                new_row_file_2.Add(actualBin.binRGB.Z.ToString());

                // half step
                half_step.TestingStructure[] results_half_step = half_step.CorrectColour(foreground, background.X, background.Y, background.Z);
                new_row_file_1.Add(results_half_step[0].distance.ToString());
                new_row_file_1.Add(results_half_step[0].execution_time.ToString());

                labBin = new Point3D();
                labBin.X = results_half_step[0].Given_R;
                labBin.Y = results_half_step[0].Given_G;
                labBin.Z = results_half_step[0].Given_B;
                actualBin = GetProfileBin(p3700, labBin);

                new_row_file_2.Add(results_half_step[0].distance.ToString());
                new_row_file_2.Add(actualBin.binRGB.X.ToString());
                new_row_file_2.Add(actualBin.binRGB.Y.ToString());
                new_row_file_2.Add(actualBin.binRGB.Z.ToString());

                // write the results
                output_file_1.WriteRow(new_row_file_1);
                output_file_2.WriteRow(new_row_file_2);
            }
        }
        finally
        {
            // close the CSV files, including when an iteration above threw
            if (output_file_1 != null)
            {
                output_file_1.Close();
            }

            if (output_file_2 != null)
            {
                output_file_2.Close();
            }
        }

        Console.WriteLine("Done!");
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex);
    }

    Console.ReadKey();
}
/// <summary>
/// Runs the brute-force colour correction kernel for one foreground/background image pair.
/// Loads (or translates and caches) the Cudafy module, builds the profile lookup table from
/// the p3700 CSV, copies everything to the device, launches <c>Bruteforce</c> and copies the
/// result back.
/// </summary>
/// <param name="foregorungRGB_CPU">Foreground pixels; copied to the device as kernel input.</param>
/// <param name="BackgroundXYZ_CPU">Background pixels; copied to the device as kernel input.</param>
/// <returns>
/// The output image copied back from the device, plus the value returned by the GPU timer
/// (started before the image uploads, stopped after the download; printed as milliseconds).
/// </returns>
/// <exception cref="ArgumentNullException">Either input array is null.</exception>
public static TestOutput CorrectColour(ForeGroundStrucuture[] foregorungRGB_CPU, BackGroundStrucuture[] BackgroundXYZ_CPU)
{
    // Fail fast: previously a null input only surfaced after the module had been built and
    // device memory had already been allocated.
    if (foregorungRGB_CPU == null)
    {
        throw new ArgumentNullException("foregorungRGB_CPU");
    }
    if (BackgroundXYZ_CPU == null)
    {
        throw new ArgumentNullException("BackgroundXYZ_CPU");
    }

    const int image_size = 960 * 540;
    const string profilePath = @"C:\lev\STColorCorrection\Data\PROFILE\p3700.csv";

    // Reuse the serialized module when its checksums still verify; otherwise translate the
    // kernel types again and cache the result.
    CudafyModule km = CudafyModule.TryDeserialize();
    if (km == null || !km.TryVerifyChecksums())
    {
        km = CudafyTranslator.Cudafy(typeof(ProfileStrucuture), typeof(ForeGroundStrucuture), typeof(BackGroundStrucuture), typeof(bf));
        km.TrySerialize();
    }

    // NOTE(review): this is set after the module above has already been obtained, so it
    // presumably only influences later Cudafy() calls - confirm whether it should come first.
    CudafyTranslator.GenerateDebug = true;

    // To run on the emulator instead, use CudafyHost.GetDevice(eGPUType.Emulator).
    GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
    gpu.LoadModule(km);
    Console.WriteLine("Running brute force correction using {0}", gpu.GetDeviceProperties(false).Name);

    ForeGroundStrucuture[] output_image_CPU = new ForeGroundStrucuture[image_size];
    ProfileStrucuture[, ,] profiles_CPU = BuildProfileLookup(LoadProfileCsv(profilePath));

    try
    {
        ProfileStrucuture[, ,] profile_GPU = gpu.CopyToDevice(profiles_CPU);

        // The timer deliberately excludes the profile upload above.
        gpu.StartTimer();

        ForeGroundStrucuture[] foregorungRGB_GPU = gpu.CopyToDevice(foregorungRGB_CPU);
        BackGroundStrucuture[] BackgroundXYZ_GPU = gpu.CopyToDevice(BackgroundXYZ_CPU);
        ForeGroundStrucuture[] distance_GPU = gpu.Allocate(output_image_CPU);

        // 24 * 675 blocks of 8 * 4 threads = 518,400 threads, which equals image_size
        // (960 * 540) - presumably one thread per pixel; confirm against the kernel.
        dim3 grids = new dim3(24, 675);
        dim3 threads = new dim3(8, 4);

        gpu.Launch(grids, threads, ((Action <GThread, ProfileStrucuture[, , ], ForeGroundStrucuture[], BackGroundStrucuture[], ForeGroundStrucuture[]>)Bruteforce), profile_GPU, foregorungRGB_GPU, BackgroundXYZ_GPU, distance_GPU);

        gpu.CopyFromDevice(distance_GPU, output_image_CPU);

        double elapsedTime = gpu.StopTimer();

        TestOutput to_return = new TestOutput();
        to_return.output_image = output_image_CPU;
        to_return.timeTaken = elapsedTime;

        Console.WriteLine("Time to generate: {0} ms", elapsedTime);

        return(to_return);
    }
    finally
    {
        // Release the device buffers even when a copy or the launch throws; previously they
        // were only freed on the success path. FreeAll also covers the buffers that used to
        // be freed one by one just before it.
        gpu.FreeAll();
    }
}

/// <summary>
/// Reads the profile CSV into a DataTable. On any failure the exception is written to the
/// console and an empty table is returned, so the caller continues with an empty profile.
/// </summary>
private static DataTable LoadProfileCsv(string path)
{
    try
    {
        using (GenericParserAdapter parser = new GenericParserAdapter(path))
        {
            System.Data.DataSet dsResult = parser.GetDataSet();
            return dsResult.Tables[0];
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex);
        return new DataTable();
    }
}

/// <summary>
/// Builds the 21 x 41 x 45 profile lookup table: every cell starts out flagged empty, then
/// each CSV row (from index 1 onwards) fills the cell addressed by its scaled L/A/B values.
/// </summary>
private static ProfileStrucuture[, ,] BuildProfileLookup(DataTable profile)
{
    const int BinsL = 21;
    const int BinsA = 41;
    const int BinsB = 45;
    const double BinScale = 0.2;  // presumably profile values are sampled in steps of 5 - confirm
    const int OffsetA = 20;       // shifts the scaled A value into the 0..40 index range
    const int OffsetB = 22;       // shifts the scaled B value into the 0..44 index range

    ProfileStrucuture[, ,] grid = new ProfileStrucuture[BinsL, BinsA, BinsB];

    for (int indexL = 0; indexL < BinsL; indexL++)
    {
        for (int indexA = 0; indexA < BinsA; indexA++)
        {
            for (int indexB = 0; indexB < BinsB; indexB++)
            {
                ProfileStrucuture empty = new ProfileStrucuture();
                empty.L = indexL;
                empty.A = indexA;
                empty.B = indexB;
                empty.Given_R = 0;
                empty.Given_G = 0;
                empty.Given_B = 0;
                empty.ML = 0;
                empty.MA = 0;
                empty.MB = 0;
                empty.MX = 0;
                empty.MY = 0;
                empty.MZ = 0;
                empty.isempty = TRUE;
                empty.isMoreAccurateThanOrigin = -1;
                grid[indexL, indexA, indexB] = empty;
            }
        }
    }

    // The CSV is machine-generated data: parse it culture-invariantly so that "0.613" is not
    // read as 613 on systems whose current culture uses a decimal comma.
    IFormatProvider invariant = System.Globalization.CultureInfo.InvariantCulture;

    try
    {
        // Row 0 is skipped, as in the original loop - presumably the header row; confirm.
        for (int i = 1; i < profile.Rows.Count; i++)
        {
            DataRow row = profile.Rows[i];

            int lvalue = (int)(Convert.ToInt32(row[0].ToString(), invariant) * BinScale);
            int avalue = (int)(Convert.ToInt32(row[1].ToString(), invariant) * BinScale) + OffsetA;
            int bvalue = (int)(Convert.ToInt32(row[2].ToString(), invariant) * BinScale) + OffsetB;

            // Work on a copy and store it back only once the whole row has parsed, so a bad
            // row never leaves a half-written cell behind.
            ProfileStrucuture bin = grid[lvalue, avalue, bvalue];
            bin.L = lvalue;
            bin.A = avalue;
            bin.B = bvalue;
            bin.Given_R = Convert.ToByte(row[9].ToString(), invariant);
            bin.Given_G = Convert.ToByte(row[10].ToString(), invariant);
            bin.Given_B = Convert.ToByte(row[11].ToString(), invariant);
            bin.ML = Convert.ToDouble(row[3].ToString(), invariant);
            bin.MA = Convert.ToDouble(row[4].ToString(), invariant);
            bin.MB = Convert.ToDouble(row[5].ToString(), invariant);
            bin.MX = Convert.ToDouble(row[6].ToString(), invariant);
            bin.MY = Convert.ToDouble(row[7].ToString(), invariant);
            bin.MZ = Convert.ToDouble(row[8].ToString(), invariant);
            bin.isempty = FALSE;
            grid[lvalue, avalue, bvalue] = bin;
        }
    }
    catch (Exception ex)
    {
        // Best effort, as before: the first malformed or out-of-range row is logged and
        // stops the import; rows already loaded are kept.
        Console.WriteLine(ex);
    }

    return grid;
}