/// <summary>
/// Runs the SimpleMultiply benchmark three times. Each run acquires a device, translates and
/// loads the module, multiplies two random matrices on the GPU with
/// <c>SimpleMultiplyKernel</c>, prints the timing of every step, and finally hands the
/// inputs and the result to <c>Util.VerifyResult</c>.
/// </summary>
public static void SimpleMultiply()
{
    const int scale = 8;
    const int launchCount = 50;

    for (var run = 1; run <= 3; run++)
    {
        Console.WriteLine("====> Test SimpleMultiply with CUDAfy C# (#.{0}) <====", run);

        var timer = Stopwatch.StartNew();

        // Stops the stopwatch and prints the elapsed milliseconds through the given format string.
        Action<string> report = format =>
        {
            timer.Stop();
            Console.WriteLine(format, timer.Elapsed.TotalMilliseconds);
        };

        // Step 1: acquire the device (timer has been running since StartNew).
        var gpu = CudafyHost.GetDevice();
        Console.WriteLine("GPU: {0}", gpu.GetDeviceProperties().Name);
        report("Step 1) Runtime setup {0} ms");

        // Step 2: translate the kernels into a module.
        timer.Restart();
        var module = CudafyTranslator.Cudafy();
        report("Step 2) Compile {0} ms");

        // Step 3: load the module onto the device.
        timer.Restart();
        gpu.LoadModule(module);
        report("Step 3) Load module {0} ms");

        // Host-side operands and result buffer.
        var left = Util.RandomMatrix(100 * scale, 200 * scale);
        var right = Util.RandomMatrix(200 * scale, 300 * scale);
        var rows = left.GetLength(0);
        var cols = right.GetLength(1);
        var product = new float[rows, cols];

        // Device-side copies of the operands plus an output buffer of the same shape as the result.
        var devLeft = gpu.CopyToDevice(left);
        var devRight = gpu.CopyToDevice(right);
        var devProduct = gpu.Allocate<float>(rows, cols);

        // Grid: x is derived from the result's column count, y from its row count.
        var blockDim = new dim3(TileSize, TileSize);
        var gridDim = new dim3(Util.Divup(cols, TileSize), Util.Divup(rows, TileSize));

        // First launch is timed on its own; Synchronize is called before the timer is stopped.
        timer.Restart();
        gpu.Launch(gridDim, blockDim, SimpleMultiplyKernel, devLeft, devRight, devProduct);
        gpu.Synchronize();
        report("Kernel launch first time {0} ms");

        // Then a batch of launches with a single Synchronize at the end; report the mean per launch.
        timer.Restart();
        var launched = 0;
        while (launched < launchCount)
        {
            gpu.Launch(gridDim, blockDim, SimpleMultiplyKernel, devLeft, devRight, devProduct);
            ++launched;
        }
        gpu.Synchronize();
        timer.Stop();
        Console.WriteLine("Kernel launch average time {0} ms", timer.Elapsed.TotalMilliseconds / (float)launchCount);

        // Fetch the result, release the device buffers, then verify on the host.
        gpu.CopyFromDevice(devProduct, product);
        gpu.Free(devLeft);
        gpu.Free(devRight);
        gpu.Free(devProduct);

        Util.VerifyResult(left, right, product);
    }
}
/// <summary>
/// Copies <paramref name="A"/> and <paramref name="B"/> to the device, launches the
/// <c>"GPU_MA"</c> kernel, and copies the device result back into <paramref name="C"/>.
/// </summary>
/// <param name="A">First host input matrix; copied to the device.</param>
/// <param name="B">Second host input matrix; copied to the device.</param>
/// <param name="C">Host output matrix; overwritten with the device result.</param>
/// <param name="gpu">Device on which memory is allocated and the kernel is launched.</param>
/// <param name="maxTheadBlockSize">Upper bound for the per-axis thread count of a block.</param>
/// <param name="Size">Passed to the kernel and used for both grid dimensions.</param>
/// <returns>Always <c>1</c>.</returns>
/// <remarks>
/// Device buffers are released in a <c>finally</c> block so they are not leaked when an
/// allocation, copy or launch throws.
/// </remarks>
public static int MA(int[,] A, int[,] B, int[,] C, GPGPU gpu, int maxTheadBlockSize, int Size)
{
    int[,] GPU_A = null;
    int[,] GPU_B = null;
    int[,] GPU_C = null;

    try
    {
        // Allocate device memory shaped like the host arrays.
        GPU_A = gpu.Allocate<int>(A);
        GPU_B = gpu.Allocate<int>(B);
        GPU_C = gpu.Allocate<int>(C);

        // Copy the two inputs to the device.
        gpu.CopyToDevice(A, GPU_A);
        gpu.CopyToDevice(B, GPU_B);

        // Threads per block axis: Size when it is below the limit, otherwise the limit itself.
        int threadsPerSide = Size < maxTheadBlockSize ? Size : maxTheadBlockSize;
        dim3 threadsPerBlock = new dim3(threadsPerSide, threadsPerSide);

        // NOTE(review): the grid is always Size x Size blocks regardless of the block size,
        // so the launch spans Size * threadsPerSide threads per axis. Confirm the kernel
        // bounds-checks its indices.
        dim3 block = new dim3(Size, Size);

        gpu.Launch(block, threadsPerBlock, "GPU_MA", GPU_A, GPU_B, GPU_C, Size);

        // Copy the result back to the host.
        gpu.CopyFromDevice(GPU_C, C);
    }
    finally
    {
        // Free whatever was successfully allocated, even on failure.
        if (GPU_A != null)
        {
            gpu.Free(GPU_A);
        }
        if (GPU_B != null)
        {
            gpu.Free(GPU_B);
        }
        if (GPU_C != null)
        {
            gpu.Free(GPU_C);
        }
    }

    return 1;
}
/// <summary>
/// Launches the <c>"CalculateMandelbrot"</c> kernel for the region described by
/// <paramref name="options"/> and draws the returned iteration counts into a new bitmap.
/// </summary>
/// <param name="options">Supplies the image width/height and the MinX/MaxX/MinY/MaxY bounds.</param>
/// <param name="log">Callback invoked with a progress message before each stage.</param>
/// <returns>A new <see cref="Bitmap"/> of <c>options.Width</c> x <c>options.Height</c>; the caller owns it.</returns>
/// <remarks>
/// All device memory is released via <c>gpu.FreeAll()</c> whether or not rendering succeeds.
/// If any step throws after the bitmap has been created, the bitmap is disposed before the
/// exception propagates so that it is not leaked.
/// </remarks>
public Bitmap Render(Rendering.ExecutionOptions options, Action<string> log)
{
    Bitmap result = null;
    try
    {
        result = new Bitmap(options.Width, options.Height);
        int width = options.Width;
        int height = options.Height;

        log("Initializing and copying data to GPU memory");
        int[,] iterations = new int[height, width];
        var dev_iterations = gpu.CopyToDevice(iterations);

        // NOTE(review): the grid is height x width blocks, each of BlockSize threads.
        // Confirm against the kernel's indexing that this is intended.
        var gridSize = new dim3(height, width);
        var blockSize = BlockSize;

        // The kernel works in single precision; convert the bounds once up front.
        var minX = (float)options.MinX;
        var maxX = (float)options.MaxX;
        var minY = (float)options.MinY;
        var maxY = (float)options.MaxY;
        var stepX = (maxX - minX) / ((float)width);
        var stepY = (maxY - minY) / ((float)height);

        log("Launching Mandelbrot calculations");
        gpu.Launch(gridSize, blockSize, "CalculateMandelbrot", minX, maxY, stepX, stepY, dev_iterations);

        log("Mandelbrot calculations done, fetching results from GPU memory");
        gpu.CopyFromDevice(dev_iterations, iterations);

        log("Generating the final image");
        Rendering.fastDrawBitmap(result, iterations);

        return result;
    }
    catch
    {
        // The bitmap never reaches the caller on failure, so release it here.
        if (result != null)
        {
            result.Dispose();
        }
        throw;
    }
    finally
    {
        gpu.FreeAll();
    }
}
/// <summary>
/// Acquires a CUDA device, translates and loads a module for <c>sm_35</c>, performs an
/// argument-less <c>Launch()</c> call, and returns a freshly allocated one-element array.
/// </summary>
/// <param name="inputArray">Not read by the current implementation.</param>
/// <returns>A new <c>double[1]</c>; it does not depend on <paramref name="inputArray"/>.</returns>
/// <remarks>
/// NOTE(review): this looks like an unfinished stub. No kernel or data is passed to the launch
/// and the grid dimension below is never used. Confirm the intended behaviour.
/// </remarks>
public double[] transpose(double[] inputArray)
{
    GPGPU device = CudafyHost.GetDevice(eGPUType.Cuda);
    device.LoadModule(CudafyTranslator.Cudafy(eArchitecture.sm_35));

    // Constructed but not passed to the launch below.
    dim3 launchGrid = new dim3(1000);

    device.Launch();

    double[] output = new double[1];
    return output;
}
/// <summary>
/// Translates the module, acquires the configured device, loads the module onto it,
/// allocates the device bitmap buffer, and sets up the launch dimensions.
/// </summary>
/// <param name="bytes">Number of bytes to allocate on the device for the bitmap.</param>
public void Initialize(int bytes)
{
    // Edge length of a thread block; the grid is DIM / tileSide blocks per axis.
    const int tileSide = 16;

    var module = CudafyTranslator.Cudafy();

    _gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
    _gpu.LoadModule(module);
    _dev_bitmap = _gpu.Allocate<byte>(bytes);

    _threads = new dim3(tileSide, tileSide);
    _blocks = new dim3(DIM / tileSide, DIM / tileSide);
}
/// <summary>
/// Renders into <paramref name="bitmap"/> on the GPU: fills <c>SPHERES</c> random spheres,
/// copies them to constant memory, launches <c>kernel</c>, copies the result back, and
/// prints the GPU timer reading.
/// </summary>
/// <param name="bitmap">Host buffer; a device buffer of the same size is allocated and its
/// contents are copied back into this array after the launch.</param>
/// <remarks>
/// A cached module is reused when it deserializes and its checksums verify; otherwise the
/// module is re-translated and serialized. Device memory is released in a <c>finally</c>
/// block so it is not leaked when any GPU call throws.
/// </remarks>
public static void Execute(byte[] bitmap)
{
    CudafyModule km = CudafyModule.TryDeserialize();
    if (km == null || !km.TryVerifyChecksums())
    {
        km = CudafyTranslator.Cudafy();
        km.TrySerialize();
    }

    GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
    gpu.LoadModule(km);

    try
    {
        // Capture the start time.
        gpu.StartTimer();

        // Allocate memory on the GPU for the bitmap (same size as the host buffer).
        byte[] dev_bitmap = gpu.Allocate(bitmap);

        // Build the spheres on the host, then copy them to constant memory on the GPU.
        // The order of the rnd() calls is kept so the generated sequence is unchanged.
        NestedSphere[] temp_s = new NestedSphere[SPHERES];
        for (int i = 0; i < SPHERES; i++)
        {
            temp_s[i].r = rnd(1.0f);
            temp_s[i].g = rnd(1.0f);
            temp_s[i].b = rnd(1.0f);
            temp_s[i].x = rnd(1000.0f) - 500;
            temp_s[i].y = rnd(1000.0f) - 500;
            temp_s[i].z = rnd(1000.0f) - 500;
            temp_s[i].radius = rnd(100.0f) + 20;
        }
        gpu.CopyToConstantMemory(temp_s, s);

        // Generate a bitmap from the sphere data: 16x16 threads per block,
        // DIM/16 blocks per axis.
        dim3 grids = new dim3(ray_gui.DIM / 16, ray_gui.DIM / 16);
        dim3 threads = new dim3(16, 16);
        gpu.Launch(grids, threads, kernel, dev_bitmap); // strongly typed launch

        // Copy the bitmap back from the GPU for display.
        gpu.CopyFromDevice(dev_bitmap, bitmap);

        // Get the stop time and display the timing result.
        float elapsedTime = gpu.StopTimer();
        Console.WriteLine("Time to generate: {0} ms", elapsedTime);
    }
    finally
    {
        gpu.FreeAll();
    }
}
/// <summary>
/// Creates a renderer bound to an OpenCL device: translates the module for OpenCL 1.2,
/// derives a square block size from the first device's <c>MaxThreadsPerBlock</c>, and loads
/// the module once so it does not have to be reloaded on every render.
/// </summary>
/// <exception cref="Exception">No OpenCL device properties were returned.</exception>
public GpuRenderer()
{
    var openClDevices = CudafyHost.GetDeviceProperties(eGPUType.OpenCL);
    if (!openClDevices.Any())
    {
        throw new Exception("No OpenCL devices found...");
    }

    var primaryDevice = openClDevices.First();

    Module = CudafyTranslator.Cudafy(eArchitecture.OpenCL12);

    // Largest side in 1..15 whose square still fits within the device's thread-per-block limit.
    var blockSide = Enumerable
        .Range(1, 15)
        .Last(side => side * side <= primaryDevice.MaxThreadsPerBlock);
    BlockSize = new dim3(blockSide, blockSide);

    // Initialize the gpu and load the module up front.
    gpu = CudafyHost.GetDevice(eGPUType.OpenCL);
    gpu.LoadModule(Module);
}
/// <summary>
/// Runs the <c>Snake</c> kernel over the supplied foreground/background buffers, using a
/// colour profile table read from a CSV file, and returns the output buffer together with
/// the GPU timer reading.
/// </summary>
/// <param name="foregorungRGB_CPU">Host foreground buffer; copied to the device and passed to the kernel.</param>
/// <param name="BackgroundXYZ_CPU">Host background buffer; copied to the device and passed to the kernel.</param>
/// <returns>
/// A <c>TestOutput</c> whose <c>output_image</c> is the buffer copied back from the device and
/// whose <c>timeTaken</c> is the value returned by <c>gpu.StopTimer()</c>.
/// </returns>
/// <remarks>
/// Errors while reading or parsing the profile CSV are written to the console and processing
/// continues with whatever part of the table was populated. Device memory is released with
/// <c>gpu.FreeAll()</c> in a <c>finally</c> block so it is not leaked when a GPU call throws.
/// </remarks>
public static TestOutput CorrectColour(ForeGroundStrucuture[] foregorungRGB_CPU, BackGroundStrucuture[] BackgroundXYZ_CPU)
{
    const int image_size = 960 * 540;

    // Reuse the cached module when it deserializes and its checksums verify;
    // otherwise translate the listed types and cache the result.
    CudafyModule km = CudafyModule.TryDeserialize();
    if (km == null || !km.TryVerifyChecksums())
    {
        km = CudafyTranslator.Cudafy(typeof(ProfileStrucuture), typeof(ForeGroundStrucuture), typeof(BackGroundStrucuture), typeof(SampleStructure), typeof(snake));
        km.TrySerialize();
    }
    // NOTE(review): this is set after the translation above, so it cannot affect that call.
    CudafyTranslator.GenerateDebug = true;

    // Target device; to use the emulator instead: CudafyHost.GetDevice(eGPUType.Emulator).
    GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
    Console.WriteLine("Running quick correction using {0}", gpu.GetDeviceProperties(false).Name);
    gpu.LoadModule(km);

    // Host-side output buffer, one element per pixel.
    ForeGroundStrucuture[] distance_CPU = new ForeGroundStrucuture[image_size];

    // Load the profile CSV. On failure the table stays empty and the parse loop below is skipped.
    DataTable profile = new DataTable();
    try
    {
        using (GenericParserAdapter parser = new GenericParserAdapter(@"C:\lev\STColorCorrection\Data\PROFILE\p3700.csv"))
        {
            System.Data.DataSet dsResult = parser.GetDataSet();
            profile = dsResult.Tables[0];
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex);
    }

    // L 0-21 A 0-41 B 0-45
    ProfileStrucuture[, ,] profiles_CPU = new ProfileStrucuture[21, 41, 45];
    SampleStructure[,] samples_CPU = new SampleStructure[image_size, 6];

    // Initialise every profile cell as "empty", with its own indices as L/A/B.
    for (int indexL = 0; indexL < 21; indexL++)
    {
        for (int indexA = 0; indexA < 41; indexA++)
        {
            for (int indexB = 0; indexB < 45; indexB++)
            {
                profiles_CPU[indexL, indexA, indexB].L = indexL;
                profiles_CPU[indexL, indexA, indexB].A = indexA;
                profiles_CPU[indexL, indexA, indexB].B = indexB;
                profiles_CPU[indexL, indexA, indexB].Given_R = 0;
                profiles_CPU[indexL, indexA, indexB].Given_G = 0;
                profiles_CPU[indexL, indexA, indexB].Given_B = 0;
                profiles_CPU[indexL, indexA, indexB].ML = 0;
                profiles_CPU[indexL, indexA, indexB].MA = 0;
                profiles_CPU[indexL, indexA, indexB].MB = 0;
                profiles_CPU[indexL, indexA, indexB].MX = 0;
                profiles_CPU[indexL, indexA, indexB].MY = 0;
                profiles_CPU[indexL, indexA, indexB].MZ = 0;
                profiles_CPU[indexL, indexA, indexB].distance = -1.0;
                profiles_CPU[indexL, indexA, indexB].weight = -1.0;
                profiles_CPU[indexL, indexA, indexB].isempty = TRUE;
                profiles_CPU[indexL, indexA, indexB].isMoreAccurateThanOrigin = FALSE;
            }
        }
    }

    // Fill cells from the CSV. Row 0 is skipped (presumably a header row; confirm).
    // Any conversion or index error stops the loop and is logged; rows handled so far stay in place.
    try
    {
        for (int i = 1; i < profile.Rows.Count; i++)
        {
            var row = profile.Rows[i];

            int lvalue = Convert.ToInt32(row[0].ToString());
            int avalue = Convert.ToInt32(row[1].ToString());
            int bvalue = Convert.ToInt32(row[2].ToString());

            // Scale by 0.2 and shift A/B by 20/22 to obtain the array indices.
            lvalue = (int)(lvalue * 0.2);
            avalue = (int)(avalue * 0.2) + 20;
            bvalue = (int)(bvalue * 0.2) + 22;

            profiles_CPU[lvalue, avalue, bvalue].L = lvalue;
            profiles_CPU[lvalue, avalue, bvalue].A = avalue;
            profiles_CPU[lvalue, avalue, bvalue].B = bvalue;
            profiles_CPU[lvalue, avalue, bvalue].Given_R = Convert.ToByte(row[9].ToString());
            profiles_CPU[lvalue, avalue, bvalue].Given_G = Convert.ToByte(row[10].ToString());
            profiles_CPU[lvalue, avalue, bvalue].Given_B = Convert.ToByte(row[11].ToString());
            profiles_CPU[lvalue, avalue, bvalue].ML = Convert.ToDouble(row[3].ToString());
            profiles_CPU[lvalue, avalue, bvalue].MA = Convert.ToDouble(row[4].ToString());
            profiles_CPU[lvalue, avalue, bvalue].MB = Convert.ToDouble(row[5].ToString());
            profiles_CPU[lvalue, avalue, bvalue].MX = Convert.ToDouble(row[6].ToString());
            profiles_CPU[lvalue, avalue, bvalue].MY = Convert.ToDouble(row[7].ToString());
            profiles_CPU[lvalue, avalue, bvalue].MZ = Convert.ToDouble(row[8].ToString());
            profiles_CPU[lvalue, avalue, bvalue].isempty = FALSE;
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex);
    }

    try
    {
        // Profile and sample tables are uploaded before the timer starts.
        ProfileStrucuture[, ,] profile_GPU = gpu.CopyToDevice(profiles_CPU);
        SampleStructure[,] samples_GPU = gpu.CopyToDevice(samples_CPU);

        // Capture the start time; the timed section covers input upload, launch and download.
        gpu.StartTimer();
        ForeGroundStrucuture[] foregorungRGB_GPU = gpu.CopyToDevice(foregorungRGB_CPU);
        BackGroundStrucuture[] BackgroundXYZ_GPU = gpu.CopyToDevice(BackgroundXYZ_CPU);

        // Output buffer on the device.
        ForeGroundStrucuture[] distance_GPU = gpu.Allocate(distance_CPU);

        // (24 * 8) x (675 * 4) = 192 x 2700 = 518,400 threads, which equals image_size (960 * 540).
        dim3 grids = new dim3(24, 675);
        dim3 threads = new dim3(8, 4);

        gpu.Launch(grids, threads, ((Action<GThread, ProfileStrucuture[, ,], ForeGroundStrucuture[], BackGroundStrucuture[], ForeGroundStrucuture[], SampleStructure[,]>)Snake), profile_GPU, foregorungRGB_GPU, BackgroundXYZ_GPU, distance_GPU, samples_GPU);

        // Copy the result back from the GPU.
        gpu.CopyFromDevice(distance_GPU, distance_CPU);

        // Get the stop time and display the timing result.
        double elapsedTime = gpu.StopTimer();

        TestOutput to_return = new TestOutput();
        to_return.output_image = distance_CPU;
        to_return.timeTaken = elapsedTime;
        Console.WriteLine("Time to generate: {0} ms", elapsedTime);

        return to_return;
    }
    finally
    {
        // Releases every device allocation made above, on success and on failure alike.
        gpu.FreeAll();
    }
}
/// <summary>
/// Creates a <see cref="GGrid"/> whose <c>Dim</c> is set to the supplied dimensions.
/// </summary>
/// <param name="size">The dimensions stored in <c>Dim</c>.</param>
public GGrid(dim3 size)
{
    this.Dim = size;
}
/// <summary>
/// Renders into <paramref name="bitmap"/> on the GPU using a module loaded from a serialized
/// file: fills <c>SPHERES</c> random spheres, copies them to constant memory, launches
/// <c>"kernel"</c>, copies the result back, and prints both the module load time and the GPU
/// timer reading.
/// </summary>
/// <param name="bitmap">Host buffer; a device buffer of the same size is allocated and its
/// contents are copied back into this array after the launch.</param>
/// <remarks>
/// The module load time is measured with a stopwatch and printed as total elapsed
/// milliseconds. The previous <c>TimeSpan.Milliseconds</c> reading only returned the
/// 0-999 ms component and so misreported loads of one second or more.
/// Device memory is released in a <c>finally</c> block so it is not leaked on failure.
/// </remarks>
public static void Execute(byte[] bitmap)
{
    System.Diagnostics.Stopwatch loadTimer = System.Diagnostics.Stopwatch.StartNew();

    CudafyModule km = CudafyModule.TryDeserialize(csFILENAME);
    // Check the module exists and matches the .NET modules, else make new.
    if (km == null || !km.TryVerifyChecksums())
    {
        Console.WriteLine("There was no cached module available so we make a new one.");
        // NOTE(review): this deserializes a module named after the type rather than
        // translating one. Confirm that a pre-built module of that name is expected to exist.
        km = CudafyModule.Deserialize(typeof(ray_serialize).Name);
        km.Serialize(csFILENAME);
    }

    // NOTE(review): the device id is hard-coded to 1 here. Confirm this is intentional.
    GPGPU gpu = CudafyHost.GetGPGPU(CudafyModes.Target, 1);
    gpu.LoadModule(km);
    loadTimer.Stop();
    Console.WriteLine("Time taken to load module: {0}ms", loadTimer.ElapsedMilliseconds);

    try
    {
        // Capture the start time.
        gpu.StartTimer();

        // Allocate memory on the GPU for the bitmap (same size as the host buffer).
        byte[] dev_bitmap = gpu.Allocate(bitmap);

        // Build the spheres on the host, then copy them to constant memory on the GPU.
        // The order of the rnd() calls is kept so the generated sequence is unchanged.
        Sphere[] temp_s = new Sphere[SPHERES];
        for (int i = 0; i < SPHERES; i++)
        {
            temp_s[i].r = rnd(1.0f);
            temp_s[i].g = rnd(1.0f);
            temp_s[i].b = rnd(1.0f);
            temp_s[i].x = rnd(1000.0f) - 500;
            temp_s[i].y = rnd(1000.0f) - 500;
            temp_s[i].z = rnd(1000.0f) - 500;
            temp_s[i].radius = rnd(100.0f) + 20;
        }
        gpu.CopyToConstantMemory(temp_s, s);

        // Generate a bitmap from the sphere data: 16x16 threads per block,
        // DIM/16 blocks per axis.
        dim3 grids = new dim3(DIM / 16, DIM / 16);
        dim3 threads = new dim3(16, 16);
        gpu.Launch(grids, threads, "kernel", dev_bitmap);

        // Copy the bitmap back from the GPU for display.
        gpu.CopyFromDevice(dev_bitmap, bitmap);

        // Get the stop time and display the timing result.
        float elapsedTime = gpu.StopTimer();
        Console.WriteLine("Time to generate: {0} ms", elapsedTime);
    }
    finally
    {
        gpu.DeviceFreeAll();
    }
}
/// <summary>
/// Runs the <c>Bruteforce</c> kernel over the supplied foreground/background buffers, using a
/// colour profile table read from a CSV file, and returns the output image together with the
/// GPU timer reading.
/// </summary>
/// <param name="foregorungRGB_CPU">Host foreground buffer; copied to the device and passed to the kernel.</param>
/// <param name="BackgroundXYZ_CPU">Host background buffer; copied to the device and passed to the kernel.</param>
/// <returns>
/// A <c>TestOutput</c> whose <c>output_image</c> is the buffer copied back from the device and
/// whose <c>timeTaken</c> is the value returned by <c>gpu.StopTimer()</c>.
/// </returns>
/// <remarks>
/// Errors while reading or parsing the profile CSV are written to the console and processing
/// continues with whatever part of the table was populated. Device memory is released with
/// <c>gpu.FreeAll()</c> in a <c>finally</c> block so it is not leaked when a GPU call throws.
/// </remarks>
public static TestOutput CorrectColour(ForeGroundStrucuture[] foregorungRGB_CPU, BackGroundStrucuture[] BackgroundXYZ_CPU)
{
    const int image_size = 960 * 540;

    // Reuse the cached module when it deserializes and its checksums verify;
    // otherwise translate the listed types and cache the result.
    CudafyModule km = CudafyModule.TryDeserialize();
    if (km == null || !km.TryVerifyChecksums())
    {
        km = CudafyTranslator.Cudafy(typeof(ProfileStrucuture), typeof(ForeGroundStrucuture), typeof(BackGroundStrucuture), typeof(bf));
        km.TrySerialize();
    }
    // NOTE(review): this is set after the translation above, so it cannot affect that call.
    CudafyTranslator.GenerateDebug = true;

    // Target device; to use the emulator instead: CudafyHost.GetDevice(eGPUType.Emulator).
    GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
    gpu.LoadModule(km);
    Console.WriteLine("Running brute force correction using {0}", gpu.GetDeviceProperties(false).Name);

    // Host-side output buffer, one element per pixel.
    ForeGroundStrucuture[] output_image_CPU = new ForeGroundStrucuture[image_size];

    // Load the profile CSV. On failure the table stays empty and the parse loop below is skipped.
    DataTable profile = new DataTable();
    try
    {
        using (GenericParserAdapter parser = new GenericParserAdapter(@"C:\lev\STColorCorrection\Data\PROFILE\p3700.csv"))
        {
            System.Data.DataSet dsResult = parser.GetDataSet();
            profile = dsResult.Tables[0];
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex);
    }

    // L 0-21 A 0-41 B 0-45
    ProfileStrucuture[, ,] profiles_CPU = new ProfileStrucuture[21, 41, 45];

    // Initialise every profile cell as "empty", with its own indices as L/A/B.
    for (int indexL = 0; indexL < 21; indexL++)
    {
        for (int indexA = 0; indexA < 41; indexA++)
        {
            for (int indexB = 0; indexB < 45; indexB++)
            {
                profiles_CPU[indexL, indexA, indexB].L = indexL;
                profiles_CPU[indexL, indexA, indexB].A = indexA;
                profiles_CPU[indexL, indexA, indexB].B = indexB;
                profiles_CPU[indexL, indexA, indexB].Given_R = 0;
                profiles_CPU[indexL, indexA, indexB].Given_G = 0;
                profiles_CPU[indexL, indexA, indexB].Given_B = 0;
                profiles_CPU[indexL, indexA, indexB].ML = 0;
                profiles_CPU[indexL, indexA, indexB].MA = 0;
                profiles_CPU[indexL, indexA, indexB].MB = 0;
                profiles_CPU[indexL, indexA, indexB].MX = 0;
                profiles_CPU[indexL, indexA, indexB].MY = 0;
                profiles_CPU[indexL, indexA, indexB].MZ = 0;
                profiles_CPU[indexL, indexA, indexB].isempty = TRUE;
                profiles_CPU[indexL, indexA, indexB].isMoreAccurateThanOrigin = -1;
            }
        }
    }

    // Fill cells from the CSV. Row 0 is skipped (presumably a header row; confirm).
    // Any conversion or index error stops the loop and is logged; rows handled so far stay in place.
    try
    {
        for (int i = 1; i < profile.Rows.Count; i++)
        {
            var row = profile.Rows[i];

            int lvalue = Convert.ToInt32(row[0].ToString());
            int avalue = Convert.ToInt32(row[1].ToString());
            int bvalue = Convert.ToInt32(row[2].ToString());

            // Scale by 0.2 and shift A/B by 20/22 to obtain the array indices.
            lvalue = (int)(lvalue * 0.2);
            avalue = (int)(avalue * 0.2) + 20;
            bvalue = (int)(bvalue * 0.2) + 22;

            profiles_CPU[lvalue, avalue, bvalue].L = lvalue;
            profiles_CPU[lvalue, avalue, bvalue].A = avalue;
            profiles_CPU[lvalue, avalue, bvalue].B = bvalue;
            profiles_CPU[lvalue, avalue, bvalue].Given_R = Convert.ToByte(row[9].ToString());
            profiles_CPU[lvalue, avalue, bvalue].Given_G = Convert.ToByte(row[10].ToString());
            profiles_CPU[lvalue, avalue, bvalue].Given_B = Convert.ToByte(row[11].ToString());
            profiles_CPU[lvalue, avalue, bvalue].ML = Convert.ToDouble(row[3].ToString());
            profiles_CPU[lvalue, avalue, bvalue].MA = Convert.ToDouble(row[4].ToString());
            profiles_CPU[lvalue, avalue, bvalue].MB = Convert.ToDouble(row[5].ToString());
            profiles_CPU[lvalue, avalue, bvalue].MX = Convert.ToDouble(row[6].ToString());
            profiles_CPU[lvalue, avalue, bvalue].MY = Convert.ToDouble(row[7].ToString());
            profiles_CPU[lvalue, avalue, bvalue].MZ = Convert.ToDouble(row[8].ToString());
            profiles_CPU[lvalue, avalue, bvalue].isempty = FALSE;
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex);
    }

    try
    {
        // The profile table is uploaded before the timer starts.
        ProfileStrucuture[, ,] profile_GPU = gpu.CopyToDevice(profiles_CPU);

        // Capture the start time; the timed section covers input upload, launch and download.
        gpu.StartTimer();
        ForeGroundStrucuture[] foregorungRGB_GPU = gpu.CopyToDevice(foregorungRGB_CPU);
        BackGroundStrucuture[] BackgroundXYZ_GPU = gpu.CopyToDevice(BackgroundXYZ_CPU);

        // Output buffer on the device.
        ForeGroundStrucuture[] distance_GPU = gpu.Allocate(output_image_CPU);

        // (24 * 8) x (675 * 4) = 192 x 2700 = 518,400 threads, which equals image_size (960 * 540).
        dim3 grids = new dim3(24, 675);
        dim3 threads = new dim3(8, 4);

        gpu.Launch(grids, threads, ((Action<GThread, ProfileStrucuture[, ,], ForeGroundStrucuture[], BackGroundStrucuture[], ForeGroundStrucuture[]>)Bruteforce), profile_GPU, foregorungRGB_GPU, BackgroundXYZ_GPU, distance_GPU);

        // Copy the result back from the GPU.
        gpu.CopyFromDevice(distance_GPU, output_image_CPU);

        // Get the stop time and display the timing result.
        double elapsedTime = gpu.StopTimer();

        TestOutput to_return = new TestOutput();
        to_return.output_image = output_image_CPU;
        to_return.timeTaken = elapsedTime;
        Console.WriteLine("Time to generate: {0} ms", elapsedTime);

        return to_return;
    }
    finally
    {
        // Releases every device allocation made above, on success and on failure alike.
        gpu.FreeAll();
    }
}