/// <summary>
/// Loads the noisy grayscale image, calls <c>ParForGPU</c> through a
/// HybRunner.Cuda wrapper with a window of 3, prints the elapsed wall-clock
/// time in seconds and saves the output image.
/// </summary>
static void Main(string[] args)
{
    GrayBitmap image = GrayBitmap.Load("../../images/lena_highres_greyscale_noise.bmp");
    GrayBitmap denoised = new GrayBitmap(image.Width, image.Height);
    ushort[] input = image.PixelsUShort;
    ushort[] output = new ushort[image.Width * image.Height];

    // The timed region covers runner creation, wrapping and the call itself.
    Stopwatch watch = Stopwatch.StartNew();

    int window = 3;

    // Wrap a new Program instance so the call below is dispatched by the CUDA runner.
    dynamic wrapper = HybRunner.Cuda().Wrap(new Program());
    wrapper.ParForGPU(output, input, (int)image.Width, (int)image.Height, window);

    watch.Stop();

    // Milliseconds -> seconds, two decimals.
    double seconds = watch.ElapsedMilliseconds * 1.0E-3;
    string time = seconds.ToString("0.00");
    Console.WriteLine($"Naive GPU time : {time}");

    denoised.PixelsUShort = output;
    denoised.Save("../../output-03-naive-gpu/denoised.bmp");
}
/// <summary>
/// Builds a 1D Laplacian sparse matrix and a constant vector, then calls
/// <c>Multiply</c> through a HybRunner.Cuda wrapper <c>redo</c> times.
/// </summary>
static void Main(string[] args)
{
    SparseMatrix A = SparseMatrix.Laplacian_1D(10000000);
    float[] X = VectorReader.GetSplatVector(10000000, 1.0F);

    int redo = 2;

    // Byte count derived from the matrix arrays; computed here but not consumed in this method.
    double dataBytes = (double)(A.data.Length * sizeof(float));
    double rowBytes = (double)(2 * A.rows.Length * sizeof(uint));
    double indexBytes = (double)(A.indices.Length * sizeof(uint));
    double memoryOperationsSize = (double)redo * (3.0 * dataBytes + rowBytes + indexBytes);

    Console.WriteLine("matrix read --- starting computations");

    // Result vector: one entry fewer than the rows array.
    float[] B = new float[A.rows.Length - 1];

    #region CUDA
    cudaDeviceProp prop;
    cuda.GetDeviceProperties(out prop, 0);

    int gridSize = 8 * prop.multiProcessorCount;
    HybRunner runner = HybRunner.Cuda("SparseMatrix_CUDA.dll").SetDistrib(gridSize, 256);
    dynamic wrapper = runner.Wrap(new Program());

    for (int pass = 0; pass < redo; ++pass)
    {
        wrapper.Multiply(B, A, X, X.Length);
    }
    #endregion
}
/// <summary>
/// Configures the shared runner/wrapper, builds a 1D Laplacian system with a
/// right-hand side of ones and a zero starting point, and calls
/// <c>ConjugateGradient</c>.
/// </summary>
static void Main(string[] args)
{
    // configure CUDA: distribution derived from the multiprocessor count of device 0
    cudaDeviceProp prop;
    cuda.GetDeviceProperties(out prop, 0);

    const int threadsPerBlock = 256;
    int gridSize = 16 * prop.multiProcessorCount;
    int lastDistribArg = threadsPerBlock * sizeof(float);
    runner = HybRunner.Cuda().SetDistrib(gridSize, 1, threadsPerBlock, 1, 1, lastDistribArg);
    wrapper = runner.Wrap(new Program());

    // very slow convergence with no preconditioner
    int size = 1000000;
    SparseMatrix A = SparseMatrix.Laplacian_1D(size);
    FloatResidentArray B = new FloatResidentArray(size);
    FloatResidentArray X = new FloatResidentArray(size);

    int maxiter = 1000;
    float eps = 1.0e-09f;

    int index = 0;
    while (index < size)
    {
        B[index] = 1.0f; // right side
        X[index] = 0.0f; // starting point
        ++index;
    }

    ConjugateGradient(X, A, B, maxiter, eps);
}
/// <summary>
/// Loads the noisy grayscale image, calls <c>ParForGPU</c> on a wrapped
/// <c>Filter</c> with an explicit grid/block distribution, prints the elapsed
/// wall-clock time in seconds and saves the output image.
/// </summary>
static void Main(string[] args)
{
    GrayBitmap image = GrayBitmap.Load("../../images/lena_highres_greyscale_noise.bmp");
    GrayBitmap denoised = new GrayBitmap(image.Width, image.Height);
    ushort[] input = image.PixelsUShort;
    ushort[] output = new ushort[image.Width * image.Height];

    Stopwatch watch = Stopwatch.StartNew();

    // NOTE: the angle-bracketed names below are template placeholders, not valid C#.
    // They must be replaced with concrete grid/block dimensions before this compiles.
    dim3 grid = new dim3(< gridX >, <gridY>, 1);
    dim3 block = new dim3(< blockX >, <blockY>, 1);

    // Wrap a new Filter instance so the call below is dispatched by the CUDA runner.
    dynamic wrapper = HybRunner.Cuda().Wrap(new Filter());
    wrapper.SetDistrib(grid, block).ParForGPU(output, input, (int)image.Width, (int)image.Height);

    watch.Stop();

    // Milliseconds -> seconds, two decimals.
    double seconds = watch.ElapsedMilliseconds * 1.0E-3;
    string time = seconds.ToString("0.00");
    Console.WriteLine($"Parallel2D GPU time : {time}");

    denoised.PixelsUShort = output;
    denoised.Save("../../output-05-dice-gpu/denoised.bmp");
}
/// <summary>
/// Fills an array with random 0/1 values, calls <c>ReduceAdd</c> through a
/// HybRunner.Cuda wrapper, and prints the returned sum next to a LINQ-computed
/// expected value.
/// </summary>
static void Main(string[] args)
{
    Random random = new Random();
    const int N = 1024 * 1024 * 32;

    // A fresh int[] is all zeros, so only the ~20% of entries drawn below 0.2 need writing.
    int[] a = new int[N];
    for (int i = 0; i < N; ++i)
    {
        if (random.NextDouble() < 0.2)
        {
            a[i] = 1;
        }
    }

    int[] result = new int[1];

    cudaDeviceProp prop;
    cuda.GetDeviceProperties(out prop, 0);

    const int BLOCK_DIM = 256;
    int gridSize = 16 * prop.multiProcessorCount;
    HybRunner runner = HybRunner.Cuda().SetDistrib(gridSize, 1, BLOCK_DIM, 1, 1, BLOCK_DIM * sizeof(int));
    dynamic wrapped = runner.Wrap(new Program());

    wrapped.ReduceAdd(N, a, result);
    cuda.DeviceSynchronize();

    Console.Out.WriteLine("sum = {0}", result[0]);
    Console.Out.WriteLine("expected = {0}", a.Aggregate((i, j) => i + j));
}
/// <summary>
/// Loads lena512.bmp, converts its pixels to float, calls <c>Sobel</c> through
/// a HybRunner.Cuda wrapper, converts the result back to ushort and saves it.
/// </summary>
static void Main(string[] args)
{
    HybRunner runner = HybRunner.Cuda().SetDistrib(32, 32, 16, 16, 1, 0);

    GrayBitmap image = GrayBitmap.Load("../../images/lena512.bmp");
    uint height = image.Height, width = image.Width;
    uint pixelCount = width * height;

    // ushort -> float copy of the input pixels
    ushort[] inputPixels = image.PixelsUShort;
    float[] imageFloat = new float[pixelCount];
    float[] imageCompute = new float[pixelCount];
    for (int p = 0; p < pixelCount; ++p)
    {
        imageFloat[p] = (float)inputPixels[p];
    }

    dynamic wrapper = runner.Wrap(new Program());
    wrapper.Sobel(imageFloat, imageCompute, (int)width, (int)height);

    // float -> ushort copy of the computed pixels
    ushort[] outputPixel = new ushort[pixelCount];
    for (int p = 0; p < pixelCount; ++p)
    {
        outputPixel[p] = (ushort)imageCompute[p];
    }

    GrayBitmap imageSobel = new GrayBitmap(width, height);
    imageSobel.PixelsUShort = outputPixel;
    imageSobel.Save("../../output-01-gpu/sobel.bmp");
}
/// <summary>
/// Reads the input image into a byte buffer, calls <c>ComputeSobel</c> through
/// a HybRunner.Cuda wrapper, saves the result and tries to open it.
/// </summary>
static void Main(string[] args)
{
    int width, height;
    byte[] inputPixels;

    // Open the input image and copy its pixels; the bitmap is disposed as soon as
    // the copy is done so the source file is not kept open for the rest of the run.
    using (Bitmap baseImage = (Bitmap)Image.FromFile("../../images/lena_highres_greyscale.bmp"))
    {
        height = baseImage.Height;
        width = baseImage.Width;
        inputPixels = new byte[width * height];
        ReadImage(inputPixels, baseImage, width, height);
    }

    byte[] outputPixels = new byte[width * height];

    // Managed arrays are handed directly to the wrapper; nothing is locked or pinned here.
    HybRunner runner = HybRunner.Cuda().SetDistrib(32, 32, 16, 16, 1, 0);
    dynamic wrapper = runner.Wrap(new Program());
    wrapper.ComputeSobel(outputPixels, inputPixels, width, height);

    SaveImage("lena_highres_sobel.bmp", outputPixels, width, height);

    // Best effort only: opening the result is expected to fail on non-interactive machines.
    try
    {
        Process.Start("lena_highres_sobel.bmp");
    }
    catch
    {
    }
}
/// <summary>
/// Fills an array with seeded random floats, calls <c>ReduceMax</c> and
/// <c>ReduceAdd</c> through a HybRunner.Cuda wrapper, and compares both results
/// with PLINQ reference values. Exits with code 1 on mismatch, prints "OK" otherwise.
/// </summary>
static void Main(string[] args)
{
    const int N = 1024 * 1024 * 32;
    float[] a = new float[N];

    // initialization
    // System.Random is not thread-safe: sharing one instance across Parallel.For
    // workers can corrupt its state. Fill sequentially so the data is valid and
    // the fixed seed actually gives a reproducible input.
    Random random = new Random(42);
    for (int i = 0; i < N; ++i)
    {
        a[i] = (float)random.NextDouble();
    }

    // hybridizer configuration
    cudaDeviceProp prop;
    cuda.GetDeviceProperties(out prop, 0);
    int gridDimX = 16 * prop.multiProcessorCount;
    int blockDimX = 256;
    cuda.DeviceSetCacheConfig(cudaFuncCache.cudaFuncCachePreferShared);
    HybRunner runner = HybRunner.Cuda().SetDistrib(gridDimX, 1, blockDimX, 1, 1, blockDimX * sizeof(float));

    float[] buffMax = new float[1];
    float[] buffAdd = new float[1];
    var maxReductor = new GridReductor<MaxReductor>();
    var addReductor = new GridReductor<AddReductor>();
    dynamic wrapped = runner.Wrap(new EntryPoints());

    // device reduction
    wrapped.ReduceMax(maxReductor, buffMax, a, N);
    wrapped.ReduceAdd(addReductor, buffAdd, a, N);
    cuda.ERROR_CHECK(cuda.DeviceSynchronize());

    // check results
    float expectedMax = a.AsParallel().Aggregate((x, y) => Math.Max(x, y));
    float expectedAdd = a.AsParallel().Aggregate((x, y) => x + y);
    bool hasError = false;

    if (buffMax[0] != expectedMax)
    {
        Console.Error.WriteLine($"MAX Error : {buffMax[0]} != {expectedMax}");
        hasError = true;
    }

    // addition is not associative, so results cannot be exactly the same
    // https://en.wikipedia.org/wiki/Associative_property#Nonassociativity_of_floating_point_calculation
    if (Math.Abs(buffAdd[0] - expectedAdd) / expectedAdd > 1.0E-5F)
    {
        Console.Error.WriteLine($"ADD Error : {buffAdd[0]} != {expectedAdd}");
        hasError = true;
    }

    if (hasError)
    {
        Environment.Exit(1);
    }

    Console.Out.WriteLine("OK");
}
/// <summary>
/// Creates the 1024x1024 "Hybridizer Mandelbulb" window, runs <c>Init</c>,
/// sets up the CUDA runner and wrapped <c>Mandelbulb</c>, then checks for
/// pending CUDA errors.
/// </summary>
public Program()
    : base(1024, 1024, GraphicsMode.Default, "Hybridizer Mandelbulb", GameWindowFlags.Default)
{
    // disable resize
    WindowBorder = WindowBorder.Fixed;

    Init();

    runner = HybRunner.Cuda().SetDistrib(32, 32, 16, 16, 1, 0);
    wrapped = runner.Wrap(new Mandelbulb());

    // Report any error left by the setup above, then wait for the device.
    cuda.ERROR_CHECK(cuda.GetLastError());
    cuda.ERROR_CHECK(cuda.DeviceSynchronize());
}
/// <summary>
/// Multiplies two matrices <c>redo</c> times through a HybRunner.Cuda wrapper
/// and then <c>redo</c> times in C# with <c>Parallel.For</c>.
/// </summary>
/// <param name="args">
/// Either empty (all sizes default to 512) or four integers:
/// heightA, widthA, heightB, widthB.
/// </param>
/// <exception cref="ArgumentException">
/// Fewer than four arguments were supplied, or widthA differs from heightB.
/// </exception>
static void Main(string[] args)
{
    if (args.Length == 0)
    {
        args = new string[] { "512", "512", "512", "512" };
    }
    else if (args.Length < 4)
    {
        // Fail with a clear message instead of an IndexOutOfRangeException below.
        throw new ArgumentException("invalid data -- expected 4 arguments: heightA widthA heightB widthB", "args");
    }

    const int redo = 10;

    int heightA = Convert.ToInt32(args[0]);
    int widthA = Convert.ToInt32(args[1]);
    int heightB = Convert.ToInt32(args[2]);
    int widthB = Convert.ToInt32(args[3]);
    if (widthA != heightB)
    {
        throw new ArgumentException("invalid data -- incompatible matrices");
    }

    Console.WriteLine("Execution Naive matrix mul with sizes ({0}, {1}) x ({2}, {3})", heightA, widthA, heightB, widthB);

    NaiveMatrix matrixA = new NaiveMatrix(widthA, heightA);
    NaiveMatrix matrixB = new NaiveMatrix(widthB, heightB);
    NaiveMatrix res_net = new NaiveMatrix(widthB, heightA);
    NaiveMatrix res_cuda = new NaiveMatrix(widthB, heightA);

    matrixA.FillMatrix();
    matrixB.FillMatrix();

    #region CUDA
    // NOTE(review): if these arguments are (gridX, gridY, blockX, blockY, blockZ, shared)
    // as the other SetDistrib calls suggest, the block is 8*32*32 threads -- confirm
    // against the SetDistrib overload before relying on this configuration.
    HybRunner runner = HybRunner.Cuda().SetDistrib(4, 5, 8, 32, 32, 0);
    dynamic wrapper = runner.Wrap(new Program());

    for (int i = 0; i < redo; ++i)
    {
        wrapper.ComputeRowsOfProduct(res_cuda, matrixA, matrixB, 0, res_cuda.Height);
    }
    #endregion

    #region C#
    for (int i = 0; i < redo; ++i)
    {
        // One call per output row: [line, line + 1).
        Parallel.For(0, res_net.Height, (line) =>
        {
            ComputeRowsOfProduct(res_net, matrixA, matrixB, line, line + 1);
        });
    }
    #endregion

    Console.Out.WriteLine("DONE");
}
/// <summary>
/// Loads the noisy grayscale image, derives a row chunk size from the current
/// device's properties, calls <c>ParForGPU</c> on a wrapped <c>Filter</c>,
/// prints total and kernel timings in seconds and saves the output image.
/// </summary>
static void Main(string[] args)
{
    int currentDevice;
    cuda.GetDevice(out currentDevice);
    cudaDeviceProp prop;
    cuda.GetDeviceProperties(out prop, currentDevice);

    GrayBitmap image = GrayBitmap.Load("../../images/lena_highres_greyscale_noise.bmp");
    GrayBitmap denoised = new GrayBitmap(image.Width, image.Height);
    ushort[] input = image.PixelsUShort;
    ushort[] output = new ushort[image.Width * image.Height];

    Stopwatch watch = Stopwatch.StartNew();

    // Rows are split across half the multiprocessors when major >= 6 and minor == 0,
    // and across all of them otherwise; chunk is the rounded-up rows per share.
    bool useHalf = (prop.major >= 6) && (prop.minor == 0);
    int shares = useHalf ? (prop.multiProcessorCount / 2) : prop.multiProcessorCount;
    int chunk = ((int)image.Height + shares - 1) / shares;
    Console.Out.WriteLine("Chunk size = {0}", chunk);

    int gridY = ((int)image.Height + chunk - 1) / chunk;
    dim3 grid = new dim3(16, gridY, 1);
    dim3 block = new dim3(128, 1, 1);

    // Wrap a new Filter instance so the call below is dispatched by the CUDA runner.
    HybRunner runner = HybRunner.Cuda();
    dynamic wrapper = runner.Wrap(new Filter());
    wrapper.SetDistrib(grid, block).ParForGPU(output, input, (int)image.Width, (int)image.Height, chunk);
    cuda.DeviceSynchronize();

    watch.Stop();

    // Milliseconds -> seconds, two decimals.
    string time = (watch.ElapsedMilliseconds * 1.0E-3).ToString("0.00");
    string kernelTime = (runner.LastKernelDuration.ElapsedMilliseconds * 1.0E-3).ToString("0.00");
    Console.WriteLine($"SweepSort GPU time : {time}");
    Console.WriteLine($"SweepSort GPU -- kernel time : {kernelTime}");

    denoised.PixelsUShort = output;
    denoised.Save("../../output-07-cache-aware-gpu/denoised.bmp");

    cuda.DeviceReset();
}
/// <summary>
/// Allocates and randomly initialises float4 option data, runs
/// <c>BlackScholes</c> <c>NUM_ITERATIONS</c> times through a HybRunner.Cuda
/// wrapper and again in C# with <c>Parallel.For</c>, then reports the
/// difference via <c>WriteCalculationError</c>.
/// </summary>
static void Main(string[] args)
{
    // Every array holds OPT_N / 4 float4 elements.
    var quadCount = OPT_N / 4;

    float4[] callResult_net = new float4[quadCount];
    float4[] putResult_net = new float4[quadCount];
    float4[] stockPrice_net = new float4[quadCount];
    float4[] optionStrike_net = new float4[quadCount];
    float4[] optionYears_net = new float4[quadCount];
    float4[] callResult_cuda = new float4[quadCount];
    float4[] putResult_cuda = new float4[quadCount];

    Random rand = new Random(Guid.NewGuid().GetHashCode());

    for (int q = 0; q < quadCount; ++q)
    {
        // Call results start at 0, put results at -1, for both the C# and CUDA buffers.
        callResult_net[q] = new float4(0.0f, 0.0f, 0.0f, 0.0f);
        putResult_net[q] = new float4(-1.0f, -1.0f, -1.0f, -1.0f);
        callResult_cuda[q] = new float4(0.0f, 0.0f, 0.0f, 0.0f);
        putResult_cuda[q] = new float4(-1.0f, -1.0f, -1.0f, -1.0f);

        stockPrice_net[q] = rand.NextFloat4(5.0f, 30.0f);
        optionStrike_net[q] = rand.NextFloat4(1.0f, 100.0f);
        optionYears_net[q] = rand.NextFloat4(0.25f, 10f);
    }

    cudaDeviceProp prop;
    cuda.GetDeviceProperties(out prop, 0);
    int gridSize = 8 * prop.multiProcessorCount;
    HybRunner runner = HybRunner.Cuda("BlackScholesFloat4_CUDA.dll").SetDistrib(gridSize, 256);
    dynamic wrapper = runner.Wrap(new Program());

    for (int iteration = 0; iteration < NUM_ITERATIONS; ++iteration)
    {
        wrapper.BlackScholes(callResult_cuda, putResult_cuda, stockPrice_net, optionStrike_net, optionYears_net, 0, OPT_N / 4);
    }

    for (int iteration = 0; iteration < NUM_ITERATIONS; ++iteration)
    {
        // One call per element: [opt, opt + 1).
        Parallel.For(0, quadCount, (opt) =>
        {
            BlackScholes(callResult_net, putResult_net, stockPrice_net, optionStrike_net, optionYears_net, opt, opt + 1);
        });
    }

    WriteCalculationError(callResult_net, callResult_cuda, putResult_net, putResult_cuda);
}
/// <summary>
/// Calls <c>Run</c> on a small integer array through a HybRunner.Cuda wrapper
/// and waits for the device before returning.
/// </summary>
static void Main(string[] args)
{
    int[] a = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 };

    // create an instance of HybRunner object to wrap calls on GPU
    HybRunner runner = HybRunner.Cuda().SetDistrib(4, 4);

    // create a wrapper object to call GPU methods instead of C#
    dynamic wrapped = runner.Wrap(new Program());

    // run the method on GPU
    wrapped.Run(a.Length, a);

    // Synchronize before Main returns so the device work has completed, and
    // check the returned status instead of discarding it.
    cuda.ERROR_CHECK(cuda.DeviceSynchronize());
}
/// <summary>
/// Wraps this instance with a HybRunner.Cuda runner loaded from
/// <paramref name="dllName"/> and forwards all remaining arguments to
/// <c>ParallelCalculateHeatMap</c> on the wrapper.
/// </summary>
public void ParallelCalculateHeatMap_Cuda(string dllName, double[,] heatMap, int width, int height, float widthTerrain, float heightTerrain, float destinationX, float destinationY)
{
    cudaDeviceProp prop;
    cuda.GetDeviceProperties(out prop, 0);

    // Distribution derived from the multiprocessor count of device 0.
    int gridSize = prop.multiProcessorCount * 16;
    HybRunner runner = HybRunner.Cuda(dllName).SetDistrib(gridSize, 128);

    // The wrapper dispatches the call below through the CUDA runner instead of plain C#.
    dynamic wrapped = runner.Wrap(this);
    wrapped.ParallelCalculateHeatMap(
        heatMap,
        width,
        height,
        widthTerrain,
        heightTerrain,
        destinationX,
        destinationY);
}
/// <summary>
/// Derives a search range for the totient from the regressions in
/// <paramref name="data"/>, optionally prints diagnostics, then calls
/// <c>GuessTotient</c> through a HybRunner.Cuda wrapper.
/// </summary>
/// <param name="data">Regression statistics used for the predictions.</param>
/// <param name="n">The modulus.</param>
/// <param name="keySize">Key size passed to the significant-digits regression.</param>
/// <param name="e">Not used by this method.</param>
/// <param name="realTotient">Known totient, used for diagnostics and the target value.</param>
/// <param name="debug">When true, prints the comparison between n and the totient.</param>
public static void Run(Stats data, BigInteger n, int keySize, BigInteger e, BigInteger realTotient, bool debug)
{
    // Convert once; both strings are reused several times below.
    var nStr = n.ToString();
    var totStr = realTotient.ToString();

    var expectedSigDigits = (int)Math.Round(data.SigDigitsRegression.GetRegressionCurve().ValueAt(keySize) - .5);
    var dynamicNDouble = double.Parse(BigInteger.Parse(nStr.Substring(expectedSigDigits)).ToString());

    //Remove this block after testing
    // Count the leading digits n and the totient have in common. The bound is the
    // shorter of the two strings so a totient with fewer digits cannot be indexed
    // past its end.
    var comparable = Math.Min(nStr.Length, totStr.Length);
    var actualShared = 0;
    while (actualShared < comparable && nStr[actualShared] == totStr[actualShared])
    {
        actualShared++;
    }

    // target, min and max are only consumed by the pending GuessTotient(min, max, target) call.
    var target = BigInteger.Parse(totStr.Substring(expectedSigDigits));
    var interval = data.MinRangeRegression.GetPredictionInterval(dynamicNDouble, .99);
    var min = interval.LowerBound < 0 ? 0 : new BigInteger(interval.LowerBound);
    var max = new BigInteger(interval.UpperBound);

    if (debug)
    {
        Console.WriteLine("\n N: " + nStr.Substring(actualShared));
        Console.WriteLine("Totient: " + totStr.Substring(actualShared));
        Console.WriteLine("Predicted Shared: " + expectedSigDigits);
        Console.WriteLine("Actual Shared: " + actualShared);
        Console.WriteLine("Diff: " + double.Parse(BigInteger.Subtract(n, realTotient).ToString()));
        //Console.WriteLine("Estimated Diff Magnitude: " + mag);
        //Console.WriteLine(max >= target && target >= min);
        Console.WriteLine("Range: " + interval);
    }

    cudaDeviceProp prop;
    cuda.GetDeviceProperties(out prop, 0);

    //if .SetDistrib is not used, the default is .SetDistrib(prop.multiProcessorCount * 16, 128)
    HybRunner runner = HybRunner.Cuda();

    // create a wrapper object to call GPU methods instead of C#
    dynamic wrapped = runner.Wrap(new CryptoExternal());
    wrapped.GuessTotient();
    //wrapped.GuessTotient(min,max,target);

    Console.Out.WriteLine("DONE");
}
/// <summary>
/// Calls <c>ComputeImage</c> <c>redo</c> times with <c>false</c> and, after
/// setting up the CUDA wrapper, <c>redo</c> times with <c>true</c>; then
/// colours the second buffer, saves it as mandelbrot.png and tries to open it.
/// </summary>
static void Main(string[] args)
{
    const int redo = 20;

    int[] light_net = new int[N * N];
    int[] light_cuda = new int[N * N];

    #region c#
    for (int pass = 0; pass < redo; ++pass)
    {
        ComputeImage(light_net, false);
    }
    #endregion c#

    HybRunner runner = HybRunner.Cuda("Mandelbrot_CUDA.dll").SetDistrib(32, 32, 16, 16, 1, 0);
    wrapper = runner.Wrap(new Program());

    // profile with nsight to get performance
    #region cuda
    for (int pass = 0; pass < redo; ++pass)
    {
        ComputeImage(light_cuda, true);
    }
    #endregion

    #region save to image
    // Palette: one colour per value below maxiter, black for maxiter itself.
    Color[] colors = new Color[maxiter + 1];
    for (int k = 0; k < maxiter; ++k)
    {
        int red = (int)(127.0F * (float)k / (float)maxiter);
        int green = (int)(200.0F * (float)k / (float)maxiter);
        int blue = (int)(90.0F * (float)k / (float)maxiter);
        colors[k] = Color.FromArgb(red, green, blue);
    }
    colors[maxiter] = Color.Black;

    Bitmap image = new Bitmap(N, N);
    for (int x = 0; x < N; ++x)
    {
        int rowStart = x * N;
        for (int y = 0; y < N; ++y)
        {
            Color pixel = colors[light_cuda[rowStart + y]];
            image.SetPixel(x, y, pixel);
        }
    }

    image.Save("mandelbrot.png", System.Drawing.Imaging.ImageFormat.Png);
    #endregion

    // Best effort only: opening the result is expected to fail on non-interactive machines.
    try
    {
        Process.Start("mandelbrot.png");
    }
    catch
    {
    }
}
/// <summary>
/// Multiplies two matrices <c>redo</c> times through a HybRunner.Cuda wrapper
/// loaded from SharedMatrix_CUDA.dll, then computes the C# reference once.
/// </summary>
/// <param name="args">
/// Either empty (all sizes default to 512) or four integers:
/// heightA, widthA, heightB, widthB.
/// </param>
/// <exception cref="ArgumentException">
/// Fewer than four arguments were supplied, or widthA differs from heightB.
/// </exception>
static void Main(string[] args)
{
    if (args.Length == 0)
    {
        args = new string[] { "512", "512", "512", "512" };
    }
    else if (args.Length < 4)
    {
        // Fail with a clear message instead of an IndexOutOfRangeException below.
        throw new ArgumentException("invalid data -- expected 4 arguments: heightA widthA heightB widthB", "args");
    }

    const int redo = 10;

    int heightA = Convert.ToInt32(args[0]);
    int widthA = Convert.ToInt32(args[1]);
    int heightB = Convert.ToInt32(args[2]);
    int widthB = Convert.ToInt32(args[3]);
    if (widthA != heightB)
    {
        throw new ArgumentException("invalid data -- incompatible matrices");
    }

    Console.WriteLine("Execution Naive matrix mul with sizes ({0}, {1}) x ({2}, {3})", heightA, widthA, heightB, widthB);

    NaiveMatrix matrixA = new NaiveMatrix(widthA, heightA);
    NaiveMatrix matrixB = new NaiveMatrix(widthB, heightB);
    NaiveMatrix res_net = new NaiveMatrix(widthB, heightA);
    NaiveMatrix res_cuda = new NaiveMatrix(widthB, heightA);

    matrixA.FillMatrix();
    matrixB.FillMatrix();

    #region CUDA
    HybRunner runner = HybRunner.Cuda("SharedMatrix_CUDA.dll").SetDistrib(4, 5, 32, 32, 1, 1024 * 2 * 8);
    dynamic wrapper = runner.Wrap(new Program());

    for (int i = 0; i < redo; ++i)
    {
        wrapper.Multiply(res_cuda, matrixA, matrixB, matrixA.Width);
    }
    #endregion

    #region C#
    Reference(res_net, matrixA, matrixB);
    #endregion

    Console.Out.WriteLine("DONE");
}
/// <summary>
/// Loads lena512.bmp, converts its pixels to float, exposes them as a CUDA
/// texture object, calls <c>Sobel</c> through a HybRunner.Cuda wrapper with
/// that texture as input, and saves the result.
/// </summary>
static void Main(string[] args)
{
    HybRunner runner = HybRunner.Cuda().SetDistrib(32, 32, 16, 16, 1, 0);

    GrayBitmap image = GrayBitmap.Load("../../images/lena512.bmp");
    uint height = image.Height, width = image.Width;
    uint pixelCount = width * height;

    // ushort -> float copy of the input pixels
    ushort[] inputPixels = image.PixelsUShort;
    float[] imageFloat = new float[pixelCount];
    float[] imageCompute = new float[pixelCount];
    for (int p = 0; p < pixelCount; ++p)
    {
        imageFloat[p] = (float)inputPixels[p];
    }

    // Marshal the float image through the runner, then build a CUDA array from it.
    IntPtr src = runner.Marshaller.MarshalManagedToNative(imageFloat);
    cudaChannelFormatDesc channelDescTex = TextureHelpers.cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKind.cudaChannelFormatKindFloat);
    cudaArray_t cuArrayTex = TextureHelpers.CreateCudaArray(channelDescTex, src, (int)width, (int)height);
    cudaResourceDesc resDescTex = TextureHelpers.CreateCudaResourceDesc(cuArrayTex);

    // Texture descriptor and texture object
    cudaTextureDesc texDesc = TextureHelpers.CreateCudaTextureDesc();
    cudaTextureObject_t texObj;
    cuda.CreateTextureObject(out texObj, ref resDescTex, ref texDesc);

    // Run Sobel with the texture object as input and a managed float array as output.
    dynamic wrapper = runner.Wrap(new Program());
    wrapper.Sobel(texObj, imageCompute, (int)width, (int)height);

    // float -> ushort copy of the computed pixels
    ushort[] outputPixel = new ushort[pixelCount];
    for (int p = 0; p < pixelCount; ++p)
    {
        outputPixel[p] = (ushort)imageCompute[p];
    }

    GrayBitmap imageSobel = new GrayBitmap(width, height);
    imageSobel.PixelsUShort = outputPixel;
    imageSobel.Save("../../output-03-surface/sobel.bmp");
}
/// <summary>
/// Allocates three one-million-element double arrays and calls <c>Run</c> on
/// them through a HybRunner.Cuda wrapper loaded from
/// DotnetosGPU.Hybridizer_CUDA.dll.
/// </summary>
public static void Main()
{
    const int n = 1_000_000;

    double[] a = new double[n];
    double[] b = new double[n];
    double[] results = new double[n];

    cudaDeviceProp prop;
    cuda.GetDeviceProperties(out prop, 0);

    // Distribution derived from the multiprocessor count of device 0.
    int gridSize = prop.multiProcessorCount * 16;
    HybRunner runner = HybRunner.Cuda("DotnetosGPU.Hybridizer_CUDA.dll");
    runner = runner.SetDistrib(gridSize, 256);

    // The wrapper dispatches the call below through the CUDA runner instead of plain C#.
    dynamic wrapped = runner.Wrap(new Program());
    wrapped.Run(n, a, b, results);
}
/// <summary>
/// Calls <c>Run</c> on a small integer array through a HybRunner.Cuda wrapper,
/// then synchronizes the device with error checking.
/// </summary>
static void Main(string[] args)
{
    int[] a = new int[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 };

    // The runner wraps calls so they are executed on the GPU.
    HybRunner runner = HybRunner.Cuda();
    runner = runner.SetDistrib(4, 4);

    // The wrapper dispatches the call below through the CUDA runner instead of plain C#.
    dynamic wrapped = runner.Wrap(new Program());
    wrapped.Run(a.Length, a);

    // Synchronize the GPU to flush stdout on the device, checking the returned status.
    var status = cuda.DeviceSynchronize();
    cuda.ERROR_CHECK(status);
}
/// <summary>
/// Runs <c>Run</c> once through a HybRunner.Cuda wrapper and once directly in
/// .NET on identical inputs, printing "ERROR !" for every element that differs
/// and "DONE" at the end.
/// </summary>
/// <param name="dllname">Name of the CUDA dll passed to <c>HybRunner.Cuda</c>.</param>
public void Test(string dllname)
{
    // 268 MB allocated on device -- should fit in every CUDA compatible GPU
    int N = 1024 * 1024 * 16;

    double[] acuda = new double[N];
    double[] b = new double[N];

    // Random inputs; the .NET run gets an exact copy of the array used by the CUDA run.
    Random rand = new Random();
    for (int i = 0; i < N; ++i)
    {
        acuda[i] = rand.NextDouble();
        b[i] = rand.NextDouble();
    }
    double[] adotnet = (double[])acuda.Clone();

    cudaDeviceProp prop;
    cuda.GetDeviceProperties(out prop, 0);
    int gridSize = prop.multiProcessorCount * 16;
    HybRunner runner = HybRunner.Cuda(dllname).SetDistrib(gridSize, 128);

    // The wrapper dispatches Run through the CUDA runner instead of plain C#.
    dynamic wrapped = runner.Wrap(this);
    wrapped.Run(N, acuda, b);

    // Same method, executed directly in .NET.
    Run(N, adotnet, b);

    // Verify the results element by element (exact comparison).
    for (int k = 0; k < N; ++k)
    {
        bool same = acuda[k] == adotnet[k];
        if (!same)
        {
            Console.Out.WriteLine("ERROR !");
        }
    }

    Console.Out.WriteLine("DONE");
    //Thread.Sleep(10000);
}
/// <summary>
/// Queries device 0, wraps a new <c>Program</c> with a default HybRunner.Cuda
/// runner, calls <c>Run</c> with no arguments and prints "DONE".
/// </summary>
static void Main(string[] args)
{
    // Sample arrays; not passed to Run in this version.
    int[] a = new int[] { 1, 2, 3, 4, 5 };
    int[] b = new int[] { 10, 20, 30, 40, 50 };

    cudaDeviceProp prop;
    cuda.GetDeviceProperties(out prop, 0);

    //if .SetDistrib is not used, the default is .SetDistrib(prop.multiProcessorCount * 16, 128)
    HybRunner runner = HybRunner.Cuda();

    // The wrapper dispatches the call below through the CUDA runner instead of plain C#.
    dynamic wrapped = runner.Wrap(new Program());
    wrapped.Run();

    Console.Out.WriteLine("DONE");
}
/// <summary>
/// Reads lena512.bmp into a 512x512 byte array, calls <c>ComputeSobel</c>
/// through a HybRunner.Cuda wrapper, saves the result and tries to open it.
/// </summary>
static void Main(string[] args)
{
    const int size = 512;
    byte[,] inputPixels = new byte[size, size];
    byte[,] outputPixels = new byte[size, size];

    // The bitmap is only needed while its pixels are copied; dispose it right
    // after so the source file is not kept open for the rest of the run.
    using (Bitmap baseImage = (Bitmap)Image.FromFile("lena512.bmp"))
    {
        ReadImage(inputPixels, baseImage, size);
    }

    HybRunner runner = HybRunner.Cuda().SetDistrib(32, 32, 16, 16, 1, 0);
    dynamic wrapper = runner.Wrap(new Program());
    wrapper.ComputeSobel(outputPixels, inputPixels);

    SaveImage("lena-sobel.bmp", outputPixels, size);

    // Best effort only: opening the result is expected to fail on non-interactive machines.
    try
    {
        Process.Start("lena-sobel.bmp");
    }
    catch
    {
    }
}
/// <summary>
/// Reads lena512.bmp into a 512x512 byte array, calls <c>ComputeSobel</c>
/// through a HybRunner.Cuda wrapper loaded from Sobel_2D_CUDA.dll, saves the
/// result and tries to open it.
/// </summary>
static void Main(string[] args)
{
    const int size = 512;
    byte[,] inputPixels = new byte[size, size];
    byte[,] outputPixels = new byte[size, size];

    // The bitmap is only needed while its pixels are copied; dispose it right
    // after so the source file is not kept open for the rest of the run.
    using (Bitmap baseImage = (Bitmap)Image.FromFile("lena512.bmp"))
    {
        ReadImage(inputPixels, baseImage, size);
    }

    HybRunner runner = HybRunner.Cuda("Sobel_2D_CUDA.dll").SetDistrib(32, 32, 16, 16, 1, 0);
    dynamic wrapper = runner.Wrap(new Program());
    wrapper.ComputeSobel(outputPixels, inputPixels);

    SaveImage("lena-sobel.bmp", outputPixels, size);

    // Best effort only: the image is already saved, so a failure to open it
    // (e.g. on a non-interactive machine) must not crash the program.
    try
    {
        Process.Start("lena-sobel.bmp");
    }
    catch
    {
    }
}
/// <summary>
/// Locks the source and destination bitmaps, registers their buffers with
/// CUDA, calls <c>ComputeSobel</c> on the resulting device pointers through a
/// HybRunner.Cuda wrapper, then unregisters, unlocks, saves and tries to open
/// the result.
/// </summary>
static void Main(string[] args)
{
    // Open the input image and lock its content for read operations.
    Bitmap baseImage = (Bitmap)Image.FromFile("lena512.bmp");
    PixelFormat format = baseImage.PixelFormat;
    Rectangle sourceArea = new Rectangle(0, 0, baseImage.Width, baseImage.Height);
    var lockedSource = baseImage.LockBits(sourceArea, ImageLockMode.ReadOnly, format);
    IntPtr srcData = lockedSource.Scan0;

    // Buffer size is taken as width * height (one byte per pixel).
    int imageBytes = baseImage.Width * baseImage.Height;

    // Create a result image with the same pixel format and lock it for write operations.
    Bitmap resImage = new Bitmap(baseImage.Width, baseImage.Height, format);
    Rectangle destArea = new Rectangle(0, 0, baseImage.Width, baseImage.Height);
    BitmapData lockedDest = resImage.LockBits(destArea, ImageLockMode.WriteOnly, format);
    IntPtr destData = lockedDest.Scan0;

    // Pin both image buffers for CUDA and fetch the matching device pointers.
    uint registerFlags = (uint)cudaHostAllocFlags.cudaHostAllocMapped;
    cuda.HostRegister(srcData, imageBytes, registerFlags);
    cuda.HostRegister(destData, imageBytes, registerFlags);

    IntPtr d_input, d_result;
    cuda.HostGetDevicePointer(out d_input, srcData, cudaGetDevicePointerFlags.cudaReserved);
    cuda.HostGetDevicePointer(out d_result, destData, cudaGetDevicePointerFlags.cudaReserved);

    // Run the kernel and wait for it.
    HybRunner runner = HybRunner.Cuda().SetDistrib(32, 32, 16, 16, 1, 0);
    dynamic wrapper = runner.Wrap(new Program());
    wrapper.ComputeSobel(d_result, d_input, baseImage.Width, baseImage.Height);
    cuda.DeviceSynchronize();

    // Unregister pinned memory (destination first, then source).
    cuda.HostUnregister(destData);
    cuda.HostUnregister(srcData);

    // Unlock images.
    resImage.UnlockBits(lockedDest);
    baseImage.UnlockBits(lockedSource);

    // Save the result with the source palette.
    resImage.Palette = baseImage.Palette;
    resImage.Save("lena_sobel.bmp");

    // Best effort only: opening the result is expected to fail on non-interactive machines.
    try
    {
        Process.Start("lena_sobel.bmp");
    }
    catch
    {
    }
}
/// <summary>
/// Fills a 1024-element array with its own indices, calls <c>Run</c> through a
/// default HybRunner.Cuda wrapper and prints the value left in
/// <c>result[0]</c>.
/// </summary>
static void Main(string[] args)
{
    const int N = 1024;

    // a[i] = i for every element
    int[] a = new int[N];
    for (int index = 0; index < a.Length; ++index)
    {
        a[index] = index;
    }

    int[] result = new int[1];

    HybRunner runner = HybRunner.Cuda();

    // The wrapper dispatches the call below through the CUDA runner instead of plain C#.
    dynamic wrapped = runner.Wrap(new Program());
    wrapped.Run(N, a, result);

    cuda.DeviceSynchronize();

    Console.Out.WriteLine("sum = {0}", result[0]);
}
/// <summary>
/// Calls <c>Run</c> on a wrapped <c>LoadTest</c> through a default
/// HybRunner.Cuda runner and prints start/finish timestamps plus the elapsed
/// time split into seconds and milliseconds.
/// </summary>
/// <param name="a">First array passed to <c>Run</c>.</param>
/// <param name="b">Second array passed to <c>Run</c>.</param>
internal static void RunOnGPU(double[] a, double[] b)
{
    cudaDeviceProp prop;
    cuda.GetDeviceProperties(out prop, 0);

    //if .SetDistrib is not used, the default is .SetDistrib(prop.multiProcessorCount * 16, 128)
    HybRunner runner = HybRunner.Cuda();

    // The wrapper dispatches Run through the CUDA runner instead of plain C#.
    dynamic wrapped = runner.Wrap(new LoadTest());

    Console.Out.WriteLine("\n[" + DateTime.Now.ToLongTimeString() + "] Started heavy load task on GPU.");

    // Only the Run call itself is timed.
    Stopwatch executionWatch = new Stopwatch();
    executionWatch.Start();
    wrapped.Run(a, b);
    executionWatch.Stop();

    Console.Out.WriteLine("[" + DateTime.Now.ToLongTimeString() + "] Finished heavy load task.");

    // Split the total into whole seconds and the remaining milliseconds.
    long totalMs = executionWatch.ElapsedMilliseconds;
    double elapsedS = Math.Floor((double)totalMs / 1000);
    double elapsedMs = totalMs % 1000;
    Console.Out.WriteLine("The time elapsed during the load is " + elapsedS + " s " + elapsedMs + " ms.");
}
/// <summary>
/// Reads lena512.bmp into a byte buffer, calls <c>ComputeSobel</c> through a
/// HybRunner.Cuda wrapper loaded from Sobel_CUDA.dll, saves the result and
/// tries to open it.
/// </summary>
static void Main(string[] args)
{
    int width, height;
    byte[] inputPixels;

    // The bitmap is only needed while its pixels are copied; dispose it right
    // after so the source file is not kept open for the rest of the run.
    using (Bitmap baseImage = (Bitmap)Image.FromFile("lena512.bmp"))
    {
        height = baseImage.Height;
        width = baseImage.Width;
        inputPixels = new byte[width * height];
        ReadImage(inputPixels, baseImage, width, height);
    }

    byte[] outputPixels = new byte[width * height];

    HybRunner runner = HybRunner.Cuda("Sobel_CUDA.dll").SetDistrib(32, 32, 16, 16, 1, 0);
    dynamic wrapper = runner.Wrap(new Program());

    // The last two arguments are passed as 0 and height.
    wrapper.ComputeSobel(outputPixels, inputPixels, width, height, 0, height);

    SaveImage("lena-sobel.bmp", outputPixels, width, height);

    // Best effort only: opening the result is expected to fail on non-interactive machines.
    try
    {
        Process.Start("lena-sobel.bmp");
    }
    catch
    {
    }
}
/// <summary>
/// Troubleshooting entry point: prints environment information, checks that
/// the CUDA dll exists, can be loaded and exports the expected symbol (exiting
/// with code 1, 2 or 3 on the respective failure), then calls <c>Hello</c>
/// through a HybRunner.Cuda wrapper.
/// </summary>
static void Main()
{
    // Trouble-shooting: print execution directory and pointer size
    Console.Out.WriteLine("Current directory : {0}", Environment.CurrentDirectory);
    Console.Out.WriteLine("Size of IntPtr = {0}", Marshal.SizeOf(IntPtr.Zero));

    // Step 1: make sure the file exists.
    // Replace with the actual dll name - you can read that on the output of the build.
    string path = @"Troubleshooting_CUDA.dll";
    bool dllPresent = File.Exists(path);
    if (!dllPresent)
    {
        Console.Out.WriteLine("Dll could not be found in path, please verify dll is located in the appropriate directory that LoadLibrary may find it");
        Environment.Exit(1);
    }

    // Step 2: make sure it can be loaded -- open the DLL in depends to
    // troubleshoot missing dependencies (may be long to load).
    IntPtr lib = LoadLibrary(path);
    if (lib == IntPtr.Zero)
    {
        string er = ErrorToString(Marshal.GetLastWin32Error());
        Console.Out.WriteLine("Dll could not be loaded : {0}", er);
        Environment.Exit(2);
    }

    // Step 3: try to get the proc address -- open the DLL in depends to see
    // the list of symbols (may be long to load).
    IntPtr procAddress = GetProcAddress(lib, "Hello_Worldx46Hello_ExternCWrapper_CUDA");
    if (procAddress == IntPtr.Zero)
    {
        string er = ErrorToString(Marshal.GetLastWin32Error());
        Console.Out.WriteLine("Could not find symbol in dll : {0}", er);
        Environment.Exit(3);
    }

    cuda.DeviceSynchronize();

    HybRunner runner = HybRunner.Cuda().SetDistrib(1, 2);
    dynamic wrapper = runner.Wrap(new Hello_World());
    wrapper.Hello();
}
/// <summary>
/// Initialises the form: enables the backend controls according to the
/// <c>has*</c> flags, shows the CPU (and, if available, GPU) name, creates one
/// runner and one wrapped <c>Mandelbrot</c> per available backend, fixes the
/// window style, renders the first image and hooks the mouse handlers.
/// </summary>
public Form1()
{
    InitializeComponent();

    // A backend control is enabled only when that backend is available.
    CUDA.Enabled = hasCUDA;
    AVX.Enabled = hasAVX;
    AVX2.Enabled = hasAVX2;
    AVX512.Enabled = hasAVX512;

    if (hasCUDA)
    {
        DisplayGPUName();
    }

    // Show the processor name up to the first '@'; with several processors the last one wins.
    ManagementObjectSearcher mos = new ManagementObjectSearcher("root\\CIMV2", "SELECT * FROM Win32_Processor");
    foreach (ManagementObject mo in mos.Get())
    {
        string cpuName = (string)mo["Name"];
        string[] nameParts = cpuName.Split('@');
        label4.Text = nameParts[0];
    }

    // Runners: one per available backend.
    if (hasCUDA)
    {
        runnerCUDA = HybRunner.Cuda("MandelbrotRenderer_CUDA.dll").SetDistrib(32, 32, 16, 16, 1, 0);
    }
    if (hasAVX)
    {
        runnerAVX = HybRunner.AVX("MandelbrotRenderer_AVX.dll").SetDistrib(Environment.ProcessorCount, 32);
    }
    if (hasAVX2)
    {
        // NOTE(review): the AVX2 dll is loaded through HybRunner.AVX -- confirm this is intended.
        runnerAVX2 = HybRunner.AVX("MandelbrotRenderer_AVX2.dll").SetDistrib(Environment.ProcessorCount, 32);
    }
    if (hasAVX512)
    {
        runnerAVX512 = HybRunner.AVX512("MandelbrotRenderer_AVX512.dll").SetDistrib(Environment.ProcessorCount, 32);
    }

    // Wrappers: created after all runners, in the same backend order.
    if (hasCUDA)
    {
        MandelbrotCUDA = runnerCUDA.Wrap(new Mandelbrot());
    }
    if (hasAVX)
    {
        MandelbrotAVX = runnerAVX.Wrap(new Mandelbrot());
    }
    if (hasAVX2)
    {
        MandelbrotAVX2 = runnerAVX2.Wrap(new Mandelbrot());
    }
    if (hasAVX512)
    {
        MandelbrotAVX512 = runnerAVX512.Wrap(new Mandelbrot());
    }

    // Fixed-size window without maximize/minimize buttons.
    this.FormBorderStyle = FormBorderStyle.FixedSingle;
    this.MaximizeBox = false;
    this.MinimizeBox = false;

    image = new Bitmap(W, H, PixelFormat.Format32bppRgb);
    Rendering.Image = image;
    render();

    Rendering.MouseDown += (s, e) => { ImageOnMouseDown(e); };
    Rendering.MouseMove += (s, e) => { ImageOnMouseMove(e); };
}