Example #1
0
        /// <summary>
        /// Loads the noisy grayscale sample image, calls <c>ParForGPU</c> through a
        /// CUDA-wrapped <c>Program</c> instance with a window of 3, prints the elapsed
        /// wall-clock time in seconds and saves the output image.
        /// </summary>
        /// <param name="args">Command-line arguments; not read.</param>
        static void Main(string[] args)
        {
            GrayBitmap image    = GrayBitmap.Load("../../images/lena_highres_greyscale_noise.bmp");
            GrayBitmap denoised = new GrayBitmap(image.Width, image.Height);

            ushort[] input  = image.PixelsUShort;
            ushort[] output = new ushort[image.Width * image.Height];

            // Timing starts here, so it includes runner creation and wrapping as
            // well as the wrapped call itself.
            Stopwatch watch = Stopwatch.StartNew();

            int window = 3;

            // build the runner, wrap a fresh Program and invoke the wrapped method
            HybRunner runner  = HybRunner.Cuda();
            dynamic   wrapper = runner.Wrap(new Program());
            wrapper.ParForGPU(output, input, (int)image.Width, (int)image.Height, window);

            watch.Stop();

            // milliseconds -> seconds, printed with two decimals
            double seconds = watch.ElapsedMilliseconds * 1.0E-3;
            Console.WriteLine("Naive GPU time : " + seconds.ToString("0.00"));

            denoised.PixelsUShort = output;
            denoised.Save("../../output-03-naive-gpu/denoised.bmp");
        }
Example #2
0
        /// <summary>
        /// Builds a 1D Laplacian sparse matrix and a constant vector, then calls
        /// <c>Multiply</c> through a CUDA-wrapped <c>Program</c> instance <c>redo</c> times.
        /// </summary>
        /// <param name="args">Command-line arguments; not read.</param>
        static void Main(string[] args)
        {
            SparseMatrix A = SparseMatrix.Laplacian_1D(10000000);
            float[]      X = VectorReader.GetSplatVector(10000000, 1.0F);

            int redo = 2;

            // Byte count derived from the matrix array lengths and scaled by redo.
            // It is computed here but not used further in this method.
            double dataBytes            = 3.0 * (double)(A.data.Length * sizeof(float));
            double rowBytes             = (double)(2 * A.rows.Length * sizeof(uint));
            double indexBytes           = (double)(A.indices.Length * sizeof(uint));
            double memoryOperationsSize = (double)redo * (dataBytes + rowBytes + indexBytes);

            Console.WriteLine("matrix read --- starting computations");

            // the result vector has one entry fewer than A.rows
            float[] B = new float[A.rows.Length - 1];

            #region CUDA
            cudaDeviceProp prop;
            cuda.GetDeviceProperties(out prop, 0);

            var       cudaRunner = HybRunner.Cuda("SparseMatrix_CUDA.dll");
            HybRunner runner     = cudaRunner.SetDistrib(8 * prop.multiProcessorCount, 256);
            dynamic   wrapper    = runner.Wrap(new Program());

            int pass = 0;
            while (pass < redo)
            {
                wrapper.Multiply(B, A, X, X.Length);
                ++pass;
            }
            #endregion
        }
Example #3
0
        /// <summary>
        /// Configures the CUDA runner and wrapper (both declared outside this method),
        /// builds a 1D Laplacian system with an all-ones right-hand side and a zero
        /// starting point, and hands it to <c>ConjugateGradient</c>.
        /// </summary>
        /// <param name="args">Command-line arguments; not read.</param>
        static void Main(string[] args)
        {
            const int BLOCK_DIM = 256;

            // configure CUDA
            cudaDeviceProp prop;
            cuda.GetDeviceProperties(out prop, 0);

            var cudaRunner = HybRunner.Cuda();
            runner  = cudaRunner.SetDistrib(16 * prop.multiProcessorCount, 1, BLOCK_DIM, 1, 1, BLOCK_DIM * sizeof(float));
            wrapper = runner.Wrap(new Program());

            // very slow convergence with no preconditioner
            int size = 1000000;

            SparseMatrix       A = SparseMatrix.Laplacian_1D(size);
            FloatResidentArray B = new FloatResidentArray(size);
            FloatResidentArray X = new FloatResidentArray(size);

            // solver limits
            int   maxiter = 1000;
            float eps     = 1.0e-09f;

            int row = 0;
            while (row < size)
            {
                B[row] = 1.0f; // right side
                X[row] = 0.0f; // starting point
                ++row;
            }

            ConjugateGradient(X, A, B, maxiter, eps);
        }
        // Entry point: loads the noisy grayscale sample image, calls ParForGPU through
        // a CUDA-wrapped Filter instance using an explicit grid/block distribution,
        // prints the elapsed wall-clock time in seconds and saves the output image.
        // args is not read.
        static void Main(string[] args)
        {
            GrayBitmap image    = GrayBitmap.Load("../../images/lena_highres_greyscale_noise.bmp");
            GrayBitmap denoised = new GrayBitmap(image.Width, image.Height);

            ushort[] input  = image.PixelsUShort;
            ushort[] output = new ushort[image.Width * image.Height];

            Stopwatch watch = new Stopwatch();

            // the timed region includes runner creation and wrapping, not only the call
            watch.Start();

            // NOTE(review): < gridX >, <gridY>, < blockX > and <blockY> are template
            // placeholders, not valid C#. They must be replaced with concrete integer
            // expressions before this method can compile.
            dim3 grid  = new dim3(< gridX >, <gridY>, 1);
            dim3 block = new dim3(< blockX >, <blockY>, 1);

            // create an instance of runner
            HybRunner runner = HybRunner.Cuda();
            // wrap a new instance of Filter
            dynamic wrapper = runner.Wrap(new Filter());

            // set the grid/block distribution on the wrapper, then run the method on GPU
            wrapper.SetDistrib(grid, block).ParForGPU(output, input, (int)image.Width, (int)image.Height);

            watch.Stop();
            // elapsed milliseconds converted to seconds, formatted with two decimals
            string time = String.Format("{0:0.00}", watch.ElapsedMilliseconds * 1.0E-3);

            Console.WriteLine($"Parallel2D GPU time : {time}");
            denoised.PixelsUShort = output;
            denoised.Save("../../output-05-dice-gpu/denoised.bmp");
        }
Example #5
0
        /// <summary>
        /// Fills an array with random 0/1 values, calls <c>ReduceAdd</c> through a
        /// CUDA-wrapped <c>Program</c> instance and prints the returned sum next to
        /// the sum computed on the host.
        /// </summary>
        /// <param name="args">Command-line arguments; not read.</param>
        static void Main(string[] args)
        {
            const int N         = 1024 * 1024 * 32;
            const int BLOCK_DIM = 256;

            // each entry is 1 when the random draw falls below 0.2, otherwise 0
            Random random = new Random();
            int[]  a      = new int[N];

            int slot = 0;
            while (slot < N)
            {
                if (random.NextDouble() < 0.2)
                {
                    a[slot] = 1;
                }
                else
                {
                    a[slot] = 0;
                }
                ++slot;
            }

            // single-element buffer handed to the wrapped call to receive the sum
            int[] result = new int[1];

            cudaDeviceProp prop;
            cuda.GetDeviceProperties(out prop, 0);

            var       cudaRunner = HybRunner.Cuda();
            HybRunner runner     = cudaRunner.SetDistrib(16 * prop.multiProcessorCount, 1, BLOCK_DIM, 1, 1, BLOCK_DIM * sizeof(int));
            dynamic   wrapped    = runner.Wrap(new Program());

            wrapped.ReduceAdd(N, a, result);

            // wait for the device before reading the result buffer
            cuda.DeviceSynchronize();

            Console.Out.WriteLine("sum =      {0}", result[0]);
            Console.Out.WriteLine("expected = {0}", a.Aggregate((total, value) => total + value));
        }
Example #6
0
        /// <summary>
        /// Loads <c>lena512.bmp</c>, converts its pixels to floats, calls <c>Sobel</c>
        /// through a CUDA-wrapped <c>Program</c> instance, converts the output back to
        /// <c>ushort</c> and saves it.
        /// </summary>
        /// <param name="args">Command-line arguments; not read.</param>
        static void Main(string[] args)
        {
            var       cudaRunner = HybRunner.Cuda();
            HybRunner runner     = cudaRunner.SetDistrib(32, 32, 16, 16, 1, 0);

            GrayBitmap image  = GrayBitmap.Load("../../images/lena512.bmp");
            uint       height = image.Height;
            uint       width  = image.Width;
            uint       pixelCount = width * height;

            ushort[] inputPixels = image.PixelsUShort;

            // widen the input pixels to float for the kernel
            float[] imageFloat   = new float[pixelCount];
            float[] imageCompute = new float[pixelCount];
            int p = 0;
            while (p < pixelCount)
            {
                imageFloat[p] = (float)inputPixels[p];
                ++p;
            }

            dynamic wrapper = runner.Wrap(new Program());
            wrapper.Sobel(imageFloat, imageCompute, (int)width, (int)height);

            // narrow the computed values back to ushort pixels
            ushort[] outputPixel = new ushort[pixelCount];
            int q = 0;
            while (q < pixelCount)
            {
                outputPixel[q] = (ushort)imageCompute[q];
                ++q;
            }

            GrayBitmap imageSobel = new GrayBitmap(width, height);
            imageSobel.PixelsUShort = outputPixel;
            imageSobel.Save("../../output-01-gpu/sobel.bmp");
        }
Example #7
0
        /// <summary>
        /// Reads the high-resolution sample image into a byte buffer, calls
        /// <c>ComputeSobel</c> through a CUDA-wrapped <c>Program</c> instance, saves
        /// the output buffer as an image and tries to open it.
        /// </summary>
        /// <param name="args">Command-line arguments; not read.</param>
        static void Main(string[] args)
        {
            // open the input image
            Bitmap baseImage = (Bitmap)Image.FromFile("../../images/lena_highres_greyscale.bmp");
            int    height    = baseImage.Height;
            int    width     = baseImage.Width;

            // result image of the same size (allocated here, not used below)
            Bitmap resImage = new Bitmap(width, height);

            // one byte per pixel for both buffers
            byte[] inputPixels  = new byte[width * height];
            byte[] outputPixels = new byte[width * height];

            ReadImage(inputPixels, baseImage, width, height);

            var       cudaRunner = HybRunner.Cuda();
            HybRunner runner     = cudaRunner.SetDistrib(32, 32, 16, 16, 1, 0);
            dynamic   wrapper    = runner.Wrap(new Program());

            wrapper.ComputeSobel(outputPixels, inputPixels, width, height);

            SaveImage("lena_highres_sobel.bmp", outputPixels, width, height);

            // opening the image is best effort: any exception is ignored so that
            // non-interactive machines still finish normally
            try
            {
                Process.Start("lena_highres_sobel.bmp");
            }
            catch
            {
            }
        }
Example #8
0
        /// <summary>
        /// Fills a large float array with seeded pseudo-random values, calls
        /// <c>ReduceMax</c> and <c>ReduceAdd</c> through a CUDA-wrapped
        /// <c>EntryPoints</c> instance, and compares both results with values
        /// computed on the host. Exits with code 1 on a mismatch, prints "OK" otherwise.
        /// </summary>
        /// <param name="args">Command-line arguments; not read.</param>
        static void Main(string[] args)
        {
            const int N = 1024 * 1024 * 32;

            float[] a = new float[N];

            // initialization
            // System.Random instances are not thread-safe, so the array is filled from
            // a single thread. Sharing one instance across Parallel.For workers can
            // corrupt its state and defeats the fixed seed.
            Random random = new Random(42);

            for (int i = 0; i < N; ++i)
            {
                a[i] = (float)random.NextDouble();
            }

            // hybridizer configuration
            cudaDeviceProp prop;

            cuda.GetDeviceProperties(out prop, 0);
            int gridDimX  = 16 * prop.multiProcessorCount;
            int blockDimX = 256;

            cuda.DeviceSetCacheConfig(cudaFuncCache.cudaFuncCachePreferShared);
            HybRunner runner = HybRunner.Cuda().SetDistrib(gridDimX, 1, blockDimX, 1, 1, blockDimX * sizeof(float));

            // single-element buffers that receive the reduction results
            float[] buffMax     = new float[1];
            float[] buffAdd     = new float[1];
            var     maxReductor = new GridReductor <MaxReductor>();
            var     addReductor = new GridReductor <AddReductor>();
            dynamic wrapped     = runner.Wrap(new EntryPoints());

            // device reduction
            wrapped.ReduceMax(maxReductor, buffMax, a, N);
            wrapped.ReduceAdd(addReductor, buffAdd, a, N);
            cuda.ERROR_CHECK(cuda.DeviceSynchronize());

            // check results
            float expectedMax = a.AsParallel().Aggregate((x, y) => Math.Max(x, y));
            float expectedAdd = a.AsParallel().Aggregate((x, y) => x + y);
            bool  hasError    = false;

            // max does not depend on evaluation order, so an exact comparison is valid
            if (buffMax[0] != expectedMax)
            {
                Console.Error.WriteLine($"MAX Error : {buffMax[0]} != {expectedMax}");
                hasError = true;
            }

            // addition is not associative, so results cannot be exactly the same
            // https://en.wikipedia.org/wiki/Associative_property#Nonassociativity_of_floating_point_calculation
            if (Math.Abs(buffAdd[0] - expectedAdd) / expectedAdd > 1.0E-5F)
            {
                Console.Error.WriteLine($"ADD Error : {buffAdd[0]} != {expectedAdd}");
                hasError = true;
            }

            if (hasError)
            {
                Environment.Exit(1);
            }

            Console.Out.WriteLine("OK");
        }
 /// <summary>
 /// Creates the 1024x1024 "Hybridizer Mandelbulb" window with a fixed border,
 /// runs <c>Init()</c>, then builds the CUDA runner and wraps a new
 /// <c>Mandelbulb</c> instance.
 /// </summary>
 public Program() : base(1024, 1024, GraphicsMode.Default, "Hybridizer Mandelbulb", GameWindowFlags.Default)
 {
     WindowBorder = WindowBorder.Fixed; // disable resize
     Init();
     // runner and wrapped are declared outside this constructor
     runner  = HybRunner.Cuda().SetDistrib(32, 32, 16, 16, 1, 0);
     wrapped = runner.Wrap(new Mandelbulb());
     // report any CUDA error raised so far, then synchronize and check again
     cuda.ERROR_CHECK(cuda.GetLastError());
     cuda.ERROR_CHECK(cuda.DeviceSynchronize());
 }
Example #10
0
        /// <summary>
        /// Multiplies two matrices <c>redo</c> times through a CUDA-wrapped
        /// <c>Program</c> instance and <c>redo</c> times on the host with
        /// <c>Parallel.For</c>, one row per task.
        /// </summary>
        /// <param name="args">
        /// Four integers: height and width of A, then height and width of B.
        /// All four default to 512 when no arguments are given.
        /// </param>
        /// <exception cref="ArgumentException">The width of A differs from the height of B.</exception>
        static void Main(string[] args)
        {
            const int redo = 10;

            string[] sizes = (args.Length == 0)
                ? new string[] { "512", "512", "512", "512" }
                : args;

            int heightA = Convert.ToInt32(sizes[0]);
            int widthA  = Convert.ToInt32(sizes[1]);
            int heightB = Convert.ToInt32(sizes[2]);
            int widthB  = Convert.ToInt32(sizes[3]);

            // the inner dimensions must agree for the product to be defined
            if (widthA != heightB)
            {
                throw new ArgumentException("invalid data -- incompatible matrices");
            }

            Console.WriteLine("Execution Naive matrix mul with sizes ({0}, {1}) x ({2}, {3})", heightA, widthA, heightB, widthB);

            NaiveMatrix matrixA  = new NaiveMatrix(widthA, heightA);
            NaiveMatrix matrixB  = new NaiveMatrix(widthB, heightB);
            NaiveMatrix res_net  = new NaiveMatrix(widthB, heightA);
            NaiveMatrix res_cuda = new NaiveMatrix(widthB, heightA);

            // computed but not reported in this method
            double numberCompute = ((double)matrixA.Height * (double)matrixA.Width * (double)matrixB.Width) * 3.0E-9;

            matrixA.FillMatrix();
            matrixB.FillMatrix();

            Random rand = new Random();

            #region CUDA

            // NOTE(review): if the six-argument SetDistrib is (gridX, gridY, blockX,
            // blockY, blockZ, sharedBytes), as the other call sites suggest, this asks
            // for 8*32*32 threads per block -- confirm against the device limit.
            var       cudaRunner = HybRunner.Cuda();
            HybRunner runner     = cudaRunner.SetDistrib(4, 5, 8, 32, 32, 0);
            dynamic   wrapper    = runner.Wrap(new Program());

            int devicePass = 0;
            while (devicePass < redo)
            {
                wrapper.ComputeRowsOfProduct(res_cuda, matrixA, matrixB, 0, res_cuda.Height);
                ++devicePass;
            }
            #endregion

            #region C#

            int hostPass = 0;
            while (hostPass < redo)
            {
                // each task computes exactly one row of the result
                Parallel.For(0, res_net.Height, line => ComputeRowsOfProduct(res_net, matrixA, matrixB, line, line + 1));
                ++hostPass;
            }
            #endregion

            Console.Out.WriteLine("DONE");
        }
Example #11
0
        /// <summary>
        /// Loads the noisy sample image, derives a row chunk size from the number of
        /// multiprocessors on the current device, calls <c>ParForGPU</c> through a
        /// CUDA-wrapped <c>Filter</c> instance, prints total and kernel times and saves
        /// the output image.
        /// </summary>
        /// <param name="args">Command-line arguments; not read.</param>
        static void Main(string[] args)
        {
            // query the properties of the device that is currently selected
            int currentDevice;
            cuda.GetDevice(out currentDevice);

            cudaDeviceProp prop;
            cuda.GetDeviceProperties(out prop, currentDevice);

            GrayBitmap image    = GrayBitmap.Load("../../images/lena_highres_greyscale_noise.bmp");
            GrayBitmap denoised = new GrayBitmap(image.Width, image.Height);

            ushort[] input  = image.PixelsUShort;
            ushort[] output = new ushort[image.Width * image.Height];

            Stopwatch watch = Stopwatch.StartNew();

            // Rows are split across half the multiprocessors when major >= 6 and
            // minor == 0, and across all of them otherwise; the division rounds up.
            int divisor = ((prop.major >= 6) && (prop.minor == 0))
                ? (prop.multiProcessorCount / 2)
                : (prop.multiProcessorCount);
            int chunk = ((int)image.Height + divisor - 1) / divisor;

            Console.Out.WriteLine("Chunk size = {0}", chunk);

            // enough grid rows to cover the image height in steps of chunk
            dim3 grid  = new dim3(16, ((int)image.Height + chunk - 1) / chunk, 1);
            dim3 block = new dim3(128, 1, 1);

            HybRunner runner  = HybRunner.Cuda();
            dynamic   wrapper = runner.Wrap(new Filter());

            wrapper.SetDistrib(grid, block).ParForGPU(output, input, (int)image.Width, (int)image.Height, chunk);
            cuda.DeviceSynchronize();

            watch.Stop();

            // both durations are converted from milliseconds to seconds
            double totalSeconds  = watch.ElapsedMilliseconds * 1.0E-3;
            double kernelSeconds = runner.LastKernelDuration.ElapsedMilliseconds * 1.0E-3;

            Console.WriteLine("SweepSort GPU time : " + totalSeconds.ToString("0.00"));
            Console.WriteLine("SweepSort GPU -- kernel time : " + kernelSeconds.ToString("0.00"));

            denoised.PixelsUShort = output;
            denoised.Save("../../output-07-cache-aware-gpu/denoised.bmp");

            cuda.DeviceReset();
        }
Example #12
0
        /// <summary>
        /// Generates random option data, runs <c>BlackScholes</c> <c>NUM_ITERATIONS</c>
        /// times through a CUDA-wrapped <c>Program</c> instance and the same number of
        /// times on the host with <c>Parallel.For</c>, then passes both result sets to
        /// <c>WriteCalculationError</c>.
        /// </summary>
        /// <param name="args">Command-line arguments; not read.</param>
        static void Main(string[] args)
        {
            // inputs shared by both runs
            float4[] stockPrice_net   = new float4[OPT_N / 4];
            float4[] optionStrike_net = new float4[OPT_N / 4];
            float4[] optionYears_net  = new float4[OPT_N / 4];

            // host results
            float4[] callResult_net = new float4[OPT_N / 4];
            float4[] putResult_net  = new float4[OPT_N / 4];

            // device results
            float4[] callResult_cuda = new float4[OPT_N / 4];
            float4[] putResult_cuda  = new float4[OPT_N / 4];

            Random rand = new Random(Guid.NewGuid().GetHashCode());

            for (int slot = 0; slot < OPT_N / 4; ++slot)
            {
                // random inputs, drawn in the order price, strike, years
                stockPrice_net[slot]   = rand.NextFloat4(5.0f, 30.0f);
                optionStrike_net[slot] = rand.NextFloat4(1.0f, 100.0f);
                optionYears_net[slot]  = rand.NextFloat4(0.25f, 10f);

                // call results start at 0, put results at -1
                callResult_net[slot]  = new float4(0.0f, 0.0f, 0.0f, 0.0f);
                callResult_cuda[slot] = new float4(0.0f, 0.0f, 0.0f, 0.0f);
                putResult_net[slot]   = new float4(-1.0f, -1.0f, -1.0f, -1.0f);
                putResult_cuda[slot]  = new float4(-1.0f, -1.0f, -1.0f, -1.0f);
            }

            cudaDeviceProp prop;
            cuda.GetDeviceProperties(out prop, 0);

            var       cudaRunner = HybRunner.Cuda("BlackScholesFloat4_CUDA.dll");
            HybRunner runner     = cudaRunner.SetDistrib(8 * prop.multiProcessorCount, 256);
            dynamic   wrapper    = runner.Wrap(new Program());

            // device run: the whole range in one call per iteration
            for (int iteration = 0; iteration < NUM_ITERATIONS; ++iteration)
            {
                wrapper.BlackScholes(callResult_cuda,
                                     putResult_cuda,
                                     stockPrice_net,
                                     optionStrike_net,
                                     optionYears_net,
                                     0, OPT_N / 4);
            }

            // host run: one element per parallel task
            for (int iteration = 0; iteration < NUM_ITERATIONS; ++iteration)
            {
                Parallel.For(0, OPT_N / 4, opt => BlackScholes(callResult_net,
                                                               putResult_net,
                                                               stockPrice_net,
                                                               optionStrike_net,
                                                               optionYears_net,
                                                               opt,
                                                               opt + 1));
            }

            WriteCalculationError(callResult_net, callResult_cuda, putResult_net, putResult_cuda);
        }
Example #13
0
        /// <summary>
        /// Calls <c>Run</c> on a 13-element integer array through a CUDA-wrapped
        /// <c>Program</c> instance.
        /// </summary>
        /// <param name="args">Command-line arguments; not read.</param>
        static void Main(string[] args)
        {
            int[] a = new int[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 };

            // runner that redirects wrapped calls to the GPU
            var       cudaRunner = HybRunner.Cuda();
            HybRunner runner     = cudaRunner.SetDistrib(4, 4);

            // wrapper whose methods execute on the GPU instead of in C#
            dynamic wrapped = runner.Wrap(new Program());

            wrapped.Run(a.Length, a);
        }
Example #14
0
        /// <summary>
        /// Wraps this instance with a CUDA runner loaded from <paramref name="dllName"/>
        /// and forwards all remaining arguments to the wrapped
        /// <c>ParallelCalculateHeatMap</c>.
        /// </summary>
        /// <param name="dllName">Name of the CUDA dll passed to <c>HybRunner.Cuda</c>.</param>
        /// <param name="heatMap">Heat map array forwarded to the wrapped call.</param>
        /// <param name="width">Forwarded unchanged.</param>
        /// <param name="height">Forwarded unchanged.</param>
        /// <param name="widthTerrain">Forwarded unchanged.</param>
        /// <param name="heightTerrain">Forwarded unchanged.</param>
        /// <param name="destinationX">Forwarded unchanged.</param>
        /// <param name="destinationY">Forwarded unchanged.</param>
        public void ParallelCalculateHeatMap_Cuda(string dllName, double[,] heatMap, int width, int height, float widthTerrain, float heightTerrain, float destinationX, float destinationY)
        {
            cudaDeviceProp deviceProperties;
            cuda.GetDeviceProperties(out deviceProperties, 0);

            // the first distribution argument scales with the multiprocessor count of device 0
            var       cudaRunner = HybRunner.Cuda(dllName);
            HybRunner runner     = cudaRunner.SetDistrib(deviceProperties.multiProcessorCount * 16, 128);

            // wrapper whose methods execute on the GPU instead of in C#
            dynamic wrapped = runner.Wrap(this);

            wrapped.ParallelCalculateHeatMap(heatMap, width, height, widthTerrain, heightTerrain, destinationX, destinationY);
        }
Example #15
0
        /// <summary>
        /// Derives the predicted number of leading digits shared by <paramref name="n"/>
        /// and its totient plus a prediction interval from <paramref name="data"/>,
        /// optionally prints diagnostics, then calls <c>GuessTotient</c> through a
        /// CUDA-wrapped <c>CryptoExternal</c> instance.
        /// </summary>
        /// <param name="data">Regression statistics used for the digit count and the interval.</param>
        /// <param name="n">The modulus.</param>
        /// <param name="keySize">Key size fed into the significant-digits regression.</param>
        /// <param name="e">Not read by this method.</param>
        /// <param name="realTotient">Known totient; only used for diagnostics and the target value.</param>
        /// <param name="debug">When true, prints the diagnostic comparison to the console.</param>
        public static void Run(Stats data, BigInteger n, int keySize, BigInteger e, BigInteger realTotient, bool debug)
        {
            var expectedSigDigits = (int)Math.Round(data.SigDigitsRegression.GetRegressionCurve().ValueAt(keySize) - .5);

            // Convert both numbers to text once and reuse the strings below, rather
            // than calling BigInteger.ToString() again for every use.
            var nStr   = n.ToString();
            var totStr = realTotient.ToString();

            var baseNString    = nStr.Substring(0, expectedSigDigits);
            var dynamicNDouble = double.Parse(BigInteger.Parse(nStr.Substring(expectedSigDigits)).ToString());

            //Remove this block after testing
            // Count the leading digits shared by n and the totient. The scan is bounded
            // by the shorter string so that a totient with fewer digits than n cannot
            // index past its end.
            var sharedLimit  = Math.Min(nStr.Length, totStr.Length);
            var actualShared = 0;

            while (actualShared < sharedLimit && nStr[actualShared] == totStr[actualShared])
            {
                actualShared++;
            }
            var target = BigInteger.Parse(totStr.Substring(expectedSigDigits));

            // the lower bound of the interval is clamped at zero
            var interval = data.MinRangeRegression.GetPredictionInterval(dynamicNDouble, .99);
            var min      = interval.LowerBound < 0 ? 0 : new BigInteger(interval.LowerBound);
            var max      = new BigInteger(interval.UpperBound);

            if (debug)
            {
                Console.WriteLine("\n      N: " + nStr.Substring(actualShared));
                Console.WriteLine("Totient: " + totStr.Substring(actualShared));
                Console.WriteLine("Predicted Shared: " + expectedSigDigits);
                Console.WriteLine("Actual Shared: " + actualShared);
                Console.WriteLine("Diff: " + double.Parse(BigInteger.Subtract(n, realTotient).ToString()));
                //Console.WriteLine("Estimated Diff Magnitude: " + mag);
                //Console.WriteLine(max >= target && target >= min);
                Console.WriteLine("Range: " + interval);
            }

            cudaDeviceProp prop;

            cuda.GetDeviceProperties(out prop, 0);
            //if .SetDistrib is not used, the default is .SetDistrib(prop.multiProcessorCount * 16, 128)
            HybRunner runner = HybRunner.Cuda();

            // create a wrapper object to call GPU methods instead of C#
            dynamic wrapped = runner.Wrap(new CryptoExternal());

            wrapped.GuessTotient();
            //wrapped.GuessTotient(min,max,target);

            Console.Out.WriteLine("DONE");
        }
Example #16
0
        /// <summary>
        /// Runs <c>ComputeImage</c> <c>redo</c> times with the flag set to false and,
        /// after the CUDA wrapper is created, <c>redo</c> times with it set to true.
        /// The values from the second run are mapped through a colour table, saved to
        /// <c>mandelbrot.png</c>, and the file is then opened if possible.
        /// </summary>
        /// <param name="args">Command-line arguments; not read.</param>
        static void Main(string[] args)
        {
            const int redo = 20;

            int[] light_net  = new int[N * N];
            int[] light_cuda = new int[N * N];

            #region c#
            int hostPass = 0;
            while (hostPass < redo)
            {
                ComputeImage(light_net, false);
                ++hostPass;
            }
            #endregion c#

            // wrapper is declared outside this method
            var       cudaRunner = HybRunner.Cuda("Mandelbrot_CUDA.dll");
            HybRunner runner     = cudaRunner.SetDistrib(32, 32, 16, 16, 1, 0);
            wrapper = runner.Wrap(new Program());

            // profile with nsight to get performance
            #region cuda
            int devicePass = 0;
            while (devicePass < redo)
            {
                ComputeImage(light_cuda, true);
                ++devicePass;
            }
            #endregion

            #region save to image
            // one colour per value below maxiter, plus black for the value maxiter itself
            Color[] palette = new Color[maxiter + 1];
            palette[maxiter] = Color.Black;

            for (int level = 0; level < maxiter; ++level)
            {
                int red   = (int)(127.0F * (float)level / (float)maxiter);
                int green = (int)(200.0F * (float)level / (float)maxiter);
                int blue  = (int)(90.0F * (float)level / (float)maxiter);
                palette[level] = Color.FromArgb(red, green, blue);
            }

            // a flat index maps to pixel (index / N, index % N)
            Bitmap image = new Bitmap(N, N);
            for (int index = 0; index < N * N; ++index)
            {
                image.SetPixel(index / N, index % N, palette[light_cuda[index]]);
            }

            image.Save("mandelbrot.png", System.Drawing.Imaging.ImageFormat.Png);
            #endregion

            // opening the image is best effort: any exception is ignored so that
            // non-interactive machines still finish normally
            try
            {
                Process.Start("mandelbrot.png");
            }
            catch
            {
            }
        }
Example #17
0
        /// <summary>
        /// Multiplies two matrices <c>redo</c> times through a CUDA-wrapped
        /// <c>Program</c> instance and once on the host through <c>Reference</c>.
        /// </summary>
        /// <param name="args">
        /// Four integers: height and width of A, then height and width of B.
        /// All four default to 512 when no arguments are given.
        /// </param>
        /// <exception cref="ArgumentException">The width of A differs from the height of B.</exception>
        static void Main(string[] args)
        {
            const int redo = 10;

            string[] sizes = (args.Length == 0)
                ? new string[] { "512", "512", "512", "512" }
                : args;

            int heightA = Convert.ToInt32(sizes[0]);
            int widthA  = Convert.ToInt32(sizes[1]);
            int heightB = Convert.ToInt32(sizes[2]);
            int widthB  = Convert.ToInt32(sizes[3]);

            // the inner dimensions must agree for the product to be defined
            if (widthA != heightB)
            {
                throw new ArgumentException("invalid data -- incompatible matrices");
            }

            Console.WriteLine("Execution Naive matrix mul with sizes ({0}, {1}) x ({2}, {3})", heightA, widthA, heightB, widthB);

            NaiveMatrix matrixA  = new NaiveMatrix(widthA, heightA);
            NaiveMatrix matrixB  = new NaiveMatrix(widthB, heightB);
            NaiveMatrix res_net  = new NaiveMatrix(widthB, heightA);
            NaiveMatrix res_cuda = new NaiveMatrix(widthB, heightA);

            // computed but not reported in this method
            double numberCompute = ((double)matrixA.Height * (double)matrixA.Width * (double)matrixB.Width) * 3.0E-9;

            matrixA.FillMatrix();
            matrixB.FillMatrix();

            #region CUDA

            var       cudaRunner = HybRunner.Cuda("SharedMatrix_CUDA.dll");
            HybRunner runner     = cudaRunner.SetDistrib(4, 5, 32, 32, 1, 1024 * 2 * 8);
            dynamic   wrapper    = runner.Wrap(new Program());

            int pass = 0;
            while (pass < redo)
            {
                wrapper.Multiply(res_cuda, matrixA, matrixB, matrixA.Width);
                ++pass;
            }
            #endregion

            #region C#
            Reference(res_net, matrixA, matrixB);
            #endregion

            Console.Out.WriteLine("DONE");
        }
        /// <summary>
        /// Loads <c>lena512.bmp</c>, uploads its pixels as floats into a CUDA array
        /// exposed through a texture object, calls <c>Sobel</c> with that texture
        /// through a CUDA-wrapped <c>Program</c> instance and saves the output image.
        /// </summary>
        /// <param name="args">Command-line arguments; not read.</param>
        static void Main(string[] args)
        {
            var       cudaRunner = HybRunner.Cuda();
            HybRunner runner     = cudaRunner.SetDistrib(32, 32, 16, 16, 1, 0);

            GrayBitmap image  = GrayBitmap.Load("../../images/lena512.bmp");
            uint       height = image.Height;
            uint       width  = image.Width;
            uint       pixelCount = width * height;

            ushort[] inputPixels = image.PixelsUShort;

            // widen the input pixels to float
            float[] imageFloat   = new float[pixelCount];
            float[] imageCompute = new float[pixelCount];
            int p = 0;
            while (p < pixelCount)
            {
                imageFloat[p] = (float)inputPixels[p];
                ++p;
            }

            // marshal the float image through the runner's marshaller
            IntPtr src = runner.Marshaller.MarshalManagedToNative(imageFloat);

            // channel description, CUDA array and resource descriptor for the texture
            cudaChannelFormatDesc channelDescTex = TextureHelpers.cudaCreateChannelDesc(32, 0, 0, 0, cudaChannelFormatKind.cudaChannelFormatKindFloat);
            cudaArray_t           cuArrayTex     = TextureHelpers.CreateCudaArray(channelDescTex, src, (int)width, (int)height);
            cudaResourceDesc      resDescTex     = TextureHelpers.CreateCudaResourceDesc(cuArrayTex);

            // texture descriptor
            cudaTextureDesc texDesc = TextureHelpers.CreateCudaTextureDesc();

            // texture object built from the two descriptors
            cudaTextureObject_t texObj;
            cuda.CreateTextureObject(out texObj, ref resDescTex, ref texDesc);

            dynamic wrapper = runner.Wrap(new Program());
            wrapper.Sobel(texObj, imageCompute, (int)width, (int)height);

            // narrow the computed values back to ushort pixels
            ushort[] outputPixel = new ushort[pixelCount];
            int q = 0;
            while (q < pixelCount)
            {
                outputPixel[q] = (ushort)imageCompute[q];
                ++q;
            }

            GrayBitmap imageSobel = new GrayBitmap(width, height);
            imageSobel.PixelsUShort = outputPixel;
            imageSobel.Save("../../output-03-surface/sobel.bmp");
        }
Example #19
0
        /// <summary>
        /// Allocates three arrays of one million doubles and calls <c>Run</c> on them
        /// through a CUDA-wrapped <c>Program</c> instance.
        /// </summary>
        public static void Main()
        {
            const int n = 1_000_000;

            double[] a       = new double[n];
            double[] b       = new double[n];
            double[] results = new double[n];

            // the first distribution argument scales with the multiprocessor count of device 0
            cuda.GetDeviceProperties(out var prop, 0);
            var       cudaRunner = HybRunner.Cuda("DotnetosGPU.Hybridizer_CUDA.dll");
            HybRunner runner     = cudaRunner.SetDistrib(prop.multiProcessorCount * 16, 256);

            // wrapper whose methods execute on the GPU instead of in C#
            dynamic wrapped = runner.Wrap(new Program());

            wrapped.Run(n, a, b, results);
        }
        /// <summary>
        /// Calls <c>Run</c> on a 13-element integer array through a CUDA-wrapped
        /// <c>Program</c> instance, then synchronizes the device and checks the
        /// returned status.
        /// </summary>
        /// <param name="args">Command-line arguments; not read.</param>
        static void Main(string[] args)
        {
            int[] a = new int[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13 };

            // runner that redirects wrapped calls to the GPU
            var       cudaRunner = HybRunner.Cuda();
            HybRunner runner     = cudaRunner.SetDistrib(4, 4);

            // wrapper whose methods execute on the GPU instead of in C#
            dynamic wrapped = runner.Wrap(new Program());

            wrapped.Run(a.Length, a);

            // synchronize the GPU to flush stdout on the device, with error checking
            cuda.ERROR_CHECK(cuda.DeviceSynchronize());
        }
Example #21
0
        /// <summary>
        /// Runs <c>Run</c> on identical random inputs, once through a CUDA wrapper
        /// around this instance and once directly, and prints "ERROR !" for every
        /// element where the two outputs differ, followed by "DONE".
        /// </summary>
        /// <param name="dllname">Name of the CUDA dll passed to <c>HybRunner.Cuda</c>.</param>
        public void Test(string dllname)
        {
            // 268 MB allocated on device -- should fit in every CUDA compatible GPU
            int N = 1024 * 1024 * 16;

            double[] acuda   = new double[N];
            double[] adotnet = new double[N];
            double[] b       = new double[N];

            // acuda and adotnet receive the same random values; b gets its own
            Random rand = new Random();
            int slot = 0;
            while (slot < N)
            {
                double sample = rand.NextDouble();
                acuda[slot]   = sample;
                adotnet[slot] = sample;
                b[slot]       = rand.NextDouble();
                ++slot;
            }

            cudaDeviceProp prop;
            cuda.GetDeviceProperties(out prop, 0);

            var       cudaRunner = HybRunner.Cuda(dllname);
            HybRunner runner     = cudaRunner.SetDistrib(prop.multiProcessorCount * 16, 128);

            // wrapper whose methods execute on the GPU instead of in C#
            dynamic wrapped = runner.Wrap(this);

            // GPU run first, then the plain .Net run
            wrapped.Run(N, acuda, b);
            Run(N, adotnet, b);

            // verify the results element by element, using exact comparison
            for (int k = 0; k < N; ++k)
            {
                if (!(acuda[k] == adotnet[k]))
                {
                    Console.Out.WriteLine("ERROR !");
                }
            }

            Console.Out.WriteLine("DONE");
        }
Example #22
0
        /// <summary>
        /// Calls the parameterless <c>Run</c> through a CUDA-wrapped <c>Program</c>
        /// instance and prints "DONE".
        /// </summary>
        /// <param name="args">Command-line arguments; not read.</param>
        static void Main(string[] args)
        {
            // sample arrays; they are not passed to the wrapped call below
            int[] a = new int[] { 1, 2, 3, 4, 5 };
            int[] b = new int[] { 10, 20, 30, 40, 50 };

            cudaDeviceProp prop;
            cuda.GetDeviceProperties(out prop, 0);

            //if .SetDistrib is not used, the default is .SetDistrib(prop.multiProcessorCount * 16, 128)
            HybRunner runner = HybRunner.Cuda();

            // wrapper whose methods execute on the GPU instead of in C#
            dynamic wrapped = runner.Wrap(new Program());
            wrapped.Run();

            Console.Out.WriteLine("DONE");
        }
Example #23
0
        /// <summary>
        /// Reads <c>lena512.bmp</c> into a 2D byte array, calls <c>ComputeSobel</c>
        /// through a CUDA-wrapped <c>Program</c> instance, saves the output as
        /// <c>lena-sobel.bmp</c> and tries to open it.
        /// </summary>
        /// <param name="args">Command-line arguments; not read.</param>
        static void Main(string[] args)
        {
            const int size = 512;

            Bitmap baseImage = (Bitmap)Image.FromFile("lena512.bmp");

            // result image of the same size (allocated here, not used below)
            Bitmap resImage = new Bitmap(size, size);

            byte[,] inputPixels  = new byte[size, size];
            byte[,] outputPixels = new byte[size, size];

            ReadImage(inputPixels, baseImage, size);

            var       cudaRunner = HybRunner.Cuda();
            HybRunner runner     = cudaRunner.SetDistrib(32, 32, 16, 16, 1, 0);
            dynamic   wrapper    = runner.Wrap(new Program());

            wrapper.ComputeSobel(outputPixels, inputPixels);

            SaveImage("lena-sobel.bmp", outputPixels, size);

            // opening the image is best effort: any exception is ignored so that
            // non-interactive machines still finish normally
            try
            {
                Process.Start("lena-sobel.bmp");
            }
            catch
            {
            }
        }
        /// <summary>
        /// Reads <c>lena512.bmp</c> into a 2D byte array, calls <c>ComputeSobel</c>
        /// through a <c>Program</c> instance wrapped with <c>Sobel_2D_CUDA.dll</c>,
        /// saves the output as <c>lena-sobel.bmp</c> and tries to open it.
        /// </summary>
        /// <param name="args">Command-line arguments; not read.</param>
        static void Main(string[] args)
        {
            Bitmap    baseImage = (Bitmap)Image.FromFile("lena512.bmp");
            const int size      = 512;

            byte[,] inputPixels  = new byte[size, size];
            byte[,] outputPixels = new byte[size, size];

            ReadImage(inputPixels, baseImage, size);

            HybRunner runner  = HybRunner.Cuda("Sobel_2D_CUDA.dll").SetDistrib(32, 32, 16, 16, 1, 0);
            dynamic   wrapper = runner.Wrap(new Program());

            wrapper.ComputeSobel(outputPixels, inputPixels);

            SaveImage("lena-sobel.bmp", outputPixels, size);

            // The image has already been saved. Opening it is best effort, as in the
            // sibling samples, so a non-interactive machine does not crash here.
            try { Process.Start("lena-sobel.bmp"); } catch {}            // catch exception for non interactives machines
        }
        // Entry point: locks the pixels of lena512.bmp and of a new result bitmap,
        // registers both locked buffers with CUDA, calls ComputeSobel on the device
        // pointers through a CUDA-wrapped Program instance, then unregisters, unlocks,
        // saves the result as lena_sobel.bmp and tries to open it. args is not read.
        static void Main(string[] args)
        {
            // open the input image and lock its content for read operations
            Bitmap      baseImage    = (Bitmap)Image.FromFile("lena512.bmp");
            PixelFormat format       = baseImage.PixelFormat;
            var         lockedSource = baseImage.LockBits(new Rectangle(0, 0, baseImage.Width, baseImage.Height), ImageLockMode.ReadOnly, format);
            IntPtr      srcData      = lockedSource.Scan0;
            // NOTE(review): width * height equals the buffer size only for one byte
            // per pixel with no row padding (stride == width) -- confirm for the input.
            int         imageBytes   = baseImage.Width * baseImage.Height;

            // create a result image with same pixel format (8 bits per pixel) and lock its content for write operations
            Bitmap     resImage   = new Bitmap(baseImage.Width, baseImage.Height, format);
            BitmapData lockedDest = resImage.LockBits(new Rectangle(0, 0, baseImage.Width, baseImage.Height), ImageLockMode.WriteOnly, format);
            IntPtr     destData   = lockedDest.Scan0;

            // pin images memory for cuda
            // (the values returned by the cuda.Host* calls are not inspected here)
            cuda.HostRegister(srcData, imageBytes, (uint)cudaHostAllocFlags.cudaHostAllocMapped);
            cuda.HostRegister(destData, imageBytes, (uint)cudaHostAllocFlags.cudaHostAllocMapped);
            IntPtr d_input, d_result;

            // obtain the device pointers that correspond to the registered host buffers
            cuda.HostGetDevicePointer(out d_input, srcData, cudaGetDevicePointerFlags.cudaReserved);
            cuda.HostGetDevicePointer(out d_result, destData, cudaGetDevicePointerFlags.cudaReserved);

            // run the kernel
            HybRunner runner = HybRunner.Cuda().SetDistrib(32, 32, 16, 16, 1, 0);

            runner.Wrap(new Program()).ComputeSobel(d_result, d_input, baseImage.Width, baseImage.Height);
            // wait for the device before the buffers are unregistered and unlocked
            cuda.DeviceSynchronize();

            // unregister pinned memory
            cuda.HostUnregister(destData);
            cuda.HostUnregister(srcData);

            // unlock images
            resImage.UnlockBits(lockedDest);
            baseImage.UnlockBits(lockedSource);

            // and save result
            // the result reuses the input palette
            resImage.Palette = baseImage.Palette;
            resImage.Save("lena_sobel.bmp");
            try { Process.Start("lena_sobel.bmp"); } catch {}            // catch exception for non interactives machines
        }
Example #26
0
        /// <summary>
        /// Fills an array with the values 0..N-1, runs the wrapped <c>Run</c> method on
        /// the GPU and prints the value it stored in <c>result[0]</c>.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            const int N = 1024;

            int[] a = new int[N];
            // iterate up to N (not a repeated literal) so the fill always matches the array size
            for (int i = 0; i < N; ++i)
            {
                a[i] = i;
            }

            // single-element array that receives the value printed below
            int[] result = new int[1];

            HybRunner runner = HybRunner.Cuda();

            // create a wrapper object to call GPU methods instead of C#
            dynamic wrapped = runner.Wrap(new Program());

            wrapped.Run(N, a, result);
            // wait for the device before reading the result
            cuda.DeviceSynchronize();
            Console.Out.WriteLine("sum = {0}", result[0]);
        }
        /// <summary>
        /// Runs the wrapped <c>LoadTest.Run</c> method on the GPU and reports on the
        /// console when it started, when it finished and how long the call took.
        /// </summary>
        /// <param name="a">First array handed to <c>Run</c>.</param>
        /// <param name="b">Second array handed to <c>Run</c>.</param>
        internal static void RunOnGPU(double[] a, double[] b)
        {
            // query device 0; the properties are not used further in this method
            cudaDeviceProp deviceProperties;
            cuda.GetDeviceProperties(out deviceProperties, 0);

            // if .SetDistrib is not used, the default is .SetDistrib(prop.multiProcessorCount * 16, 128)
            HybRunner gpuRunner = HybRunner.Cuda();
            // wrapper object that dispatches method calls to the GPU instead of C#
            dynamic gpuLoadTest = gpuRunner.Wrap(new LoadTest());

            Console.Out.WriteLine($"\n[{DateTime.Now.ToLongTimeString()}] Started heavy load task on GPU.");

            Stopwatch timer = Stopwatch.StartNew();
            gpuLoadTest.Run(a, b);
            timer.Stop();

            Console.Out.WriteLine($"[{DateTime.Now.ToLongTimeString()}] Finished heavy load task.");

            // split the elapsed time into whole seconds and the remaining milliseconds
            long   totalMs      = timer.ElapsedMilliseconds;
            double wholeSeconds = Math.Floor((double)totalMs / 1000);
            double remainingMs  = totalMs % 1000;

            Console.Out.WriteLine($"The time elapsed during the load is {wholeSeconds} s {remainingMs} ms.");
        }
Example #28
0
        /// <summary>
        /// Reads "lena512.bmp" into a flat byte array, runs the Sobel kernel on the GPU
        /// and saves the filtered pixels as "lena-sobel.bmp".
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            Bitmap baseImage = (Bitmap)Image.FromFile("lena512.bmp");
            int    height = baseImage.Height, width = baseImage.Width;

            // no result Bitmap is created here: the output goes through the byte array
            // passed to SaveImage, so an extra Bitmap would only be an undisposed allocation
            byte[] inputPixels  = new byte[width * height];
            byte[] outputPixels = new byte[width * height];

            ReadImage(inputPixels, baseImage, width, height);

            HybRunner runner  = HybRunner.Cuda("Sobel_CUDA.dll").SetDistrib(32, 32, 16, 16, 1, 0);
            dynamic   wrapper = runner.Wrap(new Program());

            // the last two arguments are 0 and height
            // NOTE(review): presumably the row range to process - confirm against ComputeSobel
            wrapper.ComputeSobel(outputPixels, inputPixels, width, height, 0, height);

            SaveImage("lena-sobel.bmp", outputPixels, width, height);
            try { Process.Start("lena-sobel.bmp"); } catch {}            // catch exception for non interactive machines
        }
Example #29
0
     /// <summary>
     /// Trouble-shooting entry point: checks step by step that the generated CUDA dll
     /// exists, can be loaded and exports the expected symbol, then runs the kernel.
     /// Exits with code 1, 2 or 3 on the first check that fails.
     /// </summary>
     static void Main()
     {
         // replace with actual dll name - you can read that on the output of the build
         const string dllName = @"Troubleshooting_CUDA.dll";
         const string wrapperSymbol = "Hello_Worldx46Hello_ExternCWrapper_CUDA";

         // show where we run from and the pointer size of this process
         Console.Out.WriteLine("Current directory : {0}", Environment.CurrentDirectory);
         Console.Out.WriteLine("Size of IntPtr = {0}", Marshal.SizeOf(IntPtr.Zero));

         // step 1: the file has to exist
         bool dllPresent = File.Exists(dllName);
         if (!dllPresent)
         {
             Console.Out.WriteLine("Dll could not be found in path, please verify dll is located in the appropriate directory that LoadLibrary may find it");
             Environment.Exit(1);
         }

         // step 2: it has to load -- open the DLL in depends to troubleshoot missing dependencies (may be long to load)
         IntPtr module = LoadLibrary(dllName);
         if (module == IntPtr.Zero)
         {
             Console.Out.WriteLine("Dll could not be loaded : {0}", ErrorToString(Marshal.GetLastWin32Error()));
             Environment.Exit(2);
         }

         // step 3: the entry point has to be exported -- open the DLL in depends to see the list of symbols (may be long to load)
         IntPtr entryPoint = GetProcAddress(module, wrapperSymbol);
         if (entryPoint == IntPtr.Zero)
         {
             Console.Out.WriteLine("Could not find symbol in dll : {0}", ErrorToString(Marshal.GetLastWin32Error()));
             Environment.Exit(3);
         }

         // every check passed: run the kernel
         cuda.DeviceSynchronize();
         HybRunner helloRunner = HybRunner.Cuda().SetDistrib(1, 2);
         helloRunner.Wrap(new Hello_World()).Hello();
     }
Example #30
0
        /// <summary>
        /// Builds the form: enables the backend selectors according to the detected
        /// capabilities, shows the processor name, creates one runner and one wrapped
        /// <c>Mandelbrot</c> per available backend, then renders the first image and
        /// hooks the mouse handlers.
        /// </summary>
        public Form1()
        {
            InitializeComponent();
            CUDA.Enabled = hasCUDA;
            AVX.Enabled = hasAVX;
            AVX2.Enabled = hasAVX2;
            AVX512.Enabled = hasAVX512;

            if (hasCUDA)
            {
                DisplayGPUName();
            }

            // the WMI searcher and its result collection are disposable: release them once read
            using (ManagementObjectSearcher mos = new ManagementObjectSearcher("root\\CIMV2", "SELECT * FROM Win32_Processor"))
            using (var processors = mos.Get())
            {
                foreach (ManagementObject mo in processors)
                {
                    // skip entries without a usable name instead of failing on Split
                    string cpuName = mo["Name"] as string;
                    if (cpuName != null)
                    {
                        // keep only the text before the '@' character
                        label4.Text = cpuName.Split('@')[0];
                    }
                }
            }

            // create every runner first, then wrap (same order as before)
            if (hasCUDA) runnerCUDA = HybRunner.Cuda("MandelbrotRenderer_CUDA.dll").SetDistrib(32, 32, 16, 16, 1, 0);
            if (hasAVX) runnerAVX = HybRunner.AVX("MandelbrotRenderer_AVX.dll").SetDistrib(Environment.ProcessorCount, 32);
            // NOTE(review): the AVX2 dll is loaded through HybRunner.AVX - confirm this is intended
            if (hasAVX2) runnerAVX2 = HybRunner.AVX("MandelbrotRenderer_AVX2.dll").SetDistrib(Environment.ProcessorCount, 32);
            if (hasAVX512) runnerAVX512 = HybRunner.AVX512("MandelbrotRenderer_AVX512.dll").SetDistrib(Environment.ProcessorCount, 32);

            if (hasCUDA) MandelbrotCUDA = runnerCUDA.Wrap(new Mandelbrot());
            if (hasAVX) MandelbrotAVX = runnerAVX.Wrap(new Mandelbrot());
            if (hasAVX2) MandelbrotAVX2 = runnerAVX2.Wrap(new Mandelbrot());
            if (hasAVX512) MandelbrotAVX512 = runnerAVX512.Wrap(new Mandelbrot());

            // fixed-size window: the render target has a constant W x H size
            this.FormBorderStyle = FormBorderStyle.FixedSingle;
            this.MaximizeBox = false;
            this.MinimizeBox = false;
            image = new Bitmap(W, H, PixelFormat.Format32bppRgb);
            Rendering.Image = image;
            render();
            Rendering.MouseDown += (s, e) => { ImageOnMouseDown(e); };
            Rendering.MouseMove += (s, e) => { ImageOnMouseMove(e); };
        }