コード例 #1
0
        /// <summary>
        /// Adds two integer arrays element-wise on the GPU and verifies the result on the CPU.
        /// </summary>
        public static void Execute()
        {
            // Translate this class to CUDA C, compile it, and load it onto the selected device.
            CudafyModule module = CudafyTranslator.Cudafy();
            GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
            gpu.LoadModule(module);

            // Host-side operands and result buffer.
            int[] hostA = new int[N];
            int[] hostB = new int[N];
            int[] hostC = new int[N];

            // Allocate device memory for the result (the host array is passed to Allocate).
            int[] devC = gpu.Allocate<int>(hostC);

            // Fill the operands on the CPU: a[i] = i, b[i] = 2 * i.
            for (int index = 0; index < N; index++)
            {
                hostA[index] = index;
                hostB[index] = 2 * index;
            }

            // Copy both operands to the GPU; CopyToDevice hands back the device-side arrays.
            int[] devA = gpu.CopyToDevice(hostA);
            int[] devB = gpu.CopyToDevice(hostB);

            // Launch the 'add' kernel with 128 blocks of 128 threads each.
            gpu.Launch(128, 128).add(devA, devB, devC);

            // Bring the result back to the host.
            gpu.CopyFromDevice(devC, hostC);

            // Look for the first element where the GPU result disagrees with the CPU sum.
            int firstMismatch = -1;
            for (int index = 0; index < N && firstMismatch < 0; index++)
            {
                if (hostA[index] + hostB[index] != hostC[index])
                {
                    firstMismatch = index;
                }
            }

            if (firstMismatch < 0)
            {
                Console.WriteLine("We did it!");
            }
            else
            {
                Console.WriteLine("{0} + {1} != {2}", hostA[firstMismatch], hostB[firstMismatch], hostC[firstMismatch]);
            }

            // Release everything allocated on the GPU.
            gpu.FreeAll();
        }
コード例 #2
0
ファイル: ray_noconst.cs プロジェクト: rblenis/cudafy
        /// <summary>
        /// Renders a set of randomly generated spheres into <paramref name="bitmap"/> on the GPU
        /// and prints the elapsed time reported by the GPU timer.
        /// </summary>
        /// <param name="bitmap">Host buffer that receives the rendered image bytes.</param>
        public static void Execute(byte[] bitmap)
        {
            // Reuse a previously serialized module when it exists and its checksums verify;
            // otherwise translate the types again and cache the result.
            CudafyModule module = CudafyModule.TryDeserialize();
            bool cacheUsable = module != null && module.TryVerifyChecksums();
            if (!cacheUsable)
            {
                module = CudafyTranslator.Cudafy(typeof(Sphere), typeof(ray_noconst));
                module.TrySerialize();
            }

            GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
            gpu.LoadModule(module);

            // Start timing before any device allocation so the whole run is measured.
            gpu.StartTimer();

            // Device memory for the bitmap (the host bitmap is passed to Allocate).
            byte[] devBitmap = gpu.Allocate(bitmap);

            // Device memory for the sphere data set.
            Sphere[] devSpheres = gpu.Allocate<Sphere>(SPHERES);

            // Build the spheres on the host, then copy them into the device buffer.
            Sphere[] hostSpheres = new Sphere[SPHERES];
            for (int index = 0; index < SPHERES; index++)
            {
                // Colour components.
                hostSpheres[index].r = rnd(1.0f);
                hostSpheres[index].g = rnd(1.0f);
                hostSpheres[index].b = rnd(1.0f);

                // Position and size.
                hostSpheres[index].x = rnd(1000.0f) - 500;
                hostSpheres[index].y = rnd(1000.0f) - 500;
                hostSpheres[index].z = rnd(1000.0f) - 500;
                hostSpheres[index].radius = rnd(100.0f) + 20;
            }
            gpu.CopyToDevice(hostSpheres, devSpheres);

            // Launch configuration: 16x16 threads per block, DIM/16 blocks along each axis.
            dim3 threadsPerBlock = new dim3(16, 16);
            dim3 blocksPerGrid = new dim3(ray_gui.DIM / 16, ray_gui.DIM / 16);

            // Strongly typed launch of 'thekernel' (the dynamic form would be
            // gpu.Launch(grids, threads).kernel(s, dev_bitmap)).
            Action<GThread, Sphere[], byte[]> kernel = thekernel;
            gpu.Launch(blocksPerGrid, threadsPerBlock, kernel, devSpheres, devBitmap);

            // Copy the rendered bitmap back from the GPU for display.
            gpu.CopyFromDevice(devBitmap, bitmap);

            // Stop the timer and report the result.
            float elapsedMs = gpu.StopTimer();
            Console.WriteLine("Time to generate: {0} ms", elapsedMs);

            gpu.FreeAll();
        }
コード例 #3
0
ファイル: copy_timed.cs プロジェクト: rapiddev/CUDAfy.NET
        /// <summary>
        /// Times host-to-device and device-to-host copies using the cuda_malloc_test and
        /// cuda_host_alloc_test helpers, and prints each elapsed time and throughput.
        /// </summary>
        public void Execute()
        {
            // Megabytes used for the throughput figures below.
            float megabytes = (float)100 * SIZE * sizeof(int) / 1024 / 1024;

            _gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
            var deviceProps = _gpu.GetDeviceProperties();

            Console.WriteLine(deviceProps.Name);
            Console.WriteLine("Using {0}optimized driver.", deviceProps.HighPerformanceDriver ? "" : "non-");

            // cudaMalloc, copy up.
            float elapsedMs = cuda_malloc_test(SIZE, true);
            Console.WriteLine("Time using cudaMalloc: {0} ms", elapsedMs);
            Console.WriteLine("\tMB/s during copy up: {0}", megabytes / (elapsedMs / 1000));

            // cudaMalloc, copy down.
            elapsedMs = cuda_malloc_test(SIZE, false);
            Console.WriteLine("Time using cudaMalloc: {0} ms", elapsedMs);
            Console.WriteLine("\tMB/s during copy down: {0}", megabytes / (elapsedMs / 1000));

            // cudaHostAlloc, copy up.
            elapsedMs = cuda_host_alloc_test(SIZE, true);
            Console.WriteLine("Time using cudaHostAlloc: {0} ms", elapsedMs);
            Console.WriteLine("\tMB/s during copy up: {0}", megabytes / (elapsedMs / 1000));

            // cudaHostAlloc, copy down.
            elapsedMs = cuda_host_alloc_test(SIZE, false);
            Console.WriteLine("Time using cudaHostAlloc: {0} ms", elapsedMs);
            Console.WriteLine("\tMB/s during copy down: {0}", megabytes / (elapsedMs / 1000));

            // The cudaHostAlloc + async copy variant (cuda_host_alloc_copy_test) is disabled:
            // as of 15-06-2011 it was not working on a laptop but worked fine on a workstation.
        }
コード例 #4
0
        /// <summary>
        /// Loads a Cudafy module compiled for sm_35 onto a CUDA device and returns a new
        /// one-element array.
        /// NOTE(review): this looks like an unfinished stub. <paramref name="inputArray"/> is
        /// never read and no kernel is invoked, so no transpose is performed here.
        /// </summary>
        /// <param name="inputArray">Not used by the current body.</param>
        /// <returns>A freshly allocated <c>double[1]</c>.</returns>
        public double[] transpose(double[] inputArray)
        {
            GPGPU        gpu = CudafyHost.GetDevice(eGPUType.Cuda);
            CudafyModule km  = CudafyTranslator.Cudafy(eArchitecture.sm_35);

            gpu.LoadModule(km);
            // NOTE(review): 'grid' is constructed but never passed to Launch.
            dim3 grid = new dim3(1000);

            // NOTE(review): Launch() is called without arguments and its result is discarded;
            // presumably a kernel call was meant to follow. Confirm the intent.
            gpu.Launch();
            return(new double[1]);
        }
コード例 #5
0
        /// <summary>
        /// Translates the current class, loads it on a CUDA device and launches 'thekernel'.
        /// </summary>
        public static void RunTest()
        {
            CudafyModule module = CudafyTranslator.Cudafy();
            GPGPU device = CudafyHost.GetDevice(eGPUType.Cuda);
            device.LoadModule(module);

            // Dynamic launch; the string-based form would be device.Launch(1, 1, "kernel").
            device.Launch().thekernel();

            Console.WriteLine("Sample kernel started successfully!");
        }
コード例 #6
0
        /// <summary>
        /// Compiles this test class for x64 with dynamic parallelism, prints the compiler
        /// output, serializes the module and loads it onto the matching device.
        /// </summary>
        public void SetUp()
        {
            // Compile settings: x64, the configured architecture, dynamic parallelism mode.
            CompileProperties compileProps = CompilerHelper.Create(
                ePlatform.x64,
                CudafyModes.Architecture,
                eCudafyCompileMode.DynamicParallelism);

            _cm = CudafyTranslator.Cudafy(new[] { compileProps }, GetType());
            Console.WriteLine(_cm.CompilerOutput);
            _cm.Serialize();

            // Pick the device by the architecture the module was compiled for.
            _gpu = CudafyHost.GetDevice(compileProps.Architecture, CudafyModes.DeviceId);
            _gpu.LoadModule(_cm);
        }
コード例 #7
0
ファイル: GPGPUTests.cs プロジェクト: uzbekdev1/CUDAfy.NET-1
        /// <summary>
        /// Checks that requesting an Emulator device yields an <c>EmulatedGPU</c>.
        /// Runs only when the configured target is the emulator.
        /// </summary>
        public void Test_CreateEmulatedGPU()
        {
            if (CudafyModes.Target == eGPUType.Emulator)
            {
                GPGPU device = CudafyHost.GetDevice(eGPUType.Emulator);
                Assert.IsTrue(device is EmulatedGPU);
            }
            else
            {
                Console.WriteLine("Only tests Emulator devices, so skip.");
            }
        }
コード例 #8
0
 /// <summary>
 /// Enables debug generation, obtains the device, and loads a module that is either
 /// deserialized from cache or freshly translated when the cache is missing or stale.
 /// </summary>
 public void SetUp()
 {
     CudafyTranslator.GenerateDebug = true;

     _cm = CudafyModule.TryDeserialize();
     _gpu = CudafyHost.GetDevice(CudafyModes.Architecture, CudafyModes.DeviceId);

     bool cacheUsable = _cm != null && _cm.TryVerifyChecksums();
     if (!cacheUsable)
     {
         // StringConstClass is only included when the device is not an OpenCL device.
         _cm = CudafyTranslator.Cudafy(
             _gpu.GetArchitecture(),
             this.GetType(),
             (_gpu is OpenCLDevice) ? null : typeof(StringConstClass));
         _cm.TrySerialize();
     }

     _gpu.LoadModule(_cm);
 }
コード例 #9
0
        /// <summary>
        /// Translates and loads the module, allocates the device bitmap and sets up the
        /// launch dimensions.
        /// </summary>
        /// <param name="bytes">Number of bytes to allocate for the device bitmap.</param>
        public void Initialize(int bytes)
        {
            CudafyModule module = CudafyTranslator.Cudafy();

            _gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
            _gpu.LoadModule(module);

            // Device buffer for the bitmap.
            _dev_bitmap = _gpu.Allocate<byte>(bytes);

            // 16x16 threads per block, DIM/16 blocks along each axis.
            _threads = new dim3(16, 16);
            _blocks = new dim3(DIM / 16, DIM / 16);
        }
コード例 #10
0
ファイル: GpuCore.cs プロジェクト: itfenom/Tucan
        /// <summary>
        /// Configures Cudafy for OpenCL, translates the module in the given working directory
        /// and loads it onto the requested OpenCL device.
        /// </summary>
        /// <param name="DeviceId">Id of the OpenCL device to use.</param>
        /// <param name="Directory">Working directory assigned to the translator.</param>
        public void Initialize(int DeviceId, String Directory)
        {
            // Global Cudafy settings: target OpenCL and keep the temporary files.
            CudafyModes.Target = eGPUType.OpenCL;
            CudafyTranslator.Language = eLanguage.OpenCL;
            CudafyTranslator.WorkingDirectory = Directory;
            CudafyTranslator.DeleteTempFiles = false;

            CudafyModule translated = CudafyTranslator.Cudafy();

            Gpu = CudafyHost.GetDevice(eGPUType.OpenCL, DeviceId);
            Gpu.LoadModule(translated);

            Initialized = true;
        }
コード例 #11
0
ファイル: dot.cs プロジェクト: rblenis/cudafy
        /// <summary>
        /// Computes a dot product on the GPU: the 'Dot' kernel produces one partial sum per
        /// block, and the partial sums are added up on the CPU. Prints the GPU value next to
        /// the closed-form expected value.
        /// </summary>
        public static void Execute()
        {
            CudafyModule km = CudafyTranslator.Cudafy();

            GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);

            gpu.LoadModule(km);

            // allocate memory on the cpu side
            float[] a         = new float[N];
            float[] b         = new float[N];
            float[] partial_c = new float[blocksPerGrid];

            // allocate the memory on the GPU
            // (the former unused 'dev_test' allocation of blocksPerGrid * blocksPerGrid floats
            // was removed: nothing read or wrote it, it only consumed device memory)
            float[] dev_a         = gpu.Allocate<float>(N);
            float[] dev_b         = gpu.Allocate<float>(N);
            float[] dev_partial_c = gpu.Allocate<float>(blocksPerGrid);

            // fill in the host memory with data
            for (int i = 0; i < N; i++)
            {
                a[i] = i;
                b[i] = i * 2;
            }

            // copy the arrays 'a' and 'b' to the GPU
            gpu.CopyToDevice(a, dev_a);
            gpu.CopyToDevice(b, dev_b);

            gpu.Launch(blocksPerGrid, threadsPerBlock).Dot(dev_a, dev_b, dev_partial_c);

            // copy the per-block partial sums back from the GPU to the CPU
            gpu.CopyFromDevice(dev_partial_c, partial_c);

            // finish up on the CPU side: add the partial sums together
            float c = 0;
            for (int i = 0; i < blocksPerGrid; i++)
            {
                c += partial_c[i];
            }

            Console.WriteLine("Does GPU value {0} = {1}?\n", c, 2 * sum_squares((float)(N - 1)));

            // free memory on the gpu side
            gpu.FreeAll();

            // host arrays are managed memory; nothing to free on the CPU side
        }
コード例 #12
0
        /// <summary>
        /// Entry point of the unit test runner: selects device 0, derives architecture and
        /// target from it, runs every test suite, and waits for Enter before exiting.
        /// Any exception is printed to the console.
        /// </summary>
        /// <param name="args">Command line arguments (not used).</param>
        static void Main(string[] args)
        {
            try
            {
                // The architecture is queried from the CUDA device; a fixed value such as
                // eArchitecture.sm_35 could be assigned here instead to target a specific board.
                CudafyModes.DeviceId = 0;
                CudafyModes.Architecture = CudafyHost.GetDevice(eGPUType.Cuda, CudafyModes.DeviceId).GetArchitecture();
                CudafyModes.Target = CompilerHelper.GetGPUType(CudafyModes.Architecture);

                // The CURAND tests are skipped for OpenCL targets.
                if (CudafyModes.Target != eGPUType.OpenCL)
                {
                    CURANDTests.Basics();
                }

                // Suites that always run.
                CudafyUnitTest.PerformAllTests(new StringTests());
                CudafyUnitTest.PerformAllTests(new BasicFunctionTests());
                CudafyUnitTest.PerformAllTests(new GMathUnitTests());
                CudafyUnitTest.PerformAllTests(new MultithreadedTests());
                CudafyUnitTest.PerformAllTests(new CopyTests1D());
                CudafyUnitTest.PerformAllTests(new GPGPUTests());

                // Multi-GPU tests need more than one device of the current target type.
                if (CudafyHost.GetDeviceCount(CudafyModes.Target) > 1)
                {
                    CudafyUnitTest.PerformAllTests(new MultiGPUTests());
                }

                // Compute 3.5 feature tests run only on sm_35.
                if (CudafyModes.Architecture == eArchitecture.sm_35)
                {
                    CudafyUnitTest.PerformAllTests(new Compute35Features());
                }

                Console.WriteLine("Done");
                Console.ReadLine();
            }
            catch (Exception failure)
            {
                Console.WriteLine(failure.ToString());
                Console.ReadLine();
            }
        }
        /// <summary>
        /// Runs the "Simulation" kernel on a CUDA device for <c>Generations</c> iterations,
        /// saving the matrix and preparing its visualisation before the first iteration and
        /// after every iteration.
        /// </summary>
        public static void Execute()
        {
            CudafyModes.Target = eGPUType.Cuda;
            CudafyModes.DeviceId = ChosenDeviceId; // 0 when not set, which is a sensible default

            if (CudafyModes.Target == eGPUType.OpenCL)
            {
                CudafyTranslator.Language = eLanguage.OpenCL;
            }
            else
            {
                CudafyTranslator.Language = eLanguage.Cuda;
            }

            var gpu = CudafyHost.GetDevice(CudafyModes.Target);
            var module = CudafyTranslator.Cudafy(gpu.GetArchitecture());
            gpu.LoadModule(module);

            MaximumDimensionSize = GetMaxThreadsPerBlock();

            // Save the untouched starting state as generation 0.
            DataHandler.SaveMatrix(Matrix, string.Format(AppConfigHelper.GetValueFromAppSettings(@"CellMatrixOutputLocation"), 0));
            DataHandler.PrepareVisualisation(string.Format(AppConfigHelper.GetValueFromAppSettings(@"CellMatrixOutputLocation"), 0));

            var gridSize = GetGridSize();
            var blockSize = GetBlockSize();
            Console.WriteLine("Grid size - {0},{1},{2} - Block size {3},{4},{5}",
                              gridSize.x, gridSize.y, gridSize.z, blockSize.x, blockSize.y, blockSize.z);

            for (var generation = 0; generation < Generations; generation++)
            {
                // Upload the rule parameters for this generation.
                var deviceRules = gpu.CopyToDevice(
                    new[] { LonelinessDeathNumber, OvercrowingDeathNumber, RevivalNumber, MaximumDimensionSize });

                // Upload the current matrix.
                var deviceMatrix = gpu.Allocate<bool>(Matrix);
                gpu.CopyToDevice(Matrix, deviceMatrix);

                gpu.Launch(GetGridSize(), GetBlockSize(), @"Simulation", deviceMatrix, deviceRules);

                // Copy the matrix back from the GPU into the host matrix.
                gpu.CopyFromDevice(deviceMatrix, Matrix);

                // Release this generation's device buffers.
                gpu.Free(deviceMatrix);
                gpu.Free(deviceRules);

                // Persist the result as generation + 1; host memory is managed by .NET.
                DataHandler.SaveMatrix(Matrix, string.Format(AppConfigHelper.GetValueFromAppSettings(@"CellMatrixOutputLocation"), generation + 1));
                DataHandler.PrepareVisualisation(string.Format(AppConfigHelper.GetValueFromAppSettings(@"CellMatrixOutputLocation"), generation + 1));
            }
        }
コード例 #14
0
        /// <summary>
        /// Translates the class-based example types with <c>CudafyTranslator.AllowClasses</c>
        /// temporarily enabled, loads the module and runs both examples.
        /// </summary>
        /// <remarks>
        /// <c>AllowClasses</c> is a static setting, so its previous value is restored in a
        /// <c>finally</c> block. Without that, an exception during translation or in one of
        /// the examples would leave the setting switched on for all later callers.
        /// </remarks>
        public static void Execute()
        {
            bool previousValue = CudafyTranslator.AllowClasses;

            CudafyTranslator.AllowClasses = true;
            try
            {
                CudafyModule km = CudafyTranslator.Cudafy(new Type[] { typeof(BaseClass), typeof(MemberClass), typeof(DerivedClass), typeof(ArrayView), typeof(CudafyClassExamples) });
                GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, 0);

                gpu.LoadModule(km);
                Example1(gpu);
                Example2(gpu);
            }
            finally
            {
                // Always put the global translator setting back the way we found it.
                CudafyTranslator.AllowClasses = previousValue;
            }
        }
コード例 #15
0
        /// <summary>
        /// Loads the Cudafy module for this class onto the target device. A serialized module
        /// named after the class is tried first; if deserializing it throws for any reason,
        /// the class is translated for sm_50 instead.
        /// </summary>
        private LevenshteinGPU()
        {
            CudafyModule module;

            try {
                module = CudafyModule.Deserialize(typeof(LevenshteinGPU).Name);
            }
            catch {
                // Any failure to read the cached module falls back to a fresh translation.
                module = CudafyTranslator.Cudafy(eArchitecture.sm_50);
            }

            _gpu = CudafyHost.GetDevice(CudafyModes.Target);
            _gpu.LoadModule(module);
        }
コード例 #16
0
ファイル: BasicFunctionTests.cs プロジェクト: rblenis/cudafy
        /// <summary>
        /// Loads the test module onto the device, translating and caching it when no valid
        /// serialized module is available.
        /// </summary>
        public void SetUp()
        {
            _cm = CudafyModule.TryDeserialize();

            bool cacheUsable = _cm != null && _cm.TryVerifyChecksums();
            if (!cacheUsable)
            {
                // Translate for the configured architecture, show the compiler output, cache it.
                _cm = CudafyTranslator.Cudafy(CudafyModes.Architecture);
                Console.WriteLine(_cm.CompilerOutput);
                _cm.TrySerialize();
            }

            _gpu = CudafyHost.GetDevice(CudafyModes.Architecture, CudafyModes.DeviceId);
            _gpu.LoadModule(_cm);
        }
コード例 #17
0
        /// <summary>
        /// Runs the path-distance kernel on the GPU and returns the best (smallest) distance
        /// found across all blocks, together with load and run timings in milliseconds.
        /// </summary>
        /// <returns>The best distance, its permutation number, and the two timings.</returns>
        internal override Answer GetAnswer()
        {
            var loadTimer = Stopwatch.StartNew();

            using (var gpu = CudafyHost.GetDevice()) {
                // Translate for the architecture reported by the device, then load.
                var arch = gpu.GetDeviceProperties().Capability.GetArchitecture();
                gpu.LoadModule(CudafyTranslator.Cudafy(ePlatform.x64, arch));
                LoadTime = loadTimer.ElapsedMilliseconds;

                var runTimer = Stopwatch.StartNew();
                var gpuLatLong = gpu.CopyToDevice(_latLong.ToArray());

                // Fill the divisor table from the last slot down to the first: slot k receives
                // the running value after it has been divided by (k + 1).
                var divisors = new long[_cities];
                long running = _permutations;
                for (int slot = _cities - 1; slot >= 0; slot--)
                {
                    running /= slot + 1;
                    divisors[slot] = running;
                }
                gpu.CopyToConstantMemory(divisors, gpuDivisors);

                // One answer slot per block.
                var answer = new AnswerStruct[_blocksPerGrid];
                var gpuAnswer = gpu.Allocate(answer);

                gpu.SafeLaunch(_blocksPerGrid, _threadsPerBlock,
                               GpuFindPathDistance, _permutations, gpuLatLong, gpuAnswer);

                gpu.Synchronize();
                gpu.CopyFromDevice(gpuAnswer, answer);
                gpu.FreeAll();

                // Pick the smallest distance among the per-block answers.
                var bestDistance = float.MaxValue;
                var bestPermutation = 0L;
                for (var block = 0; block < _blocksPerGrid; block++)
                {
                    if (answer[block].distance < bestDistance)
                    {
                        bestDistance = answer[block].distance;
                        bestPermutation = answer[block].pathNo;
                    }
                }

                return new Answer {
                    Distance = bestDistance,
                    Permutation = bestPermutation,
                    msLoadTime = LoadTime,
                    msRunTime = runTimer.ElapsedMilliseconds
                };
            }
        }
コード例 #18
0
ファイル: Ballot.cs プロジェクト: uzbekdev1/CUDAfy.NET
        /// <summary>
        /// Runs "BallotKernel" over four warps of random 0/1 inputs and compares each warp's
        /// output with a bit mask computed on the CPU, printing PASSED or FAILED per warp.
        /// </summary>
        public static void Execute()
        {
            CudafyModule module = CudafyTranslator.Cudafy(Program.testArchitecture);
            GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, 0);
            gpu.LoadModule(module);

            const int warps = 4;
            const int count = warps * 32;

            var random = new Random();
            var input = new int[count];
            var output = new int[warps];

            // New int arrays start zeroed, so the expected masks can be accumulated directly.
            var expectedOutput = new int[warps];

            // Draw each input bit and fold it into the expected mask of its warp:
            // bit (i % 32) of entry (i / 32).
            for (var i = 0; i < count; i++)
            {
                input[i] = random.Next(2);
                expectedOutput[i / 32] += input[i] << (i % 32);
            }

            var devInput = gpu.Allocate<int>(count);
            var devOutput = gpu.Allocate<int>(warps);

            gpu.CopyToDevice(input, devInput);

            // One block containing all 'count' threads.
            gpu.Launch(1, count, "BallotKernel", devInput, devOutput);

            // Copy the per-warp results back to the host.
            gpu.CopyFromDevice(devOutput, output);

            gpu.Free(devInput);
            gpu.Free(devOutput);

            for (var warp = 0; warp < warps; warp++)
            {
                Console.WriteLine("Warp {0} Ballot: {1}", warp, output[warp]);
                Console.WriteLine("Expected: {0} \t{1}", expectedOutput[warp], expectedOutput[warp] == output[warp] ? "PASSED" : "FAILED");
            }
        }
コード例 #19
0
ファイル: GPGPUTests.cs プロジェクト: uzbekdev1/CUDAfy.NET-1
        /// <summary>
        /// Checks that requesting CUDA device 0 yields a <c>CudaGPU</c>. Runs only when the
        /// configured target is CUDA and at least one CUDA device is present.
        /// </summary>
        public void Test_CreateCudaGPU()
        {
            if (CudafyModes.Target != eGPUType.Cuda)
            {
                Console.WriteLine("Only tests CUDA devices, so skip.");
                return;
            }

            int deviceCount = CudafyHost.GetDeviceCount(eGPUType.Cuda);
            if (deviceCount <= 0)
            {
                return;
            }

            GPGPU device = CudafyHost.GetDevice(eGPUType.Cuda, 0);
            Assert.IsTrue(device is CudaGPU);
        }
コード例 #20
0
ファイル: julia_gpu.cs プロジェクト: rblenis/cudafy
        /// <summary>
        /// Runs 'thekernel' on a DIM x DIM grid and copies the resulting bitmap into
        /// <paramref name="ptr"/>.
        /// </summary>
        /// <param name="ptr">Host buffer that receives the bitmap bytes.</param>
        public static void Execute(byte[] ptr)
        {
            CudafyModule module = CudafyTranslator.Cudafy();
            GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
            gpu.LoadModule(module);

            // Device buffer with as many bytes as the host bitmap.
            byte[] devBitmap = gpu.Allocate<byte>(ptr.Length);

            // DIM x DIM blocks, block size 1.
            dim3 grid = new dim3(DIM, DIM);
            gpu.Launch(grid, 1).thekernel(devBitmap);

            gpu.CopyFromDevice(devBitmap, ptr);

            gpu.FreeAll();
        }
コード例 #21
0
ファイル: GPGPUTests.cs プロジェクト: uzbekdev1/CUDAfy.NET-1
        /// <summary>
        /// Checks that requesting OpenCL device 0 yields an <c>OpenCLDevice</c>. Runs only
        /// when the configured target is OpenCL and at least one OpenCL device is present.
        /// </summary>
        public void Test_CreateOpenCLDevice()
        {
            if (CudafyModes.Target != eGPUType.OpenCL)
            {
                Console.WriteLine("Only tests OpenCL devices, so skip.");
                return;
            }

            int deviceCount = CudafyHost.GetDeviceCount(eGPUType.OpenCL);
            if (deviceCount <= 0)
            {
                return;
            }

            GPGPU device = CudafyHost.GetDevice(eGPUType.OpenCL, 0);
            Assert.IsTrue(device is OpenCLDevice);
        }
コード例 #22
0
        /// <summary>
        ///     Reduces a matrix to "canonical" form by the Gauss-Jordan method, that is, to the
        ///     matrix obtained through equivalent row transformations for which the following
        ///     holds: if i is the index of the first non-zero value in a row, then every other
        ///     row of the matrix contains only zero at index i.
        ///     If a row has no first non-zero index (-1), the whole row is zero.
        ///     Reduction to canonical form is used when solving systems of linear equations and
        ///     when searching for a fundamental system of solutions of such a system.
        ///     This implementation works on a matrix over the field GF(2), i.e. a boolean matrix.
        /// </summary>
        public static void ExecuteGaussJordan()
        {
            CudafyModule km = CudafyTranslator.Cudafy();

            GPGPU gpu = CudafyHost.GetDevice();

            gpu.LoadModule(km);

            // Device buffers allocated from the host-side members _a, _b, _c, _d and E.
            int[,] devA = gpu.Allocate(_a);
            int[,] devB = gpu.Allocate(_b);
            int[] devC = gpu.Allocate(_c);
            int[] devD = gpu.Allocate(_d);
            int[] devE = gpu.Allocate(E);

            // Only _a is uploaded; the other device buffers are not initialised from the host here.
            gpu.CopyToDevice(_a, devA);

            int rows    = _a.GetLength(0);
            int columns = _a.GetLength(1);

            // Grid and block sizes: the cube root of the element count (truncated), capped at 15.
            dim3 gridSize  = Math.Min(15, (int)Math.Pow(rows * columns, 0.33333333333));
            dim3 blockSize = Math.Min(15, (int)Math.Pow(rows * columns, 0.33333333333));

            gpu.Launch(gridSize, blockSize, "RepeatZero", devA, devB, devC, devD, devE);
            for (int i = 0; i < Math.Min(rows, columns); i++)
            {
                gpu.Launch(gridSize, blockSize, "IndexOfNonZero", devA, devB, devC, devD, devE);
                gpu.CopyFromDevice(devC, _c);
                // Skip ahead past entries of _c equal to -1. Note that this advances the loop
                // variable itself, in addition to the i++ of the for statement.
                while (i < Math.Min(rows, columns) && _c[i] == -1)
                {
                    i++;
                }
                // Nothing left to process once i has run past the smaller matrix dimension.
                if (i >= Math.Min(rows, columns))
                {
                    break;
                }
                int j = _c[i];
                gpu.Launch(gridSize, blockSize, "BooleanGaussJordan", devA, devB, i, j);
                // Swap the two device matrix references for the next iteration.
                // NOTE(review): presumably the kernel writes its result into devB; confirm.
                int[,] t = devA;
                devA     = devB;
                devB     = t;
            }

            // Copy the final matrix back into the host member _a.
            gpu.CopyFromDevice(devA, _a);
            // free the memory allocated on the GPU
            gpu.FreeAll();
        }
コード例 #23
0
        /// <summary>
        /// Translates the test types for the configured architecture and loads them onto the
        /// device. <c>MathDoubleTest</c> is included only when the device reports support for
        /// double precision.
        /// </summary>
        public virtual void SetUp()
        {
            _gpu = CudafyHost.GetDevice(CudafyModes.Architecture, CudafyModes.DeviceId);

            // Types that are always translated.
            var typesToTranslate = new List<Type> { this.GetType(), typeof(MathSingleTest) };

            SupportsDouble = _gpu.GetDeviceProperties().SupportsDoublePrecision;
            if (SupportsDouble)
            {
                typesToTranslate.Add(typeof(MathDoubleTest));
            }

            _cm = CudafyTranslator.Cudafy(CudafyModes.Architecture, typesToTranslate.ToArray());
            Debug.WriteLine(_cm.SourceCode);
            _gpu.LoadModule(_cm);
        }
コード例 #24
0
        /// <summary>
        /// Launches the "GenerateRipples" kernel over an 8x8 grid of 64x16-thread blocks and
        /// checks that element <c>index</c> of the result equals <c>index</c>.
        /// </summary>
        /// <remarks>
        /// The device result buffer is released in a <c>finally</c> block, so repeated calls
        /// (or a failing launch/copy) no longer leave device memory allocated.
        /// </remarks>
        /// <exception cref="Exception">Thrown when any result element differs from its index.</exception>
        public void ExeTestKernel()
        {
            GPGPU         gpu  = CudafyHost.GetDevice(CudafyModes.Target, 0);
            eArchitecture arch = gpu.GetArchitecture();
            CudafyModule  km   = CudafyTranslator.Cudafy(arch);

            gpu.LoadModule(km);

            int[] host_results = new int[N];

            // Assign a new block of device memory to hold the results
            var dev_results = gpu.Allocate<int>(N);

            try
            {
                gpu.Set<int>(dev_results);

                // Pre-fill the host array. These values are overwritten by CopyFromDevice
                // below; an alternative would be to upload them with gpu.CopyToDevice(host_results).
                for (int i = 0; i < N; i++)
                {
                    host_results[i] = i * 3;
                }

                // 64*16 = 1024 threads per block (which is max for sm_30)
                dim3 threadsPerBlock = new dim3(64, 16);

                // 8*8 = 64 blocks per grid, 1024 threads per block = 65536 threads in total
                dim3 blocksPerGrid = new dim3(8, 8);

                gpu.Launch(blocksPerGrid, threadsPerBlock, "GenerateRipples", dev_results);

                gpu.CopyFromDevice(dev_results, host_results);
            }
            finally
            {
                // Release the device buffer whether or not the launch and copy succeeded.
                gpu.Free(dev_results);
            }

            // Test our results
            for (int index = 0; index < N; index++)
            {
                if (host_results[index] != index)
                {
                    throw new Exception("Check your indexing math, genius!!!");
                }
            }
        }
コード例 #25
0
ファイル: Lab.cs プロジェクト: h0lger/GPU_Lab
        /// <summary>
        /// Translates <c>Lab</c> with debug generation enabled, loads it onto the configured
        /// device and prints the device name.
        /// </summary>
        public static void InitGPU()
        {
            // Target, device id and language are left at whatever CudafyModes and
            // CudafyTranslator currently hold; no device-count check is performed here.
            CudafyTranslator.GenerateDebug = true;

            CudafyModule module = CudafyTranslator.Cudafy(typeof(Lab));

            _gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
            _gpu.LoadModule(module);

            Console.WriteLine("Running examples using {0}", _gpu.GetDeviceProperties(false).Name);
        }
コード例 #26
0
ファイル: Tuning.cs プロジェクト: uzbekdev1/CUDAfy.NET-1
        /// <summary>
        /// Runs the test four times: at the initial settings, twice more with
        /// <c>ThreadsPerBlock</c> halved each time, and once more with <c>BlocksPerGrid</c>
        /// halved. Then waits for input and frees device and host allocations.
        /// </summary>
        public static void Execute()
        {
            Console.WriteLine("Compiling ...");

            // Initial configuration.
            RunTest(GetThreadInfo(), GetAnswer());

            // Half the threads per block.
            ThreadsPerBlock /= 2;
            RunTest(GetThreadInfo(), GetAnswer());

            // A quarter of the original threads per block.
            ThreadsPerBlock /= 2;
            RunTest(GetThreadInfo(), GetAnswer());

            // Additionally, half the blocks per grid.
            BlocksPerGrid /= 2;
            RunTest(GetThreadInfo(), GetAnswer());

            Console.WriteLine("Done ... Press Enter to shutdown.");
            try
            {
                Console.Read();
            }
            catch (InvalidOperationException)
            {
                // Deliberately ignored: if Console.Read fails this way, continue to cleanup.
            }

            CudafyHost.GetDevice().FreeAll();
            CudafyHost.GetDevice().HostFreeAll();
        }
コード例 #27
0
        /// <summary>
        /// Adds two integer arrays on the GPU with the 'adder' kernel (N blocks, block size 1)
        /// and prints every sum.
        /// </summary>
        public static void Execute()
        {
            CudafyModule module = CudafyTranslator.Cudafy();
            GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
            gpu.LoadModule(module);

            // Host-side operands and result.
            int[] hostA = new int[N];
            int[] hostB = new int[N];
            int[] hostC = new int[N];

            // Matching device-side buffers.
            int[] devA = gpu.Allocate<int>(hostA);
            int[] devB = gpu.Allocate<int>(hostB);
            int[] devC = gpu.Allocate<int>(hostC);

            // Fill the operands on the CPU: a[i] = -i, b[i] = i squared.
            for (int index = 0; index < N; index++)
            {
                hostA[index] = -index;
                hostB[index] = index * index;
            }

            // Upload the operands.
            gpu.CopyToDevice(hostA, devA);
            gpu.CopyToDevice(hostB, devB);

            // Launch 'adder' on N blocks with a block size of 1.
            gpu.Launch(N, 1).adder(devA, devB, devC);

            // Download the result.
            gpu.CopyFromDevice(devC, hostC);

            // Show every sum.
            for (int index = 0; index < N; index++)
            {
                Console.WriteLine("{0} + {1} = {2}", hostA[index], hostB[index], hostC[index]);
            }

            // Release the device buffers one by one.
            gpu.Free(devA);
            gpu.Free(devB);
            gpu.Free(devC);
        }
コード例 #28
0
        /// <summary>
        /// Loads the same translated module (one original, one clone) onto devices 0 and 1,
        /// then runs two worker threads up to ten times with multithreading enabled on both
        /// devices. The test passes when both threads of the last round joined within their
        /// 10 second timeout.
        /// </summary>
        public void Test_TwoThreadTwoGPUVer2()
        {
            // OpenCL targets compile for the OpenCL architecture, everything else for sm_11.
            eArchitecture arch = CudafyModes.Target == eGPUType.OpenCL ? eArchitecture.OpenCL : eArchitecture.sm_11;

            _gpu0 = CudafyHost.GetDevice(CudafyModes.Target, 0);
            var cm = CudafyTranslator.Cudafy(arch, typeof(MultiGPUTests));

            // Device 0: make its context current before loading the module and allocating.
            _gpu0.SetCurrentContext();
            _gpu0.LoadModule(cm);
            _gpuuintBufferIn0 = _gpu0.Allocate(_uintBufferIn0);

            _gpu1 = CudafyHost.GetDevice(CudafyModes.Target, 1);
            // Cannot load same module to two devices, therefore need to clone.
            var cm1 = cm.Clone();

            // Device 1: same sequence, using the cloned module.
            _gpu1.SetCurrentContext();
            _gpu1.LoadModule(cm1);
            _gpuuintBufferIn1 = _gpu1.Allocate(_uintBufferIn1);

            _gpu0.EnableMultithreading();
            _gpu1.EnableMultithreading();
            // Join results of the most recent round; both stay false if no round completes.
            bool j1 = false;
            bool j2 = false;

            for (int i = 0; i < 10; i++)
            {
                Console.WriteLine(i);
                Thread t1 = new Thread(Test_TwoThreadTwoGPU_Thread0V2);
                Thread t2 = new Thread(Test_TwoThreadTwoGPU_Thread1V2);
                t1.Start();
                t2.Start();
                // Wait up to 10 seconds for each thread; Join returns false on timeout.
                j1 = t1.Join(10000);
                j2 = t2.Join(10000);
                // Stop repeating as soon as either thread failed to finish in time.
                if (!j1 || !j2)
                {
                    break;
                }
            }
            // Clean up both devices before asserting, so cleanup runs even when the test fails.
            _gpu0.DisableMultithreading();
            _gpu0.FreeAll();
            _gpu1.DisableMultithreading();
            _gpu1.FreeAll();
            Assert.IsTrue(j1);
            Assert.IsTrue(j2);
        }
コード例 #29
0
        /// <summary>
        /// Runs 'mykernel' on a dimX x dimY grid with a freshly allocated device buffer and a
        /// device copy of <paramref name="ptr"/>, then copies the allocated buffer back into
        /// <paramref name="ptr"/>.
        /// </summary>
        /// <param name="ptr">Host bitmap; uploaded as kernel input and overwritten with the result.</param>
        /// <param name="dimX">First grid dimension.</param>
        /// <param name="dimY">Second grid dimension.</param>
        public static void MyExecute(byte[] ptr, int dimX, int dimY)
        {
            CudafyModule module = CudafyTranslator.Cudafy();
            GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
            gpu.LoadModule(module);

            // First kernel argument: an empty device buffer of the same length as the bitmap.
            byte[] devAllocated = gpu.Allocate<byte>(ptr.Length);

            // Second kernel argument: a device copy of the host bitmap.
            byte[] devCopied = gpu.CopyToDevice(ptr);

            dim3 grid = new dim3(dimX, dimY);
            gpu.Launch(grid, 1).mykernel(devAllocated, devCopied);

            // The allocated buffer is what gets copied back to the host.
            gpu.CopyFromDevice(devAllocated, ptr);

            gpu.FreeAll();
        }
コード例 #30
0
ファイル: SIMDFunctions.cs プロジェクト: uzbekdev1/CUDAfy.NET
        /// <summary>
        /// Runs "SIMDFunctionTest" three times on 1024 x 1024 element inputs and prints the
        /// time of each run. After the first run the device result is compared with
        /// <c>GThread.vadd2</c> evaluated on the host.
        /// </summary>
        public static void Execute()
        {
            _gpu = CudafyHost.GetDevice(eGPUType.Cuda);

            CudafyModule module = CudafyTranslator.Cudafy(ePlatform.Auto, Program.testArchitecture, typeof(SIMDFunctions));
            _gpu.LoadModule(module);

            int width = 1024;
            int height = 1024;

            for (int run = 0; run < 3; run++)
            {
                // First operand: fill on the host, then upload.
                uint[] hostA = new uint[width * height];
                Fill(hostA);
                uint[] devA = _gpu.CopyToDevice(hostA);

                // Second operand.
                uint[] hostB = new uint[width * height];
                Fill(hostB);
                uint[] devB = _gpu.CopyToDevice(hostB);

                // Result buffers on host and device.
                uint[] hostC = new uint[width * height];
                uint[] devC = _gpu.Allocate(hostC);

                // The timer covers the launch and the copy back.
                _gpu.StartTimer();
                _gpu.Launch(height, width, "SIMDFunctionTest", devA, devB, devC);
                _gpu.CopyFromDevice(devC, hostC);
                float elapsed = _gpu.StopTimer();
                Console.WriteLine("Time: {0}", elapsed);

                // Only the first run is verified against the host-side vadd2.
                if (run == 0)
                {
                    GThread hostThread = new GThread(1, 1, null);
                    bool allMatch = true;
                    for (int i = 0; i < width * height; i++)
                    {
                        uint expected = hostThread.vadd2(hostA[i], hostB[i]);
                        if (expected != hostC[i])
                        {
                            allMatch = false;
                        }
                    }
                    Console.WriteLine("Test {0}", allMatch ? "passed. " : "failed!");
                }

                _gpu.FreeAll();
            }
        }