Ejemplo n.º 1
0
        /// <summary>
        ///     Runs one single-precision <c>GEMM</c> call through the BLAS wrapper on the
        ///     emulator target and copies the output buffer back to the host.
        /// </summary>
        public static void MyFirstBlasEmulatorTest()
        {
            Console.WriteLine("MyTest()");

            // Force the emulator target and fetch the matching device.
            CudafyModes.Target = eGPUType.Emulator;
            GPGPU device = CudafyHost.GetDevice(CudafyModes.Target);

            // The BLAS wrapper is disposed when the using block ends.
            using (GPGPUBLAS blasWrapper = GPGPUBLAS.Create(device))
            {
                const int elementCount = 100; // 10 * 10 values per array
                const int dimension    = 10;  // used for all three GEMM size arguments

                // Host arrays; all three start zero-filled.
                float[] hostA = new float[elementCount];
                float[] hostB = new float[elementCount];
                float[] hostC = new float[elementCount];

                // Upload both operands and the output buffer.
                float[] devA = device.CopyToDevice(hostA);
                float[] devB = device.CopyToDevice(hostB);
                float[] devC = device.CopyToDevice(hostC);

                float           scaleAB   = -1;
                float           scaleC    = 0;
                cublasOperation operation = cublasOperation.N;

                blasWrapper.GEMM(dimension, dimension, dimension, scaleAB, devA, devB, scaleC, devC, operation);

                // Bring the result buffer back into hostC.
                device.CopyFromDevice<float>(devC, hostC);
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        ///     Translates the <c>GPU</c> type for OpenCL, runs the
        ///     <c>CalculateNeuralNetwork</c> kernel on OpenCL device 0 and prints the
        ///     elapsed milliseconds to the console.
        /// </summary>
        /// <returns>The host array that the kernel output was copied into.</returns>
        public static float[] CallGPU()
        {
            // Everything below targets OpenCL, device 0.
            CudafyModes.Target        = eGPUType.OpenCL;
            CudafyModes.DeviceId      = 0;
            CudafyTranslator.Language = eLanguage.OpenCL;

            CudafyModule module = CudafyTranslator.Cudafy(ePlatform.Auto, eArchitecture.OpenCL, typeof(GPU));
            GPGPU        device = CudafyHost.GetDevice(eGPUType.OpenCL, 0);

            device.LoadModule(module);
            module.Serialize();

            // Host-side data.
            float[]   hostInput   = Utils.GenerateRandomVector();
            float[,,] hostWeights = Utils.GenerateRandomMatrix().AsSingleDimension();
            float[]   hostOutput  = new float[Utils.N];

            // The timed region covers allocation, upload, launch, download and release.
            Stopwatch timer = Stopwatch.StartNew();

            float[]   devOutput  = device.Allocate<float>(hostOutput);
            float[]   devInput   = device.CopyToDevice(hostInput);
            float[,,] devWeights = device.CopyToDevice(hostWeights);
            device.Launch(Utils.GRID_SIZE, Utils.BLOCK_SIZE).CalculateNeuralNetwork(devInput, devWeights, devOutput);
            device.CopyFromDevice(devOutput, hostOutput);
            device.FreeAll();

            timer.Stop();
            Console.WriteLine("GPU: " + timer.ElapsedMilliseconds);

            return hostOutput;
        }
Ejemplo n.º 3
0
        //
        // http://stackoverflow.com/questions/18628447/cudafy-throws-an-exception-while-testing
        //
        /// <summary>
        ///     BLAS sample: selects the emulator target, loads a module translated for the
        ///     device's architecture and issues a single-precision <c>GEMM</c> call.
        /// </summary>
        /// <param name="deviceId">Device id passed to <c>CudafyHost.GetDevice</c> and stored in <c>CudafyModes.DeviceId</c>.</param>
        /// <exception cref="NotImplementedException">Always thrown once the GEMM call has completed.</exception>
        private static void BlasSample(int deviceId)
        {
            CudafyModes.Target = eGPUType.Emulator;
            GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, deviceId);

            CudafyModes.DeviceId = deviceId;
            eArchitecture arch = gpu.GetArchitecture();
            CudafyModule  km   = CudafyTranslator.Cudafy(arch);

            gpu.LoadModule(km);

            // The BLAS wrapper is disposable; the using block releases it even if GEMM throws.
            using (GPGPUBLAS blas = GPGPUBLAS.Create(gpu))
            {
                const int N = 100;

                // Host arrays (zero-filled) and the two GEMM scalars.
                float[] a     = new float[N];
                float[] b     = new float[N];
                float[] c     = new float[N];
                float   alpha = -1;
                float   beta  = 0;

                float[] device_a = gpu.CopyToDevice(a);
                float[] device_b = gpu.CopyToDevice(b);
                float[] device_c = gpu.CopyToDevice(c);

                int             m  = 10;
                int             n  = 10;
                int             k  = 10;
                cublasOperation Op = cublasOperation.N;

                blas.GEMM(m, k, n, alpha, device_a, device_b, beta, device_c, Op);
            }

            // Kept from the original sample: the method deliberately ends by throwing.
            throw new NotImplementedException();
        }
Ejemplo n.º 4
0
        /// <summary>
        ///     Entry point of the vector-addition example: picks the device, loads the
        ///     translated module, runs the <c>add</c> kernel over <c>N</c> elements and prints
        ///     every <c>a[i] + b[i] = c[i]</c> line.
        /// </summary>
        /// <exception cref="System.InvalidOperationException">
        ///     No device of the selected target type is available.
        /// </exception>
        public static void Main()
        {
            Console.WriteLine("CUDAfy Example\nCollecting necessary resources...");

            CudafyModes.Target        = eGPUType.Cuda; // To use OpenCL, change this enum
            CudafyModes.DeviceId      = 0;
            CudafyTranslator.Language = CudafyModes.Target == eGPUType.OpenCL ? eLanguage.OpenCL : eLanguage.Cuda;

            // Check for available devices. Main takes no arguments, so a missing device is
            // an invalid-state condition, not an argument error.
            if (CudafyHost.GetDeviceCount(CudafyModes.Target) == 0)
            {
                throw new System.InvalidOperationException("No suitable devices found.");
            }

            // Init device
            var gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);

            Console.WriteLine("Running example using {0}", gpu.GetDeviceProperties(false).Name);

            // Load module for GPU
            var km = CudafyTranslator.Cudafy();

            gpu.LoadModule(km);

            // Define local arrays
            var a = new int[N];
            var b = new int[N];
            var c = new int[N];

            // fill the arrays 'a' and 'b' on the CPU
            for (var i = 0; i < N; i++)
            {
                a[i] = i;
                b[i] = i * i;
            }

            try
            {
                // allocate the memory for the result on the GPU
                var dev_c = gpu.Allocate<int>(c);

                // copy the arrays 'a' and 'b' to the GPU
                var dev_a = gpu.CopyToDevice(a);
                var dev_b = gpu.CopyToDevice(b);

                gpu.Launch(1, N).add(dev_a, dev_b, dev_c);

                // copy the array 'c' back from the GPU to the CPU
                gpu.CopyFromDevice(dev_c, c);

                // display the results
                for (var i = 0; i < N; i++)
                {
                    Console.WriteLine("{0} + {1} = {2}", a[i], b[i], c[i]);
                }
            }
            finally
            {
                // free the memory allocated on the GPU, also when a step above failed
                gpu.FreeAll();
            }

            Console.WriteLine("Done!");
            Console.ReadKey();
        }
Ejemplo n.º 5
0
        /// <summary>
        ///     Fixture set-up: acquires a device for <c>eArchitecture.sm_30</c>, loads the
        ///     cached or freshly translated module, and prepares the integer and float
        ///     input/output buffers on host and device.
        /// </summary>
        public void SetUp()
        {
            _gpu = CudafyHost.GetDevice(eArchitecture.sm_30, CudafyModes.DeviceId);
            Assert.IsFalse(_gpu is OpenCLDevice, "OpenCL devices are not supported.");

            // Reuse the serialized module only when it exists and its checksums verify;
            // otherwise translate again and try to cache the result.
            _cm = CudafyModule.TryDeserialize();
            bool cacheIsValid = _cm != null && _cm.TryVerifyChecksums();
            if (!cacheIsValid)
            {
                _cm = CudafyTranslator.Cudafy(eArchitecture.sm_30);
                Console.WriteLine(_cm.CompilerOutput);
                _cm.TrySerialize();
            }

            _gpu.LoadModule(_cm);

            // Integer fixture (arbitrary input values).
            inputIntArray = new int[]
            {
                0x17, 0x01, 0x7f, 0xd1, 0xfe, 0x23, 0x2c, 0xa0,
                0x00, 0xcf, 0xaa, 0x7a, 0x35, 0xf4, 0x04, 0xbc,
                0xe9, 0x6d, 0xb2, 0x55, 0xb0, 0xc8, 0x10, 0x49,
                0x76, 0x17, 0x92, 0xab, 0xf3, 0xf2, 0xab, 0xcb
            };
            d_inputIntArray  = _gpu.CopyToDevice(inputIntArray);
            d_outputIntArray = _gpu.Allocate<int>(WARP_SIZE);
            cpuIntResult     = new int[WARP_SIZE];
            gpuIntResult     = new int[WARP_SIZE];

            // Float fixture (arbitrary input values).
            inputFloatArray = new float[]
            {
                1.7f, -37.03f, 2147.6436f, -0.1f, 7.7f, 99.99f, -809.142f, -0.1115f,
                1.0f, 2.0f, 3.0f, 5.0f, 7.5f, 0.1001f, 11.119f, -9.0f,
                7749.9847f, -860249.118843f, 0.0f, -2727745.586215f, 12.0f, -11.0f, 77.77f, 22.0f,
                377.1112f, -377.1112f, 0.12345f, -0.12345f, 0.11111f, -0.11111f, 700000f, -14f
            };
            d_inputFloatArray  = _gpu.CopyToDevice(inputFloatArray);
            d_outputFloatArray = _gpu.Allocate<float>(WARP_SIZE);
            cpuFloatResult     = new float[WARP_SIZE];
            gpuFloatResult     = new float[WARP_SIZE];
        }
Ejemplo n.º 6
0
        /// <summary>
        ///     Fixture set-up: acquires the device, creates an input and an output host
        ///     buffer of <c>N</c> elements for every tested element type, then fills the
        ///     inputs and resets the outputs and the GPU.
        /// </summary>
        public void SetUp()
        {
            _gpu = CudafyHost.GetDevice(CudafyModes.Architecture, CudafyModes.DeviceId);

            // Input buffers, one per element type.
            _byteBufferIn   = new byte[N];
            _sbyteBufferIn  = new sbyte[N];
            _ushortBufferIn = new ushort[N];
            _uintBufferIn   = new uint[N];
            _ulongBufferIn  = new ulong[N];
            _cplxDBufferIn  = new ComplexD[N];
            _cplxFBufferIn  = new ComplexF[N];

            // Matching output buffers.
            _byteBufferOut   = new byte[N];
            _sbyteBufferOut  = new sbyte[N];
            _ushortBufferOut = new ushort[N];
            _uintBufferOut   = new uint[N];
            _ulongBufferOut  = new ulong[N];
            _cplxDBufferOut  = new ComplexD[N];
            _cplxFBufferOut  = new ComplexF[N];

            SetInputs();
            ClearOutputsAndGPU();
        }
Ejemplo n.º 7
0
        /// <summary>
        ///     Runs the "Kernel" function with a <c>Generic&lt;ushort, ushort&gt;</c> argument
        ///     and prints PASSED when the single integer copied back from the device is 1,
        ///     FAILED otherwise.
        /// </summary>
        public static void Execute()
        {
            // Use the serialized module when present and valid, otherwise translate and cache it.
            CudafyModule module = CudafyModule.TryDeserialize();
            bool cacheIsValid = module != null && module.TryVerifyChecksums();
            if (!cacheIsValid)
            {
                module = CudafyTranslator.Cudafy(typeof(Generic<ushort, ushort>), typeof(SimpleGeneric));
                module.Serialize();
            }

            GPGPU device = CudafyHost.GetDevice(CudafyModes.Target);
            device.LoadModule(module);

            var kernelArgument = new Generic<ushort, ushort> { A = 187 };

            // One-element device buffer that receives the kernel's result.
            int[] devResult = device.Allocate<int>(1);
            device.Launch(1, 1, "Kernel", kernelArgument, devResult);

            int result;
            device.CopyFromDevice(devResult, out result);

            string verdict;
            if (result == 1)
            {
                verdict = "PASSED";
            }
            else
            {
                verdict = "FAILED";
            }

            Console.WriteLine("Simple Generic: " + verdict);
        }
Ejemplo n.º 8
0
        /// <summary>
        ///     Initialises the GPU device once: creates the device, the CUDA-only helpers
        ///     (<c>Blas</c> and <c>Rand</c>), translates the module and loads it. When
        ///     <c>Gpu</c> is already set, only the environment workaround is applied.
        /// </summary>
        public void InitGPU()
        {
            // Work around for bug in Cudafy trying to find the path..
            // On a 64-bit OS the ProgramFiles variable is overridden for this process.
            if (Environment.Is64BitOperatingSystem)
            {
                Environment.SetEnvironmentVariable("ProgramFiles", "C:\\Program Files\\");
            }

            if (Gpu != null)
            {
                return;
            }

            Gpu = CudafyHost.GetDevice(_gpuType, 0);
            //Blas = GPGPUBLAS.Create(Gpu);
            if (_gpuType == eGPUType.Cuda)
            {
                Blas = new SharpBLAS(Gpu);
                Rand = GPGPURAND.Create(Gpu, curandRngType.CURAND_RNG_PSEUDO_DEFAULT);
                Rand.SetPseudoRandomGeneratorSeed((ulong)RandomHelpers.Next(9999));
            }

            CudafyTranslator.GenerateDebug = true;
            Debug.WriteLine("CUDA workdir = " + CudafyTranslator.WorkingDirectory);
            Console.WriteLine("Recompile module");
            CudafyTranslator.Language = eLanguage.Cuda;

            // NOTE(review): the sm_30 translation result is overwritten immediately by the
            // parameterless call, so only the second module is serialized and loaded.
            // Both calls are kept in case the first has side effects - confirm and drop one.
            var km = CudafyTranslator.Cudafy(eArchitecture.sm_30);
            km = CudafyTranslator.Cudafy();
            km.Serialize();
            Gpu.LoadModule(km);
        }
        /// <summary>
        ///     Invokes and executes one elementary function, selected by its name.
        /// </summary>
        /// <param name="function">Name of the function handed to <c>gpu.Launch</c>.</param>
        public static void Execute(string function)
        {
            Debug.Assert(_indexes1.Last() == _sequencies1.Length);
            Debug.Assert(_indexes2.Last() == _sequencies2.Length);

            CudafyModule module = CudafyTranslator.Cudafy();
            GPGPU        device = CudafyHost.GetDevice();
            device.LoadModule(module);

            // Upload both index arrays and both sequence arrays; the matrix is only allocated.
            int[]  devIndexes1    = device.CopyToDevice(_indexes1);
            int[]  devIndexes2    = device.CopyToDevice(_indexes2);
            int[]  devSequencies1 = device.CopyToDevice(_sequencies1);
            int[]  devSequencies2 = device.CopyToDevice(_sequencies2);
            int[,] devMatrix      = device.Allocate(_matrix);

            // Grid and block share one size: the truncated cube root of the cell count, capped at 15.
            int  rowCount    = _matrix.GetLength(0);
            int  columnCount = _matrix.GetLength(1);
            int  launchSize  = Math.Min(15, (int)Math.Pow((double)rowCount * columnCount, 0.33333333333));
            dim3 gridSize    = launchSize;
            dim3 blockSize   = launchSize;

            device.Launch(gridSize, blockSize, function,
                          devSequencies1, devIndexes1,
                          devSequencies2, devIndexes2,
                          devMatrix);

            // Copy the matrix back from the device into the host matrix.
            device.CopyFromDevice(devMatrix, _matrix);

            // Release everything allocated on the device.
            device.FreeAll();
        }
Ejemplo n.º 10
0
        /// <summary>
        ///     Invokes and executes the function that checks whether the array is sorted.
        /// </summary>
        /// <param name="direction">Value forwarded unchanged to both <c>Sorted</c> launches (default 1).</param>
        public static void ExecuteSorted(int direction = 1)
        {
            CudafyModule module = CudafyTranslator.Cudafy();
            GPGPU        device = CudafyHost.GetDevice();
            device.LoadModule(module);

            // Device buffers sized after the host arrays; only _a is uploaded.
            int[] devSource = device.Allocate(_a);
            int[] devFirst  = device.Allocate(_b);
            int[] devSecond = device.Allocate(_c);
            int[] devResult = device.Allocate(D);

            device.CopyToDevice(_a, devSource);

            // Three launches: Split, Sorted with step argument 0 on the configured grid,
            // then Sorted with step argument 1 on a single thread.
            device.Launch(1, 1).Split(devSource, devFirst, devSecond, _middle);
            device.Launch(_gridSize, _blockSize).Sorted(devSource, devFirst, devSecond, devResult, 0, direction);
            device.Launch(1, 1).Sorted(devSource, devFirst, devSecond, devResult, 1, direction);

            device.CopyFromDevice(devResult, D);

            // Release everything allocated on the device.
            device.FreeAll();
        }
Ejemplo n.º 11
0
        /// <summary>
        ///     Runs up to ten rounds in which two worker threads (one per device) are started
        ///     and joined with a 10 second timeout each. Stops at the first round in which a
        ///     join times out, and asserts that both joins of the last round succeeded.
        /// </summary>
        public void Test_TwoThreadTwoGPU()
        {
            _gpu0 = CudafyHost.CreateDevice(CudafyModes.Target, 0);
            _gpu1 = CudafyHost.CreateDevice(CudafyModes.Target, 1);
            _gpu0.EnableMultithreading();
            _gpu1.EnableMultithreading();

            bool firstJoined  = false;
            bool secondJoined = false;
            int  round        = 0;

            do
            {
                Console.WriteLine(round);

                Thread firstWorker  = new Thread(Test_TwoThreadTwoGPU_Thread0);
                Thread secondWorker = new Thread(Test_TwoThreadTwoGPU_Thread1);
                firstWorker.Start();
                secondWorker.Start();

                firstJoined  = firstWorker.Join(10000);
                secondJoined = secondWorker.Join(10000);
                round++;
            }
            while (firstJoined && secondJoined && round < 10);

            // Clean up both devices before asserting.
            _gpu0.DisableMultithreading();
            _gpu0.FreeAll();
            _gpu1.DisableMultithreading();
            _gpu1.FreeAll();

            Assert.IsTrue(firstJoined);
            Assert.IsTrue(secondJoined);
        }
Ejemplo n.º 12
0
        /// <summary>
        ///     Fixture set-up for the double-precision FFT tests: creates the device, the host
        ///     and device buffers and the FFT wrapper, then fills the host inputs with sine
        ///     samples, repeated for each of the <c>BATCH</c> batches.
        /// </summary>
        public void SetUp()
        {
            _gpu = CudafyHost.CreateDevice(CudafyModes.Target);

            // Host buffers.
            _hostInput      = new double[N * BATCH];
            _hostInputCplx  = new ComplexD[N * BATCH];
            _hostOutput     = new double[N * BATCH];
            _hostOutputCplx = new ComplexD[N * BATCH];

            // Device buffers.
            _devInput      = _gpu.Allocate(_hostInput);
            _devInputCplx  = _gpu.Allocate(_hostInputCplx);
            _devInter      = _gpu.Allocate<double>(N * 2 * BATCH);
            _devInterCplx  = _gpu.Allocate<ComplexD>(N * BATCH);
            _devOutput     = _gpu.Allocate(_hostOutput);
            _devOutputCplx = _gpu.Allocate(_hostOutputCplx);

            _fft = GPGPUFFT.Create(_gpu);

            for (int batch = 0; batch < BATCH; batch++)
            {
                int batchOffset = batch * N;

                for (int sample = 0; sample < N; sample++)
                {
                    // x and y are sines with amplitude 10; y uses twice the factor of x.
                    ComplexD value = new ComplexD
                    {
                        x = 10.0F * Math.Sin(100 * 2 * Math.PI * sample / N * Math.PI / 180),
                        y = 10.0F * Math.Sin(200 * 2 * Math.PI * sample / N * Math.PI / 180)
                    };

                    // The real-valued input reuses the x component.
                    _hostInput[sample + batchOffset]     = value.x;
                    _hostInputCplx[sample + batchOffset] = value;
                }
            }
        }
Ejemplo n.º 13
0
 /// <summary>
 ///     Translates the module, obtains the device selected by
 ///     <c>CudafyModes.Target</c> / <c>CudafyModes.DeviceId</c> and loads the module onto it.
 /// </summary>
 internal TensorOpGpu()
 {
     // Debug generation is currently switched off; uncomment to enable it.
     //CudafyTranslator.GenerateDebug = true;
     Module = CudafyTranslator.Cudafy();
     Gpu    = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
     // The module must be loaded before any kernel can be launched on Gpu.
     Gpu.LoadModule(Module);
 }
Ejemplo n.º 14
0
        /// <summary>
        ///     Launches <c>GpuFindPathDistance</c> with one result slot per block and returns
        ///     the slot with the smallest distance.
        /// </summary>
        /// <returns>
        ///     A new <c>AnswerStruct</c> holding the smallest distance found and its path
        ///     number; distance is <c>float.MaxValue</c> and pathNo is 0 when no slot
        ///     reported a smaller distance.
        /// </returns>
        internal static AnswerStruct GetAnswer()
        {
            using (var device = CudafyHost.GetDevice())
            {
                device.LoadModule(CudafyTranslator.Cudafy());

                // One result slot per block.
                var blockResults = new AnswerStruct[BlocksPerGrid];
                var devResults   = device.Allocate(blockResults);

                device.Launch(BlocksPerGrid, ThreadsPerBlock,
                              GpuFindPathDistance, devResults);

                device.Synchronize();
                device.CopyFromDevice(devResults, blockResults);
                device.FreeAll();

                // Linear scan for the minimum; the first slot wins on ties.
                float shortestDistance = float.MaxValue;
                long  shortestPath     = 0L;
                foreach (var candidate in blockResults)
                {
                    if (candidate.distance < shortestDistance)
                    {
                        shortestDistance = candidate.distance;
                        shortestPath     = candidate.pathNo;
                    }
                }

                var best = new AnswerStruct();
                best.distance = shortestDistance;
                best.pathNo   = shortestPath;
                return best;
            }
        }
Ejemplo n.º 15
0
        /// <summary>
        ///     Replaces <paramref name="dm"/> with a new <c>cols x rows</c> dense matrix built
        ///     from two device round-trips: one for the top <c>cols x cols</c> sub-matrix and
        ///     one for the remaining <c>rows - cols</c> rows.
        ///     NOTE(review): presumably intended as a GPU transpose (per the method name),
        ///     but see the notes on the GEMV calls below - confirm the result is correct.
        /// </summary>
        /// <param name="dm">
        ///     Matrix to process; set to <c>null</c> during the call and reassigned to the
        ///     new matrix at the end. Presumably requires RowCount &gt;= ColumnCount, since
        ///     <c>rows - cols</c> is used as a row count - TODO confirm.
        /// </param>
        public static void cudaTranspose(ref MathNet.Numerics.LinearAlgebra.Double.DenseMatrix dm)
        {
            GPGPU gpu = CudafyHost.GetDevice(eGPUType.Cuda);

            // NOTE(review): blas is never disposed in this method.
            GPGPUBLAS blas = GPGPUBLAS.Create(gpu);

            int cols = dm.ColumnCount, rows = dm.RowCount;
            int restRows = rows - cols;

            // Split the input into the square top part (a) and the remaining rows (b),
            // both flattened in column-major order.
            //double[] a = dm.Storage.ToColumnMajorArray();
            double[] a = dm.SubMatrix(0, cols, 0, cols).Storage.ToColumnMajorArray();
            double[] b = dm.SubMatrix(cols, restRows, 0, cols).Storage.ToColumnMajorArray();
            // Drop the references early (also clears the caller's variable via ref).
            dm = null;

            // First pass: the square part.
            double[] a_d = gpu.CopyToDevice <double>(a);
            a = null;
            double[] c_d = gpu.Allocate <double>(cols * cols);
            double[] x_d = gpu.CopyToDevice <double>(new double[] { 1 });
            // NOTE(review): a_d is uploaded above but is not passed to this GEMV call;
            // the call only receives the freshly allocated c_d and the one-element x_d.
            blas.GEMV(cols, cols, 1, c_d, x_d, 0, x_d, Cudafy.Maths.BLAS.Types.cublasOperation.T);
            // Host buffer for the complete result; the first cols * cols values come from c_d.
            a = new double[cols * rows];
            gpu.CopyFromDevice <double>(c_d, 0, a, 0, cols * cols);
            gpu.FreeAll();
            // Second pass: the remaining rows, written after the first cols * cols values.
            a_d = gpu.CopyToDevice <double>(b);
            b   = null;
            c_d = gpu.Allocate <double>(restRows * cols);
            x_d = gpu.CopyToDevice <double>(new double[] { 1 });
            // NOTE(review): as above, a_d is not an argument of this call.
            blas.GEMV(restRows, cols, 1, c_d, x_d, 0, x_d, Cudafy.Maths.BLAS.Types.cublasOperation.T);
            gpu.CopyFromDevice <double>(c_d, 0, a, cols * cols, restRows * cols);
            gpu.FreeAll();
            // Hand the result back through the ref parameter with swapped dimensions.
            dm = new MathNet.Numerics.LinearAlgebra.Double.DenseMatrix(cols, rows, a);
        }
Ejemplo n.º 16
0
        /// <summary>
        ///     Prints general, memory and multiprocessor information for every device of the
        ///     current <c>CudafyModes.Target</c> type, numbering the devices from 0.
        /// </summary>
        public static void Execute()
        {
            int deviceIndex = -1;

            foreach (GPGPUProperties device in CudafyHost.GetDeviceProperties(CudafyModes.Target, false))
            {
                deviceIndex++;

                // General section.
                Console.WriteLine("   --- General Information for device {0} ---", deviceIndex);
                Console.WriteLine("Name:  {0}", device.Name);
                Console.WriteLine("Platform Name:  {0}", device.PlatformName);
                Console.WriteLine("Device Id:  {0}", device.DeviceId);
                Console.WriteLine("Compute capability:  {0}.{1}", device.Capability.Major, device.Capability.Minor);
                Console.WriteLine("Clock rate: {0}", device.ClockRate);
                Console.WriteLine("Simulated: {0}", device.IsSimulated);
                Console.WriteLine();

                // Memory section.
                Console.WriteLine("   --- Memory Information for device {0} ---", deviceIndex);
                Console.WriteLine("Total global mem:  {0}", device.TotalMemory);
                Console.WriteLine("Total constant Mem:  {0}", device.TotalConstantMemory);
                Console.WriteLine("Max mem pitch:  {0}", device.MemoryPitch);
                Console.WriteLine("Texture Alignment:  {0}", device.TextureAlignment);
                Console.WriteLine();

                // Multiprocessor section.
                Console.WriteLine("   --- MP Information for device {0} ---", deviceIndex);
                Console.WriteLine("Shared mem per mp: {0}", device.SharedMemoryPerBlock);
                Console.WriteLine("Registers per mp:  {0}", device.RegistersPerBlock);
                Console.WriteLine("Threads in warp:  {0}", device.WarpSize);
                Console.WriteLine("Max threads per block:  {0}", device.MaxThreadsPerBlock);
                Console.WriteLine("Max thread dimensions:  ({0}, {1}, {2})",
                                  device.MaxThreadsSize.x, device.MaxThreadsSize.y, device.MaxThreadsSize.z);
                Console.WriteLine("Max grid dimensions:  ({0}, {1}, {2})",
                                  device.MaxGridSize.x, device.MaxGridSize.y, device.MaxGridSize.z);
                Console.WriteLine();
            }
        }
Ejemplo n.º 17
0
        /// <summary>
        ///     Entry point of the particle simulation: loads the translated module, writes
        ///     the frame count to "length.txt", then simulates and renders every frame while
        ///     printing the time each frame took.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            CudafyModule km = CudafyTranslator.Cudafy();

            GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);

            gpu.LoadModule(km);

            int numFrames = numberOfSeconds * framesPerSecond;

            InitializeParticles();

            File.WriteAllText("length.txt", numFrames.ToString());

            for (int i = 0; i < numFrames; i++)
            {
                // A Stopwatch is monotonic; DateTime.Now differences can jump when the
                // system clock is adjusted and have coarser resolution.
                System.Diagnostics.Stopwatch frameTimer = System.Diagnostics.Stopwatch.StartNew();

                Simulate(gpu);
                // NOTE(review): the returned Bitmap is neither used nor disposed here;
                // confirm whether Render keeps ownership of it before adding a Dispose.
                Bitmap frame = Render(gpu, i);

                frameTimer.Stop();
                TimeSpan frameTime = frameTimer.Elapsed;
                Console.WriteLine("Frame " + i + " complete. Time: " + frameTime.TotalMilliseconds + "ms");
            }
        }
Ejemplo n.º 18
0
        /// <summary>
        ///     Checks whether the given OpenCL device can run a double-precision kernel by
        ///     adding 2.5 and 7.5 on the device and comparing the result with 10.0.
        /// </summary>
        /// <param name="DeviceId">Zero-based OpenCL device id to test.</param>
        /// <returns>
        ///     <c>true</c> when the device returned exactly 10.0; <c>false</c> when the id is
        ///     out of range, the result differs, or any step of the test threw.
        /// </returns>
        public static bool TestGpuDoublePrecision(int DeviceId)
        {
            // Device ids are zero-based, so valid ids are 0 .. count - 1. Rejecting the id
            // here avoids changing the global Cudafy settings for a device that cannot exist.
            if (DeviceId < 0 || DeviceId >= CudafyHost.GetDeviceCount(eGPUType.OpenCL))
            {
                return(false);
            }

            try
            {
                CudafyModes.Target        = eGPUType.OpenCL;
                CudafyTranslator.Language = eLanguage.OpenCL;
                CudafyModule km  = CudafyTranslator.Cudafy();
                GPGPU        gpu = CudafyHost.GetDevice(eGPUType.OpenCL, DeviceId);
                gpu.LoadModule(km);

                double   c;
                double[] dev_c = gpu.Allocate <double>();
                try
                {
                    gpu.Launch().add_double(2.5d, 7.5d, dev_c);
                    gpu.CopyFromDevice(dev_c, out c);
                }
                finally
                {
                    // Release the device buffer even when the launch or the copy fails.
                    gpu.Free(dev_c);
                }

                // 2.5 + 7.5 is exactly representable, so an exact comparison is intended.
                return(c == 10.0d);
            }
            catch
            {
                // Best-effort probe: any failure means "not supported".
                return(false);
            }
        }
Ejemplo n.º 19
0
 /// <summary>
 ///     Fixture set-up for the single-precision FFT tests: creates the device and the FFT
 ///     wrapper (printing the driver and FFT versions), allocates host and device buffers
 ///     and fills the host inputs with sine samples, repeated for each of the
 ///     <c>BATCH</c> batches.
 /// </summary>
 public void SetUp()
 {
     _gpu = CudafyHost.CreateDevice(CudafyModes.Target);
     Console.WriteLine(_gpu.GetDriverVersion());
     _fft = GPGPUFFT.Create(_gpu);

     // Host buffers.
     _hostInput      = new float[N * BATCH];
     _hostInputCplx  = new ComplexF[N * BATCH];
     _hostOutput     = new float[N * BATCH];
     _hostOutputCplx = new ComplexF[N * BATCH];

     // Device buffers.
     _devInput      = _gpu.Allocate(_hostInput);
     _devInputCplx  = _gpu.Allocate(_hostInputCplx);
     _devInter      = _gpu.Allocate<float>(N * 2 * BATCH);
     _devInterCplx  = _gpu.Allocate<ComplexF>(N * BATCH);
     _devOutput     = _gpu.Allocate(_hostOutput);
     _devOutputCplx = _gpu.Allocate(_hostOutputCplx);

     Console.WriteLine(_fft.GetVersion());

     for (int batch = 0; batch < BATCH; batch++)
     {
         int batchOffset = batch * N;

         for (int sample = 0; sample < N; sample++)
         {
             // x and y are sines with amplitude 10; y uses twice the factor of x.
             ComplexF value = new ComplexF
             {
                 x = (float)(10.0F * Math.Sin(100 * 2 * Math.PI * sample / N * Math.PI / 180)),
                 y = (float)(10.0F * Math.Sin(200 * 2 * Math.PI * sample / N * Math.PI / 180))
             };

             // The real-valued input reuses the x component.
             _hostInput[sample + batchOffset]     = value.x;
             _hostInputCplx[sample + batchOffset] = value;
         }
     }
 }
Ejemplo n.º 20
0
        /// <summary>
        ///     Performs a merge sort.
        ///     Usage example:
        ///     CudafySequencies.SetSequencies(arrayOfArray,arrayOfArray);
        ///     CudafySequencies.Execute("Compare");
        ///     var compare = CudafySequencies.GetMartix();
        ///     CudafyArray.SetArray(Enumerable.Range(0,n).ToArray());
        ///     CudafyArray.SetCompare(compare);
        ///     CudafyArray.MergeSort();
        ///     var indexesOfSorted = CudafyArray.GetArray();
        /// </summary>
        /// <param name="direction">Value forwarded unchanged to every <c>MergeLinear</c> launch (default 1).</param>
        public static void MergeSort(int direction = 1)
        {
            CudafyModule module = CudafyTranslator.Cudafy();
            GPGPU        device = CudafyHost.GetDevice();
            device.LoadModule(module);

            int[] devA = device.Allocate(_a);
            int[] devB = device.Allocate(_b);

            device.CopyToDevice(_a, devA);

            // Each pass reads one buffer and writes the other, swapping roles every pass.
            for (int pass = 0; pass < _ceiling; pass++)
            {
                // Grid and block share one size: a truncated cube root, capped at 15.
                int launchSize = Math.Min(15, (int)Math.Pow((_length >> pass) + pass, 0.333333333333));

                bool  evenPass = (pass & 1) == 0;
                int[] source   = evenPass ? devA : devB;
                int[] target   = evenPass ? devB : devA;

                device.Launch(launchSize, launchSize)
                      .MergeLinear(source, target, pass, 0, _length, direction);
            }

            // After an even number of passes the data is back in devA, otherwise in devB.
            int[] sorted = ((_ceiling & 1) == 0) ? devA : devB;
            device.CopyFromDevice(sorted, _a);

            // Release everything allocated on the device.
            device.FreeAll();
        }
Ejemplo n.º 21
0
        /// <summary>
        ///     Runs the <c>ModulAtomic</c> kernel with one block per element, compacts every
        ///     result that is not -1 into a host list and prints the elapsed whole seconds.
        /// </summary>
        public static void primaGPU()
        {
            CudafyModule modul_kernel = CudafyTranslator.Cudafy();
            GPGPU        vga          = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);

            vga.LoadModule(modul_kernel);

            Stopwatch waktu = new Stopwatch();

            waktu.Start();
            int[] list_cpu = new int[KONSTANTA_THREAD];
            int[] list_cpy = new int[KONSTANTA_THREAD];
            int[] list     = vga.Allocate <int>(KONSTANTA_THREAD);
            vga.Launch(KONSTANTA_THREAD, 1).ModulAtomic(list);
            vga.CopyFromDevice(list, list_cpy);
            vga.FreeAll();

            // Compact the entries that are not -1 to the front of list_cpu.
            // The compaction is part of the timed work, so it stays inside the timer.
            int index = 0;

            for (int z = 0; z < list_cpy.Length; z++)
            {
                if (list_cpy[z] != -1)
                {
                    list_cpu[index] = list_cpy[z];
                    //Console.WriteLine(list_cpu[index]);
                    index++;
                }
            }
            waktu.Stop();

            // TimeSpan.Seconds is only the 0-59 component and wraps after a minute;
            // truncating TotalSeconds reports the whole elapsed time in seconds.
            TimeSpan ts    = waktu.Elapsed;
            String   total = ((long)ts.TotalSeconds).ToString();

            Console.WriteLine("Total GPU ------ {0} detik> ", total);
        }
Ejemplo n.º 22
0
        /// <summary>
        ///     Runs the <c>fungsiAtomic</c> kernel with one block per element, copies the
        ///     result to the host and prints the total elapsed milliseconds.
        /// </summary>
        public static void eksekusi()
        {
            CudafyModule kernel_modul = CudafyTranslator.Cudafy();
            GPGPU        vga          = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);

            vga.LoadModule(kernel_modul);
            Stopwatch waktu = new Stopwatch();

            waktu.Start();
            int[] array_vga   = vga.Allocate <int>(KONSTANTA_THREAD);
            int[] array_hasil = new int[KONSTANTA_THREAD];

            vga.Launch(KONSTANTA_THREAD, 1).fungsiAtomic(array_vga);
            vga.CopyFromDevice(array_vga, array_hasil);

            // One FreeAll is enough; the original called it a second time with nothing left to free.
            vga.FreeAll();
            waktu.Stop();

            // TimeSpan.Milliseconds is only the 0-999 component and drops whole seconds;
            // ElapsedMilliseconds is the full elapsed time.
            String total = waktu.ElapsedMilliseconds.ToString();

            Console.WriteLine("Total VGA ------ > " + total);
        }
Ejemplo n.º 23
0
        /// <summary>
        ///     Runs up to ten rounds in which two copy worker threads share one CUDA device
        ///     and are joined with a 10 second timeout each. Stops at the first round in which
        ///     a join times out, and asserts that both joins of the last round succeeded.
        /// </summary>
        public void Test_TwoThreadCopy()
        {
            _gpu = CudafyHost.GetDevice(eGPUType.Cuda);

            // Two extra device buffers, both sized after _uintBufferIn1.
            _gpuuintBufferIn3 = _gpu.Allocate(_uintBufferIn1);
            _gpuuintBufferIn4 = _gpu.Allocate(_uintBufferIn1);
            _gpu.EnableMultithreading();

            bool firstJoined  = false;
            bool secondJoined = false;
            int  round        = 0;

            do
            {
                Console.WriteLine(round);
                SetInputs();
                ClearOutputs();

                Thread firstWorker  = new Thread(Test_TwoThreadCopy_Thread1);
                Thread secondWorker = new Thread(Test_TwoThreadCopy_Thread2);
                firstWorker.Start();
                secondWorker.Start();

                firstJoined  = firstWorker.Join(10000);
                secondJoined = secondWorker.Join(10000);
                round++;
            }
            while (firstJoined && secondJoined && round < 10);

            // Clean up the device before asserting.
            _gpu.DisableMultithreading();
            _gpu.FreeAll();

            Assert.IsTrue(firstJoined);
            Assert.IsTrue(secondJoined);
        }
Ejemplo n.º 24
0
        /// <summary>
        ///     Launches the "impliedVolatile" kernel on a single parameter set and prints the
        ///     <c>i</c> and <c>B</c> fields of the parameter struct copied back from the device.
        /// </summary>
        public static void Execute()
        {
            CudafyModule module = CudafyTranslator.Cudafy(typeof(ParamsStruct), typeof(ImpliedVolatile));

            _gpu = CudafyHost.GetDevice(CudafyModes.Target);
            _gpu.LoadModule(module);

            // Single input parameter set.
            ParamsStruct[] hostParams = new ParamsStruct[1];
            hostParams[0] = new ParamsStruct
            {
                OP     = 96.95,
                Price  = 1332.24,
                Strike = 1235,
                TD     = 31,
                R      = 0.0001355,
                Q      = 0.0166,
                N      = 100,
                kind   = 1
            };

            // Receives the parameter struct after the kernel has run.
            ParamsStruct[] hostResult = new ParamsStruct[1];

            ParamsStruct[] devParams  = _gpu.CopyToDevice(hostParams);
            float[]        devScratch = _gpu.Allocate<float>(1001);
            _gpu.Launch(1, 1, "impliedVolatile", devParams, devScratch);

            _gpu.CopyFromDevice(devParams, 0, hostResult, 0, 1);

            Console.WriteLine("I={0}, B={1}", hostResult[0].i, hostResult[0].B);
        }
        /// <summary>
        ///     Launches the "GenerateRipples" kernel on device 0 of the current target with an
        ///     8 x 8 grid of 64 x 16 blocks and copies the result buffer back into a host array.
        /// </summary>
        public void ExeTestKernel()
        {
            GPGPU         device       = CudafyHost.GetDevice(CudafyModes.Target, 0);
            eArchitecture architecture = device.GetArchitecture();
            CudafyModule  module       = CudafyTranslator.Cudafy(architecture);

            device.LoadModule(module);

            int[] hostResults = new int[N];

            // Output buffer: allocated on the device without initial data.
            var devResults = device.Allocate<int>(N);

            // Input buffer: filled on the host with multiples of 3, then uploaded.
            for (int index = 0; index < N; index++)
            {
                hostResults[index] = index * 3;
            }

            var devFilledResults = device.CopyToDevice(hostResults);

            // 64 * 16 = 1024 threads per block, 8 * 8 = 64 blocks per grid.
            dim3 blockShape = new dim3(64, 16);
            dim3 gridShape  = new dim3(8, 8);

            device.Launch(gridShape, blockShape, "GenerateRipples", devResults, devFilledResults);

            // Overwrites the host array with the kernel output.
            device.CopyFromDevice(devResults, hostResults);
        }
Ejemplo n.º 26
0
        /// <summary>
        ///     Evaluates every hand on the GPU with the <c>evaluate</c> kernel, one thread per
        ///     hand in blocks of 256 threads.
        /// </summary>
        /// <param name="hands">Hands to evaluate, one <c>ulong</c> per hand.</param>
        /// <param name="numCards">Card count forwarded unchanged to the kernel.</param>
        /// <returns>One rank per hand, in input order; an empty array for empty input.</returns>
        public static uint[] Evaluate(ulong[] hands, int numCards)
        {
            // Nothing to evaluate: avoid zero-sized allocations and a zero-block launch.
            if (hands.Length == 0)
            {
                return(new uint[0]);
            }

            // Translates this class to CUDA C and then compiles it
            CudafyModule km = CudafyTranslator.Cudafy();//eArchitecture.sm_20);

            // Get the configured GPU and load the module
            GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);

            gpu.LoadModule(km);

            // Round the block count up so a partial last block is still launched.
            int blockSize = 256;
            int blockx    = hands.Length / blockSize;

            if (hands.Length % blockSize != 0)
            {
                blockx++;
            }

            uint[] toReturn = new uint[hands.Length];

            // Both device buffers are released in finally blocks so repeated calls
            // (or a failing launch) do not leak device memory.
            ulong[] dev_hands = gpu.Allocate <ulong>(hands.Length);
            try
            {
                uint[] dev_ranks = gpu.Allocate <uint>(hands.Length);
                try
                {
                    gpu.CopyToDevice(hands, dev_hands);

                    gpu.StartTimer();
                    gpu.Launch(blockx, blockSize).evaluate(dev_hands, numCards, hands.Length, dev_ranks);
                    // The measured time is not used; the call only ends the timer started above.
                    gpu.StopTimer();

                    gpu.CopyFromDevice(dev_ranks, toReturn);
                }
                finally
                {
                    gpu.Free(dev_ranks);
                }
            }
            finally
            {
                gpu.Free(dev_hands);
            }

            return(toReturn);
        }
Ejemplo n.º 27
0
 /// <summary>
 ///     Fixture set-up: obtains the default device, creates the sparse and BLAS wrappers
 ///     for it and builds the solver from all three.
 /// </summary>
 public void SetUp()
 {
     _gpu    = CudafyHost.GetDevice();
     // Both wrappers are created for the same device instance.
     _sparse = GPGPUSPARSE.Create(_gpu);
     _blas   = GPGPUBLAS.Create(_gpu);
     // The solver receives the device together with both wrappers.
     _solver = new Solver(_gpu, _blas, _sparse);
 }
Ejemplo n.º 28
0
 /// <summary>
 ///     Fixture set-up: creates the device for the configured target and device id and
 ///     allocates two input and two output host buffers of <c>N</c> elements each.
 /// </summary>
 public void SetUp()
 {
     _gpu = CudafyHost.CreateDevice(CudafyModes.Target, CudafyModes.DeviceId);

     // Input buffers.
     _uintBufferIn1 = new uint[N];
     _uintBufferIn2 = new uint[N];

     // Output buffers.
     _uintBufferOut1 = new uint[N];
     _uintBufferOut2 = new uint[N];
 }
        /// <summary>
        ///     Prints the number of Cuda, OpenCL and emulator devices, followed by the public
        ///     properties of every device of each type. A warning line is printed when the
        ///     number of property records differs from the reported device count.
        /// </summary>
        private void PrintOutAviableDevices()
        {
            Console.WriteLine("Printing out avaiable devices...");
            Console.WriteLine("For now this code will work only with Cuda devs...");

            // One entry per device type, carrying the exact messages used for that type.
            var sections = new[]
            {
                new
                {
                    Kind           = eGPUType.Cuda,
                    CountFormat    = "{0} devices of type Cuda found",
                    DetailsHeader  = "Attempting to print out detailed info about Cuda devices..",
                    MismatchNotice = "Something is terribly off! Number of cuda devices differ from received properites"
                },
                new
                {
                    Kind           = eGPUType.OpenCL,
                    CountFormat    = "{0} devices of type OpenCl found",
                    DetailsHeader  = "Attempting to print out detailed info about openCl devices..",
                    MismatchNotice = "Something is terribly off! Number of openCl devices differ from received properites"
                },
                new
                {
                    Kind           = eGPUType.Emulator,
                    CountFormat    = "{0} devices of type Emulator found",
                    DetailsHeader  = "Attempting to print out detailed info about emulator devices..",
                    MismatchNotice = "Something is terribly off! Number of emulator devices differ from received properites"
                }
            };

            // All three counts are queried before anything is printed about them.
            var deviceCounts = new[]
            {
                CudafyHost.GetDeviceCount(sections[0].Kind),
                CudafyHost.GetDeviceCount(sections[1].Kind),
                CudafyHost.GetDeviceCount(sections[2].Kind)
            };

            for (var index = 0; index < sections.Length; index++)
            {
                Console.WriteLine(sections[index].CountFormat, deviceCounts[index]);
            }

            for (var index = 0; index < sections.Length; index++)
            {
                var section = sections[index];

                Console.WriteLine(section.DetailsHeader);
                var devicesProperties = CudafyHost.GetDeviceProperties(section.Kind);

                if (devicesProperties.Count() != deviceCounts[index])
                {
                    Console.WriteLine(section.MismatchNotice);
                }

                foreach (var deviceProperties in devicesProperties)
                {
                    Console.WriteLine(@"---");
                    PrintOutObjectPublicProperties(deviceProperties);
                    Console.WriteLine(@"---");
                }
            }
        }
Ejemplo n.º 30
0
        /// <summary>
        ///     Switches Cudafy to OpenCL, translates the module and loads it onto the OpenCL
        ///     device with the given id, which is stored in <c>Gpu</c>.
        /// </summary>
        /// <param name="DeviceId">Id of the OpenCL device to use.</param>
        public void Initialize(int DeviceId)
        {
            const eGPUType target = eGPUType.OpenCL;

            // Target and translator language are both set before translating.
            CudafyModes.Target        = target;
            CudafyTranslator.Language = eLanguage.OpenCL;

            var module = CudafyTranslator.Cudafy();

            Gpu = CudafyHost.GetDevice(target, DeviceId);
            Gpu.LoadModule(module);
        }