/// <summary>
/// Obtains the Cudafy module for <c>Program</c> and loads it on the first available device.
/// The module is deserialized from disk when its checksums verify; otherwise it is
/// translated again and serialized. Devices are tried in the order CUDA, OpenCL, Emulator.
/// </summary>
private static void SimInit()
        {
            Console.WriteLine("Deserializing class");
            CudafyModule km = CudafyModule.TryDeserialize(typeof(Program).Name);
            Console.WriteLine("Got: " + km);

            // A missing module can never pass checksum verification.
            bool tvc = km != null && km.TryVerifyChecksums();
            Console.WriteLine("TVC: " + tvc);

            if (!tvc)
            {
                Console.WriteLine("Serializing");
                km = CudafyTranslator.Cudafy(typeof(Program));
                km.Serialize();
            }

            Console.WriteLine("Requesting device");
            _gpu = CudafyHost.GetDevice(eGPUType.Cuda);
            if (_gpu != null)
            {
                Console.WriteLine("Got CUDA Device: " + _gpu.DeviceId);
            }
            else
            {
                _gpu = CudafyHost.GetDevice(eGPUType.OpenCL);
                if (_gpu != null)
                {
                    Console.WriteLine("Got OpenCL Device: " + _gpu.DeviceId);
                }
                else
                {
                    // Last resort; no message is printed when the emulator is picked.
                    _gpu = CudafyHost.GetDevice(eGPUType.Emulator);
                    if (_gpu == null)
                    {
                        Console.WriteLine("No device found!");
                        return;
                    }
                }
            }

            Console.WriteLine("Loading module");
            _gpu.LoadModule(km);
        }
Exemple #2
0
        /// <summary>
        /// Removes the specified GPGPU from the cache and disposes it.
        /// </summary>
        /// <param name="gpu">The gpu. A null value is never treated as cached.</param>
        /// <returns>True if gpu was removed, else false.</returns>
        public static bool RemoveDevice(GPGPU gpu)
        {
            if (gpu == null)
            {
                return(false);
            }

            // Collect the keys first: the cache must not be modified while it is enumerated.
            List <string> names = GPGPUs.Where(v => v.Value == gpu).Select(v => v.Key).ToList();

            if (names.Count == 0)
            {
                return(false);
            }

            // Every matching entry refers to the same instance, so dispose it exactly once.
            gpu.Dispose();

            foreach (var s in names)
            {
                GPGPUs.Remove(s);
            }

            return(true);
        }
 /// <summary>
 /// Reports whether this device can access <paramref name="peer"/>.
 /// </summary>
 /// <param name="peer">Candidate peer device.</param>
 /// <returns>True when peer is a different instance and is an EmulatedGPU.</returns>
 public override bool CanAccessPeer(GPGPU peer)
 {
     lock (_peerAccessLock)
     {
         bool isDistinctDevice = peer != this;
         return isDistinctDevice && peer is EmulatedGPU;
     }
 }
Exemple #4
0
        /// <summary>
        /// Runs renderKernel on the device, saves the frame as "spring{frameNum}.png"
        /// and returns the frame as a bitmap.
        /// </summary>
        /// <param name="gpu">Device with the module containing renderKernel loaded.</param>
        /// <param name="frameNum">Frame index, used in the output file name.</param>
        /// <returns>A bitmap that owns its pixel data; the caller should dispose it.</returns>
        static Bitmap Render(GPGPU gpu, int frameNum)
        {
            uint[,] deviceImage = gpu.Allocate <uint>(width, height);

            float[] pX1_gpu = gpu.CopyToDevice <float>(pX1);
            float[] pY1_gpu = gpu.CopyToDevice <float>(pY1);
            float[] pZ1_gpu = gpu.CopyToDevice <float>(pZ1);

            float[] colorPosition_gpu = gpu.CopyToDevice <float>(colorPosition);
            float[] currentTime_gpu   = gpu.CopyToDevice <float>(currentTime);

            // NOTE(review): integer division; pixels beyond a multiple of 8 are not covered
            // unless width and height are multiples of the block size — confirm.
            dim3 threadsPerBlock = new dim3(8, 8);
            dim3 numBlocks       = new dim3(width / threadsPerBlock.x, height / threadsPerBlock.y);

            gpu.Launch(numBlocks, threadsPerBlock).renderKernel(deviceImage, pX1_gpu, pY1_gpu, pZ1_gpu, colorPosition_gpu, currentTime_gpu);

            uint[,] finalImage = new uint[width, height];
            gpu.CopyFromDevice <uint>(deviceImage, finalImage);

            gpu.Free(deviceImage);
            gpu.Free(pX1_gpu);
            gpu.Free(pY1_gpu);
            gpu.Free(pZ1_gpu);

            gpu.Free(colorPosition_gpu);
            gpu.Free(currentTime_gpu);

            // A Bitmap built on scan0 only borrows that memory, so the buffer must stay pinned
            // for as long as that Bitmap lives. The borrowed view is used for saving only and
            // an independent copy is returned; previously the returned Bitmap kept pointing
            // at memory that had already been unpinned.
            GCHandle pixels = GCHandle.Alloc(finalImage, GCHandleType.Pinned);
            try
            {
                using (Bitmap pinnedView = new Bitmap(width, height, width * sizeof(int), PixelFormat.Format32bppRgb, pixels.AddrOfPinnedObject()))
                {
                    pinnedView.Save("spring" + frameNum + ".png");
                    return(new Bitmap(pinnedView));
                }
            }
            finally
            {
                // Runs after the view is disposed, also when saving or copying throws.
                pixels.Free();
            }
        }
Exemple #5
0
        /// <summary>
        /// Creates the device on first use, sets up BLAS and random-number helpers for CUDA
        /// devices, then translates, serializes and loads the Cudafy module.
        /// Does nothing beyond the environment workaround when <c>Gpu</c> is already set.
        /// </summary>
        public void InitGPU()
        {
            // Work around for bug in Cudafy trying to find the path..
            if (Environment.Is64BitOperatingSystem)
            {
                Environment.SetEnvironmentVariable("ProgramFiles", "C:\\Program Files\\");
            }

            if (Gpu != null)
            {
                return;
            }

            Gpu = CudafyHost.GetDevice(_gpuType, 0);
            if (_gpuType == eGPUType.Cuda)
            {
                Blas = new SharpBLAS(Gpu);
                Rand = GPGPURAND.Create(Gpu, curandRngType.CURAND_RNG_PSEUDO_DEFAULT);
                Rand.SetPseudoRandomGeneratorSeed((ulong)RandomHelpers.Next(9999));
            }

            // Translator settings must be in place before Cudafy() is invoked.
            CudafyTranslator.GenerateDebug = true;
            Debug.WriteLine("CUDA workdir = " + CudafyTranslator.WorkingDirectory);
            Console.WriteLine("Recompile module");
            CudafyTranslator.Language = eLanguage.Cuda;

            // NOTE(review): the sm_30 result is overwritten immediately by the second call;
            // kept because it is unclear whether the first compile is relied on — confirm.
            var km = CudafyTranslator.Cudafy(eArchitecture.sm_30);
            km = CudafyTranslator.Cudafy();
            km.Serialize();
            Gpu.LoadModule(km);
        }
Exemple #6
0
        /// <summary>
        /// Translates and loads the module, then simulates and renders
        /// numberOfSeconds * framesPerSecond frames, printing the time taken by each.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            CudafyModule km = CudafyTranslator.Cudafy();

            GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);

            gpu.LoadModule(km);

            int numFrames = numberOfSeconds * framesPerSecond;

            InitializeParticles();

            // Record the frame count in a side file.
            File.WriteAllText("length.txt", numFrames.ToString());

            for (int i = 0; i < numFrames; i++)
            {
                DateTime frameStart = DateTime.Now;

                Simulate(gpu);

                // The returned bitmap is not used in this loop; dispose it so that one
                // bitmap is not leaked per frame.
                using (Bitmap frame = Render(gpu, i))
                {
                    TimeSpan frameTime = DateTime.Now - frameStart;
                    Console.WriteLine("Frame " + i + " complete. Time: " + frameTime.TotalMilliseconds + "ms");
                }
            }
        }
Exemple #7
0
        /// <summary>
        /// Test fixture setup: acquires an sm_30 device, loads the (cached or freshly
        /// translated) module and prepares WARP_SIZE-sized integer and float buffers.
        /// </summary>
        public void SetUp()
        {
            _gpu = CudafyHost.GetDevice(eArchitecture.sm_30, CudafyModes.DeviceId);
            Assert.IsFalse(_gpu is OpenCLDevice, "OpenCL devices are not supported.");

            // Reuse the serialized module only when it exists and its checksums verify.
            _cm = CudafyModule.TryDeserialize();
            bool cachedModuleUsable = _cm != null && _cm.TryVerifyChecksums();
            if (!cachedModuleUsable)
            {
                _cm = CudafyTranslator.Cudafy(eArchitecture.sm_30);
                Console.WriteLine(_cm.CompilerOutput);
                _cm.TrySerialize();
            }

            _gpu.LoadModule(_cm);

            // Integer fixtures (arbitrary values).
            inputIntArray = new int[]
            {
                0x17, 0x01, 0x7f, 0xd1, 0xfe, 0x23, 0x2c, 0xa0,
                0x00, 0xcf, 0xaa, 0x7a, 0x35, 0xf4, 0x04, 0xbc,
                0xe9, 0x6d, 0xb2, 0x55, 0xb0, 0xc8, 0x10, 0x49,
                0x76, 0x17, 0x92, 0xab, 0xf3, 0xf2, 0xab, 0xcb
            };
            d_inputIntArray  = _gpu.CopyToDevice(inputIntArray);
            d_outputIntArray = _gpu.Allocate <int>(WARP_SIZE);
            gpuIntResult     = new int[WARP_SIZE];
            cpuIntResult     = new int[WARP_SIZE];

            // Float fixtures (arbitrary values).
            inputFloatArray = new float[]
            {
                1.7f, -37.03f, 2147.6436f, -0.1f,
                7.7f, 99.99f, -809.142f, -0.1115f,
                1.0f, 2.0f, 3.0f, 5.0f,
                7.5f, 0.1001f, 11.119f, -9.0f,
                7749.9847f, -860249.118843f, 0.0f, -2727745.586215f,
                12.0f, -11.0f, 77.77f, 22.0f,
                377.1112f, -377.1112f, 0.12345f, -0.12345f,
                0.11111f, -0.11111f, 700000f, -14f
            };
            d_inputFloatArray  = _gpu.CopyToDevice(inputFloatArray);
            d_outputFloatArray = _gpu.Allocate <float>(WARP_SIZE);
            gpuFloatResult     = new float[WARP_SIZE];
            cpuFloatResult     = new float[WARP_SIZE];
        }
Exemple #8
0
        /// <summary>
        /// Lazily initializes the device: on the first call it creates <c>Gpu</c>, attaches
        /// BLAS and random-number helpers when the device type is CUDA, and then translates,
        /// serializes and loads the Cudafy module. Later calls only apply the path workaround.
        /// </summary>
        public void InitGPU()
        {
            // Work around for bug in Cudafy trying to find the path..
            bool os64Bit = Environment.Is64BitOperatingSystem;
            if (os64Bit)
            {
                // The previous and resulting values were read into a local that was never used.
                Environment.SetEnvironmentVariable("ProgramFiles", "C:\\Program Files\\");
            }

            if (Gpu == null)
            {
                Gpu = CudafyHost.GetDevice(_gpuType, 0);
                if (_gpuType == eGPUType.Cuda)
                {
                    Blas = new SharpBLAS(Gpu);
                    Rand = GPGPURAND.Create(Gpu, curandRngType.CURAND_RNG_PSEUDO_DEFAULT);
                    Rand.SetPseudoRandomGeneratorSeed((ulong)RandomHelpers.Next(9999));
                }

                // Configure the translator before translating.
                CudafyTranslator.GenerateDebug = true;
                Debug.WriteLine("CUDA workdir = " + CudafyTranslator.WorkingDirectory);
                Console.WriteLine("Recompile module");
                CudafyTranslator.Language = eLanguage.Cuda;

                // NOTE(review): two translations in a row; only the second result is used.
                // The first call is kept as-is — confirm whether it can be dropped.
                var km = CudafyTranslator.Cudafy(eArchitecture.sm_30);
                km = CudafyTranslator.Cudafy();
                km.Serialize();
                Gpu.LoadModule(km);
            }
        }
Exemple #9
0
        /// <summary>
        /// Translates the GPU class for OpenCL, runs CalculateNeuralNetwork on device 0 with
        /// randomly generated inputs and prints the elapsed milliseconds of the device phase.
        /// </summary>
        /// <returns>The output vector copied back from the device.</returns>
        public static float[] CallGPU()
        {
            // Select OpenCL globally before translating.
            CudafyModes.Target        = eGPUType.OpenCL;
            CudafyModes.DeviceId      = 0;
            CudafyTranslator.Language = eLanguage.OpenCL;

            CudafyModule module = CudafyTranslator.Cudafy(ePlatform.Auto, eArchitecture.OpenCL, typeof(GPU));
            GPGPU        device = CudafyHost.GetDevice(eGPUType.OpenCL, 0);
            device.LoadModule(module);
            module.Serialize();

            float[] input = Utils.GenerateRandomVector();
            float[,,] NN = Utils.GenerateRandomMatrix().AsSingleDimension();
            float[] output = new float[Utils.N];

            // Timed section: allocation, upload, launch, download and release.
            Stopwatch timer = Stopwatch.StartNew();

            float[] dev_output = device.Allocate <float>(output);
            float[] dev_input  = device.CopyToDevice(input);
            float[,,] dev_NN = device.CopyToDevice(NN);

            device.Launch(Utils.GRID_SIZE, Utils.BLOCK_SIZE).CalculateNeuralNetwork(dev_input, dev_NN, dev_output);
            device.CopyFromDevice(dev_output, output);
            device.FreeAll();

            timer.Stop();

            Console.WriteLine("GPU: " + timer.ElapsedMilliseconds);
            return(output);
        }
        /// <summary>
        /// Prepares the fixture: gets an sm_30 device (OpenCL devices are rejected), loads the
        /// module from cache or by translation, and creates the int and float test buffers.
        /// </summary>
        public void SetUp()
        {
            _gpu = CudafyHost.GetDevice(eArchitecture.sm_30, CudafyModes.DeviceId);
            Assert.IsFalse(_gpu is OpenCLDevice, "OpenCL devices are not supported.");

            _cm = CudafyModule.TryDeserialize();

            // Translate again when there is no cached module or its checksums do not verify.
            bool mustTranslate = _cm == null || !_cm.TryVerifyChecksums();
            if (mustTranslate)
            {
                _cm = CudafyTranslator.Cudafy(eArchitecture.sm_30);
                Console.WriteLine(_cm.CompilerOutput);
                _cm.TrySerialize();
            }

            _gpu.LoadModule(_cm);

            // arbitrary values
            inputIntArray = new int[]
            {
                0x17, 0x01, 0x7f, 0xd1, 0xfe, 0x23, 0x2c, 0xa0, 0x00, 0xcf, 0xaa, 0x7a, 0x35, 0xf4, 0x04, 0xbc,
                0xe9, 0x6d, 0xb2, 0x55, 0xb0, 0xc8, 0x10, 0x49, 0x76, 0x17, 0x92, 0xab, 0xf3, 0xf2, 0xab, 0xcb
            };
            d_inputIntArray = _gpu.CopyToDevice(inputIntArray);
            d_outputIntArray = _gpu.Allocate<int>(WARP_SIZE);
            gpuIntResult = new int[WARP_SIZE];
            cpuIntResult = new int[WARP_SIZE];

            // arbitrary values
            inputFloatArray = new float[]
            {
                1.7f, -37.03f, 2147.6436f, -0.1f, 7.7f, 99.99f, -809.142f, -0.1115f,
                1.0f, 2.0f, 3.0f, 5.0f, 7.5f, 0.1001f, 11.119f, -9.0f,
                7749.9847f, -860249.118843f, 0.0f, -2727745.586215f, 12.0f, -11.0f, 77.77f, 22.0f,
                377.1112f, -377.1112f, 0.12345f, -0.12345f, 0.11111f, -0.11111f, 700000f, -14f
            };
            d_inputFloatArray = _gpu.CopyToDevice(inputFloatArray);
            d_outputFloatArray = _gpu.Allocate<float>(WARP_SIZE);
            gpuFloatResult = new float[WARP_SIZE];
            cpuFloatResult = new float[WARP_SIZE];
        }
Exemple #11
0
        /// <summary>
        /// Runs a single GEMM through the BLAS wrapper on the emulator device, using three
        /// zero-initialized 100-element buffers, and copies the result buffer back.
        /// </summary>
        public static void MyFirstBlasEmulatorTest()
        {
            Console.WriteLine("MyTest()");
            // Get GPU device
            CudafyModes.Target = eGPUType.Emulator;
            GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target);

            // Create GPGPUBLAS (CUBLAS Wrapper)
            using (GPGPUBLAS blas = GPGPUBLAS.Create(gpu))
            {
                const int N     = 100;
                float[]   a     = new float[N];
                float[]   b     = new float[N];
                float[]   c     = new float[N];
                float     alpha = -1;
                float     beta  = 0;

                float[] device_a = gpu.CopyToDevice(a);
                float[] device_b = gpu.CopyToDevice(b);
                float[] device_c = gpu.CopyToDevice(c);

                try
                {
                    int             m  = 10;
                    int             n  = 10;
                    int             k  = 10;
                    cublasOperation Op = cublasOperation.N;
                    blas.GEMM(m, k, n, alpha, device_a, device_b, beta, device_c, Op);

                    gpu.CopyFromDevice <float>(device_c, c);
                }
                finally
                {
                    // Release the device buffers; they were previously never freed.
                    gpu.Free(device_a);
                    gpu.Free(device_b);
                    gpu.Free(device_c);
                }
            }
        }
Exemple #12
0
        /// <summary>
        /// Translates ParamsStruct and ImpliedVolatile, launches the "impliedVolatile" kernel
        /// with one parameter set and prints the i and B fields of the result.
        /// </summary>
        public static void Execute()
        {
            CudafyModule km = CudafyTranslator.Cudafy(typeof(ParamsStruct), typeof(ImpliedVolatile));

            _gpu = CudafyHost.GetDevice(CudafyModes.Target);
            _gpu.LoadModule(km);

            ParamsStruct[] host_par = new ParamsStruct[1];
            ParamsStruct[] result = new ParamsStruct[1];
            host_par[0].OP = 96.95;
            host_par[0].Price = 1332.24;
            host_par[0].Strike = 1235;
            host_par[0].TD = 31;
            host_par[0].R = 0.0001355;
            host_par[0].Q = 0.0166;
            host_par[0].N = 100;// 1000;
            host_par[0].kind = 1;

            ParamsStruct[] dev_par = _gpu.CopyToDevice(host_par);
            float[] PA = _gpu.Allocate<float>(1001);
            try
            {
                _gpu.Launch(1,1, "impliedVolatile", dev_par, PA);

                // The kernel's results are read back from the parameter struct itself.
                _gpu.CopyFromDevice(dev_par, 0, result, 0, 1);
            }
            finally
            {
                // Both device buffers were previously left allocated.
                _gpu.Free(dev_par);
                _gpu.Free(PA);
            }

            Console.WriteLine("I={0}, B={1}", result[0].i, result[0].B);
        }
 /// <summary>
 /// Creates the device, the host and device buffers for real and complex data,
 /// the FFT wrapper, and fills the host inputs with sine-based samples.
 /// </summary>
 public void SetUp()
 {
     _gpu = CudafyHost.CreateDevice(CudafyModes.Target);

     // Host-side buffers.
     _hostInput      = new double[N * BATCH];
     _hostInputCplx  = new ComplexD[N * BATCH];
     _hostOutput     = new double[N * BATCH];
     _hostOutputCplx = new ComplexD[N * BATCH];

     // Device-side buffers.
     _devInput      = _gpu.Allocate(_hostInput);
     _devInputCplx  = _gpu.Allocate(_hostInputCplx);
     _devInter      = _gpu.Allocate<double>(N * 2 * BATCH);
     _devInterCplx  = _gpu.Allocate<ComplexD>(N * BATCH);
     _devOutput     = _gpu.Allocate(_hostOutput);
     _devOutputCplx = _gpu.Allocate(_hostOutputCplx);

     _fft = GPGPUFFT.Create(_gpu);

     // Every batch receives the same N samples.
     for (int b = 0; b < BATCH; b++)
     {
         for (int i = 0; i < N; i++)
         {
             var slot = i + b * N;
             ComplexD sample = new ComplexD
             {
                 x = 10.0F * Math.Sin(100 * 2 * Math.PI * i / N * Math.PI / 180),
                 y = 10.0F * Math.Sin(200 * 2 * Math.PI * i / N * Math.PI / 180)
             };

             // The real-valued input uses the real part of the complex sample.
             _hostInput[slot]     = sample.x;
             _hostInputCplx[slot] = sample;
         }
     }
 }
Exemple #14
0
        /// <summary>
        /// Creates a layer bound to <paramref name="gpuModule"/>.
        /// </summary>
        /// <param name="gpuModule">Module whose Gpu the layer uses.</param>
        /// <param name="previousLayer">Preceding layer, or null; supplies the default minibatch size.</param>
        /// <param name="size">Layer size; 0 means no arrays are added here.</param>
        /// <param name="id">Identifier; when null or empty, "ID" plus the two-digit layer index is used.</param>
        /// <param name="miniBatchSize">Explicit minibatch size; Int32.MinValue means "not specified".</param>
        public Layer(GPUModule gpuModule, Layer previousLayer = null,int size = 0, string id = "", int miniBatchSize = Int32.MinValue)
        {
            bool hasPrevious = previousLayer != null;

            // An explicit minibatch size overrides the one taken from the previous layer.
            if (hasPrevious)
            {
                MinibatchSize = previousLayer.MinibatchSize;
            }
            if (miniBatchSize != Int32.MinValue)
            {
                MinibatchSize = miniBatchSize;
            }

            LayerIndex = IdCounter++;

            Id = id;
            if (String.IsNullOrEmpty(Id))
            {
                Id = "ID" + LayerIndex.ToString().PadLeft(2, '0');
            }

            _gpuModule    = gpuModule;
            _gpu          = _gpuModule.Gpu;
            PreviousLayer = previousLayer;

            // Without a size there is nothing to add.
            if (size == 0)
            {
                return;
            }

            this.Size = size;
            AddArray(ArrayName.Outputs, MinibatchSize, this.Size);

            // Gradients are added only for layers that follow another layer.
            if (hasPrevious && size > 0)
            {
                AddArray(ArrayName.Gradients, MinibatchSize, size);
            }
        }
Exemple #15
0
        /// <summary>
        /// Launches the ModulAtomic kernel over KONSTANTA_THREAD blocks, compacts the
        /// entries that are not -1 on the host and prints the elapsed whole seconds.
        /// </summary>
        public static void primaGPU()
        {
            CudafyModule modul_kernel = CudafyTranslator.Cudafy();
            GPGPU        vga          = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);

            vga.LoadModule(modul_kernel);

            Stopwatch waktu = new Stopwatch();

            waktu.Start();
            int[] list_cpu = new int[KONSTANTA_THREAD];
            int[] list_cpy = new int[KONSTANTA_THREAD];
            int[] list     = vga.Allocate <int>(KONSTANTA_THREAD);
            vga.Launch(KONSTANTA_THREAD, 1).ModulAtomic(list);
            vga.CopyFromDevice(list, list_cpy);
            vga.FreeAll();

            // Pack every entry that is not -1 to the front of list_cpu.
            int index = 0;

            for (int z = 0; z < list_cpy.Length; z++)
            {
                if (list_cpy[z] != -1)
                {
                    list_cpu[index] = list_cpy[z];
                    index++;
                }
            }
            waktu.Stop();

            // TimeSpan.Seconds is only the 0-59 component and wraps every minute;
            // report the total number of whole seconds instead.
            TimeSpan ts    = waktu.Elapsed;
            String   total = ((long)ts.TotalSeconds).ToString();

            Console.WriteLine("Total GPU ------ {0} detik> ", total);
        }
        /// <summary>
        /// Replaces <paramref name="dm"/> with a new cols-by-rows matrix built from two BLAS
        /// GEMV passes: one for the top cols-by-cols part and one for the remaining rows.
        /// </summary>
        /// <param name="dm">Matrix to replace; set to null while the work is in progress.</param>
        public static void cudaTranspose(ref MathNet.Numerics.LinearAlgebra.Double.DenseMatrix dm)
        {
            GPGPU gpu = CudafyHost.GetDevice(eGPUType.Cuda);

            // The BLAS wrapper is disposable; it was previously never released.
            using (GPGPUBLAS blas = GPGPUBLAS.Create(gpu))
            {
                int cols = dm.ColumnCount, rows = dm.RowCount;
                // NOTE(review): presumably rows >= cols is required; otherwise restRows is negative.
                int restRows = rows - cols;

                double[] a = dm.SubMatrix(0, cols, 0, cols).Storage.ToColumnMajorArray();
                double[] b = dm.SubMatrix(cols, restRows, 0, cols).Storage.ToColumnMajorArray();
                dm = null;

                // First pass: top part.
                // NOTE(review): a_d is uploaded but is not among the GEMV arguments, and x_d is
                // passed for both vector arguments — confirm the intended operands.
                double[] a_d = gpu.CopyToDevice <double>(a);
                a = null;
                double[] c_d = gpu.Allocate <double>(cols * cols);
                double[] x_d = gpu.CopyToDevice <double>(new double[] { 1 });
                blas.GEMV(cols, cols, 1, c_d, x_d, 0, x_d, Cudafy.Maths.BLAS.Types.cublasOperation.T);
                a = new double[cols * rows];
                gpu.CopyFromDevice <double>(c_d, 0, a, 0, cols * cols);
                gpu.FreeAll();

                // Second pass: remaining rows, written after the first cols * cols values.
                a_d = gpu.CopyToDevice <double>(b);
                b   = null;
                c_d = gpu.Allocate <double>(restRows * cols);
                x_d = gpu.CopyToDevice <double>(new double[] { 1 });
                blas.GEMV(restRows, cols, 1, c_d, x_d, 0, x_d, Cudafy.Maths.BLAS.Types.cublasOperation.T);
                gpu.CopyFromDevice <double>(c_d, 0, a, cols * cols, restRows * cols);
                gpu.FreeAll();

                dm = new MathNet.Numerics.LinearAlgebra.Double.DenseMatrix(cols, rows, a);
            }
        }
Exemple #17
0
        /// <summary>
        /// Launches the fungsiAtomic kernel over KONSTANTA_THREAD blocks, copies the result
        /// back and prints the total elapsed milliseconds.
        /// </summary>
        public static void eksekusi()
        {
            CudafyModule kernel_modul = CudafyTranslator.Cudafy();
            GPGPU        vga          = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);

            vga.LoadModule(kernel_modul);
            Stopwatch waktu = new Stopwatch();

            waktu.Start();
            int[] array_vga   = vga.Allocate <int>(KONSTANTA_THREAD);
            int[] array_hasil = new int[KONSTANTA_THREAD];

            vga.Launch(KONSTANTA_THREAD, 1).fungsiAtomic(array_vga);
            vga.CopyFromDevice(array_vga, array_hasil);

            // One FreeAll is enough; the second call was redundant.
            vga.FreeAll();
            waktu.Stop();

            // TimeSpan.Milliseconds is only the 0-999 component and drops whole seconds;
            // use the stopwatch's total elapsed milliseconds instead.
            String total = waktu.ElapsedMilliseconds.ToString();

            Console.WriteLine("Total VGA ------ > " + total);
        }
Exemple #18
0
        /// <summary>
        /// Checks whether the OpenCL device with the given id can add two doubles correctly.
        /// As a side effect, switches CudafyModes.Target and CudafyTranslator.Language to OpenCL.
        /// </summary>
        /// <param name="DeviceId">Zero-based OpenCL device index.</param>
        /// <returns>
        /// True when the add_double kernel returns 10.0 for 2.5 + 7.5; false when the id is
        /// out of range or any step fails.
        /// </returns>
        public static bool TestGpuDoublePrecision(int DeviceId)
        {
            // Ids are zero-based, so an id equal to the device count is already out of range.
            if (DeviceId < 0 || DeviceId >= CudafyHost.GetDeviceCount(eGPUType.OpenCL))
            {
                return(false);
            }

            try
            {
                CudafyModes.Target        = eGPUType.OpenCL;
                CudafyTranslator.Language = eLanguage.OpenCL;
                CudafyModule km  = CudafyTranslator.Cudafy();
                GPGPU        gpu = CudafyHost.GetDevice(eGPUType.OpenCL, DeviceId);
                gpu.LoadModule(km);

                double   c;
                double[] dev_c = gpu.Allocate <double>();
                gpu.Launch().add_double(2.5d, 7.5d, dev_c);
                gpu.CopyFromDevice(dev_c, out c);
                gpu.Free(dev_c);

                // 2.5, 7.5 and 10.0 are exactly representable, so exact comparison is valid.
                return(c == 10.0d);
            }
            catch
            {
                // Deliberate best effort: any failure counts as "not supported".
                return(false);
            }
        }
Exemple #19
0
        /// <summary>
        /// Uploads A and B, launches the "GPU_MA" kernel on a Size-by-Size grid and copies
        /// the result into C.
        /// </summary>
        /// <param name="A">First input matrix.</param>
        /// <param name="B">Second input matrix.</param>
        /// <param name="C">Receives the kernel output.</param>
        /// <param name="gpu">Device with the module containing GPU_MA loaded.</param>
        /// <param name="maxTheadBlockSize">Upper bound for each thread-block dimension.</param>
        /// <param name="Size">Grid dimension, also passed to the kernel.</param>
        /// <returns>Always 1.</returns>
        public static int MA(int[,] A, int[,] B, int[,] C, GPGPU gpu, int maxTheadBlockSize, int Size)
        {
            // Device-side buffers shaped like the host arrays.
            int[,] devA = gpu.Allocate<int>(A);
            int[,] devB = gpu.Allocate<int>(B);
            int[,] devC = gpu.Allocate<int>(C);

            // Only the inputs need to be uploaded.
            gpu.CopyToDevice(A, devA);
            gpu.CopyToDevice(B, devB);

            // Square thread block, capped at maxTheadBlockSize per dimension.
            int  threadsPerSide  = (Size < maxTheadBlockSize) ? Size : maxTheadBlockSize;
            dim3 threadsPerBlock = new dim3(threadsPerSide, threadsPerSide);
            dim3 block           = new dim3(Size, Size);

            gpu.Launch(block, threadsPerBlock, "GPU_MA", devA, devB, devC, Size);

            // Download the result.
            gpu.CopyFromDevice(devC, C);

            gpu.Free(devA);
            gpu.Free(devB);
            gpu.Free(devC);

            return 1;
        }
Exemple #20
0
        /// <summary>
        /// Runs the two copy worker threads up to ten times with multithreading enabled and
        /// asserts that both workers finished within 10 seconds in the last round executed.
        /// </summary>
        public void Test_TwoThreadCopy()
        {
            _gpu = CudafyHost.GetDevice(eGPUType.Cuda);
            _gpuuintBufferIn3 = _gpu.Allocate(_uintBufferIn1);
            _gpuuintBufferIn4 = _gpu.Allocate(_uintBufferIn1);
            _gpu.EnableMultithreading();

            bool firstJoined  = false;
            bool secondJoined = false;

            for (int round = 0; round < 10; round++)
            {
                Console.WriteLine(round);
                SetInputs();
                ClearOutputs();

                Thread worker1 = new Thread(Test_TwoThreadCopy_Thread1);
                Thread worker2 = new Thread(Test_TwoThreadCopy_Thread2);
                worker1.Start();
                worker2.Start();

                // Each worker gets up to 10 seconds to finish.
                firstJoined  = worker1.Join(10000);
                secondJoined = worker2.Join(10000);

                // Stop at the first round in which a worker timed out.
                if (!(firstJoined && secondJoined))
                {
                    break;
                }
            }

            // Clean up before asserting.
            _gpu.DisableMultithreading();
            _gpu.FreeAll();

            Assert.IsTrue(firstJoined);
            Assert.IsTrue(secondJoined);
        }
        /// <summary>
        /// Translates the module for the device's architecture and launches "GenerateRipples"
        /// with an output buffer and a pre-filled input buffer, then copies the output back.
        /// </summary>
        public void ExeTestKernel()
        {
            GPGPU         gpu  = CudafyHost.GetDevice(CudafyModes.Target, 0);
            eArchitecture arch = gpu.GetArchitecture();
            CudafyModule  km   = CudafyTranslator.Cudafy(arch);

            gpu.LoadModule(km);

            int[] host_results = new int[N];

            // Either assign a new block of memory to hold results on device
            var dev_results = gpu.Allocate <int>(N);

            // Or fill your array with values first and then
            for (int i = 0; i < N; i++)
            {
                host_results[i] = i * 3;
            }

            // Copy array with ints to device
            var dev_filled_results = gpu.CopyToDevice(host_results);

            try
            {
                // 64*16 = 1024 threads per block (which is max for sm_30)
                dim3 threadsPerBlock = new dim3(64, 16);

                // 8*8 = 64 blocks per grid , just for show so you get varying numbers
                dim3 blocksPerGrid = new dim3(8, 8);

                gpu.Launch(blocksPerGrid, threadsPerBlock, "GenerateRipples", dev_results, dev_filled_results);

                gpu.CopyFromDevice(dev_results, host_results);
            }
            finally
            {
                // The device buffers were previously never released.
                gpu.Free(dev_results);
                gpu.Free(dev_filled_results);
            }
        }
Exemple #22
0
        /// <summary>
        /// Runs the evaluate kernel over all hands, one thread per hand in blocks of 256.
        /// </summary>
        /// <param name="hands">Hands to evaluate, one value per hand.</param>
        /// <param name="numCards">Passed through to the kernel.</param>
        /// <returns>One rank per input hand, in input order; empty when hands is empty.</returns>
        public static uint[] Evaluate(ulong[] hands, int numCards)
        {
            // Translates this class to CUDA C and then compiles
            CudafyModule km = CudafyTranslator.Cudafy();

            // Get the configured GPU and load the module
            GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);

            gpu.LoadModule(km);

            // Nothing to evaluate: avoid a zero-sized allocation and a launch with zero blocks.
            if (hands.Length == 0)
            {
                return(new uint[0]);
            }

            // Round the block count up so that the last, partial block is covered.
            int blockSize = 256;
            int blockx    = hands.Length / blockSize;

            if (hands.Length % blockSize != 0)
            {
                blockx++;
            }

            ulong[] dev_hands = gpu.Allocate <ulong>(hands.Length);
            uint[]  dev_ranks = gpu.Allocate <uint>(hands.Length);

            try
            {
                gpu.CopyToDevice(hands, dev_hands);

                gpu.StartTimer();
                gpu.Launch(blockx, blockSize).evaluate(dev_hands, numCards, hands.Length, dev_ranks);
                gpu.StopTimer();

                uint[] toReturn = new uint[hands.Length];
                gpu.CopyFromDevice(dev_ranks, toReturn);

                return(toReturn);
            }
            finally
            {
                // Previously both buffers stayed allocated after every call.
                gpu.Free(dev_hands);
                gpu.Free(dev_ranks);
            }
        }
Exemple #23
0
        /// <summary>
        /// Overwrites a device array with zeros by uploading a zero-filled host array.
        /// </summary>
        /// <param name="gpu">Device that owns <paramref name="gpuArray"/>.</param>
        /// <param name="gpuArray">Device array to overwrite.</param>
        /// <param name="size">Number of zero elements to upload.</param>
        public static void ClearGpuArray(GPGPU gpu, int[] gpuArray, int size)
        {
            // A newly created int[] is already zero-filled, so no explicit clear is needed.
            int[] zeros = new int[size];

            gpu.CopyToDevice(zeros, gpuArray);
        }
Exemple #24
0
 /// <summary>
 /// Creates the default device, the SPARSE and BLAS wrappers on it, and the solver
 /// that uses all three.
 /// </summary>
 public void SetUp()
 {
     _gpu = CudafyHost.GetDevice();

     // Both wrappers are created on the same device.
     _sparse = GPGPUSPARSE.Create(_gpu);
     _blas = GPGPUBLAS.Create(_gpu);

     _solver = new Solver(_gpu, _blas, _sparse);
 }
        /// <summary>
        /// Walks the standard-layout fields of the host object and makes sure every
        /// reference-type field value (arrays included) has a device counterpart.
        /// Fields marked with CudafyIgnore and values already known for this gpu are skipped.
        /// </summary>
        private static void CopyReferenceTypeFieldsToDevice <T>(GPGPU gpu, T hostObject)
        {
            foreach (FieldInfo field in DeviceClassHelper.GetFieldsStandardLayout(hostObject.GetType()))
            {
                object fieldValue = field.GetValue(hostObject);

                // Ignore if CudafyIgnore
                bool ignored = field.GetCustomAttributes(typeof(CudafyIgnoreAttribute), true).Length > 0;
                if (ignored)
                {
                    continue;
                }

                // Value-type fields need no separate device object (array types are never value types).
                Type fieldType = field.FieldType;
                if (fieldType.IsValueType)
                {
                    continue;
                }

                // Only copy to the device if this has not been done already.
                if (deviceObjectFromHostObject[gpu].ContainsKey(fieldValue))
                {
                    continue;
                }

                if (!fieldType.IsArray)
                {
                    CreateDeviceObject(gpu, fieldValue);
                }
                else if (fieldType.GetElementType().IsValueType)
                {
                    // Arrays of value types are copied element data and all.
                    CopyArrayToDevice(gpu, (Array)fieldValue);
                }
                else
                {
                    // Arrays of reference types are handled by a dedicated routine.
                    CopyArrayOfReferenceTypeToDevice(gpu, (Array)fieldValue);
                }
            }
        }
 /// <summary>
 /// Writes, for each pointer field mapping, the device pointer of the host field's value
 /// into the device object; array mappings also get their per-dimension lengths written.
 /// </summary>
 /// <exception cref="ArgumentException">No device memory is registered for a field's value.</exception>
 private static void AssignPointerFields(GPGPU gpu, object hostObject, object deviceObject, List <FieldMapping> pointerFields)
 {
     foreach (FieldMapping mapping in pointerFields)
     {
         object hostValue = mapping.HostObjectField.GetValue(hostObject);

         // Look up the device memory that was registered for this host value.
         var deviceMemory = TryGetDeviceMemoryFromHostObject(gpu, hostValue);
         if (deviceMemory == null)
         {
             throw new ArgumentException("No device memory allocated for field " + mapping.Name);
         }

         // The device object stores the raw pointer.
         mapping.DeviceObjectField.SetValue(deviceObject, deviceMemory.Pointer);

         // Only array mappings carry dimension fields.
         ArrayFieldMapping arrayMapping = mapping as ArrayFieldMapping;
         if (arrayMapping == null)
         {
             continue;
         }

         Array hostArray = hostValue as Array;
         for (int dim = 0; dim < arrayMapping.ArrayRank; ++dim)
         {
             arrayMapping.DeviceObjectDimensionFields[dim].SetValue(deviceObject, hostArray.GetLength(dim));
         }
     }
 }
Exemple #27
0
        /// <summary>
        /// Translates ParamsStruct and ImpliedVolatile, runs the "impliedVolatile" kernel
        /// once with a single parameter set and prints the i and B fields of the result.
        /// </summary>
        public static void Execute()
        {
            CudafyModule km = CudafyTranslator.Cudafy(typeof(ParamsStruct), typeof(ImpliedVolatile));

            _gpu = CudafyHost.GetDevice(CudafyModes.Target);
            _gpu.LoadModule(km);

            ParamsStruct[] host_par = new ParamsStruct[1];
            ParamsStruct[] result   = new ParamsStruct[1];
            host_par[0].OP     = 96.95;
            host_par[0].Price  = 1332.24;
            host_par[0].Strike = 1235;
            host_par[0].TD     = 31;
            host_par[0].R      = 0.0001355;
            host_par[0].Q      = 0.0166;
            host_par[0].N      = 100;// 1000;
            host_par[0].kind   = 1;

            ParamsStruct[] dev_par = _gpu.CopyToDevice(host_par);
            float[]        PA      = _gpu.Allocate <float>(1001);

            try
            {
                _gpu.Launch(1, 1, "impliedVolatile", dev_par, PA);

                // Results come back through the parameter struct.
                _gpu.CopyFromDevice(dev_par, 0, result, 0, 1);
            }
            finally
            {
                // Release the device buffers, which were previously left allocated.
                _gpu.Free(dev_par);
                _gpu.Free(PA);
            }

            Console.WriteLine("I={0}, B={1}", result[0].i, result[0].B);
        }
        /// <summary>
        /// Creates two ArrayView objects over one shared float array, mirrors them on the
        /// device, runs the Test2 kernel and prints "Pass" or "Fail" for the expected results.
        /// </summary>
        /// <param name="gpu">Device with the module containing Test2 loaded.</param>
        public static void Example2(GPGPU gpu)
        {
            ArrayView view1 = new ArrayView();
            ArrayView view2 = new ArrayView();

            // data[n] starts out as n.
            float[] data = new float[1000];
            for (int n = 0; n < data.Length; n++)
            {
                data[n] = n;
            }

            // Two views of the array, simply applying an offset to the array; could slice instead for example.
            view1.CreateView(data, 100);
            view2.CreateView(data, 200);

            // Scale the shared data after the views were created.
            for (int n = 0; n < 1000; ++n)
            {
                data[n] *= 10f;
            }

            // Should copy the 'large' array to the device only once; this is referenced by each ArrayView instance.
            var dev_view1 = DeviceClassHelper.CreateDeviceObject(gpu, view1);
            var dev_view2 = DeviceClassHelper.CreateDeviceObject(gpu, view2);

            var hostResult = new float[5];
            var dev_result = gpu.Allocate <float>(5);

            gpu.Launch(1, 1).Test2(dev_view1, dev_view2, dev_result);
            gpu.CopyFromDevice(dev_result, hostResult);

            if (hostResult[0] == 1050f && hostResult[1] == 7f)
            {
                Console.WriteLine("Pass");
            }
            else
            {
                Console.WriteLine("Fail");
            }
        }
Exemple #29
0
        /// <summary>
        /// Loads the module (from cache when its checksums verify, otherwise by translating
        /// Generic&lt;ushort, ushort&gt; and SimpleGeneric), runs "Kernel" with a generic
        /// struct argument and prints PASSED when the kernel writes 1.
        /// </summary>
        public static void Execute()
        {
            CudafyModule km = CudafyModule.TryDeserialize();

            if (km == null || !km.TryVerifyChecksums())
            {
                km = CudafyTranslator.Cudafy(typeof(Generic <ushort, ushort>), typeof(SimpleGeneric));
                km.Serialize();
            }

            GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target);

            gpu.LoadModule(km);

            var input = new Generic <ushort, ushort>();

            input.A = 187;

            int   output;
            int[] devoutput = gpu.Allocate <int>(1);
            try
            {
                gpu.Launch(1, 1, "Kernel", input, devoutput);
                gpu.CopyFromDevice(devoutput, out output);
            }
            finally
            {
                // The single-element result buffer was previously never freed.
                gpu.Free(devoutput);
            }

            Console.WriteLine("Simple Generic: " + ((output == 1) ? "PASSED" : "FAILED"));
        }
Exemple #30
0
        /// <summary>
        /// Verifies that <c>GPGPU.CopyMemory</c> copies one host allocation into another:
        /// random non-zero values are written to the first allocation, copied to the second,
        /// read back and compared element by element.
        /// </summary>
        public void Test_copyOnHost()
        {
            int len = 35687;

            int[]  bufa = new int[len];
            int[]  bufb = new int[len];
            Random r    = new Random();

            for (int i = 0; i < len; i++)
            {
                // Random.Next() is non-negative and below int.MaxValue, so +1 is always > 0 and cannot overflow.
                bufa[i] = r.Next() + 1;
            }

            try
            {
                IntPtr ha = _gpu.HostAllocate<int>(len);

                ha.Write(bufa, 0, 0, len);
                IntPtr hb = _gpu.HostAllocate<int>(len);

                GPGPU.CopyMemory(hb, ha, (uint)len * sizeof(int));

                hb.Read(bufb, 0, 0, len);
                for (int i = 0; i < len; i++)
                {
                    Assert.True(bufa[i] == bufb[i]);
                    Assert.False(bufa[i] == 0);
                }
            }
            finally
            {
                // Release the host allocations even when an assertion or copy throws,
                // so a failing run does not leak them into subsequent tests.
                _gpu.HostFreeAll();
            }
        }
 /// <summary>
 /// Creates a SPARSE wrapper based on the specified gpu. Note only CudaGPU is supported.
 /// </summary>
 /// <param name="gpu">The gpu.</param>
 /// <returns>A <c>CudaSPARSE</c> wrapping <paramref name="gpu"/>.</returns>
 /// <exception cref="ArgumentNullException"><paramref name="gpu"/> is null.</exception>
 /// <exception cref="NotImplementedException"><paramref name="gpu"/> is not a <c>CudaGPU</c>.</exception>
 public static GPGPUSPARSE Create(GPGPU gpu)
 {
     // Without this guard a null argument would reach gpu.ToString() below and
     // surface as an uninformative NullReferenceException.
     if (gpu == null)
         throw new ArgumentNullException("gpu");

     if (gpu is CudaGPU)
         return new CudaSPARSE(gpu);

     throw new NotImplementedException(gpu.ToString());
 }
        /// <summary>
        ///     Invokes and runs a single elementary function, selected by its name.
        /// </summary>
        /// <param name="function">Name of the function to launch on the device.</param>
        public static void Execute(string function)
        {
            Debug.Assert(_indexes1.Last() == _sequencies1.Length);
            Debug.Assert(_indexes2.Last() == _sequencies2.Length);

            CudafyModule module = CudafyTranslator.Cudafy();
            GPGPU device = CudafyHost.GetDevice();
            device.LoadModule(module);

            // Upload the index and sequence arrays; the matrix is only allocated on the device, not uploaded.
            int[] devIndexes1    = device.CopyToDevice(_indexes1);
            int[] devIndexes2    = device.CopyToDevice(_indexes2);
            int[] devSequencies1 = device.CopyToDevice(_sequencies1);
            int[] devSequencies2 = device.CopyToDevice(_sequencies2);
            int[,] devMatrix     = device.Allocate(_matrix);

            int rows    = _matrix.GetLength(0);
            int columns = _matrix.GetLength(1);

            // Grid and block use the same size: roughly the cube root of the cell count, capped at 15.
            int side = Math.Min(15, (int)Math.Pow((double)rows * columns, 0.33333333333));
            dim3 gridSize  = side;
            dim3 blockSize = side;

            device.Launch(gridSize, blockSize, function,
                          devSequencies1, devIndexes1,
                          devSequencies2, devIndexes2,
                          devMatrix);

            // Bring the computed matrix back into the host-side field.
            device.CopyFromDevice(devMatrix, _matrix);

            // Release everything allocated on the device by this call.
            device.FreeAll();
        }
        /// <summary>
        /// Creates the device, allocates the double-precision host and device buffers, creates the
        /// FFT wrapper and fills the host inputs with a sine-based test signal repeated per batch.
        /// </summary>
        public void SetUp()
        {
            _gpu = CudafyHost.CreateDevice(CudafyModes.Target);

            var total = N * BATCH;
            _hostInput      = new double[total];
            _hostInputCplx  = new ComplexD[total];
            _hostOutput     = new double[total];
            _hostOutputCplx = new ComplexD[total];

            _devInput       = _gpu.Allocate(_hostInput);
            _devInputCplx   = _gpu.Allocate(_hostInputCplx);
            _devInter       = _gpu.Allocate<double>(N * 2 * BATCH);
            _devInterCplx   = _gpu.Allocate<ComplexD>(N * BATCH);
            _devOutput      = _gpu.Allocate(_hostOutput);
            _devOutputCplx  = _gpu.Allocate(_hostOutputCplx);
            _fft            = GPGPUFFT.Create(_gpu);

            for (int batch = 0; batch < BATCH; batch++)
            {
                var rowStart = batch * N;
                for (int n = 0; n < N; n++)
                {
                    // x and y use the same formula with factors 100 and 200 respectively.
                    var sample = new ComplexD
                    {
                        x = (double)(10.0F * Math.Sin(100 * 2 * Math.PI * n / N * Math.PI / 180)),
                        y = (double)(10.0F * Math.Sin(200 * 2 * Math.PI * n / N * Math.PI / 180))
                    };

                    // The real-valued input only carries the x component.
                    _hostInput[n + rowStart]     = sample.x;
                    _hostInputCplx[n + rowStart] = sample;
                }
            }
        }
Exemple #34
0
        /// <summary>
        /// Creates devices 0 and 1 with multithreading enabled and runs up to ten rounds in which
        /// two worker threads are started and each is given 10000 ms to finish. The rounds stop at
        /// the first timed-out join; the devices are cleaned up before the join results are asserted.
        /// </summary>
        public void Test_TwoThreadTwoGPU()
        {
            _gpu0 = CudafyHost.CreateDevice(CudafyModes.Target, 0);
            _gpu1 = CudafyHost.CreateDevice(CudafyModes.Target, 1);
            _gpu0.EnableMultithreading();
            _gpu1.EnableMultithreading();

            bool firstJoined = false;
            bool secondJoined = false;

            int round = 0;
            while (round < 10)
            {
                Console.WriteLine(round);

                var worker0 = new Thread(Test_TwoThreadTwoGPU_Thread0);
                var worker1 = new Thread(Test_TwoThreadTwoGPU_Thread1);
                worker0.Start();
                worker1.Start();

                // Both joins are always attempted, even when the first one times out.
                firstJoined = worker0.Join(10000);
                secondJoined = worker1.Join(10000);
                if (!(firstJoined && secondJoined))
                {
                    break;
                }

                round++;
            }

            // Clean up first so the devices are released even when the assertions below fail.
            _gpu0.DisableMultithreading();
            _gpu0.FreeAll();
            _gpu1.DisableMultithreading();
            _gpu1.FreeAll();

            Assert.IsTrue(firstJoined);
            Assert.IsTrue(secondJoined);
        }
 /// <summary>
 /// Creates the device and FFT wrapper, prints the driver and CUFFT versions, allocates the
 /// single-precision host and device buffers and fills the host inputs with a sine-based
 /// test signal repeated per batch.
 /// </summary>
 public void SetUp()
 {
     _gpu = CudafyHost.CreateDevice(CudafyModes.Target);
     Console.WriteLine("CUDA driver version={0}", _gpu.GetDriverVersion());
     _fft = GPGPUFFT.Create(_gpu);

     var total = N * BATCH;
     _hostInput = new float[total];
     _hostInputCplx = new ComplexF[total];
     _hostOutput = new float[total];
     _hostOutputCplx = new ComplexF[total];

     _devInput = _gpu.Allocate(_hostInput);
     _devInputCplx = _gpu.Allocate(_hostInputCplx);
     _devInter = _gpu.Allocate<float>(N * 2 * BATCH);
     _devInterCplx = _gpu.Allocate<ComplexF>(N * BATCH);
     _devOutput = _gpu.Allocate(_hostOutput);
     _devOutputCplx = _gpu.Allocate(_hostOutputCplx);
     Console.WriteLine("CUFFT version={0}", _fft.GetVersion());

     for (int batch = 0; batch < BATCH; batch++)
     {
         var rowStart = batch * N;
         for (int n = 0; n < N; n++)
         {
             // x and y use the same formula with factors 100 and 200 respectively.
             var sample = new ComplexF
             {
                 x = (float)(10.0F * Math.Sin(100 * 2 * Math.PI * n / N * Math.PI / 180)),
                 y = (float)(10.0F * Math.Sin(200 * 2 * Math.PI * n / N * Math.PI / 180))
             };

             // The real-valued input only carries the x component.
             _hostInput[n + rowStart] = sample.x;
             _hostInputCplx[n + rowStart] = sample;
         }
     }
 }
Exemple #36
0
 /// <summary>
 /// Creates the device and FFT wrapper, prints the raw driver and FFT version values, allocates
 /// the single-precision host and device buffers and fills the host inputs with a sine-based
 /// test signal repeated per batch.
 /// </summary>
 public void SetUp()
 {
     _gpu = CudafyHost.CreateDevice(CudafyModes.Target);
     Console.WriteLine(_gpu.GetDriverVersion());
     _fft = GPGPUFFT.Create(_gpu);

     // Host-side buffers, one element per sample per batch.
     var total = N * BATCH;
     _hostInput      = new float[total];
     _hostInputCplx  = new ComplexF[total];
     _hostOutput     = new float[total];
     _hostOutputCplx = new ComplexF[total];

     // Device-side buffers; the real intermediate buffer is allocated at twice the input size.
     _devInput       = _gpu.Allocate(_hostInput);
     _devInputCplx   = _gpu.Allocate(_hostInputCplx);
     _devInter       = _gpu.Allocate<float>(N * 2 * BATCH);
     _devInterCplx   = _gpu.Allocate<ComplexF>(N * BATCH);
     _devOutput      = _gpu.Allocate(_hostOutput);
     _devOutputCplx  = _gpu.Allocate(_hostOutputCplx);
     Console.WriteLine(_fft.GetVersion());

     for (int batch = 0; batch < BATCH; batch++)
     {
         var rowStart = batch * N;
         for (int n = 0; n < N; n++)
         {
             var sample = new ComplexF
             {
                 x = (float)(10.0F * Math.Sin(100 * 2 * Math.PI * n / N * Math.PI / 180)),
                 y = (float)(10.0F * Math.Sin(200 * 2 * Math.PI * n / N * Math.PI / 180))
             };

             // The real-valued input only carries the x component.
             _hostInput[n + rowStart]     = sample.x;
             _hostInputCplx[n + rowStart] = sample;
         }
     }
 }
Exemple #37
0
 /// <summary>
 /// Obtains the default device, creates the SPARSE and BLAS wrappers for it and constructs the
 /// solver from all three.
 /// </summary>
 public void SetUp()
 {
     var device = CudafyHost.GetDevice();
     _gpu = device;

     _sparse = GPGPUSPARSE.Create(device);
     _blas = GPGPUBLAS.Create(device);

     // The solver receives the same device together with both wrappers.
     _solver = new Solver(device, _blas, _sparse);
 }
Exemple #38
0
        /// <summary>
        /// Acquires the device selected by <c>CudafyModes</c>, creates an input and an output host
        /// buffer of length N for each tested element type, then calls <c>SetInputs</c> and
        /// <c>ClearOutputsAndGPU</c>.
        /// </summary>
        public void SetUp()
        {
            _gpu = CudafyHost.GetDevice(CudafyModes.Architecture, CudafyModes.DeviceId);

            var length = N;

            // Integer element types.
            _byteBufferIn    = new byte[length];
            _byteBufferOut   = new byte[length];
            _sbyteBufferIn   = new sbyte[length];
            _sbyteBufferOut  = new sbyte[length];
            _ushortBufferIn  = new ushort[length];
            _ushortBufferOut = new ushort[length];
            _uintBufferIn    = new uint[length];
            _uintBufferOut   = new uint[length];
            _ulongBufferIn   = new ulong[length];
            _ulongBufferOut  = new ulong[length];

            // Complex element types (double and single precision).
            _cplxDBufferIn   = new ComplexD[length];
            _cplxDBufferOut  = new ComplexD[length];
            _cplxFBufferIn   = new ComplexF[length];
            _cplxFBufferOut  = new ComplexF[length];

            SetInputs();
            ClearOutputsAndGPU();
        }
Exemple #39
0
        //
        // http://stackoverflow.com/questions/18628447/cudafy-throws-an-exception-while-testing
        //
        /// <summary>
        /// Switches the global target to the emulator, loads a freshly translated module on the
        /// requested device, runs a single-precision GEMM on zero-filled buffers and then always
        /// throws <see cref="NotImplementedException"/>.
        /// </summary>
        /// <param name="deviceId">Id of the device to use; also stored in <c>CudafyModes.DeviceId</c>.</param>
        /// <exception cref="NotImplementedException">Always thrown once the GEMM call returns.</exception>
        private static void BlasSample(int deviceId)
        {
            // Note: these assignments change process-wide CudafyModes settings.
            CudafyModes.Target = eGPUType.Emulator;
            GPGPU device = CudafyHost.GetDevice(CudafyModes.Target, deviceId);
            CudafyModes.DeviceId = deviceId;

            CudafyModule module = CudafyTranslator.Cudafy(device.GetArchitecture());
            device.LoadModule(module);

            GPGPUBLAS blas = GPGPUBLAS.Create(device);

            const int N = 100;
            float[] hostA = new float[N];
            float[] hostB = new float[N];
            float[] hostC = new float[N];

            float[] deviceA = device.CopyToDevice(hostA);
            float[] deviceB = device.CopyToDevice(hostB);
            float[] deviceC = device.CopyToDevice(hostC);

            // All three matrix dimensions are 10, matching the 100-element buffers.
            int size = 10;
            float alpha = -1f;
            float beta = 0f;

            blas.GEMM(size, size, size, alpha, deviceA, deviceB, beta, deviceC, cublasOperation.N);

            throw new NotImplementedException();
        }
        /// <summary>
        ///     Invokes and runs the function that checks whether the array is sorted.
        /// </summary>
        /// <param name="direction">Value forwarded unchanged to both <c>Sorted</c> kernel launches; defaults to 1.</param>
        public static void ExecuteSorted(int direction = 1)
        {
            CudafyModule module = CudafyTranslator.Cudafy();
            GPGPU device = CudafyHost.GetDevice();
            device.LoadModule(module);

            // Device buffers sized after the host fields; only _a is actually uploaded.
            int[] devA = device.Allocate(_a);
            int[] devB = device.Allocate(_b);
            int[] devC = device.Allocate(_c);
            int[] devD = device.Allocate(D);
            device.CopyToDevice(_a, devA);

            // One single-thread Split, then Sorted with step argument 0 on the full grid
            // and with step argument 1 on a single thread.
            device.Launch(1, 1).Split(devA, devB, devC, _middle);
            device.Launch(_gridSize, _blockSize).Sorted(devA, devB, devC, devD, 0, direction);
            device.Launch(1, 1).Sorted(devA, devB, devC, devD, 1, direction);

            device.CopyFromDevice(devD, D);

            // Release everything allocated on the device by this call.
            device.FreeAll();
        }
Exemple #41
0
 /// <summary>
 /// Creates a BLAS wrapper based on the specified gpu.
 /// </summary>
 /// <param name="gpu">The gpu.</param>
 /// <returns>
 /// A <c>CudaBLAS</c> when <paramref name="gpu"/> is a <c>CudaGPU</c>; otherwise a <c>HostBLAS</c>.
 /// </returns>
 public static GPGPUBLAS Create(GPGPU gpu)
 {
     if (!(gpu is CudaGPU))
     {
         // Any other device falls back to the host implementation instead of throwing.
         return new HostBLAS(gpu);
     }

     return new CudaBLAS(gpu);
 }
Exemple #42
0
        /// <summary>
        /// Translates the module, acquires the device selected by <c>CudafyModes</c>, loads the
        /// module onto it and caches the device properties.
        /// </summary>
        public GPU_func()
        {
            var module = CudafyTranslator.Cudafy();
            km = module;

            var device = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
            gpu = device;

            device.LoadModule(module);
            GPU_prop = device.GetDeviceProperties();
        }
Exemple #43
0
 /// <summary>
 /// Creates the device and its BLAS wrapper, then allocates three ciROWS x ciCOLS host matrices
 /// and two device matrices of the same dimensions.
 /// </summary>
 public void SetUp()
 {
     var device = CudafyHost.CreateDevice(CudafyModes.Target);
     _gpu = device;
     _blas = GPGPUBLAS.Create(device);

     _hostInput = new float[ciROWS, ciCOLS];
     _hostInput2 = new float[ciROWS, ciCOLS];
     _hostOutput = new float[ciROWS, ciCOLS];

     // The second device buffer is sized after the output matrix.
     _devPtr = device.Allocate<float>(_hostInput);
     _devPtr2 = device.Allocate<float>(_hostOutput);
 }
 /// <summary>
 /// Translates this test class for x64 with the dynamic-parallelism compile mode, prints the
 /// compiler output, serializes the module and loads it onto the configured device.
 /// </summary>
 public void SetUp()
 {
     var settings = CompilerHelper.Create(
         ePlatform.x64,
         CudafyModes.Architecture,
         eCudafyCompileMode.DynamicParallelism);

     _cm = CudafyTranslator.Cudafy(new CompileProperties[] { settings }, GetType());
     Console.WriteLine(_cm.CompilerOutput);
     _cm.Serialize();

     // The device is requested with the architecture reported by the compile properties.
     _gpu = CudafyHost.GetDevice(settings.Architecture, CudafyModes.DeviceId);
     _gpu.LoadModule(_cm);
 }
Exemple #45
0
 /// <summary>
 /// Creates the device and its BLAS wrapper, prints the BLAS version, and allocates four host
 /// vectors of length ciN plus two device vectors of the same length.
 /// </summary>
 public void SetUp()
 {
     var device = CudafyHost.CreateDevice(CudafyModes.Target);
     _gpu = device;
     _blas = GPGPUBLAS.Create(device);
     Console.Write("BLAS Version={0}", _blas.GetVersion());

     _hostInput1 = new float[ciN];
     _hostInput2 = new float[ciN];
     _hostOutput1 = new float[ciN];
     _hostOutput2 = new float[ciN];

     // The second device buffer is sized after the first output vector.
     _devPtr1 = device.Allocate<float>(_hostInput1);
     _devPtr2 = device.Allocate<float>(_hostOutput1);
 }
Exemple #46
0
 /// <summary>
 /// Stores the gpu and selects the CURAND driver matching the pointer size of the current process.
 /// </summary>
 /// <param name="gpu">The gpu to associate with this instance.</param>
 /// <param name="rng_type">Generator type; not used within this constructor body.</param>
 internal CudaRAND(GPGPU gpu, curandRngType rng_type)
 {
     _gpu = gpu;

     // An 8-byte IntPtr selects the 64-bit driver; any other size selects the 32-bit one.
     if (IntPtr.Size != 8)
     {
         _driver = new CURANDDriver32();
     }
     else
     {
         _driver = new CURANDDriver64();
     }
 }
Exemple #47
0
        /// <summary>
        /// Translates and loads the module on the configured device, allocates the device bitmap
        /// and sets up the launch dimensions (16x16 threads, DIM/16 x DIM/16 blocks).
        /// </summary>
        /// <param name="bytes">Number of bytes to allocate for the device bitmap.</param>
        public void Initialize(int bytes)
        {
            CudafyModule module = CudafyTranslator.Cudafy();

            _gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
            _gpu.LoadModule(module);
            _dev_bitmap = _gpu.Allocate<byte>(bytes);

            // Threads per block along each axis; the block count is derived from it.
            const int tile = 16;
            _blocks = new dim3(DIM / tile, DIM / tile);
            _threads = new dim3(tile, tile);
        }
Exemple #48
0
 /// <summary>
 /// Enables debug generation, acquires the configured device and loads the cached module,
 /// re-translating (and trying to re-serialize) it when the cache is missing or its checksums
 /// do not verify.
 /// </summary>
 public void SetUp()
 {
     CudafyTranslator.GenerateDebug = true;
     _cm = CudafyModule.TryDeserialize();
     _gpu = CudafyHost.GetDevice(CudafyModes.Architecture, CudafyModes.DeviceId);

     bool cacheUsable = _cm != null && _cm.TryVerifyChecksums();
     if (!cacheUsable)
     {
         // On OpenCL devices null is passed in place of StringConstClass.
         Type stringConstType = (_gpu is OpenCLDevice) ? null : typeof(StringConstClass);
         _cm = CudafyTranslator.Cudafy(_gpu.GetArchitecture(), GetType(), stringConstType);
         _cm.TrySerialize();
     }

     _gpu.LoadModule(_cm);
 }
Exemple #49
0
 /// <summary>
 /// Stores the gpu and creates the 64-bit CURAND driver; processes whose pointer size is not
 /// 8 bytes are rejected.
 /// </summary>
 /// <param name="gpu">The gpu to associate with this instance.</param>
 /// <param name="rng_type">Generator type; not used within this constructor body.</param>
 /// <exception cref="NotSupportedException">The process pointer size is not 8 bytes.</exception>
 internal CudaRAND(GPGPU gpu, curandRngType rng_type)
 {
     _gpu = gpu;

     // Only the 64-bit driver is wired up here.
     if (IntPtr.Size != 8)
     {
         throw new NotSupportedException();
     }

     _driver = new CURANDDriver64();
 }
        /// <summary>
        /// Acquires the configured device, translates this test type together with
        /// <c>MathSingleTest</c> (and <c>MathDoubleTest</c> when the device reports double-precision
        /// support), writes the generated source to the debug output and loads the module.
        /// </summary>
        public virtual void SetUp()
        {
            _gpu = CudafyHost.GetDevice(CudafyModes.Architecture, CudafyModes.DeviceId);

            var kernelTypes = new List<Type> { GetType(), typeof(MathSingleTest) };

            // Double-precision kernels are only included when the device supports them.
            SupportsDouble = _gpu.GetDeviceProperties().SupportsDoublePrecision;
            if (SupportsDouble)
            {
                kernelTypes.Add(typeof(MathDoubleTest));
            }

            _cm = CudafyTranslator.Cudafy(CudafyModes.Architecture, kernelTypes.ToArray());
            Debug.WriteLine(_cm.SourceCode);
            _gpu.LoadModule(_cm);
        }
Exemple #51
0
        /// <summary>
        /// Stores the device and the BLAS/SPARSE wrappers, then loads the module onto the device,
        /// re-translating (and trying to re-serialize) it when no valid cached copy exists.
        /// </summary>
        /// <param name="gpu">Device the module is loaded onto.</param>
        /// <param name="blas">BLAS wrapper kept by the solver.</param>
        /// <param name="sparse">SPARSE wrapper kept by the solver.</param>
        public Solver(GPGPU gpu, GPGPUBLAS blas, GPGPUSPARSE sparse)
        {
            this.gpu = gpu;
            this.blas = blas;
            this.sparse = sparse;

            // The cached module is reused only when it exists and its checksums verify.
            CudafyModule module = CudafyModule.TryDeserialize();
            bool cacheUsable = module != null && module.TryVerifyChecksums();
            if (!cacheUsable)
            {
                module = CudafyTranslator.Cudafy();
                module.TrySerialize();
            }

            gpu.LoadModule(module);
        }
        /// <summary>
        /// Acquires the configured device and loads the cached module, re-translating it for the
        /// configured architecture (printing the compiler output and trying to re-serialize) when
        /// the cache is missing or its checksums do not verify.
        /// </summary>
        public void SetUp()
        {
            _gpu = CudafyHost.GetDevice(CudafyModes.Architecture, CudafyModes.DeviceId);
            _cm = CudafyModule.TryDeserialize();

            bool cacheUsable = _cm != null && _cm.TryVerifyChecksums();
            if (!cacheUsable)
            {
                _cm = CudafyTranslator.Cudafy(CudafyModes.Architecture);
                Console.WriteLine(_cm.CompilerOutput);
                _cm.TrySerialize();
            }

            _gpu.LoadModule(_cm);
        }
Exemple #53
0
        /// <summary>
        /// Creates the device and its SPARSE wrapper, allocates the M*N host matrices and the
        /// M- and N-length host vectors, and allocates matching device buffers.
        /// </summary>
        public void SetUp()
        {
            var device = CudafyHost.CreateDevice(CudafyModes.Target);
            _gpu = device;
            _sparse = GPGPUSPARSE.Create(device);

            var cells = M * N;
            _hiMatrixMN = new double[cells];
            _hiMatrixMN2 = new double[cells];
            _hoMatrixMN = new double[cells];
            _hoPerVector = new int[M];
            _hoPerVector2 = new int[N];

            // Device buffers are sized after their host counterparts.
            _diPerVector2 = device.Allocate(_hoPerVector2);
            _diMatrixMN = device.Allocate(_hiMatrixMN);
            _diMatrixMN2 = device.Allocate(_hiMatrixMN2);
            _diPerVector = device.Allocate(_hoPerVector);
        }
Exemple #54
0
        /// <summary>
        /// Obtains the default device and its SPARSE wrapper and allocates the host matrices,
        /// vectors and result buffers whose sizes are derived from M, N and K.
        /// </summary>
        public void SetUp()
        {
            var device = CudafyHost.GetDevice();
            _gpu = device;
            _sparse = GPGPUSPARSE.Create(device);

            // Host matrices, stored as flat arrays.
            hiMatrixMN = new double[M * N];
            hiMatrixMK = new double[M * K];
            hiMatrixKM = new double[K * M];
            hiMatrixKN = new double[K * N];
            hiMatrixNN = new double[N * N];

            // Host vectors.
            hiVectorXM = new double[M];
            hiVectorXN = new double[N];
            hiVectorYM = new double[M];
            hiVectorYN = new double[N];

            // Host buffers for results.
            gpuResultM = new double[M];
            gpuResultN = new double[N];
            gpuResultMN = new double[M * N];
        }
        /// <summary>
        /// Requires at least one OpenCL device, translates the module for OpenCL 1.2, derives a
        /// square block size from the first device's thread limit and loads the module onto the
        /// OpenCL device.
        /// </summary>
        /// <exception cref="Exception">No OpenCL device is reported.</exception>
        public GpuRenderer()
        {
            var openClDevices = CudafyHost.GetDeviceProperties(eGPUType.OpenCL);
            if (!openClDevices.Any())
            {
                throw new Exception("No OpenCL devices found...");
            }

            var device = openClDevices.First();
            Module = CudafyTranslator.Cudafy(eArchitecture.OpenCL12);

            // Largest side in 1..15 whose square still fits within the device's threads-per-block limit.
            var blockSide = Enumerable
                .Range(1, 15)
                .Last(side => side * side <= device.MaxThreadsPerBlock);
            BlockSize = new dim3(blockSide, blockSide);

            // Initialize gpu and load the module once here (avoids reloading every time).
            gpu = CudafyHost.GetDevice(eGPUType.OpenCL);
            gpu.LoadModule(Module);
        }
Exemple #56
0
        /// <summary>
        /// Obtains the target device and its BLAS wrapper and allocates the host A, B and C
        /// matrices and result buffers whose sizes are derived from M, N and K.
        /// </summary>
        public void SetUp()
        {
            var device = CudafyHost.GetDevice(CudafyModes.Target);
            _gpu = device;
            _blas = GPGPUBLAS.Create(device);

            // A matrices.
            hiMatrixAMM = new double[M * M];
            hiMatrixANN = new double[N * N];
            hiMatrixAMK = new double[M * K];
            hiMatrixAKM = new double[K * M];

            // B matrices.
            hiMatrixBMN = new double[M * N];
            hiMatrixBKN = new double[K * N];
            hiMatrixBNK = new double[N * K];
            hiMatrixBMK = new double[M * K];
            hiMatrixBKM = new double[K * M];

            // C matrices.
            hiMatrixCMN = new double[M * N];
            hiMatrixCKN = new double[K * N];
            hiMatrixCMK = new double[M * K];
            hiMatrixCMM = new double[M * M];

            // Host buffers for results.
            gpuResultMN = new double[M * N];
            gpuResultMM = new double[M * M];
        }
Exemple #57
0
 /// <summary>
 /// Obtains the target device and its BLAS wrapper, prints the BLAS version and allocates the
 /// host matrices, vectors and result buffers used by the tests.
 /// </summary>
 public void SetUp()
 {
     var device = CudafyHost.GetDevice(CudafyModes.Target);
     _gpu = device;
     _blas = GPGPUBLAS.Create(device);
     Console.Write("BLAS Version={0}", _blas.GetVersion());

     // Length N*(N+1)/2, shared by the APS matrix and the P result buffer.
     var packedLength = (N * (N + 1)) / 2;

     // Host matrices, stored as flat arrays.
     hiMatrixA = new double[M * N];
     hiMatrixANN = new double[N * N];
     hiMatrixACBC = new double[(KL + KU + 1) * N];
     hiMatrixASCBC = new double[(K + 1) * N];
     hiMatrixAPS = new double[packedLength];

     // Host vectors.
     hiVectorXM = new double[M];
     hiVectorXN = new double[N];
     hiVectorYM = new double[M];
     hiVectorYN = new double[N];

     // Host buffers for results.
     gpuResultM = new double[M];
     gpuResultN = new double[N];
     gpuResultMN = new double[M * N];
     gpuResultNN = new double[N * N];
     gpuResultP = new double[packedLength];
 }
        /// <summary>
        /// Translates <c>TextInsertion</c> for the CUDA device's architecture, launches
        /// "AHybridMethod" with an input and a result buffer, and prints "Failed" when the
        /// result read back from the device differs from the host input.
        /// </summary>
        public static void Execute()
        {           
            _gpu = CudafyHost.GetDevice(eGPUType.Cuda);

            CudafyModule km = CudafyTranslator.Cudafy(ePlatform.Auto, _gpu.GetArchitecture(), typeof(TextInsertion));
            Console.WriteLine(km.CompilerOutput);
            _gpu.LoadModule(km);

            int[] data = new int[64];
            int[] data_d = _gpu.CopyToDevice(data);
            int[] res_d = _gpu.Allocate(data);
            int[] res = new int[64];
            _gpu.Launch(1, 1, "AHybridMethod", data_d, res_d);

            // Read back the result buffer handed to the kernel. The previous code copied the
            // input buffer (data_d) instead, so res_d was never inspected and the comparison
            // below could not detect a kernel that failed to produce its output.
            // NOTE(review): presumes the kernel writes a copy of its first argument into its
            // second - confirm against AHybridMethod.
            _gpu.CopyFromDevice(res_d, res);
            for(int i = 0; i < 64; i++)
                if (data[i] != res[i])
                {
                    Console.WriteLine("Failed");
                    break;
                }
        }
        /// <summary>
        /// Translates <c>SIMDFunctions</c> for the CUDA device's architecture and runs
        /// "SIMDFunctionTest" three times on 1024x1024-element buffers, printing the elapsed time
        /// of each run. The first run is additionally verified on the host against
        /// <c>GThread.vadd2</c>.
        /// </summary>
        public static void Execute()
        {
            _gpu = CudafyHost.GetDevice(eGPUType.Cuda);

            CudafyModule module = CudafyTranslator.Cudafy(ePlatform.Auto, _gpu.GetArchitecture(), typeof(SIMDFunctions));
            _gpu.LoadModule(module);

            int w = 1024;
            int h = 1024;
            int count = w * h;

            for (int run = 0; run < 3; run++)
            {
                // Inputs are created, filled and uploaded one after the other.
                uint[] a = new uint[count];
                Fill(a);
                uint[] dev_a = _gpu.CopyToDevice(a);

                uint[] b = new uint[count];
                Fill(b);
                uint[] dev_b = _gpu.CopyToDevice(b);

                uint[] c = new uint[count];
                uint[] dev_c = _gpu.Allocate(c);

                // The timed section covers the launch and the copy back to the host.
                _gpu.StartTimer();
                _gpu.Launch(h, w, "SIMDFunctionTest", dev_a, dev_b, dev_c);
                _gpu.CopyFromDevice(dev_c, c);
                float elapsed = _gpu.StopTimer();
                Console.WriteLine("Time: {0}", elapsed);

                if (run == 0)
                {
                    // Host-side reference: every element is checked, no early exit.
                    GThread reference = new GThread(1, 1, null);
                    bool passed = true;
                    for (int i = 0; i < count; i++)
                    {
                        passed &= reference.vadd2(a[i], b[i]) == c[i];
                    }

                    Console.WriteLine("Test {0}", passed ? "passed. " : "failed!");
                }

                _gpu.FreeAll();
            }
        }
Exemple #60
0
        /// <summary>
        /// Obtains the default device and its SPARSE wrapper, fills the X and Y host vectors,
        /// builds the value/index arrays for X sized by NNZ, and allocates the matching device
        /// buffers.
        /// </summary>
        public void SetUp()
        {
            var device = CudafyHost.GetDevice();
            _gpu = device;
            _sparse = GPGPUSPARSE.Create(device);

            _hiVectorX = new float[N];
            _hiVectorY = new float[N];
            _hoVectorY = new float[N];

            // FillBufferSparse sets NNZ, which sizes the arrays created below.
            FillBufferSparse(_hiVectorX, out NNZ);
            FillBuffer(_hiVectorY);

            _hiIndicesX = new int[NNZ];
            _hoValsX = new float[NNZ];
            _hiValsX = new float[NNZ];
            GetSparseIndex(_hiVectorX, _hiValsX, _hiIndicesX);

            // Device buffers are sized after their host counterparts.
            _diValsX = device.Allocate(_hiValsX);
            _diIndicesX = device.Allocate(_hiIndicesX);
            _diVectorY = device.Allocate(_hiVectorY);
        }