/// <summary>
/// Acquires the default Cudafy device, creates the SPARSE and BLAS wrappers on it,
/// and constructs the <c>Solver</c> from those three objects.
/// </summary>
public void SetUp()
{
    GPGPU device = CudafyHost.GetDevice();
    _gpu = device;

    // Both library wrappers are bound to the same device instance.
    _sparse = GPGPUSPARSE.Create(device);
    _blas = GPGPUBLAS.Create(device);

    _solver = new Solver(device, _blas, _sparse);
}
/// <summary>
/// Replaces <paramref name="dm"/> with a new <c>cols x rows</c> dense matrix whose backing
/// array is filled from two device round trips: one for the leading <c>cols x cols</c>
/// block of the input and one for the remaining <c>rows - cols</c> rows.
/// </summary>
/// <param name="dm">
/// Matrix to process. The reference is set to <c>null</c> while the device work runs
/// and is reassigned to the result matrix on success.
/// </param>
/// <remarks>
/// Assumes <c>dm.RowCount &gt;= dm.ColumnCount</c>, since <c>rows - cols</c> is used as a
/// sub-matrix row count - TODO confirm with callers.
/// </remarks>
public static void cudaTranspose(ref MathNet.Numerics.LinearAlgebra.Double.DenseMatrix dm)
{
    GPGPU gpu = CudafyHost.GetDevice(eGPUType.Cuda);

    // The BLAS wrapper is disposable; release it deterministically instead of leaking it.
    using (GPGPUBLAS blas = GPGPUBLAS.Create(gpu))
    {
        int cols = dm.ColumnCount, rows = dm.RowCount;
        int restRows = rows - cols;

        double[] a = dm.SubMatrix(0, cols, 0, cols).Storage.ToColumnMajorArray();
        double[] b = dm.SubMatrix(cols, restRows, 0, cols).Storage.ToColumnMajorArray();

        // Drop the reference to the input so it can be collected while the device works.
        dm = null;

        // ---- Pass 1: leading cols x cols block ----
        try
        {
            // NOTE(review): a_d is uploaded but never passed to GEMV below; the freshly
            // allocated c_d is used as the matrix operand and x_d as both vectors.
            // Presumably a_d was meant to be an operand - confirm before relying on the result.
            double[] a_d = gpu.CopyToDevice<double>(a);
            a = null;
            double[] c_d = gpu.Allocate<double>(cols * cols);
            double[] x_d = gpu.CopyToDevice<double>(new double[] { 1 });
            blas.GEMV(cols, cols, 1, c_d, x_d, 0, x_d, Cudafy.Maths.BLAS.Types.cublasOperation.T);

            // Host buffer for the complete result; pass 1 fills the first cols*cols entries.
            a = new double[cols * rows];
            gpu.CopyFromDevice<double>(c_d, 0, a, 0, cols * cols);
        }
        finally
        {
            // Free device memory even when the pass fails.
            gpu.FreeAll();
        }

        // ---- Pass 2: remaining restRows x cols block ----
        try
        {
            // NOTE(review): same operand pattern as pass 1 - the uploaded block is not used by GEMV.
            double[] a_d = gpu.CopyToDevice<double>(b);
            b = null;
            double[] c_d = gpu.Allocate<double>(restRows * cols);
            double[] x_d = gpu.CopyToDevice<double>(new double[] { 1 });
            blas.GEMV(restRows, cols, 1, c_d, x_d, 0, x_d, Cudafy.Maths.BLAS.Types.cublasOperation.T);

            // Append after the cols*cols entries written by pass 1.
            gpu.CopyFromDevice<double>(c_d, 0, a, cols * cols, restRows * cols);
        }
        finally
        {
            gpu.FreeAll();
        }

        dm = new MathNet.Numerics.LinearAlgebra.Double.DenseMatrix(cols, rows, a);
    }
}
/// <summary>
/// Switches the Cudafy target to the emulator, uploads three zero-filled 100-element
/// float buffers, issues one single-precision GEMM with m = n = k = 10, and copies the
/// output buffer back to the host.
/// </summary>
public static void MyFirstBlasEmulatorTest()
{
    Console.WriteLine("MyTest()");

    // Get GPU device
    CudafyModes.Target = eGPUType.Emulator;
    GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target);

    // Create GPGPUBLAS (CUBLAS Wrapper)
    using (GPGPUBLAS blas = GPGPUBLAS.Create(gpu))
    {
        const int N = 100;

        // Host-side operands (all zeros) and the result buffer.
        float[] hostA = new float[N];
        float[] hostB = new float[N];
        float[] hostC = new float[N];

        float alpha = -1;
        float beta = 0;

        // Device-side copies of the three buffers.
        float[] devA = gpu.CopyToDevice(hostA);
        float[] devB = gpu.CopyToDevice(hostB);
        float[] devC = gpu.CopyToDevice(hostC);

        // 10 x 10 operands: 100 elements each, matching N.
        int m = 10;
        int n = 10;
        int k = 10;
        cublasOperation noTranspose = cublasOperation.N;

        blas.GEMM(m, k, n, alpha, devA, devB, beta, devC, noTranspose);

        gpu.CopyFromDevice<float>(devC, hostC);
    }
}
//
// http://stackoverflow.com/questions/18628447/cudafy-throws-an-exception-while-testing
//
/// <summary>
/// Targets the emulator device with the given id, translates and loads the Cudafy
/// module for that device's architecture, and issues one single-precision GEMM on
/// zero-filled 10 x 10 operands.
/// </summary>
/// <param name="deviceId">Device id passed to <c>CudafyHost.GetDevice</c> and stored in <c>CudafyModes.DeviceId</c>.</param>
/// <exception cref="NotImplementedException">Always thrown once the GEMM call has returned.</exception>
private static void BlasSample(int deviceId)
{
    CudafyModes.Target = eGPUType.Emulator;
    GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, deviceId);
    CudafyModes.DeviceId = deviceId;

    eArchitecture arch = gpu.GetArchitecture();
    CudafyModule km = CudafyTranslator.Cudafy(arch);
    gpu.LoadModule(km);

    // The BLAS wrapper is disposable; `using` releases it before the exception below
    // propagates instead of leaking it.
    using (GPGPUBLAS blas = GPGPUBLAS.Create(gpu))
    {
        const int N = 100;
        float[] a = new float[N];
        float[] b = new float[N];
        float[] c = new float[N];
        float alpha = -1;
        float beta = 0;

        float[] device_a = gpu.CopyToDevice(a);
        float[] device_b = gpu.CopyToDevice(b);
        float[] device_c = gpu.CopyToDevice(c);

        int m = 10;
        int n = 10;
        int k = 10;
        cublasOperation Op = cublasOperation.N;
        blas.GEMM(m, k, n, alpha, device_a, device_b, beta, device_c, Op);
    }

    throw new NotImplementedException();
}
/// <summary>
/// Creates a device for the configured Cudafy target, a BLAS wrapper on it, three
/// <c>ciROWS x ciCOLS</c> host matrices, and two device allocations shaped like the
/// first input and the output matrix.
/// </summary>
public void SetUp()
{
    GPGPU device = CudafyHost.CreateDevice(CudafyModes.Target);
    _gpu = device;
    _blas = GPGPUBLAS.Create(device);

    // Host-side matrices.
    _hostInput = new float[ciROWS, ciCOLS];
    _hostInput2 = new float[ciROWS, ciCOLS];
    _hostOutput = new float[ciROWS, ciCOLS];

    // Device-side buffers sized from the host matrices.
    _devPtr = device.Allocate<float>(_hostInput);
    _devPtr2 = device.Allocate<float>(_hostOutput);
}
/// <summary>
/// Creates a device for the configured Cudafy target and a BLAS wrapper on it, writes
/// the BLAS version to the console, then allocates four <c>ciN</c>-element host vectors
/// and two device buffers sized from the first input and first output vectors.
/// </summary>
public void SetUp()
{
    GPGPU device = CudafyHost.CreateDevice(CudafyModes.Target);
    _gpu = device;
    _blas = GPGPUBLAS.Create(device);

    Console.Write("BLAS Version={0}", _blas.GetVersion());

    // Host-side vectors.
    _hostInput1 = new float[ciN];
    _hostInput2 = new float[ciN];
    _hostOutput1 = new float[ciN];
    _hostOutput2 = new float[ciN];

    // Device-side buffers.
    _devPtr1 = device.Allocate<float>(_hostInput1);
    _devPtr2 = device.Allocate<float>(_hostOutput1);
}
/// <summary>
/// Stores the supplied device and library wrappers, then loads a Cudafy module onto
/// the device. A previously serialized module is reused when it deserializes and its
/// checksums verify; otherwise the module is translated again and re-serialized.
/// </summary>
/// <param name="gpu">Device the module is loaded onto.</param>
/// <param name="blas">BLAS wrapper kept for later use.</param>
/// <param name="sparse">SPARSE wrapper kept for later use.</param>
public Solver(GPGPU gpu, GPGPUBLAS blas, GPGPUSPARSE sparse)
{
    this.gpu = gpu;
    this.blas = blas;
    this.sparse = sparse;

    CudafyModule module = CudafyModule.TryDeserialize();

    // Checksums are only consulted when a module was actually deserialized.
    bool cachedModuleUsable = module != null && module.TryVerifyChecksums();
    if (!cachedModuleUsable)
    {
        module = CudafyTranslator.Cudafy();
        module.TrySerialize();
    }

    gpu.LoadModule(module);
}
/// <summary>
/// Obtains the device for the configured Cudafy target, creates a BLAS wrapper on it,
/// and allocates the host-side matrix, vector and result buffers whose lengths are
/// derived from <c>M</c>, <c>N</c>, <c>K</c>, <c>KL</c> and <c>KU</c>.
/// </summary>
public void SetUp()
{
    _gpu = CudafyHost.GetDevice(CudafyModes.Target);
    _blas = GPGPUBLAS.Create(_gpu);

    // Lengths that are needed by more than one buffer.
    var lengthMN = M * N;
    var lengthNN = N * N;
    var lengthPacked = (N * (N + 1)) / 2;

    // Initialize CPU Buffer
    // -- matrices
    hiMatrixA = new double[lengthMN];
    hiMatrixANN = new double[lengthNN];
    hiMatrixACBC = new double[(KL + KU + 1) * N];
    hiMatrixASCBC = new double[(K + 1) * N];
    hiMatrixAPS = new double[lengthPacked];

    // -- vectors
    hiVectorXM = new double[M];
    hiVectorXN = new double[N];
    hiVectorYM = new double[M];
    hiVectorYN = new double[N];

    // -- buffers for results
    gpuResultM = new double[M];
    gpuResultN = new double[N];
    gpuResultMN = new double[lengthMN];
    gpuResultNN = new double[lengthNN];
    gpuResultP = new double[lengthPacked];
}
/// <summary>
/// Obtains the device for the configured Cudafy target, creates a BLAS wrapper on it,
/// and allocates the host-side A, B, C matrix buffers and result buffers whose lengths
/// are products of <c>M</c>, <c>N</c> and <c>K</c>.
/// </summary>
public void SetUp()
{
    _gpu = CudafyHost.GetDevice(CudafyModes.Target);
    _blas = GPGPUBLAS.Create(_gpu);

    // Lengths that are needed by more than one buffer.
    var lengthMM = M * M;
    var lengthMN = M * N;
    var lengthMK = M * K;
    var lengthKM = K * M;
    var lengthKN = K * N;

    // A operands
    hiMatrixAMM = new double[lengthMM];
    hiMatrixANN = new double[N * N];
    hiMatrixAMK = new double[lengthMK];
    hiMatrixAKM = new double[lengthKM];

    // B operands
    hiMatrixBMN = new double[lengthMN];
    hiMatrixBKN = new double[lengthKN];
    hiMatrixBNK = new double[N * K];
    hiMatrixBMK = new double[lengthMK];
    hiMatrixBKM = new double[lengthKM];

    // C operands
    hiMatrixCMN = new double[lengthMN];
    hiMatrixCKN = new double[lengthKN];
    hiMatrixCMK = new double[lengthMK];
    hiMatrixCMM = new double[lengthMM];

    // Buffers for results
    gpuResultMN = new double[lengthMN];
    gpuResultMM = new double[lengthMM];
}
/// <summary>
/// Uploads the column-wise data of <paramref name="dm"/> to the CUDA device, runs one
/// double-precision GEMM that uses that buffer for both operands with the transpose
/// flag, and replaces <paramref name="dm"/> with a <c>cols x cols</c> matrix built from
/// the GEMM output.
/// </summary>
/// <param name="dm">
/// Input matrix. It is swapped for a 1 x 1 placeholder while the device work runs and
/// is reassigned to the result matrix on success.
/// </param>
/// <remarks>
/// The Cudafy module is translated and serialized on every call, as the original did;
/// it is not loaded onto the device here.
/// </remarks>
public static void cudaTransposeAndMultiply(ref MathNet.Numerics.LinearAlgebra.Double.DenseMatrix dm)
{
    Cudafy.CudafyModule km = Cudafy.Translator.CudafyTranslator.Cudafy();
    km.Serialize();

    GPGPU gpu = CudafyHost.GetDevice(eGPUType.Cuda);
    int cols = dm.ColumnCount, rows = dm.RowCount;

    // One host copy of the data is enough; the earlier extra ToColumnMajorArray() call
    // built a second full copy and discarded it.
    double[] a = dm.ToColumnWiseArray();

    // Swap in a tiny placeholder so the input matrix can be collected during the device work.
    dm = new MathNet.Numerics.LinearAlgebra.Double.DenseMatrix(1, 1);

    // The BLAS wrapper is disposable; release it deterministically instead of leaking it.
    using (GPGPUBLAS blas = GPGPUBLAS.Create(gpu))
    {
        try
        {
            // Only the buffers GEMM actually uses are placed on the device; the unused
            // a.Length-sized scratch allocation has been removed.
            double[] a_d = gpu.CopyToDevice<double>(a);
            double[] c_d = gpu.Allocate<double>(cols * cols);

            // NOTE(review): the timer is started but never stopped or read in this method.
            gpu.StartTimer();
            blas.GEMM(cols, rows, cols, 1, a_d, a_d, 0, c_d, Cudafy.Maths.BLAS.Types.cublasOperation.T);

            a = new double[cols * cols];
            gpu.CopyFromDevice<double>(c_d, a);
        }
        finally
        {
            // Free device memory even when the upload, GEMM or download fails.
            gpu.FreeAll();
        }
    }

    dm = new MathNet.Numerics.LinearAlgebra.Double.DenseMatrix(cols, cols, a);
}
/// <summary>
/// Stores the supplied device and creates the BLAS and SPARSE wrappers bound to it.
/// </summary>
/// <param name="gpu">Device that both library wrappers are created for.</param>
public SharpBLAS(GPGPU gpu)
{
    Gpu = gpu;

    // Both wrappers are created from the constructor argument, BLAS first.
    Blas = GPGPUBLAS.Create(gpu);
    Sparse = GPGPUSPARSE.Create(gpu);
}