예제 #1
0
 /// <summary>
 /// Fixture setup: obtains the default Cudafy device, creates the SPARSE and
 /// BLAS wrappers bound to it, and builds the <c>Solver</c> from all three.
 /// </summary>
 public void SetUp()
 {
     // The device comes first because both wrappers are created from it.
     _gpu = CudafyHost.GetDevice();

     // Library wrappers attached to that device.
     _sparse = GPGPUSPARSE.Create(_gpu);
     _blas = GPGPUBLAS.Create(_gpu);

     // The solver receives the device together with both wrappers.
     _solver = new Solver(_gpu, _blas, _sparse);
 }
예제 #2
0
        /// <summary>
        /// Replaces <paramref name="dm"/> with a new <c>cols x rows</c> matrix built from
        /// data that is round-tripped through the CUDA device in two stages: first the
        /// top <c>cols x cols</c> sub-matrix, then the remaining <c>(rows - cols) x cols</c> rows.
        /// </summary>
        /// <param name="dm">
        /// Matrix to process. It is set to <c>null</c> while the method runs (so the caller's
        /// reference is released) and is reassigned to the result on success.
        /// </param>
        /// <remarks>
        /// NOTE(review): both GEMV calls pass the freshly allocated <c>c_d</c> buffer as the
        /// matrix operand and the one-element <c>x_d</c> as both vectors; the uploaded
        /// <c>a_d</c> buffer is never handed to BLAS. Whether this actually yields a
        /// transpose should be verified against the Cudafy GEMV contract.
        /// NOTE(review): the second <c>SubMatrix</c> call uses <c>rows - cols</c> as its row
        /// count, so this presumably requires <c>rows &gt;= cols</c> - confirm with callers.
        /// </remarks>
        public static void cudaTranspose(ref MathNet.Numerics.LinearAlgebra.Double.DenseMatrix dm)
        {
            GPGPU gpu = CudafyHost.GetDevice(eGPUType.Cuda);

            // The BLAS wrapper is disposable; scope it so the handle is released on every path.
            using (GPGPUBLAS blas = GPGPUBLAS.Create(gpu))
            {
                int cols = dm.ColumnCount, rows = dm.RowCount;
                int restRows = rows - cols;

                // Split the input into the leading square part and the remaining rows.
                double[] a = dm.SubMatrix(0, cols, 0, cols).Storage.ToColumnMajorArray();
                double[] b = dm.SubMatrix(cols, restRows, 0, cols).Storage.ToColumnMajorArray();
                dm = null;

                try
                {
                    // Stage 1: square part.
                    double[] a_d = gpu.CopyToDevice<double>(a);
                    a = null;
                    double[] c_d = gpu.Allocate<double>(cols * cols);
                    double[] x_d = gpu.CopyToDevice<double>(new double[] { 1 });
                    blas.GEMV(cols, cols, 1, c_d, x_d, 0, x_d, Cudafy.Maths.BLAS.Types.cublasOperation.T);

                    // Host buffer for the whole result; the first cols*cols entries are filled here.
                    a = new double[cols * rows];
                    gpu.CopyFromDevice<double>(c_d, 0, a, 0, cols * cols);
                    gpu.FreeAll();

                    // Stage 2: remaining rows, written after the square part in the host buffer.
                    a_d = gpu.CopyToDevice<double>(b);
                    b = null;
                    c_d = gpu.Allocate<double>(restRows * cols);
                    x_d = gpu.CopyToDevice<double>(new double[] { 1 });
                    blas.GEMV(restRows, cols, 1, c_d, x_d, 0, x_d, Cudafy.Maths.BLAS.Types.cublasOperation.T);
                    gpu.CopyFromDevice<double>(c_d, 0, a, cols * cols, restRows * cols);
                }
                finally
                {
                    // Release device buffers even when a copy or BLAS call throws.
                    gpu.FreeAll();
                }

                dm = new MathNet.Numerics.LinearAlgebra.Double.DenseMatrix(cols, rows, a);
            }
        }
예제 #3
0
        /// <summary>
        /// Smoke test: switches Cudafy to the emulator target, uploads three zero-filled
        /// 100-element float buffers, issues a single GEMM with m = k = n = 10 and copies
        /// the output buffer back to the host.
        /// </summary>
        public static void MyFirstBlasEmulatorTest()
        {
            Console.WriteLine("MyTest()");

            // Select the emulator and fetch the matching device.
            CudafyModes.Target = eGPUType.Emulator;
            GPGPU emulator = CudafyHost.GetDevice(CudafyModes.Target);

            // The BLAS wrapper lives only for the duration of this block.
            using (GPGPUBLAS blasHandle = GPGPUBLAS.Create(emulator))
            {
                const int ElementCount = 100;

                // Host-side operands (all zeros) and result buffer.
                float[] hostA = new float[ElementCount];
                float[] hostB = new float[ElementCount];
                float[] hostC = new float[ElementCount];

                // Scalars passed to GEMM.
                float scaleAB = -1;
                float scaleC  = 0;

                // Device-side copies, uploaded in the order A, B, C.
                float[] devA = emulator.CopyToDevice(hostA);
                float[] devB = emulator.CopyToDevice(hostB);
                float[] devC = emulator.CopyToDevice(hostC);

                // Problem dimensions and operation flag.
                int rowCount   = 10;
                int colCount   = 10;
                int innerCount = 10;
                cublasOperation operation = cublasOperation.N;

                blasHandle.GEMM(rowCount, innerCount, colCount, scaleAB, devA, devB, scaleC, devC, operation);

                // Bring the result back into the host buffer.
                emulator.CopyFromDevice<float>(devC, hostC);
            }
        }
예제 #4
0
        //
        // http://stackoverflow.com/questions/18628447/cudafy-throws-an-exception-while-testing
        //
        /// <summary>
        /// Emulator-targeted BLAS sample: selects the emulator device with the given id,
        /// translates and loads a Cudafy module for its architecture, uploads three
        /// zero-filled 100-element float buffers and issues one GEMM with m = k = n = 10.
        /// </summary>
        /// <param name="deviceId">Device id passed to <c>CudafyHost.GetDevice</c> and stored in <c>CudafyModes.DeviceId</c>.</param>
        /// <exception cref="NotImplementedException">
        /// Always thrown once the GEMM call returns; the sample has no completion path.
        /// </exception>
        private static void BlasSample(int deviceId)
        {
            CudafyModes.Target = eGPUType.Emulator;
            GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, deviceId);

            CudafyModes.DeviceId = deviceId;
            eArchitecture arch = gpu.GetArchitecture();
            CudafyModule  km   = CudafyTranslator.Cudafy(arch);

            gpu.LoadModule(km);

            // The method always leaves via an exception, so scope the disposable BLAS
            // wrapper to guarantee it is released.
            using (GPGPUBLAS blas = GPGPUBLAS.Create(gpu))
            {
                const int N = 100;

                float[] a     = new float[N];
                float[] b     = new float[N];
                float[] c     = new float[N];
                float   alpha = -1;
                float   beta  = 0;

                float[] device_a = gpu.CopyToDevice(a);
                float[] device_b = gpu.CopyToDevice(b);
                float[] device_c = gpu.CopyToDevice(c);

                int             m  = 10;
                int             n  = 10;
                int             k  = 10;
                cublasOperation Op = cublasOperation.N;

                blas.GEMM(m, k, n, alpha, device_a, device_b, beta, device_c, Op);

                throw new NotImplementedException();
            }
        }
예제 #5
0
 /// <summary>
 /// Fixture setup: creates a device for the configured Cudafy target, a BLAS wrapper
 /// on it, three <c>ciROWS x ciCOLS</c> host matrices, and two device allocations made
 /// from the first input matrix and the output matrix.
 /// </summary>
 public void SetUp()
 {
     // Device and BLAS wrapper.
     _gpu = CudafyHost.CreateDevice(CudafyModes.Target);
     _blas = GPGPUBLAS.Create(_gpu);

     // Host-side 2-D buffers.
     _hostInput = new float[ciROWS, ciCOLS];
     _hostInput2 = new float[ciROWS, ciCOLS];
     _hostOutput = new float[ciROWS, ciCOLS];

     // Device-side buffers; each Allocate call is handed a host matrix as its argument.
     _devPtr = _gpu.Allocate<float>(_hostInput);
     _devPtr2 = _gpu.Allocate<float>(_hostOutput);
 }
예제 #6
0
 /// <summary>
 /// Fixture setup: creates a device for the configured Cudafy target and a BLAS wrapper
 /// on it, prints the BLAS version, then creates four <c>ciN</c>-element host vectors
 /// and two device allocations made from the first input and first output vectors.
 /// </summary>
 public void SetUp()
 {
     // Device and BLAS wrapper.
     _gpu = CudafyHost.CreateDevice(CudafyModes.Target);
     _blas = GPGPUBLAS.Create(_gpu);

     // Report which BLAS version the wrapper exposes.
     Console.Write("BLAS Version={0}", _blas.GetVersion());

     // Host-side vectors.
     _hostInput1 = new float[ciN];
     _hostInput2 = new float[ciN];
     _hostOutput1 = new float[ciN];
     _hostOutput2 = new float[ciN];

     // Device-side buffers; each Allocate call is handed a host vector as its argument.
     _devPtr1 = _gpu.Allocate<float>(_hostInput1);
     _devPtr2 = _gpu.Allocate<float>(_hostOutput1);
 }
예제 #7
0
        /// <summary>
        /// Stores the supplied device and library wrappers, then loads a Cudafy module onto
        /// the device. A previously serialized module is reused when it deserializes and
        /// its checksums verify; otherwise a new one is translated and serialized.
        /// </summary>
        /// <param name="gpu">Device that the module is loaded onto.</param>
        /// <param name="blas">BLAS wrapper kept for later use.</param>
        /// <param name="sparse">SPARSE wrapper kept for later use.</param>
        public Solver(GPGPU gpu, GPGPUBLAS blas, GPGPUSPARSE sparse)
        {
            this.gpu = gpu;
            this.blas = blas;
            this.sparse = sparse;

            // Try the serialized module first.
            var module = CudafyModule.TryDeserialize();
            bool cacheIsValid = module != null && module.TryVerifyChecksums();

            if (!cacheIsValid)
            {
                // Nothing usable was deserialized: translate again and serialize the result.
                module = CudafyTranslator.Cudafy();
                module.TrySerialize();
            }

            gpu.LoadModule(module);
        }
예제 #8
0
        /// <summary>
        /// Fixture setup: obtains the device for the configured Cudafy target, creates a
        /// BLAS wrapper on it and allocates every host-side input and result buffer.
        /// </summary>
        public void SetUp()
        {
            _gpu = CudafyHost.GetDevice(CudafyModes.Target);
            _blas = GPGPUBLAS.Create(_gpu);

            // Buffer lengths that are used more than once.
            var sizeMN = M * N;
            var sizeNN = N * N;
            var sizeTriangle = (N * (N + 1)) / 2; // element count of one triangle of an N x N matrix

            // Host matrices.
            hiMatrixA = new double[sizeMN];
            hiMatrixANN = new double[sizeNN];
            hiMatrixACBC = new double[(KL + KU + 1) * N];
            hiMatrixASCBC = new double[(K + 1) * N];
            hiMatrixAPS = new double[sizeTriangle];

            // Host vectors of length M and N.
            hiVectorXM = new double[M];
            hiVectorXN = new double[N];
            hiVectorYM = new double[M];
            hiVectorYN = new double[N];

            // Host buffers for results.
            gpuResultM = new double[M];
            gpuResultN = new double[N];
            gpuResultMN = new double[sizeMN];
            gpuResultNN = new double[sizeNN];
            gpuResultP = new double[sizeTriangle];
        }
예제 #9
0
파일: BLAS3.cs 프로젝트: rblenis/cudafy
        /// <summary>
        /// Fixture setup: obtains the device for the configured Cudafy target, creates a
        /// BLAS wrapper on it and allocates the host-side A, B, C and result matrices as
        /// flat arrays sized by products of <c>M</c>, <c>N</c> and <c>K</c>.
        /// </summary>
        public void SetUp()
        {
            _gpu = CudafyHost.GetDevice(CudafyModes.Target);
            _blas = GPGPUBLAS.Create(_gpu);

            // Element counts shared by several buffers.
            var sizeMM = M * M;
            var sizeNN = N * N;
            var sizeMN = M * N;
            var sizeMK = M * K;
            var sizeKM = K * M;
            var sizeKN = K * N;
            var sizeNK = N * K;

            // "A" matrices.
            hiMatrixAMM = new double[sizeMM];
            hiMatrixANN = new double[sizeNN];
            hiMatrixAMK = new double[sizeMK];
            hiMatrixAKM = new double[sizeKM];

            // "B" matrices.
            hiMatrixBMN = new double[sizeMN];
            hiMatrixBKN = new double[sizeKN];
            hiMatrixBNK = new double[sizeNK];
            hiMatrixBMK = new double[sizeMK];
            hiMatrixBKM = new double[sizeKM];

            // "C" matrices.
            hiMatrixCMN = new double[sizeMN];
            hiMatrixCKN = new double[sizeKN];
            hiMatrixCMK = new double[sizeMK];
            hiMatrixCMM = new double[sizeMM];

            // Host buffers for results.
            gpuResultMN = new double[sizeMN];
            gpuResultMM = new double[sizeMM];
        }
예제 #10
0
        /// <summary>
        /// Replaces <paramref name="dm"/> with a <c>cols x cols</c> matrix produced by one
        /// GEMM call on the CUDA device, in which the column-wise data of the input is
        /// passed as both operands and the first operand is flagged as transposed.
        /// </summary>
        /// <param name="dm">
        /// Input matrix. It is swapped for a 1x1 placeholder while the computation runs
        /// (releasing the caller's reference to the original) and is reassigned to the
        /// result on success.
        /// </param>
        /// <remarks>
        /// Presumably this computes transpose(dm) * dm - confirm against the Cudafy GEMM
        /// argument semantics.
        /// </remarks>
        public static void cudaTransposeAndMultiply(ref MathNet.Numerics.LinearAlgebra.Double.DenseMatrix dm)
        {
            // NOTE(review): the module is translated and serialized but never loaded onto
            // the device in this method; kept because serialization writes a file.
            Cudafy.CudafyModule km = Cudafy.Translator.CudafyTranslator.Cudafy();
            km.Serialize();

            GPGPU gpu = CudafyHost.GetDevice(eGPUType.Cuda);
            int cols = dm.ColumnCount, rows = dm.RowCount;

            // One host copy is enough: the earlier extra ToColumnMajorArray() call discarded
            // its result, and the extra device buffer of the same length was never used.
            double[] a = dm.ToColumnWiseArray();
            dm = new MathNet.Numerics.LinearAlgebra.Double.DenseMatrix(1, 1);

            // The BLAS wrapper is disposable; scope it so the handle is always released.
            using (GPGPUBLAS blas = GPGPUBLAS.Create(gpu))
            {
                try
                {
                    double[] a_d = gpu.CopyToDevice<double>(a);
                    double[] c_d = gpu.Allocate<double>(cols * cols);

                    // NOTE(review): the timer is started but never stopped or read here.
                    gpu.StartTimer();
                    blas.GEMM(cols, rows, cols, 1, a_d, a_d, 0, c_d, Cudafy.Maths.BLAS.Types.cublasOperation.T);

                    a = new double[cols * cols];
                    gpu.CopyFromDevice<double>(c_d, a);
                }
                finally
                {
                    // Release device buffers even when a copy or the GEMM call throws.
                    gpu.FreeAll();
                }
            }

            dm = new MathNet.Numerics.LinearAlgebra.Double.DenseMatrix(cols, cols, a);
        }
예제 #11
0
 /// <summary>
 /// Stores the given device and creates the BLAS and SPARSE wrappers bound to it.
 /// </summary>
 /// <param name="gpu">Device that both library wrappers are created from.</param>
 public SharpBLAS(GPGPU gpu)
 {
     this.Gpu = gpu;

     // Both wrappers are created from the constructor argument, BLAS first.
     this.Blas = GPGPUBLAS.Create(gpu);
     this.Sparse = GPGPUSPARSE.Create(gpu);
 }