/// <summary>
/// Replaces <paramref name="dm"/> with a new ColumnCount x RowCount dense matrix whose
/// column-major data is read back from CUDA device buffers in two passes: first the top
/// square block (the first ColumnCount rows), then the remaining rows.
/// </summary>
/// <param name="dm">
/// Matrix with at least as many rows as columns. The reference is set to null once the
/// host copies have been taken and is only reassigned on success, so it is left null if
/// the device work throws.
/// </param>
/// <exception cref="System.ArgumentNullException"><paramref name="dm"/> is null.</exception>
/// <exception cref="System.ArgumentOutOfRangeException">The matrix has fewer rows than columns.</exception>
/// <remarks>
/// NOTE(review): both GEMV calls are given the freshly allocated output buffer (c_d) in the
/// matrix position and x_d as both vectors; the uploaded block (a_d) is never passed to any
/// call. The values copied back from c_d therefore do not appear to be derived from the
/// input. This looks unfinished - confirm the intended device-side transpose (for example a
/// geam-style routine or a custom kernel) before relying on the result.
/// </remarks>
public static void cudaTranspose(ref MathNet.Numerics.LinearAlgebra.Double.DenseMatrix dm)
        {
            if (dm == null)
            {
                throw new System.ArgumentNullException("dm");
            }

            int cols = dm.ColumnCount, rows = dm.RowCount;
            int restRows = rows - cols;
            if (restRows < 0)
            {
                // The two-pass split below needs a full cols x cols block at the top.
                throw new System.ArgumentOutOfRangeException(
                    "dm", "cudaTranspose requires a matrix with at least as many rows as columns.");
            }

            GPGPU gpu = CudafyHost.GetDevice(eGPUType.Cuda);

            // Dispose the BLAS handle deterministically instead of leaving it to finalization.
            using (GPGPUBLAS blas = GPGPUBLAS.Create(gpu))
            {
                double[] a = dm.SubMatrix(0, cols, 0, cols).Storage.ToColumnMajorArray();

                // A square input has no rows below the top block; skip the second pass
                // rather than asking for a zero-row sub-matrix.
                double[] b = restRows > 0
                    ? dm.SubMatrix(cols, restRows, 0, cols).Storage.ToColumnMajorArray()
                    : null;

                // Drop the caller's reference now that host copies exist (presumably so the
                // original matrix can be collected while the result buffer is allocated).
                dm = null;

                double[] result;
                try
                {
                    // Pass 1: top cols x cols block.
                    double[] a_d = gpu.CopyToDevice <double>(a);
                    a = null;
                    double[] c_d = gpu.Allocate <double>(cols * cols);
                    double[] x_d = gpu.CopyToDevice <double>(new double[] { 1 });
                    blas.GEMV(cols, cols, 1, c_d, x_d, 0, x_d, Cudafy.Maths.BLAS.Types.cublasOperation.T);
                    result = new double[cols * rows];
                    gpu.CopyFromDevice <double>(c_d, 0, result, 0, cols * cols);
                    gpu.FreeAll();

                    // Pass 2: remaining restRows x cols block, written after the first
                    // cols * cols entries of the result.
                    if (b != null)
                    {
                        a_d = gpu.CopyToDevice <double>(b);
                        b   = null;
                        c_d = gpu.Allocate <double>(restRows * cols);
                        x_d = gpu.CopyToDevice <double>(new double[] { 1 });
                        blas.GEMV(restRows, cols, 1, c_d, x_d, 0, x_d, Cudafy.Maths.BLAS.Types.cublasOperation.T);
                        gpu.CopyFromDevice <double>(c_d, 0, result, cols * cols, restRows * cols);
                    }
                }
                finally
                {
                    // Release device buffers on both the success and the failure path.
                    gpu.FreeAll();
                }

                dm = new MathNet.Numerics.LinearAlgebra.Double.DenseMatrix(cols, rows, result);
            }
        }
        // Example #2
        // 0
        /// <summary>
        /// Runs GEMV on the device, without and then with the transpose option, and compares
        /// every output element against a reference computed on the host from the same inputs.
        /// Device buffers are released even when an assertion or device call fails.
        /// </summary>
        public void Test_BLAS2_GEMV()
        {
            ClearBuffer(hiMatrixA);
            ClearBuffer(hiVectorXM);
            ClearBuffer(hiVectorXN);
            ClearBuffer(hiVectorYM);
            ClearBuffer(hiVectorYN);

            FillBuffer(hiMatrixA);
            FillBuffer(hiVectorXM);
            FillBuffer(hiVectorXN);
            FillBuffer(hiVectorYM);
            FillBuffer(hiVectorYN);

            try
            {
                diMatrixA  = _gpu.CopyToDevice(hiMatrixA);
                diVectorXM = _gpu.CopyToDevice(hiVectorXM);
                diVectorXN = _gpu.CopyToDevice(hiVectorXN);

                // Test without transpose: the N-length x vector goes in, the M-length y comes out.
                diVectorYM = _gpu.CopyToDevice(hiVectorYM);
                _blas.GEMV(M, N, Alpha, diMatrixA, diVectorXN, Beta, diVectorYM);
                _gpu.CopyFromDevice(diVectorYM, gpuResultM);

                for (int i = 0; i < M; i++)
                {
                    // Host reference: sum over j of Alpha * A[i, j] * x[j], then add Beta * y[i].
                    // The accumulation order is kept fixed because the comparison below is exact.
                    double cpuResult = 0.0;

                    for (int j = 0; j < N; j++)
                    {
                        cpuResult += Alpha * hiMatrixA[GetIndexColumnMajor(i, j, M)] * hiVectorXN[j];
                    }

                    cpuResult += Beta * hiVectorYM[i];

                    // NOTE(review): exact floating-point equality between host and device
                    // results - confirm the fill data makes this reliable, or use a tolerance.
                    Assert.AreEqual(cpuResult, gpuResultM[i]);
                }

                // Test with transpose: the M-length x vector goes in, the N-length y comes out.
                diVectorYN = _gpu.CopyToDevice(hiVectorYN);
                _blas.GEMV(M, N, Alpha, diMatrixA, diVectorXM, Beta, diVectorYN, cublasOperation.T);
                _gpu.CopyFromDevice(diVectorYN, gpuResultN);

                for (int j = 0; j < N; j++)
                {
                    // Host reference: sum over i of Alpha * A[i, j] * x[i], then add Beta * y[j].
                    double cpuResult = 0.0;

                    for (int i = 0; i < M; i++)
                    {
                        cpuResult += Alpha * hiMatrixA[GetIndexColumnMajor(i, j, M)] * hiVectorXM[i];
                    }

                    cpuResult += Beta * hiVectorYN[j];

                    Assert.AreEqual(cpuResult, gpuResultN[j]);
                }
            }
            finally
            {
                // Previously skipped when an assertion failed, leaving device buffers allocated.
                _gpu.FreeAll();
            }
        }