示例#1
0
文件: BLAS3.cs 项目: rblenis/cudafy
        /// <summary>
        /// Checks <c>_blas.TRMM</c> against a CPU reference product for all eight
        /// combinations of fill mode (lower/upper), side (left/right) and
        /// transposition (none/transpose) of the banded matrix A.
        /// </summary>
        /// <remarks>
        /// Every case rebuilds and uploads the operands, runs TRMM on the device,
        /// compares the downloaded result with the CPU reference and finally
        /// releases all device memory before the next case starts.
        /// </remarks>
        public void Test_BLAS3_TRMM()
        {
            // Lower fill mode, Side left, No transpose
            UploadTrmmOperands(leftSide: true, upperFill: false);
            _blas.TRMM(M, N, Alpha, diMatrixA, diMatrixB, diMatrixC);
            AssertTrmmMatchesCpu(leftSide: true, transposeA: false);
            _gpu.FreeAll();

            // Lower fill mode, Side left, Transpose
            UploadTrmmOperands(leftSide: true, upperFill: false);
            _blas.TRMM(M, N, Alpha, diMatrixA, diMatrixB, diMatrixC, cublasSideMode.Left, cublasOperation.T);
            AssertTrmmMatchesCpu(leftSide: true, transposeA: true);
            _gpu.FreeAll();

            // Lower fill mode, Side right, No transpose
            UploadTrmmOperands(leftSide: false, upperFill: false);
            _blas.TRMM(M, N, Alpha, diMatrixA, diMatrixB, diMatrixC, cublasSideMode.Right);
            AssertTrmmMatchesCpu(leftSide: false, transposeA: false);
            _gpu.FreeAll();

            // Lower fill mode, Side right, Transpose
            UploadTrmmOperands(leftSide: false, upperFill: false);
            _blas.TRMM(M, N, Alpha, diMatrixA, diMatrixB, diMatrixC, cublasSideMode.Right, cublasOperation.T);
            AssertTrmmMatchesCpu(leftSide: false, transposeA: true);
            _gpu.FreeAll();

            // Upper fill mode, Side left, No transpose
            UploadTrmmOperands(leftSide: true, upperFill: true);
            _blas.TRMM(M, N, Alpha, diMatrixA, diMatrixB, diMatrixC, cublasSideMode.Left, cublasOperation.N, cublasFillMode.Upper);
            AssertTrmmMatchesCpu(leftSide: true, transposeA: false);
            _gpu.FreeAll();

            // Upper fill mode, Side left, Transpose
            UploadTrmmOperands(leftSide: true, upperFill: true);
            _blas.TRMM(M, N, Alpha, diMatrixA, diMatrixB, diMatrixC, cublasSideMode.Left, cublasOperation.T, cublasFillMode.Upper);
            AssertTrmmMatchesCpu(leftSide: true, transposeA: true);
            _gpu.FreeAll();

            // Upper fill mode, Side right, No transpose
            UploadTrmmOperands(leftSide: false, upperFill: true);
            _blas.TRMM(M, N, Alpha, diMatrixA, diMatrixB, diMatrixC, cublasSideMode.Right, cublasOperation.N, cublasFillMode.Upper);
            AssertTrmmMatchesCpu(leftSide: false, transposeA: false);
            _gpu.FreeAll();

            // Upper fill mode, Side right, Transpose
            UploadTrmmOperands(leftSide: false, upperFill: true);
            _blas.TRMM(M, N, Alpha, diMatrixA, diMatrixB, diMatrixC, cublasSideMode.Right, cublasOperation.T, cublasFillMode.Upper);
            AssertTrmmMatchesCpu(leftSide: false, transposeA: true);
            _gpu.FreeAll();
        }

        /// <summary>
        /// Rebuilds the host operands for one TRMM case and transfers them to the device.
        /// </summary>
        /// <param name="leftSide">
        /// <c>true</c> to use the M x M buffer <c>hiMatrixAMM</c> as A,
        /// <c>false</c> to use the N x N buffer <c>hiMatrixANN</c>.
        /// </param>
        /// <param name="upperFill">
        /// <c>true</c> to build A with band arguments <c>(0, order - 1)</c>,
        /// <c>false</c> to build it with <c>(order - 1, 0)</c>.
        /// </param>
        private void UploadTrmmOperands(bool leftSide, bool upperFill)
        {
            var hostA = leftSide ? hiMatrixAMM : hiMatrixANN;
            int order = leftSide ? M : N;

            ClearBuffer(hostA);
            ClearBuffer(hiMatrixBMN);

            // The two trailing arguments are presumably the sub- and super-diagonal
            // counts (the lower-fill cases pass (order - 1, 0)) - confirm against
            // CreateBandedMatrix.
            CreateBandedMatrix(hostA, order, order, upperFill ? 0 : order - 1, upperFill ? order - 1 : 0);
            FillBuffer(hiMatrixBMN);
            FillBuffer(hiMatrixCMN);

            diMatrixA = _gpu.CopyToDevice(hostA);
            diMatrixB = _gpu.CopyToDevice(hiMatrixBMN);
            diMatrixC = _gpu.Allocate(hiMatrixCMN);
        }

        /// <summary>
        /// Downloads the device result into <c>gpuResultMN</c> and asserts that every
        /// element equals the CPU reference product.
        /// </summary>
        /// <param name="leftSide">
        /// <c>true</c> when the reference is <c>Alpha * op(A) * B</c> with A taken from
        /// <c>hiMatrixAMM</c>; <c>false</c> when it is <c>Alpha * B * op(A)</c> with A
        /// taken from <c>hiMatrixANN</c>.
        /// </param>
        /// <param name="transposeA">
        /// <c>true</c> to read A with its row and column indices swapped.
        /// </param>
        private void AssertTrmmMatchesCpu(bool leftSide, bool transposeA)
        {
            _gpu.CopyFromDevice(diMatrixC, gpuResultMN);

            var hostA = leftSide ? hiMatrixAMM : hiMatrixANN;
            int innerDim = leftSide ? M : N;

            for (int i = 0; i < M; i++)
            {
                // The result is a full M x N matrix, so every column is verified;
                // starting at j = i would leave everything below the diagonal unchecked.
                for (int j = 0; j < N; j++)
                {
                    double cpuResult = 0.0;

                    for (int k = 0; k < innerDim; k++)
                    {
                        // Operand order inside each product is kept fixed per side so the
                        // floating-point rounding of the reference stays stable.
                        if (leftSide)
                        {
                            cpuResult += Alpha
                                * hostA[transposeA ? GetIndexColumnMajor(k, i, M) : GetIndexColumnMajor(i, k, M)]
                                * hiMatrixBMN[GetIndexColumnMajor(k, j, M)];
                        }
                        else
                        {
                            cpuResult += Alpha
                                * hiMatrixBMN[GetIndexColumnMajor(i, k, M)]
                                * hostA[transposeA ? GetIndexColumnMajor(j, k, N) : GetIndexColumnMajor(k, j, N)];
                        }
                    }

                    // NOTE(review): exact floating-point comparison - consider a tolerance
                    // overload if device and host summation order can differ.
                    Assert.AreEqual(cpuResult, gpuResultMN[GetIndexColumnMajor(i, j, M)]);
                }
            }
        }