示例#1
0
文件: BLAS3.cs 项目: rblenis/cudafy
        // Exercises SYR2K for all four combinations of fill mode (lower/upper) and
        // operation (no transpose/transpose), comparing the referenced triangle of the
        // GPU result against a CPU reference computed from the same host buffers.
        public void Test_BLAS3_SYR2K()
        {
            // Lower fill mode, No transpose
            CheckSyr2kAgainstCpu(false, false);

            // Lower fill mode, With Transpose
            CheckSyr2kAgainstCpu(true, false);

            // Upper fill mode, No transpose
            CheckSyr2kAgainstCpu(false, true);

            // Upper fill mode, With Transpose
            CheckSyr2kAgainstCpu(true, true);
        }

        /// <summary>
        /// Runs one SYR2K scenario: refills the host buffers, copies them to the device,
        /// invokes SYR2K, copies the result back and asserts every element of the checked
        /// triangle against a CPU-computed value.
        /// </summary>
        /// <param name="transposed">
        /// When true the K-by-M operand buffers are used and SYR2K is called with
        /// <c>cublasOperation.T</c>; otherwise the M-by-K buffers are used.
        /// </param>
        /// <param name="upper">
        /// When true SYR2K is called with <c>cublasFillMode.Upper</c> and the upper triangle
        /// (j &gt;= i) is verified; otherwise the lower triangle (j &lt;= i) is verified.
        /// </param>
        private void CheckSyr2kAgainstCpu(bool transposed, bool upper)
        {
            // Operand layout depends on the operation: M x K for no transpose, K x M for transpose.
            var hostA = transposed ? hiMatrixAKM : hiMatrixAMK;
            var hostB = transposed ? hiMatrixBKM : hiMatrixBMK;

            ClearBuffer(hostA);
            ClearBuffer(hostB);
            ClearBuffer(hiMatrixCMM);

            FillBuffer(hostA);
            FillBuffer(hostB);
            FillBuffer(hiMatrixCMM);
            ConverToSymmetric(hiMatrixCMM, M);

            try
            {
                diMatrixA = _gpu.CopyToDevice(hostA);
                diMatrixB = _gpu.CopyToDevice(hostB);
                diMatrixC = _gpu.CopyToDevice(hiMatrixCMM);

                // Call the same overloads the scenarios have always used, so the
                // library's default operation / fill-mode arguments stay covered.
                if (upper)
                {
                    _blas.SYR2K(M, K, Alpha, diMatrixA, diMatrixB, Beta, diMatrixC,
                                transposed ? cublasOperation.T : cublasOperation.N, cublasFillMode.Upper);
                }
                else if (transposed)
                {
                    _blas.SYR2K(M, K, Alpha, diMatrixA, diMatrixB, Beta, diMatrixC, cublasOperation.T);
                }
                else
                {
                    _blas.SYR2K(M, K, Alpha, diMatrixA, diMatrixB, Beta, diMatrixC);
                }

                _gpu.CopyFromDevice(diMatrixC, gpuResultMM);

                for (int i = 0; i < M; i++)
                {
                    // Only the triangle selected by the fill mode is compared.
                    int jFirst = upper ? i : 0;
                    int jLast = upper ? M - 1 : i;

                    for (int j = jFirst; j <= jLast; j++)
                    {
                        double cpuResult = 0.0;

                        for (int k = 0; k < K; k++)
                        {
                            // Element (i, k) / (j, k) of the logical M x K operand; for the
                            // transposed case it is stored at (k, i) / (k, j) with leading dimension K.
                            var ik = transposed ? GetIndexColumnMajor(k, i, K) : GetIndexColumnMajor(i, k, M);
                            var jk = transposed ? GetIndexColumnMajor(k, j, K) : GetIndexColumnMajor(j, k, M);

                            cpuResult += Alpha * (hostA[ik] * hostB[jk] +
                                                  hostB[ik] * hostA[jk]);
                        }

                        cpuResult += Beta * hiMatrixCMM[GetIndexColumnMajor(i, j, M)];

                        // NOTE(review): this is an exact floating-point comparison between CPU and
                        // GPU results; consider an explicit tolerance if it proves flaky.
                        Assert.AreEqual(cpuResult, gpuResultMM[GetIndexColumnMajor(i, j, M)]);
                    }
                }
            }
            finally
            {
                // Release device buffers even when a GPU call or an assertion throws,
                // so a failing scenario does not leak device memory into later tests.
                _gpu.FreeAll();
            }
        }