예제 #1
0
        /// <summary>
        /// Runs a single-precision GEMM through <c>GPGPUBLAS</c> on the emulator target and
        /// copies the result back to the host.
        /// </summary>
        /// <remarks>
        /// All three host matrices are freshly allocated (zero-filled) 100-element arrays that
        /// are passed to GEMM with m = n = k = 10. The result is copied into a local array and
        /// is not returned to the caller.
        /// </remarks>
        public static void MyFirstBlasEmulatorTest()
        {
            Console.WriteLine("MyTest()");

            // Select the emulator target and fetch the matching device.
            CudafyModes.Target = eGPUType.Emulator;
            GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target);

            // Create GPGPUBLAS (CUBLAS wrapper); disposed when the using block exits.
            using (GPGPUBLAS blas = GPGPUBLAS.Create(gpu))
            {
                const int N     = 100;
                float[]   a     = new float[N];
                float[]   b     = new float[N];
                float[]   c     = new float[N];
                float     alpha = -1;
                float     beta  = 0;

                int             m  = 10;
                int             n  = 10;
                int             k  = 10;
                cublasOperation Op = cublasOperation.N;

                // Declared outside the try so the finally block can release whatever
                // was successfully allocated, even if a later step throws.
                float[] device_a = null;
                float[] device_b = null;
                float[] device_c = null;

                try
                {
                    device_a = gpu.CopyToDevice(a);
                    device_b = gpu.CopyToDevice(b);
                    device_c = gpu.CopyToDevice(c);

                    blas.GEMM(m, k, n, alpha, device_a, device_b, beta, device_c, Op);

                    gpu.CopyFromDevice <float>(device_c, c);
                }
                finally
                {
                    // Free only the buffers this method allocated, so other allocations
                    // on the same device are left alone.
                    if (device_a != null)
                    {
                        gpu.Free(device_a);
                    }

                    if (device_b != null)
                    {
                        gpu.Free(device_b);
                    }

                    if (device_c != null)
                    {
                        gpu.Free(device_c);
                    }
                }
            }
        }
예제 #2
0
        //
        // http://stackoverflow.com/questions/18628447/cudafy-throws-an-exception-while-testing
        //
        /// <summary>
        /// Loads a Cudafy module on the emulator device with the given id, runs a
        /// single-precision GEMM on zero-filled 100-element buffers (m = n = k = 10),
        /// and then throws.
        /// </summary>
        /// <param name="deviceId">Device id passed to <c>CudafyHost.GetDevice</c> and stored in <c>CudafyModes.DeviceId</c>.</param>
        /// <exception cref="NotImplementedException">Always thrown once the GEMM call has completed.</exception>
        private static void BlasSample(int deviceId)
        {
            CudafyModes.Target = eGPUType.Emulator;
            GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, deviceId);

            CudafyModes.DeviceId = deviceId;
            eArchitecture arch = gpu.GetArchitecture();
            CudafyModule  km   = CudafyTranslator.Cudafy(arch);

            gpu.LoadModule(km);

            // Dispose the BLAS wrapper deterministically; this method always exits by throwing.
            using (GPGPUBLAS blas = GPGPUBLAS.Create(gpu))
            {
                const int N = 100;

                float[] a     = new float[N];
                float[] b     = new float[N];
                float[] c     = new float[N];
                float   alpha = -1;
                float   beta  = 0;

                int             m  = 10;
                int             n  = 10;
                int             k  = 10;
                cublasOperation Op = cublasOperation.N;

                // Declared outside the try so the finally block can release whatever
                // was successfully allocated.
                float[] device_a = null;
                float[] device_b = null;
                float[] device_c = null;

                try
                {
                    device_a = gpu.CopyToDevice(a);
                    device_b = gpu.CopyToDevice(b);
                    device_c = gpu.CopyToDevice(c);

                    blas.GEMM(m, k, n, alpha, device_a, device_b, beta, device_c, Op);
                }
                finally
                {
                    if (device_a != null)
                    {
                        gpu.Free(device_a);
                    }

                    if (device_b != null)
                    {
                        gpu.Free(device_b);
                    }

                    if (device_c != null)
                    {
                        gpu.Free(device_c);
                    }
                }
            }

            // NOTE(review): kept from the original sample - the GEMM result is never copied
            // back and the method unconditionally throws. Confirm whether this is a placeholder.
            throw new NotImplementedException();
        }
예제 #3
0
        //http://peterwittek.com/2013/06/cublas-matrix-c-style/
        //row major to col major "trick"
        /// <summary>
        /// Issues a GEMM for row-major operands by passing <paramref name="B"/> as the BLAS
        /// "A" operand and <paramref name="A"/> as the BLAS "B" operand, with dimensions,
        /// leading dimensions and transpose flags swapped to match.
        /// </summary>
        /// <param name="A">Left operand of the row-major product.</param>
        /// <param name="B">Right operand of the row-major product.</param>
        /// <param name="C">Destination array.</param>
        /// <param name="cMultiplier">Passed to GEMM as beta (the scale applied to the existing contents of C).</param>
        /// <param name="transposeA">Use the transpose of <paramref name="A"/>.</param>
        /// <param name="transposeB">Use the transpose of <paramref name="B"/>.</param>
        public void GemmRowMajor(CpuGpuArray A, CpuGpuArray B, CpuGpuArray C, float cMultiplier = 0f, bool transposeA = false, bool transposeB = false)
        {
            // Operands are swapped for the BLAS call: B takes the first slot, A the second.
            var first  = B;
            var second = A;

            // The transpose flag of each row-major operand follows it into its swapped slot.
            var opFirst  = transposeB ? cublasOperation.T : cublasOperation.N;
            var opSecond = transposeA ? cublasOperation.T : cublasOperation.N;

            // Dimensions after the swap:
            //   m = column count of B (row count when B is transposed)
            //   n = row count of A    (column count when A is transposed)
            //   k = column count of A (row count when A is transposed)
            var m = transposeB ? first.RowCount : first.ColCount;
            var n = transposeA ? second.ColCount : second.RowCount;
            var k = transposeA ? second.RowCount : second.ColCount;

            // Leading dimensions of the inputs are their stored column counts, regardless
            // of transposition; the output's leading dimension always equals m.
            var leadingFirst  = first.ColCount;
            var leadingSecond = second.ColCount;
            var leadingResult = m;

            Blas.GEMM(
                m,
                k,
                n,
                1f,
                first.GPUArray,
                second.GPUArray,
                cMultiplier,
                C.GPUArray,
                transa: opFirst,
                transb: opSecond,
                lda: leadingFirst,
                ldb: leadingSecond,
                ldc: leadingResult);
        }
예제 #4
0
        /// <summary>
        /// Replaces <paramref name="dm"/> with the product of its transpose and itself,
        /// computed with a double-precision GEMM on a CUDA device.
        /// </summary>
        /// <param name="dm">
        /// On entry, the input matrix. On successful return, a new
        /// <c>ColumnCount x ColumnCount</c> matrix holding the GEMM result. If an exception is
        /// thrown after the input has been copied out, <paramref name="dm"/> is left as a 1x1
        /// placeholder matrix.
        /// </param>
        public static void cudaTransposeAndMultiply(ref MathNet.Numerics.LinearAlgebra.Double.DenseMatrix dm)
        {
            Cudafy.CudafyModule km = Cudafy.Translator.CudafyTranslator.Cudafy();
            km.Serialize();
            GPGPU gpu = CudafyHost.GetDevice(eGPUType.Cuda);
            int   cols = dm.ColumnCount, rows = dm.RowCount;

            // Take a column-wise copy of the data, then drop the reference to the original
            // matrix by swapping in a 1x1 placeholder.
            double[] a = dm.ToColumnWiseArray();
            dm = new MathNet.Numerics.LinearAlgebra.Double.DenseMatrix(1, 1);

            using (GPGPUBLAS blas = GPGPUBLAS.Create(gpu))
            {
                try
                {
                    double[] a_d = gpu.CopyToDevice <double>(a);
                    double[] c_d = gpu.Allocate <double>(cols * cols);

                    // NOTE(review): the timer is started but never stopped or read in this
                    // method; kept as in the original - confirm whether a caller relies on it.
                    gpu.StartTimer();

                    // The same device buffer is passed as both operands, with the transpose
                    // flag set on the first one.
                    blas.GEMM(cols, rows, cols, 1, a_d, a_d, 0, c_d, Cudafy.Maths.BLAS.Types.cublasOperation.T);

                    a = new double[cols * cols];
                    gpu.CopyFromDevice <double>(c_d, a);
                }
                finally
                {
                    // Release device memory even when the copy or the GEMM call throws.
                    gpu.FreeAll();
                }
            }

            dm = new MathNet.Numerics.LinearAlgebra.Double.DenseMatrix(cols, cols, a);
        }
예제 #5
0
파일: BLAS3.cs 프로젝트: rblenis/cudafy
        /// <summary>
        /// Checks <c>_blas.GEMM</c> against a CPU reference computation for all four
        /// combinations of transposing A and B.
        /// </summary>
        public void Test_BLAS3_GEMM()
        {
            // A : No transpose, B : No transpose
            VerifyGemmCase(cublasOperation.N, cublasOperation.N);

            // A : Transpose, B : No transpose
            VerifyGemmCase(cublasOperation.T, cublasOperation.N);

            // A : No transpose, B : Transpose
            VerifyGemmCase(cublasOperation.N, cublasOperation.T);

            // A : Transpose, B : Transpose
            VerifyGemmCase(cublasOperation.T, cublasOperation.T);
        }

        /// <summary>
        /// Runs one GEMM case on the device and compares every element of the result with a
        /// CPU reference computed from the same host buffers.
        /// </summary>
        /// <param name="transA">Operation applied to A; <c>cublasOperation.T</c> selects the K x M host buffer.</param>
        /// <param name="transB">Operation applied to B; <c>cublasOperation.T</c> selects the N x K host buffer.</param>
        private void VerifyGemmCase(cublasOperation transA, cublasOperation transB)
        {
            bool aIsTransposed = transA == cublasOperation.T;
            bool bIsTransposed = transB == cublasOperation.T;

            // Pick the host buffer whose layout matches the requested operation.
            var hostA = aIsTransposed ? hiMatrixAKM : hiMatrixAMK;
            var hostB = bIsTransposed ? hiMatrixBNK : hiMatrixBKN;

            ClearBuffer(hostA);
            ClearBuffer(hostB);
            ClearBuffer(hiMatrixCMN);

            FillBuffer(hostA);
            FillBuffer(hostB);
            FillBuffer(hiMatrixCMN);

            try
            {
                diMatrixA = _gpu.CopyToDevice(hostA);
                diMatrixB = _gpu.CopyToDevice(hostB);
                diMatrixC = _gpu.CopyToDevice(hiMatrixCMN);

                _blas.GEMM(M, K, N, Alpha, diMatrixA, diMatrixB, Beta, diMatrixC, transA, transB);

                _gpu.CopyFromDevice(diMatrixC, gpuResultMN);

                for (int i = 0; i < M; i++)
                {
                    for (int j = 0; j < N; j++)
                    {
                        double cpuResult = 0.0;

                        for (int k = 0; k < K; k++)
                        {
                            // Element (i, k) of op(A) and element (k, j) of op(B), read from
                            // column-major storage; a transposed operand swaps its indices.
                            var aElement = hostA[aIsTransposed ? GetIndexColumnMajor(k, i, K) : GetIndexColumnMajor(i, k, M)];
                            var bElement = hostB[bIsTransposed ? GetIndexColumnMajor(j, k, N) : GetIndexColumnMajor(k, j, K)];

                            cpuResult += Alpha * aElement * bElement;
                        }

                        cpuResult += Beta * hiMatrixCMN[GetIndexColumnMajor(i, j, M)];

                        // NOTE(review): exact equality on floating-point results, kept from the
                        // original test; consider a tolerance if this proves flaky.
                        Assert.AreEqual(cpuResult, gpuResultMN[GetIndexColumnMajor(i, j, M)]);
                    }
                }
            }
            finally
            {
                // Release device memory even when an assertion fails, so a failing case
                // does not leak allocations into later cases or tests.
                _gpu.FreeAll();
            }
        }