예제 #1
0
파일: cublas_api.cs 프로젝트: DNRY/CSCuda
 // Managed binding for the cuBLAS single-precision complex matrix-matrix
 // multiply (cublasCgemm_v2). For non-transposed operands the call updates C as
 //     C = alpha * A * B + beta * C
 // with all matrices stored column-major, as cuBLAS expects.
 //
 // Parameters:
 //   handle  - cuBLAS context handle (obtained from cublasCreate_v2).
 //   transa  - operation applied to A before the multiply (e.g. CUBLAS_OP_N).
 //   transb  - operation applied to B before the multiply (e.g. CUBLAS_OP_N).
 //   m, n, k - problem dimensions; with CUBLAS_OP_N on both operands these are
 //             rows of A/C, columns of B/C and columns of A/rows of B.
 //   alpha   - complex scalar multiplying the A * B product; passed by reference.
 //   A, B    - pointers to the input matrices; presumably device memory
 //             (cudaMalloc) under the default pointer mode - TODO confirm.
 //   lda/ldb - leading dimensions (column stride in elements) of A and B.
 //   beta    - complex scalar multiplying the incoming C; passed by reference.
 //   C       - pointer to the matrix that is read and overwritten with the result.
 //   ldc     - leading dimension of C.
 //
 // Returns the cublasStatus_t reported by the native call; callers should
 // check it rather than assume success.
 public static extern cublasStatus_t cublasCgemm_v2(
     IntPtr handle,
     cublasOperation_t transa,
     cublasOperation_t transb,
     int m,
     int n,
     int k,
     ref cuComplex alpha,
     IntPtr A,
     int lda,
     IntPtr B,
     int ldb,
     ref cuComplex beta,
     IntPtr C,
     int ldc
     );
예제 #2
0
        // Verifies cublasCgemm_v2 against a host-side reference.
        //
        // Random complex matrices A (rows_a x cols_a), B (cols_a x cols_b) and
        // C (rows_a x cols_b) are filled with small integer-valued components,
        // uploaded to the device, combined as C = alpha * A * B + beta * C by
        // cuBLAS, and the downloaded result is compared element-by-element with
        // the same expression evaluated by the Matrix<Complex32> reference.
        //
        // Every CUDA / cuBLAS status is asserted so that a failing allocation,
        // copy or gemm call is reported at its source instead of surfacing as a
        // value mismatch, and all native resources (cuBLAS handle, device
        // buffers, pinned GC handles) are released in a finally block even when
        // an assertion fails.
        public void cublasCgemm_test()
        {
            int devCount = 0;
            var status = CudaRuntimeApi.cudaGetDeviceCount(ref devCount);
            Assert.AreEqual(cudaError.cudaSuccess, status);

            int devId = 0;
            status = CudaRuntimeApi.cudaSetDevice(devId);
            Assert.AreEqual(cudaError.cudaSuccess, status);

            // ---- Host-side test data -------------------------------------
            Random rand = new Random();
            int rows_a = rand.Next(2, 10);
            int cols_a = rand.Next(2, 10);

            int rows_b = cols_a;
            int cols_b = rand.Next(2, 10);

            int rows_c = rows_a;
            int cols_c = cols_b;

            // float2 arrays are what gets copied to the device; the Complex32
            // arrays mirror them for the host reference computation.
            var A = new float2[rows_a * cols_a];
            var B = new float2[rows_b * cols_b];
            var C = new float2[rows_c * cols_c];
            var resultC = new float2[rows_c * cols_c];

            var cA = new Complex32[rows_a * cols_a];
            var cB = new Complex32[rows_b * cols_b];
            var cC = new Complex32[rows_c * cols_c];
            var cResultC = new Complex32[rows_c * cols_c];

            for (int i = 0; i < A.Length; i++)
            {
                var real = Convert.ToSingle(rand.Next(0, 10));
                var imag = Convert.ToSingle(rand.Next(0, 10));
                A[i] = new float2(real, imag);
                cA[i] = new Complex32(real, imag);
            }

            for (int i = 0; i < B.Length; i++)
            {
                var real = Convert.ToSingle(rand.Next(0, 10));
                var imag = Convert.ToSingle(rand.Next(0, 10));
                B[i] = new float2(real, imag);
                cB[i] = new Complex32(real, imag);
            }

            for (int i = 0; i < C.Length; i++)
            {
                var real = Convert.ToSingle(rand.Next(0, 10));
                var imag = Convert.ToSingle(rand.Next(0, 10));
                C[i] = new float2(real, imag);
                cC[i] = new Complex32(real, imag);
            }

            var alphaReal = Convert.ToSingle(rand.Next(0, 10));
            var alphaImag = Convert.ToSingle(rand.Next(0, 10));
            var alpha = new float2(alphaReal, alphaImag);
            var cAlpha = new Complex32(alphaReal, alphaImag);

            var betaReal = Convert.ToSingle(rand.Next(0, 10));
            var betaImag = Convert.ToSingle(rand.Next(0, 10));
            var beta = new float2(betaReal, betaImag);
            var cBeta = new Complex32(betaReal, betaImag);

            int elemSize = Marshal.SizeOf(typeof(float2));
            ulong bytesA = (ulong)(A.Length * elemSize);
            ulong bytesB = (ulong)(B.Length * elemSize);
            ulong bytesC = (ulong)(C.Length * elemSize);

            // ---- Native resources (released in the finally block) --------
            var handle = IntPtr.Zero;
            var d_a = IntPtr.Zero;
            var d_b = IntPtr.Zero;
            var d_c = IntPtr.Zero;
            var gch_a = default(GCHandle);
            var gch_b = default(GCHandle);
            var gch_c = default(GCHandle);
            var gch_resultC = default(GCHandle);

            try
            {
                var cblasStatus = Cublas_api.cublasCreate_v2(ref handle);
                Assert.AreEqual(cublasStatus_t.CUBLAS_STATUS_SUCCESS, cblasStatus);

                status = CudaRuntimeApi.cudaMalloc(ref d_a, bytesA);
                Assert.AreEqual(cudaError.cudaSuccess, status);
                status = CudaRuntimeApi.cudaMalloc(ref d_b, bytesB);
                Assert.AreEqual(cudaError.cudaSuccess, status);
                status = CudaRuntimeApi.cudaMalloc(ref d_c, bytesC);
                Assert.AreEqual(cudaError.cudaSuccess, status);

                // Pin the managed arrays so their addresses stay valid while
                // the native memcpy calls read from / write to them.
                gch_a = GCHandle.Alloc(A, GCHandleType.Pinned);
                gch_b = GCHandle.Alloc(B, GCHandleType.Pinned);
                gch_c = GCHandle.Alloc(C, GCHandleType.Pinned);
                gch_resultC = GCHandle.Alloc(resultC, GCHandleType.Pinned);

                var h_a = Marshal.UnsafeAddrOfPinnedArrayElement(A, 0);
                var h_b = Marshal.UnsafeAddrOfPinnedArrayElement(B, 0);
                var h_c = Marshal.UnsafeAddrOfPinnedArrayElement(C, 0);
                var h_resultC = Marshal.UnsafeAddrOfPinnedArrayElement(resultC, 0);

                status = CudaRuntimeApi.cudaMemcpy(d_a, h_a, bytesA, cudaMemcpyKind.HostToDevice);
                Assert.AreEqual(cudaError.cudaSuccess, status);
                status = CudaRuntimeApi.cudaMemcpy(d_b, h_b, bytesB, cudaMemcpyKind.HostToDevice);
                Assert.AreEqual(cudaError.cudaSuccess, status);
                status = CudaRuntimeApi.cudaMemcpy(d_c, h_c, bytesC, cudaMemcpyKind.HostToDevice);
                Assert.AreEqual(cudaError.cudaSuccess, status);

                // Column-major, non-transposed operands: the leading dimension
                // of each matrix is its row count.
                cblasStatus = Cublas_api.cublasCgemm_v2(
                            handle,
                            cublasOperation_t.CUBLAS_OP_N,
                            cublasOperation_t.CUBLAS_OP_N,
                            rows_a,
                            cols_b,
                            cols_a,
                            ref alpha,
                            d_a,
                            rows_a,
                            d_b,
                            rows_b,
                            ref beta,
                            d_c,
                            rows_c
                            );
                Assert.AreEqual(cublasStatus_t.CUBLAS_STATUS_SUCCESS, cblasStatus);

                // Blocking device-to-host copy; a failure during the gemm
                // would also be reported through this status.
                status = CudaRuntimeApi.cudaMemcpy(h_resultC, d_c, bytesC, cudaMemcpyKind.DeviceToHost);
                Assert.AreEqual(cudaError.cudaSuccess, status);

                for (int i = 0; i < rows_c * cols_c; i++)
                {
                    cResultC[i] = new Complex32(resultC[i].X, resultC[i].Y);
                }
                var mResultC = Matrix<Complex32>.Build.Dense(rows_c, cols_c, cResultC);

                // Host reference: same expression on the mirrored data.
                var mA = Matrix<Complex32>.Build.Dense(rows_a, cols_a, cA);
                var mB = Matrix<Complex32>.Build.Dense(rows_b, cols_b, cB);
                var mC = Matrix<Complex32>.Build.Dense(rows_c, cols_c, cC);
                var mExpectedC = cAlpha * mA * mB + cBeta * mC;
                Complex32[] expected = mExpectedC.ToColumnWiseArray();

                Console.WriteLine("alpha : {0}, beta : {1}", alpha, beta);
                Console.WriteLine("A");
                Console.WriteLine(mA.ToString());
                Console.WriteLine();
                Console.WriteLine("B");
                Console.WriteLine(mB.ToString());
                Console.WriteLine();
                Console.WriteLine("resultC");
                Console.WriteLine(mResultC.ToString());
                Console.WriteLine();
                Console.WriteLine("expectedC");
                Console.WriteLine(mExpectedC.ToString());

                // Exact comparison is intentional: every input component is a
                // small non-negative integer, so all products and sums are
                // exactly representable in single precision.
                for (int i = 0; i < C.Length; i++)
                {
                    Assert.AreEqual(expected[i], cResultC[i], "Mismatch at column-major index " + i);
                }
            }
            finally
            {
                // Cleanup is best-effort: statuses are deliberately not
                // asserted here so a cleanup failure cannot mask the original
                // test failure.
                if (handle != IntPtr.Zero)
                {
                    Cublas_api.cublasDestroy_v2(handle);
                }

                if (d_a != IntPtr.Zero)
                {
                    CudaRuntimeApi.cudaFree(d_a);
                }
                if (d_b != IntPtr.Zero)
                {
                    CudaRuntimeApi.cudaFree(d_b);
                }
                if (d_c != IntPtr.Zero)
                {
                    CudaRuntimeApi.cudaFree(d_c);
                }

                if (gch_a.IsAllocated)
                {
                    gch_a.Free();
                }
                if (gch_b.IsAllocated)
                {
                    gch_b.Free();
                }
                if (gch_c.IsAllocated)
                {
                    gch_c.Free();
                }
                // The original never released this handle, leaving resultC
                // pinned for the lifetime of the process.
                if (gch_resultC.IsAllocated)
                {
                    gch_resultC.Free();
                }
            }
        }