Example #1
0
        public void cudaMalloc_cudaFree_cudaMemcpy_test_cudaMemset_test()
        {
            int  length    = 1024 * 2;
            byte testValue = 5;

            byte[] test   = new byte[length];
            byte[] result = new byte[length];

            IntPtr d_ptr  = IntPtr.Zero;
            var    size   = length * sizeof(byte);
            var    status = CudaRuntimeApi.cudaMalloc(ref d_ptr, (ulong)size);

            Assert.AreEqual(status, cudaError.cudaSuccess);
            Console.WriteLine($"ptr : {d_ptr}");

            GCHandle gchTest     = GCHandle.Alloc(test, GCHandleType.Pinned);
            GCHandle gchResult   = GCHandle.Alloc(result, GCHandleType.Pinned);
            IntPtr   h_ptrTest   = Marshal.UnsafeAddrOfPinnedArrayElement(test, 0);
            IntPtr   h_ptrResult = Marshal.UnsafeAddrOfPinnedArrayElement(result, 0);

            status = CudaRuntimeApi.cudaMemcpy(d_ptr, h_ptrTest, (ulong)size, cudaMemcpyKind.HostToDevice);
            Assert.AreEqual(status, cudaError.cudaSuccess);

            status = CudaRuntimeApi.cudaMemset(d_ptr, testValue, (ulong)size);
            Assert.AreEqual(status, cudaError.cudaSuccess);

            status = CudaRuntimeApi.cudaMemcpy(h_ptrResult, d_ptr, (ulong)size, cudaMemcpyKind.DeviceToHost);
            Assert.AreEqual(status, cudaError.cudaSuccess);

            for (int i = 0; i < length; i++)
            {
                Assert.AreEqual(result[i], testValue);
            }

            status = CudaRuntimeApi.cudaFree(d_ptr);
            Assert.AreEqual(status, cudaError.cudaSuccess);

            gchTest.Free();
            gchResult.Free();
        }
Example #2
0
        public void cublasCgemm_test()
        {
            int devCount = 0;
            var status   = CudaRuntimeApi.cudaGetDeviceCount(ref devCount);

            Assert.AreEqual(cudaError.cudaSuccess, status);

            int devId = 0;

            status = CudaRuntimeApi.cudaSetDevice(devId);

            var handle      = IntPtr.Zero;
            var cblasStatus = Cublas_api.cublasCreate_v2(ref handle);

            Random rand   = new Random();
            int    rows_a = rand.Next(2, 10);
            int    cols_a = rand.Next(2, 10);

            int rows_b = cols_a;
            int cols_b = rand.Next(2, 10);

            int rows_c  = rows_a;
            int cols_c  = cols_b;
            var A       = new float2[rows_a * cols_a];
            var B       = new float2[rows_b * cols_b];
            var C       = new float2[rows_c * cols_c];
            var resultC = new float2[rows_c * cols_c];

            var cA       = new Complex32[rows_a * cols_a];
            var cB       = new Complex32[rows_b * cols_b];
            var cC       = new Complex32[rows_c * cols_c];
            var cResultC = new Complex32[rows_c * cols_c];

            for (int i = 0; i < A.Length; i++)
            {
                var real = Convert.ToSingle(rand.Next(0, 10));
                var imag = Convert.ToSingle(rand.Next(0, 10));
                A[i]  = new float2(real, imag);
                cA[i] = new Complex32(real, imag);
            }

            for (int i = 0; i < B.Length; i++)
            {
                var real = Convert.ToSingle(rand.Next(0, 10));
                var imag = Convert.ToSingle(rand.Next(0, 10));
                B[i]  = new float2(real, imag);
                cB[i] = new Complex32(real, imag);
            }

            for (int i = 0; i < C.Length; i++)
            {
                var real = Convert.ToSingle(rand.Next(0, 10));
                var imag = Convert.ToSingle(rand.Next(0, 10));
                C[i]  = new float2(real, imag);
                cC[i] = new Complex32(real, imag);
            }

            var alphaReal = Convert.ToSingle(rand.Next(0, 10));
            var alphaImag = Convert.ToSingle(rand.Next(0, 10));
            var alpha     = new float2(alphaReal, alphaImag);
            var cAlpha    = new Complex32(alphaReal, alphaImag);

            var betaReal = Convert.ToSingle(rand.Next(0, 10));
            var betaImag = Convert.ToSingle(rand.Next(0, 10));
            var beta     = new float2(betaReal, betaImag);
            var cBeta    = new Complex32(betaReal, betaImag);

            var d_a = IntPtr.Zero;
            var d_b = IntPtr.Zero;
            var d_c = IntPtr.Zero;

            status = CudaRuntimeApi.cudaMalloc(ref d_a, (ulong)(A.Length * Marshal.SizeOf(typeof(float2))));
            status = CudaRuntimeApi.cudaMalloc(ref d_b, (ulong)(B.Length * Marshal.SizeOf(typeof(float2))));
            status = CudaRuntimeApi.cudaMalloc(ref d_c, (ulong)(C.Length * Marshal.SizeOf(typeof(float2))));

            var gch_a       = GCHandle.Alloc(A, GCHandleType.Pinned);
            var gch_b       = GCHandle.Alloc(B, GCHandleType.Pinned);
            var gch_c       = GCHandle.Alloc(C, GCHandleType.Pinned);
            var gch_resultC = GCHandle.Alloc(resultC, GCHandleType.Pinned);

            var h_a       = Marshal.UnsafeAddrOfPinnedArrayElement(A, 0);
            var h_b       = Marshal.UnsafeAddrOfPinnedArrayElement(B, 0);
            var h_c       = Marshal.UnsafeAddrOfPinnedArrayElement(C, 0);
            var h_resultC = Marshal.UnsafeAddrOfPinnedArrayElement(resultC, 0);

            status = CudaRuntimeApi.cudaMemcpy(d_a, h_a, (ulong)(A.Length * Marshal.SizeOf(typeof(float2))), cudaMemcpyKind.HostToDevice);
            status = CudaRuntimeApi.cudaMemcpy(d_b, h_b, (ulong)(B.Length * Marshal.SizeOf(typeof(float2))), cudaMemcpyKind.HostToDevice);
            status = CudaRuntimeApi.cudaMemcpy(d_c, h_c, (ulong)(C.Length * Marshal.SizeOf(typeof(float2))), cudaMemcpyKind.HostToDevice);

            cblasStatus = Cublas_api.cublasCgemm_v2(
                handle,
                cublasOperation_t.CUBLAS_OP_N,
                cublasOperation_t.CUBLAS_OP_N,
                rows_a,
                cols_b,
                cols_a,
                ref alpha,
                d_a,
                rows_a,
                d_b,
                rows_b,
                ref beta,
                d_c,
                rows_c
                );

            status = CudaRuntimeApi.cudaMemcpy(h_resultC, d_c, (ulong)(resultC.Length * Marshal.SizeOf(typeof(float2))), cudaMemcpyKind.DeviceToHost);
            for (int i = 0; i < rows_c * cols_c; i++)
            {
                cResultC[i] = new Complex32(resultC[i].X, resultC[i].Y);
            }
            var mResultC = Matrix <Complex32> .Build.Dense(rows_c, cols_c, cResultC);

            var mA = Matrix <Complex32> .Build.Dense(rows_a, cols_a, cA);

            var mB = Matrix <Complex32> .Build.Dense(rows_b, cols_b, cB);

            var mExpectedC = Matrix <Complex32> .Build.Dense(rows_c, cols_c, cC).Clone();

            mExpectedC = cAlpha * mA * mB + cBeta * mExpectedC;
            Complex32[] expected = mExpectedC.ToColumnMajorArray();

            Console.WriteLine("alpha : {0}, beta : {1}", alpha, beta);
            Console.WriteLine("A");
            Console.WriteLine(mA.ToString());
            Console.WriteLine();
            Console.WriteLine("B");
            Console.WriteLine(mB.ToString());
            Console.WriteLine();
            Console.WriteLine("resultC");
            Console.WriteLine(mResultC.ToString());
            Console.WriteLine();
            Console.WriteLine("expectedC");
            Console.WriteLine(mExpectedC.ToString());

            for (int i = 0; i < C.Length; i++)
            {
                Assert.AreEqual(expected[i], cResultC[i]);
            }

            cblasStatus = Cublas_api.cublasDestroy_v2(handle);

            status = CudaRuntimeApi.cudaFree(d_a);
            status = CudaRuntimeApi.cudaFree(d_b);
            status = CudaRuntimeApi.cudaFree(d_c);

            gch_a.Free();
            gch_b.Free();
            gch_c.Free();
        }
Example #3
0
        public void cublasSgemm_test()
        {
            int devCount = 0;
            var status   = CudaRuntimeApi.cudaGetDeviceCount(ref devCount);

            Assert.AreEqual(cudaError.cudaSuccess, status);

            int devId = 0;

            status = CudaRuntimeApi.cudaSetDevice(devId);

            var handle      = IntPtr.Zero;
            var cblasStatus = Cublas_api.cublasCreate_v2(ref handle);

            Random rand   = new Random();
            int    rows_a = rand.Next(2, 10);
            int    cols_a = rand.Next(2, 10);

            int rows_b = cols_a;
            int cols_b = rand.Next(2, 10);

            int rows_c = rows_a;
            int cols_c = cols_b;

            float alpha = 1.0F;
            float beta  = 0.0F;

            float[] A       = new float[rows_a * cols_a];
            float[] B       = new float[rows_b * cols_b];
            float[] C       = new float[rows_c * cols_c];
            float[] resultC = new float[rows_c * cols_c];

            for (int i = 0; i < A.Length; i++)
            {
                A[i] = Convert.ToSingle(rand.Next(0, 10));
            }

            for (int i = 0; i < B.Length; i++)
            {
                B[i] = Convert.ToSingle(rand.Next(0, 10));
            }

            var d_a = IntPtr.Zero;
            var d_b = IntPtr.Zero;
            var d_c = IntPtr.Zero;

            status = CudaRuntimeApi.cudaMalloc(ref d_a, (ulong)A.Length * sizeof(float));
            status = CudaRuntimeApi.cudaMalloc(ref d_b, (ulong)B.Length * sizeof(float));
            status = CudaRuntimeApi.cudaMalloc(ref d_c, (ulong)C.Length * sizeof(float));

            var gch_a       = GCHandle.Alloc(A, GCHandleType.Pinned);
            var gch_b       = GCHandle.Alloc(B, GCHandleType.Pinned);
            var gch_c       = GCHandle.Alloc(C, GCHandleType.Pinned);
            var gch_resultC = GCHandle.Alloc(resultC, GCHandleType.Pinned);

            var h_a       = Marshal.UnsafeAddrOfPinnedArrayElement(A, 0);
            var h_b       = Marshal.UnsafeAddrOfPinnedArrayElement(B, 0);
            var h_c       = Marshal.UnsafeAddrOfPinnedArrayElement(C, 0);
            var h_resultC = Marshal.UnsafeAddrOfPinnedArrayElement(resultC, 0);

            status = CudaRuntimeApi.cudaMemcpy(d_a, h_a, (ulong)A.Length * sizeof(float), cudaMemcpyKind.HostToDevice);
            status = CudaRuntimeApi.cudaMemcpy(d_b, h_b, (ulong)B.Length * sizeof(float), cudaMemcpyKind.HostToDevice);
            status = CudaRuntimeApi.cudaMemcpy(d_c, h_c, (ulong)C.Length * sizeof(float), cudaMemcpyKind.HostToDevice);

            cblasStatus = Cublas_api.cublasSgemm_v2(
                handle,
                cublasOperation_t.CUBLAS_OP_N,
                cublasOperation_t.CUBLAS_OP_N,
                rows_a,
                cols_b,
                cols_a,
                ref alpha,
                d_a,
                rows_a,
                d_b,
                rows_b,
                ref beta,
                d_c,
                rows_c
                );

            status = CudaRuntimeApi.cudaMemcpy(h_resultC, d_c, (ulong)C.Length * sizeof(float), cudaMemcpyKind.DeviceToHost);
            var mResultC = Matrix <float> .Build.Dense(rows_c, cols_c, resultC);

            var mA = Matrix <float> .Build.Dense(rows_a, cols_a, A);

            var mB = Matrix <float> .Build.Dense(rows_b, cols_b, B);

            var mExpectedC = Matrix <float> .Build.Dense(rows_c, cols_c, C).Clone();

            mExpectedC = alpha * mA * mB + beta * mExpectedC;
            float[] expected = mExpectedC.ToColumnMajorArray();

            Console.WriteLine("alpha : {0}, beta : {1}", alpha, beta);
            Console.WriteLine("A");
            Console.WriteLine(mA.ToString());
            Console.WriteLine();
            Console.WriteLine("B");
            Console.WriteLine(mB.ToString());
            Console.WriteLine();
            Console.WriteLine("resultC");
            Console.WriteLine(mResultC.ToString());
            Console.WriteLine();
            Console.WriteLine("expectedC");
            Console.WriteLine(mExpectedC.ToString());

            for (int i = 0; i < C.Length; i++)
            {
                Assert.AreEqual(expected[i], resultC[i]);
            }

            cblasStatus = Cublas_api.cublasDestroy_v2(handle);

            status = CudaRuntimeApi.cudaFree(d_a);
            status = CudaRuntimeApi.cudaFree(d_b);
            status = CudaRuntimeApi.cudaFree(d_c);

            gch_a.Free();
            gch_b.Free();
            gch_c.Free();
        }