// Smoke test: queries the number of CUDA devices and checks that the call succeeds.
// The count itself is only printed, not asserted.
public void cudaGetDeviceCount_test()
        {
            int count  = 0;
            var status = CudaRuntimeApi.cudaGetDeviceCount(ref count);

            // Expected value goes first so a failure is reported as
            // "expected cudaSuccess but was <actual error>" rather than the reverse.
            Assert.AreEqual(cudaError.cudaSuccess, status);
            Console.WriteLine("cuda device count : {0}", count);
        }
        // Walks over every device reported by cudaGetDeviceCount, selects it, and prints its
        // name, uuid, driver/runtime versions, compute capability and global memory size.
        // Every runtime call made along the way is required to return cudaSuccess.
        public void cudaSetDevice_cudaGetDeviceProperties_cudaDriverGetVersion_cudaRuntimeGetVersion_test()
        {
            int deviceCount = 0;
            var status      = CudaRuntimeApi.cudaGetDeviceCount(ref deviceCount);

            // Expected value first, so failure messages name the real error code as "actual".
            Assert.AreEqual(cudaError.cudaSuccess, status);

            // This function call returns 0 if there are no CUDA capable devices.
            if (deviceCount == 0)
            {
                Console.WriteLine("There are no available device(s) that support CUDA");
            }
            else
            {
                Console.WriteLine("Detected {0} CUDA Capable device(s)", deviceCount);
            }

            for (int i = 0; i < deviceCount; i++)
            {
                status = CudaRuntimeApi.cudaSetDevice(i);
                Assert.AreEqual(cudaError.cudaSuccess, status);

                int driverVersion  = 0;
                int runtimeVersion = 0;
                status = CudaRuntimeApi.cudaDriverGetVersion(ref driverVersion);
                Assert.AreEqual(cudaError.cudaSuccess, status);
                status = CudaRuntimeApi.cudaRuntimeGetVersion(ref runtimeVersion);
                Assert.AreEqual(cudaError.cudaSuccess, status);

                // The property query can fail as well; check it instead of printing a
                // default-initialised struct as if it described the device.
                var deviceProp = new cudaDeviceProp();
                status = CudaRuntimeApi.cudaGetDeviceProperties(ref deviceProp, i);
                Assert.AreEqual(cudaError.cudaSuccess, status);

                var uuid    = HexStringFromByteArray(UnsignedBytesFromSignedBytes(deviceProp.uuid.bytes));
                var devName = Encoding.Default.GetString(UnsignedBytesFromSignedBytes(deviceProp.name)).TrimEnd('\0');
                Console.WriteLine("\nDevice {0}: \"{1}\", uuid = {2}", i, devName, uuid);

                // CUDA reports versions as 1000 * major + 10 * minor; split them for display.
                Console.WriteLine("  CUDA Driver Version / Runtime Version          {0}.{1} / {2}.{3}",
                                  driverVersion / 1000, (driverVersion % 100) / 10,
                                  runtimeVersion / 1000, (runtimeVersion % 100) / 10);
                Console.WriteLine("  CUDA Capability Major/Minor version number:    {0}.{1}",
                                  deviceProp.major, deviceProp.minor);
                Console.WriteLine(
                    "  Total amount of global memory:                 {0:0} MBytes ({1} bytes)",
                    Convert.ToSingle(deviceProp.totalGlobalMem / 1048576.0f), deviceProp.totalGlobalMem);
            }
        }
Exemple #3
0
        // Computes C = alpha * A * B + beta * C for random single-precision complex matrices on
        // the GPU with cublasCgemm_v2 and compares the result, element by element, with the same
        // expression evaluated on the host through Matrix<Complex32>.
        //
        // All operands are built from small integers (0..9, dimensions 2..9), so every
        // intermediate value is exactly representable in single precision and the exact
        // equality check at the end is safe.
        public void cublasCgemm_test()
        {
            int devCount = 0;
            var status   = CudaRuntimeApi.cudaGetDeviceCount(ref devCount);

            Assert.AreEqual(cudaError.cudaSuccess, status);

            int devId = 0;

            status = CudaRuntimeApi.cudaSetDevice(devId);
            Assert.AreEqual(cudaError.cudaSuccess, status);

            Random rand   = new Random();
            int    rows_a = rand.Next(2, 10);
            int    cols_a = rand.Next(2, 10);

            int rows_b = cols_a;
            int cols_b = rand.Next(2, 10);

            int rows_c  = rows_a;
            int cols_c  = cols_b;

            // float2 arrays are what gets copied to the device; the Complex32 arrays hold the
            // same values for the host-side reference computation.
            var A       = new float2[rows_a * cols_a];
            var B       = new float2[rows_b * cols_b];
            var C       = new float2[rows_c * cols_c];
            var resultC = new float2[rows_c * cols_c];

            var cA       = new Complex32[rows_a * cols_a];
            var cB       = new Complex32[rows_b * cols_b];
            var cC       = new Complex32[rows_c * cols_c];
            var cResultC = new Complex32[rows_c * cols_c];

            for (int i = 0; i < A.Length; i++)
            {
                var real = Convert.ToSingle(rand.Next(0, 10));
                var imag = Convert.ToSingle(rand.Next(0, 10));
                A[i]  = new float2(real, imag);
                cA[i] = new Complex32(real, imag);
            }

            for (int i = 0; i < B.Length; i++)
            {
                var real = Convert.ToSingle(rand.Next(0, 10));
                var imag = Convert.ToSingle(rand.Next(0, 10));
                B[i]  = new float2(real, imag);
                cB[i] = new Complex32(real, imag);
            }

            for (int i = 0; i < C.Length; i++)
            {
                var real = Convert.ToSingle(rand.Next(0, 10));
                var imag = Convert.ToSingle(rand.Next(0, 10));
                C[i]  = new float2(real, imag);
                cC[i] = new Complex32(real, imag);
            }

            var alphaReal = Convert.ToSingle(rand.Next(0, 10));
            var alphaImag = Convert.ToSingle(rand.Next(0, 10));
            var alpha     = new float2(alphaReal, alphaImag);
            var cAlpha    = new Complex32(alphaReal, alphaImag);

            var betaReal = Convert.ToSingle(rand.Next(0, 10));
            var betaImag = Convert.ToSingle(rand.Next(0, 10));
            var beta     = new float2(betaReal, betaImag);
            var cBeta    = new Complex32(betaReal, betaImag);

            // Buffer sizes in bytes, computed once and reused for allocation and copies.
            int   elementSize = Marshal.SizeOf(typeof(float2));
            ulong bytesA      = (ulong)(A.Length * elementSize);
            ulong bytesB      = (ulong)(B.Length * elementSize);
            ulong bytesC      = (ulong)(C.Length * elementSize);

            var handle = IntPtr.Zero;
            var d_a    = IntPtr.Zero;
            var d_b    = IntPtr.Zero;
            var d_c    = IntPtr.Zero;

            // Pin the host arrays so their addresses stay valid while the runtime copies.
            var gch_a       = GCHandle.Alloc(A, GCHandleType.Pinned);
            var gch_b       = GCHandle.Alloc(B, GCHandleType.Pinned);
            var gch_c       = GCHandle.Alloc(C, GCHandleType.Pinned);
            var gch_resultC = GCHandle.Alloc(resultC, GCHandleType.Pinned);

            try
            {
                // cuBLAS documents CUBLAS_STATUS_SUCCESS as 0. The status is compared
                // numerically because the binding's status type is not named in this test.
                // NOTE(review): assumes the returned status is an enum/integer - confirm.
                var cblasStatus = Cublas_api.cublasCreate_v2(ref handle);
                Assert.AreEqual(0, Convert.ToInt32(cblasStatus));

                status = CudaRuntimeApi.cudaMalloc(ref d_a, bytesA);
                Assert.AreEqual(cudaError.cudaSuccess, status);
                status = CudaRuntimeApi.cudaMalloc(ref d_b, bytesB);
                Assert.AreEqual(cudaError.cudaSuccess, status);
                status = CudaRuntimeApi.cudaMalloc(ref d_c, bytesC);
                Assert.AreEqual(cudaError.cudaSuccess, status);

                var h_a       = Marshal.UnsafeAddrOfPinnedArrayElement(A, 0);
                var h_b       = Marshal.UnsafeAddrOfPinnedArrayElement(B, 0);
                var h_c       = Marshal.UnsafeAddrOfPinnedArrayElement(C, 0);
                var h_resultC = Marshal.UnsafeAddrOfPinnedArrayElement(resultC, 0);

                status = CudaRuntimeApi.cudaMemcpy(d_a, h_a, bytesA, cudaMemcpyKind.HostToDevice);
                Assert.AreEqual(cudaError.cudaSuccess, status);
                status = CudaRuntimeApi.cudaMemcpy(d_b, h_b, bytesB, cudaMemcpyKind.HostToDevice);
                Assert.AreEqual(cudaError.cudaSuccess, status);
                status = CudaRuntimeApi.cudaMemcpy(d_c, h_c, bytesC, cudaMemcpyKind.HostToDevice);
                Assert.AreEqual(cudaError.cudaSuccess, status);

                // No transposition; each leading dimension is the matrix's own row count.
                cblasStatus = Cublas_api.cublasCgemm_v2(
                    handle,
                    cublasOperation_t.CUBLAS_OP_N,
                    cublasOperation_t.CUBLAS_OP_N,
                    rows_a,
                    cols_b,
                    cols_a,
                    ref alpha,
                    d_a,
                    rows_a,
                    d_b,
                    rows_b,
                    ref beta,
                    d_c,
                    rows_c
                    );
                Assert.AreEqual(0, Convert.ToInt32(cblasStatus));

                status = CudaRuntimeApi.cudaMemcpy(h_resultC, d_c, bytesC, cudaMemcpyKind.DeviceToHost);
                Assert.AreEqual(cudaError.cudaSuccess, status);

                for (int i = 0; i < resultC.Length; i++)
                {
                    cResultC[i] = new Complex32(resultC[i].X, resultC[i].Y);
                }
                var mResultC = Matrix<Complex32>.Build.Dense(rows_c, cols_c, cResultC);

                var mA = Matrix<Complex32>.Build.Dense(rows_a, cols_a, cA);

                var mB = Matrix<Complex32>.Build.Dense(rows_b, cols_b, cB);

                var mExpectedC = Matrix<Complex32>.Build.Dense(rows_c, cols_c, cC).Clone();

                // Host-side reference for the same GEMM expression.
                mExpectedC = cAlpha * mA * mB + cBeta * mExpectedC;
                Complex32[] expected = mExpectedC.ToColumnMajorArray();

                Console.WriteLine("alpha : {0}, beta : {1}", alpha, beta);
                Console.WriteLine("A");
                Console.WriteLine(mA.ToString());
                Console.WriteLine();
                Console.WriteLine("B");
                Console.WriteLine(mB.ToString());
                Console.WriteLine();
                Console.WriteLine("resultC");
                Console.WriteLine(mResultC.ToString());
                Console.WriteLine();
                Console.WriteLine("expectedC");
                Console.WriteLine(mExpectedC.ToString());

                for (int i = 0; i < C.Length; i++)
                {
                    Assert.AreEqual(expected[i], cResultC[i]);
                }
            }
            finally
            {
                // Cleanup runs even when an assertion above throws. Release statuses are not
                // asserted so a cleanup failure cannot mask the original test failure.
                if (handle != IntPtr.Zero)
                {
                    Cublas_api.cublasDestroy_v2(handle);
                }

                if (d_a != IntPtr.Zero)
                {
                    CudaRuntimeApi.cudaFree(d_a);
                }

                if (d_b != IntPtr.Zero)
                {
                    CudaRuntimeApi.cudaFree(d_b);
                }

                if (d_c != IntPtr.Zero)
                {
                    CudaRuntimeApi.cudaFree(d_c);
                }

                gch_a.Free();
                gch_b.Free();
                gch_c.Free();
                gch_resultC.Free();
            }
        }
Exemple #4
0
        // Computes C = alpha * A * B + beta * C for random single-precision matrices on the GPU
        // with cublasSgemm_v2 (alpha = 1, beta = 0) and compares the result, element by element,
        // with the same expression evaluated on the host through Matrix<float>.
        //
        // A and B hold small integers (0..9) and dimensions are 2..9, so every sum is exactly
        // representable as a float and the exact equality check at the end is safe.
        public void cublasSgemm_test()
        {
            int devCount = 0;
            var status   = CudaRuntimeApi.cudaGetDeviceCount(ref devCount);

            Assert.AreEqual(cudaError.cudaSuccess, status);

            int devId = 0;

            status = CudaRuntimeApi.cudaSetDevice(devId);
            Assert.AreEqual(cudaError.cudaSuccess, status);

            Random rand   = new Random();
            int    rows_a = rand.Next(2, 10);
            int    cols_a = rand.Next(2, 10);

            int rows_b = cols_a;
            int cols_b = rand.Next(2, 10);

            int rows_c = rows_a;
            int cols_c = cols_b;

            float alpha = 1.0F;
            float beta  = 0.0F;

            float[] A       = new float[rows_a * cols_a];
            float[] B       = new float[rows_b * cols_b];
            float[] C       = new float[rows_c * cols_c];
            float[] resultC = new float[rows_c * cols_c];

            for (int i = 0; i < A.Length; i++)
            {
                A[i] = Convert.ToSingle(rand.Next(0, 10));
            }

            for (int i = 0; i < B.Length; i++)
            {
                B[i] = Convert.ToSingle(rand.Next(0, 10));
            }

            // Buffer sizes in bytes, computed once and reused for allocation and copies.
            ulong bytesA = (ulong)A.Length * sizeof(float);
            ulong bytesB = (ulong)B.Length * sizeof(float);
            ulong bytesC = (ulong)C.Length * sizeof(float);

            var handle = IntPtr.Zero;
            var d_a    = IntPtr.Zero;
            var d_b    = IntPtr.Zero;
            var d_c    = IntPtr.Zero;

            // Pin the host arrays so their addresses stay valid while the runtime copies.
            var gch_a       = GCHandle.Alloc(A, GCHandleType.Pinned);
            var gch_b       = GCHandle.Alloc(B, GCHandleType.Pinned);
            var gch_c       = GCHandle.Alloc(C, GCHandleType.Pinned);
            var gch_resultC = GCHandle.Alloc(resultC, GCHandleType.Pinned);

            try
            {
                // cuBLAS documents CUBLAS_STATUS_SUCCESS as 0. The status is compared
                // numerically because the binding's status type is not named in this test.
                // NOTE(review): assumes the returned status is an enum/integer - confirm.
                var cblasStatus = Cublas_api.cublasCreate_v2(ref handle);
                Assert.AreEqual(0, Convert.ToInt32(cblasStatus));

                status = CudaRuntimeApi.cudaMalloc(ref d_a, bytesA);
                Assert.AreEqual(cudaError.cudaSuccess, status);
                status = CudaRuntimeApi.cudaMalloc(ref d_b, bytesB);
                Assert.AreEqual(cudaError.cudaSuccess, status);
                status = CudaRuntimeApi.cudaMalloc(ref d_c, bytesC);
                Assert.AreEqual(cudaError.cudaSuccess, status);

                var h_a       = Marshal.UnsafeAddrOfPinnedArrayElement(A, 0);
                var h_b       = Marshal.UnsafeAddrOfPinnedArrayElement(B, 0);
                var h_c       = Marshal.UnsafeAddrOfPinnedArrayElement(C, 0);
                var h_resultC = Marshal.UnsafeAddrOfPinnedArrayElement(resultC, 0);

                status = CudaRuntimeApi.cudaMemcpy(d_a, h_a, bytesA, cudaMemcpyKind.HostToDevice);
                Assert.AreEqual(cudaError.cudaSuccess, status);
                status = CudaRuntimeApi.cudaMemcpy(d_b, h_b, bytesB, cudaMemcpyKind.HostToDevice);
                Assert.AreEqual(cudaError.cudaSuccess, status);
                status = CudaRuntimeApi.cudaMemcpy(d_c, h_c, bytesC, cudaMemcpyKind.HostToDevice);
                Assert.AreEqual(cudaError.cudaSuccess, status);

                // No transposition; each leading dimension is the matrix's own row count.
                cblasStatus = Cublas_api.cublasSgemm_v2(
                    handle,
                    cublasOperation_t.CUBLAS_OP_N,
                    cublasOperation_t.CUBLAS_OP_N,
                    rows_a,
                    cols_b,
                    cols_a,
                    ref alpha,
                    d_a,
                    rows_a,
                    d_b,
                    rows_b,
                    ref beta,
                    d_c,
                    rows_c
                    );
                Assert.AreEqual(0, Convert.ToInt32(cblasStatus));

                status = CudaRuntimeApi.cudaMemcpy(h_resultC, d_c, bytesC, cudaMemcpyKind.DeviceToHost);
                Assert.AreEqual(cudaError.cudaSuccess, status);

                var mResultC = Matrix<float>.Build.Dense(rows_c, cols_c, resultC);

                var mA = Matrix<float>.Build.Dense(rows_a, cols_a, A);

                var mB = Matrix<float>.Build.Dense(rows_b, cols_b, B);

                var mExpectedC = Matrix<float>.Build.Dense(rows_c, cols_c, C).Clone();

                // Host-side reference for the same GEMM expression.
                mExpectedC = alpha * mA * mB + beta * mExpectedC;
                float[] expected = mExpectedC.ToColumnMajorArray();

                Console.WriteLine("alpha : {0}, beta : {1}", alpha, beta);
                Console.WriteLine("A");
                Console.WriteLine(mA.ToString());
                Console.WriteLine();
                Console.WriteLine("B");
                Console.WriteLine(mB.ToString());
                Console.WriteLine();
                Console.WriteLine("resultC");
                Console.WriteLine(mResultC.ToString());
                Console.WriteLine();
                Console.WriteLine("expectedC");
                Console.WriteLine(mExpectedC.ToString());

                for (int i = 0; i < C.Length; i++)
                {
                    Assert.AreEqual(expected[i], resultC[i]);
                }
            }
            finally
            {
                // Cleanup runs even when an assertion above throws. Release statuses are not
                // asserted so a cleanup failure cannot mask the original test failure.
                if (handle != IntPtr.Zero)
                {
                    Cublas_api.cublasDestroy_v2(handle);
                }

                if (d_a != IntPtr.Zero)
                {
                    CudaRuntimeApi.cudaFree(d_a);
                }

                if (d_b != IntPtr.Zero)
                {
                    CudaRuntimeApi.cudaFree(d_b);
                }

                if (d_c != IntPtr.Zero)
                {
                    CudaRuntimeApi.cudaFree(d_c);
                }

                gch_a.Free();
                gch_b.Free();
                gch_c.Free();
                gch_resultC.Free();
            }
        }