public void nppsSet_32s_test()
        {
            // Checks that Npps.nppsSet_32s writes `value` into every element of a
            // device buffer: the buffer is filled on the device, copied back to the
            // host with cudaMemcpy and compared element by element.
            int    length = 1024;
            int    value  = 75;
            int[]  result = new int[length];
            UInt64 size   = Convert.ToUInt64(sizeof(int) * result.Length);

            IntPtr   ptr      = Npps.nppsMalloc_32s(length);
            GCHandle gcHandle = GCHandle.Alloc(result, GCHandleType.Pinned);

            try
            {
                // A failed device allocation would otherwise only show up as an
                // obscure error from the calls below.
                if (ptr == IntPtr.Zero)
                {
                    Assert.Fail("Fail nppsMalloc_32s returned a null pointer");
                }

                // The array is pinned, so its address stays valid for the copy.
                IntPtr h_result = Marshal.UnsafeAddrOfPinnedArrayElement(result, 0);

                NppStatus status = Npps.nppsSet_32s(value, ptr, length);
                if (status != NppStatus.NPP_SUCCESS)
                {
                    Assert.Fail(String.Format("Fail {0}", status.ToString()));
                }

                cudaError cudaStatus = CudaRuntimeApi.cudaMemcpy(h_result, ptr, size, cudaMemcpyKind.DeviceToHost);
                if (cudaStatus != cudaError.cudaSuccess)
                {
                    Assert.Fail(String.Format("Fail {0}", cudaStatus.ToString()));
                }

                for (int i = 0; i < result.Length; i++)
                {
                    Assert.AreEqual(value, result[i]);
                }
            }
            finally
            {
                // Release the pin and the device buffer even when an assertion threw.
                gcHandle.Free();
                if (ptr != IntPtr.Zero)
                {
                    Npps.nppsFree(ptr);
                }
            }
        }
        public void nppiAddC_32f_C3R_test()
        {
            // Checks Nppi.nppiAddC_32f_C3R: a zero-filled 3-channel float image is
            // uploaded, the per-channel constants {0, 1, 2} are added on the device,
            // and the downloaded result must equal the constant of each channel.
            NppStatus nppStatus;
            cudaError cudaStatus;
            int       width   = 256;
            int       height  = 256;
            int       channel = 3;

            // The image is moved with a plain linear cudaMemcpy, so on the device it
            // is laid out tightly packed. The same packed row step is therefore
            // passed to NPP instead of the step written back by nppiMalloc.
            int packedStep = width * channel * sizeof(float);

            // nppiMalloc takes the step by ref; separate variables keep the packed
            // step above from being overwritten.
            int srcStep = packedStep;
            int dstStep = packedStep;

            float[] input     = new float[width * height * channel];
            float[] result    = new float[width * height * channel];
            float[] aconstant = Array.ConvertAll(Enumerable.Range(0, 3).ToArray(), Convert.ToSingle);
            UInt64  size      = Convert.ToUInt64(sizeof(float) * result.Length);

            IntPtr d_src = Nppi.nppiMalloc_32f_C3(width, height, ref srcStep);
            IntPtr d_dst = Nppi.nppiMalloc_32f_C3(width, height, ref dstStep);

            GCHandle gchInput  = GCHandle.Alloc(input, GCHandleType.Pinned);
            GCHandle gchResult = GCHandle.Alloc(result, GCHandleType.Pinned);

            try
            {
                if (d_src == IntPtr.Zero || d_dst == IntPtr.Zero)
                {
                    Assert.Fail("nppiMalloc_32f_C3 returned a null pointer");
                }

                IntPtr h_input  = Marshal.UnsafeAddrOfPinnedArrayElement(input, 0);
                IntPtr h_result = Marshal.UnsafeAddrOfPinnedArrayElement(result, 0);

                cudaStatus = CudaRuntimeApi.cudaMemcpy(d_src, h_input, size, cudaMemcpyKind.HostToDevice);
                if (cudaStatus != cudaError.cudaSuccess)
                {
                    Assert.Fail(cudaStatus.ToString());
                }

                NppiSize roi;

                roi.width  = width;
                roi.height = height; // was `width`; only correct for square images

                nppStatus = Nppi.nppiAddC_32f_C3R(d_src, packedStep, aconstant, d_dst, packedStep, roi);
                if (nppStatus != NppStatus.NPP_SUCCESS)
                {
                    Assert.Fail(nppStatus.ToString());
                }

                cudaStatus = CudaRuntimeApi.cudaMemcpy(h_result, d_dst, size, cudaMemcpyKind.DeviceToHost);
                if (cudaStatus != cudaError.cudaSuccess)
                {
                    Assert.Fail(cudaStatus.ToString());
                }

                // Channels are interleaved, so element i belongs to channel i % 3.
                // The input is all zeros, hence the result is the constant itself.
                for (int i = 0; i < result.Length; i++)
                {
                    int aId = i % aconstant.Length;
                    Assert.AreEqual(aconstant[aId], result[i]);
                }
            }
            finally
            {
                // Release pins and device images even when an assertion threw.
                gchInput.Free();
                gchResult.Free();

                if (d_src != IntPtr.Zero)
                {
                    Nppi.nppiFree(d_src);
                }
                if (d_dst != IntPtr.Zero)
                {
                    Nppi.nppiFree(d_dst);
                }
            }
        }
        public void nppsConvert_32f16s_test()
        {
            // Checks Npps.nppsConvert_32f16s_Sfs: a float device buffer is filled
            // with `value`, converted to 16-bit signed integers (round-to-nearest,
            // scale factor 0) and every converted element must equal `value`.
            NppStatus nppStatus;
            cudaError cudaStatus;
            int       length   = 1024;
            int       value    = 75;
            short     expected = (short)value; // same type as the result elements

            short[] result = new short[length];
            UInt64  size   = Convert.ToUInt64(sizeof(short) * result.Length);

            IntPtr   d_src    = Npps.nppsMalloc_32f(length);
            IntPtr   d_dst    = Npps.nppsMalloc_16s(length);
            GCHandle gcHandle = GCHandle.Alloc(result, GCHandleType.Pinned);

            try
            {
                if (d_src == IntPtr.Zero || d_dst == IntPtr.Zero)
                {
                    Assert.Fail("Fail nppsMalloc returned a null pointer");
                }

                IntPtr h_result = Marshal.UnsafeAddrOfPinnedArrayElement(result, 0);

                nppStatus = Npps.nppsSet_32f((float)value, d_src, length);
                if (nppStatus != NppStatus.NPP_SUCCESS)
                {
                    Assert.Fail(String.Format("Fail {0}", nppStatus.ToString()));
                }

                nppStatus = Npps.nppsConvert_32f16s_Sfs(d_src, d_dst, length, NppRoundMode.NPP_RND_NEAR, 0);
                if (nppStatus != NppStatus.NPP_SUCCESS)
                {
                    Assert.Fail(String.Format("Fail {0}", nppStatus.ToString()));
                }

                cudaStatus = CudaRuntimeApi.cudaMemcpy(h_result, d_dst, size, cudaMemcpyKind.DeviceToHost);
                if (cudaStatus != cudaError.cudaSuccess)
                {
                    Assert.Fail(String.Format("Fail {0}", cudaStatus.ToString()));
                }

                for (int i = 0; i < result.Length; i++)
                {
                    Assert.AreEqual(expected, result[i]);
                }
            }
            finally
            {
                // Release the pin and both device buffers even when an assertion threw.
                gcHandle.Free();
                if (d_src != IntPtr.Zero)
                {
                    Npps.nppsFree(d_src);
                }
                if (d_dst != IntPtr.Zero)
                {
                    Npps.nppsFree(d_dst);
                }
            }
        }
        public void cudaMalloc_cudaFree_cudaMemcpy_test_cudaMemset_test()
        {
            // Round-trip test for the basic CUDA runtime memory calls: allocate a
            // device buffer, upload a zero-filled host array, overwrite the buffer
            // with cudaMemset, download it and verify every byte equals `testValue`,
            // then free the buffer.
            int  length    = 1024 * 2;
            byte testValue = 5;

            byte[] test   = new byte[length];
            byte[] result = new byte[length];

            IntPtr d_ptr  = IntPtr.Zero;
            var    size   = length * sizeof(byte);
            var    status = CudaRuntimeApi.cudaMalloc(ref d_ptr, (ulong)size);

            // Expected value first, so a failure message reads the right way round.
            Assert.AreEqual(cudaError.cudaSuccess, status);
            Console.WriteLine($"ptr : {d_ptr}");

            GCHandle gchTest   = GCHandle.Alloc(test, GCHandleType.Pinned);
            GCHandle gchResult = GCHandle.Alloc(result, GCHandleType.Pinned);

            try
            {
                IntPtr h_ptrTest   = Marshal.UnsafeAddrOfPinnedArrayElement(test, 0);
                IntPtr h_ptrResult = Marshal.UnsafeAddrOfPinnedArrayElement(result, 0);

                status = CudaRuntimeApi.cudaMemcpy(d_ptr, h_ptrTest, (ulong)size, cudaMemcpyKind.HostToDevice);
                Assert.AreEqual(cudaError.cudaSuccess, status);

                status = CudaRuntimeApi.cudaMemset(d_ptr, testValue, (ulong)size);
                Assert.AreEqual(cudaError.cudaSuccess, status);

                status = CudaRuntimeApi.cudaMemcpy(h_ptrResult, d_ptr, (ulong)size, cudaMemcpyKind.DeviceToHost);
                Assert.AreEqual(cudaError.cudaSuccess, status);

                for (int i = 0; i < length; i++)
                {
                    Assert.AreEqual(testValue, result[i]);
                }

                // cudaFree is part of what is being tested, so its status is
                // asserted here; the pointer is cleared first so the finally block
                // does not free it a second time.
                status = CudaRuntimeApi.cudaFree(d_ptr);
                d_ptr  = IntPtr.Zero;
                Assert.AreEqual(cudaError.cudaSuccess, status);
            }
            finally
            {
                // Best-effort cleanup when an assertion above threw.
                if (d_ptr != IntPtr.Zero)
                {
                    CudaRuntimeApi.cudaFree(d_ptr);
                }

                gchTest.Free();
                gchResult.Free();
            }
        }
Beispiel #5
0
        public void cublasCgemm_test()
        {
            // Checks Cublas_api.cublasCgemm_v2 (C = alpha * A * B + beta * C on
            // single-precision complex matrices) against a host reference computed
            // with Matrix<Complex32>. Matrix sizes and contents are random small
            // integers; the arrays are handed to both sides as column-major data.
            int devCount = 0;
            var status   = CudaRuntimeApi.cudaGetDeviceCount(ref devCount);

            Assert.AreEqual(cudaError.cudaSuccess, status);
            if (devCount < 1)
            {
                Assert.Fail("No CUDA device available");
            }

            int devId = 0;

            status = CudaRuntimeApi.cudaSetDevice(devId);
            Assert.AreEqual(cudaError.cudaSuccess, status);

            // Log the seed so a failing random configuration can be reproduced.
            int seed = Environment.TickCount;
            Console.WriteLine("seed : {0}", seed);

            Random rand   = new Random(seed);
            int    rows_a = rand.Next(2, 10);
            int    cols_a = rand.Next(2, 10);

            int rows_b = cols_a;
            int cols_b = rand.Next(2, 10);

            int rows_c  = rows_a;
            int cols_c  = cols_b;
            var A       = new float2[rows_a * cols_a];
            var B       = new float2[rows_b * cols_b];
            var C       = new float2[rows_c * cols_c];
            var resultC = new float2[rows_c * cols_c];

            // Host-side mirrors of the same values for the reference computation.
            var cA       = new Complex32[rows_a * cols_a];
            var cB       = new Complex32[rows_b * cols_b];
            var cC       = new Complex32[rows_c * cols_c];
            var cResultC = new Complex32[rows_c * cols_c];

            for (int i = 0; i < A.Length; i++)
            {
                var real = Convert.ToSingle(rand.Next(0, 10));
                var imag = Convert.ToSingle(rand.Next(0, 10));
                A[i]  = new float2(real, imag);
                cA[i] = new Complex32(real, imag);
            }

            for (int i = 0; i < B.Length; i++)
            {
                var real = Convert.ToSingle(rand.Next(0, 10));
                var imag = Convert.ToSingle(rand.Next(0, 10));
                B[i]  = new float2(real, imag);
                cB[i] = new Complex32(real, imag);
            }

            for (int i = 0; i < C.Length; i++)
            {
                var real = Convert.ToSingle(rand.Next(0, 10));
                var imag = Convert.ToSingle(rand.Next(0, 10));
                C[i]  = new float2(real, imag);
                cC[i] = new Complex32(real, imag);
            }

            var alphaReal = Convert.ToSingle(rand.Next(0, 10));
            var alphaImag = Convert.ToSingle(rand.Next(0, 10));
            var alpha     = new float2(alphaReal, alphaImag);
            var cAlpha    = new Complex32(alphaReal, alphaImag);

            var betaReal = Convert.ToSingle(rand.Next(0, 10));
            var betaImag = Convert.ToSingle(rand.Next(0, 10));
            var beta     = new float2(betaReal, betaImag);
            var cBeta    = new Complex32(betaReal, betaImag);

            int   elemSize = Marshal.SizeOf(typeof(float2));
            ulong bytesA   = (ulong)(A.Length * elemSize);
            ulong bytesB   = (ulong)(B.Length * elemSize);
            ulong bytesC   = (ulong)(C.Length * elemSize);

            var handle = IntPtr.Zero;
            var d_a    = IntPtr.Zero;
            var d_b    = IntPtr.Zero;
            var d_c    = IntPtr.Zero;

            var gch_a       = GCHandle.Alloc(A, GCHandleType.Pinned);
            var gch_b       = GCHandle.Alloc(B, GCHandleType.Pinned);
            var gch_c       = GCHandle.Alloc(C, GCHandleType.Pinned);
            var gch_resultC = GCHandle.Alloc(resultC, GCHandleType.Pinned);

            try
            {
                // NOTE(review): the cuBLAS status enum type is not visible here;
                // the checks below rely on CUBLAS_STATUS_SUCCESS being 0, as in the
                // native cuBLAS API - confirm against the binding.
                var cblasStatus = Cublas_api.cublasCreate_v2(ref handle);
                if (Convert.ToInt32(cblasStatus) != 0)
                {
                    Assert.Fail(String.Format("cublasCreate_v2 failed : {0}", cblasStatus));
                }

                status = CudaRuntimeApi.cudaMalloc(ref d_a, bytesA);
                Assert.AreEqual(cudaError.cudaSuccess, status);
                status = CudaRuntimeApi.cudaMalloc(ref d_b, bytesB);
                Assert.AreEqual(cudaError.cudaSuccess, status);
                status = CudaRuntimeApi.cudaMalloc(ref d_c, bytesC);
                Assert.AreEqual(cudaError.cudaSuccess, status);

                var h_a       = Marshal.UnsafeAddrOfPinnedArrayElement(A, 0);
                var h_b       = Marshal.UnsafeAddrOfPinnedArrayElement(B, 0);
                var h_c       = Marshal.UnsafeAddrOfPinnedArrayElement(C, 0);
                var h_resultC = Marshal.UnsafeAddrOfPinnedArrayElement(resultC, 0);

                status = CudaRuntimeApi.cudaMemcpy(d_a, h_a, bytesA, cudaMemcpyKind.HostToDevice);
                Assert.AreEqual(cudaError.cudaSuccess, status);
                status = CudaRuntimeApi.cudaMemcpy(d_b, h_b, bytesB, cudaMemcpyKind.HostToDevice);
                Assert.AreEqual(cudaError.cudaSuccess, status);
                status = CudaRuntimeApi.cudaMemcpy(d_c, h_c, bytesC, cudaMemcpyKind.HostToDevice);
                Assert.AreEqual(cudaError.cudaSuccess, status);

                // m = rows_a, n = cols_b, k = cols_a; leading dimensions are the
                // row counts because no transpose is requested.
                cblasStatus = Cublas_api.cublasCgemm_v2(
                    handle,
                    cublasOperation_t.CUBLAS_OP_N,
                    cublasOperation_t.CUBLAS_OP_N,
                    rows_a,
                    cols_b,
                    cols_a,
                    ref alpha,
                    d_a,
                    rows_a,
                    d_b,
                    rows_b,
                    ref beta,
                    d_c,
                    rows_c
                    );
                if (Convert.ToInt32(cblasStatus) != 0)
                {
                    Assert.Fail(String.Format("cublasCgemm_v2 failed : {0}", cblasStatus));
                }

                status = CudaRuntimeApi.cudaMemcpy(h_resultC, d_c, bytesC, cudaMemcpyKind.DeviceToHost);
                Assert.AreEqual(cudaError.cudaSuccess, status);

                for (int i = 0; i < rows_c * cols_c; i++)
                {
                    cResultC[i] = new Complex32(resultC[i].X, resultC[i].Y);
                }
                var mResultC = Matrix <Complex32> .Build.Dense(rows_c, cols_c, cResultC);

                var mA = Matrix <Complex32> .Build.Dense(rows_a, cols_a, cA);

                var mB = Matrix <Complex32> .Build.Dense(rows_b, cols_b, cB);

                var mExpectedC = Matrix <Complex32> .Build.Dense(rows_c, cols_c, cC).Clone();

                mExpectedC = cAlpha * mA * mB + cBeta * mExpectedC;
                Complex32[] expected = mExpectedC.ToColumnMajorArray();

                Console.WriteLine("alpha : {0}, beta : {1}", alpha, beta);
                Console.WriteLine("A");
                Console.WriteLine(mA.ToString());
                Console.WriteLine();
                Console.WriteLine("B");
                Console.WriteLine(mB.ToString());
                Console.WriteLine();
                Console.WriteLine("resultC");
                Console.WriteLine(mResultC.ToString());
                Console.WriteLine();
                Console.WriteLine("expectedC");
                Console.WriteLine(mExpectedC.ToString());

                // All operands are small integers, so exact equality is intended.
                for (int i = 0; i < C.Length; i++)
                {
                    Assert.AreEqual(expected[i], cResultC[i]);
                }
            }
            finally
            {
                // Best-effort cleanup that also runs when an assertion threw.
                if (handle != IntPtr.Zero)
                {
                    Cublas_api.cublasDestroy_v2(handle);
                }

                if (d_a != IntPtr.Zero)
                {
                    CudaRuntimeApi.cudaFree(d_a);
                }
                if (d_b != IntPtr.Zero)
                {
                    CudaRuntimeApi.cudaFree(d_b);
                }
                if (d_c != IntPtr.Zero)
                {
                    CudaRuntimeApi.cudaFree(d_c);
                }

                gch_a.Free();
                gch_b.Free();
                gch_c.Free();
                gch_resultC.Free(); // was never released in the original
            }
        }
Beispiel #6
0
        public void cublasSgemm_test()
        {
            // Checks Cublas_api.cublasSgemm_v2 (C = alpha * A * B + beta * C on
            // single-precision matrices, here with alpha = 1 and beta = 0) against
            // a host reference computed with Matrix<float>. Matrix sizes and
            // contents are random small integers; the arrays are handed to both
            // sides as column-major data.
            int devCount = 0;
            var status   = CudaRuntimeApi.cudaGetDeviceCount(ref devCount);

            Assert.AreEqual(cudaError.cudaSuccess, status);
            if (devCount < 1)
            {
                Assert.Fail("No CUDA device available");
            }

            int devId = 0;

            status = CudaRuntimeApi.cudaSetDevice(devId);
            Assert.AreEqual(cudaError.cudaSuccess, status);

            // Log the seed so a failing random configuration can be reproduced.
            int seed = Environment.TickCount;
            Console.WriteLine("seed : {0}", seed);

            Random rand   = new Random(seed);
            int    rows_a = rand.Next(2, 10);
            int    cols_a = rand.Next(2, 10);

            int rows_b = cols_a;
            int cols_b = rand.Next(2, 10);

            int rows_c = rows_a;
            int cols_c = cols_b;

            float alpha = 1.0F;
            float beta  = 0.0F;

            float[] A       = new float[rows_a * cols_a];
            float[] B       = new float[rows_b * cols_b];
            float[] C       = new float[rows_c * cols_c];
            float[] resultC = new float[rows_c * cols_c];

            for (int i = 0; i < A.Length; i++)
            {
                A[i] = Convert.ToSingle(rand.Next(0, 10));
            }

            for (int i = 0; i < B.Length; i++)
            {
                B[i] = Convert.ToSingle(rand.Next(0, 10));
            }

            ulong bytesA = (ulong)A.Length * sizeof(float);
            ulong bytesB = (ulong)B.Length * sizeof(float);
            ulong bytesC = (ulong)C.Length * sizeof(float);

            var handle = IntPtr.Zero;
            var d_a    = IntPtr.Zero;
            var d_b    = IntPtr.Zero;
            var d_c    = IntPtr.Zero;

            var gch_a       = GCHandle.Alloc(A, GCHandleType.Pinned);
            var gch_b       = GCHandle.Alloc(B, GCHandleType.Pinned);
            var gch_c       = GCHandle.Alloc(C, GCHandleType.Pinned);
            var gch_resultC = GCHandle.Alloc(resultC, GCHandleType.Pinned);

            try
            {
                // NOTE(review): the cuBLAS status enum type is not visible here;
                // the checks below rely on CUBLAS_STATUS_SUCCESS being 0, as in the
                // native cuBLAS API - confirm against the binding.
                var cblasStatus = Cublas_api.cublasCreate_v2(ref handle);
                if (Convert.ToInt32(cblasStatus) != 0)
                {
                    Assert.Fail(String.Format("cublasCreate_v2 failed : {0}", cblasStatus));
                }

                status = CudaRuntimeApi.cudaMalloc(ref d_a, bytesA);
                Assert.AreEqual(cudaError.cudaSuccess, status);
                status = CudaRuntimeApi.cudaMalloc(ref d_b, bytesB);
                Assert.AreEqual(cudaError.cudaSuccess, status);
                status = CudaRuntimeApi.cudaMalloc(ref d_c, bytesC);
                Assert.AreEqual(cudaError.cudaSuccess, status);

                var h_a       = Marshal.UnsafeAddrOfPinnedArrayElement(A, 0);
                var h_b       = Marshal.UnsafeAddrOfPinnedArrayElement(B, 0);
                var h_c       = Marshal.UnsafeAddrOfPinnedArrayElement(C, 0);
                var h_resultC = Marshal.UnsafeAddrOfPinnedArrayElement(resultC, 0);

                status = CudaRuntimeApi.cudaMemcpy(d_a, h_a, bytesA, cudaMemcpyKind.HostToDevice);
                Assert.AreEqual(cudaError.cudaSuccess, status);
                status = CudaRuntimeApi.cudaMemcpy(d_b, h_b, bytesB, cudaMemcpyKind.HostToDevice);
                Assert.AreEqual(cudaError.cudaSuccess, status);
                status = CudaRuntimeApi.cudaMemcpy(d_c, h_c, bytesC, cudaMemcpyKind.HostToDevice);
                Assert.AreEqual(cudaError.cudaSuccess, status);

                // m = rows_a, n = cols_b, k = cols_a; leading dimensions are the
                // row counts because no transpose is requested.
                cblasStatus = Cublas_api.cublasSgemm_v2(
                    handle,
                    cublasOperation_t.CUBLAS_OP_N,
                    cublasOperation_t.CUBLAS_OP_N,
                    rows_a,
                    cols_b,
                    cols_a,
                    ref alpha,
                    d_a,
                    rows_a,
                    d_b,
                    rows_b,
                    ref beta,
                    d_c,
                    rows_c
                    );
                if (Convert.ToInt32(cblasStatus) != 0)
                {
                    Assert.Fail(String.Format("cublasSgemm_v2 failed : {0}", cblasStatus));
                }

                status = CudaRuntimeApi.cudaMemcpy(h_resultC, d_c, bytesC, cudaMemcpyKind.DeviceToHost);
                Assert.AreEqual(cudaError.cudaSuccess, status);

                var mResultC = Matrix <float> .Build.Dense(rows_c, cols_c, resultC);

                var mA = Matrix <float> .Build.Dense(rows_a, cols_a, A);

                var mB = Matrix <float> .Build.Dense(rows_b, cols_b, B);

                var mExpectedC = Matrix <float> .Build.Dense(rows_c, cols_c, C).Clone();

                mExpectedC = alpha * mA * mB + beta * mExpectedC;
                float[] expected = mExpectedC.ToColumnMajorArray();

                Console.WriteLine("alpha : {0}, beta : {1}", alpha, beta);
                Console.WriteLine("A");
                Console.WriteLine(mA.ToString());
                Console.WriteLine();
                Console.WriteLine("B");
                Console.WriteLine(mB.ToString());
                Console.WriteLine();
                Console.WriteLine("resultC");
                Console.WriteLine(mResultC.ToString());
                Console.WriteLine();
                Console.WriteLine("expectedC");
                Console.WriteLine(mExpectedC.ToString());

                // All operands are small integers, so exact equality is intended.
                for (int i = 0; i < C.Length; i++)
                {
                    Assert.AreEqual(expected[i], resultC[i]);
                }
            }
            finally
            {
                // Best-effort cleanup that also runs when an assertion threw.
                if (handle != IntPtr.Zero)
                {
                    Cublas_api.cublasDestroy_v2(handle);
                }

                if (d_a != IntPtr.Zero)
                {
                    CudaRuntimeApi.cudaFree(d_a);
                }
                if (d_b != IntPtr.Zero)
                {
                    CudaRuntimeApi.cudaFree(d_b);
                }
                if (d_c != IntPtr.Zero)
                {
                    CudaRuntimeApi.cudaFree(d_c);
                }

                gch_a.Free();
                gch_b.Free();
                gch_c.Free();
                gch_resultC.Free(); // was never released in the original
            }
        }
        public void nppsThreshold_32f_test()
        {
            // Checks Npps.nppsThreshold_32f with NPP_CMP_LESS: the input holds the
            // ramp 0..width-1 repeated `height` times; after thresholding at
            // `level` no output element may be below `level`.
            NppStatus nppStatus;
            cudaError cudaStatus;
            int       width  = 128;
            int       height = 24;
            int       length = width * height;

            float level = 10.0F;

            float[] input  = new float[length];
            float[] result = new float[length];

            float[] line = Array.ConvertAll(Enumerable.Range(0, width).ToArray(), Convert.ToSingle);
            for (int i = 0; i < height; i++)
            {
                Array.Copy(line, 0, input, i * width, width);
            }

            // Both buffers hold floats, so allocate and size them as floats (the
            // original used the 32s allocator and sizeof(int) for the source).
            UInt64 size = Convert.ToUInt64(sizeof(float) * result.Length);

            IntPtr d_src = Npps.nppsMalloc_32f(length);
            IntPtr d_dst = Npps.nppsMalloc_32f(length);

            GCHandle gchInput  = GCHandle.Alloc(input, GCHandleType.Pinned);
            GCHandle gchResult = GCHandle.Alloc(result, GCHandleType.Pinned);

            try
            {
                if (d_src == IntPtr.Zero || d_dst == IntPtr.Zero)
                {
                    Assert.Fail("Fail nppsMalloc_32f returned a null pointer");
                }

                IntPtr h_input  = Marshal.UnsafeAddrOfPinnedArrayElement(input, 0);
                IntPtr h_result = Marshal.UnsafeAddrOfPinnedArrayElement(result, 0);

                cudaStatus = CudaRuntimeApi.cudaMemcpy(d_src, h_input, size, cudaMemcpyKind.HostToDevice);
                if (cudaStatus != cudaError.cudaSuccess)
                {
                    Assert.Fail(String.Format("Fail {0}", cudaStatus.ToString()));
                }

                nppStatus = Npps.nppsThreshold_32f(d_src, d_dst, length, level, NppCmpOp.NPP_CMP_LESS);
                if (nppStatus != NppStatus.NPP_SUCCESS)
                {
                    Assert.Fail(String.Format("Fail {0}", nppStatus.ToString()));
                }

                cudaStatus = CudaRuntimeApi.cudaMemcpy(h_result, d_dst, size, cudaMemcpyKind.DeviceToHost);
                if (cudaStatus != cudaError.cudaSuccess)
                {
                    Assert.Fail(String.Format("Fail {0}", cudaStatus.ToString()));
                }

                for (int i = 0; i < result.Length; i++)
                {
                    if (result[i] < level)
                    {
                        Assert.Fail(String.Format("Fail. level : {0}, value :{1}", level, result[i]));
                    }
                }
            }
            finally
            {
                // Release pins and device buffers even when an assertion threw.
                gchInput.Free();
                gchResult.Free();

                if (d_src != IntPtr.Zero)
                {
                    Npps.nppsFree(d_src);
                }
                if (d_dst != IntPtr.Zero)
                {
                    Npps.nppsFree(d_dst);
                }
            }
        }