/// <summary>
/// Fills a device buffer of 32-bit integers with a constant through
/// <c>Npps.nppsSet_32s</c>, copies the buffer back to the host and checks that
/// every element equals that constant.
/// </summary>
public void nppsSet_32s_test()
{
    const int length = 1024;
    const int value = 75;

    int[] result = new int[length];
    ulong size = Convert.ToUInt64(sizeof(int) * result.Length);

    IntPtr ptr = IntPtr.Zero;
    GCHandle gcHandle = default(GCHandle);
    try
    {
        ptr = Npps.nppsMalloc_32s(length);
        Assert.AreNotEqual(IntPtr.Zero, ptr);

        // Pin the managed array so its address stays valid while cudaMemcpy writes into it.
        gcHandle = GCHandle.Alloc(result, GCHandleType.Pinned);
        IntPtr h_result = Marshal.UnsafeAddrOfPinnedArrayElement(result, 0);

        NppStatus status = Npps.nppsSet_32s(value, ptr, length);
        if (status != NppStatus.NPP_SUCCESS)
        {
            Assert.Fail($"Fail {status}");
        }

        cudaError cudaStatus = CudaRuntimeApi.cudaMemcpy(h_result, ptr, size, cudaMemcpyKind.DeviceToHost);
        if (cudaStatus != cudaError.cudaSuccess)
        {
            Assert.Fail($"Fail {cudaStatus}");
        }

        for (int i = 0; i < result.Length; i++)
        {
            Assert.AreEqual(value, result[i]);
        }
    }
    finally
    {
        // Assertions throw on failure; releasing here keeps a failing run from
        // leaking the pinned handle and the device buffer.
        if (gcHandle.IsAllocated)
        {
            gcHandle.Free();
        }

        if (ptr != IntPtr.Zero)
        {
            Npps.nppsFree(ptr);
        }
    }
}
/// <summary>
/// Allocates an unmanaged host block, registers it with
/// <c>CudaRuntimeApi.cudaHostRegister</c>, round-trips a float ramp through it
/// with <c>Marshal.Copy</c>, then unregisters and frees the block.
/// </summary>
public void cudaHostRegister_cudaHostUnRegister_test()
{
    const int length = 1024 * 1024;
    // Derive the byte size from the element count so the two cannot drift apart.
    int size = length * sizeof(float);

    IntPtr ptr = Marshal.AllocHGlobal(size);
    bool registered = false;
    try
    {
        // NOTE(review): the test expects VirtualLock to return false for this block;
        // presumably locking this many bytes is expected to fail — confirm the intent.
        Assert.IsFalse(VirtualLock(ptr, (ulong)size));

        var status = CudaRuntimeApi.cudaHostRegister(ptr, (ulong)size, DriverTypes.cudaHostRegisterDefault);
        Assert.AreEqual(cudaError.cudaSuccess, status);
        registered = true;
        Console.WriteLine($"ptr : {ptr}");

        float[] test = new float[length];
        float[] result = new float[length];
        for (int i = 0; i < length; i++)
        {
            test[i] = Convert.ToSingle(i);
        }

        // Write the ramp into the registered block and read it straight back.
        Marshal.Copy(test, 0, ptr, length);
        Marshal.Copy(ptr, result, 0, length);

        for (int i = 0; i < length; i++)
        {
            Assert.AreEqual(test[i], result[i]);
        }

        status = CudaRuntimeApi.cudaHostUnregister(ptr);
        // Cleared before asserting so the finally block does not unregister a second time.
        registered = false;
        Assert.AreEqual(cudaError.cudaSuccess, status);
    }
    finally
    {
        // Best-effort cleanup when an assertion above threw before the checked unregister.
        if (registered)
        {
            CudaRuntimeApi.cudaHostUnregister(ptr);
        }

        Marshal.FreeHGlobal(ptr);
    }
}
/// <summary>
/// Allocates host memory with <c>CudaRuntimeApi.cudaHostAlloc</c>, round-trips a
/// float ramp through it with <c>Marshal.Copy</c>, and releases it with
/// <c>CudaRuntimeApi.cudaFreeHost</c>.
/// </summary>
public void cudaHostAlloc_cudaFreeHost_test()
{
    const int length = 1024 * 1024;
    // Derive the byte size from the element count so the two cannot drift apart.
    int size = length * sizeof(float);

    IntPtr ptr = IntPtr.Zero;
    try
    {
        var status = CudaRuntimeApi.cudaHostAlloc(ref ptr, (ulong)size, 0);
        Assert.AreEqual(cudaError.cudaSuccess, status);
        Console.WriteLine($"ptr : {ptr}");

        float[] test = new float[length];
        float[] result = new float[length];
        for (int i = 0; i < length; i++)
        {
            test[i] = Convert.ToSingle(i);
        }

        // Write the ramp into the allocation and read it straight back.
        Marshal.Copy(test, 0, ptr, length);
        Marshal.Copy(ptr, result, 0, length);

        for (int i = 0; i < length; i++)
        {
            Assert.AreEqual(test[i], result[i]);
        }

        status = CudaRuntimeApi.cudaFreeHost(ptr);
        // Cleared before asserting so the finally block does not free a second time.
        ptr = IntPtr.Zero;
        Assert.AreEqual(cudaError.cudaSuccess, status);
    }
    finally
    {
        // Best-effort release when an assertion above threw before the checked free.
        if (ptr != IntPtr.Zero)
        {
            CudaRuntimeApi.cudaFreeHost(ptr);
        }
    }
}
/// <summary>
/// Calls <c>CudaRuntimeApi.cudaGetDeviceCount</c>, expects success and prints the
/// reported device count.
/// </summary>
public void cudaGetDeviceCount_test()
{
    int count = 0;
    var status = CudaRuntimeApi.cudaGetDeviceCount(ref count);

    // Expected value goes first so a failure message reports the two values the right way round.
    Assert.AreEqual(cudaError.cudaSuccess, status);

    Console.WriteLine("cuda device count : {0}", count);
}
/// <summary>
/// Runs <c>Nppi.nppiAddC_32f_C3R</c> on a zero-filled 3-channel float image and
/// checks that each output sample equals the constant added to its channel.
/// </summary>
public void nppiAddC_32f_C3R_test()
{
    NppStatus nppStatus;
    cudaError cudaStatus;

    const int width = 256;
    const int height = 256;
    const int channel = 3;

    // Byte length of one tightly packed row; used as the line step for both images.
    int packedStep = width * channel * sizeof(float);

    // input is left zero-initialised, so src + constant is the per-channel constant itself.
    float[] input = new float[width * height * channel];
    float[] result = new float[width * height * channel];
    float[] aconstant = Array.ConvertAll(Enumerable.Range(0, channel).ToArray(), Convert.ToSingle);
    ulong size = Convert.ToUInt64(sizeof(float) * result.Length);

    IntPtr d_src = IntPtr.Zero;
    IntPtr d_dst = IntPtr.Zero;
    GCHandle gchInput = default(GCHandle);
    GCHandle gchResult = default(GCHandle);
    try
    {
        // NOTE(review): presumably nppiMalloc writes the row pitch it chose into
        // stepInBytes; this test ignores that value and addresses both buffers as
        // tightly packed rows — confirm that is intended.
        int stepInBytes = packedStep;
        d_src = Nppi.nppiMalloc_32f_C3(width, height, ref stepInBytes);
        d_dst = Nppi.nppiMalloc_32f_C3(width, height, ref stepInBytes);

        gchInput = GCHandle.Alloc(input, GCHandleType.Pinned);
        IntPtr h_input = Marshal.UnsafeAddrOfPinnedArrayElement(input, 0);
        cudaStatus = CudaRuntimeApi.cudaMemcpy(d_src, h_input, size, cudaMemcpyKind.HostToDevice);
        if (cudaStatus != cudaError.cudaSuccess)
        {
            Assert.Fail(cudaStatus.ToString());
        }

        NppiSize roi;
        roi.width = width;
        // The original assigned width here; harmless only while the image is square.
        roi.height = height;

        nppStatus = Nppi.nppiAddC_32f_C3R(d_src, packedStep, aconstant, d_dst, packedStep, roi);
        if (nppStatus != NppStatus.NPP_SUCCESS)
        {
            Assert.Fail(nppStatus.ToString());
        }

        gchResult = GCHandle.Alloc(result, GCHandleType.Pinned);
        IntPtr h_result = Marshal.UnsafeAddrOfPinnedArrayElement(result, 0);
        cudaStatus = CudaRuntimeApi.cudaMemcpy(h_result, d_dst, size, cudaMemcpyKind.DeviceToHost);
        if (cudaStatus != cudaError.cudaSuccess)
        {
            Assert.Fail(cudaStatus.ToString());
        }

        for (int i = 0; i < result.Length; i++)
        {
            // Samples are interleaved, so the channel index cycles with the flat index.
            int aId = i % aconstant.Length;
            Assert.AreEqual(aconstant[aId], result[i]);
        }
    }
    finally
    {
        // Assertions throw on failure; release everything here so a failing run does not leak.
        if (gchInput.IsAllocated)
        {
            gchInput.Free();
        }

        if (gchResult.IsAllocated)
        {
            gchResult.Free();
        }

        if (d_src != IntPtr.Zero)
        {
            Nppi.nppiFree(d_src);
        }

        if (d_dst != IntPtr.Zero)
        {
            Nppi.nppiFree(d_dst);
        }
    }
}
/// <summary>
/// Calls <c>CudaRuntimeApi.cudaMemGetInfo</c>, expects success and prints the
/// reported free and total values.
/// </summary>
public void cudaMemGetInfo_test()
{
    ulong free = 0;
    ulong total = 0;
    var status = CudaRuntimeApi.cudaMemGetInfo(ref free, ref total);

    // Expected value goes first so a failure message reports the two values the right way round.
    Assert.AreEqual(cudaError.cudaSuccess, status);

    Console.WriteLine($"free : {free}, total : {total}");
}
/// <summary>
/// Creates a stream with <c>CudaRuntimeApi.cudaStreamCreate</c> and destroys it
/// with <c>CudaRuntimeApi.cudaStreamDestroy</c>, expecting success from both calls.
/// </summary>
public void cudaStreamCreate_cudaStreamDestroy_test()
{
    IntPtr stream = IntPtr.Zero;

    // Expected value goes first so a failure message reports the two values the right way round.
    var status = CudaRuntimeApi.cudaStreamCreate(ref stream);
    Assert.AreEqual(cudaError.cudaSuccess, status);

    status = CudaRuntimeApi.cudaStreamDestroy(stream);
    Assert.AreEqual(cudaError.cudaSuccess, status);
}
/// <summary>
/// Fills a float device buffer with a constant, converts it to 16-bit integers
/// with <c>Npps.nppsConvert_32f16s_Sfs</c> (round-to-nearest, scale factor 0) and
/// checks that every converted element equals the constant.
/// </summary>
public void nppsConvert_32f16s_test()
{
    NppStatus nppStatus;
    cudaError cudaStatus;

    const int length = 1024;
    const float sourceValue = 75.0F;
    // Same value in the destination element type, so the comparison below is short-to-short.
    const short expected = 75;

    short[] result = new short[length];
    ulong size = Convert.ToUInt64(sizeof(short) * result.Length);

    IntPtr d_src = IntPtr.Zero;
    IntPtr d_dst = IntPtr.Zero;
    GCHandle gcHandle = default(GCHandle);
    try
    {
        d_src = Npps.nppsMalloc_32f(length);
        Assert.AreNotEqual(IntPtr.Zero, d_src);
        d_dst = Npps.nppsMalloc_16s(length);
        Assert.AreNotEqual(IntPtr.Zero, d_dst);

        // Pin the managed array so its address stays valid while cudaMemcpy writes into it.
        gcHandle = GCHandle.Alloc(result, GCHandleType.Pinned);
        IntPtr h_result = Marshal.UnsafeAddrOfPinnedArrayElement(result, 0);

        nppStatus = Npps.nppsSet_32f(sourceValue, d_src, length);
        if (nppStatus != NppStatus.NPP_SUCCESS)
        {
            Assert.Fail($"Fail {nppStatus}");
        }

        nppStatus = Npps.nppsConvert_32f16s_Sfs(d_src, d_dst, length, NppRoundMode.NPP_RND_NEAR, 0);
        if (nppStatus != NppStatus.NPP_SUCCESS)
        {
            Assert.Fail($"Fail {nppStatus}");
        }

        cudaStatus = CudaRuntimeApi.cudaMemcpy(h_result, d_dst, size, cudaMemcpyKind.DeviceToHost);
        if (cudaStatus != cudaError.cudaSuccess)
        {
            Assert.Fail($"Fail {cudaStatus}");
        }

        for (int i = 0; i < result.Length; i++)
        {
            Assert.AreEqual(expected, result[i]);
        }
    }
    finally
    {
        // Assertions throw on failure; release everything here so a failing run does not leak.
        if (gcHandle.IsAllocated)
        {
            gcHandle.Free();
        }

        if (d_src != IntPtr.Zero)
        {
            Npps.nppsFree(d_src);
        }

        if (d_dst != IntPtr.Zero)
        {
            Npps.nppsFree(d_dst);
        }
    }
}
/// <summary>
/// Enumerates the CUDA devices and, for each one, selects it, queries the driver
/// and runtime versions and the device properties, and prints a short summary.
/// Every runtime call is expected to report success.
/// </summary>
public void cudaSetDevice_cudaGetDeviceProperties_cudaDriverGetVersion_cudaRuntimeGetVersion_test()
{
    int deviceCount = 0;
    var status = CudaRuntimeApi.cudaGetDeviceCount(ref deviceCount);
    Assert.AreEqual(cudaError.cudaSuccess, status);

    // This function call returns 0 if there are no CUDA capable devices.
    if (deviceCount == 0)
    {
        Console.WriteLine("There are no available device(s) that support CUDA");
    }
    else
    {
        Console.WriteLine("Detected {0} CUDA Capable device(s)", deviceCount);
    }

    for (int i = 0; i < deviceCount; i++)
    {
        status = CudaRuntimeApi.cudaSetDevice(i);
        Assert.AreEqual(cudaError.cudaSuccess, status);

        int driverVersion = 0;
        int runtimeVersion = 0;
        status = CudaRuntimeApi.cudaDriverGetVersion(ref driverVersion);
        Assert.AreEqual(cudaError.cudaSuccess, status);
        status = CudaRuntimeApi.cudaRuntimeGetVersion(ref runtimeVersion);
        Assert.AreEqual(cudaError.cudaSuccess, status);

        // The result was previously discarded; a failed query would have printed an
        // empty, default-initialised property block without failing the test.
        var deviceProp = new cudaDeviceProp();
        status = CudaRuntimeApi.cudaGetDeviceProperties(ref deviceProp, i);
        Assert.AreEqual(cudaError.cudaSuccess, status);

        var uuid = HexStringFromByteArray(UnsignedBytesFromSignedBytes(deviceProp.uuid.bytes));
        // The name buffer is fixed-size; strip the trailing NUL padding after decoding.
        var devName = Encoding.Default.GetString(UnsignedBytesFromSignedBytes(deviceProp.name)).TrimEnd('\0');

        Console.WriteLine("\nDevice {0}: \"{1}\", uuid = {2}", i, devName, uuid);

        // Versions are split into major = v / 1000 and minor = (v % 100) / 10 for display.
        Console.WriteLine(
            " CUDA Driver Version / Runtime Version {0}.{1} / {2}.{3}",
            driverVersion / 1000,
            (driverVersion % 100) / 10,
            runtimeVersion / 1000,
            (runtimeVersion % 100) / 10);
        Console.WriteLine(
            " CUDA Capability Major/Minor version number: {0}.{1}",
            deviceProp.major,
            deviceProp.minor);
        Console.WriteLine(
            " Total amount of global memory: {0:0} MBytes ({1} bytes)",
            Convert.ToSingle(deviceProp.totalGlobalMem / 1048576.0f),
            deviceProp.totalGlobalMem);
    }
}
/// <summary>
/// Allocates a device buffer, uploads a zeroed host array, overwrites the buffer
/// with <c>CudaRuntimeApi.cudaMemset</c>, downloads it and checks that every byte
/// equals the memset value; finally frees the buffer and checks that status too.
/// </summary>
public void cudaMalloc_cudaFree_cudaMemcpy_test_cudaMemset_test()
{
    const int length = 1024 * 2;
    const byte testValue = 5;

    byte[] test = new byte[length];
    byte[] result = new byte[length];
    var size = length * sizeof(byte);

    IntPtr d_ptr = IntPtr.Zero;
    GCHandle gchTest = default(GCHandle);
    GCHandle gchResult = default(GCHandle);
    try
    {
        var status = CudaRuntimeApi.cudaMalloc(ref d_ptr, (ulong)size);
        Assert.AreEqual(cudaError.cudaSuccess, status);
        Console.WriteLine($"ptr : {d_ptr}");

        // Pin both managed arrays so their addresses stay valid during the copies.
        gchTest = GCHandle.Alloc(test, GCHandleType.Pinned);
        gchResult = GCHandle.Alloc(result, GCHandleType.Pinned);
        IntPtr h_ptrTest = Marshal.UnsafeAddrOfPinnedArrayElement(test, 0);
        IntPtr h_ptrResult = Marshal.UnsafeAddrOfPinnedArrayElement(result, 0);

        status = CudaRuntimeApi.cudaMemcpy(d_ptr, h_ptrTest, (ulong)size, cudaMemcpyKind.HostToDevice);
        Assert.AreEqual(cudaError.cudaSuccess, status);

        status = CudaRuntimeApi.cudaMemset(d_ptr, testValue, (ulong)size);
        Assert.AreEqual(cudaError.cudaSuccess, status);

        status = CudaRuntimeApi.cudaMemcpy(h_ptrResult, d_ptr, (ulong)size, cudaMemcpyKind.DeviceToHost);
        Assert.AreEqual(cudaError.cudaSuccess, status);

        for (int i = 0; i < length; i++)
        {
            Assert.AreEqual(testValue, result[i]);
        }

        status = CudaRuntimeApi.cudaFree(d_ptr);
        // Cleared before asserting so the finally block does not free a second time.
        d_ptr = IntPtr.Zero;
        Assert.AreEqual(cudaError.cudaSuccess, status);
    }
    finally
    {
        // Best-effort release when an assertion above threw before the checked free.
        if (d_ptr != IntPtr.Zero)
        {
            CudaRuntimeApi.cudaFree(d_ptr);
        }

        if (gchTest.IsAllocated)
        {
            gchTest.Free();
        }

        if (gchResult.IsAllocated)
        {
            gchResult.Free();
        }
    }
}
/// <summary>
/// Calls <c>CudaRuntimeApi.cudaDeviceSynchronize</c> and expects success.
/// </summary>
public void cudaDeviceSynchronize_test()
{
    var status = CudaRuntimeApi.cudaDeviceSynchronize();

    // Expected value goes first so a failure message reports the two values the right way round.
    Assert.AreEqual(cudaError.cudaSuccess, status);
}
/// <summary>
/// Calls <c>CudaRuntimeApi.cudaDeviceReset</c> and expects success.
/// </summary>
public void cudaDeviceReset_test()
{
    var status = CudaRuntimeApi.cudaDeviceReset();

    // Expected value goes first so a failure message reports the two values the right way round.
    Assert.AreEqual(cudaError.cudaSuccess, status);
}
/// <summary>
/// Computes C = alpha * A * B + beta * C for randomly sized single-precision
/// complex matrices with <c>Cublas_api.cublasCgemm_v2</c> and compares the result,
/// element by element, with the same expression evaluated on the host through
/// <c>Matrix&lt;Complex32&gt;</c>.
/// </summary>
public void cublasCgemm_test()
{
    int devCount = 0;
    var status = CudaRuntimeApi.cudaGetDeviceCount(ref devCount);
    Assert.AreEqual(cudaError.cudaSuccess, status);

    int devId = 0;
    status = CudaRuntimeApi.cudaSetDevice(devId);
    Assert.AreEqual(cudaError.cudaSuccess, status);

    // Random dimensions; the inner dimension is shared so that A * B is defined.
    Random rand = new Random();
    int rows_a = rand.Next(2, 10);
    int cols_a = rand.Next(2, 10);
    int rows_b = cols_a;
    int cols_b = rand.Next(2, 10);
    int rows_c = rows_a;
    int cols_c = cols_b;

    // Each matrix is kept twice: as float2 for the device and as Complex32 for the host reference.
    var A = new float2[rows_a * cols_a];
    var B = new float2[rows_b * cols_b];
    var C = new float2[rows_c * cols_c];
    var resultC = new float2[rows_c * cols_c];
    var cA = new Complex32[rows_a * cols_a];
    var cB = new Complex32[rows_b * cols_b];
    var cC = new Complex32[rows_c * cols_c];
    var cResultC = new Complex32[rows_c * cols_c];

    for (int i = 0; i < A.Length; i++)
    {
        var real = Convert.ToSingle(rand.Next(0, 10));
        var imag = Convert.ToSingle(rand.Next(0, 10));
        A[i] = new float2(real, imag);
        cA[i] = new Complex32(real, imag);
    }

    for (int i = 0; i < B.Length; i++)
    {
        var real = Convert.ToSingle(rand.Next(0, 10));
        var imag = Convert.ToSingle(rand.Next(0, 10));
        B[i] = new float2(real, imag);
        cB[i] = new Complex32(real, imag);
    }

    for (int i = 0; i < C.Length; i++)
    {
        var real = Convert.ToSingle(rand.Next(0, 10));
        var imag = Convert.ToSingle(rand.Next(0, 10));
        C[i] = new float2(real, imag);
        cC[i] = new Complex32(real, imag);
    }

    var alphaReal = Convert.ToSingle(rand.Next(0, 10));
    var alphaImag = Convert.ToSingle(rand.Next(0, 10));
    var alpha = new float2(alphaReal, alphaImag);
    var cAlpha = new Complex32(alphaReal, alphaImag);

    var betaReal = Convert.ToSingle(rand.Next(0, 10));
    var betaImag = Convert.ToSingle(rand.Next(0, 10));
    var beta = new float2(betaReal, betaImag);
    var cBeta = new Complex32(betaReal, betaImag);

    int elementSize = Marshal.SizeOf(typeof(float2));
    ulong bytesA = (ulong)(A.Length * elementSize);
    ulong bytesB = (ulong)(B.Length * elementSize);
    ulong bytesC = (ulong)(C.Length * elementSize);

    var handle = IntPtr.Zero;
    var d_a = IntPtr.Zero;
    var d_b = IntPtr.Zero;
    var d_c = IntPtr.Zero;
    var gch_a = default(GCHandle);
    var gch_b = default(GCHandle);
    var gch_c = default(GCHandle);
    var gch_resultC = default(GCHandle);
    try
    {
        // The cuBLAS status type is not named in this block; cuBLAS documents
        // CUBLAS_STATUS_SUCCESS as 0, so the numeric value is compared against 0.
        var cblasStatus = Cublas_api.cublasCreate_v2(ref handle);
        Assert.AreEqual(0, Convert.ToInt32(cblasStatus));

        status = CudaRuntimeApi.cudaMalloc(ref d_a, bytesA);
        Assert.AreEqual(cudaError.cudaSuccess, status);
        status = CudaRuntimeApi.cudaMalloc(ref d_b, bytesB);
        Assert.AreEqual(cudaError.cudaSuccess, status);
        status = CudaRuntimeApi.cudaMalloc(ref d_c, bytesC);
        Assert.AreEqual(cudaError.cudaSuccess, status);

        // Pin the managed arrays so their addresses stay valid during the copies.
        gch_a = GCHandle.Alloc(A, GCHandleType.Pinned);
        gch_b = GCHandle.Alloc(B, GCHandleType.Pinned);
        gch_c = GCHandle.Alloc(C, GCHandleType.Pinned);
        gch_resultC = GCHandle.Alloc(resultC, GCHandleType.Pinned);
        var h_a = Marshal.UnsafeAddrOfPinnedArrayElement(A, 0);
        var h_b = Marshal.UnsafeAddrOfPinnedArrayElement(B, 0);
        var h_c = Marshal.UnsafeAddrOfPinnedArrayElement(C, 0);
        var h_resultC = Marshal.UnsafeAddrOfPinnedArrayElement(resultC, 0);

        status = CudaRuntimeApi.cudaMemcpy(d_a, h_a, bytesA, cudaMemcpyKind.HostToDevice);
        Assert.AreEqual(cudaError.cudaSuccess, status);
        status = CudaRuntimeApi.cudaMemcpy(d_b, h_b, bytesB, cudaMemcpyKind.HostToDevice);
        Assert.AreEqual(cudaError.cudaSuccess, status);
        status = CudaRuntimeApi.cudaMemcpy(d_c, h_c, bytesC, cudaMemcpyKind.HostToDevice);
        Assert.AreEqual(cudaError.cudaSuccess, status);

        // Leading dimensions are the row counts, i.e. the flat arrays are used as column-major.
        cblasStatus = Cublas_api.cublasCgemm_v2(
            handle,
            cublasOperation_t.CUBLAS_OP_N,
            cublasOperation_t.CUBLAS_OP_N,
            rows_a,
            cols_b,
            cols_a,
            ref alpha,
            d_a,
            rows_a,
            d_b,
            rows_b,
            ref beta,
            d_c,
            rows_c);
        Assert.AreEqual(0, Convert.ToInt32(cblasStatus));

        status = CudaRuntimeApi.cudaMemcpy(h_resultC, d_c, bytesC, cudaMemcpyKind.DeviceToHost);
        Assert.AreEqual(cudaError.cudaSuccess, status);

        for (int i = 0; i < resultC.Length; i++)
        {
            cResultC[i] = new Complex32(resultC[i].X, resultC[i].Y);
        }

        // Host reference, built from the same flat arrays that were uploaded.
        var mResultC = Matrix<Complex32>.Build.Dense(rows_c, cols_c, cResultC);
        var mA = Matrix<Complex32>.Build.Dense(rows_a, cols_a, cA);
        var mB = Matrix<Complex32>.Build.Dense(rows_b, cols_b, cB);
        var mC = Matrix<Complex32>.Build.Dense(rows_c, cols_c, cC);
        var mExpectedC = cAlpha * mA * mB + cBeta * mC;
        Complex32[] expected = mExpectedC.ToColumnMajorArray();

        Console.WriteLine("alpha : {0}, beta : {1}", alpha, beta);
        Console.WriteLine("A");
        Console.WriteLine(mA.ToString());
        Console.WriteLine();
        Console.WriteLine("B");
        Console.WriteLine(mB.ToString());
        Console.WriteLine();
        Console.WriteLine("resultC");
        Console.WriteLine(mResultC.ToString());
        Console.WriteLine();
        Console.WriteLine("expectedC");
        Console.WriteLine(mExpectedC.ToString());

        for (int i = 0; i < C.Length; i++)
        {
            Assert.AreEqual(expected[i], cResultC[i]);
        }
    }
    finally
    {
        // Assertions throw on failure; release everything here so a failing run does
        // not leak. This also frees gch_resultC, which was previously never released.
        if (handle != IntPtr.Zero)
        {
            Cublas_api.cublasDestroy_v2(handle);
        }

        if (d_a != IntPtr.Zero)
        {
            CudaRuntimeApi.cudaFree(d_a);
        }

        if (d_b != IntPtr.Zero)
        {
            CudaRuntimeApi.cudaFree(d_b);
        }

        if (d_c != IntPtr.Zero)
        {
            CudaRuntimeApi.cudaFree(d_c);
        }

        if (gch_a.IsAllocated)
        {
            gch_a.Free();
        }

        if (gch_b.IsAllocated)
        {
            gch_b.Free();
        }

        if (gch_c.IsAllocated)
        {
            gch_c.Free();
        }

        if (gch_resultC.IsAllocated)
        {
            gch_resultC.Free();
        }
    }
}
/// <summary>
/// Computes C = alpha * A * B + beta * C for randomly sized single-precision
/// matrices with <c>Cublas_api.cublasSgemm_v2</c> (alpha = 1, beta = 0) and
/// compares the result, element by element, with the same expression evaluated
/// on the host through <c>Matrix&lt;float&gt;</c>.
/// </summary>
public void cublasSgemm_test()
{
    int devCount = 0;
    var status = CudaRuntimeApi.cudaGetDeviceCount(ref devCount);
    Assert.AreEqual(cudaError.cudaSuccess, status);

    int devId = 0;
    status = CudaRuntimeApi.cudaSetDevice(devId);
    Assert.AreEqual(cudaError.cudaSuccess, status);

    // Random dimensions; the inner dimension is shared so that A * B is defined.
    Random rand = new Random();
    int rows_a = rand.Next(2, 10);
    int cols_a = rand.Next(2, 10);
    int rows_b = cols_a;
    int cols_b = rand.Next(2, 10);
    int rows_c = rows_a;
    int cols_c = cols_b;

    float alpha = 1.0F;
    float beta = 0.0F;

    float[] A = new float[rows_a * cols_a];
    float[] B = new float[rows_b * cols_b];
    // C is left zero-initialised before upload.
    float[] C = new float[rows_c * cols_c];
    float[] resultC = new float[rows_c * cols_c];

    for (int i = 0; i < A.Length; i++)
    {
        A[i] = Convert.ToSingle(rand.Next(0, 10));
    }

    for (int i = 0; i < B.Length; i++)
    {
        B[i] = Convert.ToSingle(rand.Next(0, 10));
    }

    ulong bytesA = (ulong)(A.Length * sizeof(float));
    ulong bytesB = (ulong)(B.Length * sizeof(float));
    ulong bytesC = (ulong)(C.Length * sizeof(float));

    var handle = IntPtr.Zero;
    var d_a = IntPtr.Zero;
    var d_b = IntPtr.Zero;
    var d_c = IntPtr.Zero;
    var gch_a = default(GCHandle);
    var gch_b = default(GCHandle);
    var gch_c = default(GCHandle);
    var gch_resultC = default(GCHandle);
    try
    {
        // The cuBLAS status type is not named in this block; cuBLAS documents
        // CUBLAS_STATUS_SUCCESS as 0, so the numeric value is compared against 0.
        var cblasStatus = Cublas_api.cublasCreate_v2(ref handle);
        Assert.AreEqual(0, Convert.ToInt32(cblasStatus));

        status = CudaRuntimeApi.cudaMalloc(ref d_a, bytesA);
        Assert.AreEqual(cudaError.cudaSuccess, status);
        status = CudaRuntimeApi.cudaMalloc(ref d_b, bytesB);
        Assert.AreEqual(cudaError.cudaSuccess, status);
        status = CudaRuntimeApi.cudaMalloc(ref d_c, bytesC);
        Assert.AreEqual(cudaError.cudaSuccess, status);

        // Pin the managed arrays so their addresses stay valid during the copies.
        gch_a = GCHandle.Alloc(A, GCHandleType.Pinned);
        gch_b = GCHandle.Alloc(B, GCHandleType.Pinned);
        gch_c = GCHandle.Alloc(C, GCHandleType.Pinned);
        gch_resultC = GCHandle.Alloc(resultC, GCHandleType.Pinned);
        var h_a = Marshal.UnsafeAddrOfPinnedArrayElement(A, 0);
        var h_b = Marshal.UnsafeAddrOfPinnedArrayElement(B, 0);
        var h_c = Marshal.UnsafeAddrOfPinnedArrayElement(C, 0);
        var h_resultC = Marshal.UnsafeAddrOfPinnedArrayElement(resultC, 0);

        status = CudaRuntimeApi.cudaMemcpy(d_a, h_a, bytesA, cudaMemcpyKind.HostToDevice);
        Assert.AreEqual(cudaError.cudaSuccess, status);
        status = CudaRuntimeApi.cudaMemcpy(d_b, h_b, bytesB, cudaMemcpyKind.HostToDevice);
        Assert.AreEqual(cudaError.cudaSuccess, status);
        status = CudaRuntimeApi.cudaMemcpy(d_c, h_c, bytesC, cudaMemcpyKind.HostToDevice);
        Assert.AreEqual(cudaError.cudaSuccess, status);

        // Leading dimensions are the row counts, i.e. the flat arrays are used as column-major.
        cblasStatus = Cublas_api.cublasSgemm_v2(
            handle,
            cublasOperation_t.CUBLAS_OP_N,
            cublasOperation_t.CUBLAS_OP_N,
            rows_a,
            cols_b,
            cols_a,
            ref alpha,
            d_a,
            rows_a,
            d_b,
            rows_b,
            ref beta,
            d_c,
            rows_c);
        Assert.AreEqual(0, Convert.ToInt32(cblasStatus));

        status = CudaRuntimeApi.cudaMemcpy(h_resultC, d_c, bytesC, cudaMemcpyKind.DeviceToHost);
        Assert.AreEqual(cudaError.cudaSuccess, status);

        // Host reference, built from the same flat arrays that were uploaded.
        var mResultC = Matrix<float>.Build.Dense(rows_c, cols_c, resultC);
        var mA = Matrix<float>.Build.Dense(rows_a, cols_a, A);
        var mB = Matrix<float>.Build.Dense(rows_b, cols_b, B);
        var mC = Matrix<float>.Build.Dense(rows_c, cols_c, C);
        var mExpectedC = alpha * mA * mB + beta * mC;
        float[] expected = mExpectedC.ToColumnMajorArray();

        Console.WriteLine("alpha : {0}, beta : {1}", alpha, beta);
        Console.WriteLine("A");
        Console.WriteLine(mA.ToString());
        Console.WriteLine();
        Console.WriteLine("B");
        Console.WriteLine(mB.ToString());
        Console.WriteLine();
        Console.WriteLine("resultC");
        Console.WriteLine(mResultC.ToString());
        Console.WriteLine();
        Console.WriteLine("expectedC");
        Console.WriteLine(mExpectedC.ToString());

        for (int i = 0; i < C.Length; i++)
        {
            Assert.AreEqual(expected[i], resultC[i]);
        }
    }
    finally
    {
        // Assertions throw on failure; release everything here so a failing run does
        // not leak. This also frees gch_resultC, which was previously never released.
        if (handle != IntPtr.Zero)
        {
            Cublas_api.cublasDestroy_v2(handle);
        }

        if (d_a != IntPtr.Zero)
        {
            CudaRuntimeApi.cudaFree(d_a);
        }

        if (d_b != IntPtr.Zero)
        {
            CudaRuntimeApi.cudaFree(d_b);
        }

        if (d_c != IntPtr.Zero)
        {
            CudaRuntimeApi.cudaFree(d_c);
        }

        if (gch_a.IsAllocated)
        {
            gch_a.Free();
        }

        if (gch_b.IsAllocated)
        {
            gch_b.Free();
        }

        if (gch_c.IsAllocated)
        {
            gch_c.Free();
        }

        if (gch_resultC.IsAllocated)
        {
            gch_resultC.Free();
        }
    }
}
/// <summary>
/// Uploads a float signal made of repeated 0..width-1 ramps, applies
/// <c>Npps.nppsThreshold_32f</c> with <c>NPP_CMP_LESS</c> at a fixed level and
/// checks that no output sample is below that level.
/// </summary>
public void nppsThreshold_32f_test()
{
    NppStatus nppStatus;
    cudaError cudaStatus;

    const int width = 128;
    const int height = 24;
    const int length = width * height;
    float level = 10.0F;

    float[] input = new float[length];
    float[] result = new float[length];

    // Repeat the same ramp on every row, so each row holds values both below and above the level.
    float[] line = Array.ConvertAll(Enumerable.Range(0, width).ToArray(), Convert.ToSingle);
    for (int i = 0; i < height; i++)
    {
        Array.Copy(line, 0, input, i * width, width);
    }

    // The buffers hold floats; the original sized them with sizeof(int), which only
    // matched because both types are four bytes wide.
    ulong size = Convert.ToUInt64(sizeof(float) * result.Length);

    IntPtr d_src = IntPtr.Zero;
    IntPtr d_dst = IntPtr.Zero;
    GCHandle gchInput = default(GCHandle);
    GCHandle gchResult = default(GCHandle);
    try
    {
        // The source is a float signal, so allocate it with the float allocator
        // (the original used nppsMalloc_32s).
        d_src = Npps.nppsMalloc_32f(length);
        Assert.AreNotEqual(IntPtr.Zero, d_src);
        d_dst = Npps.nppsMalloc_32f(length);
        Assert.AreNotEqual(IntPtr.Zero, d_dst);

        gchInput = GCHandle.Alloc(input, GCHandleType.Pinned);
        IntPtr h_input = Marshal.UnsafeAddrOfPinnedArrayElement(input, 0);
        cudaStatus = CudaRuntimeApi.cudaMemcpy(d_src, h_input, size, cudaMemcpyKind.HostToDevice);
        if (cudaStatus != cudaError.cudaSuccess)
        {
            Assert.Fail($"Fail {cudaStatus}");
        }

        nppStatus = Npps.nppsThreshold_32f(d_src, d_dst, length, level, NppCmpOp.NPP_CMP_LESS);
        if (nppStatus != NppStatus.NPP_SUCCESS)
        {
            Assert.Fail($"Fail {nppStatus}");
        }

        gchResult = GCHandle.Alloc(result, GCHandleType.Pinned);
        IntPtr h_result = Marshal.UnsafeAddrOfPinnedArrayElement(result, 0);
        cudaStatus = CudaRuntimeApi.cudaMemcpy(h_result, d_dst, size, cudaMemcpyKind.DeviceToHost);
        if (cudaStatus != cudaError.cudaSuccess)
        {
            Assert.Fail($"Fail {cudaStatus}");
        }

        for (int i = 0; i < result.Length; i++)
        {
            if (result[i] < level)
            {
                Assert.Fail(String.Format("Fail. level : {0}, value :{1}", level, result[i]));
            }
        }
    }
    finally
    {
        // Assertions throw on failure; release everything here so a failing run does not leak.
        if (gchInput.IsAllocated)
        {
            gchInput.Free();
        }

        if (gchResult.IsAllocated)
        {
            gchResult.Free();
        }

        if (d_src != IntPtr.Zero)
        {
            Npps.nppsFree(d_src);
        }

        if (d_dst != IntPtr.Zero)
        {
            Npps.nppsFree(d_dst);
        }
    }
}