/// <summary>
/// Runs a double-precision cuBLAS GEMM on 2x2 all-zero matrices and checks that the
/// first element of the result copied back to the host is (approximately) zero.
/// </summary>
/// <remarks>
/// All device buffers and the cuBLAS handle are wrapped in <c>using</c> statements so
/// they are released even when the assertion (or the GEMM call itself) throws.
/// </remarks>
public void TestAddCUDA()
{
    // Problem dimensions: A is (count_rows_a x count_shared), B is (count_shared x count_cols_b).
    int count_rows_a = 2;
    int count_shared = 2;
    int count_cols_b = 2;

    // Scalar factors handed to GEMM.
    double alfa = 1.0;
    double beta = 1.0;

    // Alloc device memory: each buffer is created from a host array via implicit conversion.
    using (CudaDeviceVariable<double> A = new double[] { 0, 0, 0, 0 })
    using (CudaDeviceVariable<double> B = new double[] { 0, 0, 0, 0 })
    using (CudaDeviceVariable<double> C = new double[] { 0, 0, 0, 0 })
    using (CudaBlas blas = new CudaBlas())
    {
        // GEMM expects (m, n, k) = (rows of A and C, columns of B and C, shared dimension);
        // the leading dimensions are the row counts of A, B and C respectively.
        blas.Gemm(
            Operation.NonTranspose, Operation.NonTranspose,
            count_rows_a, count_cols_b, count_shared,
            alfa,
            A, count_rows_a,
            B, count_shared,
            beta,
            C, count_rows_a);

        // Copy data back to host (implicit conversion from the device variable).
        double[] result = C;
        ToolsArray.print(result);

        Assert.AreEqual(0, result[0], 0.001);
    }
}
/// <summary>
/// Disposes <c>_blas</c>, <c>_cuda</c> and <c>_numerics</c> (in that order) the first time
/// it is called with <paramref name="disposing"/> set to <c>true</c>, then marks the
/// instance as disposed.
/// </summary>
/// <param name="disposing">
/// When <c>false</c> the call does nothing and the disposed flag is left untouched.
/// </param>
protected virtual void Dispose(bool disposing)
{
    // Bail out for non-disposing calls and for repeated disposal.
    if (!disposing || _disposed)
    {
        return;
    }

    _blas.Dispose();
    _cuda.Dispose();

    // NOTE(review): disposal of _solver is disabled in the original code; confirm
    // whether it still needs to be released here.
    //if(_solver.IsValueCreated)
    //    _solver.Value.Dispose();

    _numerics.Dispose();

    _disposed = true;
}
/// <summary>
/// Disposes <c>cuBlas</c> first and <c>cudaContext</c> second.
/// </summary>
public override void Dispose()
{
    cuBlas.Dispose();
    cudaContext.Dispose();
}
/// <summary>
/// simpleCUBLAS sample: fills two N x N single-precision matrices with pseudo-random
/// data, multiplies them once on the host via <c>simple_sgemm</c> and once on the device
/// via cuBLAS GEMM, and prints whether the relative error between both results is
/// below 1e-6.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
/// <remarks>
/// The cuBLAS handle and all device buffers live inside <c>using</c> statements so they
/// are released on every exit path, including the early return taken when the
/// reference norm is zero and any exception thrown along the way.
/// </remarks>
static void Main(string[] args)
{
    int N = 275;
    float alpha = 1.0f;
    float beta = 0.0f;
    int n2 = N * N;

    /* Initialize CUBLAS */
    Console.WriteLine("simpleCUBLAS test running.");

    using (CudaBlas handle = new CudaBlas())
    {
        /* Allocate host memory for the matrices */
        float[] h_A = new float[n2];
        float[] h_B = new float[n2];
        float[] h_C_ref = new float[n2];

        /* Fill the matrices with test data (fixed seed, so runs are repeatable) */
        Random rand = new Random(0);
        for (int i = 0; i < n2; i++)
        {
            h_A[i] = (float)rand.NextDouble();
            h_B[i] = (float)rand.NextDouble();
        }

        /* Allocate device memory for the matrices */
        using (CudaDeviceVariable<float> d_A = new CudaDeviceVariable<float>(n2))
        using (CudaDeviceVariable<float> d_B = new CudaDeviceVariable<float>(n2))
        using (CudaDeviceVariable<float> d_C = new CudaDeviceVariable<float>(n2))
        {
            /* Initialize the device matrices with the host matrices.
               d_C is deliberately not uploaded; beta is 0 for this run. */
            d_A.CopyToDevice(h_A);
            d_B.CopyToDevice(h_B);

            /* Performs operation using plain C code */
            simple_sgemm(N, alpha, h_A, h_B, beta, h_C_ref);

            /* Performs operation using cublas */
            handle.Gemm(Operation.NonTranspose, Operation.NonTranspose, N, N, N, alpha, d_A, N, d_B, N, beta, d_C, N);

            /* Read the result back from device memory (implicit conversion to a host array) */
            float[] h_C = d_C;

            /* Check result against reference */
            float error_norm = 0;
            float ref_norm = 0;
            for (int i = 0; i < n2; ++i)
            {
                float diff = h_C_ref[i] - h_C[i];
                error_norm += diff * diff;
                ref_norm += h_C_ref[i] * h_C_ref[i];
            }

            error_norm = (float)Math.Sqrt((double)error_norm);
            ref_norm = (float)Math.Sqrt((double)ref_norm);

            // A (near-)zero reference norm would make the relative error meaningless.
            if (Math.Abs(ref_norm) < 1e-7)
            {
                Console.WriteLine("!!!! reference norm is 0");
                return;
            }

            if (error_norm / ref_norm < 1e-6f)
            {
                Console.WriteLine("simpleCUBLAS test passed.");
            }
            else
            {
                Console.WriteLine("simpleCUBLAS test failed.");
            }
        }
    }
}
/// <summary>
/// simpleCUBLAS sample: fills two N x N single-precision matrices with pseudo-random
/// data, multiplies them once on the host via <c>simple_sgemm</c> and once on the device
/// via cuBLAS GEMM, and prints whether the relative error between both results is
/// below 1e-6.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
/// <remarks>
/// The cuBLAS handle and all device buffers live inside <c>using</c> statements so they
/// are released on every exit path, including the early return taken when the
/// reference norm is zero and any exception thrown along the way.
/// </remarks>
static void Main(string[] args)
{
    int N = 275;
    float alpha = 1.0f;
    float beta = 0.0f;
    int n2 = N * N;

    /* Initialize CUBLAS */
    Console.WriteLine("simpleCUBLAS test running.");

    using (CudaBlas handle = new CudaBlas())
    {
        /* Allocate host memory for the matrices */
        float[] h_A = new float[n2];
        float[] h_B = new float[n2];
        float[] h_C_ref = new float[n2];

        /* Fill the matrices with test data (fixed seed, so runs are repeatable) */
        Random rand = new Random(0);
        for (int i = 0; i < n2; i++)
        {
            h_A[i] = (float)rand.NextDouble();
            h_B[i] = (float)rand.NextDouble();
        }

        /* Allocate device memory for the matrices */
        using (CudaDeviceVariable<float> d_A = new CudaDeviceVariable<float>(n2))
        using (CudaDeviceVariable<float> d_B = new CudaDeviceVariable<float>(n2))
        using (CudaDeviceVariable<float> d_C = new CudaDeviceVariable<float>(n2))
        {
            /* Initialize the device matrices with the host matrices.
               d_C is deliberately not uploaded; beta is 0 for this run. */
            d_A.CopyToDevice(h_A);
            d_B.CopyToDevice(h_B);

            /* Performs operation using plain C code */
            simple_sgemm(N, alpha, h_A, h_B, beta, h_C_ref);

            /* Performs operation using cublas */
            handle.Gemm(Operation.NonTranspose, Operation.NonTranspose, N, N, N, alpha, d_A, N, d_B, N, beta, d_C, N);

            /* Read the result back from device memory (implicit conversion to a host array) */
            float[] h_C = d_C;

            /* Check result against reference */
            float error_norm = 0;
            float ref_norm = 0;
            for (int i = 0; i < n2; ++i)
            {
                float diff = h_C_ref[i] - h_C[i];
                error_norm += diff * diff;
                ref_norm += h_C_ref[i] * h_C_ref[i];
            }

            error_norm = (float)Math.Sqrt((double)error_norm);
            ref_norm = (float)Math.Sqrt((double)ref_norm);

            // A (near-)zero reference norm would make the relative error meaningless.
            if (Math.Abs(ref_norm) < 1e-7)
            {
                Console.WriteLine("!!!! reference norm is 0");
                return;
            }

            if (error_norm / ref_norm < 1e-6f)
            {
                Console.WriteLine("simpleCUBLAS test passed.");
            }
            else
            {
                Console.WriteLine("simpleCUBLAS test failed.");
            }
        }
    }
}