/// <summary>
/// Prepares the single-precision FFT fixture: creates the GPU device and FFT object,
/// allocates the host and device buffers, and fills the host inputs with a sine test signal.
/// </summary>
/// <remarks>
/// The driver version and the FFT library version are written to the console.
/// The generated samples depend only on the position within a batch, so every batch
/// holds the same waveform.
/// </remarks>
public void SetUp()
{
    _gpu = CudafyHost.CreateDevice(CudafyModes.Target);
    Console.WriteLine(_gpu.GetDriverVersion());
    _fft = GPGPUFFT.Create(_gpu);

    // Host-side buffers: one element per sample per batch.
    var sampleCount = N * BATCH;
    _hostInput = new float[sampleCount];
    _hostInputCplx = new ComplexF[sampleCount];
    _hostOutput = new float[sampleCount];
    _hostOutputCplx = new ComplexF[sampleCount];

    // Device-side buffers; the real intermediate buffer holds twice as many elements as the input.
    _devInput = _gpu.Allocate(_hostInput);
    _devInputCplx = _gpu.Allocate(_hostInputCplx);
    _devInter = _gpu.Allocate<float>(N * 2 * BATCH);
    _devInterCplx = _gpu.Allocate<ComplexF>(sampleCount);
    _devOutput = _gpu.Allocate(_hostOutput);
    _devOutputCplx = _gpu.Allocate(_hostOutputCplx);

    Console.WriteLine(_fft.GetVersion());

    for (int batch = 0; batch < BATCH; batch++)
    {
        var offset = batch * N;
        for (int sample = 0; sample < N; sample++)
        {
            double angleX = 100 * 2 * Math.PI * sample / N * Math.PI / 180;
            double angleY = 200 * 2 * Math.PI * sample / N * Math.PI / 180;

            ComplexF value = new ComplexF
            {
                x = (float)(10.0F * Math.Sin(angleX)),
                y = (float)(10.0F * Math.Sin(angleY))
            };

            // The real-valued input reuses the x component of the complex sample.
            _hostInput[offset + sample] = value.x;
            _hostInputCplx[offset + sample] = value;
        }
    }
}
/// <summary>
/// Prepares the double-precision FFT fixture: creates the GPU device, allocates the host
/// and device buffers, creates the FFT object, and fills the host inputs with a sine test signal.
/// </summary>
/// <remarks>
/// The generated samples depend only on the position within a batch, so every batch
/// holds the same waveform.
/// </remarks>
public void SetUp()
{
    _gpu = CudafyHost.CreateDevice(CudafyModes.Target);

    // Host-side buffers: one element per sample per batch.
    var sampleCount = N * BATCH;
    _hostInput = new double[sampleCount];
    _hostInputCplx = new ComplexD[sampleCount];
    _hostOutput = new double[sampleCount];
    _hostOutputCplx = new ComplexD[sampleCount];

    // Device-side buffers; the real intermediate buffer holds twice as many elements as the input.
    _devInput = _gpu.Allocate(_hostInput);
    _devInputCplx = _gpu.Allocate(_hostInputCplx);
    _devInter = _gpu.Allocate<double>(N * 2 * BATCH);
    _devInterCplx = _gpu.Allocate<ComplexD>(sampleCount);
    _devOutput = _gpu.Allocate(_hostOutput);
    _devOutputCplx = _gpu.Allocate(_hostOutputCplx);

    // The FFT object is created only after all device buffers have been allocated.
    _fft = GPGPUFFT.Create(_gpu);

    for (int batch = 0; batch < BATCH; batch++)
    {
        var offset = batch * N;
        for (int sample = 0; sample < N; sample++)
        {
            double angleX = 100 * 2 * Math.PI * sample / N * Math.PI / 180;
            double angleY = 200 * 2 * Math.PI * sample / N * Math.PI / 180;

            ComplexD value = new ComplexD
            {
                x = (double)(10.0F * Math.Sin(angleX)),
                y = (double)(10.0F * Math.Sin(angleY))
            };

            // The real-valued input reuses the x component of the complex sample.
            _hostInput[offset + sample] = value.x;
            _hostInputCplx[offset + sample] = value;
        }
    }
}
/// <summary>
/// Walks through a step-by-step check of the CUDA tool chain and yields a
/// human-readable progress message before and after each step.
/// </summary>
/// <remarks>
/// <para>
/// Steps, in order: choose x64 or x86 nvcc options from the pointer size of the current
/// process; test those options; translate <c>CUDACheck</c> to CUDA C; compile it with nvcc;
/// and, if at least one CUDA device is reported, load the module on device 0, run
/// <c>TestKernelFunction</c> over two random integer arrays, check that every output
/// element equals the sum of the two inputs, and finally query the FFT library version.
/// </para>
/// <para>
/// This is an iterator, so each step runs only when the caller requests the next message.
/// Exceptions thrown by a step are not caught here and surface from the caller's enumeration.
/// </para>
/// </remarks>
/// <returns>The progress messages, in the order the steps are performed.</returns>
public static IEnumerable<string> TestCUDASDK()
{
    // Number of elements in each test array; also used as the launch block size.
    const int ElementCount = 1024;

    NvccCompilerOptions nvcc;
    if (IntPtr.Size == 8)
    {
        nvcc = NvccCompilerOptions.Createx64();
    }
    else
    {
        nvcc = NvccCompilerOptions.Createx86();
    }

    yield return string.Format("Platform={0}", nvcc.Platform);
    yield return "Checking for CUDA SDK at " + nvcc.CompilerPath;

    if (!nvcc.TryTest())
    {
        yield return "Could not locate CUDA Include directory.";
        yield break;
    }

    yield return string.Format("CUDA SDK Version={0}", nvcc.Version);

    yield return "Attempting to cudafy a kernel function.";
    var mod = CudafyTranslator.Cudafy(nvcc.Platform, eArchitecture.sm_11, nvcc.Version, false, typeof(CUDACheck));
    yield return "Successfully translated to CUDA C.";

    yield return "Attempting to compile CUDA C code.";
    // The value returned by Compile is not needed; only success or an exception matters here.
    mod.Compile(eGPUCompiler.CudaNvcc, true);
    yield return "Successfully compiled CUDA C into a module.";

    // Without a CUDA device there is nothing further to check.
    if (CudafyHost.GetDeviceCount(eGPUType.Cuda) <= 0)
    {
        yield break;
    }

    yield return "Attempting to instantiate CUDA device object (GPGPU).";
    var gpu = CudafyHost.GetDevice(eGPUType.Cuda, 0);
    yield return "Successfully got CUDA device 0.";

    yield return "Attempting to load module.";
    gpu.LoadModule(mod);
    yield return "Successfully loaded module.";

    yield return "Attempting to transfer data to GPU.";
    int[] a = new int[ElementCount];
    int[] b = new int[ElementCount];
    int[] c = new int[ElementCount];
    Random rand = new Random();
    for (int i = 0; i < ElementCount; i++)
    {
        a[i] = rand.Next(16384);
        b[i] = rand.Next(16384);
    }

    int[] dev_a = null;
    int[] dev_b = null;
    int[] dev_c = null;
    try
    {
        dev_a = gpu.CopyToDevice(a);
        dev_b = gpu.CopyToDevice(b);
        dev_c = gpu.Allocate(c);
        yield return "Successfully transferred data to GPU.";

        yield return "Attempting to launch function on GPU.";
        gpu.Launch(1, ElementCount).TestKernelFunction(dev_a, dev_b, dev_c);
        yield return "Successfully launched function on GPU.";

        yield return "Attempting to transfer results back from GPU.";
        gpu.CopyFromDevice(dev_c, c);
        yield return "Successfully transferred results from GPU.";
    }
    finally
    {
        // Release the device buffers even when a step throws or the enumerator is
        // disposed before the sequence has been read to the end.
        if (dev_a != null)
        {
            gpu.Free(dev_a);
        }

        if (dev_b != null)
        {
            gpu.Free(dev_b);
        }

        if (dev_c != null)
        {
            gpu.Free(dev_c);
        }
    }

    yield return "Testing results.";
    int errors = 0;
    for (int i = 0; i < ElementCount; i++)
    {
        if (a[i] + b[i] != c[i])
        {
            errors++;
        }
    }

    if (errors == 0)
    {
        yield return "Successfully tested results.";
    }
    else
    {
        yield return "Test failed - results not as expected.";
    }

    yield return "Checking for math libraries (FFT, BLAS, SPARSE, RAND).";
    var fft = GPGPUFFT.Create(gpu);
    int version = fft.GetVersion();
    if (version > 0)
    {
        yield return "Successfully detected.";
    }
    else
    {
        // Report the negative outcome too, so the caller is not left with an unanswered "Checking...".
        yield return "Math libraries not detected.";
    }
}