/// <summary>
/// Tries cudafying the assembly producing a *.cdfy file with same name as assembly.
/// </summary>
/// <remarks>
/// Starts <c>cudafycl.exe</c> with redirected standard output and standard error.
/// Both streams are drained while the process is running so that a child that writes
/// more than the pipe buffer can hold cannot block forever waiting for a reader.
/// </remarks>
/// <param name="assembly">The assembly.</param>
/// <param name="messages">Output messages of the cudafycl.exe process.</param>
/// <param name="arch">The architecture.</param>
/// <returns>
///   <c>true</c> if successful; otherwise, <c>false</c>.
/// </returns>
public static bool TryCudafy(this Assembly assembly, out string messages, eArchitecture arch = eArchitecture.sm_12)
{
    using (Process process = new Process())
    {
        process.StartInfo.UseShellExecute = false;
        process.StartInfo.RedirectStandardOutput = true;
        process.StartInfo.RedirectStandardError = true;
        process.StartInfo.FileName = "cudafycl.exe";
        // Quote the location so a path containing spaces is passed as a single argument.
        process.StartInfo.Arguments = string.Format("\"{0}\" -arch={1} -cdfy", assembly.Location, arch);

        // Standard error is collected asynchronously while standard output is read
        // synchronously; reading both synchronously one after the other can deadlock.
        StringBuilder errorText = new StringBuilder();
        process.ErrorDataReceived += (sender, e) =>
        {
            if (e.Data != null)
            {
                errorText.AppendLine(e.Data);
            }
        };

        process.Start();
        process.BeginErrorReadLine();
        string outputText = process.StandardOutput.ReadToEnd();
        // The parameterless overload also waits for the asynchronous stderr reader to reach end of stream.
        process.WaitForExit();

        if (process.ExitCode != 0)
        {
            // On failure report stderr first, then whatever was written to stdout.
            messages = errorText.ToString() + "\r\n" + outputText;
            return false;
        }

        messages = outputText;
        return true;
    }
}
/// <summary>
/// Creates a compiler instance for creating 32-bit apps.
/// </summary>
/// <remarks>
/// The toolkit is first looked up below <c>Utility.ProgramFiles()</c>; when no version is found there,
/// the lookup is repeated below the literal <c>C:\Program Files</c>. If the resulting options fail
/// <c>TryTest</c>, options that refer to the compiler by file name only (no include directory) are used instead.
/// </remarks>
/// <param name="cudaVersion">The cuda version.</param>
/// <param name="arch">Architecture.</param>
/// <returns>Compiler options with <c>-m32</c> and the architecture option added.</returns>
public static NvccCompilerOptions Createx86(Version cudaVersion, eArchitecture arch)
{
    string progFiles = Utility.ProgramFiles();
    string toolkitbasedir = progFiles + Path.DirectorySeparatorChar + csGPUTOOLKIT;
    Version selVer;
    string cvStr = GetCudaVersion(cudaVersion, toolkitbasedir, out selVer);
    if (string.IsNullOrEmpty(cvStr))
    {
        progFiles = "C:\\Program Files";
        toolkitbasedir = progFiles + Path.DirectorySeparatorChar + csGPUTOOLKIT;
        // Capture the version selected by the fallback lookup as well; previously it was
        // discarded and the options were built with the result of the failed first lookup.
        cvStr = GetCudaVersion(cudaVersion, toolkitbasedir, out selVer);
    }
    Debug.WriteLineIf(!string.IsNullOrEmpty(cvStr), "Compiler version: " + cvStr);

    string gpuToolKit = progFiles + Path.DirectorySeparatorChar + csGPUTOOLKIT + cvStr;
    string compiler = gpuToolKit + Path.DirectorySeparatorChar + @"bin" + Path.DirectorySeparatorChar + csNVCC;
    string includeDir = gpuToolKit + Path.DirectorySeparatorChar + @"include";

    NvccCompilerOptions opt = new NvccCompilerOptions("NVidia CC (x86)", compiler, includeDir, selVer, ePlatform.x86);
    if (!opt.TryTest())
    {
        // Fall back to the bare compiler file name with no include directory.
        opt = new NvccCompilerOptions("NVidia CC (x86)", csNVCC, string.Empty, selVer, ePlatform.x86);
    }

    opt.AddOption("-m32");
    opt.Platform = ePlatform.x86;
    AddArchOptions(opt, arch);
    return opt;
}
/// <summary>
/// Cudafies the assembly producing a *.cdfy file with same name as assembly,
/// throwing when <c>TryCudafy</c> reports failure.
/// </summary>
/// <param name="assembly">The assembly.</param>
/// <param name="arch">The architecture.</param>
/// <returns>Output messages of the cudafycl.exe process.</returns>
/// <exception cref="CudafyCompileException">TryCudafy returned <c>false</c>; the exception carries the collected messages.</exception>
public static string Cudafy(this Assembly assembly, eArchitecture arch = eArchitecture.sm_12)
{
    string output;
    bool succeeded = TryCudafy(assembly, out output, arch);
    if (succeeded)
    {
        return output;
    }

    throw new CudafyCompileException(CudafyCompileException.csCOMPILATION_ERROR_X, output);
}
/// <summary>
/// Creates a compiler instance for creating 64-bit apps.
/// </summary>
/// <remarks>
/// If the options built from the detected toolkit directory fail <c>TryTest</c>, options that refer to
/// the compiler by file name only (no include directory) are used instead.
/// </remarks>
/// <param name="cudaVersion">The cuda version or null for auto.</param>
/// <param name="arch">Architecture.</param>
/// <returns>Compiler options with <c>-m64</c> and the architecture option added.</returns>
public static NvccCompilerOptions Createx64(Version cudaVersion, eArchitecture arch)
{
    char sep = Path.DirectorySeparatorChar;
    string programFilesDir = Utility.ProgramFiles();

    Version selectedVersion;
    string versionDir = GetCudaVersion(cudaVersion, programFilesDir + sep + csGPUTOOLKIT, out selectedVersion);
    Debug.WriteLineIf(!string.IsNullOrEmpty(versionDir), "Compiler version: " + versionDir);

    string toolkitDir = programFilesDir + sep + csGPUTOOLKIT + versionDir;
    string nvccPath = toolkitDir + sep + @"bin" + sep + csNVCC;
    string includePath = toolkitDir + sep + @"include";

    NvccCompilerOptions options = new NvccCompilerOptions("NVidia CC (x64)", nvccPath, includePath, selectedVersion, ePlatform.x64);
    bool usable = options.TryTest();
    if (!usable)
    {
        // Fall back to the bare compiler file name with no include directory.
        options = new NvccCompilerOptions("NVidia CC (x64)", csNVCC, string.Empty, selectedVersion, ePlatform.x64);
    }

    options.AddOption("-m64");
    options.Platform = ePlatform.x64;
    AddArchOptions(options, arch);
    return options;
}
/// <summary>
/// Loads the cudafied module onto device 0 and launches the "GenerateRipples" kernel
/// with an 8x8 grid of 64x16-thread blocks, then copies the result buffer back to the host.
/// </summary>
public void ExeTestKernel()
{
    GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, 0);
    CudafyModule module = CudafyTranslator.Cudafy(gpu.GetArchitecture());
    gpu.LoadModule(module);

    int[] hostBuffer = new int[N];

    // Option 1: allocate an uninitialised block on the device to receive results.
    var deviceResults = gpu.Allocate<int>(N);

    // Option 2: fill a host array first ...
    for (int index = 0; index < N; ++index)
    {
        hostBuffer[index] = index * 3;
    }

    // ... and copy it to the device.
    var deviceFilled = gpu.CopyToDevice(hostBuffer);

    // 64 * 16 = 1024 threads per block (the maximum for sm_30).
    dim3 blockSize = new dim3(64, 16);
    // 8 * 8 = 64 blocks per grid; chosen only so the indices vary.
    dim3 gridSize = new dim3(8, 8);

    gpu.Launch(gridSize, blockSize, "GenerateRipples", deviceResults, deviceFilled);
    gpu.CopyFromDevice(deviceResults, hostBuffer);
}
/// <summary>
/// Tries cudafying the assembly producing a *.cdfy file with same name as assembly.
/// </summary>
/// <remarks>
/// Starts <c>cudafycl.exe</c> with redirected standard output and standard error.
/// Both streams are drained while the process is running so that a child that writes
/// more than the pipe buffer can hold cannot block forever waiting for a reader.
/// </remarks>
/// <param name="assembly">The assembly.</param>
/// <param name="messages">Output messages of the cudafycl.exe process.</param>
/// <param name="arch">The architecture.</param>
/// <returns>
///   <c>true</c> if successful; otherwise, <c>false</c>.
/// </returns>
public static bool TryCudafy(this Assembly assembly, out string messages, eArchitecture arch = eArchitecture.sm_12)
{
    using (Process process = new Process())
    {
        process.StartInfo.UseShellExecute = false;
        process.StartInfo.RedirectStandardOutput = true;
        process.StartInfo.RedirectStandardError = true;
        process.StartInfo.FileName = "cudafycl.exe";
        // Quote the location so a path containing spaces is passed as a single argument.
        process.StartInfo.Arguments = string.Format("\"{0}\" -arch={1} -cdfy", assembly.Location, arch);

        // Standard error is collected asynchronously while standard output is read
        // synchronously; reading both synchronously one after the other can deadlock.
        StringBuilder errorText = new StringBuilder();
        process.ErrorDataReceived += (sender, e) =>
        {
            if (e.Data != null)
            {
                errorText.AppendLine(e.Data);
            }
        };

        process.Start();
        process.BeginErrorReadLine();
        string outputText = process.StandardOutput.ReadToEnd();
        // The parameterless overload also waits for the asynchronous stderr reader to reach end of stream.
        process.WaitForExit();

        if (process.ExitCode != 0)
        {
            // On failure report stderr first, then whatever was written to stdout.
            messages = errorText.ToString() + "\r\n" + outputText;
            return false;
        }

        messages = outputText;
        return true;
    }
}
//
// http://stackoverflow.com/questions/18628447/cudafy-throws-an-exception-while-testing
//
/// <summary>
/// Reproduction sample: forces the emulator target, loads the cudafied module, copies three
/// 100-element float arrays to the device and issues a 10x10x10 GEMM call.
/// Always ends by throwing <see cref="NotImplementedException"/>.
/// </summary>
/// <param name="deviceId">Id of the device to obtain and to store in <c>CudafyModes.DeviceId</c>.</param>
private static void BlasSample(int deviceId)
{
    CudafyModes.Target = eGPUType.Emulator;
    GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, deviceId);
    CudafyModes.DeviceId = deviceId;

    CudafyModule module = CudafyTranslator.Cudafy(gpu.GetArchitecture());
    gpu.LoadModule(module);

    GPGPUBLAS blas = GPGPUBLAS.Create(gpu);

    const int elementCount = 100;
    float[] hostA = new float[elementCount];
    float[] hostB = new float[elementCount];
    float[] hostC = new float[elementCount];

    float alpha = -1;
    float beta = 0;

    float[] deviceA = gpu.CopyToDevice(hostA);
    float[] deviceB = gpu.CopyToDevice(hostB);
    float[] deviceC = gpu.CopyToDevice(hostC);

    int m = 10;
    int n = 10;
    int k = 10;
    cublasOperation operation = cublasOperation.N;
    blas.GEMM(m, k, n, alpha, deviceA, deviceB, beta, deviceC, operation);

    throw new NotImplementedException();
}
/// <summary>
/// Maps an architecture to a GPU type: the emulator architecture maps to the emulator type,
/// any value with bit 32768 set maps to OpenCL, everything else maps to Cuda.
/// </summary>
/// <param name="arch">The architecture.</param>
/// <returns>The GPU type for <paramref name="arch"/>.</returns>
public static eGPUType GetGPUType(eArchitecture arch)
{
    const uint openClBit = 32768;

    if (arch == eArchitecture.Emulator)
    {
        return eGPUType.Emulator;
    }

    if (((uint)arch & openClBit) != 0)
    {
        return eGPUType.OpenCL;
    }

    return eGPUType.Cuda;
}
/// <summary>
/// Loads the assembly at <paramref name="dllName"/>, cudafies all of its types for every platform
/// and serializes the module next to the assembly with the extension changed to "cdfy".
/// </summary>
/// <param name="dllName">Path of the assembly to load.</param>
/// <param name="arch">The architecture to cudafy for.</param>
private static void CreateCudafyModule(string dllName, eArchitecture arch)
{
    Type[] allTypes = Assembly.LoadFrom(dllName).GetTypes();
    var module = CudafyTranslator.Cudafy(ePlatform.All, arch, allTypes);
    module.Serialize(Path.ChangeExtension(dllName, "cdfy"));
}
/// <summary>
/// Returns the version number associated with an architecture: the compute capability for
/// <c>sm_xx</c> values and the OpenCL version for OpenCL values. For <c>Unknown</c> the result
/// depends on the current <c>Language</c> (1.0 for OpenCL, 1.3 for Cuda).
/// </summary>
/// <param name="arch">The architecture.</param>
/// <param name="types">Not read by this method.</param>
/// <returns>The version for <paramref name="arch"/>.</returns>
/// <exception cref="ArgumentException">The architecture is not one of the handled values.</exception>
private static Version GetComputeCapability(eArchitecture arch, params Type[] types)
{
    switch (arch)
    {
        case eArchitecture.sm_10: return new Version(1, 0);
        case eArchitecture.sm_11: return new Version(1, 1);
        case eArchitecture.sm_12: return new Version(1, 2);
        case eArchitecture.sm_13: return new Version(1, 3);
        case eArchitecture.sm_20: return new Version(2, 0);
        case eArchitecture.sm_21: return new Version(2, 1);
        case eArchitecture.sm_30: return new Version(3, 0);
        case eArchitecture.sm_35: return new Version(3, 5);
        case eArchitecture.OpenCL: return new Version(1, 0);
        case eArchitecture.OpenCL11: return new Version(1, 1);
        case eArchitecture.OpenCL12: return new Version(1, 2);
        case eArchitecture.Unknown:
            if (Language == eLanguage.OpenCL)
            {
                return new Version(1, 0);
            }
            if (Language == eLanguage.Cuda)
            {
                return new Version(1, 3);
            }
            break;
    }

    throw new ArgumentException("Unknown architecture.");
}
/// <summary>
/// Cudafies the specified types. Working directory will be as per CudafyTranslator.WorkingDirectory.
/// </summary>
/// <remarks>
/// The compile properties are created with <c>ePlatform.Auto</c>; the <paramref name="platform"/> and
/// <paramref name="cudaVersion"/> arguments are not read by this method.
/// </remarks>
/// <param name="platform">The platform.</param>
/// <param name="arch">The CUDA or OpenCL architecture.</param>
/// <param name="cudaVersion">The CUDA version.</param>
/// <param name="compile">if set to <c>false</c> the compile mode is forced to translate-only.</param>
/// <param name="types">The types.</param>
/// <returns>A CudafyModule.</returns>
public static CudafyModule Cudafy(ePlatform platform, eArchitecture arch, Version cudaVersion, bool compile, params Type[] types)
{
    var properties = CompilerHelper.Create(ePlatform.Auto, arch, eCudafyCompileMode.Default, WorkingDirectory, GenerateDebug);
    if (compile == false)
    {
        properties.CompileMode = eCudafyCompileMode.TranslateOnly;
    }

    CudafyModule module = Cudafy(properties, types);
    return module;
}
/// <summary>
/// Cudafies the assembly producing a *.cdfy file with same name as assembly,
/// throwing when <c>TryCudafy</c> reports failure.
/// </summary>
/// <param name="assembly">The assembly.</param>
/// <param name="arch">The architecture.</param>
/// <returns>Output messages of the cudafycl.exe process.</returns>
/// <exception cref="CudafyCompileException">TryCudafy returned <c>false</c>; the exception carries the collected messages.</exception>
public static string Cudafy(this Assembly assembly, eArchitecture arch = eArchitecture.sm_12)
{
    string output;
    bool succeeded = TryCudafy(assembly, out output, arch);
    if (succeeded)
    {
        return output;
    }

    throw new CudafyCompileException(CudafyCompileException.csCOMPILATION_ERROR_X, output);
}
/// <summary>
/// Determines the language for an architecture. <c>Unknown</c> yields <c>CudafyModes.Language</c>;
/// otherwise a value with bit 32768 set is OpenCL and anything else is Cuda.
/// </summary>
/// <param name="arch">The architecture.</param>
/// <returns>The language for <paramref name="arch"/>.</returns>
public static eLanguage GetLanguage(eArchitecture arch)
{
    const uint openClBit = 32768;

    if (arch == eArchitecture.Unknown)
    {
        return CudafyModes.Language;
    }

    bool hasOpenClBit = ((uint)arch & openClBit) != 0;
    if (hasOpenClBit)
    {
        return eLanguage.OpenCL;
    }

    return eLanguage.Cuda;
}
/// <summary>
/// Cudafies for the specified platform.
/// </summary>
/// <remarks>
/// The type to cudafy is the reflected type of the method one frame up the call stack.
/// A module previously serialized under that type's name is reused when it deserializes and its
/// checksums verify; otherwise the type is cudafied, named after the type and serialized.
/// </remarks>
/// <param name="platform">The platform.</param>
/// <param name="arch">The CUDA or OpenCL architecture.</param>
/// <returns>A CudafyModule.</returns>
public static CudafyModule Cudafy(ePlatform platform, eArchitecture arch)
{
    // Frame 0 is this method; frame 1 is its caller. The lookup must stay in this method body.
    Type callerType = new StackTrace().GetFrame(1).GetMethod().ReflectedType;

    CudafyModule module = CudafyModule.TryDeserialize(callerType.Name);
    bool cacheUsable = module != null && module.TryVerifyChecksums();
    if (cacheUsable)
    {
        return module;
    }

    module = Cudafy(platform, arch, callerType);
    module.Name = callerType.Name;
    module.TrySerialize();
    return module;
}
/// <summary>
/// Click handler: compiles the source text in the form with nvcc for each ticked platform
/// (x64 first, then x86) and refreshes the form. OpenCL is rejected with a message box, as is
/// having no platform ticked. Any exception is passed to <c>HandleException</c>.
/// </summary>
private void btnCompile_Click(object sender, EventArgs e)
{
    try
    {
        if (_module == null)
        {
            _module = new CudafyModule();
        }

        string archName = cbArch.SelectedItem as string;
        eArchitecture arch = (eArchitecture)Enum.Parse(typeof(eArchitecture), archName);

        if (arch == eArchitecture.OpenCL)
        {
            MessageBox.Show(this, "OpenCL modules are not compiled.", "Information", MessageBoxButtons.OK, MessageBoxIcon.Warning);
            return;
        }

        bool noPlatformSelected = !(cb32bit.Checked || cb64bit.Checked);
        if (noPlatformSelected)
        {
            MessageBox.Show(this, "Select a platform.", "Information", MessageBoxButtons.OK, MessageBoxIcon.Information);
            return;
        }

        _module.SourceCode = tbSource.Text;
        _module.CompilerOptionsList.Clear();
        _module.RemovePTXModules();

        if (cb64bit.Checked)
        {
            _module.CompilerOptionsList.Add(NvccCompilerOptions.Createx64(arch));
        }
        if (cb32bit.Checked)
        {
            _module.CompilerOptionsList.Add(NvccCompilerOptions.Createx86(arch));
        }

        _module.Compile(eGPUCompiler.CudaNvcc, false);
        FillForm();
    }
    catch (Exception ex)
    {
        HandleException(ex);
    }
}
/// <summary>
/// Loads the module on device 0 and a clone of it on device 1, enables multithreading on both,
/// then up to ten times runs one worker thread per device and waits at most 10 seconds for each.
/// The loop stops early if either join times out; the test asserts that the last joins succeeded.
/// </summary>
public void Test_TwoThreadTwoGPUVer2()
{
    eArchitecture arch;
    if (CudafyModes.Target == eGPUType.OpenCL)
    {
        arch = eArchitecture.OpenCL;
    }
    else
    {
        arch = eArchitecture.sm_11;
    }

    _gpu0 = CudafyHost.GetDevice(CudafyModes.Target, 0);
    var module0 = CudafyTranslator.Cudafy(arch, typeof(MultiGPUTests));
    _gpu0.SetCurrentContext();
    _gpu0.LoadModule(module0);
    _gpuuintBufferIn0 = _gpu0.Allocate(_uintBufferIn0);

    _gpu1 = CudafyHost.GetDevice(CudafyModes.Target, 1);
    // The same module instance cannot be loaded on two devices, so a clone is used.
    var module1 = module0.Clone();
    _gpu1.SetCurrentContext();
    _gpu1.LoadModule(module1);
    _gpuuintBufferIn1 = _gpu1.Allocate(_uintBufferIn1);

    _gpu0.EnableMultithreading();
    _gpu1.EnableMultithreading();

    bool joined0 = false;
    bool joined1 = false;
    int iteration = 0;
    while (iteration < 10)
    {
        Console.WriteLine(iteration);

        Thread worker0 = new Thread(Test_TwoThreadTwoGPU_Thread0V2);
        Thread worker1 = new Thread(Test_TwoThreadTwoGPU_Thread1V2);
        worker0.Start();
        worker1.Start();

        joined0 = worker0.Join(10000);
        joined1 = worker1.Join(10000);
        if (!(joined0 && joined1))
        {
            break;
        }

        iteration++;
    }

    _gpu0.DisableMultithreading();
    _gpu0.FreeAll();
    _gpu1.DisableMultithreading();
    _gpu1.FreeAll();

    Assert.IsTrue(joined0);
    Assert.IsTrue(joined1);
}
/// <summary>
/// Builds the compile properties for the given platform, architecture and compile mode.
/// </summary>
/// <remarks>
/// For Cuda the compiler path, include path and extra PATH entry come from <c>NvccExe</c>, and the
/// input/output file names are derived from "CUDAFYSOURCETEMP.tmp" (".cubin" output when the Binary
/// mode flag is set, ".ptx" otherwise). The emulator architecture and every non-Cuda language force
/// translate-only mode.
/// </remarks>
/// <param name="platform">The platform, stored unchanged in the result.</param>
/// <param name="arch">The architecture; <c>Unknown</c> is replaced by <c>Default</c> (Cuda) or <c>OpenCL</c>.</param>
/// <param name="mode">The compile mode flags.</param>
/// <param name="workingDir">Working directory; the current directory is used when it does not exist.</param>
/// <param name="debugInfo">Whether debug info should be generated.</param>
/// <returns>The populated compile properties.</returns>
public static CompileProperties Create(ePlatform platform = ePlatform.Auto, eArchitecture arch = eArchitecture.Default, eCudafyCompileMode mode = eCudafyCompileMode.Default, string workingDir = null, bool debugInfo = false)
{
    CompileProperties props = new CompileProperties();
    bool archUnknown = arch == eArchitecture.Unknown;

    if (GetLanguage(arch) != eLanguage.Cuda)
    {
        mode = eCudafyCompileMode.TranslateOnly;
        props.Architecture = archUnknown ? eArchitecture.OpenCL : arch;
    }
    else
    {
        // The result is not used; the call is retained so behaviour matches the previous implementation.
        string programFilesDir = Utility.ProgramFiles();

        props.CompilerPath = NvccExe.getCompilerPath();
        props.IncludeDirectoryPath = NvccExe.getIncludePath();
        props.PathEnvVarExtraEntries = new string[1] { NvccExe.getClExeDirectory() };
        props.Architecture = archUnknown ? eArchitecture.Default : arch;

        bool wantsBinary = (mode & eCudafyCompileMode.Binary) == eCudafyCompileMode.Binary;
        string templateName = "CUDAFYSOURCETEMP.tmp";
        props.InputFile = templateName.Replace(".tmp", ".cu");
        props.OutputFile = templateName.Replace(".tmp", wantsBinary ? ".cubin" : ".ptx");

        bool wantsDynamicParallelism = (mode & eCudafyCompileMode.DynamicParallelism) == eCudafyCompileMode.DynamicParallelism;
        if (wantsDynamicParallelism)
        {
            props.AdditionalInputArgs = "cudadevrt.lib cublas_device.lib -dlink";
        }

        // Checked last so the flag tests above see the caller's mode.
        if (arch == eArchitecture.Emulator)
        {
            mode = eCudafyCompileMode.TranslateOnly;
        }
    }

    props.WorkingDirectory = Directory.Exists(workingDir) ? workingDir : Environment.CurrentDirectory;
    props.Platform = platform;
    props.CompileMode = mode;
    props.GenerateDebugInfo = debugInfo;
    return props;
}
/// <summary>
/// Selects the target GPU type from the architecture, obtains a module (generated, compiled from
/// "cuda.cu", or deserialized), makes sure it lists a "calc_r" function and loads it on device 0.
/// </summary>
/// <param name="archi">Architecture; Emulator selects the emulator target, values at or above OpenCL select OpenCL.</param>
/// <param name="hasSdk">When <c>false</c> the module is deserialized instead of being built.</param>
/// <param name="generate">When <c>true</c> (with <paramref name="hasSdk"/>) the module is generated by cudafying.</param>
public static void init(eArchitecture archi = eArchitecture.sm_20, bool hasSdk = false, bool generate = false)
{
    if (archi == eArchitecture.Emulator)
    {
        CudafyModes.Target = eGPUType.Emulator;
    }
    else if (archi >= eArchitecture.OpenCL)
    {
        CudafyModes.Target = eGPUType.OpenCL;
    }

    if (!hasSdk)
    {
        // No SDK available: load a previously serialized module.
        // NOTE(review): this reads a file named after the architecture alone, while the generate
        // branch below writes "bespoke_" + architecture — confirm the names are meant to differ.
        km = CudafyModule.Deserialize(archi.ToString());
    }
    else if (generate || CudafyModes.Target != eGPUType.Cuda)
    {
        // NOTE(review): the language is switched to OpenCL when the target is Cuda — confirm this is intended.
        if (CudafyModes.Target == eGPUType.Cuda)
        {
            CudafyTranslator.Language = eLanguage.OpenCL;
        }

        km = CudafyTranslator.Cudafy(archi);
        km.Serialize("bespoke_" + archi);
    }
    else
    {
        // Cuda target without generation: compile the hand-written source with nvcc.
        km = new CudafyModule();
        km.SourceCode = System.IO.File.ReadAllText("cuda.cu");
        km.Compile(eGPUCompiler.CudaNvcc);
    }

    // Register "calc_r" manually when the module does not already list it.
    bool needsCalcR = !generate && !km.Functions.ContainsKey("calc_r");
    if (needsCalcR)
    {
        var calcRInfo = new KernelMethodInfo(typeof(RuneCalc), typeof(RuneCalc).GetMethod("calc_r"), eKernelMethodType.Global, false, eCudafyDummyBehaviour.Default, km);
        km.Functions.Add("calc_r", calcRInfo);
    }

    gpu = CudafyHost.GetDevice(CudafyModes.Target, 0);
    gpu.LoadModule(km);
}
/// <summary>
/// Cudafies the specified types. Working directory will be as per CudafyTranslator.WorkingDirectory.
/// </summary>
/// <remarks>
/// When compiling Cuda code, <c>ePlatform.Auto</c> is resolved from the process pointer size; x64 options
/// are added for every platform except x86, and x86 options for every platform except x64.
/// The module is named after the last non-null entry of <paramref name="types"/>, if there is one.
/// </remarks>
/// <param name="platform">The platform.</param>
/// <param name="arch">The CUDA or OpenCL architecture.</param>
/// <param name="cudaVersion">The CUDA version. Specify null to automatically use the highest installed version.</param>
/// <param name="compile">if set to <c>true</c> compile to PTX.</param>
/// <param name="types">The types.</param>
/// <returns>A CudafyModule.</returns>
/// <exception cref="CudafyFatalException">DoCudafy returned null.</exception>
public static CudafyModule CudafyOld(ePlatform platform, eArchitecture arch, Version cudaVersion, bool compile, params Type[] types)
{
    CUDALanguage.ComputeCapability = GetComputeCapability(arch);
    _architecture = arch;
    // NOTE(review): strict comparison, so eArchitecture.OpenCL itself does not switch the language — confirm.
    if (arch > eArchitecture.OpenCL)
    {
        CudafyTranslator.Language = eLanguage.OpenCL;
    }

    CudafyModule km = DoCudafy(null, types);
    if (km == null)
    {
        throw new CudafyFatalException(CudafyFatalException.csUNEXPECTED_STATE_X, "CudafyModule km = null");
    }

    km.WorkingDirectory = WorkingDirectory;
    if (compile && LanguageSpecifics.Language == eLanguage.Cuda)
    {
        if (platform == ePlatform.Auto)
        {
            platform = IntPtr.Size == 8 ? ePlatform.x64 : ePlatform.x86;
        }
        if (platform != ePlatform.x86)
        {
            km.CompilerOptionsList.Add(NvccCompilerOptions.Createx64(cudaVersion, arch));
        }
        if (platform != ePlatform.x64)
        {
            km.CompilerOptionsList.Add(NvccCompilerOptions.Createx86(cudaVersion, arch));
        }
        km.GenerateDebug = GenerateDebug;
        km.TimeOut = TimeOut;
        km.Compile(eGPUCompiler.CudaNvcc, DeleteTempFiles);
    }

    // LastOrDefault rather than Last: Last throws when no element matches, which made the
    // null check below unreachable and turned "no usable type" into an InvalidOperationException.
    Type lastType = types.LastOrDefault(t => t != null);
    if (lastType != null)
    {
        km.Name = lastType.Name;
    }
    return km;
}
/// <summary>
/// Adds the <c>-arch=&lt;name&gt;</c> option, using the enum member name of <paramref name="arch"/>,
/// and records the architecture on the options object.
/// </summary>
/// <param name="co">The compiler options to extend.</param>
/// <param name="arch">The architecture.</param>
private static void AddArchOptions(CompilerOptions co, eArchitecture arch)
{
    string archOption = string.Concat("-arch=", arch.ToString());
    co.AddOption(archOption);
    co.Architecture = arch;
}
/// <summary>
/// Runs the <c>TG</c> kernel over the sorted peak list, histograms the resulting values that fall
/// within <paramref name="molTolerance"/> of <paramref name="molWeight"/> into bins of 0.1, and
/// replaces <paramref name="molWeight"/> with the mean value of the most populated bin.
/// </summary>
/// <param name="peakList">Peak values; sorted in place.</param>
/// <param name="intensities">Not read by this method.</param>
/// <param name="molWeight">Centre of the search window on input; the estimate on output (0 when no value fell inside the window).</param>
/// <param name="molTolerance">Half-width of the search window.</param>
public static void GPU_generator(List<double> peakList, List<double> intensities, ref double molWeight, double molTolerance)
{
    // Ten bins per unit over the full window of width 2 * molTolerance. The product is
    // truncated as a whole; casting molTolerance alone dropped its fractional part first.
    int binCount = (int)(molTolerance * 2 * 10);
    int[] binHits = new int[binCount];
    double[] binSums = new double[binCount];

    GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, 0);
    eArchitecture arch = gpu.GetArchitecture();
    CudafyModule km = CudafyTranslator.Cudafy(arch);
    gpu.LoadModule(km);

    peakList.Sort();
    int peakCount = peakList.Count;
    int length = peakCount * peakCount;
    double[] peaks = peakList.ToArray();
    int[] len = new int[] { peakCount };
    double[,] output = new double[peakCount, peakCount];

    double[] peaks_d = gpu.Allocate<double>(peaks);
    int[] len_d = gpu.Allocate<int>(len);
    double[,] output_d = gpu.Allocate<double>(output);
    gpu.CopyToDevice(peaks, peaks_d);
    gpu.CopyToDevice(len, len_d);
    gpu.CopyToDevice(output, output_d);

    // Round the block count UP. The division must happen in floating point: dividing the
    // integers first truncated the quotient, so Math.Ceiling had nothing left to round and
    // the last partial block of work was never launched.
    // NOTE(review): 26 presumably matches the number of candidates the kernel tests per pair — confirm.
    int block = (int)Math.Ceiling((double)length * 26 / N);
    gpu.Launch(block, N).TG(peaks_d, len_d, output_d);
    gpu.CopyFromDevice(output_d, output);
    gpu.FreeAll();

    // Histogram the upper triangle (including the diagonal) of the result matrix.
    double windowStart = molWeight - molTolerance;
    for (int i = 0; i < peakCount; i++)
    {
        for (int j = i; j < peakCount; j++)
        {
            int bin = (int)(Math.Round(output[i, j] - windowStart, 2) * 10);
            if (bin >= 0 && bin < binHits.Length)
            {
                add_val(bin, binHits, output[i, j], binSums);
            }
        }
    }

    // Pick the first bin with the highest hit count and report its mean value.
    double value = 0;
    int max = 0;
    for (int i = 0; i < binHits.Length; i++)
    {
        if (binHits[i] > max)
        {
            max = binHits[i];
            value = binSums[i] / binHits[i];
        }
    }

    molWeight = value;
}
/// <summary>
/// Builds the compile properties for the given platform, architecture and compile mode.
/// </summary>
/// <remarks>
/// For Cuda the toolkit is located below the Program Files directory that matches the platform
/// (<c>Auto</c> is resolved from the process pointer size), and the input/output file names are derived
/// from "CUDAFYSOURCETEMP.tmp" (".cubin" output when the Binary mode flag is set, ".ptx" otherwise).
/// Every non-Cuda language forces translate-only mode.
/// </remarks>
/// <param name="platform">The platform.</param>
/// <param name="arch">The architecture; <c>Unknown</c> is replaced by <c>sm_13</c> (Cuda) or <c>OpenCL</c>.</param>
/// <param name="mode">The compile mode flags.</param>
/// <param name="workingDir">Working directory; the current directory is used when it does not exist.</param>
/// <param name="debugInfo">Whether debug info should be generated.</param>
/// <returns>The populated compile properties.</returns>
/// <exception cref="CudafyCompileException">No CUDA version directory was found (Cuda only).</exception>
public static CompileProperties Create(ePlatform platform = ePlatform.Auto, eArchitecture arch = eArchitecture.sm_13, eCudafyCompileMode mode = eCudafyCompileMode.Default, string workingDir = null, bool debugInfo = false)
{
    CompileProperties props = new CompileProperties();
    bool archUnknown = arch == eArchitecture.Unknown;

    if (GetLanguage(arch) != eLanguage.Cuda)
    {
        mode = eCudafyCompileMode.TranslateOnly;
        props.Architecture = archUnknown ? eArchitecture.OpenCL : arch;
    }
    else
    {
        // Pick the Program Files directory that matches the requested platform.
        string programFilesDir;
        if (platform == ePlatform.x64)
        {
            programFilesDir = Utility.ProgramFilesx64();
        }
        else if (platform == ePlatform.x86)
        {
            programFilesDir = Utility.ProgramFilesx86();
        }
        else
        {
            programFilesDir = Utility.ProgramFiles();
            if (platform == ePlatform.Auto)
            {
                platform = IntPtr.Size == 4 ? ePlatform.x86 : ePlatform.x64;
            }
        }

        char sep = Path.DirectorySeparatorChar;
        Version selectedVersion;
        string versionDir = GetCudaVersion(programFilesDir + sep + csGPUTOOLKIT, out selectedVersion);
        if (string.IsNullOrEmpty(versionDir))
        {
            throw new CudafyCompileException(CudafyCompileException.csCUDA_DIR_NOT_FOUND);
        }

        string toolkitDir = programFilesDir + sep + csGPUTOOLKIT + versionDir;
        props.CompilerPath = toolkitDir + sep + @"bin" + sep + csNVCC;
        props.IncludeDirectoryPath = toolkitDir + sep + @"include";
        props.Architecture = archUnknown ? eArchitecture.sm_13 : arch;

        bool wantsBinary = (mode & eCudafyCompileMode.Binary) == eCudafyCompileMode.Binary;
        string templateName = "CUDAFYSOURCETEMP.tmp";
        props.InputFile = templateName.Replace(".tmp", ".cu");
        props.OutputFile = templateName.Replace(".tmp", wantsBinary ? ".cubin" : ".ptx");

        bool wantsDynamicParallelism = (mode & eCudafyCompileMode.DynamicParallelism) == eCudafyCompileMode.DynamicParallelism;
        if (wantsDynamicParallelism)
        {
            props.AdditionalInputArgs = "cudadevrt.lib cublas_device.lib -dlink";
        }
    }

    props.WorkingDirectory = Directory.Exists(workingDir) ? workingDir : Environment.CurrentDirectory;
    props.Platform = platform;
    props.CompileMode = mode;
    props.GenerateDebugInfo = debugInfo;
    return props;
}
/// <summary>
/// Determines whether the module holds a PTX module for the specified platform whose
/// architecture is equal to or less than the one specified.
/// </summary>
/// <param name="platform">The platform.</param>
/// <param name="arch">The highest acceptable architecture.</param>
/// <returns>
///   <c>true</c> if at least one matching PTX module exists; otherwise, <c>false</c>.
/// </returns>
public bool HasPTXForPlatform(ePlatform platform, eArchitecture arch)
{
    return _PTXModules.Any(ptx => ptx.Platform == platform && ptx.Architecture <= arch);
}
/// <summary>
/// Determines whether the module holds a binary module for exactly the specified platform and architecture.
/// </summary>
/// <param name="platform">The platform.</param>
/// <param name="arch">The architecture.</param>
/// <returns>
///   <c>true</c> if at least one matching binary module exists; otherwise, <c>false</c>.
/// </returns>
public bool HasBinaryForPlatform(ePlatform platform, eArchitecture arch)
{
    return _BinaryModules.Any(binary => binary.Platform == platform && binary.Architecture == arch);
}
/// <summary>
/// Builds the compile properties for the given platform, architecture and compile mode.
/// </summary>
/// <remarks>
/// For Cuda the compiler path, include path and extra PATH entry come from <c>NvccExe</c>, and the
/// input/output file names are derived from "CUDAFYSOURCETEMP.tmp" (".cubin" output when the Binary
/// mode flag is set, ".ptx" otherwise). The emulator architecture and every non-Cuda language force
/// translate-only mode.
/// </remarks>
/// <param name="platform">The platform, stored unchanged in the result.</param>
/// <param name="arch">The architecture; <c>Unknown</c> is replaced by <c>sm_20</c> (Cuda) or <c>OpenCL</c>.</param>
/// <param name="mode">The compile mode flags.</param>
/// <param name="workingDir">Working directory; the current directory is used when it does not exist.</param>
/// <param name="debugInfo">Whether debug info should be generated.</param>
/// <returns>The populated compile properties.</returns>
public static CompileProperties Create(ePlatform platform = ePlatform.Auto, eArchitecture arch = eArchitecture.sm_20, eCudafyCompileMode mode = eCudafyCompileMode.Default, string workingDir = null, bool debugInfo = false)
{
    CompileProperties props = new CompileProperties();
    bool archUnknown = arch == eArchitecture.Unknown;

    if (GetLanguage(arch) != eLanguage.Cuda)
    {
        mode = eCudafyCompileMode.TranslateOnly;
        props.Architecture = archUnknown ? eArchitecture.OpenCL : arch;
    }
    else
    {
        // The result is not used; the call is retained so behaviour matches the previous implementation.
        string programFilesDir = Utility.ProgramFiles();

        props.CompilerPath = NvccExe.getCompilerPath();
        props.IncludeDirectoryPath = NvccExe.getIncludePath();
        props.PathEnvVarExtraEntries = new string[1] { NvccExe.getClExeDirectory() };
        props.Architecture = archUnknown ? eArchitecture.sm_20 : arch;

        bool wantsBinary = (mode & eCudafyCompileMode.Binary) == eCudafyCompileMode.Binary;
        string templateName = "CUDAFYSOURCETEMP.tmp";
        props.InputFile = templateName.Replace(".tmp", ".cu");
        props.OutputFile = templateName.Replace(".tmp", wantsBinary ? ".cubin" : ".ptx");

        bool wantsDynamicParallelism = (mode & eCudafyCompileMode.DynamicParallelism) == eCudafyCompileMode.DynamicParallelism;
        if (wantsDynamicParallelism)
        {
            props.AdditionalInputArgs = "cudadevrt.lib cublas_device.lib -dlink";
        }

        // Checked last so the flag tests above see the caller's mode.
        if (arch == eArchitecture.Emulator)
        {
            mode = eCudafyCompileMode.TranslateOnly;
        }
    }

    props.WorkingDirectory = Directory.Exists(workingDir) ? workingDir : Environment.CurrentDirectory;
    props.Platform = platform;
    props.CompileMode = mode;
    props.GenerateDebugInfo = debugInfo;
    return props;
}
/// <summary>
/// Returns the stored source code file with the highest architecture that does not exceed
/// <paramref name="arch"/> and whose language matches the language derived from <paramref name="arch"/>.
/// </summary>
/// <param name="arch">The highest acceptable architecture.</param>
/// <returns>The best matching source code file, or the default value when none matches.</returns>
public SourceCodeFile GetSourceCodeFile(eArchitecture arch = eArchitecture.Unknown)
{
    eLanguage wantedLanguage = GetLanguageFromArchitecture(arch);

    var candidates =
        from scf in _sourceCodes
        where scf.Architecture <= arch && scf.Language == wantedLanguage
        orderby scf.Architecture descending
        select scf;

    return candidates.FirstOrDefault();
}
/// <summary>
/// Loads the assembly at <paramref name="dllName"/>, cudafies all of its types for every platform
/// and serializes the module next to the assembly with the extension changed to "cdfy".
/// </summary>
/// <param name="dllName">Path of the assembly to load.</param>
/// <param name="arch">The architecture to cudafy for.</param>
private static void CreateCudafyModule(string dllName, eArchitecture arch)
{
    Type[] allTypes = Assembly.LoadFrom(dllName).GetTypes();
    var module = CudafyTranslator.Cudafy(ePlatform.All, arch, allTypes);
    module.Serialize(Path.ChangeExtension(dllName, "cdfy"));
}
/// <summary>
/// Verifies the checksums of all functions, constants and types.
/// </summary>
/// <param name="platform">Platform.</param>
/// <param name="arch">Architecture. When not <c>Unknown</c>, a program module for the platform and architecture must exist.</param>
/// <returns>
/// <c>true</c> if the module has at least one member, the required program module is present and
/// every function, constant and type verifies; otherwise <c>false</c>.
/// </returns>
public bool TryVerifyChecksums(ePlatform platform, eArchitecture arch)
{
    bool hasMembers = GetTotalMembers() != 0;
    if (!hasMembers)
    {
        return false;
    }

    if (arch != eArchitecture.Unknown && !HasProgramModuleForPlatform(platform, arch))
    {
        return false;
    }

    foreach (var function in Functions)
    {
        if (function.Value.TryVerifyChecksums() == false)
        {
            return false;
        }
    }

    foreach (var constant in Constants)
    {
        if (constant.Value.TryVerifyChecksums() == false)
        {
            return false;
        }
    }

    foreach (var type in Types)
    {
        if (type.Value.TryVerifyChecksums() == false)
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Derives the language from an architecture value.
/// </summary>
/// <remarks>
/// An architecture counts as OpenCL when it equals <c>eArchitecture.OpenCL</c> or has bit 32768 set,
/// the same flag test the architecture-to-language helpers elsewhere in the code base apply. Comparing
/// against <c>eArchitecture.OpenCL</c> alone classified the other OpenCL architecture values as Cuda.
/// </remarks>
/// <param name="arch">The architecture.</param>
/// <returns><c>eLanguage.OpenCL</c> for OpenCL architectures; otherwise <c>eLanguage.Cuda</c>.</returns>
private eLanguage GetLanguageFromArchitecture(eArchitecture arch)
{
    const uint openClFlag = 32768;
    bool isOpenCl = arch == eArchitecture.OpenCL || ((uint)arch & openClFlag) == openClFlag;
    return isOpenCl ? eLanguage.OpenCL : eLanguage.Cuda;
}
/// <summary>
/// Cudafies the specified types. Working directory will be as per CudafyTranslator.WorkingDirectory.
/// </summary>
/// <remarks>
/// When compiling Cuda code, <c>ePlatform.Auto</c> is resolved from the process pointer size; x64 options
/// are added for every platform except x86, and x86 options for every platform except x64.
/// The module is named after the last non-null entry of <paramref name="types"/>, if there is one.
/// </remarks>
/// <param name="platform">The platform.</param>
/// <param name="arch">The CUDA or OpenCL architecture.</param>
/// <param name="cudaVersion">The CUDA version. Specify null to automatically use the highest installed version.</param>
/// <param name="compile">if set to <c>true</c> compile to PTX.</param>
/// <param name="types">The types.</param>
/// <returns>A CudafyModule.</returns>
/// <exception cref="CudafyFatalException">DoCudafy returned null.</exception>
public static CudafyModule CudafyOld(ePlatform platform, eArchitecture arch, Version cudaVersion, bool compile, params Type[] types)
{
    CUDALanguage.ComputeCapability = GetComputeCapability(arch);
    _architecture = arch;
    // NOTE(review): strict comparison, so eArchitecture.OpenCL itself does not switch the language — confirm.
    if (arch > eArchitecture.OpenCL)
    {
        CudafyTranslator.Language = eLanguage.OpenCL;
    }

    CudafyModule km = DoCudafy(null, types);
    if (km == null)
    {
        throw new CudafyFatalException(CudafyFatalException.csUNEXPECTED_STATE_X, "CudafyModule km = null");
    }

    km.WorkingDirectory = WorkingDirectory;
    if (compile && LanguageSpecifics.Language == eLanguage.Cuda)
    {
        if (platform == ePlatform.Auto)
        {
            platform = IntPtr.Size == 8 ? ePlatform.x64 : ePlatform.x86;
        }
        if (platform != ePlatform.x86)
        {
            km.CompilerOptionsList.Add(NvccCompilerOptions.Createx64(cudaVersion, arch));
        }
        if (platform != ePlatform.x64)
        {
            km.CompilerOptionsList.Add(NvccCompilerOptions.Createx86(cudaVersion, arch));
        }
        km.GenerateDebug = GenerateDebug;
        km.TimeOut = TimeOut;
        km.Compile(eGPUCompiler.CudaNvcc, DeleteTempFiles);
    }

    // LastOrDefault rather than Last: Last throws when no element matches, which made the
    // null check below unreachable and turned "no usable type" into an InvalidOperationException.
    Type lastType = types.LastOrDefault(t => t != null);
    if (lastType != null)
    {
        km.Name = lastType.Name;
    }
    return km;
}
/// <summary>
/// Creates a default x64 instance for specified architecture by delegating to the
/// two-argument overload with a null CUDA version.
/// </summary>
/// <param name="arch">The architecture.</param>
/// <returns>The x64 compiler options.</returns>
public static NvccCompilerOptions Createx64(eArchitecture arch)
{
    NvccCompilerOptions options = Createx64((Version)null, arch);
    return options;
}
/// <summary>
/// Cudafies the specified types. Working directory will be as per CudafyTranslator.WorkingDirectory.
/// </summary>
/// <remarks>
/// The compile properties are created with <c>ePlatform.Auto</c>; the <paramref name="platform"/> and
/// <paramref name="cudaVersion"/> arguments are not read by this method.
/// </remarks>
/// <param name="platform">The platform.</param>
/// <param name="arch">The CUDA or OpenCL architecture.</param>
/// <param name="cudaVersion">The CUDA version.</param>
/// <param name="compile">if set to <c>false</c> the compile mode is forced to translate-only.</param>
/// <param name="types">The types.</param>
/// <returns>A CudafyModule.</returns>
public static CudafyModule Cudafy(ePlatform platform, eArchitecture arch, Version cudaVersion, bool compile, params Type[] types)
{
    var properties = CompilerHelper.Create(ePlatform.Auto, arch, eCudafyCompileMode.Default, WorkingDirectory, GenerateDebug);
    if (compile == false)
    {
        properties.CompileMode = eCudafyCompileMode.TranslateOnly;
    }

    CudafyModule module = Cudafy(properties, types);
    return module;
}
/// <summary>
/// Translates the specified types for the specified architecture without compiling.
/// You can later call Compile method on the CudafyModule.
/// </summary>
/// <param name="arch">The CUDA or OpenCL architecture.</param>
/// <param name="types">The types.</param>
/// <returns>The translated, uncompiled CudafyModule.</returns>
public static CudafyModule Translate(eArchitecture arch, params Type[] types)
{
    const bool compile = false;
    Version noExplicitVersion = null;
    return Cudafy(ePlatform.Auto, arch, noExplicitVersion, compile, types);
}
/// <summary>
/// Configures the translator for a Cuda target (classes allowed, debug generation on), loads the
/// cudafied module on device 0 and returns the result of <c>CalcDTW</c> for the two inputs.
/// </summary>
/// <param name="_x">First input, passed on to <c>CalcDTW</c>.</param>
/// <param name="_y">Second input, passed on to <c>CalcDTW</c>.</param>
/// <returns>The value returned by <c>CalcDTW</c>.</returns>
public static double DTW(double[] _x, double[] _y)
{
    // Translator / target configuration.
    CudafyModes.Target = eGPUType.Cuda;
    CudafyTranslator.AllowClasses = true;
    CudafyTranslator.GenerateDebug = true;

    // Device and module set-up.
    gpu = CudafyHost.GetDevice(CudafyModes.Target, 0);
    arch = gpu.GetArchitecture();
    km = CudafyTranslator.Cudafy(arch);
    gpu.LoadModule(km);

    double distance = CalcDTW(_x, _y);
    return distance;
}
/// <summary>
/// Gets the GPU of the type implied by the specified architecture by mapping the architecture
/// to a GPU type and delegating to the GPU-type overload.
/// </summary>
/// <param name="arch">Architecture type.</param>
/// <param name="deviceId">The device id.</param>
/// <returns>GPGPU instance.</returns>
public static GPGPU GetDevice(eArchitecture arch, int deviceId = 0)
{
    return GetDevice(CompilerHelper.GetGPUType(arch), deviceId);
}
/// <summary>
/// Determines the language for an architecture. <c>Unknown</c> yields <c>CudafyModes.Language</c>;
/// otherwise a value with bit 32768 set is OpenCL and anything else is Cuda.
/// </summary>
/// <param name="arch">The architecture.</param>
/// <returns>The language for <paramref name="arch"/>.</returns>
public static eLanguage GetLanguage(eArchitecture arch)
{
    const uint openClBit = 32768;

    if (arch == eArchitecture.Unknown)
    {
        return CudafyModes.Language;
    }

    bool hasOpenClBit = ((uint)arch & openClBit) != 0;
    if (hasOpenClBit)
    {
        return eLanguage.OpenCL;
    }

    return eLanguage.Cuda;
}
/// <summary>
/// Initializes a new instance via the parameterless constructor and then stores the
/// source text, language and architecture.
/// </summary>
/// <param name="source">Value assigned to <c>Source</c>.</param>
/// <param name="language">Value assigned to <c>Language</c>.</param>
/// <param name="arch">Value assigned to <c>Architecture</c>.</param>
public SourceCodeFile(string source, eLanguage language, eArchitecture arch)
    : this()
{
    this.Source = source;
    this.Language = language;
    this.Architecture = arch;
}
/// <summary>
/// Maps an architecture to a version number.
/// </summary>
/// <param name="arch">The architecture to map.</param>
/// <param name="types">Not read by this method.</param>
/// <returns>The version associated with <paramref name="arch"/>.</returns>
/// <exception cref="ArgumentException">The architecture is not one of the handled values.</exception>
private static Version GetComputeCapability(eArchitecture arch, params Type[] types)
{
    switch (arch)
    {
        case eArchitecture.Emulator: return new Version(0, 1);
        case eArchitecture.sm_10: return new Version(1, 0);
        case eArchitecture.sm_11: return new Version(1, 1);
        case eArchitecture.sm_12: return new Version(1, 2);
        case eArchitecture.sm_13: return new Version(1, 3);
        case eArchitecture.sm_20: return new Version(2, 0);
        case eArchitecture.sm_21: return new Version(2, 1);
        case eArchitecture.sm_30: return new Version(3, 0);
        case eArchitecture.sm_35: return new Version(3, 5);
        case eArchitecture.sm_37: return new Version(3, 7);
        case eArchitecture.sm_50: return new Version(5, 0);
        case eArchitecture.sm_52: return new Version(5, 2);
        case eArchitecture.OpenCL: return new Version(1, 0);
        case eArchitecture.OpenCL11: return new Version(1, 1);
        case eArchitecture.OpenCL12: return new Version(1, 2);
        case eArchitecture.Unknown:
            // With no explicit architecture the answer depends on the current Language.
            if (Language == eLanguage.OpenCL)
            {
                return new Version(1, 0);
            }
            if (Language == eLanguage.Cuda)
            {
                return new Version(1, 3);
            }
            break;
    }

    throw new ArgumentException("Unknown architecture.");
}
/// <summary>
/// Cudafies the type that called this method (taken from stack frame 1) for the specified platform.
/// A module previously serialized under the type's name is reused when it deserializes and its
/// checksums verify; otherwise the type is cudafied again and the result is serialized.
/// </summary>
/// <param name="platform">The platform.</param>
/// <param name="arch">The CUDA or OpenCL architecture.</param>
/// <returns>A CudafyModule.</returns>
public static CudafyModule Cudafy(ePlatform platform, eArchitecture arch)
{
    // The stack trace must be captured here so that frame 1 is the direct caller.
    Type callerType = new StackTrace().GetFrame(1).GetMethod().ReflectedType;

    CudafyModule module = CudafyModule.TryDeserialize(callerType.Name);
    if (module != null && module.TryVerifyChecksums())
    {
        return module;
    }

    module = Cudafy(platform, arch, callerType);
    module.Name = callerType.Name;
    module.TrySerialize();
    return module;
}
/// <summary>
/// Determines whether the module has PTX or a binary for the specified platform and architecture.
/// </summary>
/// <param name="platform">The platform.</param>
/// <param name="arch">The architecture.</param>
/// <returns>
/// <c>true</c> if <c>HasPTXForPlatform</c> or <c>HasBinaryForPlatform</c> reports a match; otherwise, <c>false</c>.
/// </returns>
public bool HasProgramModuleForPlatform(ePlatform platform, eArchitecture arch)
{
    if (HasPTXForPlatform(platform, arch))
    {
        return true;
    }

    return HasBinaryForPlatform(platform, arch);
}
/// <summary>
/// Maps an architecture to a GPU type.
/// </summary>
/// <param name="arch">The architecture.</param>
/// <returns>
/// <c>eGPUType.Emulator</c> for <c>eArchitecture.Emulator</c>; otherwise <c>eGPUType.OpenCL</c>
/// when bit 32768 of the architecture value is set, else <c>eGPUType.Cuda</c>.
/// </returns>
public static eGPUType GetGPUType(eArchitecture arch)
{
    const uint openClBit = 32768;

    if (arch == eArchitecture.Emulator)
    {
        return eGPUType.Emulator;
    }

    uint archBits = (uint)arch;
    if ((archBits & openClBit) != openClBit)
    {
        return eGPUType.Cuda;
    }

    return eGPUType.OpenCL;
}
/// <summary>
/// Translates the specified types for the given architecture without compiling.
/// The resulting CudafyModule can be compiled later through its Compile method.
/// </summary>
/// <param name="arch">The CUDA or OpenCL architecture.</param>
/// <param name="types">The types.</param>
/// <returns>The CudafyModule produced by <c>Cudafy</c> with compilation switched off.</returns>
public static CudafyModule Translate(eArchitecture arch, params Type[] types)
{
    const bool compile = false;
    Version cudaVersion = null;
    return Cudafy(ePlatform.Auto, arch, cudaVersion, compile, types);
}
/// <summary>
/// Builds the two lookup tables from <paramref name="keys"/> on the host, copies them and the
/// input characters to the GPU, launches the <c>Dot</c> kernel and returns the result buffer.
/// </summary>
/// <param name="keys">Keys handed to the <c>StringSearch</c> constructor.</param>
/// <param name="I">Input text; its characters are copied to the device.</param>
/// <param name="n">Not read by this method.</param>
/// <returns>The character buffer copied back from the device, one slot per input character.</returns>
public static char[] Execute(String[] keys, string I, int n)
{
    GPGPU device = CudafyHost.GetDevice(CudafyModes.Target, 0);
    eArchitecture deviceArch = device.GetArchitecture();
    CudafyModule module = CudafyTranslator.Cudafy(deviceArch);
    device.LoadModule(module);

    // The stopwatch brackets host-side table construction; its reading is not used afterwards.
    Stopwatch hostTimer = new Stopwatch();
    hostTimer.Start();

    StringSearch search = new StringSearch(keys);
    string alphabet = "ABCDEFGHI*KLMN*PQRST*VWXYZ";
    int alphabetSize = alphabet.Length;

    // Every cell starts at -1 before build_table1 fills the table.
    int[,] hostTable1 = new int[StringSearch.nodeCount, alphabetSize];
    for (int row = 0; row < StringSearch.nodeCount; row++)
    {
        for (int col = 0; col < alphabetSize; col++)
        {
            hostTable1[row, col] = -1;
        }
    }
    search.build_table1(hostTable1, search._root);

    char[] hostInput = I.ToCharArray();
    int inputLength = I.Length;
    I = "";

    int[] hostOutputTable = new int[StringSearch.nodeCount];
    search.build_tableO(hostOutputTable, search._root);
    search = new StringSearch();

    char[] hostMatches = new char[inputLength];
    hostTimer.Stop();

    device.SetCurrentContext();

    // Shape templates used only to size the device allocations.
    int[] outputTableShape = new int[StringSearch.nodeCount];
    int[,] table1Shape = new int[StringSearch.nodeCount, alphabetSize];

    int[,] devTable1 = device.Allocate<int>(table1Shape);
    int[] devOutputTable = device.Allocate<int>(outputTableShape);
    char[] devMatches = device.Allocate<char>(inputLength);
    char[] devInput = device.Allocate<char>(inputLength);
    int[] devInputLength = device.Allocate<int>(1);
    int[] hostInputLength = { inputLength };

    device.CopyToDevice(hostTable1, devTable1);
    device.CopyToDevice(hostOutputTable, devOutputTable);
    device.CopyToDevice(hostMatches, devMatches);
    device.CopyToDevice(hostInput, devInput);
    device.CopyToDevice(hostInputLength, devInputLength);

    // One block per N input characters, rounded up.
    int blockCount = (int)Math.Ceiling((double)inputLength / N);
    device.Launch(blockCount, N).Dot(devTable1, devOutputTable, devMatches, devInput, devInputLength);

    device.CopyFromDevice(devMatches, hostMatches);
    device.FreeAll();
    return hostMatches;
}
/// <summary>
/// Cudafies the specified types for the specified architecture on the automatic platform,
/// with compilation enabled and no explicit CUDA version.
/// </summary>
/// <param name="arch">The CUDA or OpenCL architecture.</param>
/// <param name="types">The types.</param>
/// <returns>A CudafyModule.</returns>
public static CudafyModule Cudafy(eArchitecture arch, params Type[] types)
{
    CudafyModule module = Cudafy(ePlatform.Auto, arch, null, true, types);
    return module;
}
/// <summary>
/// Usage: cudafycl.exe myassembly.dll [-arch=sm_11|sm_12|sm_13|sm_20|sm_21|sm_30|sm_35|sm_37|sm_50|sm_52] [-cdfy]
/// </summary>
/// <remarks>
/// Runs in two modes, selected by the presence of <c>cGUID</c> among the arguments:
/// without it, cudafycl.exe is relaunched as a child process with the same arguments plus
/// <c>cGUID</c>, and on success (and unless <c>-cdfy</c> was given) <c>EmbedInAssembly</c> is
/// called for the first argument; with it, the <c>-arch</c> argument is parsed and
/// <c>CreateCudafyModule</c> is called.
/// </remarks>
/// <param name="args">Command line arguments; the first one is treated as the assembly.</param>
/// <returns>0 on success; -1 when no arguments are given or an exception is caught.</returns>
static int Main(string[] args)
{
    if (args.Length < 1)
    {
        Console.WriteLine("Usage: cudafycl.exe myassembly.dll [-arch=sm_11|sm_12|sm_13|sm_20|sm_21|sm_30|sm_35|sm_37|sm_50|sm_52] [-cdfy]");
        Console.WriteLine("\t-arch: CUDA architecture. Optional. Default is sm_30.");
        Console.WriteLine("\t-cdfy: cudafy the assembly and create the *.cdfy output file where * is assembly name. Optional.");
        return -1;
    }

    try
    {
        if (!args.Contains(cGUID))
        {
            int exitCode;
            string outputText;
            StringBuilder errorText = new StringBuilder();

            using (Process process = new Process())
            {
                process.StartInfo.UseShellExecute = false;
                process.StartInfo.RedirectStandardOutput = true;
                process.StartInfo.RedirectStandardError = true;
                process.StartInfo.FileName = "cudafycl.exe";

                StringBuilder sb = new StringBuilder();
                foreach (var arg in args)
                {
                    // Quote arguments containing spaces (e.g. an assembly path) so the child
                    // receives them as a single argument.
                    bool needsQuotes = arg.IndexOf(' ') >= 0 && arg.IndexOf('"') < 0;
                    sb.AppendFormat(needsQuotes ? "\"{0}\" " : "{0} ", arg);
                }
                sb.Append(cGUID);
                process.StartInfo.Arguments = sb.ToString();

                process.ErrorDataReceived += (sender, e) =>
                {
                    if (e.Data != null)
                    {
                        errorText.AppendLine(e.Data);
                    }
                };

                process.Start();

                // Drain both redirected streams while the child runs. Waiting for exit before
                // reading can deadlock once the child fills a pipe buffer.
                process.BeginErrorReadLine();
                outputText = process.StandardOutput.ReadToEnd();
                process.WaitForExit();
                exitCode = process.ExitCode;
            }

            if (exitCode != 0)
            {
                string s = errorText.ToString() + "\r\n";
                s += outputText;
                throw new CudafyCompileException(CudafyCompileException.csCOMPILATION_ERROR_X, s);
            }
            else if (!args.Contains("-cdfy"))
            {
                EmbedInAssembly(args[0]);
            }
        }
        else
        {
            // NOTE(review): when no -arch argument is present FirstOrDefault yields
            // default(eArchitecture), not necessarily eArchitecture.Default - confirm intent.
            var arch = args.Where(a => a.StartsWith("-arch", StringComparison.Ordinal)).Select(a =>
            {
                string[] parts = a.Split('=');
                eArchitecture ar = eArchitecture.Default;
                if (parts.Length > 1)
                {
                    bool pass = Enum.TryParse<eArchitecture>(parts[1], out ar);
                    return pass ? ar : eArchitecture.Default;
                }
                else
                {
                    return ar;
                }
            }).FirstOrDefault();

            Console.WriteLine(string.Format(@"CreateCudafyModule(""{0}"", {1});", args[0], arch));
            CreateCudafyModule(args[0], arch);
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine("Error: {0}", ex.ToString());
        return -1;
    }

    return 0;
}
/// <summary>
/// Reads a binary file and appends it to <c>_BinaryModules</c>.
/// </summary>
/// <param name="sourceCodeFileId">Stored as the module's <c>SourceCodeID</c>.</param>
/// <param name="platform">Stored as the module's <c>Platform</c>.</param>
/// <param name="arch">Stored as the module's <c>Architecture</c>.</param>
/// <param name="path">File to read; when it does not exist, "a_dlink.cubin" is read instead.</param>
internal void StoreBinaryFile(string sourceCodeFileId, ePlatform platform, eArchitecture arch, string path)
{
    string binaryPath = File.Exists(path) ? path : "a_dlink.cubin";
    byte[] content = File.ReadAllBytes(binaryPath);

    BinaryModule module = new BinaryModule();
    module.Platform = platform;
    module.Binary = content;
    module.Architecture = arch;
    module.SourceCodeID = sourceCodeFileId;
    _BinaryModules.Add(module);
}
/// <summary>
/// Tries cudafying the assembly producing a *.cdfy file with same name as assembly.
/// Convenience overload that discards the messages produced by the full overload.
/// </summary>
/// <param name="assembly">The assembly.</param>
/// <param name="arch">The architecture.</param>
/// <returns>
/// <c>true</c> if successful; otherwise, <c>false</c>.
/// </returns>
public static bool TryCudafy(this Assembly assembly, eArchitecture arch = eArchitecture.sm_12)
{
    string discardedMessages;
    return assembly.TryCudafy(out discardedMessages, arch);
}
/// <summary>
/// Adds the "-arch=&lt;name&gt;" option, built from the enum member's name, and records the
/// architecture on the options object.
/// </summary>
/// <param name="co">Compiler options to update.</param>
/// <param name="arch">Architecture whose name is used as the option value.</param>
private static void AddArchOptions(CompilerOptions co, eArchitecture arch)
{
    string archOption = "-arch=" + arch;
    co.AddOption(archOption);
    co.Architecture = arch;
}
/// <summary>
/// Benchmark entry point. With no arguments it prints the available switches; "--print" shows
/// compiler and device information; "--list" prints the known architectures with their numeric
/// values; "--all" benchmarks every listed architecture; any other arguments are parsed as
/// integer architecture values to benchmark.
/// </summary>
/// <param name="args">Command line arguments.</param>
static void Main(string[] args)
{
    try
    {
        if (args == null || args.Length == 0)
        {
            Console.WriteLine("--list for architecture targets");
            Console.WriteLine("--print for some SDK/GPU info");
            Console.WriteLine("--all to iterate over all target");
            return;
        }
        if (args?.FirstOrDefault() == "--print")
        {
            // Pick the compiler options matching the bitness of the running process.
            NvccCompilerOptions nvcc;
            if (IntPtr.Size == 8)
            {
                nvcc = NvccCompilerOptions.Createx64();
            }
            else
            {
                nvcc = NvccCompilerOptions.Createx86();
            }
            Console.WriteLine(string.Format("Platform={0}", nvcc.Platform));
            Console.WriteLine("CUDA SDK at " + nvcc.CompilerPath);
            Console.WriteLine("Test: " + nvcc.TryTest());
            Console.WriteLine("Press anykey for cards...");
            Console.ReadKey();
            Console.WriteLine("Reading...");
            foreach (var t in new eGPUType[] { eGPUType.Cuda, eGPUType.Emulator, eGPUType.OpenCL })
            {
                CudafyModes.Target = t;
                printInfo();
            }
            return;
        }
        var ars = new eArchitecture[] { eArchitecture.OpenCL, eArchitecture.Emulator, eArchitecture.sm_10, eArchitecture.sm_11, eArchitecture.sm_12, eArchitecture.sm_13, eArchitecture.sm_20, eArchitecture.sm_21, eArchitecture.sm_30, eArchitecture.sm_35, eArchitecture.sm_37, eArchitecture.sm_50, eArchitecture.sm_52, eArchitecture.OpenCL11, eArchitecture.OpenCL12 };
        if (args?.FirstOrDefault() == "--list")
        {
            Console.WriteLine(string.Join(Environment.NewLine, ars.Select(a => a + ": " + (int)a)));
            return;
        }
        if (args?.FirstOrDefault() != "--all")
        {
            // NOTE(review): int.Parse runs outside the per-architecture try/catch below, so a
            // non-numeric argument propagates out of Main (only the finally block runs).
            ars = args.Select(ii => (eArchitecture)int.Parse(ii)).ToArray();
        }
        foreach (var a in ars)
        {
            try
            {
                // Fixed seed: the generated data is the same on every run.
                Random rand = new Random(4);
                Console.WriteLine("Benching " + a);
                Console.WriteLine("Init: " + MeasureTime(() => RuneCalc.init(a, true, true)));
                int[] num_runes = new int[] { 79, 103, 81, 93, 88, 90 };
                int num_stats = 8;
                RuneCalc.flat = new int[num_runes.Sum(), 8];
                RuneCalc.mult = new int[num_runes.Sum(), 8];
                Console.WriteLine("gen: " + MeasureTime(() =>
                {
                    for (int slot = 0; slot < 6; slot++)
                    {
                        // NOTE(review): the row index restarts at 0 for every slot, so each slot
                        // overwrites the same leading rows and rows at or above the largest
                        // per-slot count are never written here, while the build indices below
                        // are drawn from cumulative ranges - confirm this is intended.
                        for (int rune = 0; rune < num_runes[slot]; rune++)
                        {
                            for (int s = 0; s < num_stats; s++)
                            {
                                if (s < 3)
                                {
                                    RuneCalc.flat[rune, s] = rand.Next(0, 50);
                                    RuneCalc.mult[rune, s] = rand.Next(0, 20);
                                }
                                else
                                {
                                    RuneCalc.flat[rune, s] = rand.Next(0, 10);
                                    RuneCalc.mult[rune, s] = 0;
                                }
                            }
                        }
                    }
                    RuneCalc.stat = new int[] { 3500, 530, 403, 101, 15, 50, 15, 0 };
                }));
                Console.WriteLine("seed: " + MeasureTime(() => RuneCalc.Seed()));
                List <RunData> rd = new List <RunData>();
                int reps = 10;
                // Original note: 2^31 / 32 (int) / 8 (2d size).
                // NOTE(review): 2 << 23 evaluates to 2^24, which does not match that arithmetic.
                int num_builds = 2 << 23;
                for (int i = 0; i < reps; i++)
                {
                    Console.WriteLine("\rRun " + i);
                    Console.Write("Building data\r");
                    int[,] b = new int[num_builds, 6];
                    var d = MeasureTime(() =>
                    {
                        for (var j = 0; j < num_builds; j++)
                        {
                            // rn is the running offset of slot k's index range.
                            int rn = 0;
                            for (int k = 0; k < 6; k++)
                            {
                                b[j, k] = rand.Next(rn, rn + num_runes[k]);
                                rn += num_runes[k];
                            }
                        }
                    });
                    var runData = RuneCalc.Execute(b);
                    runData.dataTime = d;
                    rd.Add(runData);
                    // Move the cursor back up so the next "Run" line overwrites this one.
                    Console.CursorTop -= 1;
                }
                Console.WriteLine();
                Console.WriteLine("Av Dat: " + rd.Average(qr => qr.dataTime));
                Console.WriteLine("Av CPU: " + rd.Average(qr => qr.cpuTime));
                Console.WriteLine("Av GPU: " + rd.Average(qr => qr.gpuTime));
                Console.WriteLine("Av On: " + rd.Average(qr => qr.gpuOn));
                Console.WriteLine("Av Off: " + rd.Average(qr => qr.gpuOff));
                Console.WriteLine("Av SPD: " + 100 * (1 / (rd.Average(qr => qr.gpuTime) / rd.Average(qr => qr.cpuTime))));
                Console.WriteLine("Av Ttl: " + 100 * (1 / ((rd.Average(qr => qr.gpuTime) + rd.Average(qr => qr.gpuOn) + rd.Average(qr => qr.gpuOff)) / rd.Average(qr => qr.cpuTime))));
                Console.WriteLine("Av sse: " + rd.Average(qr => qr.sumSq));
                Console.WriteLine();
            }
            catch (Exception e)
            {
                // A failure for one architecture is reported and the loop continues with the next.
                Console.WriteLine(e.GetType() + ": " + e.Message + Environment.NewLine + e.StackTrace);
            }
        }
    }
    finally
    {
        // Keep the console window open when running under a debugger.
        if (System.Diagnostics.Debugger.IsAttached)
        {
            Console.WriteLine("Press anykey to exit...");
            Console.Read();
        }
    }
}
/// <summary>
/// Creates a compiler instance for creating 32-bit apps.
/// </summary>
/// <remarks>
/// The toolkit is looked up under <c>Utility.ProgramFiles()</c> first and, when no version is
/// found there, under "C:\Program Files". If the resulting options fail <c>TryTest</c>, options
/// that use the bare compiler name and no include directory are returned instead.
/// </remarks>
/// <param name="cudaVersion">The cuda version.</param>
/// <param name="arch">Architecture.</param>
/// <returns>Options with "-m32" and the architecture option added, for platform x86.</returns>
public static NvccCompilerOptions Createx86(Version cudaVersion, eArchitecture arch)
{
    string progFiles = Utility.ProgramFiles();
    string toolkitbasedir = progFiles + Path.DirectorySeparatorChar + csGPUTOOLKIT;
    Version selVer;
    string cvStr = GetCudaVersion(cudaVersion, toolkitbasedir, out selVer);
    if (string.IsNullOrEmpty(cvStr))
    {
        progFiles = "C:\\Program Files";
        toolkitbasedir = progFiles + Path.DirectorySeparatorChar + csGPUTOOLKIT;
        // Refresh selVer as well, so the options below carry the version that belongs to the
        // fallback directory rather than the result of the failed first lookup.
        cvStr = GetCudaVersion(cudaVersion, toolkitbasedir, out selVer);
    }
    Debug.WriteLineIf(!string.IsNullOrEmpty(cvStr), "Compiler version: " + cvStr);

    string gpuToolKit = progFiles + Path.DirectorySeparatorChar + csGPUTOOLKIT + cvStr;
    string compiler = gpuToolKit + Path.DirectorySeparatorChar + @"bin" + Path.DirectorySeparatorChar + csNVCC;
    string includeDir = gpuToolKit + Path.DirectorySeparatorChar + @"include";

    NvccCompilerOptions opt = new NvccCompilerOptions("NVidia CC (x86)", compiler, includeDir, selVer, ePlatform.x86);
    if (!opt.TryTest())
    {
        // Fall back to the bare compiler name with no include directory.
        opt = new NvccCompilerOptions("NVidia CC (x86)", csNVCC, string.Empty, selVer, ePlatform.x86);
    }

    opt.AddOption("-m32");
    opt.Platform = ePlatform.x86;
    AddArchOptions(opt, arch);
    return opt;
}
/// <summary>
/// Cudafies the specified types for the specified platform, with compilation enabled and no
/// explicit CUDA version.
/// </summary>
/// <param name="platform">The platform.</param>
/// <param name="arch">The CUDA or OpenCL architecture.</param>
/// <param name="types">The types.</param>
/// <returns>A CudafyModule.</returns>
public static CudafyModule Cudafy(ePlatform platform, eArchitecture arch, params Type[] types)
    => Cudafy(platform, arch, null, true, types);
/// <summary>
/// Creates a default x64 instance for the specified architecture by forwarding to the
/// overload that takes a CUDA version, passing <c>null</c> for the version.
/// </summary>
/// <param name="arch">The architecture.</param>
/// <returns>The options returned by the two-argument overload.</returns>
public static NvccCompilerOptions Createx64(eArchitecture arch)
{
    Version unspecifiedVersion = null;
    return Createx64(unspecifiedVersion, arch);
}
/// <summary>
/// Creates a compiler instance for creating 64-bit apps.
/// </summary>
/// <remarks>
/// The toolkit is looked up under <c>Utility.ProgramFiles()</c>. If the resulting options fail
/// <c>TryTest</c>, options that use the bare compiler name and no include directory are used.
/// NOTE(review): the earlier documentation listed <c>NotSupportedException</c> ("ProgramFilesx64
/// not found"); nothing in this method throws it directly - confirm whether a callee does.
/// </remarks>
/// <param name="cudaVersion">The cuda version or null for auto.</param>
/// <param name="arch">Architecture.</param>
/// <returns>Options with "-m64" and the architecture option added, for platform x64.</returns>
public static NvccCompilerOptions Createx64(Version cudaVersion, eArchitecture arch)
{
    const string displayName = "NVidia CC (x64)";
    char sep = Path.DirectorySeparatorChar;

    string programFilesDir = Utility.ProgramFiles();
    Version selectedVersion;
    string versionSuffix = GetCudaVersion(cudaVersion, programFilesDir + sep + csGPUTOOLKIT, out selectedVersion);
    Debug.WriteLineIf(!string.IsNullOrEmpty(versionSuffix), "Compiler version: " + versionSuffix);

    string toolkitDir = programFilesDir + sep + csGPUTOOLKIT + versionSuffix;
    string compilerPath = toolkitDir + sep + @"bin" + sep + csNVCC;
    string includePath = toolkitDir + sep + @"include";

    NvccCompilerOptions options = new NvccCompilerOptions(displayName, compilerPath, includePath, selectedVersion, ePlatform.x64);
    if (!options.TryTest())
    {
        // Fall back to the bare compiler name with no include directory.
        options = new NvccCompilerOptions(displayName, csNVCC, string.Empty, selectedVersion, ePlatform.x64);
    }

    options.AddOption("-m64");
    options.Platform = ePlatform.x64;
    AddArchOptions(options, arch);
    return options;
}
/// <summary>
/// Gets the GPU of the type implied by the specified architecture. The architecture is first
/// translated with <c>CompilerHelper.GetGPUType</c>; the lookup itself is done by the
/// <c>eGPUType</c> overload (documented originally as cache-backed and as setting the current
/// context to the returned device).
/// </summary>
/// <param name="arch">Architecture type.</param>
/// <param name="deviceId">The device id.</param>
/// <returns>GPGPU instance.</returns>
public static GPGPU GetDevice(eArchitecture arch, int deviceId = 0)
    => GetDevice(CompilerHelper.GetGPUType(arch), deviceId);
/// <summary>
/// Maps an architecture to a version number.
/// </summary>
/// <param name="arch">The architecture to map.</param>
/// <param name="types">Not read by this method.</param>
/// <returns>The version associated with <paramref name="arch"/>.</returns>
/// <exception cref="ArgumentException">The architecture is not one of the handled values.</exception>
private static Version GetComputeCapability(eArchitecture arch, params Type[] types)
{
    if (arch == eArchitecture.Emulator) { return new Version(0, 1); }

    // CUDA architectures: sm_XY maps to version X.Y.
    if (arch == eArchitecture.sm_10) { return new Version(1, 0); }
    if (arch == eArchitecture.sm_11) { return new Version(1, 1); }
    if (arch == eArchitecture.sm_12) { return new Version(1, 2); }
    if (arch == eArchitecture.sm_13) { return new Version(1, 3); }
    if (arch == eArchitecture.sm_20) { return new Version(2, 0); }
    if (arch == eArchitecture.sm_21) { return new Version(2, 1); }
    if (arch == eArchitecture.sm_30) { return new Version(3, 0); }
    if (arch == eArchitecture.sm_35) { return new Version(3, 5); }
    if (arch == eArchitecture.sm_37) { return new Version(3, 7); }
    if (arch == eArchitecture.sm_50) { return new Version(5, 0); }
    if (arch == eArchitecture.sm_52) { return new Version(5, 2); }
    if (arch == eArchitecture.sm_53) { return new Version(5, 3); }
    if (arch == eArchitecture.sm_60) { return new Version(6, 0); }
    if (arch == eArchitecture.sm_61) { return new Version(6, 1); }
    if (arch == eArchitecture.sm_62) { return new Version(6, 2); }
    if (arch == eArchitecture.sm_70) { return new Version(7, 0); }
    if (arch == eArchitecture.sm_72) { return new Version(7, 2); }
    if (arch == eArchitecture.sm_75) { return new Version(7, 5); }
    if (arch == eArchitecture.sm_80) { return new Version(8, 0); }

    // OpenCL architectures.
    if (arch == eArchitecture.OpenCL) { return new Version(1, 0); }
    if (arch == eArchitecture.OpenCL11) { return new Version(1, 1); }
    if (arch == eArchitecture.OpenCL12) { return new Version(1, 2); }

    // With no explicit architecture the answer depends on the current Language.
    if (arch == eArchitecture.Unknown && Language == eLanguage.OpenCL) { return new Version(1, 0); }
    if (arch == eArchitecture.Unknown && Language == eLanguage.Cuda) { return new Version(1, 3); }

    throw new ArgumentException("Unknown architecture.");
}
/// <summary>
/// Loads the cudafied module for <c>Program.testArchitecture</c> and, for every index below
/// <c>km.Functions.Count</c>, launches the function named "add_&lt;index&gt;" on two input arrays
/// and checks on the host that each output element equals the sum of the inputs.
/// </summary>
public static void Execute()
{
    GPGPU device = CudafyHost.GetDevice(CudafyModes.Target, 0);
    eArchitecture targetArch = Program.testArchitecture;
    CudafyModule module = CudafyTranslator.Cudafy(targetArch);
    device.LoadModule(module);

    int[] hostA = new int[N];
    int[] hostB = new int[N];
    int[] hostC = new int[N];

    // Device buffers matching the host arrays.
    int[] devA = device.Allocate<int>(hostA);
    int[] devB = device.Allocate<int>(hostB);
    int[] devC = device.Allocate<int>(hostC);

    // Inputs: a[i] = i, b[i] = 2 * i.
    for (int i = 0; i < N; i++)
    {
        hostA[i] = i;
        hostB[i] = 2 * i;
    }

    for (int index = 0; index < module.Functions.Count; index++)
    {
        string function = "add_" + index.ToString();
        Console.WriteLine(function);

        device.CopyToDevice(hostA, devA);
        device.CopyToDevice(hostB, devB);
        device.Launch(128, 1, function, devA, devB, devC);
        device.CopyFromDevice(devC, hostC);

        // Find the first element where the device result differs from the host sum.
        int firstMismatch = -1;
        for (int i = 0; i < N && firstMismatch < 0; i++)
        {
            if ((hostA[i] + hostB[i]) != hostC[i])
            {
                firstMismatch = i;
            }
        }

        if (firstMismatch >= 0)
        {
            Console.WriteLine("{0} + {1} != {2}", hostA[firstMismatch], hostB[firstMismatch], hostC[firstMismatch]);
        }
        else
        {
            Console.WriteLine("We did it!");
        }
    }

    // Release the device buffers; the host arrays are left to the garbage collector.
    device.Free(devA);
    device.Free(devB);
    device.Free(devC);
}
/// <summary>
/// Determines whether module has binary for the specified platform and architecture.
/// </summary>
/// <param name="platform">The platform; <c>ePlatform.Auto</c> is resolved to <c>CurrentPlatform</c> before matching.</param>
/// <param name="arch">The architecture.</param>
/// <returns>
/// <c>true</c> if module has binary for the specified platform and architecture; otherwise, <c>false</c>.
/// </returns>
public bool HasBinaryForPlatform(ePlatform platform, eArchitecture arch)
{
    ePlatform currPlatform = platform == ePlatform.Auto ? CurrentPlatform : platform;

    // Any stops at the first match instead of counting every stored module.
    return _BinaryModules.Any(b => b.Platform == currPlatform && b.Architecture == arch);
}
/// <summary>
/// Tries cudafying the assembly producing a *.cdfy file with same name as assembly.
/// The messages reported by the full overload are not returned to the caller.
/// </summary>
/// <param name="assembly">The assembly.</param>
/// <param name="arch">The architecture.</param>
/// <returns>
/// <c>true</c> if successful; otherwise, <c>false</c>.
/// </returns>
public static bool TryCudafy(this Assembly assembly, eArchitecture arch = eArchitecture.sm_12)
{
    string ignored;
    bool succeeded = TryCudafy(assembly, out ignored, arch);
    return succeeded;
}
/// <summary>
/// Reads the whole text file at <paramref name="path"/> and appends it to <c>_PTXModules</c>.
/// </summary>
/// <param name="sourceCodeFileId">Stored as the module's <c>SourceCodeID</c>.</param>
/// <param name="platform">Stored as the module's <c>Platform</c>.</param>
/// <param name="arch">Stored as the module's <c>Architecture</c>.</param>
/// <param name="path">Path of the file to read.</param>
internal void StorePTXFile(string sourceCodeFileId, ePlatform platform, eArchitecture arch, string path)
{
    string ptxText;
    using (StreamReader reader = File.OpenText(path))
    {
        ptxText = reader.ReadToEnd();
    }

    PTXModule module = new PTXModule();
    module.Platform = platform;
    module.PTX = ptxText;
    module.Architecture = arch;
    module.SourceCodeID = sourceCodeFileId;
    _PTXModules.Add(module);
}
/// <summary>
/// Builds the compile properties for the given platform, architecture and mode.
/// </summary>
/// <remarks>
/// For CUDA architectures the toolkit directory is located under the program files directory
/// matching the platform and the compiler path, include path and temporary file names are
/// filled in. For any other language the mode is forced to
/// <c>eCudafyCompileMode.TranslateOnly</c> and the platform is stored as passed.
/// </remarks>
/// <param name="platform">Target platform; for CUDA, <c>ePlatform.Auto</c> is resolved from <c>IntPtr.Size</c>.</param>
/// <param name="arch">Architecture; <c>Unknown</c> becomes <c>sm_13</c> for CUDA and <c>OpenCL</c> otherwise.</param>
/// <param name="mode">Compile mode flags.</param>
/// <param name="workingDir">Working directory; the current directory is used when it does not exist.</param>
/// <param name="debugInfo">Stored as <c>GenerateDebugInfo</c>.</param>
/// <returns>The populated compile properties.</returns>
/// <exception cref="CudafyCompileException">No CUDA toolkit version was found for a CUDA architecture.</exception>
public static CompileProperties Create(ePlatform platform = ePlatform.Auto, eArchitecture arch = eArchitecture.sm_13, eCudafyCompileMode mode = eCudafyCompileMode.Default, string workingDir = null, bool debugInfo = false)
{
    CompileProperties props = new CompileProperties();

    if (GetLanguage(arch) != eLanguage.Cuda)
    {
        mode = eCudafyCompileMode.TranslateOnly;
        props.Architecture = (arch == eArchitecture.Unknown) ? eArchitecture.OpenCL : arch;
    }
    else
    {
        // Choose the program files directory that matches the requested platform.
        string programFilesDir;
        if (platform == ePlatform.x64)
        {
            programFilesDir = Utility.ProgramFilesx64();
        }
        else if (platform == ePlatform.x86)
        {
            programFilesDir = Utility.ProgramFilesx86();
        }
        else
        {
            programFilesDir = Utility.ProgramFiles();
            if (platform == ePlatform.Auto)
            {
                platform = IntPtr.Size == 4 ? ePlatform.x86 : ePlatform.x64;
            }
        }

        char sep = Path.DirectorySeparatorChar;
        Version selectedVersion;
        string versionSuffix = GetCudaVersion(programFilesDir + sep + csGPUTOOLKIT, out selectedVersion);
        if (string.IsNullOrEmpty(versionSuffix))
        {
            throw new CudafyCompileException(CudafyCompileException.csCUDA_DIR_NOT_FOUND);
        }

        string toolkitDir = programFilesDir + sep + csGPUTOOLKIT + versionSuffix;
        props.CompilerPath = toolkitDir + sep + @"bin" + sep + csNVCC;
        props.IncludeDirectoryPath = toolkitDir + sep + @"include";
        if (arch == eArchitecture.Unknown)
        {
            props.Architecture = eArchitecture.sm_13;
        }
        else
        {
            props.Architecture = arch;
        }

        // Input and output names are derived from one temp name; the extension depends on Binary mode.
        bool wantsBinary = (mode & eCudafyCompileMode.Binary) == eCudafyCompileMode.Binary;
        string tempFileName = "CUDAFYSOURCETEMP.tmp";
        props.InputFile = tempFileName.Replace(".tmp", ".cu");
        props.OutputFile = tempFileName.Replace(".tmp", wantsBinary ? ".cubin" : ".ptx");

        bool wantsDynamicParallelism = (mode & eCudafyCompileMode.DynamicParallelism) == eCudafyCompileMode.DynamicParallelism;
        if (wantsDynamicParallelism)
        {
            props.AdditionalInputArgs = "cudadevrt.lib cublas_device.lib -dlink";
        }
    }

    props.WorkingDirectory = Directory.Exists(workingDir) ? workingDir : Environment.CurrentDirectory;
    props.Platform = platform;
    props.CompileMode = mode;
    props.GenerateDebugInfo = debugInfo;
    return props;
}