/// <summary>
/// Returns the preferred work group size multiple reported for a device.
/// Kept as a separate function because it is also called from Main.
/// </summary>
/// <remarks>
/// The value is obtained by creating a temporary context on the device, compiling an
/// empty kernel and reading its <c>KernelPreferredWorkGroupSizeMultiple</c> property.
/// The kernel, program and context are released even if one of those steps throws.
/// </remarks>
/// <param name="deviceId">Device handle passed to the <c>CLContext</c> constructor.</param>
/// <returns>The value of <c>KernelPreferredWorkGroupSizeMultiple</c> for the empty kernel.</returns>
public static ulong getPreferredWorkGroupSize(IntPtr deviceId)
{
    ulong preferredWorkGroupSize;

    CLContext context = new CLContext(deviceId);
    try
    {
        IntPtr program = context.CreateAndCompileProgram(@"__kernel void get_size() { }");
        try
        {
            CLKernel kernel = context.CreateKernel(program, "get_size");
            try
            {
                preferredWorkGroupSize = kernel.KernelPreferredWorkGroupSizeMultiple;
            }
            finally
            {
                kernel.Dispose();
            }
        }
        finally
        {
            // Same release order as before: kernel first, then program, then context.
            OpenTK.Compute.CL10.CL.ReleaseProgram(program);
        }
    }
    finally
    {
        context.Dispose();
    }

    return preferredWorkGroupSize;
}
/// <summary>
/// Runs the search: builds the GPU pattern tables from <c>parms.Regex</c>, compiles the
/// generated kernel on the selected device, starts the CPU threads that produce work via
/// <c>CreateInput</c>, then repeatedly pops a work item, runs the kernel and re-checks every
/// non-zero result on the CPU.
/// </summary>
/// <remarks>
/// The loop ends when a CPU-verified match is found and <c>parms.ContinueGeneration</c> is
/// false, or when <c>Abort</c> is set. On exit the input threads are aborted and the profiler
/// summary is printed.
/// </remarks>
/// <param name="parms">
/// Supplies the values formerly passed individually: DeviceId, WorkGroupSize, WorkSize,
/// CpuThreads, KernelType, KeySize and Regex, plus the optional SaveGeneratedKernelPath,
/// KeyOutputPath and the ContinueGeneration flag.
/// </param>
/// <exception cref="ArgumentException">
/// The kernel type is not one handled by the switch below, or the key size is not one the
/// selected optimized kernel accepts.
/// </exception>
public void Run(ProgramParameters parms)
{
    int deviceId = (int)parms.DeviceId;
    int workGroupSize = (int)parms.WorkGroupSize;
    int workSize = (int)parms.WorkSize;
    int numThreadsCreateWork = (int)parms.CpuThreads;
    KernelType kernelt = parms.KernelType;
    int keysize = (int)parms.KeySize;

    Console.WriteLine("Cooking up some delicions scallions...");
    this.workSize = (uint)workSize;
    profiler = new Profiler();
    #region init
    profiler.StartRegion("init");

    // Combine patterns into a single regexp and build one of Richard's objects
    var rp = new RegexPattern(parms.Regex);

    // Create bitmasks array for the GPU (flattened: each mask contributes its uint words in order)
    var gpu_bitmasks = rp.GenerateOnionPatternBitmasksForGpu(MIN_CHARS)
        .Select(t => TorBase32.ToUIntArray(TorBase32.CreateBase32Mask(t)))
        .SelectMany(t => t).ToArray();

    //Create Hash Table
    uint[] dataArray;
    ushort[] hashTable;
    uint[][] all_patterns;
    int max_items_per_key = 0;
    {
        // Folds the FNV hash of the first three words of a pattern into a 10-bit bucket key.
        Func<uint[], ushort> fnv = (pattern_arr) =>
        {
            uint f = Util.FNVHash(pattern_arr[0], pattern_arr[1], pattern_arr[2]);
            f = ((f >> 10) ^ f) & (uint)1023;
            return (ushort)f;
        };
        all_patterns = rp.GenerateOnionPatternsForGpu(7)
            .Select(i => TorBase32.ToUIntArray(TorBase32.FromBase32Str(i.Replace('.', 'a'))))
            .ToArray();
        var gpu_dict_list = all_patterns
            .Select(i => new KeyValuePair<ushort, uint>(fnv(i), Util.FNVHash(i[0], i[1], i[2])))
            .GroupBy(i => i.Key)
            .OrderBy(i => i.Key)
            .ToList();

        // dataArray holds the full hashes grouped by bucket key (ascending);
        // hashTable[key] is the start index of that bucket's run inside dataArray.
        dataArray = gpu_dict_list.SelectMany(i => i.Select(j => j.Value)).ToArray();
        hashTable = new ushort[1024];
        int currIndex = 0;
        foreach (var item in gpu_dict_list)
        {
            int len = item.Count();
            hashTable[item.Key] = (ushort)currIndex;
            currIndex += len;
            if (len > max_items_per_key) { max_items_per_key = len; }
        }

        Console.WriteLine("Putting {0} patterns into {1} buckets.", currIndex, gpu_dict_list.Count);
    }

    // Set the key size
    keySize = keysize;

    // Find kernel name and check key size
    kernel_type = kernelt;
    string kernelFileName = null, kernelName = null;
    switch (kernel_type)
    {
        case KernelType.Normal:
            kernelFileName = "kernel.cl";
            kernelName = "normal";
            break;
        case KernelType.Optimized4_9:
            if (keySize != 1024) { throw new ArgumentException("Kernel only works with keysize 1024."); }
            kernelFileName = "kernel.cl";
            kernelName = "optimized";
            break;
        case KernelType.Optimized4_11:
            if (keySize != 2048 && keySize != 4096) { throw new ArgumentException("Kernel only works with keysize 2048 or 4096."); }
            kernelFileName = "kernel.cl";
            kernelName = "optimized";
            break;
        default:
            throw new ArgumentException("Pick a supported kernel.");
    }

    Console.WriteLine("Using kernel {0} from file {1} ({2})", kernelName, kernelFileName, kernel_type);

    //create device context and kernel
    CLDeviceInfo device = GetDevices()[deviceId];
    // Clamp the requested work group size to what the device reports as its maximum.
    if ((uint)workGroupSize > device.MaxWorkGroupSize)
    {
        workGroupSize = (int)device.MaxWorkGroupSize;
    }
    Console.WriteLine("Using work group size {0}", workGroupSize);
    CLContext context = new CLContext(device.DeviceId);

    Console.Write("Compiling kernel... ");
    string kernel_text = KernelGenerator.GenerateKernel(parms, gpu_bitmasks.Length / 3, max_items_per_key, gpu_bitmasks.Take(3).ToArray(), all_patterns[0], all_patterns.Length);
    if (parms.SaveGeneratedKernelPath != null)
    {
        System.IO.File.WriteAllText(parms.SaveGeneratedKernelPath, kernel_text);
    }
    IntPtr program = context.CreateAndCompileProgram(kernel_text);

    // Used only for the "Predicted" runtime shown on the progress line.
    var hashes_per_win = 0.5 / rp.GenerateAllOnionPatternsForRegex().Select(t => Math.Pow(2, -5 * t.Count(q => q != '.'))).Sum();
    Console.WriteLine("done.");

    CLKernel kernel = context.CreateKernel(program, kernelName);

    //Create buffers
    CLBuffer<uint> bufLastWs;
    CLBuffer<uint> bufMidstates;
    CLBuffer<int> bufExpIndexes;
    CLBuffer<uint> bufResults;
    {
        int num_exps = (get_der_len(EXP_MAX) - get_der_len(EXP_MIN) + 1);
        uint[] LastWs = new uint[num_exps * 16];
        uint[] Midstates = new uint[num_exps * 5];
        int[] ExpIndexes = new int[num_exps];
        uint[] Results = new uint[128];

        bufLastWs = context.CreateBuffer(OpenTK.Compute.CL10.MemFlags.MemReadOnly | OpenTK.Compute.CL10.MemFlags.MemCopyHostPtr, LastWs);
        bufMidstates = context.CreateBuffer(OpenTK.Compute.CL10.MemFlags.MemReadOnly | OpenTK.Compute.CL10.MemFlags.MemCopyHostPtr, Midstates);
        bufExpIndexes = context.CreateBuffer(OpenTK.Compute.CL10.MemFlags.MemReadOnly | OpenTK.Compute.CL10.MemFlags.MemCopyHostPtr, ExpIndexes);
        bufResults = context.CreateBuffer(OpenTK.Compute.CL10.MemFlags.MemReadWrite | OpenTK.Compute.CL10.MemFlags.MemCopyHostPtr, Results);
    }

    //Create pattern buffers
    CLBuffer<ushort> bufHashTable;
    CLBuffer<uint> bufDataArray;
    CLBuffer<uint> bufBitmasks;
    {
        bufHashTable = context.CreateBuffer(OpenTK.Compute.CL10.MemFlags.MemReadOnly | OpenTK.Compute.CL10.MemFlags.MemCopyHostPtr, hashTable);
        bufDataArray = context.CreateBuffer(OpenTK.Compute.CL10.MemFlags.MemReadOnly | OpenTK.Compute.CL10.MemFlags.MemCopyHostPtr, dataArray);
        bufBitmasks = context.CreateBuffer(OpenTK.Compute.CL10.MemFlags.MemReadOnly | OpenTK.Compute.CL10.MemFlags.MemCopyHostPtr, gpu_bitmasks);
    }

    //Set kernel arguments
    lock (new object()) { } // Empty lock, resolves (or maybe hides) a race condition in SetKernelArg
    kernel.SetKernelArg(0, bufLastWs);
    kernel.SetKernelArg(1, bufMidstates);
    kernel.SetKernelArg(2, bufResults);
    kernel.SetKernelArg(3, (uint)EXP_MIN);
    kernel.SetKernelArg(4, (byte)get_der_len(EXP_MIN));
    kernel.SetKernelArg(5, bufExpIndexes);
    kernel.SetKernelArg(6, bufBitmasks);
    kernel.SetKernelArg(7, bufHashTable);
    kernel.SetKernelArg(8, bufDataArray);

    profiler.EndRegion("init");

    // The pattern tables never change, so they are written once before the loop.
    bufBitmasks.EnqueueWrite(true);
    bufHashTable.EnqueueWrite(true);
    bufDataArray.EnqueueWrite(true);

    //start the thread to generate input data
    for (int i = 0; i < numThreadsCreateWork; i++)
    {
        Thread inputThread = new Thread(CreateInput);
        inputThread.Start();
        inputThreads.Add(inputThread);
    }
    Thread.Sleep(3000); //wait just a bit so some work is available
    #endregion

    int loop = 0;
    var gpu_runtime_sw = System.Diagnostics.Stopwatch.StartNew();

    profiler.StartRegion("total without init");
    bool success = false;
    while (!success)
    {
        lock (this) { if (this.Abort) { break; } } //abort flag was set.... bail

        KernelInput input = null;
        lock (_kernelInput)
        {
            if (_kernelInput.Count > 0) { input = _kernelInput.Pop(); }
        }
        if (input == null) //If we have run out of work sleep for a bit
        {
            Console.WriteLine("Lack of work for the GPU!! Taking a nap!!");
            Thread.Sleep(250);
            continue;
        }

        // Point the persistent buffers at this work item's arrays.
        profiler.StartRegion("set buffers");
        bufLastWs.Data = input.LastWs;
        bufMidstates.Data = input.Midstates;
        bufExpIndexes.Data = input.ExpIndexes;
        bufResults.Data = input.Results;
        kernel.SetKernelArg(3, input.BaseExp);
        profiler.EndRegion("set buffers");

        profiler.StartRegion("write buffers");
        bufLastWs.EnqueueWrite(true);
        bufMidstates.EnqueueWrite(true);
        bufExpIndexes.EnqueueWrite(true);
        // Zero the results so that only hits from this pass are non-zero afterwards.
        Array.Clear(bufResults.Data, 0, bufResults.Data.Length);
        bufResults.EnqueueWrite(true);
        profiler.EndRegion("write buffers");

        kernel.EnqueueNDRangeKernel(workSize, workGroupSize);

        profiler.StartRegion("read results");
        // NOTE(review): the writes above pass true while this read passes false. If that flag
        // means "blocking", the results below may be inspected before the read has completed —
        // confirm against CLBuffer.EnqueueRead.
        bufResults.EnqueueRead(false);
        profiler.EndRegion("read results");

        loop++;
        Console.Write("\r");
        long hashes = (long)workSize * (long)loop;
        // ElapsedMilliseconds is an integer divisor below and can still be 0 on a very fast
        // first pass; clamp to 1 ms so the progress line cannot throw DivideByZeroException.
        long elapsedMs = Math.Max(1L, gpu_runtime_sw.ElapsedMilliseconds);
        Console.Write("LoopIteration:{0} HashCount:{1:0.00}MH Speed:{2:0.0}MH/s Runtime:{3} Predicted:{4} ",
            loop,
            hashes / 1000000.0d,
            hashes / elapsedMs / 1000.0d,
            gpu_runtime_sw.Elapsed.ToString().Split('.')[0],
            PredictedRuntime(hashes_per_win, hashes * 1000 / elapsedMs));

        profiler.StartRegion("check results");
        // input.Results is the array bufResults.Data was pointed at above.
        foreach (var result in input.Results)
        {
            if (result != 0)
            {
                try
                {
                    // Re-derive the onion hash on the CPU for the exponent the GPU reported.
                    input.Rsa.ChangePublicExponent((BigNumber)result);

                    string onion_hash = input.Rsa.OnionHash;
                    Console.WriteLine("CPU checking hash: {0}", onion_hash);

                    if (rp.DoesOnionHashMatchPattern(onion_hash))
                    {
                        Console.WriteLine();
                        Console.WriteLine("Ding!! Delicious scallions for you!!");
                        Console.WriteLine();

                        string key = input.Rsa.Rsa.PrivateKeyAsPEM;

                        if (parms.KeyOutputPath != null)
                        {
                            System.IO.File.AppendAllText(parms.KeyOutputPath, "Generated at: " + System.DateTime.Now.ToString("G") + "\n");
                            System.IO.File.AppendAllText(parms.KeyOutputPath, "Address/Hash: " + onion_hash + ".onion\n");
                            System.IO.File.AppendAllText(parms.KeyOutputPath, "RSA key: \n" + key + "\n\n");
                        }

                        Console.WriteLine("Exponent: {0}", result);
                        input.Rsa.ChangePublicExponent((BigNumber)result);
                        Console.WriteLine("Address/Hash: " + onion_hash + ".onion");
                        Console.WriteLine();
                        Console.WriteLine(key);
                        Console.WriteLine();

                        if (!parms.ContinueGeneration) { success = true; }
                    }
                }
                catch (OpenSslException /*ex*/)
                {
                    // Best-effort: a candidate that makes OpenSSL throw is skipped.
                }
            }
        }
        profiler.EndRegion("check results");
    }

    foreach (var thread in inputThreads) { thread.Abort(); }
    profiler.EndRegion("total without init");
    Console.WriteLine(profiler.GetSummaryString());
    Console.WriteLine("{0:0.00} million hashes per second", ((long)loop * (long)workSize * (long)1000) / (double)profiler.GetTotalMS("total without init") / (double)1000000);
}
/// <summary>
/// Runs the search: builds the GPU pattern tables from <c>parms.Regex</c>, compiles the
/// generated kernel on the selected device, runs a GPU-vs-CPU SHA-1 self-test, starts the
/// CPU threads that produce work via <c>CreateInput</c>, then repeatedly pops a work item,
/// runs the kernel and re-checks every non-zero result on the CPU.
/// </summary>
/// <remarks>
/// The loop ends when a CPU-verified match is found and <c>parms.ContinueGeneration</c> is
/// false, or when <c>Abort</c> is set. On exit the input threads are aborted and the profiler
/// summary is printed. A failed SHA-1 self-test only prints a warning banner; the run continues.
/// </remarks>
/// <param name="parms">
/// Supplies the values formerly passed individually: DeviceId, WorkGroupSize, WorkSize,
/// CpuThreads, KernelType, KeySize and Regex, plus ExponentIndex, the optional
/// SaveGeneratedKernelPath and UsedModuliFile, and the ContinueGeneration flag.
/// </param>
/// <exception cref="ArgumentException">
/// The kernel type is not one handled by the switch below.
/// </exception>
public void Run(ProgramParameters parms)
{
    int deviceId = (int)parms.DeviceId;
    int workGroupSize = (int)parms.WorkGroupSize;
    int workSize = (int)parms.WorkSize;
    int numThreadsCreateWork = (int)parms.CpuThreads;
    KernelType kernelt = parms.KernelType;
    int keysize = (int)parms.KeySize;

    Console.WriteLine("Cooking up some delicions scallions...");
    this.workSize = (uint)workSize;
    profiler = new Profiler();
    #region init
    profiler.StartRegion("init");

    // Combine patterns into a single regexp and build one of Richard's objects
    var rp = new RegexPattern(parms.Regex);

    // Create bitmasks array for the GPU (flattened: each mask contributes its uint words in order)
    var gpu_bitmasks = rp.GenerateOnionPatternBitmasksForGpu(MIN_CHARS)
        .Select(t => TorBase32.ToUIntArray(TorBase32.CreateBase32Mask(t)))
        .SelectMany(t => t).ToArray();

    //Create Hash Table
    uint[] dataArray;
    ushort[] hashTable;
    uint[][] all_patterns;
    int max_items_per_key = 0;
    {
        // Folds the FNV hash of the first three words of a pattern into a 10-bit bucket key.
        Func<uint[], ushort> fnv = (pattern_arr) =>
        {
            uint f = Util.FNVHash(pattern_arr[0], pattern_arr[1], pattern_arr[2]);
            f = ((f >> 10) ^ f) & (uint)1023;
            return (ushort)f;
        };
        all_patterns = rp.GenerateOnionPatternsForGpu(7)
            .Select(i => TorBase32.ToUIntArray(TorBase32.FromBase32Str(i.Replace('.', 'a'))))
            .ToArray();
        var gpu_dict_list = all_patterns
            .Select(i => new KeyValuePair<ushort, uint>(fnv(i), Util.FNVHash(i[0], i[1], i[2])))
            .GroupBy(i => i.Key)
            .OrderBy(i => i.Key)
            .ToList();

        // dataArray holds the full hashes grouped by bucket key (ascending);
        // hashTable[key] is the start index of that bucket's run inside dataArray.
        dataArray = gpu_dict_list.SelectMany(i => i.Select(j => j.Value)).ToArray();
        hashTable = new ushort[1024];
        int currIndex = 0;
        foreach (var item in gpu_dict_list)
        {
            int len = item.Count();
            hashTable[item.Key] = (ushort)currIndex;
            currIndex += len;
            if (len > max_items_per_key) { max_items_per_key = len; }
        }

        Console.WriteLine("Putting {0} patterns into {1} buckets.", currIndex, gpu_dict_list.Count);
    }

    // Set the key size
    keySize = keysize;

    // Find kernel name and check key size
    kernel_type = kernelt;
    string kernelFileName = null, kernelName = null;
    switch (kernel_type)
    {
        case KernelType.Normal:
            kernelFileName = "kernel.cl";
            kernelName = "normal";
            break;
        case KernelType.Optimized4:
            kernelFileName = "kernel.cl";
            kernelName = "optimized";
            break;
        default:
            throw new ArgumentException("Pick a supported kernel.");
    }

    Console.WriteLine("Using kernel {0} from file {1} ({2})", kernelName, kernelFileName, kernel_type);

    //create device context and kernel
    CLDeviceInfo device = GetDevices()[deviceId];
    // Clamp the requested work group size to what the device reports as its maximum.
    if ((uint)workGroupSize > device.MaxWorkGroupSize)
    {
        workGroupSize = (int)device.MaxWorkGroupSize;
    }
    Console.WriteLine("Using work group size {0}", workGroupSize);
    CLContext context = new CLContext(device.DeviceId);

    Console.Write("Compiling kernel... ");
    string kernel_text = KernelGenerator.GenerateKernel(parms, gpu_bitmasks.Length / 3, max_items_per_key, gpu_bitmasks.Take(3).ToArray(), all_patterns[0], all_patterns.Length, parms.ExponentIndex);
    if (parms.SaveGeneratedKernelPath != null)
    {
        System.IO.File.WriteAllText(parms.SaveGeneratedKernelPath, kernel_text);
    }
    IntPtr program = context.CreateAndCompileProgram(kernel_text);

    // Used only for the "Predicted" runtime shown on the progress line.
    var hashes_per_win = 0.5 / rp.GenerateAllOnionPatternsForRegex().Select(t => Math.Pow(2, -5 * t.Count(q => q != '.'))).Sum();
    Console.WriteLine("done.");

    //
    // Test SHA1 algo
    //
    {
        Console.WriteLine("Testing SHA1 hash...");

        CLKernel shaTestKern = context.CreateKernel(program, "shaTest");
        CLBuffer<uint> bufSuccess = context.CreateBuffer(OpenTK.Compute.CL10.MemFlags.MemReadWrite | OpenTK.Compute.CL10.MemFlags.MemCopyHostPtr, new uint[5]);
        shaTestKern.SetKernelArg(0, bufSuccess);

        shaTestKern.EnqueueNDRangeKernel(workSize, workGroupSize);

        bufSuccess.EnqueueRead(false);

        // Calculate the SHA1 CPU-side
        System.Security.Cryptography.SHA1 sha = new System.Security.Cryptography.SHA1CryptoServiceProvider();

        String testdata = "Hello world!";
        byte[] cpuhash = sha.ComputeHash(Encoding.ASCII.GetBytes(testdata));

        StringBuilder cpuhex = new StringBuilder(cpuhash.Length * 2);
        foreach (byte b in cpuhash)
        {
            cpuhex.AppendFormat("{0:x2}", b);
        }

        Console.WriteLine("CPU SHA-1: {0}", cpuhex.ToString());

        // Convert the SHA1 GPU-side to hex
        String gpuhex = String.Format("{0:x8}{1:x8}{2:x8}{3:x8}{4:x8}", bufSuccess.Data[0], bufSuccess.Data[1], bufSuccess.Data[2], bufSuccess.Data[3], bufSuccess.Data[4]);

        Console.WriteLine("GPU SHA-1: {0}", gpuhex);

        if (gpuhex != cpuhex.ToString())
        {
            // Warn loudly but keep going; the run is not aborted on a mismatch.
            Console.WriteLine();
            Console.WriteLine("******************************* ERROR ERROR ERROR *******************************");
            Console.WriteLine("* *");
            Console.WriteLine("* GPU and CPU SHA-1 calculations do NOT match. *");
            Console.WriteLine("* Hashing will NOT work until this is resolved. *");
            Console.WriteLine("* The program will continue, but WILL NOT find a valid match. *");
            Console.WriteLine("* *");
            Console.WriteLine("* See https://github.com/lachesis/scallion/issues/11#issuecomment-29046835 *");
            Console.WriteLine("* *");
            Console.WriteLine("*********************************************************************************");
            Console.WriteLine();
        }
        else
        {
            Console.WriteLine("Looks good!");
        }
    }

    CLKernel kernel = context.CreateKernel(program, kernelName);

    //Create buffers
    CLBuffer<uint> bufLastWs;
    CLBuffer<uint> bufMidstates;
    CLBuffer<int> bufExpIndexes;
    CLBuffer<uint> bufResults;
    {
        int num_exps = (get_der_len(EXP_MAX) - get_der_len(EXP_MIN) + 1);
        uint[] LastWs = new uint[num_exps * 16];
        uint[] Midstates = new uint[num_exps * 5];
        int[] ExpIndexes = new int[num_exps];
        uint[] Results = new uint[128];

        bufLastWs = context.CreateBuffer(OpenTK.Compute.CL10.MemFlags.MemReadOnly | OpenTK.Compute.CL10.MemFlags.MemCopyHostPtr, LastWs);
        bufMidstates = context.CreateBuffer(OpenTK.Compute.CL10.MemFlags.MemReadOnly | OpenTK.Compute.CL10.MemFlags.MemCopyHostPtr, Midstates);
        bufExpIndexes = context.CreateBuffer(OpenTK.Compute.CL10.MemFlags.MemReadOnly | OpenTK.Compute.CL10.MemFlags.MemCopyHostPtr, ExpIndexes);
        bufResults = context.CreateBuffer(OpenTK.Compute.CL10.MemFlags.MemReadWrite | OpenTK.Compute.CL10.MemFlags.MemCopyHostPtr, Results);
    }

    //Create pattern buffers
    CLBuffer<ushort> bufHashTable;
    CLBuffer<uint> bufDataArray;
    CLBuffer<uint> bufBitmasks;
    {
        bufHashTable = context.CreateBuffer(OpenTK.Compute.CL10.MemFlags.MemReadOnly | OpenTK.Compute.CL10.MemFlags.MemCopyHostPtr, hashTable);
        bufDataArray = context.CreateBuffer(OpenTK.Compute.CL10.MemFlags.MemReadOnly | OpenTK.Compute.CL10.MemFlags.MemCopyHostPtr, dataArray);
        bufBitmasks = context.CreateBuffer(OpenTK.Compute.CL10.MemFlags.MemReadOnly | OpenTK.Compute.CL10.MemFlags.MemCopyHostPtr, gpu_bitmasks);
    }

    //Set kernel arguments
    lock (new object()) { } // Empty lock, resolves (or maybe hides) a race condition in SetKernelArg
    kernel.SetKernelArg(0, bufLastWs);
    kernel.SetKernelArg(1, bufMidstates);
    kernel.SetKernelArg(2, bufResults);
    kernel.SetKernelArg(3, (uint)EXP_MIN);
    kernel.SetKernelArg(4, (byte)get_der_len(EXP_MIN));
    kernel.SetKernelArg(5, bufExpIndexes);
    kernel.SetKernelArg(6, bufBitmasks);
    kernel.SetKernelArg(7, bufHashTable);
    kernel.SetKernelArg(8, bufDataArray);

    profiler.EndRegion("init");

    // The pattern tables never change, so they are written once before the loop.
    bufBitmasks.EnqueueWrite(true);
    bufHashTable.EnqueueWrite(true);
    bufDataArray.EnqueueWrite(true);

    //start the thread to generate input data
    for (int i = 0; i < numThreadsCreateWork; i++)
    {
        Thread inputThread = new Thread(CreateInput);
        inputThread.Start();
        inputThreads.Add(inputThread);
    }
    Thread.Sleep(3000); //wait just a bit so some work is available
    #endregion

    int loop = 0;
    var gpu_runtime_sw = System.Diagnostics.Stopwatch.StartNew();

    profiler.StartRegion("total without init");
    bool success = false;
    while (!success)
    {
        lock (this) { if (this.Abort) { break; } } //abort flag was set.... bail

        KernelInput input = null;
        lock (_kernelInput)
        {
            if (_kernelInput.Count > 0) { input = _kernelInput.Pop(); }
        }
        if (input == null) //If we have run out of work sleep for a bit
        {
            Console.WriteLine("Lack of work for the GPU!! Taking a nap!!");
            Thread.Sleep(250);
            continue;
        }

        // Point the persistent buffers at this work item's arrays.
        profiler.StartRegion("set buffers");
        bufLastWs.Data = input.LastWs;
        bufMidstates.Data = input.Midstates;
        bufExpIndexes.Data = input.ExpIndexes;
        bufResults.Data = input.Results;
        kernel.SetKernelArg(3, input.BaseExp);
        profiler.EndRegion("set buffers");

        profiler.StartRegion("write buffers");
        bufLastWs.EnqueueWrite(true);
        bufMidstates.EnqueueWrite(true);
        bufExpIndexes.EnqueueWrite(true);
        // Zero the results so that only hits from this pass are non-zero afterwards.
        Array.Clear(bufResults.Data, 0, bufResults.Data.Length);
        bufResults.EnqueueWrite(true);
        profiler.EndRegion("write buffers");

        kernel.EnqueueNDRangeKernel(workSize, workGroupSize);

        profiler.StartRegion("read results");
        // NOTE(review): the writes above pass true while this read passes false. If that flag
        // means "blocking", the results below may be inspected before the read has completed —
        // confirm against CLBuffer.EnqueueRead.
        bufResults.EnqueueRead(false);
        profiler.EndRegion("read results");

        loop++;
        Console.Write("\r");
        long hashes = (long)workSize * (long)loop;
        // ElapsedMilliseconds is an integer divisor below and can still be 0 on a very fast
        // first pass; clamp to 1 ms so the progress line cannot throw DivideByZeroException.
        long elapsedMs = Math.Max(1L, gpu_runtime_sw.ElapsedMilliseconds);
        Console.Write("LoopIteration:{0} HashCount:{1:0.00}MH Speed:{2:0.0}MH/s Runtime:{3} Predicted:{4} ",
            loop,
            hashes / 1000000.0d,
            hashes / elapsedMs / 1000.0d,
            gpu_runtime_sw.Elapsed.ToString().Split('.')[0],
            PredictedRuntime(hashes_per_win, hashes * 1000 / elapsedMs));

        profiler.StartRegion("check results");
        // input.Results is the array bufResults.Data was pointed at above.
        foreach (var result in input.Results)
        {
            if (result != 0)
            {
                try
                {
                    // Re-derive the onion hash on the CPU for the exponent the GPU reported.
                    input.Rsa.Rsa.PublicExponent = (BigNumber)result;

                    string onion_hash = input.Rsa.OnionHash;
                    Console.WriteLine("CPU checking hash: {0}", onion_hash);

                    if (rp.DoesOnionHashMatchPattern(onion_hash))
                    {
                        input.Rsa.ChangePublicExponent(result);
                        OutputKey(input.Rsa);

                        if (!parms.ContinueGeneration) { success = true; }
                    }
                }
                catch (OpenSslException /*ex*/)
                {
                    // Best-effort: a candidate that makes OpenSSL throw is skipped.
                }
            }
        }
        profiler.EndRegion("check results");

        // Mark key as used (if configured)
        if (parms.UsedModuliFile != null)
        {
            parms.UsedModuliFile.WriteLine(input.Rsa.Rsa.PublicModulus.ToDecimalString());
        }
    }

    foreach (var thread in inputThreads) { thread.Abort(); }
    profiler.EndRegion("total without init");
    Console.WriteLine(profiler.GetSummaryString());
    Console.WriteLine("{0:0.00} million hashes per second", ((long)loop * (long)workSize * (long)1000) / (double)profiler.GetTotalMS("total without init") / (double)1000000);
}