static void Main(string[] args) { try { if (args.Length > 0) { deviceID = int.Parse(args[0]); } } catch (Exception ex) { Logger.Log(LogLevel.Error, "Device ID parse error"); } try { if (args.Length > 1) { port = int.Parse(args[1]); Comms.ConnectToMaster(port); } else { TEST = true; Logger.CopyToConsole = true; CGraph.ShowCycles = true; } } catch (Exception ex) { Logger.Log(LogLevel.Error, "Master connection error"); } try { if (args.Length > 3) { gpuCount = int.Parse(args[3]); fastCuda = gpuCount <= (Environment.ProcessorCount / 2); if (fastCuda) { Logger.Log(LogLevel.Info, "Using single GPU blocking mode"); } } } catch { } if (TEST) { currentJob = nextJob = new Job() { jobID = 0, k0 = 0xf4956dc403730b01L, k1 = 0xe6d45de39c2a5a3eL, k2 = 0xcbf626a8afee35f6L, k3 = 0x4307b94b1a0c9980L, pre_pow = TestPrePow, timestamp = DateTime.Now }; } else { currentJob = nextJob = new Job() { jobID = 0, k0 = 0xf4956dc403730b01L, k1 = 0xe6d45de39c2a5a3eL, k2 = 0xcbf626a8afee35f6L, k3 = 0x4307b94b1a0c9980L, pre_pow = TestPrePow, timestamp = DateTime.Now }; if (!Comms.IsConnected()) { Console.WriteLine("Master connection failed, aborting"); Logger.Log(LogLevel.Error, "No master connection, exitting!"); return; } if (deviceID < 0) { int devCnt = CudaContext.GetDeviceCount(); GpuDevicesMessage gpum = new GpuDevicesMessage() { devices = new List <GpuDevice>(devCnt) }; for (int i = 0; i < devCnt; i++) { string name = CudaContext.GetDeviceName(i); var info = CudaContext.GetDeviceInfo(i); gpum.devices.Add(new GpuDevice() { deviceID = i, name = name, memory = info.TotalGlobalMemory }); } //Console.WriteLine(devCnt); Comms.gpuMsg = gpum; Comms.SetEvent(); //Console.WriteLine("event fired"); Task.Delay(1000).Wait(); //Console.WriteLine("closing"); Comms.Close(); return; } } try { var assembly = Assembly.GetEntryAssembly(); var resourceStream = assembly.GetManifestResourceStream("CudaSolver.kernel_x64.ptx"); ctx = new CudaContext(deviceID, !fastCuda ? (CUCtxFlags.BlockingSync | CUCtxFlags.MapHost) : CUCtxFlags.MapHost); meanSeedA = ctx.LoadKernelPTX(resourceStream, "FluffySeed2A"); meanSeedA.BlockDimensions = 128; meanSeedA.GridDimensions = 2048; meanSeedA.PreferredSharedMemoryCarveout = CUshared_carveout.MaxShared; meanSeedB = ctx.LoadKernelPTX(resourceStream, "FluffySeed2B"); meanSeedB.BlockDimensions = 128; meanSeedB.GridDimensions = 2048; meanSeedB.PreferredSharedMemoryCarveout = CUshared_carveout.MaxShared; meanSeedB_4 = ctx.LoadKernelPTX(resourceStream, "FluffySeed2B"); meanSeedB_4.BlockDimensions = 128; meanSeedB_4.GridDimensions = 1024; meanSeedB_4.PreferredSharedMemoryCarveout = CUshared_carveout.MaxShared; meanRound = ctx.LoadKernelPTX(resourceStream, "FluffyRound"); meanRound.BlockDimensions = 512; meanRound.GridDimensions = 4096; meanRound.PreferredSharedMemoryCarveout = CUshared_carveout.MaxShared; meanRound_2 = ctx.LoadKernelPTX(resourceStream, "FluffyRound"); meanRound_2.BlockDimensions = 512; meanRound_2.GridDimensions = 2048; meanRound_2.PreferredSharedMemoryCarveout = CUshared_carveout.MaxShared; meanRoundJoin = ctx.LoadKernelPTX(resourceStream, "FluffyRound_J"); meanRoundJoin.BlockDimensions = 512; meanRoundJoin.GridDimensions = 4096; meanRoundJoin.PreferredSharedMemoryCarveout = CUshared_carveout.MaxShared; meanTail = ctx.LoadKernelPTX(resourceStream, "FluffyTail"); meanTail.BlockDimensions = 1024; meanTail.GridDimensions = 4096; meanTail.PreferredSharedMemoryCarveout = CUshared_carveout.MaxL1; meanRecover = ctx.LoadKernelPTX(resourceStream, "FluffyRecovery"); meanRecover.BlockDimensions = 256; meanRecover.GridDimensions = 2048; meanRecover.PreferredSharedMemoryCarveout = CUshared_carveout.MaxL1; } catch (Exception ex) { Logger.Log(LogLevel.Error, "Unable to create kernels: " + ex.Message); Task.Delay(500).Wait(); Comms.Close(); return; } try { d_buffer = new CudaDeviceVariable <ulong>(BUFFER_SIZE_U32); d_bufferMid = new CudaDeviceVariable <ulong>(d_buffer.DevicePointer + (BUFFER_SIZE_B * 8)); d_bufferB = new CudaDeviceVariable <ulong>(d_buffer.DevicePointer + (BUFFER_SIZE_A * 8)); d_indexesA = new CudaDeviceVariable <uint>(INDEX_SIZE * 2); d_indexesB = new CudaDeviceVariable <uint>(INDEX_SIZE * 2); Array.Clear(h_indexesA, 0, h_indexesA.Length); Array.Clear(h_indexesB, 0, h_indexesA.Length); d_indexesA = h_indexesA; d_indexesB = h_indexesB; streamPrimary = new CudaStream(CUStreamFlags.NonBlocking); streamSecondary = new CudaStream(CUStreamFlags.NonBlocking); } catch (Exception ex) { Task.Delay(200).Wait(); Logger.Log(LogLevel.Error, $"Out of video memory! Only {ctx.GetFreeDeviceMemorySize()} free"); Task.Delay(500).Wait(); Comms.Close(); return; } try { AllocateHostMemory(true, ref h_a, ref hAligned_a, 1024 * 1024 * 32); } catch (Exception ex) { Logger.Log(LogLevel.Error, "Unable to create pinned memory."); Task.Delay(500).Wait(); Comms.Close(); return; } int loopCnt = 0; while (!Comms.IsTerminated) { try { if (!TEST && (Comms.nextJob.pre_pow == null || Comms.nextJob.pre_pow == "" || Comms.nextJob.pre_pow == TestPrePow)) { Logger.Log(LogLevel.Info, string.Format("Waiting for job....")); Task.Delay(1000).Wait(); continue; } if (!TEST && ((currentJob.pre_pow != Comms.nextJob.pre_pow) || (currentJob.origin != Comms.nextJob.origin))) { currentJob = Comms.nextJob; currentJob.timestamp = DateTime.Now; } if (!TEST && (currentJob.timestamp.AddMinutes(30) < DateTime.Now) && Comms.lastIncoming.AddMinutes(30) < DateTime.Now) { Logger.Log(LogLevel.Info, string.Format("Job too old...")); Task.Delay(1000).Wait(); continue; } // test runs only once if (TEST && loopCnt++ > 100) { Comms.IsTerminated = true; } Solution s; while (graphSolutions.TryDequeue(out s)) { meanRecover.SetConstantVariable <ulong>("recovery", s.GetUlongEdges()); d_indexesB.MemsetAsync(0, streamPrimary.Stream); meanRecover.RunAsync(streamPrimary.Stream, s.job.k0, s.job.k1, s.job.k2, s.job.k3, d_indexesB.DevicePointer); streamPrimary.Synchronize(); s.nonces = new uint[40]; d_indexesB.CopyToHost(s.nonces, 0, 0, 40 * 4); s.nonces = s.nonces.OrderBy(n => n).ToArray(); lock (Comms.graphSolutionsOut) { Comms.graphSolutionsOut.Enqueue(s); } Comms.SetEvent(); } uint[] count; do { if (!TEST && ((currentJob.pre_pow != Comms.nextJob.pre_pow) || (currentJob.origin != Comms.nextJob.origin))) { currentJob = Comms.nextJob; currentJob.timestamp = DateTime.Now; } currentJob = currentJob.Next(); Logger.Log(LogLevel.Debug, string.Format("GPU NV{4}:Trimming #{4}: {0} {1} {2} {3}", currentJob.k0, currentJob.k1, currentJob.k2, currentJob.k3, currentJob.jobID, deviceID)); timer.Restart(); d_indexesA.MemsetAsync(0, streamPrimary.Stream); d_indexesB.MemsetAsync(0, streamPrimary.Stream); meanSeedA.RunAsync(streamPrimary.Stream, currentJob.k0, currentJob.k1, currentJob.k2, currentJob.k3, d_bufferMid.DevicePointer, d_indexesB.DevicePointer); meanSeedB_4.RunAsync(streamPrimary.Stream, d_bufferMid.DevicePointer, d_buffer.DevicePointer, d_indexesB.DevicePointer, d_indexesA.DevicePointer, 0); meanSeedB_4.RunAsync(streamPrimary.Stream, d_bufferMid.DevicePointer, d_buffer.DevicePointer + ((BUFFER_SIZE_A * 8) / 4) * 1, d_indexesB.DevicePointer, d_indexesA.DevicePointer, 16); meanSeedB_4.RunAsync(streamPrimary.Stream, d_bufferMid.DevicePointer, d_buffer.DevicePointer + ((BUFFER_SIZE_A * 8) / 4) * 2, d_indexesB.DevicePointer, d_indexesA.DevicePointer, 32); meanSeedB_4.RunAsync(streamPrimary.Stream, d_bufferMid.DevicePointer, d_buffer.DevicePointer + ((BUFFER_SIZE_A * 8) / 4) * 3, d_indexesB.DevicePointer, d_indexesA.DevicePointer, 48); d_indexesB.MemsetAsync(0, streamPrimary.Stream); meanRound_2.RunAsync(streamPrimary.Stream, d_buffer.DevicePointer + ((BUFFER_SIZE_A * 8) / 4) * 2, d_bufferB.DevicePointer, d_indexesA.DevicePointer + (2048 * 4), d_indexesB.DevicePointer + (4096 * 4), DUCK_EDGES_A, DUCK_EDGES_B / 2); meanRound_2.RunAsync(streamPrimary.Stream, d_buffer.DevicePointer, d_bufferB.DevicePointer - (BUFFER_SIZE_B * 8), d_indexesA.DevicePointer, d_indexesB.DevicePointer, DUCK_EDGES_A, DUCK_EDGES_B / 2); d_indexesA.MemsetAsync(0, streamPrimary.Stream); meanRoundJoin.RunAsync(streamPrimary.Stream, d_bufferB.DevicePointer - (BUFFER_SIZE_B * 8), d_bufferB.DevicePointer, d_buffer.DevicePointer, d_indexesB.DevicePointer, d_indexesA.DevicePointer, DUCK_EDGES_B / 2, DUCK_EDGES_B / 2); //d_indexesA.MemsetAsync(0, streamPrimary.Stream); //meanRound.RunAsync(streamPrimary.Stream, d_bufferB.DevicePointer, d_buffer.DevicePointer, d_indexesB.DevicePointer, d_indexesA.DevicePointer, DUCK_EDGES_B, DUCK_EDGES_B / 2); d_indexesB.MemsetAsync(0, streamPrimary.Stream); meanRound.RunAsync(streamPrimary.Stream, d_buffer.DevicePointer, d_bufferB.DevicePointer, d_indexesA.DevicePointer, d_indexesB.DevicePointer, DUCK_EDGES_B / 2, DUCK_EDGES_B / 2); d_indexesA.MemsetAsync(0, streamPrimary.Stream); meanRound.RunAsync(streamPrimary.Stream, d_bufferB.DevicePointer, d_buffer.DevicePointer, d_indexesB.DevicePointer, d_indexesA.DevicePointer, DUCK_EDGES_B / 2, DUCK_EDGES_B / 2); d_indexesB.MemsetAsync(0, streamPrimary.Stream); meanRound.RunAsync(streamPrimary.Stream, d_buffer.DevicePointer, d_bufferB.DevicePointer, d_indexesA.DevicePointer, d_indexesB.DevicePointer, DUCK_EDGES_B / 2, DUCK_EDGES_B / 4); for (int i = 0; i < trimRounds; i++) { d_indexesA.MemsetAsync(0, streamPrimary.Stream); meanRound.RunAsync(streamPrimary.Stream, d_bufferB.DevicePointer, d_buffer.DevicePointer, d_indexesB.DevicePointer, d_indexesA.DevicePointer, DUCK_EDGES_B / 4, DUCK_EDGES_B / 4); d_indexesB.MemsetAsync(0, streamPrimary.Stream); meanRound.RunAsync(streamPrimary.Stream, d_buffer.DevicePointer, d_bufferB.DevicePointer, d_indexesA.DevicePointer, d_indexesB.DevicePointer, DUCK_EDGES_B / 4, DUCK_EDGES_B / 4); } d_indexesA.MemsetAsync(0, streamPrimary.Stream); meanTail.RunAsync(streamPrimary.Stream, d_bufferB.DevicePointer, d_buffer.DevicePointer, d_indexesB.DevicePointer, d_indexesA.DevicePointer); ctx.Synchronize(); streamPrimary.Synchronize(); count = new uint[2]; d_indexesA.CopyToHost(count, 0, 0, 8); if (count[0] > 4194304) { // trouble count[0] = 4194304; // log } hAligned_a.AsyncCopyFromDevice(d_buffer.DevicePointer, 0, 0, count[0] * 8, streamPrimary.Stream); streamPrimary.Synchronize(); System.Runtime.InteropServices.Marshal.Copy(hAligned_a.PinnedHostPointer, h_a, 0, ((int)count[0] * 8) / sizeof(int)); timer.Stop(); currentJob.solvedAt = DateTime.Now; currentJob.trimTime = timer.ElapsedMilliseconds; //Console.WriteLine("Trimmed in {0}ms to {1} edges", timer.ElapsedMilliseconds, count[0]); Logger.Log(LogLevel.Info, string.Format("GPU NV{2}: Trimmed in {0}ms to {1} edges, h {3}", timer.ElapsedMilliseconds, count[0], deviceID, currentJob.height)); }while((currentJob.height != Comms.nextJob.height) && (!Comms.IsTerminated) && (!TEST)); if (TEST) { //Console.WriteLine("Trimmed in {0}ms to {1} edges", timer.ElapsedMilliseconds, count[0]); CGraph cg = FinderBag.GetFinder(); if (cg == null) { continue; } cg.SetEdges(h_a, (int)count[0]); cg.SetHeader(currentJob); //currentJob = currentJob.Next(); Task.Factory.StartNew(() => { Stopwatch sw = new Stopwatch(); sw.Start(); if (count[0] < 200000) { try { if (findersInFlight++ < 3) { Stopwatch cycleTime = new Stopwatch(); cycleTime.Start(); cg.FindSolutions(graphSolutions); cycleTime.Stop(); AdjustTrims(cycleTime.ElapsedMilliseconds); if (graphSolutions.Count > 0) { solutions++; } } else { Logger.Log(LogLevel.Warning, "CPU overloaded!"); } } catch (Exception ex) { Logger.Log(LogLevel.Error, "Cycle finder error" + ex.Message); } finally { findersInFlight--; FinderBag.ReturnFinder(cg); } } sw.Stop(); if (++trims % 50 == 0) { Console.ForegroundColor = ConsoleColor.Green; Console.WriteLine("SOLS: {0}/{1} - RATE: {2:F1}", solutions, trims, (float)trims / solutions); Console.ResetColor(); } //Console.WriteLine("Finder completed in {0}ms on {1} edges with {2} solution(s)", sw.ElapsedMilliseconds, count[0], graphSolutions.Count); //Console.WriteLine("Duped edges: {0}", cg.dupes); Logger.Log(LogLevel.Info, string.Format("Finder completed in {0}ms on {1} edges with {2} solution(s) and {3} dupes", sw.ElapsedMilliseconds, count[0], graphSolutions.Count, cg.dupes)); }); //h_indexesA = d_indexesA; //h_indexesB = d_indexesB; //var sumA = h_indexesA.Sum(e => e); //var sumB = h_indexesB.Sum(e => e); ; } else { CGraph cg = FinderBag.GetFinder(); cg.SetEdges(h_a, (int)count[0]); cg.SetHeader(currentJob); Task.Factory.StartNew(() => { if (count[0] < 200000) { try { if (findersInFlight++ < 3) { Stopwatch cycleTime = new Stopwatch(); cycleTime.Start(); cg.FindSolutions(graphSolutions); cycleTime.Stop(); AdjustTrims(cycleTime.ElapsedMilliseconds); if (graphSolutions.Count > 0) { solutions++; } } else { Logger.Log(LogLevel.Warning, "CPU overloaded!"); } } catch (Exception ex) { Logger.Log(LogLevel.Error, "Cycle finder crashed: " + ex.Message); } finally { findersInFlight--; FinderBag.ReturnFinder(cg); } } }); } } catch (Exception ex) { Logger.Log(LogLevel.Error, "Critical error in main cuda loop " + ex.Message); Task.Delay(5000).Wait(); } } // clean up try { Task.Delay(500).Wait(); Comms.Close(); d_buffer.Dispose(); d_indexesA.Dispose(); d_indexesB.Dispose(); streamPrimary.Dispose(); streamSecondary.Dispose(); hAligned_a.Dispose(); if (ctx != null) { ctx.Dispose(); } } catch { } }
static void Main(string[] args) { try { if (args.Length == 1 && args[0].ToLower().Contains("fidelity")) { string[] fseg = args[0].Split(':'); deviceID = int.Parse(fseg[1]); nonce = Int64.Parse(fseg[2]) - 1; range = int.Parse(fseg[3]); QTEST = true; } else { if (args.Length > 0) { deviceID = int.Parse(args[0]); } } } catch (Exception ex) { Logger.Log(LogLevel.Error, "Device ID parse error: " + ex.Message); } try { if (args.Length > 0) { deviceID = int.Parse(args[0]); } } catch (Exception ex) { Logger.Log(LogLevel.Error, "Device ID parse error"); } try { if (args.Length > 1) { port = int.Parse(args[1]); Comms.ConnectToMaster(port); } else { TEST = true; Logger.CopyToConsole = true; CGraph.ShowCycles = true; } } catch (Exception ex) { Logger.Log(LogLevel.Error, "Master connection error"); } try { if (args.Length > 3) { gpuCount = int.Parse(args[3]); fastCuda = gpuCount <= (Environment.ProcessorCount / 2); if (fastCuda) { Logger.Log(LogLevel.Info, "Using single GPU blocking mode"); } } } catch { } if (TEST) { currentJob = nextJob = new Job() { jobID = 0, k0 = 0xf4956dc403730b01L, k1 = 0xe6d45de39c2a5a3eL, k2 = 0xcbf626a8afee35f6L, k3 = 0x4307b94b1a0c9980L, pre_pow = TestPrePow, timestamp = DateTime.Now }; } else { currentJob = nextJob = new Job() { jobID = 0, k0 = 0xf4956dc403730b01L, k1 = 0xe6d45de39c2a5a3eL, k2 = 0xcbf626a8afee35f6L, k3 = 0x4307b94b1a0c9980L, pre_pow = TestPrePow, timestamp = DateTime.Now }; if (!Comms.IsConnected()) { Console.WriteLine("Master connection failed, aborting"); Logger.Log(LogLevel.Error, "No master connection, exitting!"); return; } if (deviceID < 0) { int devCnt = CudaContext.GetDeviceCount(); GpuDevicesMessage gpum = new GpuDevicesMessage() { devices = new List <GpuDevice>(devCnt) }; for (int i = 0; i < devCnt; i++) { string name = CudaContext.GetDeviceName(i); var info = CudaContext.GetDeviceInfo(i); gpum.devices.Add(new GpuDevice() { deviceID = i, name = name, memory = info.TotalGlobalMemory }); } //Console.WriteLine(devCnt); Comms.gpuMsg = gpum; Comms.SetEvent(); //Console.WriteLine("event fired"); Task.Delay(1000).Wait(); //Console.WriteLine("closing"); Comms.Close(); return; } } try { var assembly = Assembly.GetEntryAssembly(); var resourceStream = assembly.GetManifestResourceStream("CudaSolver.kernel_x64.ptx"); ctx = new CudaContext(deviceID, /*!fastCuda ? (CUCtxFlags.BlockingSync | CUCtxFlags.MapHost) :*/ CUCtxFlags.MapHost); string pow = new StreamReader(resourceStream).ReadToEnd(); //pow = File.ReadAllText(@"kernel_x64.ptx"); Turing = ctx.GetDeviceInfo().MaxSharedMemoryPerMultiprocessor == 65536; using (var s = GenerateStreamFromString(pow)) { if (!Turing) { meanSeedA = ctx.LoadKernelPTX(s, "FluffySeed4K", new CUJITOption[] { CUJITOption.MaxRegisters }, new object[] { (uint)40 }); meanSeedA.BlockDimensions = 512; meanSeedA.GridDimensions = 1024; meanSeedA.PreferredSharedMemoryCarveout = CUshared_carveout.MaxShared; meanRound = ctx.LoadKernelPTX(s, "FluffyRound_A2", new CUJITOption[] { CUJITOption.MaxRegisters }, new object[] { (uint)40 }); meanRound.BlockDimensions = 512; meanRound.GridDimensions = 4096; meanRound.PreferredSharedMemoryCarveout = CUshared_carveout.MaxShared; meanRound_4 = ctx.LoadKernelPTX(s, "FluffyRound_A1", new CUJITOption[] { CUJITOption.MaxRegisters }, new object[] { (uint)32 }); meanRound_4.BlockDimensions = 1024; meanRound_4.GridDimensions = 1024; meanRound_4.PreferredSharedMemoryCarveout = CUshared_carveout.MaxShared; meanRoundJoin = ctx.LoadKernelPTX(s, "FluffyRound_A3", new CUJITOption[] { CUJITOption.MaxRegisters }, new object[] { (uint)32 }); meanRoundJoin.BlockDimensions = 1024; meanRoundJoin.GridDimensions = 4096; meanRoundJoin.PreferredSharedMemoryCarveout = CUshared_carveout.MaxShared; meanTail = ctx.LoadKernelPTX(s, "FluffyTail"); meanTail.BlockDimensions = 1024; meanTail.GridDimensions = 4096; meanTail.PreferredSharedMemoryCarveout = CUshared_carveout.MaxL1; meanRecover = ctx.LoadKernelPTX(s, "FluffyRecovery"); meanRecover.BlockDimensions = 256; meanRecover.GridDimensions = 2048; meanRecover.PreferredSharedMemoryCarveout = CUshared_carveout.MaxL1; } else { meanSeedA = ctx.LoadKernelPTX(s, "FluffySeed4K", new CUJITOption[] { CUJITOption.MaxRegisters }, new object[] { (uint)64 }); meanSeedA.BlockDimensions = 512; meanSeedA.GridDimensions = 1024; meanSeedA.PreferredSharedMemoryCarveout = CUshared_carveout.MaxShared; meanRound = ctx.LoadKernelPTX(s, "FluffyRound_C2", new CUJITOption[] { CUJITOption.MaxRegisters }, new object[] { (uint)32 }); meanRound.BlockDimensions = 1024; meanRound.GridDimensions = 4096; meanRound.PreferredSharedMemoryCarveout = CUshared_carveout.MaxShared; meanRound_4 = ctx.LoadKernelPTX(s, "FluffyRound_C1", new CUJITOption[] { CUJITOption.MaxRegisters }, new object[] { (uint)64 }); meanRound_4.BlockDimensions = 1024; meanRound_4.GridDimensions = 1024; meanRound_4.PreferredSharedMemoryCarveout = CUshared_carveout.MaxShared; meanRoundJoin = ctx.LoadKernelPTX(s, "FluffyRound_C3", new CUJITOption[] { CUJITOption.MaxRegisters }, new object[] { (uint)32 }); meanRoundJoin.BlockDimensions = 1024; meanRoundJoin.GridDimensions = 4096; meanRoundJoin.PreferredSharedMemoryCarveout = CUshared_carveout.MaxShared; meanTail = ctx.LoadKernelPTX(s, "FluffyTail"); meanTail.BlockDimensions = 1024; meanTail.GridDimensions = 4096; meanTail.PreferredSharedMemoryCarveout = CUshared_carveout.MaxL1; meanRecover = ctx.LoadKernelPTX(s, "FluffyRecovery"); meanRecover.BlockDimensions = 256; meanRecover.GridDimensions = 2048; meanRecover.PreferredSharedMemoryCarveout = CUshared_carveout.MaxL1; } } } catch (Exception ex) { Logger.Log(LogLevel.Error, "Unable to create kernels: " + ex.Message); Task.Delay(500).Wait(); Comms.Close(); return; } try { d_buffer = new CudaDeviceVariable <ulong>(BUFFER_SIZE_U32 * (temp ? 8 : 1)); d_bufferMid = new CudaDeviceVariable <ulong>(d_buffer.DevicePointer + (BUFFER_SIZE_B * 2)); d_bufferB = new CudaDeviceVariable <ulong>(d_buffer.DevicePointer + (BUFFER_SIZE_B * 8)); d_indexesA = new CudaDeviceVariable <uint>(INDEX_SIZE); d_indexesB = new CudaDeviceVariable <uint>(INDEX_SIZE); d_aux = new CudaDeviceVariable <uint>(512); Array.Clear(h_indexesA, 0, h_indexesA.Length); Array.Clear(h_indexesB, 0, h_indexesA.Length); d_indexesA = h_indexesA; d_indexesB = h_indexesB; streamPrimary = new CudaStream(CUStreamFlags.NonBlocking); } catch (Exception ex) { Task.Delay(200).Wait(); Logger.Log(LogLevel.Error, $"Mem alloc exception. Out of video memory? {ctx.GetFreeDeviceMemorySize()} free"); Task.Delay(500).Wait(); Comms.Close(); return; } try { AllocateHostMemory(true, ref h_a, ref hAligned_a, 1024 * 1024 * 32); } catch (Exception ex) { Logger.Log(LogLevel.Error, "Unable to create pinned memory."); Task.Delay(500).Wait(); Comms.Close(); return; } int loopCnt = 0; while (!Comms.IsTerminated) { try { if (!TEST && (Comms.nextJob.pre_pow == null || Comms.nextJob.pre_pow == "" || Comms.nextJob.pre_pow == TestPrePow)) { Logger.Log(LogLevel.Info, string.Format("Waiting for job....")); Task.Delay(1000).Wait(); continue; } if (!TEST && ((currentJob.pre_pow != Comms.nextJob.pre_pow) || (currentJob.origin != Comms.nextJob.origin))) { currentJob = Comms.nextJob; currentJob.timestamp = DateTime.Now; } if (!TEST && (currentJob.timestamp.AddMinutes(30) < DateTime.Now) && Comms.lastIncoming.AddMinutes(30) < DateTime.Now) { Logger.Log(LogLevel.Info, string.Format("Job too old...")); Task.Delay(1000).Wait(); continue; } // test runs only once if (TEST && ++loopCnt >= range) { Comms.IsTerminated = true; } Solution s; while (graphSolutions.TryDequeue(out s)) { meanRecover.SetConstantVariable <ulong>("recovery", s.GetUlongEdges()); d_indexesB.MemsetAsync(0, streamPrimary.Stream); meanRecover.RunAsync(streamPrimary.Stream, s.job.k0, s.job.k1, s.job.k2, s.job.k3, d_indexesB.DevicePointer); streamPrimary.Synchronize(); s.nonces = new uint[32]; d_indexesB.CopyToHost(s.nonces, 0, 0, 32 * 4); s.nonces = s.nonces.OrderBy(n => n).ToArray(); //fidelity = (32-cycles_found / graphs_searched) * 32 solutions++; s.fidelity = ((double)solutions / (double)trims) * 32.0; //Console.WriteLine(s.fidelity.ToString("0.000")); if (Comms.IsConnected()) { Comms.graphSolutionsOut.Enqueue(s); Comms.SetEvent(); } if (QTEST) { Console.ForegroundColor = ConsoleColor.Red; Console.WriteLine($"Solution for nonce {s.job.nonce}: {string.Join(' ', s.nonces)}"); Console.ResetColor(); } } if (QTEST) { currentJob = currentJob.NextSequential(ref nonce); Console.WriteLine($"Nonce: {nonce} K0: {currentJob.k0:X} K1: {currentJob.k1:X} K2: {currentJob.k2:X} K3: {currentJob.k3:X}"); } else { currentJob = currentJob.Next(); } Logger.Log(LogLevel.Debug, string.Format("GPU NV{4}:Trimming #{4}: {0} {1} {2} {3}", currentJob.k0, currentJob.k1, currentJob.k2, currentJob.k3, currentJob.jobID, deviceID)); timer.Restart(); d_indexesA.MemsetAsync(0, streamPrimary.Stream); d_indexesB.MemsetAsync(0, streamPrimary.Stream); d_aux.MemsetAsync(0, streamPrimary.Stream); meanSeedA.RunAsync(streamPrimary.Stream, currentJob.k0, currentJob.k1, currentJob.k2, currentJob.k3, d_bufferMid.DevicePointer, d_indexesB.DevicePointer, 0); meanSeedA.RunAsync(streamPrimary.Stream, currentJob.k0, currentJob.k1, currentJob.k2, currentJob.k3, d_bufferMid.DevicePointer + ((BUFFER_SIZE_A * 8) / 4 / 4) * 1, d_indexesB.DevicePointer + (4096 * 4), EDGE_SEG); meanSeedA.RunAsync(streamPrimary.Stream, currentJob.k0, currentJob.k1, currentJob.k2, currentJob.k3, d_bufferMid.DevicePointer + ((BUFFER_SIZE_A * 8) / 4 / 4) * 2, d_indexesB.DevicePointer + (4096 * 8), EDGE_SEG * 2); meanSeedA.RunAsync(streamPrimary.Stream, currentJob.k0, currentJob.k1, currentJob.k2, currentJob.k3, d_bufferMid.DevicePointer + ((BUFFER_SIZE_A * 8) / 4 / 4) * 3, d_indexesB.DevicePointer + (4096 * 12), EDGE_SEG * 3); meanRound_4.RunAsync(streamPrimary.Stream, d_bufferMid.DevicePointer, d_buffer.DevicePointer, d_indexesB.DevicePointer, d_indexesA.DevicePointer, DUCK_EDGES_A / 4, DUCK_EDGES_B / 4, 0); meanRound_4.RunAsync(streamPrimary.Stream, d_bufferMid.DevicePointer, d_buffer.DevicePointer + ((BUFFER_SIZE_B * 8) / 4) * 1, d_indexesB.DevicePointer, d_indexesA.DevicePointer, DUCK_EDGES_A / 4, DUCK_EDGES_B / 4, 1024); meanRound_4.RunAsync(streamPrimary.Stream, d_bufferMid.DevicePointer, d_buffer.DevicePointer + ((BUFFER_SIZE_B * 8) / 4) * 2, d_indexesB.DevicePointer, d_indexesA.DevicePointer, DUCK_EDGES_A / 4, DUCK_EDGES_B / 4, 2048); meanRound_4.RunAsync(streamPrimary.Stream, d_bufferMid.DevicePointer, d_buffer.DevicePointer + ((BUFFER_SIZE_B * 8) / 4) * 3, d_indexesB.DevicePointer, d_indexesA.DevicePointer, DUCK_EDGES_A / 4, DUCK_EDGES_B / 4, 3072); //streamPrimary.Synchronize(); //h_indexesA = d_indexesA; //h_indexesB = d_indexesB; //var sumA = h_indexesA.Sum(e => e); //var sumB = h_indexesB.Sum(e => e); //streamPrimary.Synchronize(); d_indexesB.MemsetAsync(0, streamPrimary.Stream); meanRoundJoin.RunAsync(streamPrimary.Stream, d_buffer.DevicePointer, d_buffer.DevicePointer + ((BUFFER_SIZE_B * 8) / 4) * 1, d_buffer.DevicePointer + ((BUFFER_SIZE_B * 8) / 4) * 2, d_buffer.DevicePointer + ((BUFFER_SIZE_B * 8) / 4) * 3, d_bufferB.DevicePointer, d_indexesA.DevicePointer, d_indexesB.DevicePointer, DUCK_EDGES_B / 4, DUCK_EDGES_B / 2); d_indexesA.MemsetAsync(0, streamPrimary.Stream); meanRound.RunAsync(streamPrimary.Stream, d_bufferB.DevicePointer, d_buffer.DevicePointer, d_indexesB.DevicePointer, d_indexesA.DevicePointer, DUCK_EDGES_B / 2, DUCK_EDGES_B / 2, 0, d_aux.DevicePointer); d_indexesB.MemsetAsync(0, streamPrimary.Stream); meanRound.RunAsync(streamPrimary.Stream, d_buffer.DevicePointer, d_bufferB.DevicePointer, d_indexesA.DevicePointer, d_indexesB.DevicePointer, DUCK_EDGES_B / 2, DUCK_EDGES_B / 2, 1, d_aux.DevicePointer); d_indexesA.MemsetAsync(0, streamPrimary.Stream); meanRound.RunAsync(streamPrimary.Stream, d_bufferB.DevicePointer, d_buffer.DevicePointer, d_indexesB.DevicePointer, d_indexesA.DevicePointer, DUCK_EDGES_B / 2, DUCK_EDGES_B / 2, 2, d_aux.DevicePointer); d_indexesB.MemsetAsync(0, streamPrimary.Stream); meanRound.RunAsync(streamPrimary.Stream, d_buffer.DevicePointer, d_bufferB.DevicePointer, d_indexesA.DevicePointer, d_indexesB.DevicePointer, DUCK_EDGES_B / 2, DUCK_EDGES_B / 4, 3, d_aux.DevicePointer); for (int i = 0; i < (TEST ? 80 : trimRounds); i++) //for (int i = 0; i < 85; i++) { d_indexesA.MemsetAsync(0, streamPrimary.Stream); meanRound.RunAsync(streamPrimary.Stream, d_bufferB.DevicePointer, d_buffer.DevicePointer, d_indexesB.DevicePointer, d_indexesA.DevicePointer, DUCK_EDGES_B / 4, DUCK_EDGES_B / 4, i * 2 + 4, d_aux.DevicePointer); d_indexesB.MemsetAsync(0, streamPrimary.Stream); meanRound.RunAsync(streamPrimary.Stream, d_buffer.DevicePointer, d_bufferB.DevicePointer, d_indexesA.DevicePointer, d_indexesB.DevicePointer, DUCK_EDGES_B / 4, DUCK_EDGES_B / 4, i * 2 + 5, d_aux.DevicePointer); } d_indexesA.MemsetAsync(0, streamPrimary.Stream); meanTail.RunAsync(streamPrimary.Stream, d_bufferB.DevicePointer, d_buffer.DevicePointer, d_indexesB.DevicePointer, d_indexesA.DevicePointer); Task.Delay((int)lastTrimMs).Wait(); streamPrimary.Synchronize(); uint[] count = new uint[2]; d_indexesA.CopyToHost(count, 0, 0, 8); if (count[0] > 131071) { // trouble count[0] = 131071; // log } hAligned_a.AsyncCopyFromDevice(d_buffer.DevicePointer, 0, 0, count[0] * 8, streamPrimary.Stream); streamPrimary.Synchronize(); System.Runtime.InteropServices.Marshal.Copy(hAligned_a.PinnedHostPointer, h_a, 0, ((int)count[0] * 8) / sizeof(int)); trims++; timer.Stop(); lastTrimMs = (long)Math.Min(Math.Max((float)timer.ElapsedMilliseconds * 0.9f, 50), 500); currentJob.solvedAt = DateTime.Now; currentJob.trimTime = timer.ElapsedMilliseconds; //Console.WriteLine("Trimmed in {0}ms to {1} edges", timer.ElapsedMilliseconds, count[0]); Logger.Log(LogLevel.Info, string.Format("GPU NV{2}: Trimmed in {0}ms to {1} edges", timer.ElapsedMilliseconds, count[0], deviceID)); FinderBag.RunFinder(TEST, ref trims, count[0], h_a, currentJob, graphSolutions, timer); if (trims % 50 == 0 && TEST) { Console.ForegroundColor = ConsoleColor.Green; Console.WriteLine("SOLS: {0}/{1} - RATE: {2:F1}", solutions, trims, (float)trims / solutions); Console.ResetColor(); } /* * if (TEST) * { * //Console.WriteLine("Trimmed in {0}ms to {1} edges", timer.ElapsedMilliseconds, count[0]); * * CGraph cg = FinderBag.GetFinder(); * cg.SetEdges(h_a, (int)count[0]); * cg.SetHeader(currentJob); * * //currentJob = currentJob.Next(); * * Task.Factory.StartNew(() => * { * Stopwatch sw = new Stopwatch(); * sw.Start(); * * if (count[0] < 131071) * { * try * { * if (findersInFlight++ < 3) * { * Stopwatch cycleTime = new Stopwatch(); * cycleTime.Start(); * cg.FindSolutions(graphSolutions); * cycleTime.Stop(); * AdjustTrims(cycleTime.ElapsedMilliseconds); * //if (graphSolutions.Count > 0) solutions++; * } * else * Logger.Log(LogLevel.Warning, "CPU overloaded!"); * } * catch (Exception ex) * { * Logger.Log(LogLevel.Error, "Cycle finder error" + ex.Message); * } * finally * { * FinderBag.ReturnFinder(cg); * findersInFlight--; * } * } * * sw.Stop(); * * if (trims % 50 == 0) * { * Console.ForegroundColor = ConsoleColor.Green; * Console.WriteLine("SOLS: {0}/{1} - RATE: {2:F1}", solutions, trims, (float)trims/solutions ); * Console.ResetColor(); * } * //Console.WriteLine("Finder completed in {0}ms on {1} edges with {2} solution(s)", sw.ElapsedMilliseconds, count[0], graphSolutions.Count); * //Console.WriteLine("Duped edges: {0}", cg.dupes); * if (!QTEST) * Logger.Log(LogLevel.Info, string.Format("Finder completed in {0}ms on {1} edges with {2} solution(s) and {3} dupes", sw.ElapsedMilliseconds, count[0], graphSolutions.Count, cg.dupes)); * }); * * //h_indexesA = d_indexesA; * //h_indexesB = d_indexesB; * * //var sumA = h_indexesA.Sum(e => e); * //var sumB = h_indexesB.Sum(e => e); * * ; * } * else * { * CGraph cg = FinderBag.GetFinder(); * cg.SetEdges(h_a, (int)count[0]); * cg.SetHeader(currentJob); * * Task.Factory.StartNew(() => * { * if (count[0] < 131071) * { * try * { * if (findersInFlight++ < 3) * { * Stopwatch cycleTime = new Stopwatch(); * cycleTime.Start(); * cg.FindSolutions(graphSolutions); * cycleTime.Stop(); * AdjustTrims(cycleTime.ElapsedMilliseconds); * } * else * Logger.Log(LogLevel.Warning, "CPU overloaded!"); * } * catch (Exception ex) * { * Logger.Log(LogLevel.Warning, "Cycle finder crashed: " + ex.Message); * } * finally * { * FinderBag.ReturnFinder(cg); * findersInFlight--; * } * } * }); * } * */ } catch (Exception ex) { Logger.Log(LogLevel.Error, "Critical error in main cuda loop " + ex.Message); Task.Delay(500).Wait(); break; } } // clean up try { Task.Delay(500).Wait(); Comms.Close(); d_buffer.Dispose(); d_indexesA.Dispose(); d_indexesB.Dispose(); d_aux.Dispose(); streamPrimary.Dispose(); streamSecondary.Dispose(); hAligned_a.Dispose(); if (ctx != null) { ctx.Dispose(); } } catch { } }