static void Main(string[] args) { const int POISON_QUEUE_MAX_TRY = 5; Console.WriteLine("====== WORKER ======"); using var fromFrontQueue = new PullSocket($">{ThreadSafeFactory.FrontQueue}"); using var toBackQueue = new PushSocket($">{ThreadSafeFactory.BackUrlQueue}"); using var toPoisonQueue = new PushSocket($">{ThreadSafeFactory.BackPosionQueue}"); Console.WriteLine("Initialized Socket"); //process tasks forever var frontQueueProcessor = Task.Run(() => { while (true) { try { Console.WriteLine($"Waiting for front queue"); var workload = fromFrontQueue.ReceiveFrameString(); UrlQueueItem queueItem = workload.FromJson <UrlQueueItem>(); Console.WriteLine($"Request from frontqueue {queueItem.Url}"); _urlQueue.Enqueue(queueItem); } catch (Exception ex) { Console.WriteLine($"Error in Front Queue Processor {ex.Message}"); } } }); var processUrlTask = Task.Run(() => { while (true) { Parallel.For(0, 2 * maxThread / 3, (threadIndex) => { //while (_urlQueue.IsEmpty) //{ // Task.Delay(5000).Wait(); //} if (_urlQueue.TryDequeue(out UrlQueueItem url)) { Console.WriteLine($"Processing {url.Url}"); url.CrawlStart = DateTime.UtcNow; try { foreach (var childUrl in _htmlRenderer.FindChildLinks(url)) { if (!_visitedUrls.ContainsKey(childUrl)) { _visitedUrls.TryAdd(childUrl, false); Console.WriteLine($"Sending to backqueue"); toBackQueue.SendFrame(childUrl); Console.WriteLine($"Sending to backqueue"); } } } catch (Exception ex) { var u = url.Clone() as UrlPosionQueueItem; u.TryCount = 0; u.CrawlFinished = DateTime.UtcNow; u.Errors = new List <string> { ex.Message + "|" + ex?.InnerException?.Message }; _urlPoisonQueue.Enqueue(u); Console.WriteLine($"Error in Url Processor {ex.Message}"); } url.CrawlFinished = DateTime.UtcNow; } }); }
static void Main(string[] seed) { Console.WriteLine("====== FRONT QUEUE ======"); using var toWorkers = new PushSocket($"@{ThreadSafeFactory.FrontQueue}"); using var fromBackQueue = new PullSocket($">{ThreadSafeFactory.BackQueue}"); Console.WriteLine("Socket initialized"); foreach (var s in seed) { _urlQueue.Enqueue(new UrlQueueItem { Url = s }); } var backQueueProcessor = Task.Run(() => { // Receive from backqueue while (true) { Console.WriteLine($"Waiting for backqueue"); var workload = fromBackQueue.ReceiveFrameString(); Console.WriteLine($"Got message from backqueue"); UrlQueueItem queueItem = new UrlQueueItem { Url = workload }; if (!_urlVisiting.ContainsKey(queueItem.Url)) { _urlQueue.Enqueue(queueItem); } else { Console.WriteLine($"Url {queueItem.Url} already sent to processing"); } } }); var frontQueueProcessor = Task.Run(() => { // send to worker while (true) { do { Parallel.For(0, maxThread, (threadIndex) => { //while (_urlQueue.IsEmpty) //{ // Task.Delay(5000).Wait(); //} if (_urlQueue.TryDequeue(out UrlQueueItem url)) { if (_politenessPolicy.CanIGoThere(url, out long crawlDelay)) { Task.Delay((int)crawlDelay).Wait(); url.QueuedOn = DateTime.UtcNow; Console.WriteLine($"Sending to worker [{url.Url}]"); toWorkers.SendFrame(url.ToJson()); Console.WriteLine($"Sent to worker"); _urlVisiting.AddOrUpdate(url.Url.ToLower(), false, (key, val) => false); } } }); } while (!_urlQueue.IsEmpty); } });