/// <summary>
/// Sequentially crawls Leafly review pages in [<paramref name="startId"/>, <paramref name="endId"/>),
/// parses each non-empty response, and writes the parsed review to the database.
/// Logs the elapsed time per review.
/// </summary>
/// <param name="startId">First review id to request (inclusive). Defaults to the original hard-coded start.</param>
/// <param name="endId">Review id to stop before (exclusive). Defaults to the original hard-coded limit.</param>
/// <returns>A task that completes when the whole id range has been processed.</returns>
/// <remarks>
/// Was <c>async void</c>, which makes exceptions unobservable to callers; returning
/// <see cref="Task"/> is source-compatible for existing fire-and-forget call sites.
/// </remarks>
public static async Task Go(int startId = 198565, int endId = 500000)
{
    var crawler = new CrawlerNode();
    var parser = new ReviewParserNode();
    var data = new DataNode();

    for (var id = startId; id < endId; id++)
    {
        var sw = Stopwatch.StartNew();

        var res = crawler.RequestPage("https://www.leafly.com/hybrid/space-queen/reviews/" + id);
        if (!string.IsNullOrWhiteSpace(res))
        {
            var review = parser.ParseLeaflyReview(id, res);
            await data.PopulateDatabase(review);
        }

        sw.Stop();
        // Log before the loop increments so the message names the review that
        // was actually processed (the original logged id + 1).
        _logger.DebugFormat("{0} seconds elapsed processing review {1}", sw.ElapsedMilliseconds / 1000f, id);
    }
}
/// <summary>
/// Assigns <paramref name="record"/> to the crawler node with the fewest queued
/// records for the record's host (queue length breaks ties), registers the
/// assignment in <c>jobSet</c>, and enqueues the record on that node.
/// </summary>
/// <param name="record">The HTML record to dispatch; its <c>domain.Host</c> drives node selection.</param>
public void DistributeWork(HtmlRecord record)
{
    // Wait for at least one crawler node to register. The original busy-waited
    // with an empty loop body, pinning a core; a short sleep yields the CPU.
    while (crawlerNodes.Count == 0)
    {
        System.Threading.Thread.Sleep(50);
    }

    // The original chained OrderBy(queueLen).OrderBy(sameHostCount): the second
    // OrderBy resets the first, and only LINQ's stable sort kept queue length
    // as an implicit tie-breaker. ThenBy states that intent explicitly and
    // gives the identical ordering.
    var node = crawlerNodes
        .OrderBy(kv => kv.Value.messageQueue.Count(m => m.domain.Host == record.domain.Host))
        .ThenBy(kv => kv.Value.messageQueue.Count)
        .First()
        .Value;

    jobSet[record.recordid] = node;
    node.EnqueueHtmlRecord(record);
}
/// <summary>
/// Ensures a crawler node exists for <paramref name="domain"/> and starts it.
/// A new node is created and registered if none exists; an existing node has
/// its send process killed before being restarted.
/// </summary>
/// <param name="domain">The domain the crawler node is responsible for.</param>
public void AddCrawlerNode(Uri domain)
{
    // TryGetValue replaces the original ContainsKey + indexer double lookup.
    if (crawlerNodes.TryGetValue(domain, out CrawlerNode node))
    {
        // Existing node: stop its current send process so Start() restarts cleanly.
        node.KillSendProcess();
    }
    else
    {
        node = new CrawlerNode(domain);
        // Registered under the node's own domain, as in the original
        // (presumably equal to the lookup key; verify if they can differ).
        crawlerNodes.Add(node.nodeDomain, node);
    }

    node.Start();
}