示例#1
0
        /// <summary>
        /// Walks Leafly review ids in ascending order. For each id it requests the review
        /// page, and when the response is not blank it parses the page with
        /// <c>ReviewParserNode.ParseLeaflyReview</c> and awaits
        /// <c>DataNode.PopulateDatabase</c> on the parsed review.
        /// </summary>
        /// <remarks>
        /// The method is <c>async void</c>, so callers cannot await it and cannot observe
        /// exceptions thrown after the first <c>await</c>. The signature is kept as-is so
        /// existing call sites keep compiling.
        /// No delay is applied between consecutive requests.
        /// </remarks>
        public static async void Go()
        {
            // Id range scanned by this run: [firstReviewId, endReviewId).
            const int firstReviewId = 198565;
            const int endReviewId   = 500000;

            var crawler = new CrawlerNode();
            var parser  = new ReviewParserNode();
            var data    = new DataNode();

            for (var id = firstReviewId; id < endReviewId; id++)
            {
                var sw = Stopwatch.StartNew();

                var res = crawler.RequestPage("https://www.leafly.com/hybrid/space-queen/reviews/" + id);
                if (!String.IsNullOrWhiteSpace(res))
                {
                    var review = parser.ParseLeaflyReview(id, res);
                    await data.PopulateDatabase(review);
                }

                sw.Stop();

                // Log while `id` still names the review that was just processed; the
                // increment happens in the loop header afterwards.
                _logger.DebugFormat("{0} seconds elapsed processing review {1}", sw.ElapsedMilliseconds / 1000f, id);
            }
        }
示例#2
0
        /// <summary>
        /// Assigns <paramref name="record"/> to one of the registered crawler nodes and
        /// enqueues it there.
        /// </summary>
        /// <param name="record">
        /// The record to hand out. Its <c>domain.Host</c> is compared against the hosts of
        /// records already queued on each node, and its <c>recordid</c> keys the
        /// <c>jobSet</c> entry.
        /// </param>
        /// <remarks>
        /// Blocks the calling thread until <c>crawlerNodes</c> contains at least one entry.
        /// The chosen node is the one with the fewest queued records for the same host;
        /// ties are broken by the smallest total queue length.
        /// </remarks>
        public void DistributeWork(HtmlRecord record)
        {
            // Wait for a node to be registered. SpinUntil backs off (yields/sleeps)
            // instead of pinning a core the way an empty while-loop does.
            System.Threading.SpinWait.SpinUntil(() => crawlerNodes.Count != 0);

            // Primary key: how many queued records already target this host.
            // Secondary key: overall queue length. This is the same ordering the
            // previous OrderBy(queue).OrderBy(sameHost) chain produced via sort
            // stability, expressed as a single sort.
            CrawlerNode node = crawlerNodes
                .OrderBy(entry => entry.Value.messageQueue.Count(queued => queued.domain.Host == record.domain.Host))
                .ThenBy(entry => entry.Value.messageQueue.Count)
                .First()
                .Value;

            jobSet[record.recordid] = node;
            node.EnqueueHtmlRecord(record);
        }
示例#3
0
        /// <summary>
        /// Ensures a crawler node exists for <paramref name="domain"/> and starts it.
        /// </summary>
        /// <param name="domain">Key used to look up the node in <c>crawlerNodes</c>.</param>
        /// <remarks>
        /// When no node is registered under <paramref name="domain"/>, a new
        /// <c>CrawlerNode</c> is created and added under its own <c>nodeDomain</c>.
        /// When one already exists, <c>KillSendProcess</c> is called on it first.
        /// In both cases <c>Start</c> is then called on the node.
        /// </remarks>
        public void AddCrawlerNode(Uri domain)
        {
            CrawlerNode node;

            // Single lookup instead of ContainsKey followed by the indexer.
            if (crawlerNodes.TryGetValue(domain, out node))
            {
                node.KillSendProcess();
            }
            else
            {
                node = new CrawlerNode(domain);
                crawlerNodes.Add(node.nodeDomain, node);
            }

            node.Start();
        }