// Simulated crawler worker: drains tasks from the shared bag, expands each
// page's outgoing links into new tasks, and logs every task it finishes.
// Terminates when the bag is (momentarily) empty.
static async Task Crawl(ConcurrentBag<CrawlingTask> bag, string crawlerName)
{
    while (bag.TryTake(out var currentTask))
    {
        IEnumerable<string> discoveredLinks = await GetLinksFromContent(currentTask);

        if (discoveredLinks != null)
        {
            foreach (var link in discoveredLinks)
            {
                bag.Add(new CrawlingTask
                {
                    UrlToCrawl = link,
                    ProducerName = crawlerName
                });
            }
        }

        // Log completion for every taken task, even when no links were found.
        Console.WriteLine("Indexing url {0} posted by {1} is completed by {2}!",
            currentTask.UrlToCrawl, currentTask.ProducerName, crawlerName);
    }
}
/// <summary>
/// Simulated crawler worker. Drains tasks from the shared bag, expands each
/// page's links into new crawl tasks, and logs completion of every task.
/// </summary>
/// <param name="bag">Shared work queue of pending crawl tasks.</param>
/// <param name="crawlerName">Name of this crawler, recorded as producer of new tasks.</param>
/// <returns>A task that completes when the bag is (momentarily) empty.</returns>
static async Task Crawl(ConcurrentBag<CrawlingTask> bag, string crawlerName)
{
    CrawlingTask task;
    while (bag.TryTake(out task))
    {
        // If the page content contains URLs, enqueue them as new crawl tasks.
        IEnumerable<string> urls = await GetLinksFromContent(task);
        if (urls != null)
        {
            foreach (var url in urls)
            {
                var t = new CrawlingTask
                {
                    UrlToCrawl = url,
                    ProducerName = crawlerName
                };
                bag.Add(t);
            }
        }

        // FIX: moved outside the null check — previously a task whose URL
        // produced no links was silently dropped from the log, unlike the
        // other Crawl variants in this file which log every taken task.
        Console.WriteLine($"Indexing url {task.UrlToCrawl} posted by {task.ProducerName} is completed by {crawlerName}");
    }
}
/// <summary>
/// Simulates fetching a page and extracting its outgoing links, after a
/// random delay. Returns null when the URL has no entry in the emulated
/// content map (callers treat null as "no links").
/// </summary>
/// <param name="task">The crawl task whose <c>UrlToCrawl</c> is looked up.</param>
/// <returns>The links for the URL, or null if the URL is unknown.</returns>
static async Task<IEnumerable<string>> GetLinksFromContent(CrawlingTask task)
{
    await GetRandomDelay();

    // FIX: single TryGetValue lookup instead of ContainsKey + indexer
    // (the original performed two dictionary lookups for the same key).
    return _contentEmulation.TryGetValue(task.UrlToCrawl, out var links)
        ? links
        : null;
}
/// <summary>
/// Simulates fetching a page and extracting its outgoing links, after a
/// random delay. Returns null when the URL has no entry in the emulated
/// content map (callers treat null as "no links").
/// </summary>
/// <param name="task">The crawl task whose <c>UrlToCrawl</c> is looked up.</param>
/// <returns>The links for the URL, or null if the URL is unknown.</returns>
static async Task<IEnumerable<string>> GetLinksFromContent(CrawlingTask task)
{
    await GetRandomDelay();

    // FIX: single TryGetValue lookup instead of ContainsKey + indexer
    // (the original performed two dictionary lookups for the same key).
    if (_contentEmulation.TryGetValue(task.UrlToCrawl, out var links))
    {
        return links;
    }

    return null;
}
/// <summary>
/// Simulated crawler worker. Drains tasks from the shared bag, expands each
/// page's links into new crawl tasks, and logs completion of every task
/// (including those that yielded no links). Terminates when the bag is
/// (momentarily) empty.
/// </summary>
/// <param name="bag">Shared work queue of pending crawl tasks.</param>
/// <param name="crawlerName">Name of this crawler, recorded as producer of new tasks.</param>
static async Task Crawl(ConcurrentBag<CrawlingTask> bag, string crawlerName)
{
    // IDIOM: inline `out var` declaration (C# 7) replaces the pre-declared
    // local; interpolated string replaces the composite format, matching
    // the interpolation style already used elsewhere in this file.
    while (bag.TryTake(out var task))
    {
        IEnumerable<string> urls = await GetLinksFromContent(task);
        if (urls != null)
        {
            foreach (var url in urls)
            {
                var t = new CrawlingTask
                {
                    UrlToCrawl = url,
                    ProducerName = crawlerName
                };
                bag.Add(t);
            }
        }

        Console.WriteLine($"Indexing url {task.UrlToCrawl} posted by {task.ProducerName} is completed by {crawlerName}!");
    }
}