コード例 #1
0
ファイル: Program.cs プロジェクト: Asll666/Presentations
        /// <summary>
        /// Repeatedly takes crawling tasks from <paramref name="bag"/> until
        /// <c>TryTake</c> reports that nothing could be taken. For each task,
        /// every link returned by <c>GetLinksFromContent</c> is added back to
        /// the bag as a new task produced by this crawler.
        /// </summary>
        /// <param name="bag">Bag that tasks are taken from and follow-up tasks are added to.</param>
        /// <param name="crawlerName">Name recorded as the producer of follow-up tasks and printed in the completion message.</param>
        /// <returns>A task that completes once a take attempt on the bag fails.</returns>
        static async Task Crawl(ConcurrentBag <CrawlingTask> bag, string crawlerName)
        {
            while (true)
            {
                CrawlingTask current;
                if (!bag.TryTake(out current))
                {
                    // Nothing could be taken from the bag: this crawler is done.
                    break;
                }

                IEnumerable<string> links = await GetLinksFromContent(current);
                if (links != null)
                {
                    // Queue every discovered link, tagged with this crawler as its producer.
                    foreach (string link in links)
                    {
                        bag.Add(new CrawlingTask { UrlToCrawl = link, ProducerName = crawlerName });
                    }
                }

                // Reported for every task taken, whether or not links were found.
                Console.WriteLine(
                    "Indexing url {0} posted by {1} is completed by {2}!",
                    current.UrlToCrawl,
                    current.ProducerName,
                    crawlerName);
            }
        }
コード例 #2
0
        /// <summary>
        /// Simulates a crawler: takes tasks from <paramref name="bag"/> until
        /// <c>TryTake</c> fails, and adds every link returned by
        /// <c>GetLinksFromContent</c> back to the bag as a new task.
        /// </summary>
        /// <param name="bag">Bag that tasks are taken from and follow-up tasks are added to.</param>
        /// <param name="crawlerName">Name recorded as the producer of follow-up tasks and printed in the completion message.</param>
        /// <returns>A task that completes once a take attempt on the bag fails.</returns>
        /// <remarks>
        /// The completion message is written only for tasks whose link lookup
        /// returned a non-null result.
        /// </remarks>
        static async Task Crawl(ConcurrentBag <CrawlingTask> bag, string crawlerName)
        {
            CrawlingTask task;

            while (bag.TryTake(out task))
            {
                IEnumerable<string> links = await GetLinksFromContent(task);

                if (links == null)
                {
                    // No links for this URL: nothing to queue and nothing is logged.
                    continue;
                }

                // The page contains URLs: add each one to the set of tasks still to be crawled.
                foreach (string link in links)
                {
                    bag.Add(new CrawlingTask { UrlToCrawl = link, ProducerName = crawlerName });
                }

                Console.WriteLine($"Indexing url {task.UrlToCrawl} posted by {task.ProducerName} is completed by {crawlerName}");
            }
        }
コード例 #3
0
		/// <summary>
		/// Awaits <c>GetRandomDelay</c>, then looks up the URL of
		/// <paramref name="task"/> in <c>_contentEmulation</c>.
		/// </summary>
		/// <param name="task">Task whose <c>UrlToCrawl</c> is used as the lookup key.</param>
		/// <returns>
		/// The entry stored under the task's URL, or <c>null</c> when
		/// <c>_contentEmulation</c> has no such key.
		/// </returns>
		static async Task<IEnumerable<string>> GetLinksFromContent(CrawlingTask task)
		{
			await GetRandomDelay();

			// Unknown URL: callers treat null as "no links".
			if (!_contentEmulation.ContainsKey(task.UrlToCrawl))
			{
				return null;
			}

			return _contentEmulation[task.UrlToCrawl];
		}
コード例 #4
0
ファイル: Program.cs プロジェクト: Asll666/Presentations
        /// <summary>
        /// Awaits <c>GetRandomDelay</c>, then looks up the URL of
        /// <paramref name="task"/> in <c>_contentEmulation</c>.
        /// </summary>
        /// <param name="task">Task whose <c>UrlToCrawl</c> is used as the lookup key.</param>
        /// <returns>
        /// The entry stored under the task's URL, or <c>null</c> when
        /// <c>_contentEmulation</c> has no such key.
        /// </returns>
        static async Task <IEnumerable <string> > GetLinksFromContent(CrawlingTask task)
        {
            await GetRandomDelay();

            // Stays null unless the URL is a known key.
            IEnumerable<string> links = null;

            if (_contentEmulation.ContainsKey(task.UrlToCrawl))
            {
                links = _contentEmulation[task.UrlToCrawl];
            }

            return links;
        }
コード例 #5
0
		/// <summary>
		/// Takes crawling tasks from <paramref name="bag"/> one at a time until
		/// <c>TryTake</c> fails. Links returned by <c>GetLinksFromContent</c>
		/// for a task are added back to the bag as new tasks, and a completion
		/// message is written for every task taken.
		/// </summary>
		/// <param name="bag">Bag that tasks are taken from and follow-up tasks are added to.</param>
		/// <param name="crawlerName">Name recorded as the producer of follow-up tasks and printed in the completion message.</param>
		/// <returns>A task that completes once a take attempt on the bag fails.</returns>
		static async Task Crawl(ConcurrentBag<CrawlingTask> bag, string crawlerName)
		{
			CrawlingTask current;

			// Nothing to take on the first attempt: finish immediately.
			if (!bag.TryTake(out current))
			{
				return;
			}

			do
			{
				IEnumerable<string> links = await GetLinksFromContent(current);

				if (links != null)
				{
					// Queue every discovered link, tagged with this crawler as its producer.
					foreach (string link in links)
					{
						bag.Add(new CrawlingTask { UrlToCrawl = link, ProducerName = crawlerName });
					}
				}

				Console.WriteLine(
					"Indexing url {0} posted by {1} is completed by {2}!",
					current.UrlToCrawl,
					current.ProducerName,
					crawlerName);
			}
			while (bag.TryTake(out current));
		}