Пример #1
0
        /// <summary>
        /// Walks the numbered area-list files produced from <paramref name="basePath"/> (1, 2, 3, ...)
        /// and hands each file's lines to <c>StartCrawlerAsync</c>. When the checkpoint file
        /// <paramref name="lastCache"/> exists, crawling resumes from the area stored on its first line.
        /// The walk stops at the first file number that does not exist, at which point the checkpoint
        /// file is deleted.
        /// </summary>
        /// <param name="crawler">Crawler instance forwarded to <c>StartCrawlerAsync</c>.</param>
        /// <param name="lastCache">Path of the checkpoint file whose first line is the area to resume from.</param>
        /// <param name="basePath">
        /// Composite format string; <c>{0}</c> receives the 1-based file number and the formatted
        /// result is appended to the current working directory.
        /// </param>
        protected virtual async Task ContinueCrawlerAsync(WebCrawler crawler, string lastCache, string basePath)
        {
            string lastArea      = string.Empty;
            bool   resumePending = false;

            if (File.Exists(lastCache))
            {
                // An empty checkpoint file carries no resume point; indexing [0] on it would throw.
                string[] cachedLines = File.ReadAllLines(lastCache);
                if (cachedLines.Length > 0)
                {
                    lastArea      = cachedLines[0];
                    resumePending = true;
                }
            }

            for (int count = 1; count > 0; count++)
            {
                LogHelper.Info($"{crawler.GetType()}城市:" + count.ToString());
                var path = Directory.GetCurrentDirectory() + string.Format(basePath, count.ToString());

                if (!File.Exists(path))
                {
                    // First missing file number marks the end of the run.
                    LogHelper.Info($"{crawler.GetType()}爬虫结束");
                    File.Delete(lastCache);
                    break;
                }

                var areas = File.ReadAllLines(path);
                await StartCrawlerAsync(crawler, areas, resumePending, lastCache, lastArea);

                // StartCrawlerAsync skips every line until it meets lastArea. Once the file that
                // holds the checkpoint has been processed, later files must be crawled in full;
                // leaving the flag set would skip all of them because lastArea never appears there.
                if (resumePending && System.Array.IndexOf(areas, lastArea) >= 0)
                {
                    resumePending = false;
                }
            }
        }
Пример #2
0
 /// <summary>
 /// Crawls the given areas in order. Before each area is crawled its value is written to
 /// <paramref name="lastCache"/>, overwriting the previous content.
 /// </summary>
 /// <param name="crawler">Crawler whose <c>AgentCrawlerAsync</c> is awaited for every area.</param>
 /// <param name="areas">Area entries; each one is turned into a <see cref="Uri"/>.</param>
 /// <param name="lastAreaExists">
 /// When true, leading entries are dropped until one equal to <paramref name="lastArea"/> is found;
 /// that entry itself is kept.
 /// </param>
 /// <param name="lastCache">Path of the file that receives the area currently being crawled.</param>
 /// <param name="lastArea">Entry to resume from when <paramref name="lastAreaExists"/> is true.</param>
 protected async Task StartCrawlerAsync(WebCrawler crawler, string[] areas, bool lastAreaExists,
                                        string lastCache, string lastArea)
 {
     string[] pending = lastAreaExists
         ? areas.SkipWhile(candidate => candidate != lastArea).ToArray()
         : areas;

     LogHelper.Info($"剩余城市数量:{pending.Count()}");

     for (int position = 0; position < pending.Length; position++)
     {
         string current = pending[position];

         // Record the area first so an interrupted run can pick it up again.
         File.WriteAllText(lastCache, current);
         await crawler.AgentCrawlerAsync(new Uri(current));
     }
 }
Пример #3
0
        /// <summary>
        /// Collects every area returned by the crawler for each city-area URI of
        /// <paramref name="city"/> and writes them, one per line, to
        /// <c>{areaPath}/{index}.txt</c>. When nothing was collected no file is written and
        /// <c>index</c> is decremented instead.
        /// </summary>
        /// <param name="crawler">Crawler queried via <c>GetCityAreaUrisAsync</c> and <c>GetAreasAsync</c>.</param>
        /// <param name="city">City value passed to <c>GetCityAreaUrisAsync</c>.</param>
        /// <param name="areaPath">Directory part of the output file path.</param>
        protected async Task WriteAreaUriToFileAsync(WebCrawler crawler, string city, string areaPath)
        {
            var collected = new List<string>();

            foreach (var areaUri in await crawler.GetCityAreaUrisAsync(city))
            {
                foreach (var area in await crawler.GetAreasAsync(areaUri))
                {
                    collected.Add(area);
                }
            }

            if (collected.Count == 0)
            {
                // NOTE(review): index is declared outside this method; presumably the caller
                // advanced it beforehand and this undoes that step — confirm against the caller.
                index--;
                return;
            }

            File.WriteAllLines($"{areaPath}/{index}.txt", collected);
        }