Exemplo n.º 1
0
        /// <summary>
        /// Builds the <see cref="DishListRawData"/> that drives the dish-details crawl.
        /// </summary>
        /// <remarks>
        /// Items are requested from <c>DishFileStore</c> with the "DishList" prefix; the
        /// callback copies the object name handed to it into each item's <c>Tag</c>.
        /// NOTE(review): presumably these are the previously saved list files that still
        /// need their details downloaded - confirm against DishFileStore.GetByPrefix.
        /// </remarks>
        /// <returns>A new <see cref="DishListRawData"/> wrapping the loaded items.</returns>
        private DishListRawData GetCrawDishList()
        {
            // Fetch the list files that still have to be downloaded.
            List<DishDetailsRawData> storedEntries = DishFileStore.GetByPrefix<DishDetailsRawData>(
                "DishList",
                (entry, storedName) => entry.Tag = storedName);

            // Original author's note: only one item is downloaded at a time.
            return new DishListRawData(storedEntries);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Runs the crawl pipeline in stages: dish categories, then dish lists, then dish details.
        /// </summary>
        /// <remarks>
        /// When no category data is available yet, only the category crawl is performed and
        /// the method returns, because the category data is expected to be edited by hand
        /// before the later stages run. The image stage (step 4) is currently disabled.
        /// All waiting is done with <c>await Task.WhenAll</c> so the calling thread is never
        /// blocked; a failing crawl task therefore surfaces as its own exception rather than
        /// wrapped in an <see cref="AggregateException"/>.
        /// </remarks>
        /// <param name="context">Job execution context supplied by the scheduler; not read by this method.</param>
        /// <returns>A task that completes when the stages reached in this run have finished.</returns>
        private async Task ExecuteImp(IJobExecutionContext context)
        {
            // Step 1: crawl the categories.
            DishCategoryRawData dcr = GetDishCategoryRawData();

            if (dcr == null)
            {
                await CrawlDishCategory();

                // The category data must be adjusted manually before step 2 can run.
                return;
            }

            // Step 2: crawl the dish list of every category flagged for crawling.
            List<Task> listTasks = new List<Task>();

            foreach (var dcrItem in dcr)
            {
                if (dcrItem.NeedCrawl)
                {
                    listTasks.Add(CrawDishList(dcrItem));
                }
            }

            // Await instead of Task.WaitAll: blocking inside an async method ties up the
            // thread and can starve the pool or deadlock under a synchronization context.
            await Task.WhenAll(listTasks);

            // Step 3: crawl the dish details.
            List<Task> detailTasks = new List<Task>();
            var start = DateTime.Now;

            ShowAndLog($"爬详情 start:{start}");
            DishListRawData dlr = GetCrawDishList();

            foreach (var dlrItem in dlr)
            {
                // A user-break check used to live here but was disabled: every detail
                // must be downloaded before step 4 may start.
                detailTasks.Add(CrawlDishDetails(dlrItem));
            }

            await Task.WhenAll(detailTasks);
            var end = DateTime.Now;

            ShowAndLog($"爬详情 end:{end} 耗时:{end.Subtract(start).TotalMinutes}");

            // Step 4: crawl the images - currently disabled.
            // The previous implementation loaded a DishImgRawData via GetDishDetails() and
            // ran Parallel.ForEach over it with MaxDegreeOfParallelism = 50, calling
            // CrawlerHelper.DownloadImgAndSave(img) for each entry. On System.Net.WebException
            // it logged the image's source URL with the error message and slept for 100 ms.
            // Start/end times and the elapsed minutes were logged the same way as in step 3.
        }