/// <summary>
/// Builds the list of dish-list entries whose detail pages still have to be downloaded.
/// </summary>
/// <returns>
/// A <see cref="DishListRawData"/> constructed from the entries returned by
/// <c>DishFileStore.GetByPrefix</c> for the "DishList" prefix.
/// </returns>
private DishListRawData GetCrawDishList()
{
    // Fetch the list files that need to be downloaded. The callback stores the
    // object name handed back by the store in each entry's Tag.
    // NOTE(review): presumably GetByPrefix returns every stored object whose name
    // starts with "DishList" - confirm against DishFileStore.
    List<DishDetailsRawData> pendingEntries = DishFileStore.GetByPrefix<DishDetailsRawData>(
        "DishList",
        (entry, objectName) => entry.Tag = objectName);

    // Original author's note: only one is downloaded at a time.
    return new DishListRawData(pendingEntries);
}
/// <summary>
/// Runs the crawl pipeline for one job execution: categories, then dish lists,
/// then dish details. The image step (4) is currently disabled.
/// </summary>
/// <param name="context">The job execution context; not read by this method.</param>
/// <returns>A task that completes when the steps run by this invocation have finished.</returns>
/// <remarks>
/// The crawl tasks are awaited with <see cref="Task.WhenAll(IEnumerable{Task})"/> instead of
/// blocking the calling thread with <c>Task.WaitAll</c>. As a consequence a failed crawl task
/// surfaces as its first underlying exception rather than wrapped in an
/// <see cref="AggregateException"/>.
/// </remarks>
private async Task ExecuteImp(IJobExecutionContext context)
{
    // Step 1: crawl categories. When no category data exists yet, crawl it and stop;
    // the result is meant to be edited by hand before step 2 runs on a later execution.
    DishCategoryRawData dcr = GetDishCategoryRawData();
    if (dcr == null)
    {
        await CrawlDishCategory();
        return;
    }

    // Step 2: crawl the dish list of every category flagged NeedCrawl.
    List<Task> tasks = new List<Task>();
    foreach (var dcrItem in dcr)
    {
        if (dcrItem.NeedCrawl)
        {
            tasks.Add(CrawDishList(dcrItem));
        }
    }

    // Await instead of blocking: this is an async method, so the thread is released
    // while the crawls are in flight.
    await Task.WhenAll(tasks);

    // Step 3: crawl dish details for every entry produced by step 2.
    tasks.Clear();
    var start = DateTime.Now;
    ShowAndLog($"爬详情 start:{start}");

    DishListRawData dlr = GetCrawDishList();
    foreach (var dlrItem in dlr)
    {
        tasks.Add(CrawlDishDetails(dlrItem));

        //if (UserBreaker())
        //{
        //    // Everything must be downloaded before moving on to step 4.
        //    return;
        //}
    }

    await Task.WhenAll(tasks);

    var end = DateTime.Now;
    ShowAndLog($"爬详情 end:{end} 耗时:{end.Subtract(start).TotalMinutes}");

    // Step 4: crawl images (disabled; kept for reference).
    //start = DateTime.Now;
    //ShowAndLog($"爬img start:{start}");
    //DishImgRawData imgs = GetDishDetails();
    //ParallelOptions parallelOptions = new ParallelOptions()
    //{
    //    MaxDegreeOfParallelism = 50,
    //};
    //Parallel.ForEach(imgs, parallelOptions
    //    , (img) =>
    //    {
    //        try
    //        {
    //            CrawlerHelper.DownloadImgAndSave(img);
    //            Console.Write(".");
    //        }
    //        catch(System.Net.WebException ex)
    //        {
    //            ShowAndLog($"{img.SourcrUrl}:{ex.Message}");
    //            Thread.Sleep(100);
    //        }
    //    }
    //    );
    //end = DateTime.Now;
    //ShowAndLog($"爬img end:{end} 耗时:{end.Subtract(start).TotalMinutes}");
}