/// <summary>
/// Crawls one page of a dish list and returns the dishes found on it.
/// </summary>
/// <param name="listFullUrl">Absolute URL of the list page to download.</param>
/// <returns>
/// One item per <c>li</c> under <c>#J_list</c> that contains a link; an empty list when the
/// container is missing or holds fewer than 10 <c>li</c> elements.
/// </returns>
private async Task<List<DishDetailsRawDataItem>> GetOnePageDishs(string listFullUrl)
{
    Console.Write(listFullUrl);
    var pageDishs = new List<DishDetailsRawDataItem>();

    IHtmlDocument doc = await CrawlerHelper.GetDocumentASync(listFullUrl);

    // Container element that holds the dish entries.
    var div = doc.QuerySelector("#J_list");
    if (div == null)
    {
        return pageDishs;
    }

    // NOTE(review): a page with fewer than 10 entries is treated as empty, so a short
    // final page would be dropped — presumably deliberate; confirm against the caller.
    var lis = div.GetElementsByTagName("li");
    if (lis == null || lis.Length < 10)
    {
        return pageDishs;
    }

    foreach (var li in lis)
    {
        Console.Write(".");

        // FirstOrDefault instead of First: one malformed entry must not throw and
        // discard every other dish on the page.
        var link = li.GetElementsByTagName("a").FirstOrDefault();
        if (link == null)
        {
            // Without a link there is no name or detail URL to record.
            continue;
        }

        var img = li.GetElementsByTagName("img").FirstOrDefault();

        pageDishs.Add(new DishDetailsRawDataItem()
        {
            // The thumbnail is optional; a missing <img> leaves the URL null.
            SmallImageUrl = img?.GetAttribute("data-src"),
            Name = link.GetAttribute("title"),
            Url = link.GetAttribute("href"),
            DataId = li.GetAttribute("data-id")
        });
    }

    Console.WriteLine("");
    return pageDishs;
}
/// <summary>
/// Downloads the recipe-type index page, builds one category record per sub-category link,
/// and saves the collected records through <c>SerializeHelper.Save</c>.
/// </summary>
private async Task CrawlDishCategory()
{
    DishCategoryRawData cates = new DishCategoryRawData();
    IHtmlDocument doc = await CrawlerHelper.GetDocumentASync("https://home.meishichina.com/recipe-type.html");

    // Each matched element is one top-level category block.
    var divs = doc.QuerySelectorAll(".category_sub.clear");
    Logger.Info($"找到菜品顶级分类: {divs.Length} 个");

    foreach (var topDiv in divs)
    {
        // The first child carries the top-level name; "/" separators become ",".
        string topTag = topDiv.FirstElementChild.TextContent.Replace("/", ",");
        // The element right after it contains the sub-category links.
        var links = topDiv.FirstElementChild.NextElementSibling.GetElementsByTagName("a");
        Console.Write(topTag);

        foreach (var a in links)
        {
            string listUrl = a.GetAttribute("href");

            // Resolve the fallback BEFORE concatenating: "+" binds tighter than "??", so the
            // unparenthesized form never fell back to the link text when "title" was missing.
            string subTag = a.GetAttribute("title") ?? a.TextContent;

            cates.Add(new DishCategoryRawDataItem()
            {
                Tag = topTag + "," + subTag,
                ListUrl = listUrl,
                PagesNumber = GetPageNumber(listUrl),
            });
            Console.Write(".");
        }

        Console.WriteLine("");
    }

    SerializeHelper.Save("Dish", "DishCategory", cates);
}
/// <summary>
/// Downloads the food-material index page and passes each navigation link, except the first
/// and the last, to <c>ImportTopCategoryAsync</c>; then logs the totals held in <c>rawData</c>.
/// </summary>
/// <param name="context">Job execution context; not read by this method.</param>
private async Task ExecuteImp(IJobExecutionContext context)
{
    rawData = new FoodMaterialRawData();

    // NOTE(review): the URL literal starts with a space; kept unchanged — confirm whether intended.
    IHtmlDocument page = await CrawlerHelper.GetDocumentASync(" http://www.meishichina.com/YuanLiao/");

    // Links in the navigation bar.
    var navLinks = page.QuerySelectorAll(".nav_wrap2 li a");

    int topCategoryCount = navLinks.Length - 2;
    Logger.Info($"找到食材顶级分类: {topCategoryCount} 个");

    // Skip the first and the last navigation link.
    for (int index = 1; index < navLinks.Length - 1; index++)
    {
        await ImportTopCategoryAsync(navLinks[index]);
    }

    // importer.Import(rawData);
    Logger.Info($"共有{rawData.TopCount} 个大类 {rawData.MiddleCount} 个中类 {rawData.MaterialCount} 个食材 ");
}
/// <summary>
/// Completes a dish record with data from the dish's own page: large photo, tags,
/// ingredient lists, taste/technique/time/difficulty, and cooking steps.
/// </summary>
/// <param name="detail">
/// Record to fill in; its <c>Url</c> is appended to "https:" to form the page address.
/// </param>
private async Task FillDetail(DishDetailsRawDataItem detail)
{
    IHtmlDocument page = await CrawlerHelper.GetDocumentASync("https:" + detail.Url);

    // Large photo.
    var photo = page.QuerySelector(".J_photo img");
    detail.BigImageUrl = photo?.GetAttribute("src");

    // Tags: link texts of the third ".recipeTip.mt16" block, each followed by a comma.
    var tipBlocks = page.QuerySelectorAll(".recipeTip.mt16");
    if (tipBlocks != null && tipBlocks.Length >= 3)
    {
        var tagLinks = tipBlocks[2].GetElementsByTagName("a");
        detail.Tags = string.Concat(tagLinks.Select(link => link.TextContent + ","));
    }

    // Ingredient lists (BOM): only used when at least two ".particulars" blocks exist.
    var ingredientBlocks = page.QuerySelectorAll(".particulars");
    if (ingredientBlocks != null && ingredientBlocks.Length >= 2)
    {
        var bomLists = new List<DishBomRawData>();
        foreach (var block in ingredientBlocks)
        {
            var bom = new DishBomRawData();
            var items = new List<BomItem>();

            foreach (var row in block.GetElementsByTagName("li"))
            {
                var cells = row.Children;
                if (cells == null || cells.Length < 2)
                {
                    // Rows without two child elements still produce an (empty) entry.
                    items.Add(new BomItem());
                    continue;
                }

                items.Add(new BomItem()
                {
                    EnglishName = CrawlerHelper.GetUrlLast(cells[0].FirstElementChild?.GetAttribute("href")),
                    FoodMaterialName = cells[0].GetElementsByTagName("b").FirstOrDefault()?.TextContent,
                    Use = cells[1].TextContent,
                });
            }

            bom.AddRange(items);
            bomLists.Add(bom);
        }

        if (bomLists.Count >= 2)
        {
            // First block goes to DishBom, second to AuxDishBom.
            detail.DishBom = bomLists[0];
            detail.AuxDishBom = bomLists[1];
        }
    }

    // Taste, technique, cooking time and difficulty, in page order.
    var specTexts = new List<string>();
    foreach (var specItem in page.QuerySelectorAll(".recipeCategory_sub_R.mt30.clear li"))
    {
        specTexts.Add(specItem.GetElementsByTagName("a").FirstOrDefault()?.TextContent);
    }

    if (specTexts.Count >= 4)
    {
        detail.Taste = specTexts[0];
        detail.Technology = specTexts[1];
        detail.CookTime = specTexts[2];
        detail.Difficulty = specTexts[3];
    }

    // Cooking steps: optional photo plus the text of each step's last child element.
    var stepItems = new List<CookeyItem>();
    foreach (var stepNode in page.QuerySelectorAll(".recipeStep li"))
    {
        string photoUrl = stepNode.GetElementsByTagName("img").FirstOrDefault()?.GetAttribute("src");
        string text = stepNode.LastElementChild?.TextContent;
        stepItems.Add(new CookeyItem() { Photo = photoUrl, Content = text });
    }

    detail.Cookery = new CookeryRawData(stepItems);
}