Exemplo n.º 1
0
        /// <summary>
        /// Crawls one dish list page and collects the dishes listed on it.
        /// </summary>
        /// <param name="listFullUrl">URL of the list page to download.</param>
        /// <returns>
        /// One item per <c>li</c> under <c>#J_list</c> that contains a link. The list is
        /// empty when the container is missing or holds fewer than 10 entries.
        /// </returns>
        private async Task <List <DishDetailsRawDataItem> > GetOnePageDishs(string listFullUrl)
        {
            Console.Write(listFullUrl);
            List <DishDetailsRawDataItem> pageDishs = new List <DishDetailsRawDataItem>();

            IHtmlDocument doc = await CrawlerHelper.GetDocumentASync(listFullUrl);

            var div = doc.QuerySelector("#J_list"); // container element of the dish list

            if (div == null)
            {
                return(pageDishs);
            }
            var lis = div.GetElementsByTagName("li");

            // NOTE(review): a page with fewer than 10 entries is discarded as a whole.
            // Presumably a guard against placeholder pages - confirm a short final page
            // is not lost because of it.
            if (lis == null || lis.Length < 10)
            {
                return(pageDishs);
            }
            foreach (var li in lis)
            {
                Console.Write(".");

                // FirstOrDefault instead of First: a single malformed entry must not
                // abort the whole page with an InvalidOperationException.
                var link  = li.GetElementsByTagName("a").FirstOrDefault();
                var image = li.GetElementsByTagName("img").FirstOrDefault();

                if (link == null)
                {
                    // Without a link there is neither a name nor a detail URL to record.
                    continue;
                }
                pageDishs.Add(new DishDetailsRawDataItem()
                {
                    // The thumbnail is optional; it stays null when the entry has no image.
                    SmallImageUrl = image?.GetAttribute("data-src"),
                    Name          = link.GetAttribute("title"),
                    Url           = link.GetAttribute("href"),
                    DataId        = li.GetAttribute("data-id")
                });
            }
            Console.WriteLine("");

            return(pageDishs);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Downloads the recipe category index page, builds one category item per link
        /// found under each top-level category block, and saves the collection through
        /// <c>SerializeHelper.Save</c>.
        /// </summary>
        private async Task CrawlDishCategory()
        {
            DishCategoryRawData cates = new DishCategoryRawData();

            IHtmlDocument doc = await CrawlerHelper.GetDocumentASync("https://home.meishichina.com/recipe-type.html");

            var divs = doc.QuerySelectorAll(".category_sub.clear"); // one element per top-level category

            Logger.Info($"找到菜品顶级分类: {divs.Length } 个");

            foreach (var topDiv in divs)
            {
                // The first child carries the category heading; its next sibling holds the links.
                var heading       = topDiv.FirstElementChild;
                var linkContainer = heading?.NextElementSibling;

                if (linkContainer == null)
                {
                    // Unexpected markup: skip this block instead of failing the whole crawl.
                    continue;
                }

                string topTag = heading.TextContent.Replace("/", ",");
                var    links  = linkContainer.GetElementsByTagName("a");
                Console.Write(topTag);
                foreach (var a in links)
                {
                    string listUrl = a.GetAttribute("href");
                    cates.Add(new DishCategoryRawDataItem()
                    {
                        // Parentheses are required: "+" binds tighter than "??", so without
                        // them the concatenation (never null) is the left operand and the
                        // TextContent fallback is never used.
                        Tag         = topTag + "," + (a.GetAttribute("title") ?? a.TextContent),
                        ListUrl     = listUrl,
                        PagesNumber = GetPageNumber(listUrl),
                    });
                    Console.Write(".");
                }
                Console.WriteLine("");
            }

            SerializeHelper.Save("Dish", "DishCategory", cates);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Resets <c>rawData</c>, downloads the food material index page and passes every
        /// navigation link except the first and the last to <c>ImportTopCategoryAsync</c>,
        /// then logs the resulting totals.
        /// </summary>
        /// <param name="context">Job execution context; not read by this method.</param>
        private async Task ExecuteImp(IJobExecutionContext context)
        {
            rawData = new FoodMaterialRawData();

            // NOTE(review): the URL literal begins with a space - confirm the helper tolerates it.
            IHtmlDocument page = await CrawlerHelper.GetDocumentASync(" http://www.meishichina.com/YuanLiao/");

            // Anchors of the navigation bar; the first and the last one are skipped below.
            var navLinks = page.QuerySelectorAll(".nav_wrap2 li a");

            int categoryCount = navLinks.Length - 2;
            Logger.Info($"找到食材顶级分类: {categoryCount} 个");

            int index = 1;
            while (index <= categoryCount)
            {
                await ImportTopCategoryAsync(navLinks[index]);
                index++;
            }

            // NOTE: handing rawData to an importer is disabled in the original code.
            Logger.Info($"共有{rawData.TopCount} 个大类 {rawData.MiddleCount} 个中类 {rawData.MaterialCount} 个食材 ");
        }
Exemplo n.º 4
0
        // Fills in the fields of a dish item that are still missing, using the dish's
        // detail page: large image, tags, ingredient lists, taste/technique specs and
        // the cooking steps.
        private async Task FillDetail(DishDetailsRawDataItem detail)
        {
            IHtmlDocument page = await CrawlerHelper.GetDocumentASync($"https:{detail.Url}");

            // Large image: null when the page has no matching element.
            detail.BigImageUrl = page.QuerySelector(".J_photo img")?.GetAttribute("src");

            // Tags: text of every link inside the third tip block, each followed by a comma.
            var tipBlocks = page.QuerySelectorAll(".recipeTip.mt16");

            if (tipBlocks?.Length >= 3)
            {
                detail.Tags = string.Concat(
                    tipBlocks[2].GetElementsByTagName("a").Select(anchor => anchor.TextContent + ","));
            }

            // Ingredient lists (BOM): the first section is stored as the main list, the
            // second as the auxiliary list.
            var sections = page.QuerySelectorAll(".particulars");

            if (sections?.Length >= 2)
            {
                var boms = sections
                           .Select(section =>
                {
                    var bom = new DishBomRawData();
                    bom.AddRange(section.GetElementsByTagName("li").Select(row =>
                    {
                        var cells = row.Children;
                        if (cells == null || cells.Length < 2)
                        {
                            // Rows without two child elements yield an empty placeholder item.
                            return new BomItem();
                        }
                        return new BomItem()
                        {
                            EnglishName      = CrawlerHelper.GetUrlLast(cells[0].FirstElementChild?.GetAttribute("href")),
                            FoodMaterialName = cells[0].GetElementsByTagName("b").FirstOrDefault()?.TextContent,
                            Use              = cells[1].TextContent,
                        };
                    }));
                    return bom;
                })
                           .ToList();

                // One BOM is built per section, so at least two entries exist here.
                detail.DishBom    = boms[0];
                detail.AuxDishBom = boms[1];
            }

            // Specs: link text of each entry, read in page order.
            var specs = page.QuerySelectorAll(".recipeCategory_sub_R.mt30.clear li")
                        .Select(entry => entry.GetElementsByTagName("a").FirstOrDefault()?.TextContent)
                        .ToList();

            if (specs.Count >= 4)
            {
                detail.Taste      = specs[0];
                detail.Technology = specs[1];
                detail.CookTime   = specs[2];
                detail.Difficulty = specs[3];
            }

            // Steps: optional photo plus the text of the entry's last child element.
            var steps = page.QuerySelectorAll(".recipeStep li")
                        .Select(entry => new CookeyItem()
            {
                Photo   = entry.GetElementsByTagName("img").FirstOrDefault()?.GetAttribute("src"),
                Content = entry.LastElementChild?.TextContent
            })
                        .ToList();

            detail.Cookery = new CookeryRawData(steps);
        }