Пример #1
0
        /// <summary>
        /// Imports one top-level food-material category: downloads the category page,
        /// walks every middle category block (<c>.category_sub.clear</c>) on it, resolves
        /// each listed food material, then saves the result through <c>dataStore</c> and
        /// appends it to <c>rawData</c>.
        /// </summary>
        /// <param name="element">
        /// Element describing the top category; its inner HTML is used as the category
        /// name and its <c>href</c> attribute as the page address.
        /// </param>
        /// <returns>A task that completes once every middle category has been processed.</returns>
        private async Task ImportTopCategoryAsync(IElement element)
        {
            // Stopwatch is unaffected by wall-clock changes (DST, clock sync), unlike DateTime.Now.
            var stopwatch = System.Diagnostics.Stopwatch.StartNew();

            var topCatName = element.InnerHtml;
            var topCatUrl  = element.GetAttribute("href");

            Logger.Info($"导入食材顶级分类: {topCatName} {topCatUrl}");

            var topDoc = await CrawlerHelper.GetDocumentAddHttpPrefixAsync(topCatUrl);

            var middleDivs = topDoc.QuerySelectorAll(".category_sub.clear");

            Logger.Info($"   有{middleDivs.Length} 中类 ");

            foreach (var midDiv in middleDivs)
            {
                // First child is expected to be the <h2> heading, last child the <ul> list.
                // (Nodes form the visual tree and are addressed with XPath; elements form
                // the logical tree and are addressed with selectors.)
                var heading = midDiv.FirstElementChild;
                var ul      = midDiv.LastElementChild;
                if (heading == null || ul == null)
                {
                    // A block without child elements cannot be parsed; skip it instead of
                    // failing the whole top category with a NullReferenceException.
                    Logger.Info($"    {topCatName} 中有一个中类缺少子元素, 已跳过 ");
                    continue;
                }

                var middleCatName = heading.TextContent;
                if (dataStore.HasSave(topCatName, middleCatName))
                {
                    continue; // Already imported; do not import it again.
                }

                var foodMaterials = new FoodMaterialCollection();
                foreach (var li in ul.GetElementsByTagName("li"))
                {
                    var a = li.FirstElementChild;
                    if (a == null)
                    {
                        // An <li> without a child element carries no name or link to follow.
                        Logger.Info($"    {middleCatName} 中有一个列表项缺少链接, 已跳过 ");
                        continue;
                    }

                    var foodMaterialName = a.TextContent;
                    var foodMaterialHref = a.GetAttribute("href");

                    var foodMaterial = await TryGetFoodMaterial(foodMaterialName, foodMaterialHref);

                    if (foodMaterial != null)
                    {
                        foodMaterials.Add(foodMaterial);
                    }
                }
                Logger.Info($"    {middleCatName} 有{foodMaterials.Count}个食材 ");

                var rawItem = new FoodMaterialRawDataItem()
                {
                    Top           = topCatName,
                    Middle        = middleCatName,
                    FoodMaterials = foodMaterials
                };

                dataStore.SaveCategory(rawItem);

                rawData.Add(rawItem);

                // Elapsed time is measured from the start of the top category, so the
                // reported value is cumulative across middle categories.
                string msg = $"导入{topCatName} {middleCatName}耗时:{stopwatch.Elapsed.TotalSeconds}";
                Console.WriteLine(msg);
                Logger.Info(msg);
            }
        }
Пример #2
0
        /// <summary>
        /// Serializes a category item to JSON and writes it to the file whose path is
        /// returned by <c>GetLoaclFilePath</c> for the item's <c>Top</c> and <c>Middle</c> values.
        /// </summary>
        /// <param name="item">The category item to serialize and store.</param>
        public void SaveCategory(FoodMaterialRawDataItem item)
        {
            string serialized = Newtonsoft.Json.JsonConvert.SerializeObject(item);
            string targetPath = GetLoaclFilePath(item.Top, item.Middle);

            // Creates the file (or replaces its contents), writes the text and closes it.
            System.IO.File.WriteAllText(targetPath, serialized);
        }
Пример #3
0
 /// <summary>
 /// Accepts a category item and does nothing with it: the method body is empty.
 /// </summary>
 /// <param name="item">The category item; it is not read by this method.</param>
 // NOTE(review): presumably a placeholder or a deliberately no-op store — confirm before relying on it to persist data.
 public void SaveCategory(FoodMaterialRawDataItem item)
 {
 }