/// <summary>
/// Imports one top-level food-material category: loads the category page, walks every
/// middle-category block on it, collects that block's food materials, and persists the
/// result through <c>dataStore</c> while also appending it to <c>rawData</c>.
/// </summary>
/// <param name="element">
/// Element whose inner HTML is used as the top category name and whose <c>href</c>
/// attribute is used as the category page URL.
/// </param>
/// <returns>A task that completes once every middle category on the page has been processed.</returns>
private async Task ImportTopCategoryAsync(IElement element)
{
    // Stopwatch is monotonic; subtracting DateTime.Now values is skewed by wall-clock
    // adjustments and daylight-saving changes.
    var stopwatch = System.Diagnostics.Stopwatch.StartNew();

    var topCatName = element.InnerHtml;
    var topCatUrl = element.GetAttribute("href");
    Logger.Info($"导入食材顶级分类: {topCatName} {topCatUrl}");

    var topDoc = await CrawlerHelper.GetDocumentAddHttpPrefixAsync(topCatUrl);
    var middleDivs = topDoc.QuerySelectorAll(".category_sub.clear");
    Logger.Info($" 有{middleDivs.Length} 中类 ");

    foreach (var midDiv in middleDivs)
    {
        // The first child is expected to be the <h2> heading carrying the middle category name.
        // (Original author's note: nodes form the visual tree, queried with XPath;
        // elements form the logical tree, queried with selectors.)
        var heading = midDiv.FirstElementChild;
        if (heading == null)
        {
            // A block with no child elements has neither a heading nor a list;
            // skip it instead of aborting the whole import with a NullReferenceException.
            Logger.Info($" {topCatName} 存在没有子元素的中类节点,已跳过 ");
            continue;
        }

        var middleCatName = heading.TextContent;
        if (dataStore.HasSave(topCatName, middleCatName))
        {
            // Already imported; do not import it again.
            continue;
        }

        // The last child is expected to be the <ul> listing the food materials.
        var ul = midDiv.LastElementChild;
        var foodMaterials = new FoodMaterialCollection();
        foreach (var li in ul.GetElementsByTagName("li"))
        {
            var a = li.FirstElementChild;
            if (a == null)
            {
                // List item without a link element: nothing to import from it.
                continue;
            }

            var foodMaterialName = a.TextContent;
            var foodMaterialHref = a.GetAttribute("href");
            var foodMaterial = await TryGetFoodMaterial(foodMaterialName, foodMaterialHref);
            if (foodMaterial != null)
            {
                foodMaterials.Add(foodMaterial);
            }
        }

        Logger.Info($" {middleCatName} 有{foodMaterials.Count}个食材 ");

        var rawItem = new FoodMaterialRawDataItem()
        {
            Top = topCatName,
            Middle = middleCatName,
            FoodMaterials = foodMaterials
        };
        dataStore.SaveCategory(rawItem);
        rawData.Add(rawItem);

        // The stopwatch is started once per top category, so the reported time is
        // cumulative since the start of this method, not per middle category.
        string msg = $"导入{topCatName} {middleCatName}耗时:{stopwatch.Elapsed.TotalSeconds}";
        Console.WriteLine(msg);
        Logger.Info(msg);
    }
}
/// <summary>
/// Serializes a category item to JSON and writes it to the local file whose path is
/// resolved from the item's top and middle category names.
/// </summary>
/// <param name="item">
/// Category data to persist; its <c>Top</c> and <c>Middle</c> values are passed to
/// <c>GetLoaclFilePath</c> to select the target file.
/// </param>
public void SaveCategory(FoodMaterialRawDataItem item)
{
    var serialized = Newtonsoft.Json.JsonConvert.SerializeObject(item);
    var targetPath = GetLoaclFilePath(item.Top, item.Middle);

    // Creates the file (or overwrites an existing one), writes the text as UTF-8,
    // and closes the file before returning.
    System.IO.File.WriteAllText(targetPath, serialized);
}
/// <summary>
/// Accepts a category item and does nothing with it: the method body is empty,
/// so no data is written anywhere by this implementation.
/// </summary>
/// <param name="item">Category data; not read by this implementation.</param>
// NOTE(review): presumably a placeholder or no-op store variant of SaveCategory — confirm against the enclosing class.
public void SaveCategory(FoodMaterialRawDataItem item) { }