/// <summary>
/// Downloads the products of one category, compares them with the signatures already
/// stored in the repository, and archives the products reported as changed.
/// </summary>
/// <param name="category">
/// Category to process. Its <c>Number</c> is passed to the extractor and its <c>Id</c>
/// is passed to the archive service.
/// </param>
/// <returns>
/// An anonymous object with two members: <c>Total</c> (count of downloaded products) and
/// <c>Changed</c> (count of products returned by <c>FindChangedProducts</c>).
/// </returns>
private async Task<dynamic> ProcessCategoryAsync(Category category)
{
    // Scrape the product information from the website. Scraped data may contain duplicates,
    // which have to be filtered out or concurrent database updates could conflict.
    // NOTE(review): no de-duplication is visible in this method; presumably the extractor
    // handles it - confirm.
    var downloadProducts = await new YhdProductExtractor().ExtractProductsInCategoryAsync(category.Number);

    // Build the signature records of the products that already exist in the repository.
    // The query is materialized with ToList() on purpose: the lambda performs a repository
    // lookup per product, and a deferred query would repeat every lookup each time
    // FindChangedProducts enumerates the sequence.
    var existingProducts = downloadProducts
        .Select(p =>
        {
            var existProduct = _ProductRepo.GetBySourceAndNumber(p.Source, p.Number);
            return new ProductSignature
            {
                Source = p.Source,
                Number = p.Number,
                // A product that is not stored yet has no signature to compare against.
                Signature = existProduct != null ? existProduct.Signature : null
            };
        })
        .ToList();

    // Compute the signature of every freshly downloaded product (in parallel).
    downloadProducts.AsParallel().ForAll(p => p.Signature = ProductSignature.ComputeSignature(p));

    // Find the products whose signature differs from the stored one.
    var changedProducts = FindChangedProducts(downloadProducts, existingProducts).ToList();

    _ProductArchiveService.Archive(category.Id, changedProducts);

    return new { Total = downloadProducts.Count(), Changed = changedProducts.Count };
}
/// <summary>
/// Lazily parses the categories found under a node.
/// </summary>
/// <param name="xpath">XPath expression evaluated against <paramref name="outerNode"/> to select the category nodes.</param>
/// <param name="outerNode">Node on which the XPath selection is performed.</param>
/// <param name="parentCategory">
/// Optional parent category. When it is not null and its <c>Number</c> is not blank, that
/// number is assigned to each parsed category's <c>ParentNumber</c>.
/// </param>
/// <param name="level">Value assigned to each parsed category's <c>Level</c>.</param>
/// <returns>
/// A sequence of anonymous objects, each exposing <c>Category</c> (the parsed category) and
/// <c>Node</c> (the node it was parsed from). The sequence is empty when the selection
/// returns null.
/// </returns>
private IEnumerable<dynamic> ParseCategories(string xpath, HtmlNode outerNode, Category parentCategory, int level)
{
    var matchedNodes = outerNode.SelectNodes(xpath);

    if (matchedNodes != null)
    {
        foreach (var currentNode in matchedNodes)
        {
            var parsed = ParseCategoryFromANode(currentNode);
            parsed.Level = level;

            // Link to the parent only when the parent carries a usable number.
            bool parentNumberMissing = parentCategory == null
                || string.IsNullOrWhiteSpace(parentCategory.Number);
            if (!parentNumberMissing)
            {
                parsed.ParentNumber = parentCategory.Number;
            }

            yield return new { Category = parsed, Node = currentNode };
        }
    }
}