Exemple #1
0
        /// <summary>
        /// Downloads the products of a category, determines which of them differ from the
        /// stored copies by signature, and hands the changed ones to the archive service.
        /// </summary>
        /// <param name="category">Category to process; its Number drives the download and its Id is passed to the archive service.</param>
        /// <returns>
        /// An anonymous object with <c>Total</c> (count of downloaded products) and
        /// <c>Changed</c> (count of products passed to the archive service).
        /// </returns>
        private async Task<dynamic> ProcessCategoryAsync(Category category) {
            // Scrape the product information from the website. The scraped data may contain
            // duplicates, which would have to be filtered out to avoid conflicts when the
            // database is updated from multiple threads.
            // NOTE(review): no de-duplication is visible in this method; presumably the
            // extractor or the archive service handles it - confirm.
            var downloadProducts = await new YhdProductExtractor().ExtractProductsInCategoryAsync(category.Number);

            // Fetch the stored signature of each downloaded product (null when the product is
            // not stored yet). The query is materialized right away: left deferred, every
            // enumeration performed by FindChangedProducts would repeat one repository lookup
            // per product, on whatever thread happens to enumerate it.
            // (A previous variant loaded the signatures via _ProductRepo.GetByCategoryId(category.Id).)
            var existingProducts = downloadProducts.Select(p => {
                var existProduct = _ProductRepo.GetBySourceAndNumber(p.Source, p.Number);
                return new ProductSignature { Source = p.Source, Number = p.Number, Signature = existProduct != null ? existProduct.Signature : null };
            }).ToList();

            // Compute the signature of every freshly downloaded product.
            downloadProducts.AsParallel().ForAll(p => p.Signature = ProductSignature.ComputeSignature(p));

            // Keep only the products whose signature differs from the stored one.
            var changedProducts = FindChangedProducts(downloadProducts, existingProducts).ToList();

            _ProductArchiveService.Archive(category.Id, changedProducts);

            return new {
                Total = downloadProducts.Count(),
                Changed = changedProducts.Count
            };
        }
        /// <summary>
        /// Lazily parses the categories found under a node by an XPath query.
        /// </summary>
        /// <param name="xpath">XPath expression handed to <c>outerNode.SelectNodes</c>.</param>
        /// <param name="outerNode">Node against which the XPath query is evaluated.</param>
        /// <param name="parentCategory">Parent category, may be null; when it has a non-blank Number, that value is copied to each parsed category's ParentNumber.</param>
        /// <param name="level">Value assigned to the Level of every parsed category.</param>
        /// <returns>
        /// A sequence of anonymous objects exposing <c>Category</c> and <c>Node</c>; the sequence
        /// is empty when the query returns null.
        /// </returns>
        private IEnumerable<dynamic> ParseCategories(string xpath, HtmlNode outerNode, Category parentCategory, int level) {
            var matchedNodes = outerNode.SelectNodes(xpath);
            if (matchedNodes != null) {
                foreach (var matchedNode in matchedNodes) {
                    var parsed = ParseCategoryFromANode(matchedNode);
                    parsed.Level = level;

                    // Link to the parent only when a parent with a usable number was supplied.
                    var parentNumberMissing = parentCategory == null || string.IsNullOrWhiteSpace(parentCategory.Number);
                    if (!parentNumberMissing) {
                        parsed.ParentNumber = parentCategory.Number;
                    }

                    yield return new { Category = parsed, Node = matchedNode };
                }
            }
        }