/// <summary>
/// Feeds the saved listing page <c>Jd.html</c> through <c>DataParser&lt;Product&gt;</c> and verifies
/// both the number of extracted products and the field values of the first one.
/// </summary>
public void ParseEntity()
{
    var scope = SpiderFactory.CreateScope();
    var context = new DataFlowContext(scope);

    // Request-level properties; the assertions below expect "cat" and "cat3" to surface
    // as CategoryName / CategoryId (presumably via the Product mapping - verify there).
    var requestProperties = new Dictionary<string, string>
    {
        { "cat", "手机" },
        { "cat3", "110" }
    };
    var response = new Response
    {
        Request = new Request("https://list.jd.com/list.html?cat=9987,653,655", requestProperties),
        RawText = File.ReadAllText("Jd.html")
    };
    context.AddResponse(response);

    var parser = new DataParser<Product>();
    parser.HandleAsync(context).GetAwaiter().GetResult();

    // Parsed items are stored on the context under the entity type's full name.
    var stored = (List<object>)context.GetItem(typeof(Product).FullName);
    var products = stored.Cast<Product>().ToList();

    Assert.Equal(60, products.Count);

    var first = products[0];
    Assert.Equal("手机", first.CategoryName);
    Assert.Equal(110, first.CategoryId);
    Assert.Equal("https://item.jd.com/3031737.html", first.Url);
    Assert.Equal("3031737", first.Sku);
    Assert.Equal("荣耀官方旗舰店", first.ShopName);
    Assert.Equal("荣耀 NOTE 8 4GB+32GB 全网通版 冰河银", first.Name);
    Assert.Equal("1000000904", first.VenderId);
    Assert.Equal("1000000904", first.JdzyShopId);

    // RunId is only compared at day granularity against the current local date.
    Assert.Equal(DateTime.Now.ToString("yyyy-MM-dd"), first.RunId.ToString("yyyy-MM-dd"));
}
/// <summary>
/// Runs <c>DataParser&lt;E&gt;</c> over the <c>Html</c> fixture and verifies that the first two
/// extracted entities have the titles "a" and "b", in that order.
/// </summary>
public void MultiEntitySelector()
{
    var scope = SpiderFactory.CreateScope();
    var context = new DataFlowContext(scope);

    var response = new Response
    {
        Request = new Request("http://abcd.com"),
        RawText = Html
    };
    context.AddResponse(response);

    var parser = new DataParser<E>();
    parser.HandleAsync(context).GetAwaiter().GetResult();

    // Parsed items are stored on the context under the entity type's full name.
    var stored = (List<object>)context.GetItem(typeof(E).FullName);
    var entities = stored.Cast<E>().ToList();

    Assert.Equal("a", entities[0].title);
    Assert.Equal("b", entities[1].title);
}
/// <summary>
/// Extracts entities described by <c>Model</c> from the current response and stores them on the
/// context under <c>Model.TypeName</c>.
/// </summary>
/// <param name="context">Data-flow context providing the response, its selectable content and the item store.</param>
/// <returns>A completed task carrying <c>DataFlowResult.Success</c>.</returns>
protected override Task<DataFlowResult> Parse(DataFlowContext context)
{
    // Publish the table metadata under the type name if nothing is registered there yet.
    if (!context.Contains(Model.TypeName))
    {
        context.Add(Model.TypeName, TableMetadata);
    }

    var selectable = context.GetSelectable();
    List<dynamic> parsed = new List<dynamic>();

    if (selectable.Properties == null)
    {
        selectable.Properties = new Dictionary<string, object>();
    }

    // Environment values start as a copy of the request's properties...
    var environments = new Dictionary<string, string>();
    foreach (var pair in context.GetResponse().Request.Properties)
    {
        environments.Add(pair.Key, pair.Value);
    }

    // ...and are then extended/overwritten by the model's shared value selectors.
    if (Model.ShareValueSelectors != null)
    {
        foreach (var shared in Model.ShareValueSelectors)
        {
            string key = shared.Name;
            var extracted = selectable.Select(shared.ToSelector()).GetValue();

            // Indexer assignment adds the key when missing and replaces the value otherwise.
            environments[key] = extracted;
        }
    }

    if (Model.Selector == null)
    {
        // No entity selector: the whole selectable is parsed as one entity with index 0.
        var single = ParseObject(environments, selectable, 0);
        if (single == null)
        {
            // Log text: "parsed empty data, type: <type name>".
            Logger?.LogWarning($"解析到空数据,类型: {Model.TypeName}");
        }
        else
        {
            parsed.Add(single);
        }
    }
    else
    {
        var nodes = selectable.SelectList(Model.Selector.ToSelector()).Nodes()?.ToList();
        if (nodes != null)
        {
            // When Take is positive and smaller than the match count, keep only that many
            // nodes from the head or from the tail, depending on TakeFromHead.
            var limit = Model.Take;
            if (limit > 0 && nodes.Count > limit)
            {
                if (Model.TakeFromHead)
                {
                    nodes = nodes.Take(limit).ToList();
                }
                else
                {
                    nodes = nodes.Skip(nodes.Count - limit).ToList();
                }
            }

            var index = 0;
            foreach (var node in nodes)
            {
                var entity = ParseObject(environments, node, index);
                if (entity == null)
                {
                    // Log text: "parsed empty data, type: <type name>".
                    Logger?.LogWarning($"解析到空数据,类型: {Model.TypeName}");
                }
                else
                {
                    parsed.Add(entity);
                }

                index++;
            }
        }
    }

    if (parsed.Count == 0)
    {
        return Task.FromResult(DataFlowResult.Success);
    }

    // Append to items already stored for this type, or store the new list as-is.
    var existing = context.GetItem(Model.TypeName);
    if (existing != null)
    {
        existing.AddRange(parsed);
    }
    else
    {
        context.AddItem(Model.TypeName, parsed);
    }

    return Task.FromResult(DataFlowResult.Success);
}