/// <summary>
/// Runs <c>DataParser&lt;Product&gt;</c> over the saved <c>Jd.html</c> listing page and verifies
/// the number of extracted products and the field values of the first one.
/// </summary>
public void ParseEntity()
{
    // Sample the calendar date before any work is done; it is sampled again after the
    // results are read. Comparing RunId against a single late DateTime.Now makes the test
    // fail spuriously when the day rolls over while the test is running.
    var dateBefore = DateTime.Now.ToString("yyyy-MM-dd");

    var service = SpiderFactory.CreateScope();
    var dataContext = new DataFlowContext(service);

    // Request properties; presumably the source of CategoryName / CategoryId asserted
    // below (the mapping lives in Product, which is not visible here).
    var properties = new Dictionary<string, string>
    {
        { "cat", "手机" },
        { "cat3", "110" }
    };
    dataContext.AddResponse(new Response
    {
        Request = new Request("https://list.jd.com/list.html?cat=9987,653,655", properties),
        RawText = File.ReadAllText("Jd.html")
    });

    // The test method is synchronous, so the async handler is awaited by blocking.
    DataParser<Product> extractor = new DataParser<Product>();
    extractor.HandleAsync(dataContext).GetAwaiter().GetResult();

    // Parsed items are stored in the context under the entity type's full name.
    var results = ((List<object>)dataContext.GetItem(typeof(Product).FullName))
        .Select(x => (Product)x)
        .ToList();

    var dateAfter = DateTime.Now.ToString("yyyy-MM-dd");

    Assert.Equal(60, results.Count);

    var first = results[0];
    Assert.Equal("手机", first.CategoryName);
    Assert.Equal(110, first.CategoryId);
    Assert.Equal("https://item.jd.com/3031737.html", first.Url);
    Assert.Equal("3031737", first.Sku);
    Assert.Equal("荣耀官方旗舰店", first.ShopName);
    Assert.Equal("荣耀 NOTE 8 4GB+32GB 全网通版 冰河银", first.Name);
    Assert.Equal("1000000904", first.VenderId);
    Assert.Equal("1000000904", first.JdzyShopId);

    // RunId must fall on the day the test ran; either sampled date is acceptable.
    var runDate = first.RunId.ToString("yyyy-MM-dd");
    Assert.True(
        runDate == dateBefore || runDate == dateAfter,
        "RunId date " + runDate + " is neither " + dateBefore + " nor " + dateAfter);
}
/// <summary>
/// Builds a follow function with <c>DataParser.XpathFollow</c> for the pager element,
/// invokes it against the saved <c>cnblogs.html</c> page, and checks that it yields
/// 12 requests including the link to page 2.
/// </summary>
public void XpathFollow()
{
    var scope = SpiderFactory.CreateScope();
    var context = new DataFlowContext(scope);

    // Register the saved page as the response for the site root.
    var response = new Response
    {
        Request = new Request("http://cnblogs.com"),
        RawText = File.ReadAllText("cnblogs.html")
    };
    context.AddResponse(response);

    var follow = DataParser.XpathFollow(".//div[@class='pager']");
    var found = follow.Invoke(context);

    const string secondPage = "http://cnblogs.com/sitehome/p/2";
    Assert.Equal(12, found.Length);
    Assert.Contains(found, link => link == secondPage);
}
/// <summary>
/// Parses the <c>Html</c> fixture with <c>DataParser&lt;E&gt;</c> and checks that the first two
/// extracted entities carry the titles "a" and "b", in that order.
/// </summary>
public void MultiEntitySelector()
{
    var scope = SpiderFactory.CreateScope();
    var context = new DataFlowContext(scope);

    var response = new Response
    {
        Request = new Request("http://abcd.com"),
        RawText = Html
    };
    context.AddResponse(response);

    // The test method is synchronous, so the async handler is awaited by blocking.
    var entityParser = new DataParser<E>();
    entityParser.HandleAsync(context).GetAwaiter().GetResult();

    // Parsed items are stored in the context under the entity type's full name.
    var stored = (List<object>)context.GetItem(typeof(E).FullName);
    var entities = stored.Cast<E>().ToList();

    Assert.Equal("a", entities[0].title);
    Assert.Equal("b", entities[1].title);
}