Пример #1
0
        /// <summary>
        /// Runs <c>DataParser&lt;Product&gt;</c> over the saved <c>Jd.html</c> listing page and
        /// verifies the number of parsed products and the field values of the first one.
        /// </summary>
        public void ParseEntity()
        {
            // Sample the calendar date before any work starts. Together with the sample
            // taken after parsing it brackets the run, so the RunId check at the end
            // cannot fail spuriously when the test happens to straddle midnight.
            var dateBeforeRun = DateTime.Now.ToString("yyyy-MM-dd");

            var service     = SpiderFactory.CreateScope();
            var dataContext = new DataFlowContext(service);

            // Extra key/value data attached to the request. The CategoryName / CategoryId
            // assertions below expect these values (presumably mapped by Product's
            // selectors, which are not visible here).
            var requestData = new Dictionary<string, string>
            {
                { "cat", "手机" },
                { "cat3", "110" }
            };

            dataContext.AddResponse(new Response
            {
                Request = new Request("https://list.jd.com/list.html?cat=9987,653,655", requestData),
                RawText = File.ReadAllText("Jd.html")
            });

            var extractor = new DataParser<Product>();

            // The test method is synchronous, so the parsing task is completed by blocking.
            extractor.HandleAsync(dataContext).GetAwaiter().GetResult();

            var dateAfterRun = DateTime.Now.ToString("yyyy-MM-dd");

            // Parsed items are stored in the context under the entity type's full name.
            var results = ((List<object>)dataContext.GetItem(typeof(Product).FullName))
                          .Cast<Product>()
                          .ToList();

            Assert.Equal(60, results.Count);

            var first = results[0];
            Assert.Equal("手机", first.CategoryName);
            Assert.Equal(110, first.CategoryId);
            Assert.Equal("https://item.jd.com/3031737.html", first.Url);
            Assert.Equal("3031737", first.Sku);
            Assert.Equal("荣耀官方旗舰店", first.ShopName);
            Assert.Equal("荣耀 NOTE 8 4GB+32GB 全网通版 冰河银", first.Name);
            Assert.Equal("1000000904", first.VenderId);
            Assert.Equal("1000000904", first.JdzyShopId);

            // RunId must carry the date on which the parse ran: either side of the bracket.
            var runDate = first.RunId.ToString("yyyy-MM-dd");
            Assert.True(runDate == dateBeforeRun || runDate == dateAfterRun,
                        "RunId date " + runDate + " should be " + dateBeforeRun + " or " + dateAfterRun);
        }
Пример #2
0
        /// <summary>
        /// Checks that the follow function built by <c>DataParser.XpathFollow</c> for the
        /// pager element yields the expected links from the saved <c>cnblogs.html</c> page.
        /// </summary>
        public void XpathFollow()
        {
            var scope = SpiderFactory.CreateScope();
            DataFlowContext context = new DataFlowContext(scope);

            // Feed the context a response built from the local HTML fixture.
            Response response = new Response
            {
                Request = new Request("http://cnblogs.com"),
                RawText = File.ReadAllText("cnblogs.html")
            };
            context.AddResponse(response);

            // Build the follow function for the pager block and run it on the context.
            var follow = DataParser.XpathFollow(".//div[@class='pager']");
            var followed = follow.Invoke(context);

            Assert.Equal(12, followed.Length);
            Assert.Contains(followed, link => link == "http://cnblogs.com/sitehome/p/2");
        }
Пример #3
0
        /// <summary>
        /// Parses the in-memory <c>Html</c> fixture with <c>DataParser&lt;E&gt;</c> and verifies
        /// that the first two extracted entities have the titles "a" and "b".
        /// </summary>
        public void MultiEntitySelector()
        {
            var scope = SpiderFactory.CreateScope();
            DataFlowContext context = new DataFlowContext(scope);

            // Register a response whose body is the Html fixture defined elsewhere in the file.
            Response response = new Response
            {
                Request = new Request("http://abcd.com"),
                RawText = Html
            };
            context.AddResponse(response);

            // The test method is synchronous, so the parsing task is completed by blocking.
            DataParser<E> entityParser = new DataParser<E>();
            entityParser.HandleAsync(context).GetAwaiter().GetResult();

            // Parsed items are stored in the context under the entity type's full name.
            var stored = (List<object>)context.GetItem(typeof(E).FullName);
            List<E> entities = stored.Cast<E>().ToList();

            Assert.Equal("a", entities[0].title);
            Assert.Equal("b", entities[1].title);
        }