Example #1
0
        /// <summary>
        /// Feeds the saved <c>Jd.html</c> page through a <c>DataParser&lt;Product&gt;</c> and checks the
        /// number of extracted products together with the field values of the first one.
        /// </summary>
        public void ParseEntity()
        {
            var scope   = SpiderFactory.CreateScope();
            var context = new DataFlowContext(scope);

            // Request properties travel with the response; the assertions below expect
            // their values to show up in CategoryName and CategoryId.
            var requestProperties = new Dictionary<string, string>
            {
                { "cat", "手机" },
                { "cat3", "110" }
            };
            var request = new Request("https://list.jd.com/list.html?cat=9987,653,655", requestProperties);

            context.AddResponse(new Response
            {
                Request = request,
                RawText = File.ReadAllText("Jd.html")
            });

            var parser = new DataParser<Product>();

            // The test method is synchronous, so block until the parser has finished.
            parser.HandleAsync(context).GetAwaiter().GetResult();

            // Parsed entities are stored in the context under the entity type's full name.
            var stored   = (List<object>)context.GetItem(typeof(Product).FullName);
            var products = stored.Cast<Product>().ToList();

            Assert.Equal(60, products.Count);

            var first = products[0];
            Assert.Equal("手机", first.CategoryName);
            Assert.Equal(110, first.CategoryId);
            Assert.Equal("https://item.jd.com/3031737.html", first.Url);
            Assert.Equal("3031737", first.Sku);
            Assert.Equal("荣耀官方旗舰店", first.ShopName);
            Assert.Equal("荣耀 NOTE 8 4GB+32GB 全网通版 冰河银", first.Name);
            Assert.Equal("1000000904", first.VenderId);
            Assert.Equal("1000000904", first.JdzyShopId);
            // Only the date part of RunId is compared against today's date.
            Assert.Equal(DateTime.Now.ToString("yyyy-MM-dd"), first.RunId.ToString("yyyy-MM-dd"));
        }
Example #2
0
        /// <summary>
        /// Parses the <c>Html</c> content (defined outside this method) with a
        /// <c>DataParser&lt;E&gt;</c> and checks that the first two extracted entities
        /// have the titles "a" and "b", in that order.
        /// </summary>
        public void MultiEntitySelector()
        {
            var scope   = SpiderFactory.CreateScope();
            var context = new DataFlowContext(scope);

            var response = new Response
            {
                Request = new Request("http://abcd.com"),
                RawText = Html
            };
            context.AddResponse(response);

            var entityParser = new DataParser<E>();

            // The test method is synchronous, so block until the parser has finished.
            entityParser.HandleAsync(context).GetAwaiter().GetResult();

            // Parsed entities are stored in the context under the entity type's full name.
            var stored   = (List<object>)context.GetItem(typeof(E).FullName);
            var entities = stored.Cast<E>().ToList();

            Assert.Equal("a", entities[0].title);
            Assert.Equal("b", entities[1].title);
        }
Example #3
0
        /// <summary>
        /// Extracts entities of the configured model from the current response and stores them
        /// in the context under <c>Model.TypeName</c>, appending to any items already there.
        /// </summary>
        /// <param name="context">
        /// Data flow context that supplies the response, the selectable content and the item store.
        /// </param>
        /// <returns>A completed task whose result is <c>DataFlowResult.Success</c>.</returns>
        protected override Task<DataFlowResult> Parse(DataFlowContext context)
        {
            // Register the table metadata under the model's type name if it is not there yet.
            if (!context.Contains(Model.TypeName))
            {
                context.Add(Model.TypeName, TableMetadata);
            }

            var page = context.GetSelectable();
            if (page.Properties == null)
            {
                page.Properties = new Dictionary<string, object>();
            }

            // Environment values start as a copy of the request properties.
            var environments = new Dictionary<string, string>();
            foreach (var pair in context.GetResponse().Request.Properties)
            {
                environments.Add(pair.Key, pair.Value);
            }

            // Shared values are selected from the whole page; a shared value replaces
            // a request property that has the same name.
            if (Model.ShareValueSelectors != null)
            {
                foreach (var shared in Model.ShareValueSelectors)
                {
                    string key   = shared.Name;
                    var    value = page.Select(shared.ToSelector()).GetValue();
                    environments[key] = value;
                }
            }

            var parsed = new List<dynamic>();

            if (Model.Selector == null)
            {
                // No entity selector: the whole page is parsed as a single object.
                var entity = ParseObject(environments, page, 0);
                if (entity != null)
                {
                    parsed.Add(entity);
                }
                else
                {
                    Logger?.LogWarning($"解析到空数据,类型: {Model.TypeName}");
                }
            }
            else
            {
                // One object is parsed per node matched by the entity selector.
                var nodes = page.SelectList(Model.Selector.ToSelector()).Nodes()?.ToList();
                if (nodes != null)
                {
                    // When Take is positive and smaller than the node count, keep only
                    // the first or the last Take nodes, depending on TakeFromHead.
                    if (Model.Take > 0 && nodes.Count > Model.Take)
                    {
                        if (Model.TakeFromHead)
                        {
                            nodes = nodes.Take(Model.Take).ToList();
                        }
                        else
                        {
                            nodes = nodes.Skip(nodes.Count - Model.Take).ToList();
                        }
                    }

                    for (var index = 0; index < nodes.Count; index++)
                    {
                        var entity = ParseObject(environments, nodes[index], index);
                        if (entity != null)
                        {
                            parsed.Add(entity);
                        }
                        else
                        {
                            Logger?.LogWarning($"解析到空数据,类型: {Model.TypeName}");
                        }
                    }
                }
            }

            // Store the parsed objects: create the item list on first use, append otherwise.
            if (parsed.Count > 0)
            {
                var stored = context.GetItem(Model.TypeName);
                if (stored == null)
                {
                    context.AddItem(Model.TypeName, parsed);
                }
                else
                {
                    stored.AddRange(parsed);
                }
            }

            return Task.FromResult(DataFlowResult.Success);
        }