예제 #1
0
        /// <summary>
        /// Runs <c>DataParser&lt;Product&gt;</c> over the saved page <c>Jd.html</c> and
        /// verifies the number of extracted products and the fields of the first one.
        /// </summary>
        public void ParseEntity()
        {
            const string dayFormat = "yyyy-MM-dd";

            var provider = SpiderFactory.CreateScopeServiceProvider();

            // The request carries two properties (cat, cat3); the assertions below expect
            // them to surface on the parsed entity as CategoryName and CategoryId.
            var requestProperties = new Dictionary<string, string>
            {
                { "cat", "手机" },
                { "cat3", "110" }
            };
            var response = new Response
            {
                Request = new Request("https://list.jd.com/list.html?cat=9987,653,655", requestProperties),
                RawText = File.ReadAllText("Jd.html")
            };
            var context = new DataFlowContext(response, provider);

            var parser = new DataParser<Product>();
            parser.HandleAsync(context).GetAwaiter().GetResult();

            var products = (ParseResult<Product>)context.GetParseItem(typeof(Product).FullName);

            // Check the count before indexing so an empty result fails as an assertion.
            Assert.Equal(60, products.Count);

            var first = products[0];
            Assert.Equal("手机", first.CategoryName);
            Assert.Equal(110, first.CategoryId);
            Assert.Equal("https://item.jd.com/3031737.html", first.Url);
            Assert.Equal("3031737", first.Sku);
            Assert.Equal("荣耀官方旗舰店", first.ShopName);
            Assert.Equal("荣耀 NOTE 8 4GB+32GB 全网通版 冰河银", first.Name);
            Assert.Equal("1000000904", first.VenderId);
            Assert.Equal("1000000904", first.JdzyShopId);

            // RunId is compared at day granularity against the current local date.
            Assert.Equal(DateTime.Now.ToString(dayFormat), first.RunId.ToString(dayFormat));
        }
예제 #2
0
        /// <summary>
        /// Saves every <c>IndexEntity</c> parsed for the current response and asks the
        /// owning spider to exit as soon as one save reports failure.
        /// </summary>
        /// <param name="context">
        /// Data flow context that provides the parse results, the service provider and
        /// the response being processed.
        /// </param>
        /// <returns>
        /// A completed task holding <c>DataFlowResult.Terminated</c> when
        /// <c>SaveDealIndex</c> returns -1 for an item; otherwise
        /// <c>DataFlowResult.Success</c>, including when there is nothing to save.
        /// </returns>
        public Task<DataFlowResult> HandleAsync(DataFlowContext context)
        {
            var items = context.GetParseItem(typeof(IndexEntity).FullName);

            // GetParseItem yields null when no IndexEntity results were added to the
            // context; there is nothing to store, so report success instead of
            // dereferencing null.
            if (items == null)
            {
                return Task.FromResult(DataFlowResult.Success);
            }

            // foreach (rather than a hand-driven IEnumerator) also disposes the
            // enumerator when it is disposable, even on the early return below.
            foreach (IndexEntity entity in items)
            {
                if (SaveDealIndex(entity) != -1)
                {
                    continue;
                }

                // A -1 result stops processing: publish the exit command to the
                // request's owner and terminate this data flow.
                IEventBus bus = (IEventBus)context.Services.GetService(typeof(IEventBus));
                bus.Publish(context.Response.Request.OwnerId, new Event()
                {
                    Type = Framework.ExitCommand
                });

                return Task.FromResult(DataFlowResult.Terminated);
            }

            return Task.FromResult(DataFlowResult.Success);
        }
예제 #3
0
        /// <summary>
        /// Looks up GPS data for every parsed <c>InfoEntity</c>, stores it on the entity,
        /// saves the entity and logs the result.
        /// </summary>
        /// <param name="context">Data flow context that provides the parse results.</param>
        /// <returns>
        /// <c>DataFlowResult.Success</c> once all items have been handled, including when
        /// there are no items for this response.
        /// </returns>
        public async Task<DataFlowResult> HandleAsync(DataFlowContext context)
        {
            var items = context.GetParseItem(typeof(InfoEntity).FullName);

            // GetParseItem yields null when no InfoEntity results were added to the
            // context; skip the work instead of dereferencing null.
            if (items == null)
            {
                return DataFlowResult.Success;
            }

            // foreach replaces the hand-driven IEnumerator so the enumerator is
            // disposed when it is disposable, and the item is cast only once.
            foreach (InfoEntity info in items)
            {
                // Item1 is stored as the latitude and Item2 as the longitude.
                var gps = await GetGPSAsync(info.Community);

                info.Latitude   = gps.Item1;
                info.Longtitude = gps.Item2;
                SaveDealInfo(info);
                Logger.LogInformation($"community: {info.Community}\tlat: {gps.Item1}\t lng: {gps.Item2}");
            }

            return DataFlowResult.Success;
        }
예제 #4
0
        /// <summary>
        /// Parses the shared <c>Html</c> fixture with <c>DataParser&lt;E&gt;</c> and checks
        /// that the first two extracted entities carry the titles "a" and "b", in order.
        /// </summary>
        public void MultiEntitySelector()
        {
            var provider = SpiderFactory.CreateScopeServiceProvider();
            var response = new Response
            {
                Request = new Request("http://abcd.com"),
                RawText = Html
            };
            var context = new DataFlowContext(response, provider);

            new DataParser<E>().HandleAsync(context).GetAwaiter().GetResult();

            var entities = (ParseResult<E>)context.GetParseItem(typeof(E).FullName);

            // Expected titles by position in the parse result.
            var expectedTitles = new[] { "a", "b" };
            for (var position = 0; position < expectedTitles.Length; position++)
            {
                Assert.Equal(expectedTitles[position], entities[position].title);
            }
        }
예제 #5
0
        /// <summary>
        /// Parses the shared <c>Html</c> fixture with <c>DataParser&lt;N&gt;</c> and checks
        /// the <c>title</c> and <c>dotnetspider</c> fields of the first extracted entity.
        /// </summary>
        public void SingleEntitySelector()
        {
            var provider = SpiderProvider.Value.CreateScopeServiceProvider();
            var response = new Response
            {
                Request = new Request("http://abcd.com"),
                RawText = Html
            };
            var context = new DataFlowContext(response, provider);

            new DataParser<N>().HandleAsync(context).GetAwaiter().GetResult();

            var parsed = (ParseResult<N>)context.GetParseItem(typeof(N).FullName);
            var entity = parsed[0];

            Assert.Equal("i am title", entity.title);
            Assert.Equal("i am dotnetspider", entity.dotnetspider);
        }
예제 #6
0
        /// <summary>
        /// Extracts entities of type <typeparamref name="T"/> from the current response
        /// and stores them in the context under the model's type name.
        /// </summary>
        /// <param name="context">
        /// Data flow context that supplies the selectable content, the request
        /// properties and the parse-item storage.
        /// </param>
        /// <returns>A completed task holding <c>DataFlowResult.Success</c>.</returns>
        protected override Task<DataFlowResult> Parse(DataFlowContext context)
        {
            // Register the table metadata under the type name if it is not there yet.
            if (!context.Contains(_model.TypeName))
            {
                context.Add(_model.TypeName, _tableMetadata);
            }

            var page   = context.GetSelectable();
            var parsed = new ParseResult<T>();

            if (page.Properties == null)
            {
                page.Properties = new Dictionary<string, object>();
            }

            // Environment values start as a copy of the request properties.
            var env = new Dictionary<string, string>();
            foreach (var pair in context.Response.Request.Properties)
            {
                env.Add(pair.Key, pair.Value);
            }

            // Shared value selectors are evaluated against the whole page; the indexer
            // adds a new entry or overwrites a request property with the same name.
            if (_model.ShareValueSelectors != null)
            {
                foreach (var shared in _model.ShareValueSelectors)
                {
                    env[shared.Name] = page.Select(shared.ToSelector()).GetValue();
                }
            }

            if (_model.Selector == null)
            {
                // No entity selector: the whole page is parsed as one entity at index 0.
                var entity = ParseObject(env, page, 0);
                if (entity == null)
                {
                    Logger?.LogWarning($"解析到空数据,类型: {_model.TypeName}");
                }
                else
                {
                    parsed.Add(entity);
                }
            }
            else
            {
                var nodes = page.SelectList(_model.Selector.ToSelector()).Nodes()?.ToList();
                if (nodes != null)
                {
                    // A positive Take smaller than the node count keeps only that many
                    // nodes, from the head or from the tail of the list.
                    if (_model.Take > 0 && nodes.Count > _model.Take)
                    {
                        if (_model.TakeFromHead)
                        {
                            nodes = nodes.Take(_model.Take).ToList();
                        }
                        else
                        {
                            nodes = nodes.Skip(nodes.Count - _model.Take).ToList();
                        }
                    }

                    var index = 0;
                    foreach (var node in nodes)
                    {
                        var entity = ParseObject(env, node, index);
                        index++;

                        if (entity == null)
                        {
                            Logger?.LogWarning($"解析到空数据,类型: {_model.TypeName}");
                        }
                        else
                        {
                            parsed.Add(entity);
                        }
                    }
                }
            }

            // Store the new results, appending to any already kept for this type.
            if (parsed.Count > 0)
            {
                var existing = context.GetParseItem(_model.TypeName);
                if (existing != null)
                {
                    ((ParseResult<T>)existing).AddRange(parsed);
                }
                else
                {
                    context.AddParseItem(_model.TypeName, parsed);
                }
            }

            return Task.FromResult(DataFlowResult.Success);
        }