/// <summary>
/// Runs the <c>Product</c> data parser over the saved "Jd.html" page and verifies
/// the number of extracted products together with the fields of the first one.
/// </summary>
public void ParseEntity()
{
    // Arrange: a response built from the local fixture, carrying two request properties.
    var services = SpiderFactory.CreateScopeServiceProvider();
    var response = new Response
    {
        Request = new Request(
            "https://list.jd.com/list.html?cat=9987,653,655",
            new Dictionary<string, string>
            {
                { "cat", "手机" },
                { "cat3", "110" }
            }),
        RawText = File.ReadAllText("Jd.html")
    };
    var context = new DataFlowContext(response, services);
    var parser = new DataParser<Product>();

    // Act: the test is synchronous, so the parser task is awaited by blocking.
    parser.HandleAsync(context).GetAwaiter().GetResult();

    // Assert
    var products = (ParseResult<Product>)context.GetParseItem(typeof(Product).FullName);
    Assert.Equal(60, products.Count);

    var first = products[0];
    Assert.Equal("手机", first.CategoryName);
    Assert.Equal(110, first.CategoryId);
    Assert.Equal("https://item.jd.com/3031737.html", first.Url);
    Assert.Equal("3031737", first.Sku);
    Assert.Equal("荣耀官方旗舰店", first.ShopName);
    Assert.Equal("荣耀 NOTE 8 4GB+32GB 全网通版 冰河银", first.Name);
    Assert.Equal("1000000904", first.VenderId);
    Assert.Equal("1000000904", first.JdzyShopId);
    // RunId is compared by calendar date only.
    Assert.Equal(DateTime.Now.ToString("yyyy-MM-dd"), first.RunId.ToString("yyyy-MM-dd"));
}
/// <summary>
/// Saves every <c>IndexEntity</c> stored in the context for the current response and
/// requests termination as soon as one save reports failure.
/// </summary>
/// <param name="context">Data flow context providing the parsed items, the response and the service provider.</param>
/// <returns>
/// A completed task holding <c>DataFlowResult.Terminated</c> when <c>SaveDealIndex</c> returns -1
/// (after an exit command has been published for the request's owner); otherwise
/// <c>DataFlowResult.Success</c>, including when there are no items to save.
/// </returns>
public Task<DataFlowResult> HandleAsync(DataFlowContext context)
{
    // GetParseItem may return null when no item of this type was stored for the
    // response (other callers null-check it), so there is nothing to save in that case.
    var items = context.GetParseItem(typeof(IndexEntity).FullName);
    if (items == null)
    {
        return Task.FromResult(DataFlowResult.Success);
    }

    // foreach also disposes the enumerator, which the manual MoveNext loop did not.
    foreach (var item in items)
    {
        if (SaveDealIndex((IndexEntity)item) != -1)
        {
            continue;
        }

        // -1 from SaveDealIndex: stop processing the remaining items, publish an
        // exit command addressed to the owner of the current request and terminate.
        IEventBus bus = (IEventBus)context.Services.GetService(typeof(IEventBus));
        bus.Publish(context.Response.Request.OwnerId, new Event() { Type = Framework.ExitCommand });
        return Task.FromResult(DataFlowResult.Terminated);
    }

    return Task.FromResult(DataFlowResult.Success);
}
/// <summary>
/// Looks up coordinates for the community of every <c>InfoEntity</c> stored in the
/// context, writes them onto the entity, saves it and logs the result.
/// </summary>
/// <param name="context">Data flow context providing the parsed items.</param>
/// <returns><c>DataFlowResult.Success</c>, including when there are no items to process.</returns>
public async Task<DataFlowResult> HandleAsync(DataFlowContext context)
{
    // GetParseItem may return null when no item of this type was stored for the
    // response (other callers null-check it), so there is nothing to process in that case.
    var items = context.GetParseItem(typeof(InfoEntity).FullName);
    if (items == null)
    {
        return DataFlowResult.Success;
    }

    // foreach also disposes the enumerator, which the manual MoveNext loop did not.
    foreach (var item in items)
    {
        var info = (InfoEntity)item;

        // Item1 is stored as the latitude and Item2 as the longitude.
        var gps = await GetGPSAsync(info.Community);
        info.Latitude = gps.Item1;
        info.Longtitude = gps.Item2;

        // Save the instance that was just updated instead of re-casting the current element.
        SaveDealInfo(info);
        Logger.LogInformation($"community: {info.Community}\tlat: {gps.Item1}\t lng: {gps.Item2}");
    }

    return DataFlowResult.Success;
}
/// <summary>
/// Parses the <c>Html</c> fixture with the data parser for <c>E</c> and checks the
/// titles of the first two extracted entities.
/// </summary>
public void MultiEntitySelector()
{
    // Arrange
    var services = SpiderFactory.CreateScopeServiceProvider();
    var response = new Response
    {
        Request = new Request("http://abcd.com"),
        RawText = Html
    };
    var context = new DataFlowContext(response, services);
    var entityParser = new DataParser<E>();

    // Act: the test is synchronous, so the parser task is awaited by blocking.
    entityParser.HandleAsync(context).GetAwaiter().GetResult();

    // Assert
    var entities = (ParseResult<E>)context.GetParseItem(typeof(E).FullName);
    Assert.Equal("a", entities[0].title);
    Assert.Equal("b", entities[1].title);
}
/// <summary>
/// Parses the <c>Html</c> fixture with the data parser for <c>N</c> and checks the
/// <c>title</c> and <c>dotnetspider</c> values of the first extracted entity.
/// </summary>
public void SingleEntitySelector()
{
    // Arrange
    var services = SpiderProvider.Value.CreateScopeServiceProvider();
    var response = new Response
    {
        Request = new Request("http://abcd.com"),
        RawText = Html
    };
    var context = new DataFlowContext(response, services);
    var entityParser = new DataParser<N>();

    // Act: the test is synchronous, so the parser task is awaited by blocking.
    entityParser.HandleAsync(context).GetAwaiter().GetResult();

    // Assert
    var entities = (ParseResult<N>)context.GetParseItem(typeof(N).FullName);
    Assert.Equal("i am title", entities[0].title);
    Assert.Equal("i am dotnetspider", entities[0].dotnetspider);
}
/// <summary>
/// Extracts entities of type <typeparamref name="T"/> from the response's selectable
/// and stores them in the context under the model's type name.
/// </summary>
/// <param name="context">Data flow context that supplies the response and receives the parsed items.</param>
/// <returns>A completed task holding <c>DataFlowResult.Success</c>.</returns>
protected override Task<DataFlowResult> Parse(DataFlowContext context)
{
    // Store the table metadata under the model's type name unless an entry already exists.
    if (!context.Contains(_model.TypeName))
    {
        context.Add(_model.TypeName, _tableMetadata);
    }

    var selectable = context.GetSelectable();
    var parsed = new ParseResult<T>();
    if (selectable.Properties == null)
    {
        selectable.Properties = new Dictionary<string, object>();
    }

    // Environment values start from the request properties ...
    var environments = new Dictionary<string, string>();
    foreach (var pair in context.Response.Request.Properties)
    {
        environments.Add(pair.Key, pair.Value);
    }

    // ... and shared value selectors add to them, overwriting entries with the same name.
    if (_model.ShareValueSelectors != null)
    {
        foreach (var shared in _model.ShareValueSelectors)
        {
            environments[shared.Name] = selectable.Select(shared.ToSelector()).GetValue();
        }
    }

    if (_model.Selector == null)
    {
        // No entity selector: the whole selectable is parsed as a single entity.
        var entity = ParseObject(environments, selectable, 0);
        if (entity == null)
        {
            Logger?.LogWarning($"解析到空数据,类型: {_model.TypeName}");
        }
        else
        {
            parsed.Add(entity);
        }
    }
    else
    {
        var nodes = selectable.SelectList(_model.Selector.ToSelector()).Nodes()?.ToList();
        if (nodes != null)
        {
            // When Take is positive and smaller than the node count, keep only that
            // many nodes, taken from the head or from the tail of the list.
            if (_model.Take > 0 && nodes.Count > _model.Take)
            {
                if (_model.TakeFromHead)
                {
                    nodes = nodes.Take(_model.Take).ToList();
                }
                else
                {
                    nodes = nodes.Skip(nodes.Count - _model.Take).ToList();
                }
            }

            var index = 0;
            foreach (var node in nodes)
            {
                var entity = ParseObject(environments, node, index);
                if (entity == null)
                {
                    Logger?.LogWarning($"解析到空数据,类型: {_model.TypeName}");
                }
                else
                {
                    parsed.Add(entity);
                }

                index++;
            }
        }
    }

    // Only non-empty results are stored: appended to an existing entry, or added as a new one.
    if (parsed.Count > 0)
    {
        var existing = context.GetParseItem(_model.TypeName);
        if (existing != null)
        {
            ((ParseResult<T>)existing).AddRange(parsed);
        }
        else
        {
            context.AddParseItem(_model.TypeName, parsed);
        }
    }

    return Task.FromResult(DataFlowResult.Success);
}