/// <summary>
/// Serializes all data held by the data-flow context to JSON and writes it to standard output.
/// </summary>
/// <param name="context">Data-flow context whose data is printed.</param>
/// <returns>A completed task whose result is <c>DataFlowResult.Success</c>.</returns>
protected override Task<DataFlowResult> Store(DataFlowContext context)
{
    var json = JsonConvert.SerializeObject(context.GetData());
    Console.WriteLine(json);

    return Task.FromResult(DataFlowResult.Success);
}
/// <summary>
/// Groups the context's data by entity type and forwards the grouped result to the
/// <c>HandleAsync(context, result)</c> overload.
/// </summary>
/// <remarks>
/// Only entries whose key is a <see cref="Type"/> assignable to <c>_baseType</c> are kept.
/// Enumerable values are flattened element by element; any other value is added as a single item.
/// </remarks>
/// <param name="context">Data-flow context to read the parsed data from.</param>
public override async Task HandleAsync(DataFlowContext context)
{
    if (IsNullOrEmpty(context))
    {
        Logger.LogWarning("数据流上下文不包含实体解析结果");
        return;
    }

    var grouped = new Dictionary<Type, ICollection<dynamic>>();

    foreach (var entry in context.GetData())
    {
        // Skip keys that are not entity types derived from the configured base type.
        if (!(entry.Key is Type entityType) || !_baseType.IsAssignableFrom(entityType))
        {
            continue;
        }

        if (entry.Value is IEnumerable sequence)
        {
            foreach (var element in sequence)
            {
                AddResult(grouped, entityType, element);
            }
        }
        else
        {
            AddResult(grouped, entityType, entry.Value);
        }
    }

    await HandleAsync(context, grouped);
}
/// <summary>
/// Writes every data entry of the context into MongoDB, one collection per entry.
/// </summary>
/// <remarks>
/// For each entry, the <c>TableMetadata</c> stored in the context under the same key supplies
/// the database name (<c>Schema.Database</c>) and collection name (<c>Schema.Table</c>).
/// Database handles are cached in <c>_cache</c> keyed by database name.
/// </remarks>
/// <param name="context">Data-flow context holding both the data and its table metadata.</param>
/// <returns><c>DataFlowResult.Success</c> once all non-empty batches have been inserted.</returns>
protected override async Task<DataFlowResult> Store(DataFlowContext context)
{
    var items = context.GetData();
    foreach (var item in items)
    {
        var tableMetadata = (TableMetadata)context[item.Key];
        var databaseName = tableMetadata.Schema.Database;

        // One lookup on the common path instead of ContainsKey + TryAdd + indexer.
        if (!_cache.TryGetValue(databaseName, out var db))
        {
            db = _client.GetDatabase(databaseName);
            _cache.TryAdd(databaseName, db);
        }

        var collection = db.GetCollection<BsonDocument>(tableMetadata.Schema.Table);

        var bsonDocs = new List<BsonDocument>();
        foreach (var data in item.Value)
        {
            bsonDocs.Add(BsonDocument.Create(data));
        }

        // The MongoDB driver rejects an empty batch with an exception, so an entry
        // without any rows is skipped instead of failing the whole store step.
        if (bsonDocs.Count == 0)
        {
            continue;
        }

        await collection.InsertManyAsync(bsonDocs);
    }

    return DataFlowResult.Success;
}
/// <summary>
/// Collects the context's entity data into per-type lists and passes them to
/// <c>StoreAsync(context, dict)</c>.
/// </summary>
/// <remarks>
/// Entries are kept only when their key is a <see cref="Type"/> assignable to <c>_baseType</c>.
/// A value implementing <see cref="IEnumerable"/> contributes each of its elements; any other
/// value contributes itself.
/// </remarks>
/// <param name="context">Data-flow context to read the parsed data from.</param>
public override async Task HandleAsync(DataFlowContext context)
{
    if (context.IsEmpty)
    {
        Logger.LogWarning("数据流上下文不包含实体解析结果");
        return;
    }

    var byType = new Dictionary<Type, List<dynamic>>();

    foreach (var pair in context.GetData())
    {
        // Ignore anything that is not keyed by an entity type of the expected hierarchy.
        if (!(pair.Key is Type entityType) || !_baseType.IsAssignableFrom(entityType))
        {
            continue;
        }

        if (pair.Value is IEnumerable elements)
        {
            foreach (var element in elements)
            {
                InsertData(byType, entityType, element);
            }
        }
        else
        {
            InsertData(byType, entityType, pair.Value);
        }
    }

    await StoreAsync(context, byType);
}
/// <summary>
/// Prints the context's data as JSON to the console in cyan.
/// </summary>
/// <remarks>
/// Entries whose key is the same reference as <c>Consts.ResponseBytes</c> are excluded from the output.
/// The previous console foreground colour is restored afterwards so that later console output
/// is not left cyan.
/// </remarks>
/// <param name="context">Data-flow context whose data is printed.</param>
/// <returns>An already-completed task.</returns>
protected override Task StoreAsync(DataFlowContext context)
{
    var items = context.GetData().Where(x => !ReferenceEquals(x.Key, Consts.ResponseBytes));

    var previousColor = Console.ForegroundColor;
    Console.ForegroundColor = ConsoleColor.Cyan;
    try
    {
        Console.WriteLine($"{Environment.NewLine}DATA: {JsonConvert.SerializeObject(items)}{Environment.NewLine}");
    }
    finally
    {
        // Restore even if serialization or the write throws.
        Console.ForegroundColor = previousColor;
    }

    return Task.CompletedTask;
}
/// <summary>
/// Writes the context's data as a single JSON line to <c>{Hash}.json</c> inside the
/// data folder returned by <c>GetDataFolder</c> for the request's owner.
/// </summary>
/// <param name="context">Data-flow context supplying the request (owner, hash) and the data.</param>
protected override async Task StoreAsync(DataFlowContext context)
{
    var folder = GetDataFolder(context.Request.Owner);
    var path = Path.Combine(folder, $"{context.Request.Hash}.json");

    using (var output = OpenWrite(path))
    {
        var payload = JsonConvert.SerializeObject(context.GetData());
        await output.WriteLineAsync(payload);
    }
}
/// <summary>
/// Prints all of the context's data as JSON to the console in cyan.
/// </summary>
/// <remarks>
/// The previous console foreground colour is restored afterwards so that later console output
/// is not left cyan.
/// </remarks>
/// <param name="context">Data-flow context whose data is printed.</param>
/// <returns>An already-completed task.</returns>
protected override Task StoreAsync(DataFlowContext context)
{
    var items = context.GetData();

    var previousColor = Console.ForegroundColor;
    Console.ForegroundColor = ConsoleColor.Cyan;
    try
    {
        Console.WriteLine($"{Environment.NewLine}DATA: {JsonConvert.SerializeObject(items)}{Environment.NewLine}");
    }
    finally
    {
        // Restore even if serialization or the write throws.
        Console.ForegroundColor = previousColor;
    }

    return Task.CompletedTask;
}
/// <summary>
/// Writes the context's data as a single JSON line to <c>{Hash}.json</c> inside the
/// data folder returned by <c>GetDataFolder</c> for the request's owner.
/// </summary>
/// <remarks>
/// Entries whose key is the same reference as <c>Consts.ResponseBytes</c> are left out of the file.
/// </remarks>
/// <param name="context">Data-flow context supplying the request (owner, hash) and the data.</param>
protected override async Task StoreAsync(DataFlowContext context)
{
    var fileName = $"{context.Request.Hash}.json";
    var path = Path.Combine(GetDataFolder(context.Request.Owner), fileName);

    using var output = OpenWrite(path);

    // The filter stays lazy; it is evaluated while the serializer enumerates it.
    var withoutRawBytes = context.GetData().Where(x => !ReferenceEquals(x.Key, Consts.ResponseBytes));
    var payload = JsonConvert.SerializeObject(withoutRawBytes);

    await output.WriteLineAsync(payload);
}
/// <summary>
/// Serializes the context's data to JSON and writes it as one line through <c>Writer</c>,
/// after calling <c>CreateFile</c> for <c>{Hash}.json</c> in the owner's data folder.
/// </summary>
/// <param name="context">Data-flow context supplying the data and the originating request.</param>
/// <returns><c>DataFlowResult.Success</c> after the line has been written.</returns>
protected override async Task<DataFlowResult> Store(DataFlowContext context)
{
    var items = context.GetData();

    var folder = GetDataFolder(context.Response.Request.OwnerId);
    var path = Path.Combine(folder, $"{context.Response.Request.Hash}.json");
    CreateFile(path);

    var payload = JsonConvert.SerializeObject(items);
    await Writer.WriteLineAsync(payload);

    return DataFlowResult.Success;
}
/// <summary>
/// Prints the URL, title and view count of the <c>News</c> item stored in the context
/// under the full type name of <c>News</c>.
/// </summary>
/// <remarks>Nothing is printed when that entry is missing or is not a <c>News</c> instance.</remarks>
/// <param name="context">Data-flow context to look the item up in.</param>
/// <returns>An already-completed task.</returns>
protected override Task StoreAsync(DataFlowContext context)
{
    var key = typeof(News).FullName;

    if (context.GetData(key) is News news)
    {
        Console.WriteLine($"URL: {news.Url}, TITLE: {news.Title}, VIEWS: {news.Views}");
    }

    return Task.CompletedTask;
}
/// <summary>
/// Prints every record of every data entry in the context as its own JSON line on standard output.
/// </summary>
/// <param name="context">Data-flow context whose data is printed.</param>
/// <returns>A completed task whose result is <c>DataFlowResult.Success</c>.</returns>
protected override Task<DataFlowResult> Store(DataFlowContext context)
{
    foreach (var entry in context.GetData())
    {
        foreach (var record in entry.Value)
        {
            var json = JsonConvert.SerializeObject(record);
            Console.WriteLine(json);
        }
    }

    return Task.FromResult(DataFlowResult.Success);
}
/// <summary>
/// Inserts the <c>News</c> item stored in the context (under the full type name of <c>News</c>)
/// into <c>cnblogs2.news2</c> using <c>INSERT IGNORE</c>.
/// </summary>
/// <remarks>Does nothing when the context holds no such item.</remarks>
/// <param name="context">Data-flow context to look the item up in.</param>
protected override async Task StoreAsync(DataFlowContext context)
{
    var news = (News)context.GetData(typeof(News).FullName);
    if (news == null)
    {
        return;
    }

    // NOTE(review): the connection string, including the password, is hard-coded here —
    // consider moving it to configuration.
    await using var connection = new MySqlConnection(
        "Database='mysql';Data Source=localhost;password=1qazZAQ!;User ID=root;Port=3306;");

    // The item itself is the parameter object; its members feed the @-placeholders.
    await connection.ExecuteAsync(
        $"INSERT IGNORE INTO cnblogs2.news2 (title, url, summary, views, content) VALUES (@Title, @Url, @Summary, @Views, @Content);",
        news);
}
/// <summary>
/// Runs <c>DataParser&lt;E&gt;</c> over the <c>Html</c> fixture and checks that the first two
/// parsed entities have the titles "a" and "b".
/// </summary>
public async Task MultiEntitySelector()
{
    // Arrange
    var response = new Response { Content = new ByteArrayContent(Encoding.UTF8.GetBytes(Html)) };
    var dataContext = new DataFlowContext(null, new SpiderOptions(), new Request("http://abcd.com"), response);
    var parser = new DataParser<E>();

    // Act
    await parser.HandleAsync(dataContext);

    // Assert
    var entities = (List<E>)dataContext.GetData(typeof(E));
    Assert.Equal("a", entities[0].title);
    Assert.Equal("b", entities[1].title);
}
/// <summary>
/// Prints the URL, title and view count of the <c>News</c> item stored in the context
/// under the full type name of <c>News</c>.
/// </summary>
/// <remarks>
/// Logs a warning and returns when <c>IsNullOrEmpty(context)</c> is true. Nothing is printed
/// when the entry is missing or is not a <c>News</c> instance.
/// </remarks>
/// <param name="context">Data-flow context to look the item up in.</param>
/// <returns>An already-completed task.</returns>
public override Task HandleAsync(DataFlowContext context)
{
    if (IsNullOrEmpty(context))
    {
        Logger.LogWarning("数据流上下文不包含解析结果");
    }
    else if (context.GetData(typeof(News).FullName) is News news)
    {
        Console.WriteLine($"URL: {news.Url}, TITLE: {news.Title}, VIEWS: {news.Views}");
    }

    return Task.CompletedTask;
}
/// <summary>
/// Stores the raw response bytes of the current request as a Base64 cell via an HTTP PUT to
/// <c>{_rest}{table}/row</c>, where the table is <c>dotnet_spider:response_{Owner}</c>.
/// </summary>
/// <remarks>
/// <c>EnsureDatabaseAndTableCreated</c> is invoked through <c>_tableCreatedDict.GetOrAdd</c>
/// the first time a table name is seen. The PUT is attempted up to three times and stops after
/// the first successful response. Each failed attempt is logged, and if all attempts fail the
/// method returns without throwing.
/// </remarks>
/// <param name="context">Data-flow context supplying the request, the response bytes and the service provider.</param>
protected override async Task StoreAsync(DataFlowContext context)
{
    var id = context.Request.Owner;
    var table = $"dotnet_spider:response_{id}";
    _tableCreatedDict.GetOrAdd(table, t =>
    {
        EnsureDatabaseAndTableCreated(context, t);
        return true;
    });

    var hash = context.Request.Hash;
    var bytes = context.GetData(Consts.ResponseBytes);
    var data = Convert.ToBase64String(bytes);
    var httpClient = context.ServiceProvider.GetRequiredService<IHttpClientFactory>().CreateClient(_rest);

    const int maxAttempts = 3;
    for (var i = 0; i < maxAttempts; ++i)
    {
        try
        {
            // Request and response are disposed on every attempt so connections and
            // content buffers are released promptly.
            using (var httpRequestMessage = new HttpRequestMessage(HttpMethod.Put, $"{_rest}{table}/row"))
            {
                httpRequestMessage.Headers.TryAddWithoutValidation("Accept", "application/json");
                var rowKey = hash.ToBase64String();
                var body = "{\"Row\":[{\"key\":\"" + rowKey + "\", \"Cell\": [{\"column\":\"" + _columnName +
                           "\", \"$\":\"" + data + "\"}]}]}";
                var content = new StringContent(body, Encoding.UTF8, "application/json");
                httpRequestMessage.Content = content;

                using (var res = await httpClient.SendAsync(httpRequestMessage))
                {
                    res.EnsureSuccessStatusCode();
                }
            }

            // Success: stop retrying. Without this the same row was PUT on every iteration.
            return;
        }
        catch (Exception ex)
        {
            Logger.LogError($"Store {context.Request.RequestUri} response to HBase failed [{i}]: {ex}");
        }
    }
}
/// <summary>
/// Adds parsed entities of type <typeparamref name="T"/> to the context, keyed by <c>typeof(T)</c>.
/// </summary>
/// <remarks>
/// When the context already holds data under that key the new results are appended to it;
/// otherwise a new list containing the results is registered. A null
/// <paramref name="results"/> is ignored.
/// </remarks>
/// <typeparam name="T">Entity type of the parsed results.</typeparam>
/// <param name="context">Data-flow context that receives the results.</param>
/// <param name="results">Parsed entities to add; may be null.</param>
protected virtual void AddParsedResult<T>(DataFlowContext context, IEnumerable<T> results)
    where T : EntityBase<T>, new()
{
    if (results == null)
    {
        return;
    }

    var entityType = typeof(T);
    var existing = context.GetData(entityType);

    if (existing != null)
    {
        existing.AddRange(results);
        return;
    }

    context.AddData(entityType, new List<T>(results));
}
/// <summary>
/// Runs <c>DataParser&lt;N&gt;</c> (with the HTML selectable builder) over the <c>Html</c> fixture
/// and checks the <c>title</c> and <c>dotnetspider</c> values of the first parsed entity.
/// </summary>
public async Task SingleEntitySelector()
{
    // Arrange
    var response = new Response { Content = new ByteArrayContent(Encoding.UTF8.GetBytes(Html)) };
    var dataContext = new DataFlowContext(null, new SpiderOptions(), new Request("http://abcd.com"), response);
    var parser = new DataParser<N>();
    parser.UseHtmlSelectableBuilder();

    // Act
    await parser.HandleAsync(dataContext);

    // Assert
    var entities = (List<N>)dataContext.GetData(typeof(N));
    Assert.Equal("i am title", entities[0].title);
    Assert.Equal("i am dotnetspider", entities[0].dotnetspider);
}
/// <summary>
/// Inserts the <c>News</c> item stored in the context (under the full type name of <c>News</c>)
/// into <c>cnblogs2.news2</c> using <c>INSERT IGNORE</c>.
/// </summary>
/// <remarks>
/// Logs a warning and returns when <c>IsNullOrEmpty(context)</c> is true. Does nothing when
/// the context holds no such item.
/// </remarks>
/// <param name="context">Data-flow context to look the item up in.</param>
public override async Task HandleAsync(DataFlowContext context)
{
    if (IsNullOrEmpty(context))
    {
        Logger.LogWarning("数据流上下文不包含解析结果");
        return;
    }

    var news = (News)context.GetData(typeof(News).FullName);
    if (news == null)
    {
        return;
    }

    // NOTE(review): the connection string, including the password, is hard-coded here —
    // consider moving it to configuration.
    await using var connection = new MySqlConnection(
        "Database='mysql';Data Source=localhost;password=1qazZAQ!;User ID=root;Port=3306;");

    // The item itself is the parameter object; its members feed the @-placeholders.
    await connection.ExecuteAsync(
        $"INSERT IGNORE INTO cnblogs2.news2 (title, url, summary, views, content) VALUES (@Title, @Url, @Summary, @Views, @Content);",
        news);
}
/// <summary>
/// Parses the <c>Jd.html</c> fixture with <c>DataParser&lt;Product&gt;</c> and verifies the number
/// of products, the fields of the first product, and the number of follow-up requests.
/// </summary>
public async Task ParseEntity()
{
    // Arrange: the request carries the category values the assertions below expect on the product.
    var properties = new Dictionary<string, object> { { "cat", "手机" }, { "cat3", "110" } };
    var request = new Request("https://list.jd.com/list.html?cat=9987,653,655", properties);
    var response = new Response { Content = new ByteArrayContent(File.ReadAllBytes("Jd.html")) };
    var dataContext = new DataFlowContext(null, new SpiderOptions(), request, response);

    var parser = new DataParser<Product>();
    await parser.InitializeAsync();
    parser.UseHtmlSelectableBuilder();

    // Act
    await parser.HandleAsync(dataContext);

    // Assert
    var products = (List<Product>)dataContext.GetData(typeof(Product));
    Assert.Equal(60, products.Count);

    for (var index = 0; index < 3; index++)
    {
        Assert.Contains("手机商品筛选", products[index].Title);
    }

    var first = products[0];
    Assert.Equal("手机", first.CategoryName);
    Assert.Equal(110, first.CategoryId);
    Assert.Equal("https://item.jd.com/3031737.html", first.Url);
    Assert.Equal("3031737", first.Sku);
    Assert.Equal("荣耀官方旗舰店", first.ShopName);
    Assert.Equal("荣耀 NOTE 8 4GB+32GB 全网通版 冰河银", first.Name);
    Assert.Equal("1000000904", first.VenderId);
    Assert.Equal("1000000904", first.JdzyShopId);

    // RunId is compared at day granularity against the current date.
    var today = DateTimeOffset.Now.ToString("yyyy-MM-dd");
    Assert.Equal(today, first.RunId.ToString("yyyy-MM-dd"));

    Assert.Equal(7, dataContext.FollowRequests.Count);
}