/// <summary>
/// Builds a <see cref="News"/> item from the originating request's properties and the
/// downloaded page, and stores it in the context under the entity's full type name.
/// </summary>
/// <param name="context">Data-flow context exposing the response, its request and the page selectable.</param>
/// <returns>A completed task whose result is <see cref="DataFlowResult.Success"/>.</returns>
protected override Task<DataFlowResult> Parse(DataFlowContext context)
{
    var request = context.Response.Request;

    var news = new News
    {
        Url = request.Url,
        Title = request.Properties["title"],
        Summary = request.Properties["summary"],
        Views = int.Parse(request.Properties["views"]),
        // Content is taken from the div whose id is "news_body".
        Content = context.Selectable.Select(Selectors.XPath(".//div[@id='news_body']")).GetValue()
    };

    context.AddData(typeof(News).FullName, news);

    return Task.FromResult(DataFlowResult.Success);
}
/// <summary>
/// Builds a <see cref="News"/> item from the request's properties and the downloaded page,
/// and stores it in the context under the entity's full type name.
/// </summary>
/// <remarks>
/// Text values are trimmed. A "views" value that is null, blank or not a valid integer
/// results in <c>Views = 0</c> instead of a <see cref="System.FormatException"/>.
/// </remarks>
/// <param name="context">Data-flow context exposing the request and the page selectable.</param>
/// <returns>An already-completed task.</returns>
protected override Task ParseAsync(DataFlowContext context)
{
    var request = context.Request;
    var properties = request.Properties;

    // Explicit string types: the property values are only ever consumed through ToString().
    string title = properties["title"]?.ToString()?.Trim();
    string summary = properties["summary"]?.ToString()?.Trim();
    string viewsText = properties["views"]?.ToString()?.Trim();

    // The original "?? \"0\"" fallback only covered null; TryParse also covers
    // empty and non-numeric text, which would otherwise abort parsing of the page.
    if (!int.TryParse(viewsText, out int views))
    {
        views = 0;
    }

    var news = new News
    {
        Url = request.RequestUri.ToString(),
        Title = title,
        Summary = summary,
        Views = views,
        // Content is taken from the div whose id is "news_body".
        Content = context.Selectable.Select(Selectors.XPath(".//div[@id='news_body']")).Value?.Trim()
    };

    context.AddData(typeof(News).FullName, news);

    return Task.CompletedTask;
}
// Stores one CreateTableEntity8 in Insert mode, then checks that the
// createtablemultiprimay table holds exactly one row with the expected values
// and that its primary key consists of the two columns str2 and decimal.
public async Task MultiPrimary()
{
    using (var connection = CreateConnection())
    {
        var table = $"{Escape}test{Escape}.{Escape}createtablemultiprimay{Escape}";

        // If the entity's Schema does not configure a table name, the class name is used.
        await connection.ExecuteAsync($"drop table if exists {table};");

        var storage = CreateStorage(StorageMode.Insert);
        var context = new DataFlowContext(null, new SpiderOptions(), new Request(), new Response());
        var entityType = typeof(CreateTableEntity8);
        var batch = new List<CreateTableEntity8> { new CreateTableEntity8() };
        context.AddData(entityType, batch);
        await storage.HandleAsync(context);

        var rows = (await connection.QueryAsync<CreateTableEntity8>($"SELECT * FROM {table}")).ToList();

        // Assert.Single both checks the count and hands back the only row.
        var stored = Assert.Single(rows);
        Assert.Equal("xxx", stored.Str1);
        Assert.Equal("yyy", stored.Str2);
        Assert.Equal(655, stored.Required);
        Assert.Equal(0, stored.Decimal);
        Assert.Equal(600, stored.Long);
        Assert.Equal(400, stored.Double);
        Assert.Equal(200.0F, stored.Float);

        var primaryKeyRows = await connection.QueryAsync<PrimaryInfo>(
            $"SELECT t.CONSTRAINT_TYPE, c.COLUMN_NAME FROM INFORMATION_SCHEMA.TABLE_CONSTRAINTS AS t, INFORMATION_SCHEMA.KEY_COLUMN_USAGE AS c WHERE t.TABLE_NAME = c.TABLE_NAME AND t.TABLE_SCHEMA = 'test' AND t.CONSTRAINT_TYPE = 'PRIMARY KEY' AND t.TABLE_NAME='createtablemultiprimay';");
        var primaries = primaryKeyRows.ToList();
        _testOutputHelper.WriteLine(JsonConvert.SerializeObject(primaries));

        var columnNames = primaries.Select(p => p.COLUMN_NAME).ToList();
        Assert.Equal(2, primaries.Count);
        Assert.Contains("str2", columnNames);
        Assert.Contains("decimal", columnNames);

        // Remove the table again so the test leaves nothing behind.
        await connection.ExecuteAsync($"drop table if exists {table};");
    }
}
// Stores the same CreateTableEntity5 instance twice in Insert mode, then checks
// that createtableautoincprimay holds two rows with ids 1 and 2 and that "id"
// is the table's only primary-key column.
public async Task AutoIncPrimary()
{
    using (var connection = CreateConnection())
    {
        var table = $"{Escape}test{Escape}.{Escape}createtableautoincprimay{Escape}";

        // If the entity's Schema does not configure a table name, the class name is used.
        await connection.ExecuteAsync($"drop table if exists {table};");

        var storage = CreateStorage(StorageMode.Insert);
        var context = new DataFlowContext(null, new SpiderOptions(), new Request(), new Response());
        var entityType = typeof(CreateTableEntity5);
        var template = new CreateTableEntity5();
        // The same instance is listed twice on purpose: two rows are expected.
        var batch = new List<CreateTableEntity5> { template, template };
        context.AddData(entityType, batch);
        await storage.HandleAsync(context);

        var rows = (await connection.QueryAsync<CreateTableEntity5>($"SELECT * FROM {table}")).ToList();
        Assert.Equal(2, rows.Count);

        var first = rows[0];
        Assert.Equal("xxx", first.Str1);
        Assert.Equal("yyy", first.Str2);
        Assert.Equal(655, first.Required);
        Assert.Equal(0, first.Decimal);
        Assert.Equal(600, first.Long);
        Assert.Equal(400, first.Double);
        Assert.Equal(200.0F, first.Float);
        Assert.Equal(1, first.Id);
        Assert.Equal(2, rows[1].Id);

        var primaryKeyRows = await connection.QueryAsync<PrimaryInfo>(
            $"SELECT t.CONSTRAINT_TYPE, c.COLUMN_NAME FROM INFORMATION_SCHEMA.TABLE_CONSTRAINTS AS t, INFORMATION_SCHEMA.KEY_COLUMN_USAGE AS c WHERE t.TABLE_NAME = c.TABLE_NAME AND t.TABLE_SCHEMA = 'test' AND t.CONSTRAINT_TYPE = 'PRIMARY KEY' AND t.TABLE_NAME='createtableautoincprimay';");
        var primaries = primaryKeyRows.ToList();
        Assert.Single(primaries);
        Assert.Equal("id", primaries[0].COLUMN_NAME);

        // Remove the table again so the test leaves nothing behind.
        await connection.ExecuteAsync($"drop table if exists {table};");
    }
}
// Stores the same CreateTableEntity5 instance twice in Insert mode, then checks
// that test.dbo.createtableautoincprimay holds two rows with ids 1 and 2 and
// that sp_pkeys reports "id" as the single primary-key column.
public async Task CreateTableAutoIncPrimary()
{
    const string dropSql = "drop table if exists test.dbo.createtableautoincprimay;";

    using (var connection = CreateConnection())
    {
        // If the entity's Schema does not configure a table name, the class name is used.
        await connection.ExecuteAsync(dropSql);

        var storage = CreateStorage(StorageMode.Insert);
        var context = new DataFlowContext(null, null, null, null);
        var entityType = typeof(CreateTableEntity5);
        var template = new CreateTableEntity5();
        // The same instance is listed twice on purpose: two rows are expected.
        var batch = new List<CreateTableEntity5> { template, template };
        context.AddData(entityType, batch);
        await storage.HandleAsync(context);

        var queried = await connection.QueryAsync<CreateTableEntity5>("SELECT * FROM test.dbo.createtableautoincprimay");
        var rows = queried.ToList();
        Assert.Equal(2, rows.Count);

        var first = rows[0];
        Assert.Equal("xxx", first.Str1);
        Assert.Equal("yyy", first.Str2);
        Assert.Equal(655, first.Required);
        Assert.Equal(0, first.Decimal);
        Assert.Equal(600, first.Long);
        Assert.Equal(400, first.Double);
        Assert.Equal(200.0F, first.Float);
        Assert.Equal(1, first.Id);
        Assert.Equal(2, rows[1].Id);

        var primaryKeyRows = await connection.QueryAsync<IndexInfo>(@"USE test; EXEC sp_pkeys @table_name='createtableautoincprimay'");
        var primaries = primaryKeyRows.ToList();
        Assert.Single(primaries);
        Assert.Equal("id", primaries[0].COLUMN_NAME);
        Assert.Equal(1, primaries[0].KEY_SEQ);

        // Remove the table again so the test leaves nothing behind.
        await connection.ExecuteAsync(dropSql);
    }
}
// Stores one CreateTableEntity9 in Insert mode, then checks that the
// createtableindexes table exposes the six expected index rows: two plain
// indexes (INDEX_STR1, INDEX_STR1_STR2) and two unique indexes (UNIQUE_STR3,
// UNIQUE_STR3_STR4), with one row per indexed column.
public async Task Indexes()
{
    using (var conn = CreateConnection())
    {
        var table = $"{Escape}test{Escape}.{Escape}createtableindexes{Escape}";

        await conn.ExecuteAsync($"drop table if exists {table};");

        var storage = CreateStorage(StorageMode.Insert);
        var context = new DataFlowContext(null, new SpiderOptions(), new Request(), new Response());

        // Fix: the data was previously registered under typeof(CreateTableEntity5)
        // although the list holds CreateTableEntity9 items and every assertion below
        // targets the createtableindexes table. The key now matches the item type.
        var entityType = typeof(CreateTableEntity9);
        var items = new List<CreateTableEntity9> { new CreateTableEntity9() };
        context.AddData(entityType, items);
        await storage.HandleAsync(context);

        var indexes = (await conn.QueryAsync<IndexInfo>("show index from test.createtableindexes")).ToList();

        Assert.Equal(6, indexes.Count);
        Assert.Contains(indexes,
            x => x.Key_name == "INDEX_STR1" && x.Non_unique == 1 && x.Column_name == "str1");
        Assert.Contains(indexes,
            x => x.Key_name == "INDEX_STR1_STR2" && x.Non_unique == 1 && x.Column_name == "str1");
        Assert.Contains(indexes,
            x => x.Key_name == "INDEX_STR1_STR2" && x.Non_unique == 1 && x.Column_name == "str2");
        Assert.Contains(indexes,
            x => x.Key_name == "UNIQUE_STR3" && x.Non_unique == 0 && x.Column_name == "str3");
        Assert.Contains(indexes,
            x => x.Key_name == "UNIQUE_STR3_STR4" && x.Non_unique == 0 && x.Column_name == "str3");
        Assert.Contains(indexes,
            x => x.Key_name == "UNIQUE_STR3_STR4" && x.Non_unique == 0 && x.Column_name == "str4");

        // Remove the table again so the test leaves nothing behind.
        await conn.ExecuteAsync($"drop table if exists {table};");
    }
}
/// <summary>
/// Adds parsed entities to the context under the key <c>typeof(T)</c>: appends to the
/// collection already stored there, or stores a new list when nothing is stored yet.
/// </summary>
/// <typeparam name="T">Entity type; also used as the data key.</typeparam>
/// <param name="context">Data-flow context that receives the entities.</param>
/// <param name="results">Parsed entities; when null the method does nothing.</param>
protected virtual void AddParsedResult<T>(DataFlowContext context, IEnumerable<T> results)
    where T : EntityBase<T>, new()
{
    if (results == null)
    {
        return;
    }

    var key = typeof(T);
    var existing = context.GetData(key);

    if (existing != null)
    {
        existing.AddRange(results);
        return;
    }

    // Nothing stored for this type yet: start a fresh list seeded with the results.
    context.AddData(key, new List<T>(results));
}
// Stores two CreateTableEntity4 items (a default one and one with Str1 = "zzz")
// in InsertAndUpdate mode, then checks that createtableprimay ends up with a
// single row whose Str1 is "zzz".
public async Task InsertAndUpdate()
{
    using (var connection = CreateConnection())
    {
        var table = $"{Escape}test{Escape}.{Escape}createtableprimay{Escape}";

        // If the entity's Schema does not configure a table name, the class name is used.
        await connection.ExecuteAsync($"drop table if exists {table};");

        var storage = CreateStorage(StorageMode.InsertAndUpdate);
        var context = new DataFlowContext(null, new SpiderOptions(), new Request(), new Response());
        var entityType = typeof(CreateTableEntity4);
        var original = new CreateTableEntity4();
        var batch = new List<CreateTableEntity4>
        {
            original,
            new CreateTableEntity4 { Str1 = "zzz" }
        };
        context.AddData(entityType, batch);
        await storage.HandleAsync(context);

        var rows = (await connection.QueryAsync<CreateTableEntity4>($"SELECT * FROM {table}")).ToList();

        // Assert.Single both checks the count and hands back the only row.
        var stored = Assert.Single(rows);
        Assert.Equal("zzz", stored.Str1);
        Assert.Equal("yyy", stored.Str2);
        Assert.Equal(655, stored.Required);
        Assert.Equal(0, stored.Decimal);
        Assert.Equal(600, stored.Long);
        Assert.Equal(400, stored.Double);
        Assert.Equal(200.0F, stored.Float);

        // Remove the table again so the test leaves nothing behind.
        await connection.ExecuteAsync($"drop table if exists {table};");
    }
}
// Stores the same CreateTableEntity4 instance three times in InsertIgnoreDuplicate
// mode with UseTransaction enabled, then checks that test.dbo.createtableprimay
// holds exactly one row with the expected values.
public async Task UseTransaction()
{
    const string dropSql = "drop table if exists test.dbo.createtableprimay;";

    using (var connection = CreateConnection())
    {
        // If the entity's Schema does not configure a table name, the class name is used.
        await connection.ExecuteAsync(dropSql);

        var storage = (RelationalDatabaseEntityStorageBase)CreateStorage(StorageMode.InsertIgnoreDuplicate);
        storage.UseTransaction = true;

        var context = new DataFlowContext(null, null, null, null);
        var entityType = typeof(CreateTableEntity4);
        var template = new CreateTableEntity4();
        // The same instance is listed three times on purpose: only one row is expected.
        var batch = new List<CreateTableEntity4> { template, template, template };
        context.AddData(entityType, batch);
        await storage.HandleAsync(context);

        var queried = await connection.QueryAsync<CreateTableEntity4>("SELECT * FROM test.dbo.createtableprimay");
        var rows = queried.ToList();

        // Assert.Single both checks the count and hands back the only row.
        var stored = Assert.Single(rows);
        Assert.Equal("xxx", stored.Str1);
        Assert.Equal("yyy", stored.Str2);
        Assert.Equal(655, stored.Required);
        Assert.Equal(0, stored.Decimal);
        Assert.Equal(600, stored.Long);
        Assert.Equal(400, stored.Double);
        Assert.Equal(200.0F, stored.Float);

        // Remove the table again so the test leaves nothing behind.
        await connection.ExecuteAsync(dropSql);
    }
}
/// <summary>
/// Deserializes the response body as JSON into <typeparamref name="T"/>, stores the result
/// in the context under the type's short name, and passes it to <c>OnHanlder</c>.
/// </summary>
/// <remarks>
/// The request is skipped unless its properties contain <c>REQUEST_CHECK_PROPERTY_NAME</c>
/// with a string value equal to <c>_parserName</c>. The method stays <c>async</c> even though
/// it awaits nothing, so any exception is reported through the returned task.
/// </remarks>
/// <param name="context">Data-flow context exposing the request and the response.</param>
protected override async Task ParseAsync(DataFlowContext context)
{
    var properties = context.Request.Properties;

    var isForThisParser =
        properties.ContainsKey(REQUEST_CHECK_PROPERTY_NAME)
        && (properties[REQUEST_CHECK_PROPERTY_NAME] as string) == _parserName;

    if (!isForThisParser)
    {
        return;
    }

    var payload = JsonConvert.DeserializeObject<T>(context.Response.ReadAsString());
    context.AddData(typeof(T).Name, payload);
    OnHanlder(context, payload);
}
/// <summary>
/// Records the request URI under the key "URL" and the value of the page's
/// <c>title</c> element under the key "Title".
/// </summary>
/// <param name="context">Data-flow context exposing the request and the page selectable.</param>
/// <returns>An already-completed task.</returns>
protected override Task ParseAsync(DataFlowContext context)
{
    var requestUri = context.Request.RequestUri;
    context.AddData("URL", requestUri);

    // The XPath result is accessed null-conditionally, so "Title" may be stored as null.
    var titleNode = context.Selectable.XPath(".//title");
    context.AddData("Title", titleNode?.Value);

    return Task.CompletedTask;
}
/// <summary>
/// Records the originating request's URL under the key "URL" and the value of the
/// page's <c>title</c> element under the key "Title".
/// </summary>
/// <param name="context">Data-flow context exposing the response and the page selectable.</param>
/// <returns>A completed task whose result is <see cref="DataFlowResult.Success"/>.</returns>
protected override Task<DataFlowResult> Parse(DataFlowContext context)
{
    var url = context.Response.Request.Url;
    context.AddData("URL", url);

    var title = context.Selectable.XPath(".//title").GetValue();
    context.AddData("Title", title);

    return Task.FromResult(DataFlowResult.Success);
}
/// <summary>
/// Parses a video listing page: builds one <see cref="VideoInfo"/> per <c>li.item</c> element,
/// downloads each video's own page to collect its play links, and stores the resulting
/// list in the context under the key "v".
/// </summary>
/// <param name="context">Data-flow context exposing the page selectable and the response.</param>
/// <returns>
/// <see cref="DataFlowResult.Terminated"/> (after clearing the context data) when the page
/// contains no list items; otherwise <see cref="DataFlowResult.Success"/>.
/// </returns>
protected override Task<DataFlowResult> Parse(DataFlowContext context)
{
    ISelectable selectable = context.Selectable;

    // Value of the last pager link with trailing '>' characters removed.
    // Null-conditional access: without it a page that has no pager link threw a
    // NullReferenceException here, before the "nothing parsed" path below could run.
    string next = selectable.XPath("//div[@class='ew-page']/a[last()]").GetValue()?.TrimEnd('>');

    // Parse the data.
    List<string> data = selectable.XPath("//li[@class='item']").GetValues();

    // If nothing was parsed, skip the following steps (storage etc.).
    if (data == null || data.Count == 0)
    {
        context.ClearData();
        return Task.FromResult(DataFlowResult.Terminated);
    }

    List<VideoInfo> videos = new List<VideoInfo>();

    // NOTE(review): these follow-up requests are built but never handed to the context
    // (the call that enqueued them was disabled). They are kept as-is because
    // CreateFromRequest is not visible here - confirm whether they can be removed.
    List<Request> reqs = new List<Request>();

    foreach (string item in data)
    {
        Selectable itemNode = new Selectable(item);

        // Temporary video address.
        string url = itemNode.XPath("//a/@href").GetValue();

        // Read the "pay" marker once; a video is paid only when the marker is
        // present and contains the text "付费".
        string payText = itemNode.XPath("//span[@class='pay']").GetValue();

        VideoInfo video = new VideoInfo
        {
            Name = itemNode.XPath("//span[@class='s1']").GetValue(),
            Cover = itemNode.XPath("//img/@src").GetValue(),
            Year = itemNode.XPath("//span[@class='hint']").GetValue(),
            Description = itemNode.XPath("//p[@class='star']").GetValue(),
            IsPay = !string.IsNullOrWhiteSpace(payText) && payText.Contains("付费"),
            Type = 1,
            ParentUrl = url
        };
        videos.Add(video);

        if (string.IsNullOrWhiteSpace(video.ParentUrl))
        {
            continue;
        }

        string pageHtml = new JsHttpHelper().GetPageContent(video.ParentUrl);
        if (!string.IsNullOrWhiteSpace(pageHtml))
        {
            var links = new Selectable(pageHtml)
                .XPath("//div[@class='top-list-zd g-clear']//a[@data-daochu]")
                .GetValues(ValueOption.OuterHtml);

            // GetValues is null-checked for the list items above, so guard here as well.
            if (links != null)
            {
                foreach (var link in links)
                {
                    Selectable linkNode = new Selectable(link);
                    string playUrl = linkNode.XPath("//a/@href").GetValue();
                    if (string.IsNullOrWhiteSpace(playUrl))
                    {
                        continue;
                    }

                    string playName = linkNode.XPath("//a").GetValue();
                    video.Details.Add(new VideoDetail
                    {
                        PlayUrl = playUrl,
                        Number = "1",
                        IsPay = video.IsPay,
                        PlayName = playName
                    });
                }
            }
        }

        Request req = CreateFromRequest(context.Response.Request, url);
        req.DownloaderType = DownloaderType.WebDriver;
        reqs.Add(req);
    }

    context.AddData("v", videos);

    // "下一页" is the "next page" label. When the last pager link is anything else
    // (or absent), the follow-request querier is cleared.
    // NOTE(review): presumably this stops pagination from being followed - confirm.
    if (next != "下一页")
    {
        FollowRequestQuerier = null;
    }

    return Task.FromResult(DataFlowResult.Success);
}