// Builds an EntityProcessor for Entity14 and checks its target-URL patterns:
// the region //*[@id="1111"] must expose exactly one pattern whose text is
// &page=[0-9]+&, while the region //*[@id="222"] must return null.
public void TargetUrlsSelector_1Region_1Pattern()
{
    const string knownRegion = "//*[@id=\"1111\"]";
    const string otherRegion = "//*[@id=\"222\"]";

    var metadata = EntitySpider.GenerateEntityMetaData(typeof(Entity14).GetTypeInfo());
    var entityProcessor = new EntityProcessor(new Site(), metadata);

    Assert.AreEqual(1, entityProcessor.GetTargetUrlPatterns(knownRegion).Count);
    Assert.AreEqual(@"&page=[0-9]+&", entityProcessor.GetTargetUrlPatterns(knownRegion)[0].ToString());

    // No patterns are expected for this region at all, not even an empty list.
    Assert.IsTrue(entityProcessor.GetTargetUrlPatterns(otherRegion) == null);
}
// Verifies the table description generated for Entity10: the first index is
// "Id", the primary key is "Name", and there are two unique constraints,
// "Id,Name" followed by "Id".
public void Indexes()
{
    var metadata = EntitySpider.GenerateEntityMetaData(typeof(Entity10).GetTypeInfo());
    var table = metadata.Table;

    // Index and primary key.
    Assert.AreEqual("Id", table.Indexs[0]);
    Assert.AreEqual("Name", table.Primary);

    // Unique constraints, in declaration order.
    var uniques = table.Uniques;
    Assert.AreEqual(2, uniques.Length);
    Assert.AreEqual("Id,Name", uniques[0]);
    Assert.AreEqual("Id", uniques[1]);
}
// Generates metadata for Entity5 and Entity6 and checks that each exposes
// exactly one field: "Name" for Entity5 and "name" for Entity6.
public void SetPrimary()
{
    var upperCaseMetadata = EntitySpider.GenerateEntityMetaData(typeof(Entity5).GetTypeInfo());
    var upperCaseFields = upperCaseMetadata.Entity.Fields;
    Assert.AreEqual(1, upperCaseFields.Count);
    Assert.AreEqual("Name", upperCaseFields[0].Name);

    var lowerCaseMetadata = EntitySpider.GenerateEntityMetaData(typeof(Entity6).GetTypeInfo());
    var lowerCaseFields = lowerCaseMetadata.Entity.Fields;
    Assert.AreEqual(1, lowerCaseFields.Count);
    Assert.AreEqual("name", lowerCaseFields[0].Name);
}
// Checks the formatters attached to the first field of Entity11: two formatters
// are expected, and the first one must be a ReplaceFormatter with
// NewValue "a" and OldValue "b".
public void Formater()
{
    var metadata = EntitySpider.GenerateEntityMetaData(typeof(Entity11).GetTypeInfo());

    var firstField = (Field)metadata.Entity.Fields[0];
    var fieldFormatters = firstField.Formatters;
    Assert.AreEqual(2, fieldFormatters.Count);

    // The cast itself asserts the concrete formatter type.
    var replace = (ReplaceFormatter)fieldFormatters[0];
    Assert.AreEqual("a", replace.NewValue);
    Assert.AreEqual("b", replace.OldValue);
}
// Verifies the key layout generated for Entity10: first index column "Id",
// first primary column "name", and two unique groups: ("Id", "Name") and ("Id").
public void Indexes()
{
    var metadata = EntitySpider.GenerateEntityMetaData(typeof(Entity10).GetTypeInfo());

    Assert.AreEqual("Id", metadata.Indexes[0][0]);
    Assert.AreEqual("name", metadata.Primary[0]);

    var uniqueGroups = metadata.Uniques;
    Assert.AreEqual(2, uniqueGroups.Count);

    // First unique group spans two columns.
    var compositeGroup = uniqueGroups[0];
    Assert.AreEqual("Id", compositeGroup[0]);
    Assert.AreEqual("Name", compositeGroup[1]);

    // Second unique group starts with "Id".
    Assert.AreEqual("Id", uniqueGroups[1][0]);
}
// Checks the table schema generated for Entity4 (database "db", a name built
// from "table" plus the table's suffix, suffix TableSuffix.Monday) and that
// Entity14 produces no table information at all.
public void Schema()
{
    var withTable = EntitySpider.GenerateEntityMetaData(typeof(Entity4).GetTypeInfo());
    var table = withTable.Table;

    Assert.AreEqual("db", table.Database);

    // The expected name is derived with the same helper the generator exposes.
    var expectedName = EntitySpider.GenerateTableName("table", table.Suffix);
    Assert.AreEqual(expectedName, table.Name);
    Assert.AreEqual(TableSuffix.Monday, table.Suffix);

    var withoutTable = EntitySpider.GenerateEntityMetaData(typeof(Entity14).GetTypeInfo());
    Assert.IsNull(withoutTable.Table);
}
// Adding Entity3 to a TestPipeline is expected to raise a SpiderException with
// the message "Columns set as unique is not a property of your entity.".
// If nothing is thrown, the plain Exception below escapes the SpiderException
// handler and fails the test.
public void SetNotExistColumnToUnique()
{
    try
    {
        var metadata = EntitySpider.GenerateEntityMetaData(typeof(Entity3).GetTypeInfo());
        var testPipeline = new TestPipeline("");
        testPipeline.AddEntity(metadata);

        // Reaching this line means no SpiderException was raised.
        throw new Exception("Test failed");
    }
    catch (SpiderException spiderException)
    {
        Assert.AreEqual("Columns set as unique is not a property of your entity.", spiderException.Message);
    }
}
// Processes a JSON page containing two "age" records with the processor built
// for Entity1 and checks that two result items are stored under the entity's
// full type name.
public void TempEntityNoPrimaryInfo()
{
    var metadata = EntitySpider.GenerateEntityMetaData(typeof(Entity1).GetTypeInfo());
    var entityProcessor = new EntityProcessor(new Site(), metadata);

    var request = new Request("http://www.abcd.com");
    var page = new Page(request)
    {
        Content = "{'data':[{'age':'1'},{'age':'2'}]}",
        ContentType = ContentType.Json
    };

    entityProcessor.Process(page);

    var extracted = page.ResultItems.GetResultItem("DotnetSpider.Extension.Test.EntityExtractorTest+Entity1");
    Assert.AreEqual(2, extracted.Count);
}
// Generating metadata for Entity24 and constructing an EntityProcessor from it
// is expected to raise an ArgumentNullException; the test fails with
// "Failed." when no such exception is raised.
public void TargetUrlsSelector_NullRegion_NullPattern()
{
    ArgumentNullException captured = null;

    try
    {
        var metadata = EntitySpider.GenerateEntityMetaData(typeof(Entity24).GetTypeInfo());

        // Only the constructor's side effect (the exception) matters here.
        new EntityProcessor(new Site(), metadata);
    }
    catch (ArgumentNullException argumentNull)
    {
        captured = argumentNull;
    }

    if (captured == null)
    {
        throw new Exception("Failed.");
    }

    Assert.IsNotNull(captured);
}
// Generating metadata for Entity15 and constructing an EntityProcessor from it
// is expected to raise an exception whose message is
// "Region xpath and patterns should not be null both."; the test fails with
// "Failed." when nothing is raised.
public void TargetUrlsSelector_Null()
{
    Exception captured = null;

    try
    {
        var metadata = EntitySpider.GenerateEntityMetaData(typeof(Entity15).GetTypeInfo());

        // Only the constructor's side effect (the exception) matters here.
        new EntityProcessor(new Site(), metadata);
    }
    catch (Exception raised)
    {
        captured = raised;
    }

    if (captured == null)
    {
        throw new Exception("Failed.");
    }

    Assert.AreEqual("Region xpath and patterns should not be null both.", captured.Message);
}
// Integration test against a live MySQL server.
// Stores a connection string in `dotnetspider1`.`settings`, builds a
// MySqlEntityPipeline with a null connect string plus an UpdateConnectString
// that queries that settings row, then inserts two ProductInsert rows and
// checks they can be read back from today's test.sku_yyyy_MM_dd table.
public void UpdateConnectString()
{
    ClearDb();

    using (MySqlConnection conn = new MySqlConnection(ConnectString))
    {
        var createDatabaseSql = "CREATE DATABASE IF NOT EXISTS `dotnetspider1` DEFAULT CHARACTER SET utf8;";
        var createTableSql = "CREATE TABLE IF NOT EXISTS `dotnetspider1`.`settings` (`id` int(11) NOT NULL AUTO_INCREMENT,`type` varchar(45) NOT NULL,`key` varchar(45) DEFAULT NULL,`value` text,PRIMARY KEY(`id`),UNIQUE KEY `UNIQUE` (`key`,`type`)) ENGINE=InnoDB AUTO_INCREMENT = 1 DEFAULT CHARSET = utf8;";
        var insertSettingSql = "INSERT `dotnetspider1`.`settings` (`value`,`type`,`key`) VALUES (\"Database='mysql';Data Source=127.0.0.1;User ID=root;Password=1qazZAQ!;Port=3306\",'ConnectString','MySql01')";
        var lookupSql = "SELECT value from `dotnetspider1`.`settings` where `type`='ConnectString' and `key`='MySql01' LIMIT 1";

        conn.Execute(createDatabaseSql);
        conn.Execute(createTableSql);

        try
        {
            conn.Execute(insertSettingSql);
        }
        catch (Exception)
        {
            // ignored
            // NOTE(review): presumably tolerates the settings row already existing
            // (unique key on key/type) - confirm.
        }

        ISpider spider = new DefaultSpider("test", new Site());

        var connectStringUpdater = new DbUpdateConnectString
        {
            ConnectString = ConnectString,
            QueryString = lookupSql
        };
        var insertPipeline = new MySqlEntityPipeline(null)
        {
            UpdateConnectString = connectStringUpdater
        };

        var metadata = EntitySpider.GenerateEntityMetaData(typeof(ProductInsert).GetTypeInfo());
        insertPipeline.AddEntity(metadata);
        insertPipeline.InitPipeline(spider);

        var firstRow = new JObject { { "Sku", "110" }, { "Category", "3C" }, { "Url", "http://jd.com/110" }, { "CDate", "2016-08-13" } };
        var secondRow = new JObject { { "Sku", "111" }, { "Category", "3C" }, { "Url", "http://jd.com/111" }, { "CDate", "2016-08-13" } };
        insertPipeline.Process(metadata.Name, new List<JObject> { firstRow, secondRow });

        // The table name carries today's date as suffix.
        var daySuffix = DateTime.Now.ToString("yyyy_MM_dd");
        var stored = conn.Query<ProductInsert>($"select * from test.sku_{daySuffix}").ToList();

        Assert.AreEqual(2, stored.Count);
        Assert.AreEqual("110", stored[0].Sku);
        Assert.AreEqual("111", stored[1].Sku);

        conn.Execute("DROP DATABASE IF EXISTS `dotnetspider1`");
    }

    ClearDb();
}
// Attaches a MyDataHanlder (constructed with a fresh GUID string) to the Product
// metadata, processes an empty JSON page ("{}"), and asserts that a file named
// after that GUID exists afterwards; the file is then deleted.
// NOTE(review): presumably MyDataHanlder creates that file when it is invoked - confirm.
public void HandlerWhenExtractZeroResult()
{
    var metadata = EntitySpider.GenerateEntityMetaData(typeof(Product).GetTypeInfo());

    var markerFile = Guid.NewGuid().ToString("N");
    metadata.DataHandler = new MyDataHanlder(markerFile);

    var entityProcessor = new EntityProcessor(new Site(), metadata);
    var emptyPage = new Page(new Request("http://www.abcd.com"))
    {
        Content = "{}",
        ContentType = ContentType.Json
    };
    entityProcessor.Process(emptyPage);

    Assert.IsTrue(File.Exists(markerFile));
    File.Delete(markerFile);
}
// Validates how SqlServerEntityPipeline.AddEntity handles update-entity definitions:
//   - UpdateEntity1, UpdateEntity2 and UpdateEntity3 must each be rejected with a
//     SpiderException carrying a specific message;
//   - UpdateEntity4 and UpdateEntity5 must be accepted and expose exactly one
//     update column, "Value".
public void DefineUpdateEntity()
{
    SqlServerEntityPipeline insertPipeline = new SqlServerEntityPipeline(ConnectString);

    try
    {
        insertPipeline.AddEntity(EntitySpider.GenerateEntityMetaData(typeof(UpdateEntity1).GetTypeInfo()));
        // Assert.Fail is used instead of throwing a SpiderException: a SpiderException
        // thrown here would be caught by the handler below and reported as a
        // misleading message mismatch instead of "no exception was raised".
        Assert.Fail("TEST FAILED.");
    }
    catch (SpiderException e)
    {
        Assert.AreEqual("Columns set as Primary is not a property of your entity.", e.Message);
    }

    try
    {
        insertPipeline.AddEntity(EntitySpider.GenerateEntityMetaData(typeof(UpdateEntity2).GetTypeInfo()));
        Assert.Fail("TEST FAILED.");
    }
    catch (SpiderException e)
    {
        Assert.AreEqual("Columns set as update is not a property of your entity.", e.Message);
    }

    try
    {
        insertPipeline.AddEntity(EntitySpider.GenerateEntityMetaData(typeof(UpdateEntity3).GetTypeInfo()));
        Assert.Fail("TEST FAILED.");
    }
    catch (SpiderException e)
    {
        Assert.AreEqual("There is no column need update.", e.Message);
    }

    // Valid definition: reuse the same metadata instance for registration and lookup,
    // mirroring the second pipeline below.
    var metadata = EntitySpider.GenerateEntityMetaData(typeof(UpdateEntity4).GetTypeInfo());
    insertPipeline.AddEntity(metadata);
    Assert.AreEqual(1, insertPipeline.GetUpdateColumns(metadata.Name).Length);
    Assert.AreEqual("Value", insertPipeline.GetUpdateColumns(metadata.Name).First());

    SqlServerEntityPipeline insertPipeline2 = new SqlServerEntityPipeline(ConnectString);
    var metadata2 = EntitySpider.GenerateEntityMetaData(typeof(UpdateEntity5).GetTypeInfo());
    insertPipeline2.AddEntity(metadata2);
    Assert.AreEqual(1, insertPipeline2.GetUpdateColumns(metadata2.Name).Length);
    Assert.AreEqual("Value", insertPipeline2.GetUpdateColumns(metadata2.Name).First());
}
// Validates how MsSqlEntityPipeline.InitEntity (in PipelineMode.Update) handles
// update-entity definitions:
//   - UpdateEntity1, UpdateEntity2 and UpdateEntity3 must each be rejected with a
//     SpiderException carrying a specific message;
//   - UpdateEntity4 and UpdateEntity5 must be accepted and expose exactly one
//     update column whose Name is "value".
public void DefineUpdateEntity()
{
    MsSqlEntityPipeline insertPipeline = new MsSqlEntityPipeline(ConnectString, PipelineMode.Update);

    try
    {
        insertPipeline.InitEntity(EntitySpider.GenerateEntityMetaData(typeof(UpdateEntity1).GetTypeInfo()));
        // Assert.Fail is used instead of throwing a SpiderException: a SpiderException
        // thrown here would be caught by the handler below and reported as a
        // misleading message mismatch instead of "no exception was raised".
        Assert.Fail("TEST FAILED.");
    }
    catch (SpiderException e)
    {
        Assert.AreEqual("Columns set as primary is not a property of your entity.", e.Message);
    }

    try
    {
        insertPipeline.InitEntity(EntitySpider.GenerateEntityMetaData(typeof(UpdateEntity2).GetTypeInfo()));
        Assert.Fail("TEST FAILED.");
    }
    catch (SpiderException e)
    {
        Assert.AreEqual("Columns set as update is not a property of your entity.", e.Message);
    }

    try
    {
        insertPipeline.InitEntity(EntitySpider.GenerateEntityMetaData(typeof(UpdateEntity3).GetTypeInfo()));
        Assert.Fail("TEST FAILED.");
    }
    catch (SpiderException e)
    {
        Assert.AreEqual("There is no column need update.", e.Message);
    }

    // Valid definitions: exactly one update column is expected in each case.
    insertPipeline.InitEntity(EntitySpider.GenerateEntityMetaData(typeof(UpdateEntity4).GetTypeInfo()));
    Assert.AreEqual(1, insertPipeline.GetUpdateColumns().Count);
    Assert.AreEqual("value", insertPipeline.GetUpdateColumns().First().Name);

    MsSqlEntityPipeline insertPipeline2 = new MsSqlEntityPipeline(ConnectString, PipelineMode.Update);
    insertPipeline2.InitEntity(EntitySpider.GenerateEntityMetaData(typeof(UpdateEntity5).GetTypeInfo()));
    Assert.AreEqual(1, insertPipeline2.GetUpdateColumns().Count);
    Assert.AreEqual("value", insertPipeline2.GetUpdateColumns().First().Name);
}
// Integration test against a live MySQL server.
// Inserts two Product2Insert rows, then pushes a changed Category ("AAAA") for
// Sku 110 through a second MySqlEntityPipeline built from Product2Update with
// its second constructor argument set to true, and checks that the table still
// holds two rows and that the first one carries the new category.
public void UpdateWhenUnionPrimaryCheckIfSameBeforeUpdate()
{
    ClearDb();

    using (MySqlConnection conn = new MySqlConnection(ConnectString))
    {
        ISpider spider = new DefaultSpider("test", new Site());

        // Seed the table.
        var insertPipeline = new MySqlEntityPipeline(ConnectString);
        var insertMetadata = EntitySpider.GenerateEntityMetaData(typeof(Product2Insert).GetTypeInfo());
        insertPipeline.AddEntity(insertMetadata);
        insertPipeline.InitPipeline(spider);

        var firstRow = new JObject { { "Sku", "110" }, { "Category1", "4C" }, { "Category", "3C" }, { "Url", "http://jd.com/110" }, { "CDate", "2016-08-13" } };
        var secondRow = new JObject { { "Sku", "111" }, { "Category1", "4C" }, { "Category", "3C" }, { "Url", "http://jd.com/111" }, { "CDate", "2016-08-13" } };
        insertPipeline.Process(insertMetadata.Name, new List<JObject> { firstRow, secondRow });

        // Update the first row's category.
        var updatePipeline = new MySqlEntityPipeline(ConnectString, true);
        var updateMetadata = EntitySpider.GenerateEntityMetaData(typeof(Product2Update).GetTypeInfo());
        updatePipeline.AddEntity(updateMetadata);
        updatePipeline.InitPipeline(spider);

        var changedRow = new JObject { { "Sku", "110" }, { "Category1", "4C" }, { "Category", "AAAA" }, { "Url", "http://jd.com/110" }, { "CDate", "2016-08-13" } };
        updatePipeline.Process(updateMetadata.Name, new List<JObject> { changedRow });

        // The table name carries today's date as suffix.
        var daySuffix = DateTime.Now.ToString("yyyy_MM_dd");
        var stored = conn.Query<Product2Insert>($"select * from test.sku2_{daySuffix}").ToList();

        Assert.AreEqual(2, stored.Count);
        Assert.AreEqual("110", stored[0].Sku);
        Assert.AreEqual("AAAA", stored[0].Category);
    }

    ClearDb();
}
// Integration test against a live SQL Server instance.
// Inserts two ProductInsert rows, then pushes a changed Category ("4C") for
// Sku 110 through a second pipeline built from ProductUpdate, and checks that
// the table still holds two rows and that the first one carries the new category.
public void Update()
{
    ClearDb();

    using (SqlConnection conn = new SqlConnection(ConnectString))
    {
        ISpider spider = new DefaultSpider("test", new Site());

        // Seed the table.
        var insertPipeline = new SqlServerEntityPipeline(ConnectString);
        var insertMetadata = EntitySpider.GenerateEntityMetaData(typeof(ProductInsert).GetTypeInfo());
        insertPipeline.AddEntity(insertMetadata);
        insertPipeline.InitPipeline(spider);

        var firstRow = new JObject { { "Sku", "110" }, { "Category", "3C" }, { "Url", "http://jd.com/110" }, { "CDate", "2016-08-13" } };
        var secondRow = new JObject { { "Sku", "111" }, { "Category", "3C" }, { "Url", "http://jd.com/111" }, { "CDate", "2016-08-13" } };
        insertPipeline.Process(insertMetadata.Entity.Name, new List<JObject> { firstRow, secondRow });

        // Update the first row's category.
        var updatePipeline = new SqlServerEntityPipeline(ConnectString);
        var updateMetadata = EntitySpider.GenerateEntityMetaData(typeof(ProductUpdate).GetTypeInfo());
        updatePipeline.AddEntity(updateMetadata);
        updatePipeline.InitPipeline(spider);

        var changedRow = new JObject { { "Sku", "110" }, { "Category", "4C" }, { "Url", "http://jd.com/110" }, { "CDate", "2016-08-13" } };
        updatePipeline.Process(updateMetadata.Entity.Name, new List<JObject> { changedRow });

        // The table name carries today's date as suffix.
        var daySuffix = DateTime.Now.ToString("yyyy_MM_dd");
        var stored = conn.Query<ProductInsert>($"use test;select * from sku_{daySuffix}").ToList();

        Assert.AreEqual(2, stored.Count);
        Assert.AreEqual("110", stored[0].Sku);
        Assert.AreEqual("4C", stored[0].Category);
    }

    ClearDb();
}
// Checks the entity-level selector metadata generated for three types:
//   Entity7 - XPath selector "expression", Multi is true;
//   Entity8 - Css selector "expression2", Multi is true;
//   Entity9 - no selector, Multi is false, and the entity name is the full
//             nested type name.
public void EntitySelector()
{
    var xpathEntity = EntitySpider.GenerateEntityMetaData(typeof(Entity7).GetTypeInfo()).Entity;
    Assert.AreEqual("expression", xpathEntity.Selector.Expression);
    Assert.AreEqual(SelectorType.XPath, xpathEntity.Selector.Type);
    Assert.IsTrue(xpathEntity.Multi);

    var cssEntity = EntitySpider.GenerateEntityMetaData(typeof(Entity8).GetTypeInfo()).Entity;
    Assert.AreEqual("expression2", cssEntity.Selector.Expression);
    Assert.AreEqual(SelectorType.Css, cssEntity.Selector.Type);
    Assert.IsTrue(cssEntity.Multi);

    var plainEntity = EntitySpider.GenerateEntityMetaData(typeof(Entity9).GetTypeInfo()).Entity;
    Assert.IsFalse(plainEntity.Multi);
    Assert.IsNull(plainEntity.Selector);
    Assert.AreEqual("DotnetSpider.Extension.Test.EntitySpiderTest2+Entity9", plainEntity.Name);
}
// Integration test against a live SQL Server instance.
// Inserts two Product2 rows with a default-mode MsSqlEntityPipeline, then pushes
// a changed category ("AAAA") for sku 110 through a PipelineMode.Update pipeline,
// and checks that the table still holds two rows and that the first one carries
// the new category.
public void UpdateWhenUnionPrimary()
{
    ClearDb();

    using (SqlConnection conn = new SqlConnection(ConnectString))
    {
        ISpider spider = new DefaultSpider("test", new Site());

        // Seed the table.
        var insertPipeline = new MsSqlEntityPipeline(ConnectString);
        insertPipeline.InitEntity(EntitySpider.GenerateEntityMetaData(typeof(Product2).GetTypeInfo()));
        insertPipeline.InitPipeline(spider);

        var firstRow = new JObject { { "sku", "110" }, { "category1", "4C" }, { "category", "3C" }, { "url", "http://jd.com/110" }, { "cdate", "2016-08-13" } };
        var secondRow = new JObject { { "sku", "111" }, { "category1", "4C" }, { "category", "3C" }, { "url", "http://jd.com/111" }, { "cdate", "2016-08-13" } };
        insertPipeline.Process(new List<JObject> { firstRow, secondRow });

        // Update the first row's category.
        var updatePipeline = new MsSqlEntityPipeline(ConnectString, PipelineMode.Update);
        updatePipeline.InitEntity(EntitySpider.GenerateEntityMetaData(typeof(Product2).GetTypeInfo()));
        updatePipeline.InitPipeline(spider);

        var changedRow = new JObject { { "sku", "110" }, { "category1", "4C" }, { "category", "AAAA" }, { "url", "http://jd.com/110" }, { "cdate", "2016-08-13" } };
        updatePipeline.Process(new List<JObject> { changedRow });

        // The table name carries today's date as suffix.
        var daySuffix = DateTime.Now.ToString("yyyy_MM_dd");
        var stored = conn.Query<Product2>($"use test;select * from sku2_{daySuffix}").ToList();

        Assert.AreEqual(2, stored.Count);
        Assert.AreEqual("110", stored[0].Sku);
        Assert.AreEqual("AAAA", stored[0].Category);
    }

    ClearDb();
}
// Integration test against a live MySQL server.
// Inserts three ProductInsert rows (the third with a null Category) through a
// MySqlEntityPipeline and checks that all three can be read back from today's
// test.sku_yyyy_MM_dd table, with the null category preserved.
public void Insert()
{
    ClearDb();

    using (MySqlConnection conn = new MySqlConnection(ConnectString))
    {
        ISpider spider = new DefaultSpider("test", new Site());

        var insertPipeline = new MySqlEntityPipeline(ConnectString);
        var metadata = EntitySpider.GenerateEntityMetaData(typeof(ProductInsert).GetTypeInfo());
        insertPipeline.AddEntity(metadata);
        insertPipeline.InitPipeline(spider);

        // Common data
        var firstRow = new JObject { { "Sku", "110" }, { "Category", "3C" }, { "Url", "http://jd.com/110" }, { "CDate", "2016-08-13" } };
        var secondRow = new JObject { { "Sku", "111" }, { "Category", "3C" }, { "Url", "http://jd.com/111" }, { "CDate", "2016-08-13" } };

        // Value is null
        var nullCategoryRow = new JObject { { "Sku", "112" }, { "Category", null }, { "Url", "http://jd.com/111" }, { "CDate", "2016-08-13" } };

        insertPipeline.Process(metadata.Name, new List<JObject> { firstRow, secondRow, nullCategoryRow });

        // The table name carries today's date as suffix.
        var daySuffix = DateTime.Now.ToString("yyyy_MM_dd");
        var stored = conn.Query<ProductInsert>($"select * from test.sku_{daySuffix}").ToList();

        Assert.AreEqual(3, stored.Count);
        Assert.AreEqual("110", stored[0].Sku);
        Assert.AreEqual("111", stored[1].Sku);
        Assert.AreEqual(null, stored[2].Category);
    }

    ClearDb();
}
// Runs an EntityExtractor for Product over the saved page Jd.html (read from
// SpiderConsts.BaseDirectory), with the request carrying the extras
// "cat" and "cat3", and checks the result count (60) and every expected value
// of the first extracted item.
public void Extract()
{
    var metadata = EntitySpider.GenerateEntityMetaData(typeof(Product).GetTypeInfo());
    var entityExtractor = new EntityExtractor("test", null, metadata);

    var extras = new Dictionary<string, dynamic> { { "cat", "手机" }, { "cat3", "110" } };
    var request = new Request("http://list.jd.com/list.html?cat=9987,653,655&page=2&JL=6_0_0&ms=5#J_main", extras);
    var page = new Page(request, ContentType.Html, null)
    {
        Content = File.ReadAllText(Path.Combine(SpiderConsts.BaseDirectory, "Jd.html"))
    };

    var results = entityExtractor.Extract(page);
    Assert.AreEqual(60, results.Count);

    // Values taken from the request extras.
    Assert.AreEqual("手机", results[0].GetValue("CategoryName"));
    Assert.AreEqual("110", results[0].GetValue("CategoryId"));

    // Values taken from the page content.
    Assert.AreEqual("http://item.jd.com/3031737.html", results[0].GetValue("Url"));
    Assert.AreEqual("3031737", results[0].GetValue("Sku"));
    Assert.AreEqual("荣耀官方旗舰店", results[0].GetValue("ShopName"));
    Assert.AreEqual("荣耀 NOTE 8 4GB+32GB 全网通版 冰河银", results[0].GetValue("Name"));
    Assert.AreEqual("1000000904", results[0].GetValue("VenderId"));
    Assert.AreEqual("1000000904", results[0].GetValue("JdzyShopId"));

    // RunId is expected to equal today's date in yyyy_MM_dd form.
    Assert.AreEqual(DateTime.Now.ToString("yyyy_MM_dd"), results[0].GetValue("RunId"));
}