Ejemplo n.º 1
0
        // Inserts two Product rows through MySqlEntityPipeline and verifies that both can be
        // read back from today's test.sku_yyyy_MM_dd table.
        public void Insert()
        {
            // Declared once so the verification connection and the pipeline always agree.
            // NOTE(review): credentials are hard-coded in source; consider moving them to test configuration.
            const string connectString = "Database='mysql';Data Source=127.0.0.1;User ID=root;Password=1qazZAQ!;Port=3306";

            ClearDb();

            try
            {
                using (MySqlConnection conn = new MySqlConnection(connectString))
                {
                    ISpider spider = new DefaultSpider("test", new Site());

                    MySqlEntityPipeline insertPipeline = new MySqlEntityPipeline(connectString);
                    insertPipeline.InitiEntity(EntitySpider.ParseEntityMetaData(typeof(Product).GetTypeInfo()));
                    insertPipeline.InitPipeline(spider);

                    JObject data1 = new JObject {
                        { "sku", "110" }, { "category", "3C" }, { "url", "http://jd.com/110" }, { "cdate", "2016-08-13" }
                    };
                    JObject data2 = new JObject {
                        { "sku", "111" }, { "category", "3C" }, { "url", "http://jd.com/111" }, { "cdate", "2016-08-13" }
                    };
                    insertPipeline.Process(new List<JObject> {
                        data1, data2
                    });

                    // A SELECT without ORDER BY has no guaranteed row order, so sort by Sku
                    // before asserting on positions.
                    var list = conn.Query<Product>($"select * from test.sku_{DateTime.Now.ToString("yyyy_MM_dd")}")
                               .OrderBy(p => p.Sku, StringComparer.Ordinal)
                               .ToList();
                    Assert.Equal(2, list.Count);
                    Assert.Equal("110", list[0].Sku);
                    Assert.Equal("111", list[1].Sku);
                }
            }
            finally
            {
                // Run the cleanup helper (defined elsewhere) even when an assertion above fails.
                ClearDb();
            }
        }
Ejemplo n.º 2
0
        // Builds an EntityProcessor from Entity14 and checks that region //*[@id="1111"] yields
        // exactly one target-URL pattern (&page=[0-9]+&), while region //*[@id="222"] yields null.
        public void TargetUrlsSelector_1Region_1Pattern()
        {
            var entity1   = EntitySpider.ParseEntityMetaData(typeof(Entity14).GetTypeInfo());
            var processor = new EntityProcessor(new Site(), entity1);

            // Look the region up once and assert on the same result object.
            var patterns = processor.GetTargetUrlPatterns("//*[@id=\"1111\"]");

            Assert.AreEqual(1, patterns.Count);
            Assert.AreEqual(@"&page=[0-9]+&", patterns[0].ToString());

            // IsNull reports the unexpected value on failure, unlike IsTrue(x == null).
            Assert.IsNull(processor.GetTargetUrlPatterns("//*[@id=\"222\"]"));
        }
Ejemplo n.º 3
0
        // Parses Entity5 and Entity6 and checks that each exposes exactly one field,
        // named "Name" and "name" respectively.
        public void SetPrimary()
        {
            var firstMetadata = EntitySpider.ParseEntityMetaData(typeof(Entity5).GetTypeInfo());
            var firstFields   = firstMetadata.Entity.Fields;

            Assert.Equal(1, firstFields.Count);
            Assert.Equal("Name", firstFields[0].Name);

            var secondMetadata = EntitySpider.ParseEntityMetaData(typeof(Entity6).GetTypeInfo());
            var secondFields   = secondMetadata.Entity.Fields;

            Assert.Equal(1, secondFields.Count);
            Assert.Equal("name", secondFields[0].Name);
        }
Ejemplo n.º 4
0
        // Parses Entity11 and checks that its second field carries two formatters, the first
        // being a ReplaceFormatter with NewValue "a" and OldValue "b".
        public void Formater()
        {
            var metadata    = EntitySpider.ParseEntityMetaData(typeof(Entity11).GetTypeInfo());
            var secondField = (Field)metadata.Entity.Fields[1];
            var formatters  = secondField.Formatters;

            Assert.Equal(2, formatters.Count);

            // Only the first formatter is inspected; the cast fails the test if it is another type.
            var replace = (ReplaceFormatter)formatters[0];

            Assert.Equal("a", replace.NewValue);
            Assert.Equal("b", replace.OldValue);
        }
Ejemplo n.º 5
0
        // Builds an EntityProcessor from Entity20 and checks that region //*[@id="1111"] yields two
        // patterns in declaration order, while region //*[@id="3333"] yields null.
        public void TargetUrlsSelector_Multi_2SameRegion_2Pattern()
        {
            const string regionWithPatterns    = "//*[@id=\"1111\"]";
            const string regionWithoutPatterns = "//*[@id=\"3333\"]";

            var metadata  = EntitySpider.ParseEntityMetaData(typeof(Entity20).GetTypeInfo());
            var processor = new EntityProcessor(new Site(), metadata);

            var patternCount = processor.GetTargetUrlPatterns(regionWithPatterns).Count;
            Assert.Equal(2, patternCount);

            var firstPattern = processor.GetTargetUrlPatterns(regionWithPatterns)[0].ToString();
            Assert.Equal(@"&page=[0-1]+&", firstPattern);

            var secondPattern = processor.GetTargetUrlPatterns(regionWithPatterns)[1].ToString();
            Assert.Equal(@"&page=[0-9]+&", secondPattern);

            var missing = processor.GetTargetUrlPatterns(regionWithoutPatterns);
            Assert.True(missing == null);
        }
Ejemplo n.º 6
0
        // Expects a SpiderException with a fixed message when a TestPipeline is initialised
        // with the metadata parsed from Entity3.
        public void SetNotExistColumnToUnique()
        {
            SpiderException thrown = Assert.Throws<SpiderException>(() =>
            {
                var metadata     = EntitySpider.ParseEntityMetaData(typeof(Entity3).GetTypeInfo());
                var testPipeline = new TestPipeline();
                testPipeline.InitiEntity(metadata);
            });

            string actualMessage = thrown.Message;
            Assert.Equal("Columns set as unique is not a property of your entity.", actualMessage);
        }
Ejemplo n.º 7
0
        // Parses Entity10 and checks its first index column, its first primary column and
        // the contents of its two unique-column groups.
        public void Indexes()
        {
            var metadata = EntitySpider.ParseEntityMetaData(typeof(Entity10).GetTypeInfo());

            Assert.Equal("Id", metadata.Indexes[0][0]);
            Assert.Equal("name", metadata.Primary[0]);

            var uniques = metadata.Uniques;
            Assert.Equal(2, uniques.Count);

            var firstUnique = uniques[0];
            Assert.Equal("Id", firstUnique[0]);
            Assert.Equal("Name", firstUnique[1]);

            var secondUnique = uniques[1];
            Assert.Equal("Id", secondUnique[0]);
        }
Ejemplo n.º 8
0
        // Checks the schema parsed from Entity4 (database "db", table "table", Monday suffix)
        // and that Entity13 produces no schema at all.
        public void Schema()
        {
            var metadataWithSchema = EntitySpider.ParseEntityMetaData(typeof(Entity4).GetTypeInfo());
            var schema             = metadataWithSchema.Schema;

            Assert.Equal("db", schema.Database);
            Assert.Equal("table", schema.TableName);
            Assert.Equal(TableSuffix.Monday, schema.Suffix);

            var metadataWithoutSchema = EntitySpider.ParseEntityMetaData(typeof(Entity13).GetTypeInfo());

            Assert.Null(metadataWithoutSchema.Schema);
        }
Ejemplo n.º 9
0
 // Expects parsing Entity15 or constructing an EntityProcessor from it to throw an exception
 // whose message is "Region xpath and patterns should not be null both.".
 public void TargetUrlsSelector_Null()
 {
     try
     {
         var entity2 = EntitySpider.ParseEntityMetaData(typeof(Entity15).GetTypeInfo());

         // Only the constructor's side effect (the expected throw) matters; the instance is not used.
         new EntityProcessor(new Site(), entity2);
     }
     catch (Exception e)
     {
         Assert.AreEqual("Region xpath and patterns should not be null both.", e.Message);
         return;
     }

     // No exception was thrown: report an assertion failure instead of throwing a bare Exception.
     Assert.Fail("Failed.");
 }
Ejemplo n.º 10
0
 // Expects parsing Entity24 or constructing an EntityProcessor from it to throw
 // an ArgumentNullException.
 public void TargetUrlsSelector_NullRegion_NullPattern()
 {
     try
     {
         var entity2 = EntitySpider.ParseEntityMetaData(typeof(Entity24).GetTypeInfo());

         // Only the constructor's side effect (the expected throw) matters; the instance is not used.
         new EntityProcessor(new Site(), entity2);
     }
     catch (ArgumentNullException)
     {
         // Reaching this handler is the success condition; a caught exception is never null,
         // so no further assertion is needed.
         return;
     }

     // No exception was thrown: report an assertion failure instead of throwing a bare Exception.
     Assert.Fail("Failed.");
 }
Ejemplo n.º 11
0
 // Expects a SpiderException with a fixed message when a TestPipeline is initialised
 // with the metadata parsed from Entity3.
 public void SetNotExistColumnToUnique()
 {
     SpiderException caught = null;

     try
     {
         var          entityMetadata = EntitySpider.ParseEntityMetaData(typeof(Entity3).GetTypeInfo());
         TestPipeline pipeline       = new TestPipeline();
         pipeline.InitiEntity(entityMetadata);
     }
     catch (SpiderException exception)
     {
         // Only capture here; assertions run after the try so the handler cannot mask them.
         caught = exception;
     }

     // A missing exception is an assertion failure, not a bare System.Exception.
     Assert.IsNotNull(caught, "Test failed");
     Assert.AreEqual("Columns set as unique is not a property of your entity.", caught.Message);
 }
Ejemplo n.º 12
0
 // Expects parsing Entity24 or constructing an EntityProcessor from it to throw an
 // ArgumentNullException for the parameter named "pattern".
 public void TargetUrlsSelector_NullRegion_NullPattern()
 {
     var exception = Assert.Throws<ArgumentNullException>(() =>
     {
         var entity2 = EntitySpider.ParseEntityMetaData(typeof(Entity24).GetTypeInfo());

         // Only the constructor's side effect (the expected throw) matters; the instance is not used.
         new EntityProcessor(new Site(), entity2);
     });

     // Exception.Message is localized by the runtime, so asserting on it only passes under one
     // UI culture. ParamName carries the same information and is culture-independent.
     Assert.Equal("pattern", exception.ParamName);
 }
Ejemplo n.º 13
0
        // Stores a connection string in `dotnetspider1`.`settings`, configures a MySqlEntityPipeline
        // to read it through UpdateConnectString, inserts two Product rows and verifies that both
        // can be read back from today's test.sku_yyyy_MM_dd table.
        public void UpdateConnectString()
        {
            // NOTE(review): credentials are hard-coded in source; consider moving them to test configuration.
            const string connectString = "Database='mysql';Data Source=127.0.0.1;User ID=root;Password=1qazZAQ!;Port=3306";

            ClearDb();

            try
            {
                using (MySqlConnection conn = new MySqlConnection(connectString))
                {
                    try
                    {
                        conn.Execute("CREATE DATABASE IF NOT EXISTS `dotnetspider1` DEFAULT CHARACTER SET utf8;");
                        conn.Execute("CREATE TABLE IF NOT EXISTS `dotnetspider1`.`settings` (`id` int(11) NOT NULL AUTO_INCREMENT,`type` varchar(45) NOT NULL,`key` varchar(45) DEFAULT NULL,`value` text,PRIMARY KEY(`id`),UNIQUE KEY `UNIQUE` (`key`,`type`)) ENGINE=InnoDB AUTO_INCREMENT = 1 DEFAULT CHARSET = utf8;");
                        try
                        {
                            conn.Execute("INSERT `dotnetspider1`.`settings` (`value`,`type`,`key`) VALUES (\"Database='mysql';Data Source=127.0.0.1;User ID=root;Password=1qazZAQ!;Port=3306\",'ConnectString','MySql01')");
                        }
                        catch (MySqlException)
                        {
                            // Best effort: the table has a UNIQUE (`key`,`type`) constraint, so a row left
                            // by an earlier run makes this insert fail. Only database errors are tolerated;
                            // anything else still surfaces.
                        }

                        ISpider spider = new DefaultSpider("test", new Site());

                        MySqlEntityPipeline insertPipeline = new MySqlEntityPipeline
                        {
                            UpdateConnectString = new DbUpdateConnectString
                            {
                                ConnectString = connectString,
                                QueryString   = "SELECT value from `dotnetspider1`.`settings` where `type`='ConnectString' and `key`='MySql01' LIMIT 1"
                            }
                        };
                        insertPipeline.InitiEntity(EntitySpider.ParseEntityMetaData(typeof(Product).GetTypeInfo()));
                        insertPipeline.InitPipeline(spider);

                        JObject data1 = new JObject {
                            { "sku", "110" }, { "category", "3C" }, { "url", "http://jd.com/110" }, { "cdate", "2016-08-13" }
                        };
                        JObject data2 = new JObject {
                            { "sku", "111" }, { "category", "3C" }, { "url", "http://jd.com/111" }, { "cdate", "2016-08-13" }
                        };

                        insertPipeline.Process(new List<JObject> {
                            data1, data2
                        });

                        // A SELECT without ORDER BY has no guaranteed row order, so sort by Sku
                        // before asserting on positions.
                        var list = conn.Query<Product>($"select * from test.sku_{DateTime.Now.ToString("yyyy_MM_dd")}")
                                   .OrderBy(p => p.Sku, StringComparer.Ordinal)
                                   .ToList();
                        Assert.Equal(2, list.Count);
                        Assert.Equal("110", list[0].Sku);
                        Assert.Equal("111", list[1].Sku);
                    }
                    finally
                    {
                        // Drop the settings database even when an assertion above fails.
                        conn.Execute("DROP DATABASE IF EXISTS `dotnetspider1`");
                    }
                }
            }
            finally
            {
                // Run the cleanup helper (defined elsewhere) regardless of the test outcome.
                ClearDb();
            }
        }
        // Creates the settings database/table via the Settings.* SQL statements, stores a connection
        // string in `dotnetspider`.`settings`, configures a MySqlEntityPipeline to read it through
        // UpdateConnectString, inserts two Product rows and verifies that both can be read back from
        // today's test.sku_yyyy_MM_dd table.
        public void UpdateConnectString()
        {
            // NOTE(review): credentials are hard-coded in source; consider moving them to test configuration.
            const string connectString = "Database='mysql';Data Source=127.0.0.1;User ID=root;Password=1qazZAQ!;Port=3306";

            ClearDb();

            try
            {
                using (MySqlConnection conn = new MySqlConnection(connectString))
                {
                    try
                    {
                        conn.Execute(Settings.MySqlDatabase);
                        conn.Execute(Settings.MySqlSettingTable);
                        try
                        {
                            conn.Execute("INSERT `dotnetspider`.`settings` (`value`,`type`,`key`) VALUES (\"Database='mysql';Data Source=127.0.0.1;User ID=root;Password=1qazZAQ!;Port=3306\",'ConnectString','MySql01')");
                        }
                        catch (MySqlException)
                        {
                            // Best effort: presumably the row may already exist from an earlier run
                            // (TODO confirm against Settings.MySqlSettingTable). Only database errors
                            // are tolerated; anything else still surfaces.
                        }

                        ISpider spider = new DefaultSpider("test", new Site());

                        MySqlEntityPipeline insertPipeline = new MySqlEntityPipeline
                        {
                            UpdateConnectString = new DbUpdateConnectString
                            {
                                ConnectString = connectString,
                                QueryString   = "SELECT value from `dotnetspider`.`settings` where `type`='ConnectString' and `key`='MySql01' LIMIT 1"
                            }
                        };
                        insertPipeline.InitiEntity(EntitySpider.ParseEntityMetaData(typeof(Product).GetTypeInfo()));
                        insertPipeline.InitPipeline(spider);

                        JObject data1 = new JObject {
                            { "sku", "110" }, { "category", "3C" }, { "url", "http://jd.com/110" }, { "cdate", "2016-08-13" }
                        };
                        JObject data2 = new JObject {
                            { "sku", "111" }, { "category", "3C" }, { "url", "http://jd.com/111" }, { "cdate", "2016-08-13" }
                        };
                        insertPipeline.Process(new List<JObject> {
                            data1, data2
                        });

                        // A SELECT without ORDER BY has no guaranteed row order, so sort by Sku
                        // before asserting on positions.
                        var list = conn.Query<Product>($"select * from test.sku_{DateTime.Now.ToString("yyyy_MM_dd")}")
                                   .OrderBy(p => p.Sku, StringComparer.Ordinal)
                                   .ToList();
                        Assert.Equal(2, list.Count);
                        Assert.Equal("110", list[0].Sku);
                        Assert.Equal("111", list[1].Sku);
                    }
                    finally
                    {
                        // Execute the drop statement even when an assertion above fails.
                        conn.Execute(Settings.DropMySqlDatabase);
                    }
                }
            }
            finally
            {
                // Run the cleanup helper (defined elsewhere) regardless of the test outcome.
                ClearDb();
            }
        }
Ejemplo n.º 15
0
        // Checks how an update-mode MsSqlEntityPipeline validates entity definitions:
        // UpdateEntity1..3 must be rejected with specific SpiderException messages, while
        // UpdateEntity4 and UpdateEntity5 must each yield a single update column named "value".
        public void DefineUpdateEntity()
        {
            MsSqlEntityPipeline insertPipeline = new MsSqlEntityPipeline(ConnectString, PipelineMode.Update);

            // Assert.Throws fails with a clear "no exception thrown" message, instead of the previous
            // pattern of throwing a SpiderException("TEST FAILED.") that the same catch block
            // intercepted and reported as a message mismatch.
            var missingPrimary = Assert.Throws<SpiderException>(() =>
            {
                insertPipeline.InitiEntity(EntitySpider.ParseEntityMetaData(typeof(UpdateEntity1).GetTypeInfo()));
            });
            Assert.Equal("Columns set as primary is not a property of your entity.", missingPrimary.Message);

            var missingUpdateColumn = Assert.Throws<SpiderException>(() =>
            {
                insertPipeline.InitiEntity(EntitySpider.ParseEntityMetaData(typeof(UpdateEntity2).GetTypeInfo()));
            });
            Assert.Equal("Columns set as update is not a property of your entity.", missingUpdateColumn.Message);

            var nothingToUpdate = Assert.Throws<SpiderException>(() =>
            {
                insertPipeline.InitiEntity(EntitySpider.ParseEntityMetaData(typeof(UpdateEntity3).GetTypeInfo()));
            });
            Assert.Equal("There is no column need update.", nothingToUpdate.Message);

            insertPipeline.InitiEntity(EntitySpider.ParseEntityMetaData(typeof(UpdateEntity4).GetTypeInfo()));
            var updateColumns = insertPipeline.GetUpdateColumns();
            Assert.Equal(1, updateColumns.Count);
            Assert.Equal("value", updateColumns.First().Name);

            MsSqlEntityPipeline insertPipeline2 = new MsSqlEntityPipeline(ConnectString, PipelineMode.Update);

            insertPipeline2.InitiEntity(EntitySpider.ParseEntityMetaData(typeof(UpdateEntity5).GetTypeInfo()));
            var updateColumns2 = insertPipeline2.GetUpdateColumns();
            Assert.Equal(1, updateColumns2.Count);
            Assert.Equal("value", updateColumns2.First().Name);
        }
Ejemplo n.º 16
0
        // Checks the entity-level selector parsed from Entity7 (XPath) and Entity8 (Css), and that
        // Entity9 has no selector, is not Multi, and is named after its nested type.
        public void EntitySelector()
        {
            var xpathEntity   = EntitySpider.ParseEntityMetaData(typeof(Entity7).GetTypeInfo()).Entity;
            var xpathSelector = xpathEntity.Selector;

            Assert.Equal("expression", xpathSelector.Expression);
            Assert.Equal(SelectorType.XPath, xpathSelector.Type);
            Assert.True(xpathEntity.Multi);

            var cssEntity   = EntitySpider.ParseEntityMetaData(typeof(Entity8).GetTypeInfo()).Entity;
            var cssSelector = cssEntity.Selector;

            Assert.Equal("expression2", cssSelector.Expression);
            Assert.Equal(SelectorType.Css, cssSelector.Type);
            Assert.True(cssEntity.Multi);

            var plainEntity = EntitySpider.ParseEntityMetaData(typeof(Entity9).GetTypeInfo()).Entity;

            Assert.False(plainEntity.Multi);
            Assert.Null(plainEntity.Selector);
            Assert.Equal("DotnetSpider.Extension.Test.SpiderEntityTest+Entity9", plainEntity.Name);
        }
Ejemplo n.º 17
0
        // Inserts two Product2 rows with an insert-mode MsSqlEntityPipeline, updates the category of
        // sku "110" with an update-mode pipeline, and verifies the change in today's sku2_yyyy_MM_dd table.
        public void UpdateWhenUnionPrimary()
        {
            ClearDb();

            try
            {
                using (SqlConnection conn = new SqlConnection(ConnectString))
                {
                    ISpider spider = new DefaultSpider("test", new Site());

                    MsSqlEntityPipeline insertPipeline = new MsSqlEntityPipeline(ConnectString);
                    insertPipeline.InitiEntity(EntitySpider.ParseEntityMetaData(typeof(Product2).GetTypeInfo()));
                    insertPipeline.InitPipeline(spider);

                    JObject data1 = new JObject {
                        { "sku", "110" }, { "category1", "4C" }, { "category", "3C" }, { "url", "http://jd.com/110" }, { "cdate", "2016-08-13" }
                    };
                    JObject data2 = new JObject {
                        { "sku", "111" }, { "category1", "4C" }, { "category", "3C" }, { "url", "http://jd.com/111" }, { "cdate", "2016-08-13" }
                    };
                    insertPipeline.Process(new List<JObject> {
                        data1, data2
                    });

                    MsSqlEntityPipeline updatePipeline = new MsSqlEntityPipeline(ConnectString, PipelineMode.Update);
                    updatePipeline.InitiEntity(EntitySpider.ParseEntityMetaData(typeof(Product2).GetTypeInfo()));
                    updatePipeline.InitPipeline(spider);

                    // Same sku/category1 as data1 but a new category value.
                    JObject data3 = new JObject {
                        { "sku", "110" }, { "category1", "4C" }, { "category", "AAAA" }, { "url", "http://jd.com/110" }, { "cdate", "2016-08-13" }
                    };
                    updatePipeline.Process(new List<JObject> {
                        data3
                    });

                    // A SELECT without ORDER BY has no guaranteed row order, so sort by Sku
                    // before asserting on positions.
                    var list = conn.Query<Product2>($"use test;select * from sku2_{DateTime.Now.ToString("yyyy_MM_dd")}")
                               .OrderBy(p => p.Sku, StringComparer.Ordinal)
                               .ToList();
                    Assert.Equal(2, list.Count);
                    Assert.Equal("110", list[0].Sku);
                    Assert.Equal("AAAA", list[0].Category);
                }
            }
            finally
            {
                // Run the cleanup helper (defined elsewhere) even when an assertion above fails.
                ClearDb();
            }
        }
Ejemplo n.º 18
0
        // Runs an EntityExtractor for Product over the saved Jd.html page and checks the result
        // count plus every extracted value of the first result.
        public void Extract()
        {
            var metadata  = EntitySpider.ParseEntityMetaData(typeof(Product).GetTypeInfo());
            var extractor = new EntityExtractor("test", null, metadata);

            // Extra values attached to the request; the assertions below expect them to surface
            // as CategoryName and CategoryId.
            var extras = new Dictionary<string, dynamic>
            {
                { "cat", "手机" },
                { "cat3", "110" }
            };
            var request = new Request("http://list.jd.com/list.html?cat=9987,653,655&page=2&JL=6_0_0&ms=5#J_main", 1, extras);
            var page    = new Page(request, ContentType.Html)
            {
                Content = File.ReadAllText(Path.Combine(SpiderEnviroment.BaseDirectory, "Jd.html"))
            };

            var results = extractor.Process(page);

            Assert.Equal(60, results.Count);

            var first = results[0];
            Assert.Equal("手机", first.GetValue("CategoryName"));
            Assert.Equal("110", first.GetValue("CategoryId"));
            Assert.Equal("http://item.jd.com/3031737.html", first.GetValue("Url"));
            Assert.Equal("3031737", first.GetValue("Sku"));
            Assert.Equal("荣耀官方旗舰店", first.GetValue("ShopName"));
            Assert.Equal("荣耀 NOTE 8 4GB+32GB 全网通版 冰河银", first.GetValue("Name"));
            Assert.Equal("1000000904", first.GetValue("VenderId"));
            Assert.Equal("1000000904", first.GetValue("JdzyShopId"));
            Assert.Equal(DateTime.Now.ToString("yyyy-MM-dd"), first.GetValue("RunId"));
        }
Ejemplo n.º 19
0
        // Inserts three Product rows (one with a null category) through MsSqlEntityPipeline and
        // verifies that all of them can be read back from today's sku_yyyy_MM_dd table.
        public void Insert()
        {
            ClearDb();

            try
            {
                using (SqlConnection conn = new SqlConnection(ConnectString))
                {
                    ISpider spider = new DefaultSpider("test", new Site());

                    MsSqlEntityPipeline insertPipeline = new MsSqlEntityPipeline(ConnectString);
                    insertPipeline.InitiEntity(EntitySpider.ParseEntityMetaData(typeof(Product).GetTypeInfo()));
                    insertPipeline.InitPipeline(spider);

                    // Common data
                    JObject data1 = new JObject {
                        { "sku", "110" }, { "category", "3C" }, { "url", "http://jd.com/110" }, { "cdate", "2016-08-13" }
                    };
                    JObject data2 = new JObject {
                        { "sku", "111" }, { "category", "3C" }, { "url", "http://jd.com/111" }, { "cdate", "2016-08-13" }
                    };
                    // Value is null
                    JObject data3 = new JObject {
                        { "sku", "112" }, { "category", null }, { "url", "http://jd.com/111" }, { "cdate", "2016-08-13" }
                    };
                    insertPipeline.Process(new List<JObject> {
                        data1, data2, data3
                    });

                    // A SELECT without ORDER BY has no guaranteed row order, so sort by Sku
                    // before asserting on positions.
                    var list = conn.Query<Product>($"use test;select * from sku_{DateTime.Now.ToString("yyyy_MM_dd")}")
                               .OrderBy(p => p.Sku, StringComparer.Ordinal)
                               .ToList();
                    Assert.Equal(3, list.Count);
                    Assert.Equal("110", list[0].Sku);
                    Assert.Equal("111", list[1].Sku);

                    // Pin the null-category check to the row that was inserted with a null category.
                    Assert.Equal("112", list[2].Sku);
                    Assert.Null(list[2].Category);
                }
            }
            finally
            {
                // Run the cleanup helper (defined elsewhere) even when an assertion above fails.
                ClearDb();
            }
        }