Example #1
0
        // Checks the target-URL patterns an EntityProcessor built from Entity14 exposes:
        // region xpath //*[@id="1111"] must yield exactly one pattern, "&page=[0-9]+&",
        // and region xpath //*[@id="222"] must yield null.
        public void TargetUrlsSelector_1Region_1Pattern()
        {
            var entity    = EntityDefine.Parse<Entity14>();
            var processor = new EntityProcessor(new Site(), entity);

            // Look the region up once and assert on the same result.
            var patterns = processor.GetTargetUrlPatterns("//*[@id=\"1111\"]");
            Assert.Single(patterns);
            Assert.Equal(@"&page=[0-9]+&", patterns[0].ToString());

            // Assert.Null reports the unexpected value on failure, unlike Assert.True(x == null).
            Assert.Null(processor.GetTargetUrlPatterns("//*[@id=\"222\"]"));
        }
Example #2
0
        // Checks the table metadata parsed from Entity10: first index, primary key,
        // and the two unique constraints in declaration order.
        public void Indexes()
        {
            var tableInfo = EntityDefine.Parse<Entity10>().TableInfo;

            Assert.Equal("Id", tableInfo.Indexs[0]);
            Assert.Equal("Name", tableInfo.Primary);

            var uniques = tableInfo.Uniques;
            Assert.Equal(2, uniques.Length);
            Assert.Equal("Id,Name", uniques[0]);
            Assert.Equal("Id", uniques[1]);
        }
Example #3
0
        // Checks that the first column of Entity11 carries two formatters and that the
        // first one is a ReplaceFormatter with NewValue "a" and OldValue "b".
        public void Formater()
        {
            var definition  = EntityDefine.Parse<Entity11>();
            var firstColumn = (Column)definition.Columns[0];
            var formatters  = firstColumn.Formatters;

            Assert.Equal(2, formatters.Count);

            var replace = (ReplaceFormatter)formatters[0];
            Assert.Equal("a", replace.NewValue);
            Assert.Equal("b", replace.OldValue);
        }
Example #4
0
        // Checks the table info parsed from Entity4 (database "test", Monday postfix)
        // and that Entity14 parses with no table info at all.
        public void Schema()
        {
            var withTable = EntityDefine.Parse<Entity4>();
            var tableInfo = withTable.TableInfo;

            Assert.Equal("test", tableInfo.Database);
            Assert.Equal(EntityTable.Monday, tableInfo.Postfix);

            var withoutTable = EntityDefine.Parse<Entity14>();

            Assert.Null(withoutTable.TableInfo);
        }
Example #5
0
        // Checks that Entity5 and Entity6 each parse to a single column, named
        // "Name" and "name" respectively.
        public void SetPrimary()
        {
            var upperCased = EntityDefine.Parse<Entity5>();

            Assert.Single(upperCased.Columns);
            Assert.Equal("Name", upperCased.Columns[0].Name);

            var lowerCased = EntityDefine.Parse<Entity6>();

            Assert.Single(lowerCased.Columns);
            Assert.Equal("name", lowerCased.Columns[0].Name);
        }
Example #6
0
        // Checks the target-URL patterns an EntityProcessor built from Entity20 exposes:
        // region xpath //*[@id="1111"] must yield two patterns, "&page=[0-1]+&" then
        // "&page=[0-9]+&", and region xpath //*[@id="3333"] must yield null.
        public void TargetUrlsSelector_Multi_2SameRegion_2Pattern()
        {
            var entity    = EntityDefine.Parse<Entity20>();
            var processor = new EntityProcessor(new Site(), entity);

            // Look the region up once and assert on the same result.
            var patterns = processor.GetTargetUrlPatterns("//*[@id=\"1111\"]");
            Assert.Equal(2, patterns.Count);
            Assert.Equal(@"&page=[0-1]+&", patterns[0].ToString());
            Assert.Equal(@"&page=[0-9]+&", patterns[1].ToString());

            // Assert.Null reports the unexpected value on failure, unlike Assert.True(x == null).
            Assert.Null(processor.GetTargetUrlPatterns("//*[@id=\"3333\"]"));
        }
Example #7
0
        /// <summary>
        /// Creates a storage cache backed by the pipeline obtained from
        /// <c>BaseEntityDbPipeline.GetPipelineFromAppConfig()</c>, registers the
        /// <c>CrawlCache</c> entity on it and initializes it for <paramref name="spider"/>.
        /// </summary>
        /// <param name="spider">Spider passed to the pipeline's InitPipeline and kept by this instance.</param>
        /// <exception cref="SpiderException">
        /// The configured pipeline is missing or is not a <c>BaseEntityDbPipeline</c>.
        /// </exception>
        public StorageCache(ISpider spider)
        {
            var configured = BaseEntityDbPipeline.GetPipelineFromAppConfig() as BaseEntityDbPipeline;
            if (configured == null)
            {
                throw new SpiderException("Can not get StorageCache's pipeline.");
            }

            var cacheEntity = EntityDefine.Parse<CrawlCache>();

            _pipeline = configured;
            _pipeline.AddEntity(cacheEntity);
            _pipeline.InitPipeline(spider);

            _spider = spider;
        }
Example #8
0
        // Processes a JSON page with two "data" items through an EntityProcessor built
        // from Entity1 and checks that two DataObjects are stored under the entity's name.
        public void TempEntityNoPrimaryInfo()
        {
            var entityMetadata = EntityDefine.Parse<Entity1>();
            var processor      = new EntityProcessor(new Site(), entityMetadata);
            var page           = new Page(new Request("http://www.abcd.com"))
            {
                Content = "{'data':[{'age':'1'},{'age':'2'}]}"
            };

            processor.Process(page);

            var extracted = page.ResultItems.GetResultItem("DotnetSpider.Extension.Test.EntityExtractorTest+Entity1") as List<DataObject>;

            // Fail with an assertion rather than a NullReferenceException when the result
            // item is missing or is not a List<DataObject>.
            Assert.NotNull(extracted);
            Assert.Equal(2, extracted.Count);
        }
Example #9
0
 // Parsing Entity3 and adding it to a TestPipeline must raise a SpiderException
 // with the "unique column is not a property" message.
 public void SetNotExistColumnToUnique()
 {
     // ThrowsAny accepts SpiderException and its subclasses, like the catch clause it replaces.
     var exception = Assert.ThrowsAny<SpiderException>(() =>
     {
         var entityMetadata = EntityDefine.Parse<Entity3>();
         var pipeline       = new TestPipeline("");
         pipeline.AddEntity(entityMetadata);
     });

     Assert.Equal("Columns set as unique is not a property of your entity.", exception.Message);
 }
Example #10
0
 // Parsing Entity24 and building an EntityProcessor from it must raise an
 // ArgumentNullException (or a subclass).
 public void TargetUrlsSelector_NullRegion_NullPattern()
 {
     // ThrowsAny accepts ArgumentNullException and its subclasses, like the catch clause it replaces.
     Assert.ThrowsAny<ArgumentNullException>(() =>
     {
         var entity = EntityDefine.Parse<Entity24>();
         new EntityProcessor(new Site(), entity);
     });
 }
Example #11
0
 // Parsing Entity15 and building an EntityProcessor from it must raise an exception
 // whose message states that region xpath and patterns cannot both be null.
 public void TargetUrlsSelector_Null()
 {
     // The original caught any Exception, so the exception type is deliberately not narrowed.
     var exception = Assert.ThrowsAny<Exception>(() =>
     {
         var entity = EntityDefine.Parse<Entity15>();
         new EntityProcessor(new Site(), entity);
     });

     Assert.Equal("Region xpath and patterns should not be null both.", exception.Message);
 }
Example #12
0
        // Checks SqlServerEntityPipeline.AddEntity validation of update entities:
        // UpdateEntity1..3 must be rejected with specific SpiderException messages, while
        // UpdateEntity4 and UpdateEntity5 must each register "Value" as the only update column.
        // Returns without asserting anything on non-Windows platforms.
        public void DefineUpdateEntity()
        {
            if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
            {
                return;
            }

            var rejectingPipeline = new SqlServerEntityPipeline(ConnectString);

            // Assert.ThrowsAny replaces a try/catch whose "TEST FAILED." sentinel was itself a
            // SpiderException and was therefore swallowed by the same catch clause, turning a
            // missing exception into a misleading message mismatch.
            var primaryError = Assert.ThrowsAny<SpiderException>(() =>
            {
                rejectingPipeline.AddEntity(EntityDefine.Parse<UpdateEntity1>());
            });
            Assert.Equal("Columns set as primary is not a property of your entity.", primaryError.Message);

            var updateError = Assert.ThrowsAny<SpiderException>(() =>
            {
                rejectingPipeline.AddEntity(EntityDefine.Parse<UpdateEntity2>());
            });
            Assert.Equal("Columns set as update is not a property of your entity.", updateError.Message);

            var nothingToUpdateError = Assert.ThrowsAny<SpiderException>(() =>
            {
                rejectingPipeline.AddEntity(EntityDefine.Parse<UpdateEntity3>());
            });
            Assert.Equal("There is no column need update.", nothingToUpdateError.Message);

            var metadata = EntityDefine.Parse<UpdateEntity4>();

            rejectingPipeline.AddEntity(metadata);
            Assert.Single(rejectingPipeline.GetUpdateColumns(metadata.Name));
            Assert.Equal("Value", rejectingPipeline.GetUpdateColumns(metadata.Name).First());

            var secondPipeline = new SqlServerEntityPipeline(ConnectString);
            var metadata2      = EntityDefine.Parse<UpdateEntity5>();

            secondPipeline.AddEntity(metadata2);
            Assert.Single(secondPipeline.GetUpdateColumns(metadata2.Name));
            Assert.Equal("Value", secondPipeline.GetUpdateColumns(metadata2.Name).First());
        }
Example #13
0
        // Stores a connection string in `dotnetspider1`.`settings`, configures a
        // MySqlEntityPipeline to read it through UpdateConnectString, inserts two
        // ProductInsert rows and checks they can be read back from today's sku table.
        public void UpdateConnectString()
        {
            ClearDb();

            try
            {
                using (MySqlConnection conn = new MySqlConnection(ConnectString))
                {
                    conn.Execute("CREATE DATABASE IF NOT EXISTS `dotnetspider1` DEFAULT CHARACTER SET utf8;");

                    try
                    {
                        conn.Execute("CREATE TABLE IF NOT EXISTS `dotnetspider1`.`settings` (`id` int(11) NOT NULL AUTO_INCREMENT,`type` varchar(45) NOT NULL,`key` varchar(45) DEFAULT NULL,`value` text,PRIMARY KEY(`id`),UNIQUE KEY `UNIQUE` (`key`,`type`)) AUTO_INCREMENT = 1");
                        try
                        {
                            conn.Execute("INSERT `dotnetspider1`.`settings` (`value`,`type`,`key`) VALUES (\"Database='mysql';Data Source=127.0.0.1;User ID=root;Password=;Port=3306\",'ConnectString','MySql01')");
                        }
                        catch (Exception)
                        {
                            // Deliberately best-effort. The table has a unique key on (key, type), so
                            // presumably this fails only when the row is left over from an earlier run
                            // (TODO confirm).
                        }

                        ISpider spider = new DefaultSpider("test", new Site());

                        MySqlEntityPipeline insertPipeline = new MySqlEntityPipeline(null)
                        {
                            UpdateConnectString = new DbUpdateConnectString
                            {
                                ConnectString = ConnectString,
                                QueryString   = "SELECT value from `dotnetspider1`.`settings` where `type`='ConnectString' and `key`='MySql01' LIMIT 1"
                            }
                        };
                        var metadata = EntityDefine.Parse<ProductInsert>();
                        insertPipeline.AddEntity(metadata);
                        insertPipeline.InitPipeline(spider);

                        DataObject data1 = new DataObject {
                            { "Sku", "110" }, { "Category", "3C" }, { "Url", "http://jd.com/110" }, { "CDate", "2016-08-13" }
                        };
                        DataObject data2 = new DataObject {
                            { "Sku", "111" }, { "Category", "3C" }, { "Url", "http://jd.com/111" }, { "CDate", "2016-08-13" }
                        };

                        insertPipeline.Process(metadata.Name, new List<DataObject> {
                            data1, data2
                        });

                        var list = conn.Query<ProductInsert>($"select * from test.sku_{DateTime.Now.ToString("yyyy_MM_dd")}").ToList();
                        Assert.Equal(2, list.Count);
                        Assert.Equal("110", list[0].Sku);
                        Assert.Equal("111", list[1].Sku);
                    }
                    finally
                    {
                        // Drop the settings database even when an assertion or pipeline call fails.
                        conn.Execute("DROP DATABASE IF EXISTS `dotnetspider1`");
                    }
                }
            }
            finally
            {
                // Always clean up so a failing run does not leave data behind for other tests.
                ClearDb();
            }
        }
Example #14
0
        // Attaches a MyDataHanlder keyed by a fresh GUID to the Product entity, processes a
        // small JSON page, and checks that a file named after that GUID exists afterwards
        // (the file is then deleted).
        public void HandlerWhenExtractZeroResult()
        {
            var markerFile = Guid.NewGuid().ToString("N");
            var definition = EntityDefine.Parse<Product>();
            definition.DataHandler = new MyDataHanlder(markerFile);

            var processor = new EntityProcessor(new Site(), definition);
            var page      = new Page(new Request("http://www.abcd.com"))
            {
                Content = "{'data':[{'name':'1'},{'name':'2'}]}"
            };

            processor.Process(page);

            Assert.True(File.Exists(markerFile));
            File.Delete(markerFile);
        }
Example #15
0
        // Inserts two ProductInsert rows through one SqlServerEntityPipeline, updates the
        // first through a second pipeline built from ProductUpdate, then checks that both
        // rows exist and the first row's Category became "4C".
        // Returns without asserting anything on non-Windows platforms.
        public void Update()
        {
            if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
            {
                return;
            }

            ClearDb();

            try
            {
                using (SqlConnection conn = new SqlConnection(ConnectString))
                {
                    ISpider spider = new DefaultSpider("test", new Site());

                    SqlServerEntityPipeline insertPipeline = new SqlServerEntityPipeline(ConnectString);
                    var insertMetadata = EntityDefine.Parse<ProductInsert>();
                    insertPipeline.AddEntity(insertMetadata);
                    insertPipeline.InitPipeline(spider);

                    DataObject data1 = new DataObject {
                        { "Sku", "110" }, { "Category", "3C" }, { "Url", "http://jd.com/110" }, { "CDate", "2016-08-13" }
                    };
                    DataObject data2 = new DataObject {
                        { "Sku", "111" }, { "Category", "3C" }, { "Url", "http://jd.com/111" }, { "CDate", "2016-08-13" }
                    };
                    insertPipeline.Process(insertMetadata.Name, new List<DataObject> {
                        data1, data2
                    });

                    SqlServerEntityPipeline updatePipeline = new SqlServerEntityPipeline(ConnectString);
                    var updateMetadata = EntityDefine.Parse<ProductUpdate>();
                    updatePipeline.AddEntity(updateMetadata);
                    updatePipeline.InitPipeline(spider);

                    DataObject data3 = new DataObject {
                        { "Sku", "110" }, { "Category", "4C" }, { "Url", "http://jd.com/110" }, { "CDate", "2016-08-13" }
                    };
                    updatePipeline.Process(updateMetadata.Name, new List<DataObject> {
                        data3
                    });

                    var list = conn.Query<ProductInsert>($"use test;select * from sku_{DateTime.Now.ToString("yyyy_MM_dd")}").ToList();
                    Assert.Equal(2, list.Count);
                    Assert.Equal("110", list[0].Sku);
                    Assert.Equal("4C", list[0].Category);
                }
            }
            finally
            {
                // Always clean up so a failing run does not leave data behind for other tests.
                ClearDb();
            }
        }
Example #16
0
        // Inserts two Product2Insert rows through a default-constructed MySqlEntityPipeline,
        // updates the first through a pipeline created with (null, true) and built from
        // Product2Update, then checks that both rows exist and the first row's Category
        // became "AAAA".
        public void UpdateWhenUnionPrimaryCheckIfSameBeforeUpdateUseAppConfig()
        {
            ClearDb();

            try
            {
                using (MySqlConnection conn = new MySqlConnection(ConnectString))
                {
                    ISpider spider = new DefaultSpider("test", new Site());

                    MySqlEntityPipeline insertPipeline = new MySqlEntityPipeline();
                    var insertMetadata = EntityDefine.Parse<Product2Insert>();
                    insertPipeline.AddEntity(insertMetadata);
                    insertPipeline.InitPipeline(spider);

                    DataObject data1 = new DataObject {
                        { "Sku", "110" }, { "Category1", "4C" }, { "Category", "3C" }, { "Url", "http://jd.com/110" }, { "CDate", "2016-08-13" }
                    };
                    DataObject data2 = new DataObject {
                        { "Sku", "111" }, { "Category1", "4C" }, { "Category", "3C" }, { "Url", "http://jd.com/111" }, { "CDate", "2016-08-13" }
                    };
                    insertPipeline.Process(insertMetadata.Name, new List<DataObject> {
                        data1, data2
                    });

                    MySqlEntityPipeline updatePipeline = new MySqlEntityPipeline(null, true);
                    var updateMetadata = EntityDefine.Parse<Product2Update>();
                    updatePipeline.AddEntity(updateMetadata);
                    updatePipeline.InitPipeline(spider);

                    DataObject data3 = new DataObject {
                        { "Sku", "110" }, { "Category1", "4C" }, { "Category", "AAAA" }, { "Url", "http://jd.com/110" }, { "CDate", "2016-08-13" }
                    };
                    updatePipeline.Process(updateMetadata.Name, new List<DataObject> {
                        data3
                    });

                    var list = conn.Query<Product2Insert>($"select * from test.sku2_{DateTime.Now.ToString("yyyy_MM_dd")}").ToList();
                    Assert.Equal(2, list.Count);
                    Assert.Equal("110", list[0].Sku);
                    Assert.Equal("AAAA", list[0].Category);
                }
            }
            finally
            {
                // Always clean up so a failing run does not leave data behind for other tests.
                ClearDb();
            }
        }
Example #17
0
        // Checks the selector metadata parsed from three entities: Entity7 (XPath, multi),
        // Entity8 (Css, multi) and Entity9 (no selector, not multi, name is the full type name).
        public void EntitySelector()
        {
            var xpathEntity   = EntityDefine.Parse<Entity7>();
            var xpathSelector = xpathEntity.Selector;

            Assert.Equal("expression", xpathSelector.Expression);
            Assert.Equal(SelectorType.XPath, xpathSelector.Type);
            Assert.True(xpathEntity.Multi);

            var cssEntity   = EntityDefine.Parse<Entity8>();
            var cssSelector = cssEntity.Selector;

            Assert.Equal("expression2", cssSelector.Expression);
            Assert.Equal(SelectorType.Css, cssSelector.Type);
            Assert.True(cssEntity.Multi);

            var plainEntity = EntityDefine.Parse<Entity9>();

            Assert.False(plainEntity.Multi);
            Assert.Null(plainEntity.Selector);
            Assert.Equal("DotnetSpider.Extension.Test.EntitySpiderTest2+Entity9", plainEntity.Name);
        }
Example #18
0
        /// <summary>
        /// Parses <paramref name="type"/> into an <see cref="EntityDefine"/>, adds it to
        /// <c>Entities</c> and registers an <c>EntityProcessor</c> for it as a page processor.
        /// </summary>
        /// <param name="type">Entity type; must be assignable to <c>SpiderEntity</c>.</param>
        /// <param name="dataHandler">Handler assigned to the parsed entity's <c>DataHandler</c>.</param>
        /// <param name="tableName">
        /// Table name override. Applied only when the parsed entity has table info and this
        /// value is neither null nor empty.
        /// </param>
        /// <returns>The parsed entity definition.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="type"/> is null.</exception>
        /// <exception cref="SpiderException"><paramref name="type"/> is not a <c>SpiderEntity</c>.</exception>
        public EntityDefine AddEntityType(Type type, DataHandler dataHandler, string tableName)
        {
            CheckIfRunning();

            // A null type used to fall through to the error branch and crash with a
            // NullReferenceException while formatting the message (type.FullName).
            if (type == null)
            {
                throw new ArgumentNullException(nameof(type));
            }

            if (!typeof(SpiderEntity).IsAssignableFrom(type))
            {
                throw new SpiderException($"Type: {type.FullName} is not a SpiderEntity.");
            }

            var entity = EntityDefine.Parse(type.GetTypeInfoCrossPlatform());
            if (entity.TableInfo != null && !string.IsNullOrEmpty(tableName))
            {
                entity.TableInfo.Name = tableName;
            }
            entity.DataHandler = dataHandler;

            Entities.Add(entity);
            AddPageProcessor(new EntityProcessor(Site, entity));

            return entity;
        }
Example #19
0
        // Inserts three ProductInsert rows (one with a null Category) through a
        // default-constructed MySqlEntityPipeline and checks they can be read back from
        // today's sku table, including the null Category.
        public void InsertUseAppConfig()
        {
            ClearDb();

            try
            {
                using (MySqlConnection conn = new MySqlConnection(ConnectString))
                {
                    ISpider spider = new DefaultSpider("test", new Site());

                    MySqlEntityPipeline insertPipeline = new MySqlEntityPipeline();
                    var metadata = EntityDefine.Parse<ProductInsert>();
                    insertPipeline.AddEntity(metadata);
                    insertPipeline.InitPipeline(spider);

                    // Common data
                    DataObject data1 = new DataObject {
                        { "Sku", "110" }, { "Category", "3C" }, { "Url", "http://jd.com/110" }, { "CDate", "2016-08-13" }
                    };
                    DataObject data2 = new DataObject {
                        { "Sku", "111" }, { "Category", "3C" }, { "Url", "http://jd.com/111" }, { "CDate", "2016-08-13" }
                    };
                    // Value is null
                    DataObject data3 = new DataObject {
                        { "Sku", "112" }, { "Category", null }, { "Url", "http://jd.com/111" }, { "CDate", "2016-08-13" }
                    };
                    insertPipeline.Process(metadata.Name, new List<DataObject> {
                        data1, data2, data3
                    });

                    var list = conn.Query<ProductInsert>($"select * from test.sku_{DateTime.Now.ToString("yyyy_MM_dd")}").ToList();
                    Assert.Equal(3, list.Count);
                    Assert.Equal("110", list[0].Sku);
                    Assert.Equal("111", list[1].Sku);
                    Assert.Null(list[2].Category);
                }
            }
            finally
            {
                // Always clean up so a failing run does not leave data behind for other tests.
                ClearDb();
            }
        }
Example #20
0
        // Runs an EntityExtractor for Product over the Jd.html fixture (read from
        // Env.BaseDirectory) with request extras "cat" and "cat3", then checks the number
        // of extracted rows and the field values of the first row.
        public void Extract()
        {
            var definition = EntityDefine.Parse<Product>();
            var extractor  = new EntityExtractor("test", null, definition);

            var extras = new Dictionary<string, dynamic>
            {
                { "cat", "手机" },
                { "cat3", "110" }
            };
            var request = new Request("http://list.jd.com/list.html?cat=9987,653,655&page=2&JL=6_0_0&ms=5#J_main", extras);
            var page    = new Page(request, null)
            {
                Content = File.ReadAllText(Path.Combine(Env.BaseDirectory, "Jd.html"))
            };

            var results = extractor.Extract(page);

            Assert.Equal(60, results.Count);

            var first = results[0];
            Assert.Equal("手机", first["CategoryName"]);
            Assert.Equal("110", first["CategoryId"]);
            Assert.Equal("http://item.jd.com/3031737.html", first["Url"]);
            Assert.Equal("3031737", first["Sku"]);
            Assert.Equal("荣耀官方旗舰店", first["ShopName"]);
            Assert.Equal("荣耀 NOTE 8 4GB+32GB 全网通版 冰河银", first["Name"]);
            Assert.Equal("1000000904", first["VenderId"]);
            Assert.Equal("1000000904", first["JdzyShopId"]);
            Assert.Equal(DateTime.Now.ToString("yyyy-MM-dd"), first["RunId"]);
        }