/// <summary>
/// Runs three entities (the last one with a null category) through
/// <see cref="MsSqlEntityPipeline"/> and verifies the rows read back from today's
/// <c>sku_yyyy_MM_dd</c> table.
/// </summary>
public void Insert()
        {
            ClearDb();

            // The trailing ClearDb() lives in a finally block so that a failed assertion or a
            // pipeline error does not leave test data behind for the next run.
            try
            {
                using (SqlConnection conn = new SqlConnection(ConnectString))
                {
                    ISpider spider = new DefaultSpider("test", new Site());

                    MsSqlEntityPipeline insertPipeline = new MsSqlEntityPipeline(ConnectString);
                    insertPipeline.InitiEntity(EntitySpider.ParseEntityMetaData(typeof(Product).GetTypeInfo()));
                    insertPipeline.InitPipeline(spider);

                    // Common data
                    JObject data1 = new JObject {
                        { "sku", "110" }, { "category", "3C" }, { "url", "http://jd.com/110" }, { "cdate", "2016-08-13" }
                    };
                    JObject data2 = new JObject {
                        { "sku", "111" }, { "category", "3C" }, { "url", "http://jd.com/111" }, { "cdate", "2016-08-13" }
                    };
                    // Value is null
                    JObject data3 = new JObject {
                        { "sku", "112" }, { "category", null }, { "url", "http://jd.com/111" }, { "cdate", "2016-08-13" }
                    };
                    insertPipeline.Process(new List <JObject> {
                        data1, data2, data3
                    });

                    // NOTE(review): the query has no ORDER BY, so the index-based assertions below
                    // rely on the server returning rows in insertion order - confirm.
                    var list = conn.Query <Product>($"use test;select * from sku_{DateTime.Now.ToString("yyyy_MM_dd")}").ToList();
                    Assert.AreEqual(3, list.Count);
                    Assert.AreEqual("110", list[0].Sku);
                    Assert.AreEqual("111", list[1].Sku);
                    Assert.AreEqual(null, list[2].Category);
                }
            }
            finally
            {
                ClearDb();
            }
        }
예제 #2
0
        /// <summary>
        /// Pushes the same URL three times plus a second URL into a breadth-first
        /// <see cref="QueueDuplicateRemovedScheduler"/>, polls once, and expects the first URL back
        /// with one request left and a total of two.
        /// </summary>
        public void PushAndPollBreadthFirst()
        {
            QueueDuplicateRemovedScheduler scheduler = new QueueDuplicateRemovedScheduler();

            scheduler.DepthFirst = false;
            ISpider spider = new DefaultSpider("test", new Site());

            scheduler.Init(spider);

            scheduler.Push(new Request("http://www.a.com", 1, null));
            scheduler.Push(new Request("http://www.a.com", 1, null));
            scheduler.Push(new Request("http://www.a.com", 1, null));
            scheduler.Push(new Request("http://www.b.com", 1, null));

            var request = scheduler.Poll();

            // xUnit's Assert.Equal takes (expected, actual); keeping that order makes the
            // failure message report the right value as "Expected".
            Assert.Equal("http://www.a.com/", request.Url.ToString());

            long left  = scheduler.GetLeftRequestsCount();
            long total = scheduler.GetTotalRequestsCount();

            Assert.Equal(1, left);
            Assert.Equal(2, total);
        }
        /// <summary>
        /// Pushes four requests into an in-memory scheduler, reloads them into a Redis scheduler
        /// via <c>Reload(scheduler.All)</c>, and expects them to be polled back in reverse push order.
        /// </summary>
        public void Load()
        {
            QueueDuplicateRemovedScheduler scheduler = new QueueDuplicateRemovedScheduler();
            ISpider spider = new DefaultSpider("test");


            scheduler.Push(new Request("http://www.a.com/", null));
            scheduler.Push(new Request("http://www.b.com/", null));
            scheduler.Push(new Request("http://www.c.com/", null));
            scheduler.Push(new Request("http://www.d.com/", null));

            Extension.Scheduler.RedisScheduler redisScheduler = GetRedisScheduler(spider.Identity);

            // NOTE(review): Dispose() is called before first use, presumably to clear state left
            // in Redis by an earlier run - confirm against RedisScheduler.Dispose.
            redisScheduler.Dispose();

            // try/finally so the closing Dispose() also runs when an assertion fails.
            try
            {
                redisScheduler.Reload(scheduler.All);

                Assert.Equal("http://www.d.com/", redisScheduler.Poll().Url.ToString());
                Assert.Equal("http://www.c.com/", redisScheduler.Poll().Url.ToString());
                Assert.Equal("http://www.b.com/", redisScheduler.Poll().Url.ToString());
                Assert.Equal("http://www.a.com/", redisScheduler.Poll().Url.ToString());
            }
            finally
            {
                redisScheduler.Dispose();
            }
        }
예제 #4
0
        /// <summary>
        /// Initialises a Redis scheduler, disposes it, then pushes four requests and expects the
        /// next poll to return the last one pushed.
        /// </summary>
        public void Clear()
        {
            Extension.Scheduler.RedisScheduler scheduler = GetRedisScheduler();

            ISpider spider = new DefaultSpider();

            scheduler.Init(spider);
            // NOTE(review): Dispose() right after Init presumably wipes data left in Redis by
            // earlier runs - confirm against RedisScheduler.Dispose.
            scheduler.Dispose();

            // try/finally so the closing Dispose() also runs when the assertion fails.
            try
            {
                Request request1 = new Request("http://www.ibm.com/1", null);
                Request request2 = new Request("http://www.ibm.com/2", null);
                Request request3 = new Request("http://www.ibm.com/3", null);
                Request request4 = new Request("http://www.ibm.com/4", null);

                scheduler.Push(request1);
                scheduler.Push(request2);
                scheduler.Push(request3);
                scheduler.Push(request4);

                Request result = scheduler.Poll();

                Assert.AreEqual("http://www.ibm.com/4", result.Url.ToString());
            }
            finally
            {
                scheduler.Dispose();
            }
        }
예제 #5
0
        /// <summary>
        /// Breadth-first scenario: the same URL is pushed three times followed by a second URL;
        /// the first poll is expected to return the first URL, leaving one request queued out of
        /// a total of two.
        /// </summary>
        public void PushAndPollBreadthFirst()
        {
            var queue = new QueueDuplicateRemovedScheduler();

            queue.DepthFirst = false;
            ISpider spider = new DefaultSpider("test", new Site());

            queue.Init(spider);

            // Three identical URLs followed by a distinct one, pushed in this exact order.
            var urls = new[]
            {
                "http://www.a.com",
                "http://www.a.com",
                "http://www.a.com",
                "http://www.b.com"
            };
            foreach (var url in urls)
            {
                queue.Push(new Request(url, null)
                {
                    Site = spider.Site
                });
            }

            var polled = queue.Poll();

            Assert.Equal("http://www.a.com/", polled.Url.ToString());

            long remaining = queue.LeftRequestsCount;
            long overall   = queue.TotalRequestsCount;

            Assert.Equal(1, remaining);
            Assert.Equal(2, overall);
        }
예제 #6
0
        /// <summary>
        /// Inserts three products through <see cref="CassandraEntityPipeline"/>, then switches the
        /// pipeline to <c>PipelineMode.InsertAndIgnoreDuplicate</c> and processes a fourth product
        /// that reuses the id of an existing row. Expects the table to still hold three rows, with
        /// the reused row now carrying the new sku and category.
        /// The test returns without doing anything on non-Windows platforms when built for NETSTANDARD.
        /// </summary>
        public void InsertAndIgnoreDuplicate()
        {
#if NETSTANDARD
            if (!RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
            {
                return;
            }
#endif
            ClearDb();

            // try/finally so the test keyspace is cleaned up even when an assertion fails.
            try
            {
                ISpider spider = new DefaultSpider("test", new Site());

                CassandraEntityPipeline insertPipeline = new CassandraEntityPipeline(connectString);
                var metadata = new EntityDefine <ProductInsert>();
                insertPipeline.AddEntity(metadata);
                insertPipeline.Init();

                // Common data
                var data1 = new ProductInsert {
                    Sku = "110", Category = "3C", Url = "http://jd.com/110", CDate = new DateTime(2016, 8, 13)
                };
                var data2 = new ProductInsert {
                    Sku = "111", Category = "3C", Url = "http://jd.com/111", CDate = new DateTime(2016, 8, 13)
                };
                // Value is null
                var data3 = new ProductInsert {
                    Sku = "112", Category = null, Url = "http://jd.com/111", CDate = new DateTime(2016, 8, 13)
                };
                insertPipeline.Process(metadata.Name, new List <dynamic> {
                    data1, data2, data3
                }, spider);

                // Build the query once so both reads target the same table even if the date
                // rolls over between them.
                var selectAll = $"SELECT * FROM test.sku_cassandra_{DateTime.Now.ToString("yyyy_MM_dd")}";

                // Cluster and session hold network resources; dispose them when the test is done.
                using (var cluster = CassandraUtil.CreateCluster(connectString))
                using (var session = cluster.Connect())
                {
                    session.ChangeKeyspace("test");

                    var results = session.Execute(selectAll).GetRows()
                                  .Select(row => new ProductInsert
                    {
                        Sku      = row.GetValue <string>("sku"),
                        Category = row.GetValue <string>("category"),
                        Id       = row.GetValue <Guid>("id")
                    })
                                  .ToList();

                    insertPipeline.DefaultPipelineModel = PipelineMode.InsertAndIgnoreDuplicate;

                    // Reuse the id of an existing row so the second Process call hits a duplicate key.
                    var existing = results.First();
                    var sku      = existing.Sku;
                    var data4    = new ProductInsert {
                        Id = existing.Id, Sku = "113", Category = "asdfasf", Url = "http://jd.com/111", CDate = new DateTime(2016, 8, 13)
                    };
                    insertPipeline.Process(metadata.Name, new List <dynamic> {
                        data4
                    }, spider);

                    results = session.Execute(selectAll).GetRows()
                              .Select(row => new ProductInsert
                    {
                        Sku      = row.GetValue <string>("sku"),
                        Category = row.GetValue <string>("category")
                    })
                              .ToList();

                    Assert.Equal(3, results.Count);
                    Assert.DoesNotContain(results, r => r.Sku == sku);

                    Assert.Contains(results, r => r.Sku == "113");
                    Assert.Contains(results, r => r.Category == "asdfasf");
                }
            }
            finally
            {
                ClearDb();
            }
        }
        /// <summary>
        /// Processes one <c>Entity15</c> row through <see cref="MySqlEntityPipeline"/> and checks
        /// the names and MySQL column types of the generated <c>test.table15</c> table.
        /// </summary>
        public virtual void DataTypes()
        {
            const string connectionString = "Database='mysql';Data Source=localhost;User ID=root;Password=;Port=3306;SslMode=None;";
            const string dropTable        = "use test;  drop table table15;";

            using (MySqlConnection conn = new MySqlConnection(connectionString))
            {
                try
                {
                    conn.Execute(dropTable);
                }
                catch
                {
                    // Best effort: the drop fails when the table does not exist yet,
                    // and that must not fail the test.
                }

                // try/finally so the table is dropped again even when an assertion fails.
                try
                {
                    var spider = new DefaultSpider();

                    EntityProcessor <Entity15> processor = new EntityProcessor <Entity15>();

                    var pipeline    = new MySqlEntityPipeline(connectionString);
                    var resultItems = new ResultItems();
                    resultItems.Request = new Request();
                    resultItems.AddOrUpdateResultItem(processor.Model.Identity,
                                                      new Tuple <IModel, IList <dynamic> >(processor.Model, new[] {
                        new Dictionary <string, dynamic>
                        {
                            { "int", "1" },
                            { "bool", "1" },
                            { "bigint", "11" },
                            { "string", "aaa" },
                            { "time", "2018-06-12" },
                            { "float", "1" },
                            { "double", "1" },
                            { "string1", "abc" },
                            { "string2", "abcdd" },
                            { "decimal", "1" }
                        }
                    }));
                    pipeline.Process(new ResultItems[] { resultItems }, spider.Logger, spider);

                    var columns = conn.Query <ColumnInfo>("SELECT COLUMN_NAME as `Name`, COLUMN_TYPE as `Type` FROM information_schema.columns WHERE table_name='table15' AND table_schema = 'test';").ToList();
                    Assert.Equal(12, columns.Count);

                    // Column names, in the order the query returns them.
                    Assert.Equal("int", columns[0].Name);
                    Assert.Equal("bool", columns[1].Name);
                    Assert.Equal("bigint", columns[2].Name);
                    Assert.Equal("string", columns[3].Name);
                    Assert.Equal("time", columns[4].Name);
                    Assert.Equal("float", columns[5].Name);
                    Assert.Equal("double", columns[6].Name);
                    Assert.Equal("string1", columns[7].Name);
                    Assert.Equal("string2", columns[8].Name);
                    Assert.Equal("decimal", columns[9].Name);
                    Assert.Equal("creation_time", columns[10].Name);
                    Assert.Equal("creation_date", columns[11].Name);

                    // MySQL column types for the same columns.
                    Assert.Equal("int(11)", columns[0].Type);
                    Assert.Equal("tinyint(1)", columns[1].Type);
                    Assert.Equal("bigint(20)", columns[2].Type);
                    Assert.Equal("varchar(255)", columns[3].Type);
                    Assert.Equal("timestamp", columns[4].Type);
                    Assert.Equal("float", columns[5].Type);
                    Assert.Equal("double", columns[6].Type);
                    Assert.Equal("varchar(100)", columns[7].Type);
                    Assert.Equal("longtext", columns[8].Type);
                    Assert.Equal("decimal(18,2)", columns[9].Type);
                    Assert.Equal("timestamp", columns[10].Type);
                    Assert.Equal("date", columns[11].Type);
                }
                finally
                {
                    try
                    {
                        conn.Execute(dropTable);
                    }
                    catch
                    {
                        // Best effort: a failed cleanup must not hide the real test outcome.
                    }
                }
            }
        }
예제 #8
0
        /// <summary>
        /// Pushes four requests into a Redis scheduler and checks the left / total / success /
        /// error counters after pushing, after bumping the success and error counts, after each
        /// poll, and after polling the queue once it is expected to be empty.
        /// </summary>
        public void Status()
        {
            Extension.Scheduler.RedisScheduler scheduler = GetRedisScheduler();
            ISpider spider = new DefaultSpider("test", new Site());

            scheduler.Init(spider);

            // NOTE(review): Dispose() right after Init presumably wipes counters left in Redis
            // by earlier runs - confirm against RedisScheduler.Dispose.
            scheduler.Dispose();

            // try/finally so the closing Dispose() also runs when an assertion fails.
            try
            {
                var urls = new[]
                {
                    "http://www.a.com/",
                    "http://www.b.com/",
                    "http://www.c.com/",
                    "http://www.d.com/"
                };
                foreach (var url in urls)
                {
                    scheduler.Push(new Request(url, null)
                    {
                        Site = spider.Site
                    });
                }

                Assert.Equal(0, scheduler.ErrorRequestsCount);
                Assert.Equal(4, scheduler.LeftRequestsCount);
                Assert.Equal(4, scheduler.TotalRequestsCount);
                scheduler.IncreaseErrorCount();
                Assert.Equal(1, scheduler.ErrorRequestsCount);
                Assert.Equal(0, scheduler.SuccessRequestsCount);
                scheduler.IncreaseSuccessCount();
                Assert.Equal(1, scheduler.SuccessRequestsCount);

                // Each poll is expected to lower only the "left" counter: 3, 2, 1, 0.
                for (int expectedLeft = 3; expectedLeft >= 0; expectedLeft--)
                {
                    scheduler.Poll();
                    Assert.Equal(expectedLeft, scheduler.LeftRequestsCount);
                    Assert.Equal(1, scheduler.SuccessRequestsCount);
                    Assert.Equal(1, scheduler.ErrorRequestsCount);
                    Assert.Equal(4, scheduler.TotalRequestsCount);
                }

                // Further polls on the drained queue are expected to leave every counter unchanged.
                scheduler.Poll();
                scheduler.Poll();
                Assert.Equal(0, scheduler.LeftRequestsCount);
                Assert.Equal(1, scheduler.SuccessRequestsCount);
                Assert.Equal(1, scheduler.ErrorRequestsCount);
                Assert.Equal(4, scheduler.TotalRequestsCount);
            }
            finally
            {
                scheduler.Dispose();
            }
        }
        /// <summary>
        /// Checks the content produced by <see cref="SubContentHandler"/> for several
        /// Start/End offset combinations, and that out-of-range offsets make the download throw a
        /// <see cref="SpiderException"/> with the "Sub content failed" message.
        /// </summary>
        public void SubContentHandler()
        {
            var            spider      = new DefaultSpider("test", new Site());
            TestDownloader downloader1 = new TestDownloader();

            // No offsets.
            downloader1.AddAfterDownloadCompleteHandler(new SubContentHandler
            {
                Start = "a",
                End   = "c"
            });
            var  request1 = new Request("http://a.com/", null);
            Page page     = downloader1.Download(request1, spider);

            Assert.AreEqual("aabbc", page.Content);

            // EndOffset only.
            downloader1 = new TestDownloader();
            downloader1.AddAfterDownloadCompleteHandler(new SubContentHandler
            {
                Start     = "a",
                End       = "c",
                EndOffset = 1
            });

            page = downloader1.Download(request1, spider);
            Assert.AreEqual("aabb", page.Content);

            // StartOffset only.
            downloader1 = new TestDownloader();
            downloader1.AddAfterDownloadCompleteHandler(new SubContentHandler
            {
                Start       = "a",
                End         = "c",
                StartOffset = 1
            });
            page = downloader1.Download(request1, spider);
            Assert.AreEqual("abbc", page.Content);

            // Both offsets.
            downloader1 = new TestDownloader();
            downloader1.AddAfterDownloadCompleteHandler(new SubContentHandler
            {
                Start       = "a",
                End         = "c",
                StartOffset = 1,
                EndOffset   = 1
            });

            page = downloader1.Download(request1, spider);
            Assert.AreEqual("abb", page.Content);

            // StartOffset too large: the download is expected to throw.
            downloader1 = new TestDownloader();
            downloader1.AddAfterDownloadCompleteHandler(new SubContentHandler
            {
                Start       = "a",
                End         = "c",
                StartOffset = 10
            });

            try
            {
                downloader1.Download(request1, spider);
                // Report a missing exception as an assertion failure instead of throwing a bare Exception.
                Assert.Fail("test failed.");
            }
            catch (SpiderException exception)
            {
                Assert.AreEqual("Sub content failed. Please check your settings.", exception.Message);
            }

            // EndOffset too large: the download is expected to throw.
            downloader1 = new TestDownloader();
            downloader1.AddAfterDownloadCompleteHandler(new SubContentHandler
            {
                Start     = "a",
                End       = "c",
                EndOffset = 20
            });

            try
            {
                downloader1.Download(request1, spider);
                Assert.Fail("test failed.");
            }
            catch (SpiderException exception)
            {
                Assert.AreEqual("Sub content failed. Please check your settings.", exception.Message);
            }
        }
        /// <summary>
        /// Drives <see cref="SubContentHandler"/> through <see cref="TestDownloader"/> with six
        /// Start/End offset configurations: four that must yield a specific content string and
        /// two that must throw <see cref="SpiderException"/>.
        /// </summary>
        public void SubContentHandler()
        {
            var spider = new DefaultSpider("test", new Site());

            // Case 1: no offsets.
            var noOffsets = new TestDownloader()
            {
                DownloadCompleteHandlers = new IDownloadCompleteHandler[]
                {
                    new SubContentHandler { Start = "a", End = "c" }
                }
            };
            var request1 = new Request("http://a.com/", 0, null);
            Assert.Equal("aabbc", noOffsets.Download(request1, spider).Content);

            // Case 2: EndOffset = 1.
            var endOffsetOnly = new TestDownloader
            {
                DownloadCompleteHandlers = new IDownloadCompleteHandler[]
                {
                    new SubContentHandler { Start = "a", End = "c", EndOffset = 1 }
                }
            };
            Assert.Equal("aabb", endOffsetOnly.Download(request1, spider).Content);

            // Case 3: StartOffset = 1.
            var startOffsetOnly = new TestDownloader
            {
                DownloadCompleteHandlers = new IDownloadCompleteHandler[]
                {
                    new SubContentHandler { Start = "a", End = "c", StartOffset = 1 }
                }
            };
            Assert.Equal("abbc", startOffsetOnly.Download(request1, spider).Content);

            // Case 4: both offsets = 1.
            var bothOffsets = new TestDownloader
            {
                DownloadCompleteHandlers = new IDownloadCompleteHandler[]
                {
                    new SubContentHandler { Start = "a", End = "c", StartOffset = 1, EndOffset = 1 }
                }
            };
            Assert.Equal("abb", bothOffsets.Download(request1, spider).Content);

            // Case 5: StartOffset = 10 must throw.
            var startTooLarge = new TestDownloader
            {
                DownloadCompleteHandlers = new IDownloadCompleteHandler[]
                {
                    new SubContentHandler { Start = "a", End = "c", StartOffset = 10 }
                }
            };
            var startFailure = Assert.Throws <SpiderException>(() =>
            {
                startTooLarge.Download(request1, spider);
            });
            Assert.Equal("Sub content failed. Please check your settings.", startFailure.Message);

            // Case 6: EndOffset = 20 must throw.
            var endTooLarge = new TestDownloader
            {
                DownloadCompleteHandlers = new IDownloadCompleteHandler[]
                {
                    new SubContentHandler { Start = "a", End = "c", EndOffset = 20 }
                }
            };
            var endFailure = Assert.Throws <SpiderException>(() =>
            {
                endTooLarge.Download(request1, spider);
            });
            Assert.Equal("Sub content failed. Please check your settings.", endFailure.Message);
        }
예제 #11
0
        /// <summary>
        /// SQL Server variant of the data-type test: processes one <c>Entity15</c> row through
        /// <see cref="SqlServerEntityPipeline"/> and checks the names and types of the columns of
        /// the generated <c>[test].dbo.[table15]</c> table. Does nothing when
        /// <c>Env.IsWindows</c> is false.
        /// </summary>
        public override void DataTypes()
        {
            if (!Env.IsWindows)
            {
                return;
            }

            const string connectionString = "Server=.\\SQLEXPRESS;Database=master;Trusted_Connection=True;MultipleActiveResultSets=true";
            const string dropTable        = "USE [test]; drop table [test].dbo.[table15]";

            using (var conn = new SqlConnection(connectionString))
            {
                try
                {
                    conn.Execute("create database test;");
                }
                catch
                {
                    // Best effort: creation fails when the database already exists.
                }
                try
                {
                    conn.Execute(dropTable);
                }
                catch
                {
                    // Best effort: the drop fails when the table does not exist yet.
                }

                // try/finally so the table is dropped again even when an assertion fails.
                try
                {
                    var spider = new DefaultSpider();

                    EntityProcessor <Entity15> processor = new EntityProcessor <Entity15>();

                    var pipeline    = new SqlServerEntityPipeline(connectionString);
                    var resultItems = new ResultItems();
                    resultItems.Request = new Request();
                    resultItems.AddOrUpdateResultItem(processor.Model.Identity, new Tuple <IModel, IEnumerable <dynamic> >(processor.Model, new dynamic[] {
                        new Dictionary <string, dynamic>
                        {
                            { "int", "1" },
                            { "bool", "1" },
                            { "bigint", "11" },
                            { "string", "aaa" },
                            { "time", "2018-06-12" },
                            { "float", "1" },
                            { "double", "1" },
                            { "string1", "abc" },
                            { "string2", "abcdd" },
                            { "decimal", "1" }
                        }
                    }));
                    pipeline.Process(new ResultItems[] { resultItems }, spider);

                    var columns = conn.Query <ColumnInfo>("USE [test];select  b.name Name,c.name+'(' + cast(c.length as varchar)+')' [Type] from sysobjects a,syscolumns b,systypes c where a.id=b.id and a.name='table15' and a.xtype='U'and b.xtype=c.xtype").ToList();
                    // NOTE(review): 15 rows are expected although only 12 are checked below -
                    // presumably the systypes join on xtype returns extra rows for the nvarchar
                    // columns (nvarchar and sysname share an xtype); confirm.
                    Assert.Equal(15, columns.Count);

                    // Column names, in the order the query returns them.
                    Assert.Equal("creation_date", columns[0].Name);
                    Assert.Equal("int", columns[1].Name);
                    Assert.Equal("time", columns[2].Name);
                    Assert.Equal("creation_time", columns[3].Name);
                    Assert.Equal("float", columns[4].Name);
                    Assert.Equal("double", columns[5].Name);
                    Assert.Equal("bool", columns[6].Name);
                    Assert.Equal("decimal", columns[7].Name);
                    Assert.Equal("bigint", columns[8].Name);
                    Assert.Equal("string", columns[9].Name);
                    Assert.Equal("string1", columns[10].Name);
                    Assert.Equal("string2", columns[11].Name);

                    // "type(length)" strings built by the query for the same columns.
                    Assert.Equal("date(3)", columns[0].Type);
                    Assert.Equal("int(4)", columns[1].Type);
                    Assert.Equal("datetime(8)", columns[2].Type);
                    Assert.Equal("datetime(8)", columns[3].Type);
                    Assert.Equal("float(8)", columns[4].Type);
                    Assert.Equal("float(8)", columns[5].Type);
                    Assert.Equal("bit(1)", columns[6].Type);
                    Assert.Equal("decimal(17)", columns[7].Type);
                    Assert.Equal("bigint(8)", columns[8].Type);
                    Assert.Equal("nvarchar(8000)", columns[9].Type);
                    Assert.Equal("nvarchar(8000)", columns[10].Type);
                    Assert.Equal("nvarchar(8000)", columns[11].Type);
                }
                finally
                {
                    try
                    {
                        conn.Execute(dropTable);
                    }
                    catch
                    {
                        // Best effort: a failed cleanup (e.g. the table was never created)
                        // must not hide the real test outcome.
                    }
                }
            }
        }