/// <summary>
/// Configures the spider for the hao.360.cn start page: identity, cache size, thread count,
/// download post-processing handlers, Redis scheduler, MySQL pipeline, start URL and entity type.
/// </summary>
/// <param name="arguments">Start-up arguments; not read by this implementation.</param>
protected override void MyInit(params string[] arguments)
        {
            // Format with the invariant culture so the timestamp stays Gregorian no matter
            // which regional settings the host machine uses.
            Identity = "HaoBrowser Hao360Spider Buble "
                       + DateTime.Now.ToString("yyyy-MM-dd HHmmss", System.Globalization.CultureInfo.InvariantCulture);
            CachedSize = 1;
            ThreadNum = 1;
            SkipWhenResultIsEmpty = true;

            var downloader = new HttpClientDownloader();

            // StartOffset (27) equals the length of the Start marker.
            // NOTE(review): presumably this trims the response to the text between the markers — confirm
            // against SubContentHandler.
            downloader.AddAfterDownloadCompleteHandler(new SubContentHandler
            {
                Start = "sales[\"hotsite_yixing\"] = [",
                End = "}}",
                StartOffset = 27,
                EndOffset = 0
            });

            // Configured to swap the escaped sequence \/ for a plain slash.
            downloader.AddAfterDownloadCompleteHandler(new ReplaceContentHandler
            {
                NewValue = "/",
                OldValue = "\\/",
            });

            // Install the configured downloader; without this assignment the two handlers above
            // would be attached to an object the spider never uses.
            Downloader = downloader;

            // NOTE(review): the Redis and MySQL credentials below are hard-coded in source —
            // consider loading them from configuration.
            Scheduler = new Extension.Scheduler.RedisScheduler("127.0.0.1:6379,serviceName=Scheduler.NET,keepAlive=8,allowAdmin=True,connectTimeout=10000,password=6GS9F2QTkP36GggE0c3XwVwI,abortConnect=True,connectRetry=20");
            AddPipeline(new MySqlEntityPipeline("Database='testhao';Data Source= localhost;User ID=root;Password=root@123456;Port=3306"));
            AddStartUrl("https://hao.360.cn/");
            AddEntityType(typeof(UpdateHao360Info));
        }
示例#2
0
        /// <summary>
        /// Entry point: rewrites the local cookie file, builds a single-threaded spider whose
        /// downloader has a <c>TimerUpdateCookieHandler</c> attached, queues 10000 start URLs
        /// and runs the spider.
        /// </summary>
        /// <param name="args">Command-line arguments; not read.</param>
        public static void Main(string[] args)
        {
            const string cookieFile = "www.baidu.com.cookies";

            // Drop any cookie file left over from an earlier run before writing a fresh one.
            if (File.Exists(cookieFile))
            {
                File.Delete(cookieFile);
            }
            File.WriteAllText(cookieFile, "a=b&c=d");

            var site = new Site { EncodingName = "UTF-8", SleepTime = 1000 };
            Spider spider = Spider.Create(site, new TestPageProcessor()).AddPipeline(new TestPipeline());
            spider.ThreadNum = 1;

            // NOTE(review): the meaning of the first constructor argument (5) is not visible here —
            // presumably an update interval; confirm against TimerUpdateCookieHandler.
            var cookieAwareDownloader = new HttpClientDownloader();
            cookieAwareDownloader.AddAfterDownloadCompleteHandler(new TimerUpdateCookieHandler(5, new FileCookieInject()));
            spider.Downloader = cookieAwareDownloader;

            for (int page = 0; page < 10000; page++)
            {
                spider.AddStartUrl("http://www.baidu.com/" + page);
            }

            spider.Run();
        }
        /// <summary>
        /// Configures the spider: single thread, Redis scheduler, an HTTP downloader with a
        /// <c>SubContentHandler</c>, start URLs prepared from a MySQL query over the weekly
        /// <c>jd.sku_v2_*</c> table, a MySQL entity pipeline and the <c>ProductUpdater</c> entity.
        /// </summary>
        /// <param name="arguments">Start-up arguments; not read by this implementation.</param>
        protected override void MyInit(params string[] arguments)
        {
            // NOTE(review): credentials are embedded in these connection strings.
            const string redisConnection = "127.0.0.1:6379,serviceName=Scheduler.NET,keepAlive=8,allowAdmin=True,connectTimeout=10000,password=6GS9F2QTkP36GggE0c3XwVwI,abortConnect=True,connectRetry=20";
            const string sourceConnection = "Database='test';Data Source= localhost;User ID=root;Password=1qazZAQ!;Port=3306";
            const string pipelineConnection = "Database='taobao';Data Source=localhost ;User ID=root;Password=1qazZAQ!;Port=4306";

            ThreadNum = 1;
            Scheduler = new RedisScheduler(redisConnection);
            Downloader = new HttpClientDownloader();

            // The start URLs request "callback=json", and the markers below match a json( ... ); wrapper.
            // StartOffset (5) equals the length of the Start marker.
            var callbackWrapperHandler = new SubContentHandler
            {
                Start = "json(",
                End = ");",
                StartOffset = 5,
                EndOffset = 0
            };
            Downloader.AddAfterDownloadCompleteHandler(callbackWrapperHandler);

            // Rows still missing a shop name or shop id supply the "sku" value for the {0} placeholder.
            var skuSource = new BaseDbPrepareStartUrls
            {
                Source = DataSource.MySql,
                ConnectString = sourceConnection,
                QueryString = $"SELECT * FROM jd.sku_v2_{DateTimeUtils.RunIdOfMonday} WHERE shopname is null or shopid is null order by sku",
                Columns = new[] { new DataColumn("sku") },
                FormateStrings = new List<string>
                {
                    "http://chat1.jd.com/api/checkChat?my=list&pidList={0}&callback=json"
                }
            };
            PrepareStartUrls = new PrepareStartUrls[] { skuSource };

            AddPipeline(new MySqlEntityPipeline(pipelineConnection));
            AddEntityType(typeof(ProductUpdater));
        }
示例#4
0
 /// <summary>
 /// Queues one Sina news search request for a fixed keyword, covering roughly the last seven
 /// years up to tomorrow, and wires up the console pipeline, the downloader with its
 /// <c>ReplaceHandler</c>, and the <c>SinaNews</c> entity.
 /// </summary>
 /// <param name="arguments">Start-up arguments; not read by this implementation.</param>
 protected override void OnInit(params string[] arguments)
 {
     const string keyword = "赵丽颖";

     // The date range goes into a query string, so format it with the invariant culture:
     // a host whose default calendar is not Gregorian would otherwise send the wrong year.
     // The clock is read once so both bounds derive from the same instant.
     var invariant = System.Globalization.CultureInfo.InvariantCulture;
     var now = DateTime.Now;
     var startDate = now.AddYears(-7).AddDays(-1).ToString("yyyy-MM-dd", invariant);
     var endDate = now.AddDays(1).ToString("yyyy-MM-dd", invariant);

     AddRequest(
         $"http://api.search.sina.com.cn/?c=news&t=&q={keyword}&pf=2136012948&ps=2130770082&page=0&stime={startDate}&etime={endDate}&sort=rel&highlight=1&num=10&ie=utf-8&callback=jQuery1720001955628746606708_1508996230766&_=1508996681484",
         new Dictionary<string, dynamic>
         {
             { "keyword", keyword }
         });
     AddPipeline(new ConsoleEntityPipeline());
     Downloader = new HttpClientDownloader();
     Downloader.AddAfterDownloadCompleteHandler(new ReplaceHandler());
     AddEntityType<SinaNews>();
 }
示例#5
0
        /// <summary>
        /// Configures the spider: Redis scheduler, an HTTP downloader with a content-replace handler
        /// and an <c>IncrementTargetUrlsBuilder</c>, single thread, and the <c>Item</c> entity.
        /// Unless the "noprepare" argument is present, it also registers a database-backed
        /// start-URL builder.
        /// </summary>
        /// <param name="arguments">Start-up arguments; "noprepare" skips the start-URL builder.</param>
        protected override void MyInit(params string[] arguments)
        {
            const string keywordQuery = "SELECT * FROM taobao.result_keywords limit 10000";
            const string searchUrlFormat = "https://s.taobao.com/search?q={0}&imgfile=&js=1&stats_click=search_radio_all%3A1&ie=utf8&sort=sale-desc&s=0&tab={1}";

            Scheduler = new RedisScheduler();

            var httpDownloader = new HttpClientDownloader();

            // Configured to swap the escaped sequence \/ for a plain slash.
            var slashUnescaper = new ReplaceContentHandler
            {
                NewValue = "/",
                OldValue = "\\/",
            };
            httpDownloader.AddAfterDownloadCompleteHandler(slashUnescaper);

            // NOTE(review): "&s=0" matches the offset parameter in the search URL; 44 is presumably
            // the per-page step — confirm against IncrementTargetUrlsBuilder.
            httpDownloader.AddAfterDownloadCompleteHandler(new IncrementTargetUrlsBuilder("&s=0", 44));

            Downloader = httpDownloader;
            ThreadNum = 1;
            SkipWhenResultIsEmpty = true;

            var skipPrepare = arguments.Contains("noprepare");
            if (!skipPrepare)
            {
                // The two selected columns fill the {0} and {1} placeholders of the URL format.
                var startUrlBuilder = new DbStartUrlBuilder(
                    Database.MySql,
                    Env.DataConnectionStringSettings.ConnectionString,
                    keywordQuery,
                    new[] { "bidwordstr", "tab" },
                    searchUrlFormat);
                AddStartUrlBuilder(startUrlBuilder);
            }

            AddEntityType(typeof(Item), new MyDataHanlder());
        }
示例#6
0
        /// <summary>
        /// Configures the spider: timestamped identity, an HTTP downloader with an
        /// <c>IncrementTargetUrlsBuilder</c>, single thread, two cas.cn index pages as start URLs,
        /// and the <c>ArticleSummary</c> and <c>Article</c> entities.
        /// </summary>
        /// <param name="arguments">Start-up arguments; not read by this implementation.</param>
        protected override void MyInit(params string[] arguments)
        {
            // Format with the invariant culture so the identity suffix stays Gregorian no matter
            // which regional settings the host machine uses.
            Identity = "qidian_" + DateTime.Now.ToString("yyyy_MM_dd_HHmmss", System.Globalization.CultureInfo.InvariantCulture);

            var downloader = new HttpClientDownloader();

            // NOTE(review): "index_1.shtml" matches the second start URL; presumably the handler derives
            // follow-up page URLs by incrementing the number — confirm against IncrementTargetUrlsBuilder.
            downloader.AddAfterDownloadCompleteHandler(new IncrementTargetUrlsBuilder("index_1.shtml"));
            Downloader = downloader;
            ThreadNum = 1;

            AddStartUrl("http://www.cas.cn/kx/kpwz/index.shtml");
            AddStartUrl("http://www.cas.cn/kx/kpwz/index_1.shtml");
            AddEntityType(typeof(ArticleSummary));
            AddEntityType(typeof(Article));
        }
        /// <summary>
        /// Configures the spider: Redis scheduler from <c>Config.RedisConnectString</c>, an HTTP
        /// downloader with a content-replace handler and an <c>IncrementTargetUrlsCreator</c>,
        /// single thread, and the <c>Item</c> entity. Unless the "noprepare" argument is present,
        /// start URLs are prepared from a database query.
        /// </summary>
        /// <param name="arguments">Start-up arguments; "noprepare" skips start-URL preparation.</param>
        protected override void MyInit(params string[] arguments)
        {
            const string keywordQuery = "SELECT * FROM taobao.result_keywords limit 10000";
            const string searchUrlFormat = "https://s.taobao.com/search?q={0}&imgfile=&js=1&stats_click=search_radio_all%3A1&ie=utf8&sort=sale-desc&s=0&tab={1}";

            Scheduler = new RedisScheduler(Config.RedisConnectString);

            var httpDownloader = new HttpClientDownloader();

            // Configured to swap the escaped sequence \/ for a plain slash.
            var slashUnescaper = new ReplaceContentHandler
            {
                NewValue = "/",
                OldValue = "\\/",
            };
            httpDownloader.AddAfterDownloadCompleteHandler(slashUnescaper);

            // NOTE(review): "&s=0" matches the offset parameter in the search URL; 44 is presumably
            // the per-page step — confirm against IncrementTargetUrlsCreator.
            httpDownloader.AddAfterDownloadCompleteHandler(new IncrementTargetUrlsCreator("&s=0", null, 44));

            Downloader = httpDownloader;
            ThreadNum = 1;
            SkipWhenResultIsEmpty = true;

            var skipPrepare = arguments.Contains("noprepare");
            if (!skipPrepare)
            {
                // The two selected columns fill the {0} and {1} placeholders of the URL format.
                var keywordSource = new BaseDbPrepareStartUrls
                {
                    BulkInsert = true,
                    ConnectString = Config.ConnectString,
                    QueryString = keywordQuery,
                    Columns = new[]
                    {
                        new DataColumn("bidwordstr"),
                        new DataColumn("tab")
                    },
                    FormateStrings = new List<string> { searchUrlFormat }
                };
                PrepareStartUrls = new PrepareStartUrls[] { keywordSource };
            }

            AddEntityType(typeof(Item), new MyDataHanlder());
        }
示例#8
0
        /// <summary>
        /// Configures the spider: two fixed JD checkChat start URLs, an HTTP downloader with a
        /// <c>SubContentHandler</c>, a MySQL entity pipeline and the <c>ProductUpdater</c> entity.
        /// </summary>
        /// <param name="arguments">Start-up arguments; not read by this implementation.</param>
        protected override void MyInit(params string[] arguments)
        {
            Site.AddStartUrl("http://chat1.jd.com/api/checkChat?my=list&pidList=3355984&callback=json");
            Site.AddStartUrl("http://chat1.jd.com/api/checkChat?my=list&pidList=3682523&callback=json");

            var downloader = new HttpClientDownloader();

            // The start URLs request "callback=json", and the markers below match a json( ... ); wrapper.
            // StartOffset (5) and EndOffset (2) equal the lengths of the Start and End markers.
            downloader.AddAfterDownloadCompleteHandler(new SubContentHandler
            {
                Start = "json(",
                End = ");",
                StartOffset = 5,
                EndOffset = 2
            });

            // Install the configured downloader; without this assignment the handler above
            // would be attached to an object the spider never uses.
            Downloader = downloader;

            // NOTE(review): the MySQL credentials are hard-coded in source — consider loading them
            // from configuration.
            AddPipeline(new MySqlEntityPipeline("Database='mysql';Data Source=localhost ;User ID=root;Password=1qazZAQ!;Port=3306"));
            AddEntityType(typeof(ProductUpdater));
        }