Beispiel #1
0
 /// <summary>
 /// Configures this spider: cache size, thread count, a Redis-backed scheduler, an HTTP
 /// downloader with one content-trimming handler, a MySQL-driven start-URL source,
 /// a MySQL entity pipeline and the <c>ProductUpdater</c> entity type.
 /// </summary>
 /// <param name="arguments">Start-up arguments; not read by this implementation.</param>
 protected override void MyInit(params string[] arguments)
 {
     CachedSize = 1;
     ThreadNum = 8;

     // NOTE(review): the Redis password and both MySQL passwords below are hard-coded in
     // source — consider moving them to configuration. Left unchanged here on purpose.
     Scheduler = new RedisScheduler("127.0.0.1:6379,serviceName=Scheduler.NET,keepAlive=8,allowAdmin=True,connectTimeout=10000,password=6GS9F2QTkP36GggE0c3XwVwI,abortConnect=True,connectRetry=20");

     // The start URL requests "callback=json", so the handler is bounded by "json(" and ");".
     // Presumably this strips the JSONP wrapper — confirm against SubContentHandler.
     var downloader = new HttpClientDownloader();
     var wrapperTrimmer = new SubContentHandler();
     wrapperTrimmer.Start = "json(";
     wrapperTrimmer.End = ");";
     wrapperTrimmer.StartOffset = 5;
     wrapperTrimmer.EndOffset = 0;
     downloader.DownloadCompleteHandlers = new IDownloadCompleteHandler[] { wrapperTrimmer };
     Downloader = downloader;

     // One start URL is formatted per row returned by the query, using the "sku" column.
     var skuSource = new BaseDbPrepareStartUrls();
     skuSource.Source = DataSource.MySql;
     skuSource.ConnectString = "Database='test';Data Source= localhost;User ID=root;Password=1qazZAQ!;Port=3306";
     skuSource.QueryString = $"SELECT * FROM jd.sku_v2_{DateTimeUtils.RunIdOfMonday} WHERE shopname is null or shopid is null order by sku";
     skuSource.Columns = new[] { new DataColumn("sku") };
     skuSource.FormateStrings = new List<string>
     {
         "http://chat1.jd.com/api/checkChat?my=list&pidList={0}&callback=json"
     };
     PrepareStartUrls = new PrepareStartUrls[] { skuSource };

     // NOTE(review): this pipeline targets port 4306 while the start-URL source uses 3306 —
     // confirm that the difference is intentional.
     var pipeline = new MySqlEntityPipeline("Database='taobao';Data Source=localhost ;User ID=root;Password=1qazZAQ!;Port=4306");
     AddPipeline(pipeline);

     AddEntityType(typeof(ProductUpdater));
 }
Beispiel #2
0
 /// <summary>
 /// Configures this spider: site headers and cookies, a Redis-backed scheduler, an HTTP
 /// downloader with two download-complete handlers, 20 threads, and (unless disabled)
 /// a database-driven start-URL source; finally registers the <c>Item</c> entity type.
 /// </summary>
 /// <param name="arguments">
 /// Start-up arguments. When they contain "noprepare", no start-URL source is configured.
 /// </param>
 protected override void MyInit(params string[] arguments)
 {
     var site = new Site();
     var headers = new Dictionary<string, string>();
     headers.Add("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8");
     headers.Add("Referer", "https://www.taobao.com/?spm=a230r.1.0.0.ebb2eb2VkWVc7");
     site.Headers = headers;
     // NOTE(review): a captured browser session cookie is embedded in source — it looks
     // account-specific and will likely expire; consider loading it from configuration.
     site.CookiesStringPart = "thw=cn; miid=715530502217916458; tracknick=style9898123; _cc_=VT5L2FSpdA%3D%3D; tg=0; t=fdf1eb945c2d6b41909558f5c373c37e; cookie2=1cb7771c61122989bb7327f9116858cb; v=0; mt=ci=-1_0; cna=wBEiEVwsTwoCAXTrIc4M/zwX; _tb_token_=e38beee05307e; l=AhoatVWMG7a9HNd5Ar0vu7CJ6so0I54m; isg=AlhY8tnotW2k3pghow1NKSZGIYbqQbzLLM8WWZJJ0RNGLfgXOlGMW27LMVzj; uc3=nk2=EEomLiIV%2BYptPBTr&id2=VyySWWIEs2Gx&vt3=F8dARV%2Bke6706b8vtTM%3D&lg2=VT5L2FSpMGV7TQ%3D%3D; existShop=MTQ5NTYxOTEwMA%3D%3D; lgc=style9898123; skt=57e445e7876bfe9c; publishItemObj=Ng%3D%3D; _m_user_unitinfo_=unit|unzbyun; _m_unitapi_v_=1492572565585; _m_h5_tk=a64b9ef97931dc791ae1708fa1293e93_1496410667055; _m_h5_tk_enc=ade4c443f5c9b6358cfb9821ccf02282; UM_distinctid=15c39e8263a835-05097c28e0b965-37624605-1fa400-15c39e8263bbcd; ali_ab=116.235.37.69.1495620049800.4; linezing_session=3vGYfK3a2T0nRJgCZKSJS15W_1497875606644xXAh_3; uc2=wuf=https%3A%2F%2Fpassport.alibaba.com%2Fac%2Fpassword_reset.htm%3FfromSite%3D6%26appName%3Daliyun%26lang%3Dzh_CN; uc1=cookie14=UoW%2BsOlp%2B6aVYg%3D%3D";
     Site = site;

     Scheduler = new RedisScheduler(Configuration.RedisConnectString);

     var downloader = new HttpClientDownloader();
     // First handler: bounded by the "g_page_config = {" and "g_srp_loadCss();" markers.
     var pageConfigTrimmer = new SubContentHandler();
     pageConfigTrimmer.StartOffset = 16;
     pageConfigTrimmer.EndOffset = 22;
     pageConfigTrimmer.Start = "g_page_config = {";
     pageConfigTrimmer.End = "g_srp_loadCss();";
     // Second handler: presumably advances the "&s=0" paging parameter in steps of 44 —
     // confirm against IncrementTargetUrlsCreator.
     var pagingCreator = new IncrementTargetUrlsCreator("&s=0", null, 44);
     downloader.DownloadCompleteHandlers = new IDownloadCompleteHandler[]
     {
         pageConfigTrimmer,
         pagingCreator
     };
     Downloader = downloader;

     ThreadNum = 20;
     SkipWhenResultIsEmpty = true;

     bool skipPrepare = arguments.Contains("noprepare");
     if (!skipPrepare)
     {
         // Start URLs are formatted from the "bidwordstr" ({0}) and "tab" ({1}) columns.
         var keywordSource = new BaseDbPrepareStartUrls();
         keywordSource.BulkInsert = true;
         keywordSource.ConnectString = Configuration.ConnectString;
         keywordSource.QueryString = "SELECT * FROM taobao.result_keywords";
         keywordSource.Columns = new[]
         {
             new DataColumn("bidwordstr"),
             new DataColumn("tab")
         };
         keywordSource.FormateStrings = new List<string>
         {
             "https://s.taobao.com/search?q={0}&imgfile=&js=1&stats_click=search_radio_all%3A1&ie=utf8&sort=sale-desc&s=0&tab={1}"
         };
         PrepareStartUrls = new PrepareStartUrls[] { keywordSource };
     }

     AddEntityType(typeof(Item), new MyDataHanlder());
 }
 /// <summary>
 /// Configures this spider: a Redis-backed scheduler, an HTTP downloader with two
 /// download-complete handlers, a single thread, and (unless disabled) a database-driven
 /// start-URL source limited to 10000 rows; finally registers the <c>Item</c> entity type.
 /// </summary>
 /// <param name="arguments">
 /// Start-up arguments. When they contain "noprepare", no start-URL source is configured.
 /// </param>
 protected override void MyInit(params string[] arguments)
 {
     Scheduler = new RedisScheduler(Config.RedisConnectString);

     var downloader = new HttpClientDownloader();
     // First handler: bounded by the "g_page_config = {" and "g_srp_loadCss();" markers.
     var pageConfigTrimmer = new SubContentHandler();
     pageConfigTrimmer.StartOffset = 16;
     pageConfigTrimmer.EndOffset = 22;
     pageConfigTrimmer.Start = "g_page_config = {";
     pageConfigTrimmer.End = "g_srp_loadCss();";
     // Second handler: presumably advances the "&s=0" paging parameter in steps of 44 —
     // confirm against IncrementTargetUrlsCreator.
     var pagingCreator = new IncrementTargetUrlsCreator("&s=0", null, 44);
     downloader.DownloadCompleteHandlers = new IDownloadCompleteHandler[]
     {
         pageConfigTrimmer,
         pagingCreator
     };
     Downloader = downloader;

     ThreadNum = 1;
     SkipWhenResultIsEmpty = true;

     bool skipPrepare = arguments.Contains("noprepare");
     if (!skipPrepare)
     {
         // Start URLs are formatted from the "bidwordstr" ({0}) and "tab" ({1}) columns.
         var keywordSource = new BaseDbPrepareStartUrls();
         keywordSource.BulkInsert = true;
         keywordSource.ConnectString = Config.ConnectString;
         keywordSource.QueryString = "SELECT * FROM taobao.result_keywords limit 10000";
         keywordSource.Columns = new[]
         {
             new DataColumn("bidwordstr"),
             new DataColumn("tab")
         };
         keywordSource.FormateStrings = new List<string>
         {
             "https://s.taobao.com/search?q={0}&imgfile=&js=1&stats_click=search_radio_all%3A1&ie=utf8&sort=sale-desc&s=0&tab={1}"
         };
         PrepareStartUrls = new PrepareStartUrls[] { keywordSource };
     }

     AddEntityType(typeof(Item), new MyDataHanlder());
 }
Beispiel #4
0
        /// <summary>
        /// Deserializes the start-URL preparer described by the spider context and, when one
        /// could be created, runs its <c>Build</c> against the context's site.
        /// </summary>
        /// <remarks>
        /// Nothing is built when the context carries no start-URL configuration, when that
        /// configuration has no <c>$.Type</c> token, or when the type is neither
        /// <c>GeneralDb</c> nor <c>Cycle</c>.
        /// </remarks>
        private void PrepareSite()
        {
            var config = _spiderContext.PrepareStartUrls;

            // Guard against a missing configuration or a missing "Type" entry; previously
            // either case ended in a NullReferenceException. (Presumably a Json.NET token,
            // whose SelectToken returns null when the path matches nothing.)
            var typeToken = config?.SelectToken("$.Type");
            if (typeToken == null)
            {
                return;
            }

            PrepareStartUrls prepareStartUrls = null;

            switch (typeToken.ToObject<PrepareStartUrls.Types>())
            {
                case PrepareStartUrls.Types.GeneralDb:
                    prepareStartUrls = config.ToObject<GeneralDbPrepareStartUrls>();
                    break;

                case PrepareStartUrls.Types.Cycle:
                    prepareStartUrls = config.ToObject<CyclePrepareStartUrls>();
                    break;
            }

            // Unrecognised types leave the preparer null, in which case nothing is built.
            prepareStartUrls?.Build(_spiderContext.Site);
        }
		/// <summary>
		/// Appends a start-URL preparer to this context's <c>PrepareStartUrls</c> collection.
		/// </summary>
		/// <param name="prepareStartUrls">The preparer to add; it is passed to <c>Add</c> as-is, without validation.</param>
		/// <returns>This same context instance, so that calls can be chained.</returns>
		public SpiderContext AddPrepareStartUrls(PrepareStartUrls prepareStartUrls)
		{
			var registered = PrepareStartUrls;
			registered.Add(prepareStartUrls);
			return this;
		}