/// <summary>
/// Configures this spider: cache size, thread count, a Redis-backed scheduler, an HTTP
/// downloader with one download-complete handler, a database-driven start-URL source,
/// a MySQL entity pipeline and the <c>ProductUpdater</c> entity type.
/// </summary>
/// <param name="arguments">Arguments supplied to the spider; this method does not read them.</param>
protected override void MyInit(params string[] arguments)
{
    CachedSize = 1;
    ThreadNum = 8;

    // NOTE(review): this connection string (and the two MySQL ones below) embeds a password
    // in source — consider moving them to configuration.
    const string redisConnection = "127.0.0.1:6379,serviceName=Scheduler.NET,keepAlive=8,allowAdmin=True,connectTimeout=10000,password=6GS9F2QTkP36GggE0c3XwVwI,abortConnect=True,connectRetry=20";
    Scheduler = new RedisScheduler(redisConnection);

    var httpDownloader = new HttpClientDownloader();
    httpDownloader.DownloadCompleteHandlers = new IDownloadCompleteHandler[]
    {
        // Presumably strips the "json( ... );" JSONP wrapper requested via "callback=json" — TODO confirm.
        new SubContentHandler { Start = "json(", End = ");", StartOffset = 5, EndOffset = 0 }
    };
    Downloader = httpDownloader;

    // Start URLs are formatted from the "sku" column of rows still missing a shop name or shop id.
    var skuUrlSource = new BaseDbPrepareStartUrls();
    skuUrlSource.Source = DataSource.MySql;
    skuUrlSource.ConnectString = "Database='test';Data Source= localhost;User ID=root;Password=1qazZAQ!;Port=3306";
    skuUrlSource.QueryString = $"SELECT * FROM jd.sku_v2_{DateTimeUtils.RunIdOfMonday} WHERE shopname is null or shopid is null order by sku";
    skuUrlSource.Columns = new[] { new DataColumn("sku") };
    skuUrlSource.FormateStrings = new List<string>
    {
        "http://chat1.jd.com/api/checkChat?my=list&pidList={0}&callback=json"
    };
    PrepareStartUrls = new PrepareStartUrls[] { skuUrlSource };

    // NOTE(review): the pipeline uses a different database and port (4306) than the start-URL source (3306) — confirm intended.
    var entityPipeline = new MySqlEntityPipeline("Database='taobao';Data Source=localhost ;User ID=root;Password=1qazZAQ!;Port=4306");
    AddPipeline(entityPipeline);

    AddEntityType(typeof(ProductUpdater));
}
/// <summary>
/// Configures this spider: site headers and cookies, a Redis-backed scheduler, an HTTP
/// downloader with two download-complete handlers, the thread count, and — unless the
/// "noprepare" argument is present — a database-driven start-URL source. Finally registers
/// the <c>Item</c> entity type with a <c>MyDataHanlder</c>.
/// </summary>
/// <param name="arguments">
/// Arguments supplied to the spider; when they contain "noprepare" the start-URL source is not set.
/// </param>
protected override void MyInit(params string[] arguments)
{
    // NOTE(review): the cookie string below looks like a captured browser session — confirm it is still valid/needed.
    var site = new Site();
    site.Headers = new Dictionary<string, string>
    {
        { "Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8" },
        { "Referer", "https://www.taobao.com/?spm=a230r.1.0.0.ebb2eb2VkWVc7" }
    };
    site.CookiesStringPart = "thw=cn; miid=715530502217916458; tracknick=style9898123; _cc_=VT5L2FSpdA%3D%3D; tg=0; t=fdf1eb945c2d6b41909558f5c373c37e; cookie2=1cb7771c61122989bb7327f9116858cb; v=0; mt=ci=-1_0; cna=wBEiEVwsTwoCAXTrIc4M/zwX; _tb_token_=e38beee05307e; l=AhoatVWMG7a9HNd5Ar0vu7CJ6so0I54m; isg=AlhY8tnotW2k3pghow1NKSZGIYbqQbzLLM8WWZJJ0RNGLfgXOlGMW27LMVzj; uc3=nk2=EEomLiIV%2BYptPBTr&id2=VyySWWIEs2Gx&vt3=F8dARV%2Bke6706b8vtTM%3D&lg2=VT5L2FSpMGV7TQ%3D%3D; existShop=MTQ5NTYxOTEwMA%3D%3D; lgc=style9898123; skt=57e445e7876bfe9c; publishItemObj=Ng%3D%3D; _m_user_unitinfo_=unit|unzbyun; _m_unitapi_v_=1492572565585; _m_h5_tk=a64b9ef97931dc791ae1708fa1293e93_1496410667055; _m_h5_tk_enc=ade4c443f5c9b6358cfb9821ccf02282; UM_distinctid=15c39e8263a835-05097c28e0b965-37624605-1fa400-15c39e8263bbcd; ali_ab=116.235.37.69.1495620049800.4; linezing_session=3vGYfK3a2T0nRJgCZKSJS15W_1497875606644xXAh_3; uc2=wuf=https%3A%2F%2Fpassport.alibaba.com%2Fac%2Fpassword_reset.htm%3FfromSite%3D6%26appName%3Daliyun%26lang%3Dzh_CN; uc1=cookie14=UoW%2BsOlp%2B6aVYg%3D%3D";
    Site = site;

    Scheduler = new RedisScheduler(Configuration.RedisConnectString);

    var httpDownloader = new HttpClientDownloader();
    httpDownloader.DownloadCompleteHandlers = new IDownloadCompleteHandler[]
    {
        // Presumably cuts the response down to the embedded "g_page_config" JSON — TODO confirm offsets.
        new SubContentHandler { StartOffset = 16, EndOffset = 22, Start = "g_page_config = {", End = "g_srp_loadCss();" },
        // Presumably generates follow-up page URLs by advancing the "&s=0" parameter in steps of 44 — TODO confirm.
        new IncrementTargetUrlsCreator("&s=0", null, 44)
    };
    Downloader = httpDownloader;

    ThreadNum = 20;
    SkipWhenResultIsEmpty = true;

    bool skipPrepare = arguments.Contains("noprepare");
    if (!skipPrepare)
    {
        // Start URLs are formatted from the "bidwordstr" ({0}) and "tab" ({1}) columns of each keyword row.
        var keywordUrlSource = new BaseDbPrepareStartUrls();
        keywordUrlSource.BulkInsert = true;
        keywordUrlSource.ConnectString = Configuration.ConnectString;
        keywordUrlSource.QueryString = "SELECT * FROM taobao.result_keywords";
        keywordUrlSource.Columns = new[] { new DataColumn("bidwordstr"), new DataColumn("tab") };
        keywordUrlSource.FormateStrings = new List<string>
        {
            "https://s.taobao.com/search?q={0}&imgfile=&js=1&stats_click=search_radio_all%3A1&ie=utf8&sort=sale-desc&s=0&tab={1}"
        };
        PrepareStartUrls = new PrepareStartUrls[] { keywordUrlSource };
    }

    AddEntityType(typeof(Item), new MyDataHanlder());
}
/// <summary>
/// Configures this spider: a Redis-backed scheduler, an HTTP downloader with two
/// download-complete handlers, a single worker thread, and — unless the "noprepare"
/// argument is present — a database-driven start-URL source limited to 10000 keyword rows.
/// Finally registers the <c>Item</c> entity type with a <c>MyDataHanlder</c>.
/// </summary>
/// <param name="arguments">
/// Arguments supplied to the spider; when they contain "noprepare" the start-URL source is not set.
/// </param>
protected override void MyInit(params string[] arguments)
{
    Scheduler = new RedisScheduler(Config.RedisConnectString);

    var httpDownloader = new HttpClientDownloader();
    httpDownloader.DownloadCompleteHandlers = new IDownloadCompleteHandler[]
    {
        // Presumably cuts the response down to the embedded "g_page_config" JSON — TODO confirm offsets.
        new SubContentHandler { StartOffset = 16, EndOffset = 22, Start = "g_page_config = {", End = "g_srp_loadCss();" },
        // Presumably generates follow-up page URLs by advancing the "&s=0" parameter in steps of 44 — TODO confirm.
        new IncrementTargetUrlsCreator("&s=0", null, 44)
    };
    Downloader = httpDownloader;

    ThreadNum = 1;
    SkipWhenResultIsEmpty = true;

    bool skipPrepare = arguments.Contains("noprepare");
    if (!skipPrepare)
    {
        // Start URLs are formatted from the "bidwordstr" ({0}) and "tab" ({1}) columns of each keyword row.
        var keywordUrlSource = new BaseDbPrepareStartUrls();
        keywordUrlSource.BulkInsert = true;
        keywordUrlSource.ConnectString = Config.ConnectString;
        keywordUrlSource.QueryString = "SELECT * FROM taobao.result_keywords limit 10000";
        keywordUrlSource.Columns = new[] { new DataColumn("bidwordstr"), new DataColumn("tab") };
        keywordUrlSource.FormateStrings = new List<string>
        {
            "https://s.taobao.com/search?q={0}&imgfile=&js=1&stats_click=search_radio_all%3A1&ie=utf8&sort=sale-desc&s=0&tab={1}"
        };
        PrepareStartUrls = new PrepareStartUrls[] { keywordUrlSource };
    }

    AddEntityType(typeof(Item), new MyDataHanlder());
}
/// <summary>
/// Reads the "$.Type" token of the context's start-URL definition, converts the definition
/// into the matching <c>PrepareStartUrls</c> implementation and calls its <c>Build</c> with
/// the context's site. Types other than <c>GeneralDb</c> and <c>Cycle</c> result in no call.
/// </summary>
private void PrepareSite()
{
    var kind = _spiderContext.PrepareStartUrls.SelectToken("$.Type").ToObject<PrepareStartUrls.Types>();

    PrepareStartUrls builder;
    if (kind == PrepareStartUrls.Types.GeneralDb)
    {
        builder = _spiderContext.PrepareStartUrls.ToObject<GeneralDbPrepareStartUrls>();
    }
    else if (kind == PrepareStartUrls.Types.Cycle)
    {
        builder = _spiderContext.PrepareStartUrls.ToObject<CyclePrepareStartUrls>();
    }
    else
    {
        // Unrecognised type: nothing to build.
        builder = null;
    }

    builder?.Build(_spiderContext.Site);
}
/// <summary>
/// Adds a start-URL preparer to this context's <c>PrepareStartUrls</c> collection.
/// </summary>
/// <param name="prepareStartUrls">The preparer to add.</param>
/// <returns>This same context instance, so calls can be chained.</returns>
public SpiderContext AddPrepareStartUrls(PrepareStartUrls prepareStartUrls)
{
    PrepareStartUrls.Add(prepareStartUrls);

    return this;
}