/// <summary>
/// Configures this spider: identity, caching/threading options, downloader
/// post-processing handlers, scheduler, storage pipeline, start URL and entity type.
/// </summary>
/// <param name="arguments">Start-up arguments; not read by this method.</param>
protected override void MyInit(params string[] arguments)
{
    Identity = "HaoBrowser Hao360Spider Buble " + DateTime.Now.ToString("yyyy-MM-dd HHmmss");
    CachedSize = 1;
    ThreadNum = 1;
    SkipWhenResultIsEmpty = true;

    var downloader = new HttpClientDownloader();
    downloader.AddAfterDownloadCompleteHandler(new SubContentHandler
    {
        Start = "sales[\"hotsite_yixing\"] = [",
        End = "}}",
        StartOffset = 27,
        EndOffset = 0
    });
    // Presumably turns escaped "\/" sequences in the payload back into "/" — confirm against ReplaceContentHandler.
    downloader.AddAfterDownloadCompleteHandler(new ReplaceContentHandler
    {
        NewValue = "/",
        OldValue = "\\/",
    });
    // Fix: the configured downloader was previously never installed, so the two
    // handlers registered above could not take effect.
    Downloader = downloader;

    // NOTE(review): the Redis and MySQL credentials below are embedded in source;
    // consider moving them to configuration.
    Scheduler = new Extension.Scheduler.RedisScheduler("127.0.0.1:6379,serviceName=Scheduler.NET,keepAlive=8,allowAdmin=True,connectTimeout=10000,password=6GS9F2QTkP36GggE0c3XwVwI,abortConnect=True,connectRetry=20");
    AddPipeline(new MySqlEntityPipeline("Database='testhao';Data Source= localhost;User ID=root;Password=root@123456;Port=3306"));

    AddStartUrl("https://hao.360.cn/");
    AddEntityType(typeof(UpdateHao360Info));
}
/// <summary>
/// Entry point: rewrites the cookie file, builds a single-threaded spider whose
/// downloader carries a <c>TimerUpdateCookieHandler</c>, queues 10000 start URLs
/// and runs the spider.
/// </summary>
/// <param name="args">Command-line arguments; not read by this method.</param>
public static void Main(string[] args)
{
    const string cookieFile = "www.baidu.com.cookies";
    const int startUrlCount = 10000;

    // Remove any previous cookie file before writing the new content.
    if (File.Exists(cookieFile))
    {
        File.Delete(cookieFile);
    }

    File.WriteAllText(cookieFile, "a=b&c=d");

    var site = new Site { EncodingName = "UTF-8", SleepTime = 1000 };
    Spider crawler = Spider.Create(site, new TestPageProcessor()).AddPipeline(new TestPipeline());
    crawler.ThreadNum = 1;

    var httpDownloader = new HttpClientDownloader();
    var cookieHandler = new TimerUpdateCookieHandler(5, new FileCookieInject());
    httpDownloader.AddAfterDownloadCompleteHandler(cookieHandler);
    crawler.Downloader = httpDownloader;

    // Queue http://www.baidu.com/0 .. http://www.baidu.com/9999.
    int index = 0;
    while (index < startUrlCount)
    {
        crawler.AddStartUrl("http://www.baidu.com/" + index);
        index++;
    }

    crawler.Run();
}
/// <summary>
/// Configures this spider: Redis scheduler, downloader with a
/// <c>SubContentHandler</c>, start URLs prepared from a MySQL query over the
/// current run's sku table, a MySQL entity pipeline and the
/// <c>ProductUpdater</c> entity type.
/// </summary>
/// <param name="arguments">Start-up arguments; not read by this method.</param>
protected override void MyInit(params string[] arguments)
{
    const string redisConnection = "127.0.0.1:6379,serviceName=Scheduler.NET,keepAlive=8,allowAdmin=True,connectTimeout=10000,password=6GS9F2QTkP36GggE0c3XwVwI,abortConnect=True,connectRetry=20";
    const string sourceConnection = "Database='test';Data Source= localhost;User ID=root;Password=1qazZAQ!;Port=3306";
    const string pipelineConnection = "Database='taobao';Data Source=localhost ;User ID=root;Password=1qazZAQ!;Port=4306";

    ThreadNum = 1;
    Scheduler = new RedisScheduler(redisConnection);

    Downloader = new HttpClientDownloader();
    var subContentHandler = new SubContentHandler
    {
        Start = "json(",
        End = ");",
        StartOffset = 5,
        EndOffset = 0
    };
    Downloader.AddAfterDownloadCompleteHandler(subContentHandler);

    // One start URL per "sku" column value returned by the query ({0} in the format string).
    var skuStartUrls = new BaseDbPrepareStartUrls()
    {
        Source = DataSource.MySql,
        ConnectString = sourceConnection,
        QueryString = $"SELECT * FROM jd.sku_v2_{DateTimeUtils.RunIdOfMonday} WHERE shopname is null or shopid is null order by sku",
        Columns = new[] { new DataColumn("sku") },
        FormateStrings = new List<string>
        {
            "http://chat1.jd.com/api/checkChat?my=list&pidList={0}&callback=json"
        }
    };
    PrepareStartUrls = new PrepareStartUrls[] { skuStartUrls };

    AddPipeline(new MySqlEntityPipeline(pipelineConnection));
    AddEntityType(typeof(ProductUpdater));
}
/// <summary>
/// Queues a Sina news search request for a fixed keyword, covering the window
/// from seven years and one day ago up to tomorrow, and wires the console
/// pipeline, the downloader with its <c>ReplaceHandler</c> and the
/// <c>SinaNews</c> entity type.
/// </summary>
/// <param name="arguments">Start-up arguments; not read by this method.</param>
protected override void OnInit(params string[] arguments)
{
    const string keyword = "赵丽颖";

    // Format the date range with the invariant culture: these values go into a
    // machine-readable query string, and the current culture may use a
    // non-Gregorian calendar that would yield a different year.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    // Read the clock once so both bounds derive from the same instant.
    var now = DateTime.Now;
    string startDate = now.AddYears(-7).AddDays(-1).ToString("yyyy-MM-dd", invariant);
    string endDate = now.AddDays(1).ToString("yyyy-MM-dd", invariant);

    AddRequest(
        $"http://api.search.sina.com.cn/?c=news&t=&q={keyword}&pf=2136012948&ps=2130770082&page=0&stime={startDate}&etime={endDate}&sort=rel&highlight=1&num=10&ie=utf-8&callback=jQuery1720001955628746606708_1508996230766&_=1508996681484",
        new Dictionary<string, dynamic> { { "keyword", keyword } });

    AddPipeline(new ConsoleEntityPipeline());

    Downloader = new HttpClientDownloader();
    Downloader.AddAfterDownloadCompleteHandler(new ReplaceHandler());

    AddEntityType<SinaNews>();
}
/// <summary>
/// Configures this spider: Redis scheduler, downloader with a content-replace
/// handler and an <c>IncrementTargetUrlsBuilder</c>, and — unless the
/// "noprepare" argument is present — a database-backed start URL builder.
/// Registers the <c>Item</c> entity type with <c>MyDataHanlder</c>.
/// </summary>
/// <param name="arguments">Start-up arguments; "noprepare" skips the start URL builder.</param>
protected override void MyInit(params string[] arguments)
{
    Scheduler = new RedisScheduler();

    var httpDownloader = new HttpClientDownloader();
    var slashReplacer = new ReplaceContentHandler
    {
        NewValue = "/",
        OldValue = "\\/",
    };
    httpDownloader.AddAfterDownloadCompleteHandler(slashReplacer);
    var urlIncrementer = new IncrementTargetUrlsBuilder("&s=0", 44);
    httpDownloader.AddAfterDownloadCompleteHandler(urlIncrementer);
    Downloader = httpDownloader;

    ThreadNum = 1;
    SkipWhenResultIsEmpty = true;

    bool skipPrepare = arguments.Contains("noprepare");
    if (!skipPrepare)
    {
        // {0} and {1} in the URL template correspond to the two listed columns.
        var keywordUrls = new DbStartUrlBuilder(
            Database.MySql,
            Env.DataConnectionStringSettings.ConnectionString,
            "SELECT * FROM taobao.result_keywords limit 10000",
            new[] { "bidwordstr", "tab" },
            "https://s.taobao.com/search?q={0}&imgfile=&js=1&stats_click=search_radio_all%3A1&ie=utf8&sort=sale-desc&s=0&tab={1}");
        AddStartUrlBuilder(keywordUrls);
    }

    AddEntityType(typeof(Item), new MyDataHanlder());
}
/// <summary>
/// Configures this spider: timestamped identity, downloader with an
/// <c>IncrementTargetUrlsBuilder</c>, two start URLs and the
/// <c>ArticleSummary</c> and <c>Article</c> entity types.
/// </summary>
/// <param name="arguments">Start-up arguments; not read by this method.</param>
protected override void MyInit(params string[] arguments)
{
    string timestamp = DateTime.Now.ToString("yyyy_MM_dd_HHmmss");
    Identity = "qidian_" + timestamp;

    var httpDownloader = new HttpClientDownloader();
    var urlIncrementer = new IncrementTargetUrlsBuilder("index_1.shtml");
    httpDownloader.AddAfterDownloadCompleteHandler(urlIncrementer);
    Downloader = httpDownloader;

    ThreadNum = 1;

    AddStartUrl("http://www.cas.cn/kx/kpwz/index.shtml");
    AddStartUrl("http://www.cas.cn/kx/kpwz/index_1.shtml");

    AddEntityType(typeof(ArticleSummary));
    AddEntityType(typeof(Article));
}
/// <summary>
/// Configures this spider: Redis scheduler from <c>Config.RedisConnectString</c>,
/// downloader with a content-replace handler and an
/// <c>IncrementTargetUrlsCreator</c>, and — unless the "noprepare" argument is
/// present — start URLs prepared from a database query. Registers the
/// <c>Item</c> entity type with <c>MyDataHanlder</c>.
/// </summary>
/// <param name="arguments">Start-up arguments; "noprepare" skips start URL preparation.</param>
protected override void MyInit(params string[] arguments)
{
    Scheduler = new RedisScheduler(Config.RedisConnectString);

    var httpDownloader = new HttpClientDownloader();
    var slashReplacer = new ReplaceContentHandler
    {
        NewValue = "/",
        OldValue = "\\/",
    };
    httpDownloader.AddAfterDownloadCompleteHandler(slashReplacer);
    var urlIncrementer = new IncrementTargetUrlsCreator("&s=0", null, 44);
    httpDownloader.AddAfterDownloadCompleteHandler(urlIncrementer);
    Downloader = httpDownloader;

    ThreadNum = 1;
    SkipWhenResultIsEmpty = true;

    bool skipPrepare = arguments.Contains("noprepare");
    if (!skipPrepare)
    {
        // {0} and {1} in the URL template correspond to the two listed columns.
        var keywordUrls = new BaseDbPrepareStartUrls
        {
            BulkInsert = true,
            ConnectString = Config.ConnectString,
            QueryString = "SELECT * FROM taobao.result_keywords limit 10000",
            Columns = new[] { new DataColumn("bidwordstr"), new DataColumn("tab") },
            FormateStrings = new List<string>
            {
                "https://s.taobao.com/search?q={0}&imgfile=&js=1&stats_click=search_radio_all%3A1&ie=utf8&sort=sale-desc&s=0&tab={1}"
            }
        };
        PrepareStartUrls = new PrepareStartUrls[] { keywordUrls };
    }

    AddEntityType(typeof(Item), new MyDataHanlder());
}
/// <summary>
/// Configures this spider: two JD checkChat start URLs, a downloader with a
/// <c>SubContentHandler</c>, a MySQL entity pipeline and the
/// <c>ProductUpdater</c> entity type.
/// </summary>
/// <param name="arguments">Start-up arguments; not read by this method.</param>
protected override void MyInit(params string[] arguments)
{
    Site.AddStartUrl("http://chat1.jd.com/api/checkChat?my=list&pidList=3355984&callback=json");
    Site.AddStartUrl("http://chat1.jd.com/api/checkChat?my=list&pidList=3682523&callback=json");

    var downloader = new HttpClientDownloader();
    downloader.AddAfterDownloadCompleteHandler(new SubContentHandler
    {
        Start = "json(",
        End = ");",
        StartOffset = 5,
        EndOffset = 2
    });
    // Fix: the configured downloader was previously never installed, so the
    // handler registered above could not take effect.
    Downloader = downloader;

    // NOTE(review): the database credentials are embedded in source; consider
    // moving them to configuration.
    AddPipeline(new MySqlEntityPipeline("Database='mysql';Data Source=localhost ;User ID=root;Password=1qazZAQ!;Port=3306"));
    AddEntityType(typeof(ProductUpdater));
}