/// <summary>
/// Sets up the spider: a Chrome WebDriver downloader, one JD list-page request carrying
/// extra properties, a console pipeline and the <c>Product</c> entity type.
/// </summary>
/// <param name="arguments">Start-up arguments; not read by this method.</param>
protected override void OnInit(params string[] arguments)
{
    Downloader = new WebDriverDownloader(Browser.Chrome);

    // Extra properties attached to the start request.
    var extras = new Dictionary<string, object>
    {
        { "name", "童书" },
        { "cat", "幼儿启蒙" }
    };
    AddRequest("https://list.jd.com/list.html?cat=1713,3263,3395", extras);

    AddPipeline(new ConsoleEntityPipeline());
    AddEntityType<Product>();
}
/// <summary>
/// Sets up the spider: a Chrome WebDriver downloader, one JD list-page start URL carrying
/// extra properties, a console pipeline and the <c>Product</c> entity type.
/// </summary>
/// <param name="arguments">Start-up arguments; not read by this method.</param>
protected override void OnInit(params string[] arguments)
{
    Downloader = new WebDriverDownloader(Browser.Chrome);

    // Extra properties attached to the start URL.
    var extras = new Dictionary<string, object>
    {
        { "name", "手机" },
        { "cat3", "655" }
    };
    AddStartUrl("http://list.jd.com/list.html?cat=9987,653,655&page=2&JL=6_0_0&ms=5#J_main", extras);

    AddPipeline(new ConsoleEntityPipeline());
    AddEntityType<Product>();
}
/// <summary>
/// Sets up the spider: a time-stamped identity, one JD list-page start URL carrying extra
/// properties, the <c>Product</c> entity type and a Chrome WebDriver downloader.
/// </summary>
/// <param name="arguments">Start-up arguments; not read by this method.</param>
protected override void MyInit(params string[] arguments)
{
    // The identity embeds the local time at which initialisation runs.
    string startedAt = DateTime.Now.ToString("yyyy-MM-dd HHmmss");
    Identity = $"JD sku/store test {startedAt}";

    var extras = new Dictionary<string, object>
    {
        { "name", "手机" },
        { "cat3", "655" }
    };
    AddStartUrl("http://list.jd.com/list.html?cat=9987,653,655&page=2&JL=6_0_0&ms=5#J_main", extras);

    AddEntityType(typeof(Product));
    Downloader = new WebDriverDownloader(Browser.Chrome);
}
/// <summary>
/// Sets up the spider: a time-stamped identity, a MySQL entity pipeline, one JD list-page
/// start URL carrying extra properties, the <c>Product</c> entity type and a Chrome
/// WebDriver downloader.
/// </summary>
/// <param name="arguments">Start-up arguments; not read by this method.</param>
protected override void MyInit(params string[] arguments)
{
    // The identity embeds the local time at which initialisation runs.
    string startedAt = DateTime.Now.ToString("yyyy-MM-dd HHmmss");
    Identity = $"JD sku/store test {startedAt}";

    // NOTE(review): this connection string embeds a root password in source; consider
    // loading it from configuration instead.
    const string connectionString = "Database='mysql';Data Source=localhost;User ID=root;Password=1qazZAQ!;Port=3306";
    AddPipeline(new MySqlEntityPipeline(connectionString));

    var extras = new Dictionary<string, object>
    {
        { "name", "手机" },
        { "cat3", "655" }
    };
    AddStartUrl("http://list.jd.com/list.html?cat=9987,653,655&page=2&JL=6_0_0&ms=5#J_main", extras);

    AddEntityType(typeof(Product));
    Downloader = new WebDriverDownloader(Browser.Chrome);
}
/// <summary>
/// Sets up the spider: one Baidu news search request for a fixed keyword expression,
/// a Chrome WebDriver downloader, a console pipeline and the <c>BaiduSearchEntry</c> entity type.
/// </summary>
/// <param name="arguments">Start-up arguments; not read by this method.</param>
protected override void OnInit(params string[] arguments)
{
    var word = "可乐|雪碧";

    // The keyword is placed in the query string and also passed along as the "Keyword" extra.
    string searchUrl = $"http://news.baidu.com/ns?word={word}&tn=news&from=news&cl=2&pn=0&rn=20&ct=1";
    var extras = new Dictionary<string, dynamic>
    {
        { "Keyword", word }
    };
    AddRequest(searchUrl, extras);

    Downloader = new WebDriverDownloader(Browser.Chrome);
    AddPipeline(new ConsoleEntityPipeline());
    AddEntityType<BaiduSearchEntry>();
}
/// <summary>
/// Sets up the spider: a fixed identity, one Baidu news search start URL for a fixed keyword
/// expression, a Chrome WebDriver downloader and the <c>BaiduSearchEntry</c> entity type.
/// </summary>
/// <param name="arguments">Start-up arguments; not read by this method.</param>
protected override void MyInit(params string[] arguments)
{
    Identity = "hello";

    var word = "可乐|雪碧";

    // The keyword is placed in the query string and also passed along as the "Keyword" extra.
    string searchUrl = $"http://news.baidu.com/ns?word={word}&tn=news&from=news&cl=2&pn=0&rn=20&ct=1";
    var extras = new Dictionary<string, dynamic>
    {
        { "Keyword", word }
    };
    AddStartUrl(searchUrl, extras);

    Downloader = new WebDriverDownloader(Core.Infrastructure.Browser.Chrome);
    AddEntityType<BaiduSearchEntry>();
}
/// <summary>
/// Signs in by letting a Chrome WebDriver downloader fill in and submit the login form,
/// then checks which URL the resulting page ended up on.
/// </summary>
/// <param name="url">Address used to construct the <c>Site</c> that is crawled.</param>
/// <param name="userName">Value entered into the element with id <c>userId</c>; also written to the log.</param>
/// <param name="password">Value entered into the element with id <c>passwd</c>.</param>
/// <returns>
/// <c>true</c> when the resulting page's target URL contains <c>hufen123.vipsinaapp.com</c>
/// (compared case-insensitively); <c>false</c> otherwise, including when no page/URL is
/// returned or any exception is thrown.
/// </returns>
private bool SignIn(string url, string userName, string password)
{
    try
    {
        // Define the Site to crawl: encoding and request headers.
        var headers = new Dictionary<string, string>
        {
            { "Accept", "application/xml, text/xml, */*; q=0.01" },
            { "Content-Type", "application/x-www-form-urlencoded; charset=UTF-8" }
        };
        var site = new Site(url)
        {
            EncodingName = "UTF-8",
            Headers = headers,
            RemoveOutboundLinks = true,
            DownloadFiles = false
        };

        var option = new Option(LanguageEnum.English);
        var webDownloader = new WebDriverDownloader(Browser.Chrome, 200, null, option);
        webDownloader.FormSubmit = new FormSubmitHandler()
        {
            Selectors = new List<KeyValuePair<Selector, string>>
            {
                new KeyValuePair<Selector, string>(new Selector(SelectorType.Id, "userId"), userName),
                new KeyValuePair<Selector, string>(new Selector(SelectorType.Id, "passwd"), password)
            },
            SubmitSelector = new Selector(SelectorType.XPath, "//p[@class='oauth_formbtn']/a[1]"),
            WaitSeconds = 5, // wait 5 seconds before moving on to the next step
            ChildFormActions = GetChildFormActions()
        };

        Spider spider = Spider.Create(site, webDownloader);
        Page page = spider.Run();

        // Host names are not linguistic text: compare ordinally and case-insensitively so the
        // result does not depend on the current culture. A missing page or URL counts as a
        // failed sign-in instead of surfacing as a NullReferenceException.
        string targetUrl = page?.TargetUrl;
        bool succeeded = targetUrl != null
            && targetUrl.IndexOf("hufen123.vipsinaapp.com", StringComparison.OrdinalIgnoreCase) >= 0;

        if (succeeded)
        {
            Logger.Info($"签到成功 : {userName}");
        }
        else
        {
            // Sign-in failed.
            Logger.Info($"签到失败 : {userName}");
        }

        return succeeded;
    }
    catch (Exception ex)
    {
        // Any failure while driving the browser is logged and reported as a failed sign-in.
        Logger.Info(ex);
        Logger.Info($"签到失败 : {userName}");
        return false;
    }
}
/// <summary>
/// Sets up the spider: an NLog monitor, a fixed identity, one Baidu news search start URL for
/// a fixed keyword expression, a headless Chrome WebDriver downloader, the empty-sleep time
/// and the <c>BaiduSearchEntry</c> entity type.
/// </summary>
/// <param name="arguments">Start-up arguments; not read by this method.</param>
protected override void MyInit(params string[] arguments)
{
    Monitor = new NLogMonitor();
    Identity = "hello";

    var word = "可乐|雪碧";

    // The keyword is placed in the query string and also passed along as the "Keyword" extra.
    string searchUrl = $"http://news.baidu.com/ns?word={word}&tn=news&from=news&cl=2&pn=0&rn=20&ct=1";
    var extras = new Dictionary<string, dynamic>
    {
        { "Keyword", word }
    };
    AddStartUrl(searchUrl, extras);

    var headlessOption = new Option { Headless = true };
    Downloader = new WebDriverDownloader(Browser.Chrome, headlessOption);

    EmptySleepTime = 6000;
    AddEntityType<BaiduSearchEntry>();
}
/// <summary>
/// Chooses a downloader from the <c>$.Type</c> value of a JSON configuration object.
/// </summary>
/// <param name="jobject">Downloader configuration; may be <c>null</c>.</param>
/// <returns>
/// A Chrome <c>WebDriverDownloader</c> when <c>$.Type</c> is <c>"JsEngine"</c> (builds without
/// <c>NET_CORE</c> only); an <c>HttpClientDownloader</c> in every other case, including a
/// <c>null</c> configuration or a missing type.
/// </returns>
/// <exception cref="SpiderException">
/// <c>$.Type</c> is <c>"JsEngine"</c> and the build defines <c>NET_CORE</c>.
/// </exception>
private IDownloader GetDownloader(JObject jobject)
{
    if (jobject == null)
    {
        return new HttpClientDownloader();
    }

    var requestedType = jobject.SelectToken("$.Type")?.ToString();
    if (requestedType == "JsEngine")
    {
#if !NET_CORE
        return new WebDriverDownloader(Browser.Chrome);
#else
        throw new SpiderException("UNSPORT WEBDRIVER DOWNLOADER.");
#endif
    }

    // "Http" and any unrecognised or missing type all use the plain HTTP downloader.
    return new HttpClientDownloader();
}
/// <summary>
/// Crawls <paramref name="url"/> with a Chrome WebDriver downloader that fills in and submits
/// a login form, and returns the page produced by the spider run.
/// </summary>
/// <param name="url">Address used to construct the <c>Site</c> that is crawled.</param>
/// <returns>
/// The page returned by <c>Spider.Run</c>, or <c>null</c> when any exception is thrown
/// (the exception is logged).
/// </returns>
private static Page Crawl(string url)
{
    try
    {
        // Define the Site to crawl: encoding and request headers.
        var site = new Site(url)
        {
            EncodingName = "UTF-8",
            Headers = new Dictionary<string, string>
            {
                { "Accept", "application/xml, text/xml, */*; q=0.01" },
                { "Content-Type", "application/x-www-form-urlencoded; charset=UTF-8" }
            },
            RemoveOutboundLinks = true,
            DownloadFiles = false
        };

        // NOTE(review): the account name and password below are hard-coded in source;
        // consider supplying them from configuration instead.
        var credentialFields = new List<KeyValuePair<Selector, string>>
        {
            new KeyValuePair<Selector, string>(new Selector(SelectorType.Id, "userId"), "*****@*****.**"),
            new KeyValuePair<Selector, string>(new Selector(SelectorType.Id, "passwd"), "Worm0429@")
        };

        var browserOption = new Option(LanguageEnum.English);
        var browserDownloader = new WebDriverDownloader(Browser.Chrome, 200, null, browserOption)
        {
            FormSubmit = new FormSubmitHandler()
            {
                Selectors = credentialFields,
                SubmitSelector = new Selector(SelectorType.XPath, "//p[@class='oauth_formbtn']/a[1]"),
                ChildFormActions = GetChildFormActions()
            }
        };

        return Spider.Create(site, browserDownloader).Run();
    }
    catch (Exception ex)
    {
        // Failures are logged and reported to the caller as a null page.
        Logger.Info(ex);
        return null;
    }
}
/// <summary>
/// Builds a downloader from its JSON configuration: picks the concrete downloader from
/// <c>$.Type</c>, applies WebDriver-specific settings, then attaches the configured download
/// validations and the optional post-body generator.
/// </summary>
/// <param name="jobject">
/// Downloader configuration. When <c>null</c>, a default <c>HttpDownloader</c> is returned.
/// </param>
/// <returns>The configured downloader.</returns>
/// <exception cref="SpiderExceptoin">
/// <c>$.Type</c> is missing; a validation entry has no <c>Type</c> or an unsupported one; or a
/// WebDriver downloader is requested in a build that defines <c>NET_CORE</c>.
/// </exception>
private Downloader GetDownloader(JObject jobject)
{
    if (jobject == null)
    {
        return new HttpDownloader();
    }

    Downloader downloader;
    var downloaderType = jobject.SelectToken("$.Type")?.ToObject<Downloader.Types>();
    if (downloaderType == null)
    {
        throw new SpiderExceptoin("Missing Downloader type: " + jobject);
    }

    switch (downloaderType)
    {
        case Configuration.Downloader.Types.WebDriverDownloader:
        {
#if !NET_CORE
            var webDriverDownloader = new WebDriverDownloader();

            // The login section is optional; its "Type" selects the concrete loginer class.
            var loginType = jobject.SelectToken("$.Login.Type");
            if (loginType != null)
            {
                switch (loginType.ToObject<Loginer.Types>())
                {
                    case Loginer.Types.Common:
                    {
                        webDriverDownloader.Login = jobject.SelectToken("$.Login").ToObject<CommonLoginer>();
                        break;
                    }
                    case Loginer.Types.Manual:
                    {
                        webDriverDownloader.Login = jobject.SelectToken("$.Login").ToObject<ManualLoginer>();
                        break;
                    }
                }
            }

            // NOTE(review): "$.Browser", "$.RedialLimit" and "$.VerifyCode" are dereferenced
            // unguarded, so a missing key throws NullReferenceException — confirm that these
            // keys are mandatory for WebDriver configurations.
            webDriverDownloader.Browser = jobject.SelectToken("$.Browser").ToObject<Browser>();
            webDriverDownloader.RedialLimit = jobject.SelectToken("$.RedialLimit").ToObject<int>();

            // "$.GeneratePostBody" is optional (the common code below treats it as such), so
            // it must not be dereferenced unguarded here either.
            var webDriverPostBody = jobject.SelectToken("$.GeneratePostBody")?.ToObject<GeneratePostBody>();
            if (webDriverPostBody != null)
            {
                webDriverDownloader.GeneratePostBody = webDriverPostBody;
            }

            webDriverDownloader.VerifyCode = jobject.SelectToken("$.VerifyCode").ToObject<VerifyCode>();
            downloader = webDriverDownloader;
            break;
#else
            throw new SpiderExceptoin("UNSPORT WEBDRIVER DOWNLOADER.");
#endif
        }
        case Configuration.Downloader.Types.HttpClientDownloader:
        {
            downloader = new HttpDownloader();
            break;
        }
        case Configuration.Downloader.Types.FileDownloader:
        {
            downloader = new FileDownloader();
            break;
        }
        default:
        {
            downloader = new HttpDownloader();
            break;
        }
    }

    // Optional list of download validations; each entry must declare a supported "Type".
    var validations = jobject.SelectToken("$.DownloadValidations");
    if (validations != null)
    {
        foreach (var validation in validations)
        {
            var downloadValidationType = validation.SelectToken("$.Type")?.ToObject<DownloadValidation.Types>();
            if (downloadValidationType == null)
            {
                throw new SpiderExceptoin("Missing DownloadValidation Type: " + jobject);
            }

            switch (downloadValidationType)
            {
                case DownloadValidation.Types.Contains:
                {
                    downloader.DownloadValidations.Add(validation.ToObject<ContainsDownloadValidation>());
                    break;
                }
                default:
                {
                    throw new SpiderExceptoin("Unspodrt validation type: " + downloadValidationType);
                }
            }
        }
    }

    // Optional post-body generator, applied to whichever downloader was created.
    var generatePostBody = jobject.SelectToken("$.GeneratePostBody")?.ToObject<GeneratePostBody>();
    if (generatePostBody != null)
    {
        downloader.GeneratePostBody = generatePostBody;
    }

    return downloader;
}
/// <summary>
/// Builds a downloader from its JSON configuration: picks the concrete downloader from
/// <c>$.Type</c>, applies WebDriver-specific settings, then attaches the download handlers
/// and the optional post-body generator.
/// </summary>
/// <param name="jobject">
/// Downloader configuration. When <c>null</c>, a default <c>HttpDownloader</c> is returned.
/// </param>
/// <returns>The configured downloader.</returns>
/// <exception cref="SpiderException">
/// <c>$.Type</c> is missing, or a WebDriver downloader is requested in a build that defines
/// <c>NET_CORE</c>.
/// </exception>
private Downloader GetDownloader(JObject jobject)
{
    if (jobject == null)
    {
        return new HttpDownloader();
    }

    Downloader downloader;
    var downloaderType = jobject.SelectToken("$.Type")?.ToObject<Downloader.Types>();
    if (downloaderType == null)
    {
        throw new SpiderException("Missing Downloader type: " + jobject);
    }

    switch (downloaderType)
    {
        case Configuration.Downloader.Types.WebDriverDownloader:
        {
#if !NET_CORE
            var webDriverDownloader = new WebDriverDownloader();

            // The login section is optional; its "Type" selects the concrete loginer class.
            var loginType = jobject.SelectToken("$.Login.Type");
            if (loginType != null)
            {
                switch (loginType.ToObject<Loginer.Types>())
                {
                    case Loginer.Types.Common:
                    {
                        webDriverDownloader.Login = jobject.SelectToken("$.Login").ToObject<CommonLoginer>();
                        break;
                    }
                    case Loginer.Types.Manual:
                    {
                        webDriverDownloader.Login = jobject.SelectToken("$.Login").ToObject<ManualLoginer>();
                        break;
                    }
                }
            }

            // NOTE(review): "$.Browser" and "$.VerifyCode" are dereferenced unguarded, so a
            // missing key throws NullReferenceException — confirm that these keys are
            // mandatory for WebDriver configurations.
            webDriverDownloader.Browser = jobject.SelectToken("$.Browser").ToObject<Browser>();

            // Reading "$.RedialLimit" is currently disabled:
            //webDriverDownloader.RedialLimit = jobject.SelectToken("$.RedialLimit").ToObject<int>();

            // "$.PostBodyGenerator" is optional (the common code below treats it as such), so
            // it must not be dereferenced unguarded here either.
            var webDriverPostBodyGenerator = jobject.SelectToken("$.PostBodyGenerator")?.ToObject<PostBodyGenerator>();
            if (webDriverPostBodyGenerator != null)
            {
                webDriverDownloader.PostBodyGenerator = webDriverPostBodyGenerator;
            }

            webDriverDownloader.VerifyCode = jobject.SelectToken("$.VerifyCode").ToObject<VerifyCode>();
            downloader = webDriverDownloader;
            break;
#else
            throw new SpiderException("UNSPORT WEBDRIVER DOWNLOADER.");
#endif
        }
        case Configuration.Downloader.Types.HttpClientDownloader:
        {
            downloader = new HttpDownloader();
            break;
        }
        case Configuration.Downloader.Types.FileDownloader:
        {
            downloader = new FileDownloader();
            break;
        }
        default:
        {
            downloader = new HttpDownloader();
            break;
        }
    }

    // Handlers are built from every element of the "$.Handlers" array.
    downloader.Handlers = GetDownloadHandlers(jobject.SelectTokens("$.Handlers[*]"));

    // Optional post-body generator, applied to whichever downloader was created.
    var postBodyGenerator = jobject.SelectToken("$.PostBodyGenerator")?.ToObject<PostBodyGenerator>();
    if (postBodyGenerator != null)
    {
        downloader.PostBodyGenerator = postBodyGenerator;
    }

    return downloader;
}