Exemple #1
0
 /// <summary>
 /// Sets up the spider: assigns a Chrome <c>WebDriverDownloader</c>, queues one request for a
 /// list.jd.com listing page with "name" and "cat" extras, registers a <c>ConsoleEntityPipeline</c>
 /// and the <c>Product</c> entity type.
 /// </summary>
 /// <param name="arguments">Start-up arguments; not read by this method.</param>
 protected override void OnInit(params string[] arguments)
 {
     Downloader = new WebDriverDownloader(Browser.Chrome);

     // Extra values that travel with the seed request.
     var extras = new Dictionary<string, object>
     {
         ["name"] = "童书",
         ["cat"] = "幼儿启蒙"
     };
     AddRequest("https://list.jd.com/list.html?cat=1713,3263,3395", extras);

     AddPipeline(new ConsoleEntityPipeline());
     AddEntityType<Product>();
 }
 /// <summary>
 /// Sets up the spider: assigns a Chrome <c>WebDriverDownloader</c>, adds one start URL on
 /// list.jd.com with "name" and "cat3" extras, registers a <c>ConsoleEntityPipeline</c>
 /// and the <c>Product</c> entity type.
 /// </summary>
 /// <param name="arguments">Start-up arguments; not read by this method.</param>
 protected override void OnInit(params string[] arguments)
 {
     const string startUrl = "http://list.jd.com/list.html?cat=9987,653,655&page=2&JL=6_0_0&ms=5#J_main";

     Downloader = new WebDriverDownloader(Browser.Chrome);

     // Extra values attached to the start URL.
     var extras = new Dictionary<string, object>();
     extras.Add("name", "手机");
     extras.Add("cat3", "655");
     AddStartUrl(startUrl, extras);

     AddPipeline(new ConsoleEntityPipeline());
     AddEntityType<Product>();
 }
 /// <summary>
 /// Sets up the spider: assigns a timestamped <c>Identity</c>, adds one start URL on list.jd.com
 /// with "name" and "cat3" extras, registers the <c>Product</c> entity type and assigns a Chrome
 /// <c>WebDriverDownloader</c>.
 /// </summary>
 /// <param name="arguments">Start-up arguments; not read by this method.</param>
 protected override void MyInit(params string[] arguments)
 {
     // Format with the invariant culture so the identity always carries a Gregorian, ASCII-digit
     // timestamp, whatever the current thread culture's calendar is.
     Identity = "JD sku/store test " + DateTime.Now.ToString("yyyy-MM-dd HHmmss", System.Globalization.CultureInfo.InvariantCulture);
     AddStartUrl("http://list.jd.com/list.html?cat=9987,653,655&page=2&JL=6_0_0&ms=5#J_main", new Dictionary<string, object> {
         { "name", "手机" }, { "cat3", "655" }
     });
     AddEntityType(typeof(Product));
     Downloader = new WebDriverDownloader(Browser.Chrome);
 }
Exemple #4
0
 /// <summary>
 /// Sets up the spider: assigns a timestamped <c>Identity</c>, registers a <c>MySqlEntityPipeline</c>,
 /// adds one start URL on list.jd.com with "name" and "cat3" extras, registers the <c>Product</c>
 /// entity type and assigns a Chrome <c>WebDriverDownloader</c>.
 /// </summary>
 /// <param name="arguments">Start-up arguments; not read by this method.</param>
 protected override void MyInit(params string[] arguments)
 {
     // Format with the invariant culture so the identity always carries a Gregorian, ASCII-digit
     // timestamp, whatever the current thread culture's calendar is.
     Identity = "JD sku/store test " + DateTime.Now.ToString("yyyy-MM-dd HHmmss", System.Globalization.CultureInfo.InvariantCulture);

     // NOTE(review): the connection string embeds database credentials in source; consider
     // loading it from configuration or a secret store instead.
     AddPipeline(new MySqlEntityPipeline("Database='mysql';Data Source=localhost;User ID=root;Password=1qazZAQ!;Port=3306"));
     AddStartUrl("http://list.jd.com/list.html?cat=9987,653,655&page=2&JL=6_0_0&ms=5#J_main", new Dictionary<string, object> {
         { "name", "手机" }, { "cat3", "655" }
     });
     AddEntityType(typeof(Product));
     Downloader = new WebDriverDownloader(Browser.Chrome);
 }
            /// <summary>
            /// Sets up the spider: queues one news.baidu.com search request for a fixed keyword
            /// (passed along as the "Keyword" extra), assigns a Chrome <c>WebDriverDownloader</c>,
            /// registers a <c>ConsoleEntityPipeline</c> and the <c>BaiduSearchEntry</c> entity type.
            /// </summary>
            /// <param name="arguments">Start-up arguments; not read by this method.</param>
            protected override void OnInit(params string[] arguments)
            {
                const string searchUrlFormat = "http://news.baidu.com/ns?word={0}&tn=news&from=news&cl=2&pn=0&rn=20&ct=1";
                var keyword = "可乐|雪碧";

                // The keyword is substituted into the query string and also attached as an extra.
                var requestUrl = string.Format(searchUrlFormat, keyword);
                var extras = new Dictionary<string, dynamic>
                {
                    ["Keyword"] = keyword
                };
                AddRequest(requestUrl, extras);

                Downloader = new WebDriverDownloader(Browser.Chrome);
                AddPipeline(new ConsoleEntityPipeline());
                AddEntityType<BaiduSearchEntry>();
            }
Exemple #6
0
            /// <summary>
            /// Sets up the spider: assigns the identity "hello", adds one news.baidu.com search URL
            /// for a fixed keyword (passed along as the "Keyword" extra), assigns a Chrome
            /// <c>WebDriverDownloader</c> and registers the <c>BaiduSearchEntry</c> entity type.
            /// </summary>
            /// <param name="arguments">Start-up arguments; not read by this method.</param>
            protected override void MyInit(params string[] arguments)
            {
                Identity = "hello";

                var keyword = "可乐|雪碧";
                var startUrl = string.Format(
                    "http://news.baidu.com/ns?word={0}&tn=news&from=news&cl=2&pn=0&rn=20&ct=1",
                    keyword);

                // Extras carried with the start URL.
                var extras = new Dictionary<string, dynamic>();
                extras.Add("Keyword", keyword);
                AddStartUrl(startUrl, extras);

                Downloader = new WebDriverDownloader(Core.Infrastructure.Browser.Chrome);
                AddEntityType<BaiduSearchEntry>();
            }
Exemple #7
0
        /// <summary>
        /// Runs a Chrome WebDriver-backed spider against <paramref name="url"/>, filling the
        /// "userId" and "passwd" form fields with the supplied credentials, and reports whether
        /// the resulting page URL is on the expected host.
        /// </summary>
        /// <param name="url">Address used to construct the <c>Site</c>.</param>
        /// <param name="userName">Value typed into the element with id "userId".</param>
        /// <param name="password">Value typed into the element with id "passwd".</param>
        /// <returns>
        /// <c>true</c> when the final page's <c>TargetUrl</c> contains "hufen123.vipsinaapp.com"
        /// (case-insensitively); <c>false</c> otherwise, or when any exception is thrown.
        /// </returns>
        private bool SignIn(string url, string userName, string password)
        {
            var succeeded = true;

            try
            {
                // Configure the Site to crawl: encoding, headers, cookies, proxy, etc.
                var headers = new Dictionary<string, string>
                {
                    { "Accept", "application/xml, text/xml, */*; q=0.01" },
                    { "Content-Type", "application/x-www-form-urlencoded; charset=UTF-8" }
                };
                var site = new Site(url)
                {
                    EncodingName = "UTF-8", Headers = headers, RemoveOutboundLinks = true, DownloadFiles = false
                };

                var option        = new Option(LanguageEnum.English);
                var webDownloader = new WebDriverDownloader(Browser.Chrome, 200, null, option);
                webDownloader.FormSubmit = new FormSubmitHandler()
                {
                    Selectors = new List<KeyValuePair<Selector, string>>
                    {
                        new KeyValuePair<Selector, string>(new Selector(SelectorType.Id, "userId"), userName),
                        new KeyValuePair<Selector, string>(new Selector(SelectorType.Id, "passwd"), password)
                    },
                    SubmitSelector   = new Selector(SelectorType.XPath, "//p[@class='oauth_formbtn']/a[1]"),
                    WaitSeconds      = 5, // wait 5 seconds before moving on to the next step
                    ChildFormActions = GetChildFormActions()
                };

                Spider spider = Spider.Create(site, webDownloader);
                Page   page   = spider.Run();

                // Compare ordinally and case-insensitively: a culture-sensitive ToLower() would
                // miss the host under Turkish casing rules (upper-case 'I' lowers to dotless 'ı').
                var landedOnTarget = page.TargetUrl.IndexOf("hufen123.vipsinaapp.com", StringComparison.OrdinalIgnoreCase) >= 0;

                if (!landedOnTarget)
                {
                    // Sign-in failed.
                    succeeded = false;
                    Logger.Info($"签到失败 : {userName}");
                }
                else
                {
                    Logger.Info($"签到成功 : {userName}");
                }
            }
            catch (Exception ex)
            {
                // Any failure while crawling (including a null page) counts as a failed sign-in.
                Logger.Info(ex);
                Logger.Info($"签到失败 : {userName}");
                succeeded = false;
            }

            return succeeded;
        }
Exemple #8
0
            /// <summary>
            /// Sets up the spider: assigns an <c>NLogMonitor</c> and the identity "hello", adds one
            /// news.baidu.com search URL for a fixed keyword (passed along as the "Keyword" extra),
            /// assigns a Chrome <c>WebDriverDownloader</c> with <c>Headless = true</c>, sets
            /// <c>EmptySleepTime</c> to 6000 and registers the <c>BaiduSearchEntry</c> entity type.
            /// </summary>
            /// <param name="arguments">Start-up arguments; not read by this method.</param>
            protected override void MyInit(params string[] arguments)
            {
                const string searchUrlFormat = "http://news.baidu.com/ns?word={0}&tn=news&from=news&cl=2&pn=0&rn=20&ct=1";

                Monitor = new NLogMonitor();
                Identity = "hello";

                var keyword = "可乐|雪碧";
                var extras = new Dictionary<string, dynamic>
                {
                    ["Keyword"] = keyword
                };
                AddStartUrl(string.Format(searchUrlFormat, keyword), extras);

                // Run Chrome without a visible window.
                var browserOptions = new Option
                {
                    Headless = true
                };
                Downloader = new WebDriverDownloader(Browser.Chrome, browserOptions);

                EmptySleepTime = 6000;
                AddEntityType<BaiduSearchEntry>();
            }
        /// <summary>
        /// Picks a downloader from its JSON configuration.
        /// </summary>
        /// <param name="jobject">
        /// Downloader configuration; its "$.Type" token selects the implementation. May be <c>null</c>.
        /// </param>
        /// <returns>
        /// A Chrome <c>WebDriverDownloader</c> when the type is "JsEngine" (builds without NET_CORE only);
        /// an <c>HttpClientDownloader</c> for "Http", any other or missing type, or a <c>null</c> configuration.
        /// </returns>
        /// <exception cref="SpiderException">The type is "JsEngine" and the build defines NET_CORE.</exception>
        private IDownloader GetDownloader(JObject jobject)
        {
            if (jobject == null)
            {
                return new HttpClientDownloader();
            }

            var requestedKind = jobject.SelectToken("$.Type")?.ToString();

            if (requestedKind == "JsEngine")
            {
#if !NET_CORE
                return new WebDriverDownloader(Browser.Chrome);
#else
                throw new SpiderException("UNSPORT WEBDRIVER DOWNLOADER.");
#endif
            }

            // "Http" and every unrecognised (or absent) type share the plain HTTP downloader.
            return new HttpClientDownloader();
        }
Exemple #10
0
        /// <summary>
        /// Crawls <paramref name="url"/> once with a Chrome <c>WebDriverDownloader</c> that is
        /// configured to submit a login form, and returns the page produced by the spider run.
        /// </summary>
        /// <param name="url">Address used to construct the <c>Site</c>.</param>
        /// <returns>
        /// The page returned by <c>spider.Run()</c>, or <c>null</c> when any exception is thrown
        /// (the exception is logged).
        /// </returns>
        private static Page Crawl(string url)
        {
            try
            {
                // Configure the Site to crawl: encoding, headers, cookies, proxy, etc.
                var requestHeaders = new Dictionary<string, string>
                {
                    { "Accept", "application/xml, text/xml, */*; q=0.01" },
                    { "Content-Type", "application/x-www-form-urlencoded; charset=UTF-8" }
                };

                var targetSite = new Site(url);
                targetSite.EncodingName = "UTF-8";
                targetSite.Headers = requestHeaders;
                targetSite.RemoveOutboundLinks = true;
                targetSite.DownloadFiles = false;

                var browserOption = new Option(LanguageEnum.English);
                var browserDownloader = new WebDriverDownloader(Browser.Chrome, 200, null, browserOption);

                // NOTE(review): the login values below are hard-coded in source; consider moving
                // them to configuration.
                var formFields = new List<KeyValuePair<Selector, string>>
                {
                    new KeyValuePair<Selector, string>(new Selector(SelectorType.Id, "userId"), "*****@*****.**"),
                    new KeyValuePair<Selector, string>(new Selector(SelectorType.Id, "passwd"), "Worm0429@")
                };
                var loginForm = new FormSubmitHandler();
                loginForm.Selectors = formFields;
                loginForm.SubmitSelector = new Selector(SelectorType.XPath, "//p[@class='oauth_formbtn']/a[1]");
                loginForm.ChildFormActions = GetChildFormActions();
                browserDownloader.FormSubmit = loginForm;

                Spider crawler = Spider.Create(targetSite, browserDownloader);
                return crawler.Run();
            }
            catch (Exception ex)
            {
                Logger.Info(ex);
                return null;
            }
        }
Exemple #11
0
        /// <summary>
        /// Builds a <c>Downloader</c> from its JSON configuration: selects the implementation from
        /// "$.Type", applies WebDriver-specific settings, then attaches the configured download
        /// validations and the optional "$.GeneratePostBody" setting.
        /// </summary>
        /// <param name="jobject">
        /// Downloader configuration. When <c>null</c>, a default <c>HttpDownloader</c> is returned.
        /// </param>
        /// <returns>The configured downloader.</returns>
        /// <exception cref="SpiderExceptoin">
        /// "$.Type" is missing; a validation entry has no "$.Type" or an unsupported one; or the
        /// WebDriver downloader is requested in a build that defines NET_CORE.
        /// </exception>
        private Downloader GetDownloader(JObject jobject)
        {
            if (jobject == null)
            {
                return new HttpDownloader();
            }

            Downloader downloader;

            var downloaderType = jobject.SelectToken("$.Type")?.ToObject<Downloader.Types>();

            if (downloaderType == null)
            {
                throw new SpiderExceptoin("Missing Downloader type: " + jobject);
            }

            switch (downloaderType)
            {
            case Configuration.Downloader.Types.WebDriverDownloader:
            {
#if !NET_CORE
                var webDriverDownloader = new WebDriverDownloader();

                // The login handler is optional; its concrete type is chosen by "$.Login.Type".
                var loginType = jobject.SelectToken("$.Login.Type");
                if (loginType != null)
                {
                    switch (loginType.ToObject<Loginer.Types>())
                    {
                    case Loginer.Types.Common:
                    {
                        webDriverDownloader.Login = jobject.SelectToken("$.Login").ToObject<CommonLoginer>();
                        break;
                    }

                    case Loginer.Types.Manual:
                    {
                        webDriverDownloader.Login = jobject.SelectToken("$.Login").ToObject<ManualLoginer>();
                        break;
                    }
                    }
                }

                webDriverDownloader.Browser     = jobject.SelectToken("$.Browser").ToObject<Browser>();
                webDriverDownloader.RedialLimit = jobject.SelectToken("$.RedialLimit").ToObject<int>();
                webDriverDownloader.VerifyCode  = jobject.SelectToken("$.VerifyCode").ToObject<VerifyCode>();

                // "$.GeneratePostBody" is deliberately not read here: it is applied null-safely to
                // every downloader kind after the switch, and an unchecked read in this branch
                // would throw NullReferenceException whenever the token is absent.

                downloader = webDriverDownloader;
                break;
#else
                throw new SpiderExceptoin("UNSPORT WEBDRIVER DOWNLOADER.");
#endif
            }

            case Configuration.Downloader.Types.HttpClientDownloader:
            {
                downloader = new HttpDownloader();
                break;
            }

            case Configuration.Downloader.Types.FileDownloader:
            {
                downloader = new FileDownloader();
                break;
            }

            default:
            {
                downloader = new HttpDownloader();
                break;
            }
            }

            // Optional list of validations applied to downloaded content.
            var validations = jobject.SelectToken("$.DownloadValidations");
            if (validations != null)
            {
                foreach (var validation in validations)
                {
                    var downloadValidationType = validation.SelectToken("$.Type")?.ToObject<DownloadValidation.Types>();
                    if (downloadValidationType == null)
                    {
                        throw new SpiderExceptoin("Missing DownloadValidation Type: " + jobject);
                    }

                    switch (downloadValidationType)
                    {
                    case DownloadValidation.Types.Contains:
                    {
                        downloader.DownloadValidations.Add(validation.ToObject<ContainsDownloadValidation>());
                        break;
                    }

                    default:
                    {
                        throw new SpiderExceptoin("Unspodrt validation type: " + downloadValidationType);
                    }
                    }
                }
            }

            // Optional post-body generator, shared by all downloader kinds.
            var generatePostBody = jobject.SelectToken("$.GeneratePostBody")?.ToObject<GeneratePostBody>();
            if (generatePostBody != null)
            {
                downloader.GeneratePostBody = generatePostBody;
            }

            return downloader;
        }
Exemple #12
0
        /// <summary>
        /// Builds a <c>Downloader</c> from its JSON configuration: selects the implementation from
        /// "$.Type", applies WebDriver-specific settings, then attaches the download handlers from
        /// "$.Handlers[*]" and the optional "$.PostBodyGenerator" setting.
        /// </summary>
        /// <param name="jobject">
        /// Downloader configuration. When <c>null</c>, a default <c>HttpDownloader</c> is returned.
        /// </param>
        /// <returns>The configured downloader.</returns>
        /// <exception cref="SpiderException">
        /// "$.Type" is missing, or the WebDriver downloader is requested in a build that defines NET_CORE.
        /// </exception>
        private Downloader GetDownloader(JObject jobject)
        {
            if (jobject == null)
            {
                return new HttpDownloader();
            }

            Downloader downloader;

            var downloaderType = jobject.SelectToken("$.Type")?.ToObject<Downloader.Types>();

            if (downloaderType == null)
            {
                throw new SpiderException("Missing Downloader type: " + jobject);
            }

            switch (downloaderType)
            {
            case Configuration.Downloader.Types.WebDriverDownloader:
            {
#if !NET_CORE
                var webDriverDownloader = new WebDriverDownloader();

                // The login handler is optional; its concrete type is chosen by "$.Login.Type".
                var loginType = jobject.SelectToken("$.Login.Type");
                if (loginType != null)
                {
                    switch (loginType.ToObject<Loginer.Types>())
                    {
                    case Loginer.Types.Common:
                    {
                        webDriverDownloader.Login = jobject.SelectToken("$.Login").ToObject<CommonLoginer>();
                        break;
                    }

                    case Loginer.Types.Manual:
                    {
                        webDriverDownloader.Login = jobject.SelectToken("$.Login").ToObject<ManualLoginer>();
                        break;
                    }
                    }
                }

                webDriverDownloader.Browser    = jobject.SelectToken("$.Browser").ToObject<Browser>();
                webDriverDownloader.VerifyCode = jobject.SelectToken("$.VerifyCode").ToObject<VerifyCode>();

                // "$.PostBodyGenerator" is deliberately not read here: it is applied null-safely to
                // every downloader kind after the switch, and an unchecked read in this branch
                // would throw NullReferenceException whenever the token is absent.

                downloader = webDriverDownloader;
                break;
#else
                throw new SpiderException("UNSPORT WEBDRIVER DOWNLOADER.");
#endif
            }

            case Configuration.Downloader.Types.HttpClientDownloader:
            {
                downloader = new HttpDownloader();
                break;
            }

            case Configuration.Downloader.Types.FileDownloader:
            {
                downloader = new FileDownloader();
                break;
            }

            default:
            {
                downloader = new HttpDownloader();
                break;
            }
            }

            downloader.Handlers = GetDownloadHandlers(jobject.SelectTokens("$.Handlers[*]"));

            // Optional post-body generator, shared by all downloader kinds.
            var postBodyGenerator = jobject.SelectToken("$.PostBodyGenerator")?.ToObject<PostBodyGenerator>();
            if (postBodyGenerator != null)
            {
                downloader.PostBodyGenerator = postBodyGenerator;
            }

            return downloader;
        }