Пример #1
0
        /// <summary>
        /// Runs the request through <c>GetBytes</c> and returns the response with its
        /// body decoded as UTF-8 text.
        /// </summary>
        /// <param name="request">Request to execute; its Url is written to the debug log.</param>
        /// <returns>
        /// A string result holding the decoded content together with the cookies and
        /// status taken from the byte result.
        /// </returns>
        public async Task<WebClientStringResult> GetString(WebRequest request)
        {
            logger.Debug(string.Format("UnixLibCurlWebClient:GetString(Url:{0})", request.Url));

            var raw = await GetBytes(request);

            // Only content, cookies and status are carried over; the body is always read as UTF-8.
            var text = new WebClientStringResult();
            text.Content = Encoding.UTF8.GetString(raw.Content);
            text.Cookies = raw.Cookies;
            text.Status = raw.Status;
            return text;
        }
Пример #2
0
 /// <summary>
 /// String-result overload of FollowIfRedirect: copies the response into a
 /// byte result, delegates to the byte-result overload, then copies the outcome
 /// back into <paramref name="response"/>.
 /// </summary>
 /// <param name="response">Response to inspect; updated in place from the byte result.</param>
 /// <param name="referrer">Passed through unchanged to the byte-result overload.</param>
 /// <param name="overrideRedirectUrl">Passed through unchanged to the byte-result overload.</param>
 /// <param name="overrideCookies">Passed through unchanged to the byte-result overload.</param>
 protected async Task FollowIfRedirect(WebClientStringResult response, string referrer = null, string overrideRedirectUrl = null, string overrideCookies = null)
 {
     // string result -> byte result
     var bytes = new WebClientByteResult();
     Mapper.Map(response, bytes);

     await FollowIfRedirect(bytes, referrer, overrideRedirectUrl, overrideCookies);

     // byte result -> string result (the caller's instance is updated)
     Mapper.Map(bytes, response);
 }
Пример #3
0
        /// <summary>
        /// Builds the configuration shown during setup. For "form" logins the login page
        /// is requested and parsed, and a "Captcha" item is added to the configuration
        /// when the page contains a <c>.g-recaptcha</c> element.
        /// </summary>
        /// <returns>
        /// The indexer's <c>configData</c>; returned untouched when the definition has no
        /// login block or the login method is not "form".
        /// </returns>
        public override async Task<ConfigurationData> GetConfigurationForSetup()
        {
            var Login = Definition.Login;

            if (Login == null || Login.Method != "form")
                return configData;

            var LoginUrl = SiteLink + Login.Path;

            // Clear the stored cookie header before requesting the landing page.
            configData.CookieHeader.Value = null;
            landingResult = await RequestStringWithCookies(LoginUrl, null, SiteLink);

            var htmlParser = new HtmlParser();
            landingResultDocument = htmlParser.Parse(landingResult.Content);

            var grecaptcha = landingResultDocument.QuerySelector(".g-recaptcha");
            if (grecaptcha != null)
            {
                var CaptchaItem = new RecaptchaItem();
                CaptchaItem.Name = "Captcha";
                CaptchaItem.Version = "2";
                CaptchaItem.SiteKey = grecaptcha.GetAttribute("data-sitekey");
                if (CaptchaItem.SiteKey == null) // some sites don't store the sitekey in the .g-recaptcha div (e.g. cloudflare captcha challenge page)
                {
                    // The fallback element is optional as well: only read the attribute when
                    // such an element exists, otherwise leave the site key unset instead of
                    // failing with a NullReferenceException.
                    var siteKeyElement = landingResultDocument.QuerySelector("[data-sitekey]");
                    if (siteKeyElement != null)
                        CaptchaItem.SiteKey = siteKeyElement.GetAttribute("data-sitekey");
                }

                configData.AddDynamic("Captcha", CaptchaItem);
            }

            return configData;
        }
Пример #4
0
        /// <summary>
        /// Decides whether a response indicates that a (re)login is required.
        /// </summary>
        /// <param name="Result">Response to inspect; a redirect always counts as "login needed".</param>
        /// <param name="document">Parsed response body the login test selector is run against.</param>
        /// <returns>
        /// true when the response is a redirect, or when a login test selector is defined
        /// and matches no element of <paramref name="document"/>; false otherwise.
        /// </returns>
        protected bool CheckIfLoginIsNeeded(WebClientStringResult Result, IHtmlDocument document)
        {
            if (Result.IsRedirect)
                return true;

            // Without a login test there is nothing further to check.
            var login = Definition.Login;
            if (login == null || login.Test == null)
                return false;

            var testSelector = login.Test.Selector;
            if (testSelector == null)
                return false;

            // The selector is expected to match on a logged-in page; no match means login is needed.
            var matches = document.QuerySelectorAll(testSelector);
            return matches.Length == 0;
        }
Пример #5
0
        /// <summary>
        /// Logs in to the site using the method named in the definition's login block
        /// ("post", "form" or "cookie") and stores the resulting cookies in the
        /// configuration's cookie header.
        /// </summary>
        /// <returns>
        /// false when the definition has no login block; true once the selected login
        /// method has run without raising an error.
        /// </returns>
        /// <exception cref="ExceptionWithConfigData">
        /// Thrown when the Cloudflare clearance request is not answered with a redirect,
        /// or when no login form matches the form selector; <c>checkForLoginError</c> is
        /// also called on the login response and may throw.
        /// </exception>
        /// <exception cref="NotImplementedException">Thrown for any other login method.</exception>
        protected async Task<bool> DoLogin()
        {
            var Login = Definition.Login;

            if (Login == null)
                return false;

            if (Login.Method == "post")
            {
                // Plain POST of the templated inputs from the definition.
                var pairs = new Dictionary<string, string>();
                foreach (var Input in Definition.Login.Inputs)
                {
                    var value = applyGoTemplateText(Input.Value);
                    pairs.Add(Input.Key, value);
                }

                var LoginUrl = SiteLink + Login.Path;
                configData.CookieHeader.Value = null;
                var loginResult = await RequestLoginAndFollowRedirect(LoginUrl, pairs, null, true, null, SiteLink, true);
                configData.CookieHeader.Value = loginResult.Cookies;

                checkForLoginError(loginResult);
            }
            else if (Login.Method == "form")
            {
                var LoginUrl = SiteLink + Login.Path;

                var pairs = new Dictionary<string, string>();

                var CaptchaConfigItem = (RecaptchaItem)configData.GetDynamic("Captcha");

                if (CaptchaConfigItem != null && !string.IsNullOrWhiteSpace(CaptchaConfigItem.Cookie))
                {
                    // for remote users just set the cookie and return
                    CookieHeader = CaptchaConfigItem.Cookie;
                    return true;
                }

                // landingResultDocument might not be initiated if the login is caused by a relogin during a query.
                // This has to happen before the captcha handling below, which already queries the document.
                if (landingResultDocument == null)
                {
                    await GetConfigurationForSetup();
                }

                if (CaptchaConfigItem != null)
                {
                    var CloudFlareCaptchaChallenge = landingResultDocument.QuerySelector("script[src=\"/cdn-cgi/scripts/cf.challenge.js\"]");
                    if (CloudFlareCaptchaChallenge != null)
                    {
                        // The landing page is a Cloudflare challenge: submit the captcha answer to
                        // the clearance endpoint first, then fetch the real login page.
                        var CloudFlareQueryCollection = new NameValueCollection();
                        CloudFlareQueryCollection["id"] = CloudFlareCaptchaChallenge.GetAttribute("data-ray");

                        CloudFlareQueryCollection["g-recaptcha-response"] = CaptchaConfigItem.Value;
                        var ClearanceUrl = resolvePath("/cdn-cgi/l/chk_captcha?" + CloudFlareQueryCollection.GetQueryString());

                        var ClearanceResult = await RequestStringWithCookies(ClearanceUrl.ToString(), null, SiteLink);

                        if (ClearanceResult.IsRedirect) // clearance successful
                        {
                            // request real login page again
                            landingResult = await RequestStringWithCookies(LoginUrl, null, SiteLink);
                            var htmlParser = new HtmlParser();
                            landingResultDocument = htmlParser.Parse(landingResult.Content);
                        }
                        else
                        {
                            throw new ExceptionWithConfigData(string.Format("Login failed: Cloudflare clearance failed using cookies {0}: {1}", CookieHeader, ClearanceResult.Content), configData);
                        }
                    }
                    else
                    {
                        // Captcha belongs to the login form itself: send the answer with the form.
                        pairs.Add("g-recaptcha-response", CaptchaConfigItem.Value);
                    }
                }

                var FormSelector = Login.Form;
                if (FormSelector == null)
                    FormSelector = "form";

                var form = landingResultDocument.QuerySelector(FormSelector);
                if (form == null)
                {
                    throw new ExceptionWithConfigData(string.Format("Login failed: No form found on {0} using form selector {1}", LoginUrl, FormSelector), configData);
                }

                var inputs = form.QuerySelectorAll("input");
                if (inputs == null)
                {
                    throw new ExceptionWithConfigData(string.Format("Login failed: No inputs found on {0} using form selector {1}", LoginUrl, FormSelector), configData);
                }

                var submitUrl = resolvePath(form.GetAttribute("action"));

                // Start from the values already present in the form (e.g. hidden fields) ...
                foreach (var input in inputs)
                {
                    var name = input.GetAttribute("name");
                    if (name == null)
                        continue;

                    var value = input.GetAttribute("value");
                    if (value == null)
                        value = "";

                    pairs[name] = value;
                }

                // ... then let the inputs from the definition override them.
                foreach (var Input in Definition.Login.Inputs)
                {
                    var value = applyGoTemplateText(Input.Value);
                    pairs[Input.Key] = value;
                }

                // automatically solve simpleCaptchas, if used
                var simpleCaptchaPresent = landingResultDocument.QuerySelector("script[src*=\"simpleCaptcha\"]");
                if (simpleCaptchaPresent != null)
                {
                    var captchaUrl = resolvePath("simpleCaptcha.php?numImages=1");
                    var simpleCaptchaResult = await RequestStringWithCookies(captchaUrl.ToString(), null, LoginUrl);
                    var simpleCaptchaJSON = JObject.Parse(simpleCaptchaResult.Content);
                    var captchaSelection = simpleCaptchaJSON["images"][0]["hash"].ToString();
                    pairs["captchaSelection"] = captchaSelection;
                    pairs["submitme"] = "X";
                }

                // clear landingResults/Document, otherwise we might use an old version for a new relogin (if GetConfigurationForSetup() wasn't called before)
                landingResult = null;
                landingResultDocument = null;

                WebClientStringResult loginResult = null;
                var enctype = form.GetAttribute("enctype");
                if (enctype == "multipart/form-data")
                {
                    var headers = new Dictionary<string, string>();
                    // Format the timestamp with the invariant culture so the decimal separator is
                    // always "." (and gets stripped); a culture-specific "," would otherwise end up
                    // inside the unquoted boundary parameter of the Content-Type header.
                    var boundary = "---------------------------" + (DateTime.UtcNow.Subtract(new DateTime(1970, 1, 1))).TotalSeconds.ToString(System.Globalization.CultureInfo.InvariantCulture).Replace(".", "");
                    var bodyParts = new List<string>();

                    foreach (var pair in pairs)
                    {
                        var part = "--" + boundary + "\r\n" +
                          "Content-Disposition: form-data; name=\"" + pair.Key + "\"\r\n" +
                          "\r\n" +
                          pair.Value;
                        bodyParts.Add(part);
                    }

                    bodyParts.Add("--" + boundary + "--");

                    headers.Add("Content-Type", "multipart/form-data; boundary=" + boundary);
                    var body = string.Join("\r\n", bodyParts);
                    loginResult = await PostDataWithCookies(submitUrl.ToString(), pairs, configData.CookieHeader.Value, SiteLink, headers, body);
                }
                else
                {
                    loginResult = await RequestLoginAndFollowRedirect(submitUrl.ToString(), pairs, configData.CookieHeader.Value, true, null, SiteLink, true);
                }

                configData.CookieHeader.Value = loginResult.Cookies;

                checkForLoginError(loginResult);
            }
            else if (Login.Method == "cookie")
            {
                // The cookie string is taken verbatim from the "cookie" configuration item.
                configData.CookieHeader.Value = ((StringItem)configData.GetDynamic("cookie")).Value;
            }
            else
            {
                throw new NotImplementedException("Login method " + Definition.Login.Method + " not implemented");
            }
            return true;
        }
Пример #6
0
        /// <summary>
        /// Scans a login response for the error blocks declared in the definition and
        /// raises a login failure for the first one that matches.
        /// </summary>
        /// <param name="loginResult">Login response whose content is parsed as HTML.</param>
        /// <returns>true when no error selector matches or no error blocks are defined.</returns>
        /// <exception cref="ExceptionWithConfigData">
        /// Thrown when an error selector matches. The message is the trimmed text of the
        /// element found by the block's message selector, or of the matched error element
        /// when no message selector is configured or it finds nothing.
        /// </exception>
        protected bool checkForLoginError(WebClientStringResult loginResult)
        {
            var ErrorBlocks = Definition.Login.Error;

            if (ErrorBlocks == null)
                return true; // no error

            var loginResultParser = new HtmlParser();
            var loginResultDocument = loginResultParser.Parse(loginResult.Content);
            foreach (errorBlock error in ErrorBlocks)
            {
                var selection = loginResultDocument.QuerySelector(error.Selector);
                if (selection == null)
                    continue;

                string errorMessage = selection.TextContent;
                if (error.Message != null)
                {
                    // The dedicated message element may be absent; keep the text of the matched
                    // error element in that case rather than failing with a NullReferenceException
                    // that would hide the actual login error.
                    var errorSubMessage = loginResultDocument.QuerySelector(error.Message.Selector);
                    if (errorSubMessage != null)
                        errorMessage = errorSubMessage.TextContent;
                }
                throw new ExceptionWithConfigData(string.Format("Login failed: {0}", errorMessage.Trim()), configData);
            }
            return true; // no error
        }
Пример #7
0
        /// <summary>
        /// Executes the request (PrepareRequest, DelayRequest, Run) and returns the
        /// response with its body decoded to text.
        /// </summary>
        /// <remarks>
        /// The encoding is chosen in this order: the request's own Encoding, the charset
        /// named in the response's "content-type" header, and finally UTF-8. When the
        /// first "server" header value is "cloudflare-nginx" the content is additionally
        /// passed through <c>BrowserUtil.DecodeCloudFlareProtectedEmailFromHTML</c>.
        /// </remarks>
        /// <param name="request">Request to execute; also attached to the result.</param>
        /// <returns>The mapped string result; its Content is null when the response had no content.</returns>
        virtual public async Task <WebClientStringResult> GetString(WebRequest request)
        {
            logger.Debug(string.Format("IWebClient.GetString(Url:{0})", request.Url));
            PrepareRequest(request);
            DelayRequest(request);
            var result = await Run(request);

            result.Request = request;
            WebClientStringResult stringResult = Mapper.Map <WebClientStringResult>(result);
            Encoding encoding = null;

            if (request.Encoding != null)
            {
                encoding = request.Encoding;
            }
            else if (result.Headers.ContainsKey("content-type"))
            {
                var   contentType       = result.Headers["content-type"][0];
                Regex CharsetRegex      = new Regex(@"charset=([\w-]+)", RegexOptions.Compiled);
                var   CharsetRegexMatch = CharsetRegex.Match(contentType);
                if (CharsetRegexMatch.Success)
                {
                    var charset = CharsetRegexMatch.Groups[1].Value;
                    try
                    {
                        encoding = Encoding.GetEncoding(charset);
                    }
                    catch (Exception ex)
                    {
                        // Placeholders: {0}=url, {1}=charset, {2}=header, {3}=exception.
                        logger.Error(string.Format("IWebClient.GetString(Url:{0}): Error loading encoding {1} based on header {2}: {3}", request.Url, charset, contentType, ex));
                    }
                }
                else
                {
                    // Placeholders: {0}=url, {1}=header.
                    logger.Error(string.Format("IWebClient.GetString(Url:{0}): Got header without charset: {1}", request.Url, contentType));
                }
            }

            if (encoding == null)
            {
                logger.Error(string.Format("IWebClient.GetString(Url:{0}): No encoding detected, defaulting to UTF-8", request.Url));
                encoding = Encoding.UTF8;
            }

            string decodedContent = null;

            if (result.Content != null)
            {
                decodedContent = encoding.GetString(result.Content);
            }

            stringResult.Content = decodedContent;
            logger.Debug(string.Format("IWebClient: Returning {0} => {1}", result.Status, (result.IsRedirect ? result.RedirectingTo + " " : "") + (decodedContent == null ? "<NULL>" : decodedContent)));

            string[] server;
            if (stringResult.Headers.TryGetValue("server", out server))
            {
                if (server[0] == "cloudflare-nginx")
                {
                    stringResult.Content = BrowserUtil.DecodeCloudFlareProtectedEmailFromHTML(stringResult.Content);
                }
            }
            return(stringResult);
        }
Пример #8
0
        /// <summary>
        /// Executes the request with <see cref="HttpClient"/> (GET, or a form-encoded
        /// POST of the request's PostData) and returns the body as a string together
        /// with the cookies, status code and redirect target of the response.
        /// </summary>
        /// <param name="request">
        /// Request to execute. Its Cookies string is split on ';' and loaded into the
        /// cookie container; cookies that fail to load are logged and skipped.
        /// </param>
        /// <returns>
        /// The response as a string result. Cookies is built from the name=value part of
        /// each Set-Cookie header; RedirectingTo is set only when a Location header exists.
        /// </returns>
        public async Task<WebClientStringResult> GetString(WebRequest request)
        {
            logger.Debug(string.Format("WindowsWebClient:GetString(Url:{0})", request.Url));
            var cookies = new CookieContainer();

            if (!string.IsNullOrEmpty(request.Cookies))
            {
                var uri = new Uri(request.Url);
                foreach (var c in request.Cookies.Split(';'))
                {
                    try
                    {
                        cookies.SetCookies(uri, c);
                    }
                    catch (CookieException ex)
                    {
                        logger.Info("(Non-critical) Problem loading cookie {0}, {1}, {2}", uri, c, ex.Message);
                    }
                }
            }

            // Disposing the client also releases the handler it was created with.
            using (var client = new HttpClient(new HttpClientHandler
            {
                CookieContainer = cookies,
                AllowAutoRedirect = false, // Do not use this - Bugs ahoy! Lost cookies and more.
                UseCookies = true,
            }))
            {
                client.DefaultRequestHeaders.Add("User-Agent", BrowserUtil.ChromeUserAgent);
                HttpResponseMessage response = null;

                if (request.Type == RequestType.POST)
                {
                    using (var content = new FormUrlEncodedContent(request.PostData))
                    {
                        response = await client.PostAsync(request.Url, content);
                    }
                }
                else
                {
                    response = await client.GetAsync(request.Url);
                }

                // Everything needed is copied into the result before the response is disposed.
                using (response)
                {
                    var result = new WebClientStringResult();
                    result.Content = await response.Content.ReadAsStringAsync();

                    // Compatibility issue between the cookie format and httpclient
                    // Pull it out manually ignoring the expiry date then set it manually
                    // http://stackoverflow.com/questions/14681144/httpclient-not-storing-cookies-in-cookiecontainer
                    IEnumerable<string> cookieHeaders;
                    if (response.Headers.TryGetValues("set-cookie", out cookieHeaders))
                    {
                        var cookieBuilder = new StringBuilder();
                        foreach (var c in cookieHeaders)
                        {
                            if (cookieBuilder.Length > 0)
                            {
                                cookieBuilder.Append("; ");
                            }

                            // Keep only "name=value". A cookie without attributes has no ';' at
                            // all, in which case the whole header value is the pair.
                            var attributesStart = c.IndexOf(';');
                            cookieBuilder.Append(attributesStart >= 0 ? c.Substring(0, attributesStart) : c);
                        }

                        result.Cookies = cookieBuilder.ToString();
                    }

                    result.Status = response.StatusCode;
                    if (null != response.Headers.Location)
                    {
                        result.RedirectingTo = response.Headers.Location.ToString();
                    }
                    return result;
                }
            }
        }