/// <summary>
/// Performs the request through <c>GetBytes</c> and returns the response
/// with its body decoded as UTF-8 text.
/// </summary>
/// <param name="request">The request to execute.</param>
/// <returns>A string result carrying the decoded content, cookies and status of the byte result.</returns>
public async Task<WebClientStringResult> GetString(WebRequest request)
{
    logger.Debug(string.Format("UnixLibCurlWebClient:GetString(Url:{0})", request.Url));

    var byteResult = await GetBytes(request);

    // Copy the fields over one by one, decoding the raw body as UTF-8.
    var stringResult = new WebClientStringResult();
    stringResult.Content = Encoding.UTF8.GetString(byteResult.Content);
    stringResult.Cookies = byteResult.Cookies;
    stringResult.Status = byteResult.Status;
    return stringResult;
}
/// <summary>
/// String-result variant of <c>FollowIfRedirect</c>: the response is mapped onto a
/// byte result, handed to the byte-result overload, and the outcome is mapped back
/// onto the supplied <paramref name="response"/> instance.
/// </summary>
/// <param name="response">The string response; updated in place via the mapper.</param>
/// <param name="referrer">Forwarded unchanged to the byte-result overload.</param>
/// <param name="overrideRedirectUrl">Forwarded unchanged to the byte-result overload.</param>
/// <param name="overrideCookies">Forwarded unchanged to the byte-result overload.</param>
protected async Task FollowIfRedirect(WebClientStringResult response, string referrer = null, string overrideRedirectUrl = null, string overrideCookies = null)
{
    // string result -> byte result
    var asBytes = new WebClientByteResult();
    Mapper.Map(response, asBytes);

    await FollowIfRedirect(asBytes, referrer, overrideRedirectUrl, overrideCookies);

    // byte result -> string result (written back into the caller's object)
    Mapper.Map(asBytes, response);
}
/// <summary>
/// Prepares the configuration for setup. For definitions using the "form" login
/// method the login page is fetched and parsed (stored in <c>landingResult</c> and
/// <c>landingResultDocument</c>); if the page contains a <c>.g-recaptcha</c> element a
/// dynamic "Captcha" item is added to the configuration.
/// </summary>
/// <returns>The (possibly extended) <c>configData</c> instance.</returns>
public override async Task<ConfigurationData> GetConfigurationForSetup()
{
    var Login = Definition.Login;

    // Only form logins need the landing page.
    if (Login == null || Login.Method != "form")
        return configData;

    var LoginUrl = SiteLink + Login.Path;

    // Start from a clean cookie state before requesting the login page.
    configData.CookieHeader.Value = null;
    landingResult = await RequestStringWithCookies(LoginUrl, null, SiteLink);

    var htmlParser = new HtmlParser();
    landingResultDocument = htmlParser.Parse(landingResult.Content);

    var grecaptcha = landingResultDocument.QuerySelector(".g-recaptcha");
    if (grecaptcha != null)
    {
        var CaptchaItem = new RecaptchaItem();
        CaptchaItem.Name = "Captcha";
        CaptchaItem.Version = "2";
        CaptchaItem.SiteKey = grecaptcha.GetAttribute("data-sitekey");

        if (CaptchaItem.SiteKey == null)
        {
            // some sites don't store the sitekey in the .g-recaptcha div (e.g. cloudflare captcha challenge page)
            var siteKeyElement = landingResultDocument.QuerySelector("[data-sitekey]");

            // The fallback element may be missing as well; leave SiteKey null in
            // that case instead of failing with a NullReferenceException.
            if (siteKeyElement != null)
                CaptchaItem.SiteKey = siteKeyElement.GetAttribute("data-sitekey");
        }

        configData.AddDynamic("Captcha", CaptchaItem);
    }

    return configData;
}
/// <summary>
/// Decides whether a (re)login is required for the given response.
/// </summary>
/// <param name="Result">The response to inspect; a redirect always means login is needed.</param>
/// <param name="document">The parsed document the login test selector is evaluated against.</param>
/// <returns>
/// <c>true</c> when the response is a redirect or when the definition's login test
/// selector matches nothing in <paramref name="document"/>; otherwise <c>false</c>.
/// </returns>
protected bool CheckIfLoginIsNeeded(WebClientStringResult Result, IHtmlDocument document)
{
    if (Result.IsRedirect)
        return true;

    // Without a login definition or login test there is nothing to verify.
    var login = Definition.Login;
    if (login == null || login.Test == null)
        return false;

    var testSelector = login.Test.Selector;
    if (testSelector == null)
        return false;

    // The selector is expected to match on a logged-in page; no match => login needed.
    return document.QuerySelectorAll(testSelector).Length == 0;
}
/// <summary>
/// Logs in according to the definition's login block. Supported methods:
/// "post" (posts the configured inputs), "form" (submits the login form found on the
/// landing page, including captcha handling) and "cookie" (uses the configured cookie).
/// </summary>
/// <returns>
/// <c>false</c> when the definition has no login block, otherwise <c>true</c>.
/// </returns>
/// <exception cref="ExceptionWithConfigData">
/// Thrown when the Cloudflare clearance fails, the login form cannot be found, no cookie
/// is configured for the cookie method, or <c>checkForLoginError</c> detects an error.
/// </exception>
/// <exception cref="NotImplementedException">Thrown for an unknown login method.</exception>
protected async Task<bool> DoLogin()
{
    var Login = Definition.Login;

    if (Login == null)
        return false;

    if (Login.Method == "post")
    {
        var pairs = new Dictionary<string, string>();
        foreach (var Input in Definition.Login.Inputs)
        {
            var value = applyGoTemplateText(Input.Value);
            pairs.Add(Input.Key, value);
        }

        var LoginUrl = SiteLink + Login.Path;
        configData.CookieHeader.Value = null;
        var loginResult = await RequestLoginAndFollowRedirect(LoginUrl, pairs, null, true, null, SiteLink, true);
        configData.CookieHeader.Value = loginResult.Cookies;

        checkForLoginError(loginResult);
    }
    else if (Login.Method == "form")
    {
        var LoginUrl = SiteLink + Login.Path;
        var pairs = new Dictionary<string, string>();

        var CaptchaConfigItem = (RecaptchaItem)configData.GetDynamic("Captcha");

        if (CaptchaConfigItem != null && !string.IsNullOrWhiteSpace(CaptchaConfigItem.Cookie))
        {
            // for remote users just set the cookie and return
            CookieHeader = CaptchaConfigItem.Cookie;
            return true;
        }

        // landingResultDocument might not be initiated if the login is caused by a relogin during a query.
        // This has to happen before the captcha handling below, which already reads the document.
        if (landingResultDocument == null)
        {
            await GetConfigurationForSetup();
        }

        if (CaptchaConfigItem != null)
        {
            var CloudFlareCaptchaChallenge = landingResultDocument.QuerySelector("script[src=\"/cdn-cgi/scripts/cf.challenge.js\"]");
            if (CloudFlareCaptchaChallenge != null)
            {
                var CloudFlareQueryCollection = new NameValueCollection();
                CloudFlareQueryCollection["id"] = CloudFlareCaptchaChallenge.GetAttribute("data-ray");
                CloudFlareQueryCollection["g-recaptcha-response"] = CaptchaConfigItem.Value;

                var ClearanceUrl = resolvePath("/cdn-cgi/l/chk_captcha?" + CloudFlareQueryCollection.GetQueryString());
                var ClearanceResult = await RequestStringWithCookies(ClearanceUrl.ToString(), null, SiteLink);

                if (ClearanceResult.IsRedirect) // clearance successfull
                {
                    // request real login page again
                    landingResult = await RequestStringWithCookies(LoginUrl, null, SiteLink);
                    var htmlParser = new HtmlParser();
                    landingResultDocument = htmlParser.Parse(landingResult.Content);
                }
                else
                {
                    throw new ExceptionWithConfigData(string.Format("Login failed: Cloudflare clearance failed using cookies {0}: {1}", CookieHeader, ClearanceResult.Content), configData);
                }
            }
            else
            {
                pairs.Add("g-recaptcha-response", CaptchaConfigItem.Value);
            }
        }

        var FormSelector = Login.Form;
        if (FormSelector == null)
            FormSelector = "form";

        var form = landingResultDocument.QuerySelector(FormSelector);
        if (form == null)
        {
            throw new ExceptionWithConfigData(string.Format("Login failed: No form found on {0} using form selector {1}", LoginUrl, FormSelector), configData);
        }

        var inputs = form.QuerySelectorAll("input");
        if (inputs == null)
        {
            throw new ExceptionWithConfigData(string.Format("Login failed: No inputs found on {0} using form selector {1}", LoginUrl, FormSelector), configData);
        }

        var submitUrl = resolvePath(form.GetAttribute("action"));

        // Start with the values already present in the form (hidden fields etc.) ...
        foreach (var input in inputs)
        {
            var name = input.GetAttribute("name");
            if (name == null)
                continue;

            var value = input.GetAttribute("value");
            if (value == null)
                value = "";

            pairs[name] = value;
        }

        // ... then let the inputs from the definition override them.
        foreach (var Input in Definition.Login.Inputs)
        {
            var value = applyGoTemplateText(Input.Value);
            pairs[Input.Key] = value;
        }

        // automatically solve simpleCaptchas, if used
        var simpleCaptchaPresent = landingResultDocument.QuerySelector("script[src*=\"simpleCaptcha\"]");
        if (simpleCaptchaPresent != null)
        {
            var captchaUrl = resolvePath("simpleCaptcha.php?numImages=1");
            var simpleCaptchaResult = await RequestStringWithCookies(captchaUrl.ToString(), null, LoginUrl);
            var simpleCaptchaJSON = JObject.Parse(simpleCaptchaResult.Content);
            var captchaSelection = simpleCaptchaJSON["images"][0]["hash"].ToString();
            pairs["captchaSelection"] = captchaSelection;
            pairs["submitme"] = "X";
        }

        // clear landingResults/Document, otherwise we might use an old version for a new relogin (if GetConfigurationForSetup() wasn't called before)
        landingResult = null;
        landingResultDocument = null;

        WebClientStringResult loginResult = null;
        var enctype = form.GetAttribute("enctype");
        if (enctype == "multipart/form-data")
        {
            var headers = new Dictionary<string, string>();

            // Format with the invariant culture so the decimal separator is always '.'
            // and gets stripped; a culture-specific ',' would otherwise end up in the boundary.
            var boundary = "---------------------------" + (DateTime.UtcNow.Subtract(new DateTime(1970, 1, 1))).TotalSeconds.ToString(System.Globalization.CultureInfo.InvariantCulture).Replace(".", "");

            var bodyParts = new List<string>();
            foreach (var pair in pairs)
            {
                var part = "--" + boundary + "\r\n" +
                  "Content-Disposition: form-data; name=\"" + pair.Key + "\"\r\n" +
                  "\r\n" +
                  pair.Value;
                bodyParts.Add(part);
            }

            // Closing delimiter of the multipart body.
            bodyParts.Add("--" + boundary + "--");

            headers.Add("Content-Type", "multipart/form-data; boundary=" + boundary);
            var body = string.Join("\r\n", bodyParts);
            loginResult = await PostDataWithCookies(submitUrl.ToString(), pairs, configData.CookieHeader.Value, SiteLink, headers, body);
        }
        else
        {
            loginResult = await RequestLoginAndFollowRedirect(submitUrl.ToString(), pairs, configData.CookieHeader.Value, true, null, SiteLink, true);
        }

        configData.CookieHeader.Value = loginResult.Cookies;

        checkForLoginError(loginResult);
    }
    else if (Login.Method == "cookie")
    {
        var cookieItem = (StringItem)configData.GetDynamic("cookie");
        if (cookieItem == null)
        {
            // Report a missing cookie setting explicitly instead of a NullReferenceException.
            throw new ExceptionWithConfigData("Login failed: No cookie configured", configData);
        }

        configData.CookieHeader.Value = cookieItem.Value;
    }
    else
    {
        throw new NotImplementedException("Login method " + Definition.Login.Method + " not implemented");
    }

    return true;
}
/// <summary>
/// Scans a login response for the error blocks declared in the definition and
/// throws if one of their selectors matches.
/// </summary>
/// <param name="loginResult">The response returned by the login request.</param>
/// <returns><c>true</c> when no error block matched (or none are defined).</returns>
/// <exception cref="ExceptionWithConfigData">
/// Thrown with the text of the matched element (or of the error's message selector,
/// if defined and found) when an error block matches.
/// </exception>
protected bool checkForLoginError(WebClientStringResult loginResult)
{
    var ErrorBlocks = Definition.Login.Error;

    if (ErrorBlocks == null)
        return true; // no error

    var loginResultParser = new HtmlParser();
    var loginResultDocument = loginResultParser.Parse(loginResult.Content);

    foreach (errorBlock error in ErrorBlocks)
    {
        var selection = loginResultDocument.QuerySelector(error.Selector);
        if (selection == null)
            continue;

        string errorMessage = selection.TextContent;

        if (error.Message != null)
        {
            var errorSubMessage = loginResultDocument.QuerySelector(error.Message.Selector);

            // The message selector may match nothing; keep the text of the error
            // element in that case instead of failing with a NullReferenceException.
            if (errorSubMessage != null)
                errorMessage = errorSubMessage.TextContent;
        }

        throw new ExceptionWithConfigData(string.Format("Login failed: {0}", errorMessage.Trim()), configData);
    }

    return true; // no error
}
/// <summary>
/// Executes the request and returns the response with its body decoded to a string.
/// The encoding is taken from <c>request.Encoding</c> if set, otherwise from the
/// <c>charset</c> of the response's content-type header, otherwise UTF-8 is used.
/// </summary>
/// <param name="request">The request to execute.</param>
/// <returns>
/// The mapped string result. When the "server" header is "cloudflare-nginx" the content
/// is passed through <c>BrowserUtil.DecodeCloudFlareProtectedEmailFromHTML</c>.
/// </returns>
public virtual async Task<WebClientStringResult> GetString(WebRequest request)
{
    logger.Debug(string.Format("IWebClient.GetString(Url:{0})", request.Url));
    PrepareRequest(request);
    DelayRequest(request);
    var result = await Run(request);
    result.Request = request;
    WebClientStringResult stringResult = Mapper.Map<WebClientStringResult>(result);

    Encoding encoding = null;
    if (request.Encoding != null)
    {
        // An explicitly requested encoding always wins.
        encoding = request.Encoding;
    }
    else if (result.Headers.ContainsKey("content-type"))
    {
        var contentType = result.Headers["content-type"][0];

        // The static Regex.Match uses the framework's regex cache, so the pattern
        // is not re-built (and re-compiled) on every request.
        var charsetMatch = Regex.Match(contentType, @"charset=([\w-]+)");
        if (charsetMatch.Success)
        {
            var charset = charsetMatch.Groups[1].Value;
            try
            {
                encoding = Encoding.GetEncoding(charset);
            }
            catch (Exception ex)
            {
                // Unknown charset name: log it and fall through to the UTF-8 default below.
                logger.Error(string.Format("IWebClient.GetString(Url:{0}): Error loading encoding {1} based on header {2}: {3}", request.Url, charset, contentType, ex));
            }
        }
        else
        {
            logger.Error(string.Format("IWebClient.GetString(Url:{0}): Got header without charset: {1}", request.Url, contentType));
        }
    }

    if (encoding == null)
    {
        logger.Error(string.Format("IWebClient.GetString(Url:{0}): No encoding detected, defaulting to UTF-8", request.Url));
        encoding = Encoding.UTF8;
    }

    string decodedContent = null;
    if (result.Content != null)
    {
        decodedContent = encoding.GetString(result.Content);
    }

    stringResult.Content = decodedContent;
    logger.Debug(string.Format("IWebClient: Returning {0} => {1}", result.Status, (result.IsRedirect ? result.RedirectingTo + " " : "") + (decodedContent == null ? "<NULL>" : decodedContent)));

    string[] server;
    if (stringResult.Headers.TryGetValue("server", out server))
    {
        // Guard against a header entry without values before looking at the first one.
        if (server != null && server.Length > 0 && server[0] == "cloudflare-nginx")
        {
            stringResult.Content = BrowserUtil.DecodeCloudFlareProtectedEmailFromHTML(stringResult.Content);
        }
    }

    return stringResult;
}
/// <summary>
/// Executes the request with <see cref="HttpClient"/> (GET, or a form-encoded POST when
/// <c>request.Type</c> is <c>RequestType.POST</c>) and returns body, cookies, status and
/// redirect target. Automatic redirects are disabled.
/// </summary>
/// <param name="request">The request to execute; its cookie string is split on ';' and loaded into the cookie container.</param>
/// <returns>The response as a <c>WebClientStringResult</c>.</returns>
public async Task<WebClientStringResult> GetString(WebRequest request)
{
    logger.Debug(string.Format("WindowsWebClient:GetString(Url:{0})", request.Url));

    var cookies = new CookieContainer();
    if (!string.IsNullOrEmpty(request.Cookies))
    {
        var uri = new Uri(request.Url);
        foreach (var c in request.Cookies.Split(';'))
        {
            try
            {
                cookies.SetCookies(uri, c);
            }
            catch (CookieException ex)
            {
                logger.Info("(Non-critical) Problem loading cookie {0}, {1}, {2}", uri, c, ex.Message);
            }
        }
    }

    var handler = new HttpClientHandler
    {
        CookieContainer = cookies,
        AllowAutoRedirect = false, // Do not use this - Bugs ahoy! Lost cookies and more.
        UseCookies = true,
    };

    // The handler carries per-request cookies, so the client is created per call;
    // dispose it (and with it the handler) once the response has been read.
    using (var client = new HttpClient(handler))
    {
        client.DefaultRequestHeaders.Add("User-Agent", BrowserUtil.ChromeUserAgent);

        HttpResponseMessage response;
        if (request.Type == RequestType.POST)
        {
            var content = new FormUrlEncodedContent(request.PostData);
            response = await client.PostAsync(request.Url, content);
        }
        else
        {
            response = await client.GetAsync(request.Url);
        }

        using (response)
        {
            var result = new WebClientStringResult();
            result.Content = await response.Content.ReadAsStringAsync();

            // Compatiblity issue between the cookie format and httpclient
            // Pull it out manually ignoring the expiry date then set it manually
            // http://stackoverflow.com/questions/14681144/httpclient-not-storing-cookies-in-cookiecontainer
            IEnumerable<string> cookieHeaders;
            if (response.Headers.TryGetValues("set-cookie", out cookieHeaders))
            {
                var cookieBuilder = new StringBuilder();
                foreach (var c in cookieHeaders)
                {
                    if (cookieBuilder.Length > 0)
                    {
                        cookieBuilder.Append("; ");
                    }

                    // Keep only the "name=value" part. A header without any attributes
                    // has no ';' at all, in which case the whole value is the cookie.
                    var separatorIndex = c.IndexOf(';');
                    cookieBuilder.Append(separatorIndex < 0 ? c : c.Substring(0, separatorIndex));
                }

                result.Cookies = cookieBuilder.ToString();
            }

            result.Status = response.StatusCode;
            if (null != response.Headers.Location)
            {
                result.RedirectingTo = response.Headers.Location.ToString();
            }

            return result;
        }
    }
}