/// <summary>
/// Submits a captcha answer with a GET request to <c>ValidateCaptcha</c> and reports
/// whether the returned page is still a "Robot Check" page.
/// </summary>
/// <param name="captcha">The captcha answer, sent as the <c>field-keywords</c> query value.</param>
/// <param name="amzn">The token sent as the <c>amzn</c> query value.</param>
/// <param name="tries">Attempt counter; only used in the log messages.</param>
/// <param name="cancellationToken">Token passed to the HTTP request.</param>
/// <returns>
/// A result with <c>Success = true</c> when the response title does not contain "Robot Check";
/// otherwise <c>Success = false</c> with <c>NextCaptchaPage</c> set to the returned document.
/// </returns>
private async Task<SolveCaptchaResult> SubmitCaptchaAnswerAsync(string captcha, string amzn, int tries, CancellationToken cancellationToken)
{
    var parameters = HttpUtility.ParseQueryString(string.Empty);
    parameters["amzn"] = amzn;
    // NOTE(review): this value is already percent-encoded; the collection returned by
    // ParseQueryString may encode it again when converted to a string - confirm intended.
    parameters["amzn-r"] = "%2F";
    parameters["field-keywords"] = captcha;

    var requestUri = ValidateCaptcha + "?" + parameters;

    using (var response = await _httpClient.GetAsync(requestUri, cancellationToken))
    {
        var page = await response.Content.ReadAsHtmlDocumentAsync();

        // A page titled "Robot Check" means another captcha was served.
        var pageTitle = new ExtractTitle().Execute(page);
        var stillBlocked = pageTitle != null && pageTitle.Contains("Robot Check");

        if (!stillBlocked)
        {
            Log.Information("{Uri}: Solve Captcha Success ({Tries} tries)", _uriForLogging, tries);
            return new SolveCaptchaResult { Success = true };
        }

        Log.Information("{Uri}: Solve Captcha Failed ({Tries} tries)", _uriForLogging, tries);
        return new SolveCaptchaResult
        {
            Success = false,
            NextCaptchaPage = page
        };
    }
}
/// <summary>
/// Requests <paramref name="uri"/> via <c>GetAsync</c>, follows 301/302 redirects manually,
/// and returns the final response body parsed as an HTML document.
/// </summary>
/// <param name="uri">The address to request.</param>
/// <param name="cancellationToken">Token passed to every request.</param>
/// <param name="setRedirectUri">
/// When <c>true</c>, the result's <c>RedirectUri</c> is set to the address that produced the
/// document. It is forced to <c>true</c> once at least one redirect has been followed.
/// </param>
/// <returns>The parsed document and, if applicable, the address it was redirected to.</returns>
/// <exception cref="EncounterCaptchaException">
/// The returned page's title contains "Robot Check".
/// </exception>
/// <exception cref="NotSupportedException">
/// The response status is not 200, 301 or 302, or a redirect carries no Location header.
/// </exception>
private async Task<AmazonResponseResult> GetAsyncAsHtmlDoc(Uri uri, CancellationToken cancellationToken, bool setRedirectUri = false)
{
    // NOTE(review): the number of redirects followed is not bounded (this matches the
    // previous recursive behaviour); consider adding a limit if redirect loops are observed.
    while (true)
    {
        Uri redirectedUri;

        using (var response = await GetAsync(uri, cancellationToken))
        {
            if (response.StatusCode == HttpStatusCode.OK)
            {
                var result = new AmazonResponseResult
                {
                    HtmlDocument = await response.Content.ReadAsHtmlDocumentAsync()
                };

                if (setRedirectUri)
                {
                    result.RedirectUri = uri;
                }

                // ensure not blocked
                var title = new ExtractTitle().Execute(result.HtmlDocument);
                if (title != null && title.Contains("Robot Check"))
                {
                    throw new EncounterCaptchaException("Amazon has blocked scraper.", uri, result.HtmlDocument);
                }

                return result;
            }

            if (response.StatusCode != HttpStatusCode.MovedPermanently && response.StatusCode != HttpStatusCode.Found)
            {
                throw new NotSupportedException(
                    "Unsupported HTTP status code " + (int)response.StatusCode + " (" + response.StatusCode + ") for " + uri + ".");
            }

            var location = response.Headers.Location;
            if (location == null)
            {
                throw new NotSupportedException(
                    "Redirect response (" + (int)response.StatusCode + ") for " + uri + " has no Location header.");
            }

            // The Location header may be relative; resolve it against the address just
            // requested so the next request and the reported URI are absolute.
            var canResolve = !location.IsAbsoluteUri && uri != null && uri.IsAbsoluteUri;
            redirectedUri = canResolve ? new Uri(uri, location) : location;
        }

        // The redirect response is disposed before the next request is issued.
        uri = redirectedUri;
        setRedirectUri = true;
    }
}