Example #1
0
            /// <summary>
            /// Submits a captcha answer to the <c>ValidateCaptcha</c> endpoint and reports whether it was accepted.
            /// </summary>
            /// <param name="captcha">The captcha answer, sent as the <c>field-keywords</c> query parameter.</param>
            /// <param name="amzn">The value sent as the <c>amzn</c> query parameter.</param>
            /// <param name="tries">Attempt count; only used in the log messages.</param>
            /// <param name="cancellationToken">Token passed to the HTTP request.</param>
            /// <returns>
            /// A result with <c>Success = false</c> and the returned page in <c>NextCaptchaPage</c> when the
            /// response title contains "Robot Check"; otherwise a result with <c>Success = true</c>.
            /// </returns>
            private async Task <SolveCaptchaResult> SubmitCaptchaAnswerAsync(string captcha, string amzn, int tries, CancellationToken cancellationToken)
            {
                var query = HttpUtility.ParseQueryString(string.Empty);

                query["amzn"]           = amzn;
                // The collection percent-encodes every value when it is converted to a string below, so the
                // raw "/" is stored here. A pre-encoded "%2F" would go out double-encoded as "%252F".
                query["amzn-r"]         = "/";
                query["field-keywords"] = captcha;

                using (var response = await _httpClient.GetAsync(ValidateCaptcha + "?" + query, cancellationToken))
                {
                    var doc = await response.Content.ReadAsHtmlDocumentAsync();

                    // A page titled "Robot Check" means the answer was rejected and another captcha was served.
                    var title = new ExtractTitle().Execute(doc);

                    if (title != null && title.Contains("Robot Check"))
                    {
                        Log.Information("{Uri}: Solve Captcha Failed ({Tries} tries)", _uriForLogging, tries);

                        // Hand the new captcha page back so the caller can retry against it.
                        return(new SolveCaptchaResult
                        {
                            Success = false,
                            NextCaptchaPage = doc
                        });
                    }

                    Log.Information("{Uri}: Solve Captcha Success ({Tries} tries)", _uriForLogging, tries);

                    return(new SolveCaptchaResult
                    {
                        Success = true
                    });
                }
            }
Example #2
0
        /// <summary>
        /// Requests <paramref name="uri"/> and returns the response body as an HTML document, following
        /// 301 (Moved Permanently) and 302 (Found) redirects manually.
        /// </summary>
        /// <param name="uri">Address to request.</param>
        /// <param name="cancellationToken">Token passed to each HTTP request.</param>
        /// <param name="setRedirectUri">
        /// When <c>true</c>, <paramref name="uri"/> is stored in the result's <c>RedirectUri</c>. The method
        /// passes <c>true</c> itself when it follows a redirect.
        /// </param>
        /// <returns>The parsed document, with <c>RedirectUri</c> set when requested.</returns>
        /// <exception cref="EncounterCaptchaException">The returned page's title contains "Robot Check".</exception>
        /// <exception cref="NotSupportedException">
        /// The response status is not 200, 301 or 302, or a redirect response carries no Location header.
        /// </exception>
        private async Task <AmazonResponseResult> GetAsyncAsHtmlDoc(Uri uri, CancellationToken cancellationToken, bool setRedirectUri = false)
        {
            Uri redirectedUri;

            using (var response = await GetAsync(uri, cancellationToken))
            {
                if (response.StatusCode == HttpStatusCode.OK)
                {
                    var result = new AmazonResponseResult
                    {
                        HtmlDocument = await response.Content.ReadAsHtmlDocumentAsync()
                    };

                    if (setRedirectUri)
                    {
                        result.RedirectUri = uri;
                    }

                    // ensure not blocked
                    var title = new ExtractTitle().Execute(result.HtmlDocument);

                    if (title != null && title.Contains("Robot Check"))
                    {
                        throw new EncounterCaptchaException("Amazon has blocked scraper.", uri, result.HtmlDocument);
                    }

                    return(result);
                }

                if (response.StatusCode != HttpStatusCode.MovedPermanently && response.StatusCode != HttpStatusCode.Found)
                {
                    throw new NotSupportedException("Unsupported response status " + (int)response.StatusCode + " (" + response.StatusCode + ") for " + uri);
                }

                redirectedUri = response.Headers.Location;

                if (redirectedUri == null)
                {
                    throw new NotSupportedException("Redirect response (" + response.StatusCode + ") for " + uri + " has no Location header.");
                }

                // The Location header may be relative; resolve it against the address just requested.
                if (!redirectedUri.IsAbsoluteUri && uri != null && uri.IsAbsoluteUri)
                {
                    redirectedUri = new Uri(uri, redirectedUri);
                }
            }

            // The redirect response is disposed before the next hop so it is not held for the whole chain.
            // NOTE(review): there is no upper bound on the number of redirects followed.
            return(await GetAsyncAsHtmlDoc(redirectedUri, cancellationToken, true));
        }