示例#1
0
        /// <summary>
        /// Fetches <paramref name="uri"/> through <c>GetAsyncAsHtmlDoc</c> while holding a permit
        /// from <c>Semaphore</c>, retrying for as long as the fetch throws
        /// <c>EncounterCaptchaException</c>.
        /// </summary>
        /// <param name="uri">The address to request first.</param>
        /// <param name="cancellationToken">Token passed to the semaphore wait, the fetch and the captcha solver.</param>
        /// <returns>The result of the first fetch that completes without a captcha exception.</returns>
        /// <exception cref="TimeoutException">
        /// No permit became available within <c>SemaphoreMaxWaitTimeInHrs</c> hours.
        /// </exception>
        private async Task <AmazonResponseResult> GetAsyncAsHtmlDocWithEnsureAllowed(Uri uri, CancellationToken cancellationToken)
        {
            // A timed wait reports a timeout by returning false instead of throwing. Only a
            // successful wait owns a permit, so stop here rather than running the request
            // unthrottled and then releasing a permit that was never taken in the finally block.
            var acquired = await Semaphore.WaitAsync(TimeSpan.FromHours(SemaphoreMaxWaitTimeInHrs), cancellationToken);

            if (!acquired)
            {
                throw new TimeoutException(string.Format("{0}: Timed out waiting for a free request slot.", uri));
            }

            try
            {
                var targetUri = uri;

                // Loop until a fetch returns; every captcha exception triggers another attempt.
                while (true)
                {
                    try
                    {
                        return(await GetAsyncAsHtmlDoc(targetUri, cancellationToken));
                    }
                    catch (EncounterCaptchaException e)
                    {
                        lock (Locker)
                        {
                            // NOTE(review): LastCaptchaSolvedOn is only read here; presumably
                            // SolveCaptcha updates it - confirm.
                            var timeDiff = DateTime.Now - LastCaptchaSolvedOn;

                            // last captcha solved more than 10 seconds ago
                            if (timeDiff.TotalSeconds > 10)
                            {
                                Log.Information("{Uri}: Encounter Captcha", e.Uri);

                                // ensures it will be solved
                                // NOTE(review): await is not allowed inside a lock statement, so this
                                // blocks the calling thread until the solver finishes, and failures
                                // surface wrapped in AggregateException. Removing the block requires
                                // replacing Locker with an async-compatible primitive.
                                new SolveCaptcha(this, e.Uri).ExecuteAsync(e.HtmlDocument, cancellationToken).Wait(cancellationToken);

                                // Retry against the address reported by the captcha exception.
                                targetUri = e.Uri;
                            }
                            else
                            {
                                Log.Information("{Uri}: Not required to solve Captcha", e.Uri);
                            }
                        }
                    }
                }
            }
            catch (Exception e)
            {
                // ToString() carries the exception type and message as well as the stack trace.
                Log.Error(e.ToString());
                throw;
            }
            finally
            {
                Semaphore.Release();
            }
        }
示例#2
0
        /// <summary>
        /// Requests <paramref name="uri"/>, follows 301/302 redirects, and returns the parsed HTML
        /// document of the first 200 response.
        /// </summary>
        /// <param name="uri">The address to request.</param>
        /// <param name="cancellationToken">Token passed to every underlying request.</param>
        /// <param name="setRedirectUri">
        /// When true, <c>RedirectUri</c> is set on the result even if no redirect was followed.
        /// It is always set once at least one redirect has been followed.
        /// </param>
        /// <returns>The parsed document, plus the final address when it was reached via a redirect.</returns>
        /// <exception cref="EncounterCaptchaException">The page title contains "Robot Check".</exception>
        /// <exception cref="NotSupportedException">
        /// The response status is not 200, 301 or 302, a redirect carries no Location header,
        /// or the redirect chain exceeds the hop limit.
        /// </exception>
        private async Task <AmazonResponseResult> GetAsyncAsHtmlDoc(Uri uri, CancellationToken cancellationToken, bool setRedirectUri = false)
        {
            // Safety cap so a redirect cycle cannot keep this method requesting forever.
            const int maxRedirects = 20;

            var currentUri = uri;
            var redirected = setRedirectUri;

            // Iterating (instead of recursing inside the using block) disposes each
            // intermediate response before the next request is issued.
            for (var hop = 0; hop <= maxRedirects; hop++)
            {
                using (var response = await GetAsync(currentUri, cancellationToken))
                {
                    if (response.StatusCode == HttpStatusCode.MovedPermanently || response.StatusCode == HttpStatusCode.Found)
                    {
                        var location = response.Headers.Location;

                        if (location == null)
                        {
                            throw new NotSupportedException(string.Format("{0}: Redirect response ({1}) has no Location header.", currentUri, (int)response.StatusCode));
                        }

                        // A Location header may be relative; resolve it against the address just requested.
                        currentUri = location.IsAbsoluteUri ? location : new Uri(currentUri, location);
                        redirected = true;
                        continue;
                    }

                    if (response.StatusCode == HttpStatusCode.OK)
                    {
                        var result = new AmazonResponseResult
                        {
                            HtmlDocument = await response.Content.ReadAsHtmlDocumentAsync()
                        };

                        if (redirected)
                        {
                            result.RedirectUri = currentUri;
                        }

                        // ensure not blocked
                        var title = new ExtractTitle().Execute(result.HtmlDocument);

                        if (title != null && title.Contains("Robot Check"))
                        {
                            throw new EncounterCaptchaException("Amazon has blocked scraper.", currentUri, result.HtmlDocument);
                        }

                        return(result);
                    }

                    throw new NotSupportedException(string.Format("{0}: Unsupported response status {1}.", currentUri, (int)response.StatusCode));
                }
            }

            throw new NotSupportedException(string.Format("{0}: More than {1} redirects were followed.", uri, maxRedirects));
        }