Beispiel #1
0
        /// <summary>
        /// Fetches <paramref name="uri"/> with per-host throttling and, when the response
        /// contains a non-blank page, builds the DOM for it via <c>BuildDom</c>.
        /// </summary>
        /// <param name="uri">Address to fetch; its <c>Host</c> is the throttling key.</param>
        /// <param name="isJsonContainer">
        /// When true the page is fetched with <c>FetchJsonData</c>, otherwise with <c>FetchData</c>.
        /// </param>
        /// <param name="returnNull">
        /// When false, a missing document is replaced by an empty <c>HtmlDocument</c>;
        /// when true, <c>HtmlDocument</c> may be left null. The flag is also forwarded to <c>BuildDom</c>.
        /// </param>
        /// <returns>
        /// The fetch result. Exceptions raised while fetching or building the DOM are written
        /// to the console and are not rethrown.
        /// </returns>
        public async Task <FetchResult> FetchDom(Uri uri, bool isJsonContainer = false, bool returnNull = false)
        {
            var fetchResult = new FetchResult();

            var domainRequestAssistant = _requestResetEvents.GetOrAdd(uri.Host, s => new DomainRequestAssistant());

            // UTC is used for the elapsed-time measurement so a DST/zone shift cannot distort it.
            var timeBeforeWait = DateTime.UtcNow;

            // NOTE(review): WaitOne blocks the calling thread for up to 2000 ms inside an async
            // method; an awaitable primitive would avoid that but needs a change to
            // DomainRequestAssistant, which is outside this method.
            Console.WriteLine("Waiting before call");
            domainRequestAssistant.ResetEvent.WaitOne(2000);
            Console.WriteLine("Waited '{0}' before call", (DateTime.UtcNow - timeBeforeWait).TotalMilliseconds);

            // After every NumberOfSequenctialRequests closely spaced requests, hold the host's
            // event unsignalled for one second so that other callers wait as well.
            if (domainRequestAssistant.FrequentTotalRequestCounter > 0 &&
                domainRequestAssistant.FrequentTotalRequestCounter % NumberOfSequenctialRequests == 0)
            {
                Console.WriteLine("Reseting the domain request assistant");
                domainRequestAssistant.ResetEvent.Reset();

                try
                {
                    Console.WriteLine("Delaying request... ({0})", uri.OriginalString);
                    await Task.Delay(1000);
                }
                finally
                {
                    // Always signal again, otherwise every later caller would run into the wait timeout.
                    Console.WriteLine("Setting the domain request assistant");
                    domainRequestAssistant.ResetEvent.Set();
                }
            }

            // NOTE(review): the counters are mutated in place without synchronization, so
            // concurrent callers for the same host may lose updates — confirm this is acceptable.
            _requestResetEvents.AddOrUpdate(uri.Host, s => new DomainRequestAssistant(), (s, assistant) =>
            {
                var now = DateTime.UtcNow;

                // Use the entry handed to the delegate rather than the captured outer variable.
                if ((now - assistant.UpdatedTime).TotalMilliseconds < MaxGoodGapBetweenCalls)
                {
                    assistant.FrequentTotalRequestCounter++;
                }
                else
                {
                    assistant.FrequentTotalRequestCounter = 0;
                }

                assistant.UpdatedTime = now;
                assistant.TotalRequestCounter++;

                return(assistant);
            });

            try
            {
                FetchResult fetched;
                if (isJsonContainer)
                {
                    fetched = await FetchJsonData(uri);
                }
                else
                {
                    fetched = await FetchData(uri);
                }

                // Keep the empty result if a fetch helper returned null, so the
                // post-processing below cannot fail on a null reference.
                if (fetched != null)
                {
                    fetchResult = fetched;
                }

                Console.WriteLine("Handling result from: {0}", uri.PathAndQuery);
                if (!string.IsNullOrWhiteSpace(fetchResult.RawPage))
                {
                    fetchResult.HtmlDocument = BuildDom(fetchResult.RawPage, returnNull);
                }
                else
                {
                    fetchResult.HtmlDocument = null;
                }
            }
            catch (Exception ex)
            {
                // Best effort: the failure is logged and the (possibly empty) result is still returned.
                Console.WriteLine(ex);
            }

            if (!returnNull && fetchResult.HtmlDocument == null)
            {
                fetchResult.HtmlDocument = new HtmlDocument();
            }

            return(fetchResult);
        }
Beispiel #2
0
        /// <summary>
        /// Downloads the page at <paramref name="uri"/> as a string, making up to three attempts.
        /// After a retryable failure (HTTP 429/500/503 or a cancelled/timed-out request) the next
        /// attempt is delayed and routed through the address built from <c>ProxyPath</c>.
        /// </summary>
        /// <param name="uri">Address to download.</param>
        /// <returns>
        /// A result whose <c>RawPage</c> holds the downloaded text on success and is left unset
        /// when every attempt fails; <c>UsedProxy</c> is set once a proxy attempt was made.
        /// Exceptions are written to the console and are not rethrown.
        /// </returns>
        private async Task <FetchResult> FetchData(Uri uri)
        {
            var originalUri = uri;
            var fetchResult = new FetchResult();
            var tryCount    = 3;
            var isSuccess   = false;
            var failed      = false;
            var rndTime     = new Random((int)DateTime.Now.Ticks & 0x0000FFFF);

            while (tryCount > 0 && !isSuccess)
            {
                tryCount--;
                try
                {
                    if (failed)
                    {
                        failed = false;
                        fetchResult.UsedProxy = true;
                        uri = new Uri(string.Format(ProxyPath, System.Web.HttpUtility.UrlEncode(originalUri.OriginalString)));

                        var domainRequestAssistant = _requestResetEvents.GetOrAdd(originalUri.Host,
                                                                                  s => new DomainRequestAssistant());

                        Console.WriteLine("Reseting the domain request assistant");
                        domainRequestAssistant.ResetEvent.Reset();

                        try
                        {
                            Console.WriteLine("Delaying request (after error)... ({0})", originalUri);
                            await Task.Delay(rndTime.Next(MinGoodGapBetweenCalls, MaxGoodGapBetweenCalls));
                        }
                        finally
                        {
                            // Signal again even if the delay could not be scheduled, so that
                            // waiters on this host are not left blocked until their timeout.
                            Console.WriteLine("Setting the domain request assistant");
                            domainRequestAssistant.ResetEvent.Set();
                        }
                    }

                    // Dispose the client (and its handler) after each attempt; the original leaked one per try.
                    using (var httpClient = _bypassCache ? new HttpClient(new BypassCacheHttpRequestHandler(_noCacheUri), true) : new HttpClient())
                    {
                        httpClient.Timeout = this.Timeout;

                        Console.WriteLine("Requesting: {0}", uri.PathAndQuery);
                        fetchResult.RawPage = await httpClient.GetStringAsync(uri);
                    }

                    isSuccess = true;
                }
                catch (ArgumentNullException aex)
                {
                    Console.WriteLine(aex);
                }
                catch (WebException wex)
                {
                    Console.WriteLine(wex);

                    // Response is null when no HTTP response was received (timeout, DNS failure, ...)
                    // and may be a non-HTTP response type, hence the safe cast.
                    var res = wex.Response as HttpWebResponse;
                    if (res != null)
                    {
                        // Cast the enum to its numeric value; parsing its name never yields the code.
                        var statusCode = (int)res.StatusCode;

                        if (statusCode == 429 || statusCode == 503 || statusCode == 500)
                        {
                            failed = true;
                        }
                    }
                }
                catch (HttpRequestException exception)
                {
                    Console.WriteLine(exception);

                    // The status code is taken from the exception text.
                    if (exception.Message.Contains("429") || exception.Message.Contains("503") || exception.Message.Contains("500"))
                    {
                        failed = true;
                    }
                }
                catch (TaskCanceledException exception)
                {
                    // HttpClient reports an elapsed Timeout as a cancelled task.
                    Console.WriteLine(exception);

                    failed = true;
                }
                catch (Exception ex)
                {
                    Console.WriteLine(ex);
                }
            }

            if (tryCount == 0 && !isSuccess)
            {
                var msg = string.Format("Failed all attempts to get url: {0}", originalUri.OriginalString);
                Console.WriteLine(msg);
            }

            return(fetchResult);
        }