/// <summary>
/// Fetches <paramref name="uri"/> and parses the returned page into a DOM, applying
/// per-host throttling tracked in <c>_requestResetEvents</c>.
/// </summary>
/// <param name="uri">Address to fetch; <c>uri.Host</c> selects the per-domain throttling state.</param>
/// <param name="isJsonContainer">When true the page is fetched via <c>FetchJsonData</c>, otherwise via <c>FetchData</c>.</param>
/// <param name="returnNull">
/// Forwarded to <c>BuildDom</c>. When false, a missing document is replaced with an empty
/// <c>HtmlDocument</c> before returning.
/// </param>
/// <returns>
/// The fetch result. Exceptions raised while fetching or parsing are written to the console
/// and not rethrown.
/// </returns>
public async Task<FetchResult> FetchDom(Uri uri, bool isJsonContainer = false, bool returnNull = false)
{
    var fetchResult = new FetchResult();
    var domainRequestAssistant = _requestResetEvents.GetOrAdd(uri.Host, s => new DomainRequestAssistant());

    // UTC is used for the elapsed-time measurement so a local clock shift (DST, time-zone
    // change) cannot distort the reported wait.
    var timeBeforeWait = DateTime.UtcNow;
    Console.WriteLine("Waiting before call");

    // NOTE(review): WaitOne blocks the calling thread (up to 2 seconds) inside an async method.
    domainRequestAssistant.ResetEvent.WaitOne(2000);
    Console.WriteLine("Waited '{0}' before call", (DateTime.UtcNow - timeBeforeWait).TotalMilliseconds);

    // After every NumberOfSequenctialRequests closely spaced requests, hold the event reset for
    // one second so other callers for this host wait in WaitOne above.
    if (domainRequestAssistant.FrequentTotalRequestCounter > 0
        && domainRequestAssistant.FrequentTotalRequestCounter % NumberOfSequenctialRequests == 0)
    {
        Console.WriteLine("Reseting the domain request assistant");
        domainRequestAssistant.ResetEvent.Reset();
        Console.WriteLine("Delaying request... ({0})", uri.OriginalString);
        await Task.Delay(1000);
        Console.WriteLine("Setting the domain request assistant");
        domainRequestAssistant.ResetEvent.Set();
    }

    _requestResetEvents.AddOrUpdate(
        uri.Host,
        s => new DomainRequestAssistant(),
        (s, assistant) =>
        {
            // Read the timestamp from the instance being updated, not from the captured local.
            var millisecondsSinceLastCall = (DateTime.UtcNow - assistant.UpdatedTime).TotalMilliseconds;
            if (millisecondsSinceLastCall < MaxGoodGapBetweenCalls)
            {
                assistant.FrequentTotalRequestCounter++;
            }
            else
            {
                assistant.FrequentTotalRequestCounter = 0;
            }

            assistant.UpdatedTime = DateTime.UtcNow;
            assistant.TotalRequestCounter++;
            return assistant;
        });

    try
    {
        if (isJsonContainer)
        {
            fetchResult = await FetchJsonData(uri);
        }
        else
        {
            fetchResult = await FetchData(uri);
        }

        // Defensive: a null result would otherwise throw here and again after the catch
        // block, where nothing handles it.
        if (fetchResult == null)
        {
            fetchResult = new FetchResult();
        }

        Console.WriteLine("Handling result from: {0}", uri.PathAndQuery);

        if (!string.IsNullOrWhiteSpace(fetchResult.RawPage))
        {
            fetchResult.HtmlDocument = BuildDom(fetchResult.RawPage, returnNull);
        }
        else
        {
            fetchResult.HtmlDocument = null;
        }
    }
    catch (Exception ex)
    {
        // Best effort: log and fall through so the caller still receives a result object.
        Console.WriteLine(ex);
    }

    if (!returnNull && fetchResult.HtmlDocument == null)
    {
        fetchResult.HtmlDocument = new HtmlDocument();
    }

    return fetchResult;
}
/// <summary>
/// Downloads the body of <paramref name="uri"/> as a string, making up to three attempts.
/// </summary>
/// <remarks>
/// When an attempt is cancelled/times out (<c>TaskCanceledException</c>) or fails with HTTP
/// 429, 500 or 503, the next attempt first holds the host's reset event for a random delay
/// between <c>MinGoodGapBetweenCalls</c> and <c>MaxGoodGapBetweenCalls</c> milliseconds and
/// then requests the page through the URL built from <c>ProxyPath</c>. Once switched, later
/// attempts keep using the proxy URL.
/// </remarks>
/// <param name="uri">Address to download.</param>
/// <returns>
/// A result whose <c>RawPage</c> holds the response body on success; <c>UsedProxy</c> is set
/// once a proxy attempt was made. Failures are logged to the console and not rethrown.
/// </returns>
private async Task<FetchResult> FetchData(Uri uri)
{
    var originalUri = uri;
    var fetchResult = new FetchResult();
    var tryCount = 3;
    var isSuccess = false;
    var failed = false;
    var rndTime = new Random((int)DateTime.Now.Ticks & 0x0000FFFF);

    while (tryCount > 0 && !isSuccess)
    {
        tryCount--;
        try
        {
            if (failed)
            {
                failed = false;
                fetchResult.UsedProxy = true;
                uri = new Uri(string.Format(ProxyPath, System.Web.HttpUtility.UrlEncode(originalUri.OriginalString)));

                var domainRequestAssistant = _requestResetEvents.GetOrAdd(originalUri.Host, s => new DomainRequestAssistant());
                Console.WriteLine("Reseting the domain request assistant");
                domainRequestAssistant.ResetEvent.Reset();
                try
                {
                    Console.WriteLine("Delaying request (after error)... ({0})", originalUri);
                    await Task.Delay(rndTime.Next(MinGoodGapBetweenCalls, MaxGoodGapBetweenCalls));
                }
                finally
                {
                    // Always release the event, even if computing or awaiting the delay throws,
                    // so it is never left in the reset state.
                    Console.WriteLine("Setting the domain request assistant");
                    domainRequestAssistant.ResetEvent.Set();
                }
            }

            // Dispose the per-attempt client (and the handler it owns) instead of leaking it.
            // NOTE(review): a shared HttpClient instance would avoid per-request socket churn,
            // but that requires a change outside this method.
            using (var httpClient = _bypassCache
                ? new HttpClient(new BypassCacheHttpRequestHandler(_noCacheUri), true)
                : new HttpClient())
            {
                httpClient.Timeout = this.Timeout;
                Console.WriteLine("Requesting: {0}", uri.PathAndQuery);
                fetchResult.RawPage = await httpClient.GetStringAsync(uri);
            }

            isSuccess = true;
        }
        catch (ArgumentNullException aex)
        {
            Console.WriteLine(aex);
        }
        catch (WebException wex)
        {
            Console.WriteLine(wex);

            // Response is null when no HTTP response was received, and may be a non-HTTP
            // response type; 'as' avoids throwing from inside the handler in both cases.
            var res = wex.Response as HttpWebResponse;
            if (res != null)
            {
                // Cast to the numeric code: ToString() yields the enum name for defined
                // members (e.g. "ServiceUnavailable"), which can never parse as 503.
                var statusCode = (int)res.StatusCode;
                if (statusCode == 429 || statusCode == 503 || statusCode == 500)
                {
                    failed = true;
                }
            }
        }
        catch (HttpRequestException exception)
        {
            Console.WriteLine(exception);
            if (exception.Message.Contains("429") || exception.Message.Contains("503") || exception.Message.Contains("500"))
            {
                failed = true;
            }
        }
        catch (TaskCanceledException exception)
        {
            Console.WriteLine(exception);
            failed = true;
        }
        catch (Exception ex)
        {
            Console.WriteLine(ex);
        }
    }

    if (tryCount == 0 && !isSuccess)
    {
        var msg = string.Format("Failed all attempts to get url: {0}", originalUri.OriginalString);
        Console.WriteLine(msg);
    }

    return fetchResult;
}