/// <summary>
/// Creates the HTTP message handler for <paramref name="endpoint"/>. The handler is an
/// <see cref="HttpClientHandler"/> with GZip/Deflate decompression enabled; when the proxy provider
/// yields a proxy for the endpoint, it is wrapped in a <see cref="ProxyAuthenticationHandler"/>.
/// </summary>
/// <param name="endpoint">Service endpoint URL. Must not be null, empty, or whitespace.</param>
/// <param name="proxyProvider">Source of the proxy for the endpoint; may be null, in which case no proxy is set.</param>
/// <returns>
/// The plain <see cref="HttpClientHandler"/> when it has no proxy; otherwise a
/// <see cref="ProxyAuthenticationHandler"/> wrapping it.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="endpoint"/> is null, empty, or whitespace.</exception>
private static HttpMessageHandler BuildMessageHandler(string endpoint, IProxyProvider proxyProvider)
{
    if (string.IsNullOrWhiteSpace(endpoint))
    {
        throw new ArgumentOutOfRangeException(nameof(endpoint), endpoint, LocalizedStrings.MustBeValidEndpoint);
    }

    // The URI is built straight from the supplied string instead of forcing a scheme or port:
    // production use of ApiPort should always be HTTPS, but developers pointing at a non-production
    // portability service URL (via the -e command line parameter) may need a different scheme or port.
    var endpointUri = new Uri(endpoint);

    var innerHandler = new HttpClientHandler
    {
#if !FEATURE_SERVICE_POINT_MANAGER
        SslProtocols = CompressedHttpClient.SupportedSSLProtocols,
#endif
        Proxy = proxyProvider?.GetProxy(endpointUri),
        AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate
    };

    // Only add the proxy-authentication layer when a proxy is actually in use.
    if (innerHandler.Proxy != null)
    {
        return new ProxyAuthenticationHandler(innerHandler, proxyProvider);
    }

    return innerHandler;
}
/// <summary>
/// Initializes the service with a <see cref="CompressedHttpClient"/> targeting <paramref name="endpoint"/>.
/// The client uses a proxy obtained from <paramref name="proxyProvider"/>, adding a
/// <see cref="ProxyAuthenticationHandler"/> to the pipeline when a proxy is returned.
/// </summary>
/// <param name="endpoint">Service endpoint URL. Must not be null, empty, or whitespace.</param>
/// <param name="info">Product information passed through to the HTTP client.</param>
/// <param name="proxyProvider">Provider queried for the proxy to use for the endpoint. Must not be null.</param>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="endpoint"/> is null, empty, or whitespace.</exception>
/// <exception cref="ArgumentNullException"><paramref name="proxyProvider"/> is null.</exception>
public ApiPortService(string endpoint, ProductInformation info, IProxyProvider proxyProvider)
{
    if (string.IsNullOrWhiteSpace(endpoint))
    {
        throw new ArgumentOutOfRangeException(nameof(endpoint), endpoint, "Must be a valid endpoint");
    }

    if (proxyProvider == null)
    {
        throw new ArgumentNullException(nameof(proxyProvider));
    }

    var endpointUri = new Uri(endpoint);
    var webProxy = proxyProvider.GetProxy(endpointUri);

    // Innermost handler: proxy-aware and transparently decompressing.
    var innerHandler = new HttpClientHandler();
    innerHandler.Proxy = webProxy;
    innerHandler.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;

    // Additional HTTP handlers can be layered around the inner handler at this point.
    HttpMessageHandler pipeline;
    if (webProxy == null)
    {
        pipeline = innerHandler;
    }
    else
    {
        pipeline = new ProxyAuthenticationHandler(innerHandler, proxyProvider);
    }

    _client = new CompressedHttpClient(info, pipeline)
    {
        BaseAddress = new Uri(endpoint),
        Timeout = Timeout
    };
}
/// <summary>
/// Requests a proxy for <paramref name="classType"/> from the provider, passing a null argument array.
/// </summary>
/// <param name="proxyProvider">Provider that creates the proxy.</param>
/// <param name="classType">Type to obtain a proxy for. Must not be null.</param>
/// <returns>Whatever the provider returns for <paramref name="classType"/>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="classType"/> is null.</exception>
public static object GetProxy(this IProxyProvider proxyProvider, Type classType)
{
    if (classType != null)
    {
        // Typed as object[] so the call binds to the overload that takes an argument array.
        object[] noArguments = null;
        return proxyProvider.GetProxy(classType, noArguments);
    }

    throw new ArgumentNullException(nameof(classType));
}
/// <summary>
/// Performs a GET request against <paramref name="url"/> through a proxy taken from <c>_proxy</c>
/// and returns the response body, retrying on rate-limit responses and on exceptions.
/// </summary>
/// <param name="url">Address to request.</param>
/// <param name="retryCount">
/// Remaining attempts. When it reaches zero or below, <c>IsSpoiled</c> is set and an empty string is returned.
/// </param>
/// <returns>The response body, the result of a retry, or an empty string once the attempts are exhausted.</returns>
private string Request(string url, int retryCount = 5)
{
    if (retryCount <= 0)
    {
        IsSpoiled = true;
        return string.Empty;
    }

    // NOTE(review): presumably throttles consecutive requests by waiting up to RequestsDelayMs — confirm what _holder is.
    _holder.WaitOne(RequestsDelayMs);

    string html = string.Empty;
    var proxy = _proxy.GetProxy();
    try
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
        request.Proxy = proxy;
        request.AutomaticDecompression = DecompressionMethods.GZip;
        request.UserAgent = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.85 Safari/537.36";

        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream stream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(stream))
        {
            html = reader.ReadToEnd();
        }

        if (html.Contains("\"error_msg\":\"Too many requests per second\""))
        {
            // Rate limited: grow the delay by 20% and try again after waiting.
            RequestsDelayMs = Convert.ToInt32(RequestsDelayMs * 1.2);
            _log.Verbose($"Delay set {RequestsDelayMs}. Retry {5 - retryCount}/5.");
            Thread.Sleep(RequestsDelayMs);
            return Request(url, retryCount - 1);
        }

        // Successful call: shrink the delay by 20%, but never below 500 ms.
        RequestsDelayMs = Math.Max(500, Convert.ToInt32(RequestsDelayMs * 0.8));
    }
    catch (Exception ex)
    {
        _log.Error($"Error, retry {5 - retryCount}: {ex.Message}");
        Thread.Sleep(1000 * 60 * 5);

        // Return the outcome of the retry; previously it was discarded and an empty string
        // was returned even when the retry succeeded.
        return Request(url, retryCount - 1);
    }
    finally
    {
        // NOTE(review): this runs only after any nested retry has returned, so the proxy is
        // handed back late on the retry paths — confirm that is acceptable for PullBack.
        _proxy.PullBack(proxy, 0);
    }

    if (html.Contains("User authorization failed: invalid session."))
    {
        IsSpoiled = true;
    }

    return html;
}
/// <summary>
/// Initializes the service with a <see cref="CompressedHttpClient"/> targeting <paramref name="endpoint"/>.
/// The client uses a proxy obtained from <paramref name="proxyProvider"/>, adding a
/// <see cref="ProxyAuthenticationHandler"/> to the pipeline when a proxy is returned.
/// </summary>
/// <param name="endpoint">Service endpoint URL. Must not be null, empty, or whitespace.</param>
/// <param name="info">Product information passed through to the HTTP client.</param>
/// <param name="proxyProvider">Provider queried for the proxy to use for the endpoint. Must not be null.</param>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="endpoint"/> is null, empty, or whitespace.</exception>
/// <exception cref="ArgumentNullException"><paramref name="proxyProvider"/> is null.</exception>
public ApiPortService(string endpoint, ProductInformation info, IProxyProvider proxyProvider)
{
    if (string.IsNullOrWhiteSpace(endpoint))
    {
        throw new ArgumentOutOfRangeException(nameof(endpoint), endpoint, LocalizedStrings.MustBeValidEndpoint);
    }

    if (proxyProvider == null)
    {
        throw new ArgumentNullException(nameof(proxyProvider));
    }

    // The URI is built straight from the supplied string instead of forcing a scheme or port:
    // production use of ApiPort should always be HTTPS, but developers pointing at a non-production
    // portability service URL (via the -e command line parameter) may need a different scheme or port.
    var endpointUri = new Uri(endpoint);
    var webProxy = proxyProvider.GetProxy(endpointUri);

    // Innermost handler: proxy-aware and transparently decompressing.
    var innerHandler = new HttpClientHandler();
#if !FEATURE_SERVICE_POINT_MANAGER
    innerHandler.SslProtocols = CompressedHttpClient.SupportedSSLProtocols;
#endif
    innerHandler.Proxy = webProxy;
    innerHandler.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;

    // Additional HTTP handlers can be layered around the inner handler at this point.
    HttpMessageHandler pipeline;
    if (webProxy == null)
    {
        pipeline = innerHandler;
    }
    else
    {
        pipeline = new ProxyAuthenticationHandler(innerHandler, proxyProvider);
    }

    _client = new CompressedHttpClient(info, pipeline)
    {
        BaseAddress = new Uri(endpoint),
        Timeout = Timeout
    };
}
/// <summary>
/// Downloads the page for <paramref name="request"/> using the HTTP client configured for the task's site.
/// Failures are not propagated: on any exception the error hook runs and the current page value
/// (initially <c>Page.Fail()</c>) is returned.
/// </summary>
/// <param name="request">Request describing the page to fetch.</param>
/// <param name="task">Task that owns the request; it and its site must not be null.</param>
/// <returns>The page produced by <c>HandleResponse</c> on success; otherwise the page value held when the failure occurred.</returns>
/// <exception cref="ArgumentNullException"><paramref name="task"/> or its site is null.</exception>
public override Page Download(Request request, ITask task)
{
    if (task?.GetSite() == null)
    {
        throw new ArgumentNullException(nameof(task), "task or site can not be null");
    }

    HttpResults response = null;
    var client = GetHttpClient(task.GetSite());
    var proxy = _proxyProvider?.GetProxy(task);

    // The converted request context is not consumed further in this method.
    var convertedContext = _httpUriRequestConverter.Convert(request, task.GetSite(), proxy);

    Page result = Page.Fail();
    try
    {
        response = client.GetHtml();
        _contentType = client.Items.ContentType;
        _charset = client.Items.EncodingStr;

        var charset = request.GetCharset() ?? task.GetSite().Charset;
        result = HandleResponse(request, charset, response, task);

        OnSuccess(request);
        Debug.WriteLine($"downloading page success {request.GetUrl()}");
    }
    catch (Exception e)
    {
        Debug.WriteLine($"download page {request.GetUrl()} error : {e.Message}");
        OnError(request);
    }
    finally
    {
        // Hand the proxy back together with whatever page was produced.
        if (_proxyProvider != null && proxy != null)
        {
            _proxyProvider.ReturnProxy(proxy, result, task);
        }
    }

    return result;
}
/// <summary>
/// Requests a proxy from the provider for the <typeparamref name="TInterface"/> /
/// <typeparamref name="TClass"/> pair and returns it typed as <typeparamref name="TInterface"/>.
/// </summary>
/// <typeparam name="TInterface">Reference type the result is exposed as.</typeparam>
/// <typeparam name="TClass">Type derived from or implementing <typeparamref name="TInterface"/>.</typeparam>
/// <param name="proxyProvider">Provider that creates the proxy.</param>
/// <param name="parameters">Arguments forwarded to the provider.</param>
/// <returns>The provider's result, or null when it is not a <typeparamref name="TInterface"/>.</returns>
public static TInterface GetProxy<TInterface, TClass>(this IProxyProvider proxyProvider, params object[] parameters)
    where TInterface : class
    where TClass : TInterface
{
    var created = proxyProvider.GetProxy(typeof(TInterface), typeof(TClass), parameters);
    return created as TInterface;
}
/// <summary>
/// Requests a proxy for <typeparamref name="TClass"/> from the provider, forwarding the given arguments.
/// </summary>
/// <typeparam name="TClass">Reference type to obtain a proxy for.</typeparam>
/// <param name="proxyProvider">Provider that creates the proxy.</param>
/// <param name="parameters">Arguments forwarded to the provider.</param>
/// <returns>The provider's result, or null when it is not a <typeparamref name="TClass"/>.</returns>
public static TClass GetProxy<TClass>(this IProxyProvider proxyProvider, params object[] parameters)
    where TClass : class
{
    var created = proxyProvider.GetProxy(typeof(TClass), parameters);
    return created as TClass;
}
/// <summary>
/// Requests a proxy for <typeparamref name="TClass"/> from the provider, passing a null argument array.
/// </summary>
/// <typeparam name="TClass">Reference type to obtain a proxy for.</typeparam>
/// <param name="proxyProvider">Provider that creates the proxy.</param>
/// <returns>The provider's result, or null when it is not a <typeparamref name="TClass"/>.</returns>
public static TClass GetProxy<TClass>(this IProxyProvider proxyProvider)
    where TClass : class
{
    // Typed as object[] so the call binds to the overload that takes an argument array.
    object[] noArguments = null;
    var created = proxyProvider.GetProxy(typeof(TClass), noArguments);
    return created as TClass;
}
/// <summary>
/// Downloads the search page for the domain in <paramref name="message"/>, reads the number from the
/// element with id <c>resultStats</c>, and replies to the sender with either a <c>DomainStat</c>
/// or a <c>ParsingError</c>.
/// </summary>
/// <param name="message">Input carrying the domain URL and whether the request should go through a proxy.</param>
/// <remarks>
/// A missing or empty <c>resultStats</c> element is reported as zero pages. Text that cannot be
/// turned into a number is reported as a <c>ParsingError</c> instead of throwing.
/// </remarks>
private void HandleDomainUrl(ParserInput message)
{
    using (var webClient = new WebClient())
    {
        if (message.UseProxy)
        {
            webClient.Proxy = _proxyProvider.GetProxy();
        }

        var url = _proxyProvider.GetRequestUrl(string.Format(GoogleSearchUrl, message.Url));
        webClient.Headers.Add("Accept-Language", "en-US");
        var pageData = webClient.DownloadString(url);

        var page = new HtmlDocument();
        page.LoadHtml(pageData);

        HtmlNode resultStatsNode = page.GetElementbyId("resultStats");
        if (resultStatsNode == null || resultStatsNode.InnerHtml == string.Empty)
        {
            // No statistics element (or an empty one) is reported as zero pages.
            var emptyStat = new DomainStat
            {
                DomainURL = message.Url,
                IndexingData = new IndexingData
                {
                    PagesNumber = 0,
                    ProcessingDate = DateTime.Now.Date
                }
            };
            Sender.Tell(emptyStat, Self);
            return;
        }

        var resultStats = HtmlEntity.DeEntitize(resultStatsNode.InnerHtml);

        // The number starts at the first non-zero digit and ends at the last digit of any kind.
        var firstNumberIndex = resultStats.IndexOfAny("123456789".ToCharArray());
        var lastNumberIndex = resultStats.LastIndexOfAny("0123456789".ToCharArray());

        long indexedPagesNumber = 0;
        var parsed = false;
        if (firstNumberIndex >= 0 && lastNumberIndex >= firstNumberIndex)
        {
            var match = resultStats.Substring(firstNumberIndex, lastNumberIndex - firstNumberIndex + 1);

            // Strip group separators and whitespace before parsing.
            var numberString = match.Replace(",", string.Empty).Replace(".", string.Empty);
            numberString = Regex.Replace(numberString, @"\s+", "");

            // TryParse instead of Parse: any non-digit text left between the first and last digit
            // (or an overflowing value) becomes a ParsingError reply rather than an exception.
            parsed = long.TryParse(
                numberString,
                System.Globalization.NumberStyles.None,
                System.Globalization.CultureInfo.InvariantCulture,
                out indexedPagesNumber);
        }

        if (parsed)
        {
            var domainStat = new DomainStat
            {
                DomainURL = message.Url,
                IndexingData = new IndexingData
                {
                    PagesNumber = indexedPagesNumber,
                    ProcessingDate = DateTime.Now.Date
                }
            };
            Sender.Tell(domainStat, Self);
        }
        else
        {
            var errorMessage = "Match was not successful! Result stats: " + resultStats;
            var parsingError = new ParsingError
            {
                DomainURL = message.Url,
                ErrorMessage = errorMessage
            };
            Sender.Tell(parsingError, Self);
        }
    }
}
/// <summary>
/// Performs a GET request against <paramref name="url"/> through a proxy taken from
/// <c>_proxyProvider</c> and returns the response body, retrying on rate-limit responses and on exceptions.
/// </summary>
/// <param name="url">Address to request.</param>
/// <param name="retryCount">
/// Remaining attempts. When it reaches zero or below, <c>IsSpoiled</c> is set and an empty string is returned.
/// </param>
/// <returns>The response body, the result of a retry, or an empty string once the attempts are exhausted.</returns>
private string Request(string url, int retryCount = 3)
{
    if (retryCount <= 0)
    {
        IsSpoiled = true;
        return string.Empty;
    }

    // Keep at least RequestsDelayMs between the previous response and this request.
    var elapsed = (DateTime.Now - _lastCall).TotalMilliseconds;
    if (elapsed < RequestsDelayMs)
    {
        Thread.Sleep(Math.Min(RequestsDelayMs, Convert.ToInt32(RequestsDelayMs - elapsed)));
    }

    // Back-off before a retry after an exception: (4 - retryCount) minutes, clamped at zero
    // because Thread.Sleep rejects negative timeouts other than -1.
    var retryDelayMs = Math.Max(0, 4 - retryCount) * 60 * 1000;

    var html = string.Empty;
    var proxy = _proxyProvider.GetProxy();
    var sw = Stopwatch.StartNew();
    try
    {
        var request = (HttpWebRequest)WebRequest.Create(url);
        request.ServicePoint.Expect100Continue = false;
        request.Proxy = proxy;
        request.AutomaticDecompression = DecompressionMethods.GZip;
        request.UserAgent = config.Default.UserAgents[_rnd.Next(config.Default.UserAgents.Count)];

        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream stream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(stream))
        {
            _lastCall = DateTime.Now;
            html = reader.ReadToEnd();
        }

        if (html.Contains("\"error_msg\":\"Too many requests per second\""))
        {
            _log.Warning($"[{Id}] Delay set {RequestsDelayMs}. Retry {retryCount}.");
            Thread.Sleep(10000);
            return Request(url, retryCount - 1);
        }
    }
    catch (WebException)
    {
        Thread.Sleep(retryDelayMs);
        return Request(url, retryCount - 1);
    }
    catch (Exception ex)
    {
        // Null-conditional access: the proxy may be null or not a WebProxy, and a
        // NullReferenceException here would hide the exception being logged.
        _log.Error(ex, "Failed Vkkey: {FailedVkKey} Proxy {FailedProxy}", _vkAccessToken, (proxy as WebProxy)?.Address);
        Thread.Sleep(retryDelayMs);
        return Request(url, retryCount - 1);
    }
    finally
    {
        // NOTE(review): on the retry paths this runs only after the nested call returns, so the
        // reported elapsed time includes the back-off sleep and the retry — confirm that is intended.
        _proxyProvider.PullBack(proxy, sw.ElapsedMilliseconds);
    }

    if (html.Contains("User authorization failed: invalid session."))
    {
        IsSpoiled = true;
    }

    return html;
}
/// <summary>
/// Loads the account's cookies into the request, fetches <c>https://www.facebook.com/</c>, and
/// extracts the <c>"token"</c> value from the response into <c>fb_dtsg</c>.
/// </summary>
/// <returns>
/// <c>false</c> when the account has no cookies; otherwise the value of <c>IsAuthorized</c>, which is
/// updated only when a response was received (true when a non-blank token was found).
/// </returns>
public bool Authorization()
{
    if (!AccountData.Cookies.Any())
    {
        return false;
    }

    var cookieJar = new CookieStorage();
    foreach (var entry in AccountData.Cookies)
    {
        try
        {
            // The c_user cookie value is kept as the actor id.
            if (entry.name == "c_user")
            {
                actor_id = entry.value;
            }

            cookieJar.Add(new Cookie(entry.name, entry.value, entry.path, entry.domain));
        }
        catch
        {
            // A cookie that fails here is skipped; the remaining cookies are still processed.
        }
    }

    _request.Cookies = cookieJar;
    if (EnableProxies)
    {
        _request.Proxy = _proxyProvider.GetProxy();
    }

    var attemptsLeft = 3;
    while (attemptsLeft > 0)
    {
        try
        {
            _request["Accept-Language"] = "en-US,en;q=0.9,ru-RU;q=0.8,ru;q=0.7";
            _request["Origin"] = "https://www.facebook.com";
            _request["Referer"] = "https://www.facebook.com/";
            _request["content-type"] = "application/x-www-form-urlencoded";

            var homePage = _request.Get("https://www.facebook.com/");
            fb_dtsg = Regex.Match(homePage.ToString(), "\"token\":\"(.*?)\"").Groups[1].Value;
            IsAuthorized = !string.IsNullOrWhiteSpace(fb_dtsg);
            break;
        }
        catch (HttpException)
        {
            // Without proxies there is nothing to switch to, so give up immediately.
            if (!EnableProxies)
            {
                break;
            }

            // Switch to another proxy and spend one attempt.
            _request.Proxy = _proxyProvider.GetProxy();
            attemptsLeft--;
        }
        catch
        {
            break;
        }
    }

    return IsAuthorized;
}