Example #1
0
        /// <summary>
        /// Builds the HTTP message handler used for requests to <paramref name="endpoint"/>.
        /// </summary>
        /// <param name="endpoint">Service URL; must not be null, empty, or whitespace.</param>
        /// <param name="proxyProvider">Source of the proxy for the endpoint; may be null, in which case no proxy is set.</param>
        /// <returns>
        /// The plain <see cref="HttpClientHandler"/> when it has no proxy; otherwise that handler wrapped in a
        /// <see cref="ProxyAuthenticationHandler"/>.
        /// </returns>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="endpoint"/> is null, empty, or whitespace.</exception>
        private static HttpMessageHandler BuildMessageHandler(string endpoint, IProxyProvider proxyProvider)
        {
            if (string.IsNullOrWhiteSpace(endpoint))
            {
                throw new ArgumentOutOfRangeException(nameof(endpoint), endpoint, LocalizedStrings.MustBeValidEndpoint);
            }

            // The URI is parsed straight from the string (no hard-coded scheme or port): production use of
            // ApiPort should always be HTTPS, but developers pointing at a non-production portability
            // service URL (via the -e command line parameter) may need a different scheme or port.
            var endpointUri = new Uri(endpoint);

            var innerHandler = new HttpClientHandler();
#if !FEATURE_SERVICE_POINT_MANAGER
            innerHandler.SslProtocols = CompressedHttpClient.SupportedSSLProtocols;
#endif
            innerHandler.Proxy = proxyProvider?.GetProxy(endpointUri);
            innerHandler.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;

            // Only wrap with the proxy authentication handler when a proxy is actually in play.
            return innerHandler.Proxy == null
                ? (HttpMessageHandler)innerHandler
                : new ProxyAuthenticationHandler(innerHandler, proxyProvider);
        }
Example #2
0
        /// <summary>
        /// Creates the service client for <paramref name="endpoint"/>, routing requests through the proxy
        /// supplied by <paramref name="proxyProvider"/> when one is available.
        /// </summary>
        /// <param name="endpoint">Service URL; must not be null, empty, or whitespace.</param>
        /// <param name="info">Product information handed to the underlying <see cref="CompressedHttpClient"/>.</param>
        /// <param name="proxyProvider">Provider queried for the proxy to use for the endpoint.</param>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="endpoint"/> is null, empty, or whitespace.</exception>
        /// <exception cref="ArgumentNullException"><paramref name="proxyProvider"/> is null.</exception>
        public ApiPortService(string endpoint, ProductInformation info, IProxyProvider proxyProvider)
        {
            if (string.IsNullOrWhiteSpace(endpoint))
            {
                throw new ArgumentOutOfRangeException(nameof(endpoint), endpoint, "Must be a valid endpoint");
            }

            if (proxyProvider == null)
            {
                throw new ArgumentNullException(nameof(proxyProvider));
            }

            var endpointUri   = new Uri(endpoint);
            var endpointProxy = proxyProvider.GetProxy(endpointUri);

            // Innermost handler: proxy aware, with transparent gzip/deflate decompression.
            var innerHandler = new HttpClientHandler
            {
                Proxy = endpointProxy,
                AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate
            };

            // Additional HTTP handlers can be layered around the inner handler here.
            HttpMessageHandler pipeline = endpointProxy != null
                ? new ProxyAuthenticationHandler(innerHandler, proxyProvider)
                : (HttpMessageHandler)innerHandler;

            _client = new CompressedHttpClient(info, pipeline)
            {
                BaseAddress = endpointUri,
                Timeout     = Timeout
            };
        }
Example #3
0
        /// <summary>
        /// Requests a proxy for <paramref name="classType"/> from the provider, passing a null parameter array.
        /// </summary>
        /// <param name="proxyProvider">Provider that creates the proxy.</param>
        /// <param name="classType">Type to request a proxy for; must not be null.</param>
        /// <returns>Whatever the provider returns for the given type.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="classType"/> is null.</exception>
        public static object GetProxy(this IProxyProvider proxyProvider, Type classType)
        {
            if (classType != null)
            {
                // Typed as object[] so the (Type, object[]) overload is selected with a null array.
                object[] noParameters = null;
                return proxyProvider.GetProxy(classType, noParameters);
            }

            throw new ArgumentNullException(nameof(classType));
        }
Example #4
0
        /// <summary>
        /// Downloads <paramref name="url"/> through a proxy taken from <c>_proxy</c>, retrying on
        /// rate-limit responses and on exceptions.
        /// </summary>
        /// <param name="url">Address to request.</param>
        /// <param name="retryCount">Remaining attempts; when it reaches zero the instance is marked spoiled.</param>
        /// <returns>
        /// The response body, or an empty string once the retries are exhausted.
        /// </returns>
        /// <remarks>
        /// A rate-limit response increases <c>RequestsDelayMs</c> by 20%; any other response decreases it by 20%,
        /// with a floor of 500 ms. The proxy is handed back via <c>PullBack</c> in all cases.
        /// </remarks>
        private string Request(string url, int retryCount = 5)
        {
            if (retryCount <= 0)
            {
                IsSpoiled = true;
                return string.Empty;
            }

            _holder.WaitOne(RequestsDelayMs);
            string html = string.Empty;

            var proxy = _proxy.GetProxy();

            try
            {
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
                request.Proxy = proxy;
                request.AutomaticDecompression = DecompressionMethods.GZip;
                request.UserAgent = "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.85 Safari/537.36";

                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                {
                    using (Stream stream = response.GetResponseStream())
                    {
                        using (StreamReader reader = new StreamReader(stream))
                        {
                            html = reader.ReadToEnd();
                        }
                    }
                }

                if (html.Contains("\"error_msg\":\"Too many requests per second\""))
                {
                    // Back off: lengthen the delay and try again with one fewer attempt.
                    RequestsDelayMs = Convert.ToInt32(RequestsDelayMs * 1.2);
                    _log.Verbose($"Delay set {RequestsDelayMs}. Retry {5 - retryCount}/5.");
                    Thread.Sleep(RequestsDelayMs);
                    return Request(url, retryCount - 1);
                }

                // Not rate limited: shorten the delay, but never below 500 ms.
                RequestsDelayMs = Math.Max(500, Convert.ToInt32(RequestsDelayMs * 0.8));
            }
            catch (Exception ex)
            {
                _log.Error($"Error, retry {5 - retryCount}: {ex.Message}");
                Thread.Sleep(1000 * 60 * 5);

                // Return the retry's result; previously it was discarded and an empty string was
                // returned even when the retry succeeded.
                return Request(url, retryCount - 1);
            }
            finally
            {
                _proxy.PullBack(proxy, 0);
            }

            if (html.Contains("User authorization failed: invalid session."))
            {
                IsSpoiled = true;
            }

            return html;
        }
Example #5
0
        /// <summary>
        /// Creates the service client for <paramref name="endpoint"/>, using the proxy returned by
        /// <paramref name="proxyProvider"/> for that endpoint when there is one.
        /// </summary>
        /// <param name="endpoint">Service URL; must not be null, empty, or whitespace.</param>
        /// <param name="info">Product information handed to the underlying <see cref="CompressedHttpClient"/>.</param>
        /// <param name="proxyProvider">Provider queried for the proxy to use for the endpoint.</param>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="endpoint"/> is null, empty, or whitespace.</exception>
        /// <exception cref="ArgumentNullException"><paramref name="proxyProvider"/> is null.</exception>
        public ApiPortService(string endpoint, ProductInformation info, IProxyProvider proxyProvider)
        {
            if (string.IsNullOrWhiteSpace(endpoint))
            {
                throw new ArgumentOutOfRangeException(nameof(endpoint), endpoint, LocalizedStrings.MustBeValidEndpoint);
            }

            if (proxyProvider == null)
            {
                throw new ArgumentNullException(nameof(proxyProvider));
            }

            // The URI is parsed straight from the string (no hard-coded scheme or port): production use of
            // ApiPort should always be HTTPS, but developers pointing at a non-production portability
            // service URL (via the -e command line parameter) may need a different scheme or port.
            var endpointUri   = new Uri(endpoint);
            var endpointProxy = proxyProvider.GetProxy(endpointUri);

            // Proxy-aware handler that sits at the bottom of the pipeline.
            var innerHandler = new HttpClientHandler();
#if !FEATURE_SERVICE_POINT_MANAGER
            innerHandler.SslProtocols = CompressedHttpClient.SupportedSSLProtocols;
#endif
            innerHandler.Proxy = endpointProxy;
            innerHandler.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;

            // Additional HTTP handlers can be layered around the inner handler here.
            HttpMessageHandler pipeline;
            if (endpointProxy == null)
            {
                pipeline = innerHandler;
            }
            else
            {
                pipeline = new ProxyAuthenticationHandler(innerHandler, proxyProvider);
            }

            var client = new CompressedHttpClient(info, pipeline);
            client.BaseAddress = endpointUri;
            client.Timeout     = Timeout;
            _client = client;
        }
Example #6
0
        /// <summary>
        /// Downloads the page for <paramref name="request"/> on behalf of <paramref name="task"/>.
        /// </summary>
        /// <param name="request">Request describing the page to fetch.</param>
        /// <param name="task">Task that owns the request; it and its site must not be null.</param>
        /// <returns>
        /// The page produced by <c>HandleResponse</c>, or the <c>Page.Fail()</c> placeholder when the download
        /// or response handling throws.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="task"/> or its site is null.</exception>
        public override Page Download(Request request, ITask task)
        {
            if (task?.GetSite() == null)
            {
                throw new ArgumentNullException(nameof(task), "task or site can not be null");
            }

            var client = GetHttpClient(task.GetSite());
            var proxy  = _proxyProvider?.GetProxy(task);

            // The converted request context is not used below; the call is kept for whatever it does internally.
            _httpUriRequestConverter.Convert(request, task.GetSite(), proxy);

            // Start from the failure placeholder so the finally block always has a page to report.
            Page page = Page.Fail();

            try
            {
                HttpResults httpResponse = client.GetHtml();
                _contentType = client.Items.ContentType;
                _charset     = client.Items.EncodingStr;

                var charset = request.GetCharset() ?? task.GetSite().Charset;
                page = HandleResponse(request, charset, httpResponse, task);

                OnSuccess(request);
                Debug.WriteLine($"downloading page success {request.GetUrl()}");
            }
            catch (Exception e)
            {
                Debug.WriteLine($"download page {request.GetUrl()} error : {e.Message}");
                OnError(request);
            }
            finally
            {
                // Hand the proxy back together with the page that resulted from using it.
                if (proxy != null && _proxyProvider != null)
                {
                    _proxyProvider.ReturnProxy(proxy, page, task);
                }
            }

            return page;
        }
Example #7
0
 /// <summary>
 /// Requests a proxy for the <typeparamref name="TInterface"/> / <typeparamref name="TClass"/> pair and
 /// returns it typed as <typeparamref name="TInterface"/>.
 /// </summary>
 /// <param name="proxyProvider">Provider that creates the proxy.</param>
 /// <param name="parameters">Parameters forwarded unchanged to the provider.</param>
 /// <returns>The provider's result, or null when it is not a <typeparamref name="TInterface"/>.</returns>
 public static TInterface GetProxy <TInterface, TClass>(this IProxyProvider proxyProvider, params object[] parameters)
     where TInterface : class
     where TClass : TInterface
 {
     var contractType       = typeof(TInterface);
     var implementationType = typeof(TClass);
     var instance           = proxyProvider.GetProxy(contractType, implementationType, parameters);

     return instance as TInterface;
 }
Example #8
0
 /// <summary>
 /// Requests a proxy for <typeparamref name="TClass"/> and returns it typed as <typeparamref name="TClass"/>.
 /// </summary>
 /// <param name="proxyProvider">Provider that creates the proxy.</param>
 /// <param name="parameters">Parameters forwarded unchanged to the provider.</param>
 /// <returns>The provider's result, or null when it is not a <typeparamref name="TClass"/>.</returns>
 public static TClass GetProxy <TClass>(this IProxyProvider proxyProvider, params object[] parameters)
     where TClass : class
 {
     var requestedType = typeof(TClass);
     var instance      = proxyProvider.GetProxy(requestedType, parameters);

     return instance as TClass;
 }
Example #9
0
 /// <summary>
 /// Requests a proxy for <typeparamref name="TClass"/>, passing a null parameter array to the provider.
 /// </summary>
 /// <param name="proxyProvider">Provider that creates the proxy.</param>
 /// <returns>The provider's result, or null when it is not a <typeparamref name="TClass"/>.</returns>
 public static TClass GetProxy <TClass>(this IProxyProvider proxyProvider)
     where TClass : class
 {
     // Typed as object[] so the (Type, object[]) overload is selected with a null array.
     object[] noParameters = null;
     var instance = proxyProvider.GetProxy(typeof(TClass), noParameters);

     return instance as TClass;
 }
Example #10
0
        /// <summary>
        /// Downloads the search page for <c>message.Url</c>, extracts the indexed-pages count from the
        /// <c>resultStats</c> element and replies to the sender.
        /// </summary>
        /// <param name="message">Input carrying the domain URL and whether a proxy should be used.</param>
        /// <remarks>
        /// Replies with a <c>DomainStat</c> (count 0 when the <c>resultStats</c> element is missing or empty),
        /// or with a <c>ParsingError</c> when no number can be extracted from the element's text.
        /// </remarks>
        private void HandleDomainUrl(ParserInput message)
        {
            using (var webClient = new WebClient())
            {
                if (message.UseProxy)
                {
                    webClient.Proxy = _proxyProvider.GetProxy();
                }

                var url = _proxyProvider.GetRequestUrl(string.Format(GoogleSearchUrl, message.Url));

                webClient.Headers.Add("Accept-Language", "en-US");

                var pageData = webClient.DownloadString(url);

                var page = new HtmlDocument();
                page.LoadHtml(pageData);

                // Stays 0 when the page has no (or an empty) resultStats element.
                long indexedPagesNumber = 0;

                HtmlNode resultStatsNode = page.GetElementbyId("resultStats");
                if (resultStatsNode != null && resultStatsNode.InnerHtml != string.Empty)
                {
                    var resultStats = HtmlEntity.DeEntitize(resultStatsNode.InnerHtml);

                    // The number is taken as the span from the first non-zero digit to the last digit.
                    var firstNumberIndex = resultStats.IndexOfAny("123456789".ToCharArray());
                    var lastNumberIndex  = resultStats.LastIndexOfAny("0123456789".ToCharArray());

                    var parsed = false;
                    if (firstNumberIndex >= 0 && lastNumberIndex >= firstNumberIndex)
                    {
                        var match = resultStats.Substring(firstNumberIndex, lastNumberIndex - firstNumberIndex + 1);

                        // Strip thousands separators (comma, dot, whitespace) before parsing.
                        var numberString = match.Replace(",", string.Empty).Replace(".", string.Empty);
                        numberString = Regex.Replace(numberString, @"\s+", "");

                        // TryParse instead of Parse: unexpected text inside the span is reported as a
                        // parsing error rather than surfacing as a FormatException/OverflowException.
                        parsed = long.TryParse(numberString, out indexedPagesNumber);
                    }

                    if (!parsed)
                    {
                        var errorMessage = "Match was not successful! Result stats: " + resultStats;
                        var parsingError = new ParsingError {
                            DomainURL = message.Url, ErrorMessage = errorMessage
                        };
                        Sender.Tell(parsingError, Self);
                        return;
                    }
                }

                var indexingData = new IndexingData
                {
                    PagesNumber    = indexedPagesNumber,
                    ProcessingDate = DateTime.Now.Date
                };

                var domainStat = new DomainStat {
                    DomainURL = message.Url, IndexingData = indexingData
                };

                Sender.Tell(domainStat, Self);
            }
        }
Example #11
0
        /// <summary>
        /// Downloads <paramref name="url"/> through a proxy taken from <c>_proxyProvider</c>, throttled by
        /// <c>RequestsDelayMs</c> and retried on rate-limit responses and on exceptions.
        /// </summary>
        /// <param name="url">Address to request.</param>
        /// <param name="retryCount">Remaining attempts; when it reaches zero the instance is marked spoiled.</param>
        /// <returns>The response body, or an empty string once the retries are exhausted.</returns>
        /// <remarks>
        /// After an exception the method sleeps <c>(4 - retryCount)</c> minutes (never less than zero) before
        /// retrying. The proxy is handed back via <c>PullBack</c>, with the elapsed time, in all cases.
        /// </remarks>
        private string Request(string url, int retryCount = 3)
        {
            if (retryCount <= 0)
            {
                IsSpoiled = true;
                return string.Empty;
            }

            // Throttle: wait out whatever remains of the minimum delay since the last call.
            var elapsed = (DateTime.Now - _lastCall).TotalMilliseconds;
            if (elapsed < RequestsDelayMs)
            {
                Thread.Sleep(Math.Min(RequestsDelayMs, Convert.ToInt32(RequestsDelayMs - elapsed)));
            }

            var html  = string.Empty;
            var proxy = _proxyProvider.GetProxy();
            var sw    = Stopwatch.StartNew();

            // Clamped at zero: a negative value (retryCount > 4) would make Thread.Sleep throw.
            var backoffMs = Math.Max(0, 4 - retryCount) * 60 * 1000;

            try
            {
                var request = (HttpWebRequest)WebRequest.Create(url);
                request.ServicePoint.Expect100Continue = false;
                request.Proxy = proxy;
                request.AutomaticDecompression = DecompressionMethods.GZip;
                request.UserAgent = config.Default.UserAgents[_rnd.Next(config.Default.UserAgents.Count)];

                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                {
                    using (Stream stream = response.GetResponseStream())
                    {
                        using (StreamReader reader = new StreamReader(stream))
                        {
                            _lastCall = DateTime.Now;
                            html      = reader.ReadToEnd();
                        }
                    }
                }

                if (html.Contains("\"error_msg\":\"Too many requests per second\""))
                {
                    _log.Warning($"[{Id}] Delay set {RequestsDelayMs}. Retry {retryCount}.");
                    Thread.Sleep(10000);
                    return Request(url, retryCount - 1);
                }
            }
            catch (WebException)
            {
                Thread.Sleep(backoffMs);
                return Request(url, retryCount - 1);
            }
            catch (Exception ex)
            {
                // Null-conditional: the proxy may not be a WebProxy, and logging must not throw from here.
                _log.Error(ex, "Failed Vkkey: {FailedVkKey} Proxy {FailedProxy}", _vkAccessToken, (proxy as WebProxy)?.Address);
                Thread.Sleep(backoffMs);
                return Request(url, retryCount - 1);
            }
            finally
            {
                _proxyProvider.PullBack(proxy, sw.ElapsedMilliseconds);
            }

            if (html.Contains("User authorization failed: invalid session."))
            {
                IsSpoiled = true;
            }

            return html;
        }
Example #12
0
        /// <summary>
        /// Loads the account's cookies into the request, fetches the Facebook home page and extracts the
        /// <c>fb_dtsg</c> token from it.
        /// </summary>
        /// <returns>
        /// <c>false</c> when the account has no cookies; otherwise the value of <c>IsAuthorized</c>, which is
        /// only updated when the page request completes without throwing.
        /// </returns>
        public bool Authorization()
        {
            if (!AccountData.Cookies.Any())
            {
                return false;
            }

            var cookieJar = new CookieStorage();

            foreach (var cookie in AccountData.Cookies)
            {
                try
                {
                    if (cookie.name == "c_user")
                    {
                        actor_id = cookie.value;
                    }

                    cookieJar.Add(new Cookie(cookie.name, cookie.value, cookie.path, cookie.domain));
                }
                catch
                {
                    // A cookie that cannot be added is skipped; the remaining cookies are still loaded.
                }
            }

            _request.Cookies = cookieJar;

            if (EnableProxies)
            {
                _request.Proxy = _proxyProvider.GetProxy();
            }

            // Up to three attempts; only an HttpException with proxies enabled consumes an attempt.
            for (var attemptsLeft = 3; attemptsLeft != 0;)
            {
                try
                {
                    _request["Accept-Language"] = "en-US,en;q=0.9,ru-RU;q=0.8,ru;q=0.7";
                    _request["Origin"]          = "https://www.facebook.com";
                    _request["Referer"]         = "https://www.facebook.com/";
                    _request["content-type"]    = "application/x-www-form-urlencoded";

                    var response = _request.Get("https://www.facebook.com/");
                    fb_dtsg = Regex.Match(response.ToString(), "\"token\":\"(.*?)\"").Groups[1].Value;

                    // Authorized exactly when a non-blank token was found in the page.
                    IsAuthorized = !string.IsNullOrWhiteSpace(fb_dtsg);
                    break;
                }
                catch (HttpException)
                {
                    if (!EnableProxies)
                    {
                        break;
                    }

                    // Switch to another proxy and try again.
                    _request.Proxy = _proxyProvider.GetProxy();
                    attemptsLeft--;
                }
                catch
                {
                    break;
                }
            }

            return IsAuthorized;
        }