Ejemplo n.º 1
0
        /// <summary>
        /// Downloads the content at <paramref name="address"/> as text, retrying on failure.
        /// When <c>_proxyManager</c> is set, each attempt is routed through a proxy obtained
        /// from it, and a proxy that fails is disabled before the next attempt.
        /// </summary>
        /// <param name="address">URL to request.</param>
        /// <param name="maxRetry">Maximum number of attempts before giving up.</param>
        /// <param name="timeout">Value assigned to <c>HttpWebRequest.Timeout</c> (milliseconds).</param>
        /// <param name="verbose">
        /// Console verbosity: values above 0 print wait notices, values above 1 also print
        /// per-request success/failure lines and the max-retry notice.
        /// </param>
        /// <returns>
        /// The response body when the status is 200 OK; <c>null</c> when the response has
        /// another status or when all <paramref name="maxRetry"/> attempts failed.
        /// </returns>
        public string Extract(string address, int maxRetry, int timeout, int verbose)
        {
            // Back-off delay in milliseconds; grows by waitIncr after every wait.
            int defaultWaitTime = 180000;
            const int waitIncr  = 10000;
            int retry           = 0;

            while (retry++ < maxRetry)
            {
                AvailableProxy proxy = null;
                try
                {
                    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(address);
                    request.Timeout = timeout;
                    if (_proxyManager != null)
                    {
                        proxy         = _proxyManager.GetAvailableProxy();
                        request.Proxy = proxy.Proxy;
                    }

                    string data = null;
                    // 'using' guarantees the response and its stream are released even when
                    // the status is not OK or reading the body throws.
                    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                    {
                        if (response.StatusCode == HttpStatusCode.OK)
                        {
                            using (Stream receiveStream = response.GetResponseStream())
                            using (StreamReader readStream = string.IsNullOrWhiteSpace(response.CharacterSet)
                                       ? new StreamReader(receiveStream)
                                       : new StreamReader(receiveStream, Encoding.GetEncoding(response.CharacterSet)))
                            {
                                data = readStream.ReadToEnd();
                            }
                        }
                    }

                    if (verbose > 1)
                    {
                        // Only dereference the proxy when one was actually used.
                        if (proxy != null)
                        {
                            Console.WriteLine($"[SUCCESS] extracting content from {address} using proxy {proxy.Proxy.Address.AbsoluteUri}");
                        }
                        else
                        {
                            Console.WriteLine($"[SUCCESS] extracting content from {address} without proxy");
                        }
                    }
                    return data;
                }
                catch (NoAvailableProxyException)
                {
                    if (verbose > 0)
                    {
                        Console.WriteLine($"[ERROR] No more available proxies ... waiting {defaultWaitTime / 1000}s\n");
                    }
                    // No more proxies: wait, then re-enable all of them for the next attempt.
                    Thread.Sleep(defaultWaitTime);
                    defaultWaitTime += waitIncr;
                    if (_proxyManager != null)
                    {
                        _proxyManager.EnableAllProxies();
                    }
                }
                catch (Exception)
                {
                    if (_proxyManager != null && proxy != null)
                    {
                        // A proxy was in use: disable it and retry immediately with another one.
                        if (verbose > 1)
                        {
                            Console.WriteLine($"[ERROR] extracting content from {address} using proxy {proxy.Proxy.Address.AbsoluteUri}: disabling proxy\n");
                        }
                        _proxyManager.DisableProxy(proxy);
                    }
                    else
                    {
                        // No proxy to blame (no proxy manager, or the failure happened before
                        // a proxy was obtained): back off before the next attempt.
                        if (verbose > 0)
                        {
                            Console.WriteLine($"[ERROR] ... waiting {defaultWaitTime / 1000}s\n");
                        }
                        Thread.Sleep(defaultWaitTime);
                        defaultWaitTime += waitIncr;
                    }
                }
            }

            if (verbose > 1)
            {
                Console.WriteLine($"reached max-retry limit ({maxRetry}), skipping this page");
            }
            return null;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Downloads the content at <paramref name="address"/> as text, retrying indefinitely
        /// until a response is obtained. When <c>_proxyManager</c> is set, each attempt is
        /// routed through a proxy obtained from it, and a proxy that fails is disabled.
        /// </summary>
        /// <param name="address">URL to request.</param>
        /// <returns>
        /// The response body when the status is 200 OK; <c>null</c> when a response was
        /// received with any other status.
        /// </returns>
        public string Extract2(string address)
        {
            // Back-off delay in milliseconds; grows by waitIncr after every wait.
            int defaultWaitTime = 180000;
            const int waitIncr  = 10000;

            while (true)
            {
                AvailableProxy proxy = null;
                try
                {
                    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(address);
                    if (_proxyManager != null)
                    {
                        proxy         = _proxyManager.GetAvailableProxy();
                        request.Proxy = proxy.Proxy;
                    }

                    string data = null;
                    // 'using' guarantees the response and its stream are released even when
                    // the status is not OK or reading the body throws.
                    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                    {
                        if (response.StatusCode == HttpStatusCode.OK)
                        {
                            using (Stream receiveStream = response.GetResponseStream())
                            using (StreamReader readStream = string.IsNullOrWhiteSpace(response.CharacterSet)
                                       ? new StreamReader(receiveStream)
                                       : new StreamReader(receiveStream, Encoding.GetEncoding(response.CharacterSet)))
                            {
                                data = readStream.ReadToEnd();
                            }
                        }
                    }
                    return data;
                }
                catch (NoAvailableProxyException)
                {
                    // No more proxies: wait before asking again.
                    Thread.Sleep(defaultWaitTime);
                    defaultWaitTime += waitIncr;
                }
                catch (Exception)
                {
                    if (_proxyManager != null && proxy != null)
                    {
                        // A proxy was in use: disable it and retry immediately with another one.
                        _proxyManager.DisableProxy(proxy, defaultWaitTime);
                    }
                    else
                    {
                        // No proxy to blame (no proxy manager, or the failure happened before
                        // a proxy was obtained): back off instead of spinning in a tight loop.
                        Thread.Sleep(defaultWaitTime);
                        defaultWaitTime += waitIncr;
                    }
                }
            }
        }