コード例 #1
0
        /// <summary>
        /// Hands out a single proxy that satisfies the optional filter.
        /// </summary>
        /// <remarks>
        /// The whole operation runs while holding <c>_queueLock</c>. The in-memory
        /// <c>_availableProxies</c> collection is consulted first; a match found there is
        /// removed from the collection before it is returned. Otherwise the database is
        /// queried, and the first result is tagged with this repository and its session id,
        /// saved, and committed before being returned.
        /// </remarks>
        /// <param name="pf">Filter the proxy must match, or <c>null</c> for no filter.</param>
        /// <returns>The selected proxy; never <c>null</c>.</returns>
        /// <exception cref="ProxyRepositoryFailureException">
        /// Thrown when the database query fails or when it yields no proxy.
        /// </exception>
        public Proxy GetProxy(ProxyFilter pf = null)
        {
            lock (_queueLock)
            {
                string requestMessage = pf != null
                    ? "Getting Proxy.  Filter: " + pf.GetFilterLogInfo()
                    : "Getting Proxy.  No Filter.";
                ProxyLog.Info(requestMessage);

                // Runs before the in-memory lookup; presumably refreshes which proxies are
                // currently usable (its body is not visible from here).
                CheckIcedProxies();

                Proxy pooled = _availableProxies.FirstOrDefault(candidate => candidate.MatchesFilter(pf));
                if (pooled != null)
                {
                    ProxyLog.Info("Proxy Retrieved From Available Proxies: " + pooled.GetLogInfo(pf?.Site));
                    _availableProxies.Remove(pooled);
                    return pooled;
                }

                // Nothing suitable in memory: fall back to the database.
                using (ISession session = _sessionFactory.OpenSession())
                using (var transaction = session.BeginTransaction())
                {
                    IList<Proxy> candidates;
                    try
                    {
                        // Sort by the site-specific score only when the filter names a site.
                        var sortMode = pf?.Site != null
                            ? ProxyFilter.ProxyFilterQuery.SortModes.SiteScore
                            : ProxyFilter.ProxyFilterQuery.SortModes.Score;
                        candidates = QueryDatabaseForProxies(pf, session, sortMode);
                    }
                    catch (Exception ex)
                    {
                        Log.Error("Failed to query database for proxies.", ex);
                        throw new ProxyRepositoryFailureException("A failure occurred while querying proxies", ex);
                    }

                    Proxy fetched = candidates.FirstOrDefault();
                    if (fetched == null)
                    {
                        ProxyLog.Info("No Matching Proxy Available");
                        throw new ProxyRepositoryFailureException("No matching proxies available");
                    }

                    ProxyLog.Info("Proxy Retrieved From Database: " + fetched.GetLogInfo(pf?.Site));

                    // Stamp the proxy with the current session and owning repository, then persist.
                    fetched.LastSession = this.SessionID;
                    fetched.Repository = this;
                    session.Save(fetched);
                    transaction.Commit();
                    return fetched;
                }
            }
        }
コード例 #2
0
        /// <summary>
        /// Runs the SQL generated for the given filter against the supplied session and
        /// returns the matching proxies.
        /// </summary>
        /// <param name="pf">Filter handed to <c>ProxyFilter.ProxyFilterQuery</c> to build the SQL.</param>
        /// <param name="session">Session on which the SQL query is created and executed.</param>
        /// <param name="sortMode">Sort mode handed to <c>ProxyFilter.ProxyFilterQuery</c>.</param>
        /// <returns>
        /// The <see cref="Proxy"/> entities produced by the query, after the
        /// <c>Transformers.DistinctRootEntity</c> result transformer has been applied.
        /// </returns>
        public IList <Proxy> QueryDatabaseForProxies(ProxyFilter pf, ISession session, ProxyFilter.ProxyFilterQuery.SortModes sortMode)
        {
            // The query object turns the filter, this repository's session id and the sort mode into SQL.
            var filterQuery = new ProxyFilter.ProxyFilterQuery(pf, this.SessionID, sortMode);
            var sql = filterQuery.GetSql();

            // Map the "proxy" alias in the SQL onto the Proxy entity type.
            var entityQuery = session.CreateSQLQuery(sql).AddEntity("proxy", typeof(Proxy));
            var distinctQuery = entityQuery.SetResultTransformer(Transformers.DistinctRootEntity);

            return distinctQuery.List<Proxy>();
        }
コード例 #3
0
        /// <summary>
        /// Builds a proxy filter via <c>OnCreateFilter</c> and copies this factory's
        /// settings (graph, client, layers, block) onto it.
        /// </summary>
        /// <returns>
        /// The configured filter, or <c>null</c> when <c>OnCreateFilter</c> returns <c>null</c>.
        /// </returns>
        public ProxyFilter CreateFilter()
        {
            ProxyFilter created = OnCreateFilter();

            // Nothing to configure when no filter was produced.
            if (created == null)
            {
                return created;
            }

            created.Graph = Graph;

            // Client and Layers are only copied when set on this factory; otherwise the
            // filter keeps whatever OnCreateFilter gave it.
            if (Client != null)
            {
                created.Client = Client.Create(null);
            }

            if (Layers != null)
            {
                created.Layers = Layers;
            }

            created.Block = Block;

            return created;
        }
コード例 #4
0
        /// <summary>
        /// Produces the "requests" to run: descriptions of which pages to scrape, and how.
        /// Yields sixteen identical requests for the GitHub home page that all share one
        /// proxy filter instance.
        /// </summary>
        /// <returns>A lazily evaluated sequence of requests.</returns>
        protected IEnumerable <Request> GetPagesToScrape()
        {
            const string targetUrl = "http://www.github.com";
            const string signUpButtonXPath = @"//*/button[contains(text(), 'Sign up for GitHub') and contains(@class, 'btn-primary-mktg')]";

            var firstRequest = new Request(targetUrl)
            {
                // A request can either be a raw HTTP request, which just returns the response,
                // or it can load the page in Chrome.
                DriverType = Request.DriverTypes.HeadlessChrome,

                // With Chrome, a list of elements can be given whose presence signals that the
                // page has fully loaded.  Without it the request simply gets a long timeout, so
                // listing elements raises throughput by not waiting out that timeout once the
                // needed components are present.
                ElementsToWaitFor = new List<string> { signUpButtonXPath }
            };

            // The proxy filter controls the quality of the proxy handed to the request.  Here it
            // only rules out proxies that were overused recently in the current proxy session.
            var sharedFilter = new ProxyFilter { NotCurrentSession = true };
            firstRequest.ProxyFilter = sharedFilter;
            yield return firstRequest;

            // Emit more of the same to show how requests are distributed amongst proxies.
            for (int extra = 0; extra < 15; extra++)
            {
                var repeat = new Request(targetUrl);
                repeat.DriverType = Request.DriverTypes.HeadlessChrome;
                repeat.ElementsToWaitFor = new List<string> { signUpButtonXPath };
                repeat.ProxyFilter = sharedFilter;
                yield return repeat;
            }
        }