示例#1
0
        /// <summary>
        /// Downloads the page at <paramref name="url"/> (relative to the configured web server)
        /// and returns its final URI, HTTP status code and parsed HTML content.
        /// </summary>
        /// <param name="url">Page path; it is joined to the web server address with a single '/'.</param>
        /// <returns>
        /// A <see cref="PageData"/> whose URI fields initially describe the request and are replaced
        /// with the response URI on success (redirects are followed). When the request fails, the
        /// failure is logged and the status code is taken from the error response if one was received;
        /// the content is not populated in that case.
        /// </returns>
        public PageData FetchPage(string url)
        {
            // Private pages need authorization cookies; obtain them before the first request.
            if (indexPrivatePages && authorizationCookies == null)
            {
                TryAuthenticate();
            }

            var fullUrl        = string.Concat(webServer.TrimEnd('/'), "/", url.TrimStart('/'));
            var httpWebRequest = (HttpWebRequest)WebRequest.Create(fullUrl);

            httpWebRequest.AllowAutoRedirect = true;
            httpWebRequest.Timeout           = 60 * 1000; // 60 seconds, in milliseconds.
            httpWebRequest.CookieContainer   = new CookieContainer();
            if (authorizationCookies != null)
            {
                // Copy each authorization cookie into this request's own container.
                foreach (Cookie authCookie in authorizationCookies)
                {
                    Cookie cookie = new Cookie(authCookie.Name, authCookie.Value, authCookie.Path, authCookie.Domain);
                    httpWebRequest.CookieContainer.Add(cookie);
                }
            }

            HttpWebResponse httpWebResponse = null;
            var             response        = new PageData();

            // Start with the request URI so the result is identifiable even if the request fails.
            response.AbsoluteUri  = httpWebRequest.RequestUri.AbsoluteUri;
            response.AbsolutePath = httpWebRequest.RequestUri.AbsolutePath;

            try
            {
                httpWebResponse = (HttpWebResponse)httpWebRequest.GetResponse();

                // The response URI may differ from the request URI after redirects.
                response.StatusCode   = httpWebResponse.StatusCode;
                response.AbsolutePath = httpWebResponse.ResponseUri.AbsolutePath;
                response.AbsoluteUri  = httpWebResponse.ResponseUri.AbsoluteUri;

                using (Stream responseStream = httpWebResponse.GetResponseStream())
                {
                    if (responseStream != null)
                    {
                        using (var streamReader = new StreamReader(responseStream, Encoding.UTF8))
                        {
                            response.Content = new HtmlDocument();
                            response.Content.LoadHtml(streamReader.ReadToEnd());
                        }
                    }
                }
            }
            catch (SystemException ex)
            {
                Log.ErrorFormat("Lucene web crawler: Failed to fetch page by url {0}.", ex, url);

                var webException = ex as WebException;
                if (webException != null)
                {
                    // WebException.Response is null when no HTTP response was received
                    // (e.g. timeout or connection failure), so it must not be dereferenced blindly.
                    // The error response also holds a connection and has to be disposed.
                    using (var errorResponse = webException.Response as HttpWebResponse)
                    {
                        if (errorResponse != null)
                        {
                            response.StatusCode = errorResponse.StatusCode;
                        }
                    }
                }
            }
            finally
            {
                if (httpWebResponse != null)
                {
                    httpWebResponse.Close();
                }
            }

            return(response);
        }
示例#2
0
        /// <summary>
        /// Downloads the page at <paramref name="url"/> (relative to the configured web server)
        /// and returns its final URI, HTTP status code and parsed HTML content.
        /// </summary>
        /// <remarks>
        /// When private pages are indexed, the request is authorized either with network credentials
        /// read from the search configuration (Windows mode) or with previously obtained
        /// authorization cookies (any other mode).
        /// </remarks>
        /// <param name="url">Page path; it is joined to the web server address with a single '/'.</param>
        /// <returns>
        /// A <see cref="PageData"/> whose URI fields initially describe the request and are replaced
        /// with the response URI on success (redirects are followed). When the request fails, the
        /// failure is logged and the status code is taken from the error response if one was received;
        /// the content is not populated in that case.
        /// </returns>
        public PageData FetchPage(string url)
        {
            if (indexPrivatePages && authorizationCookies == null && authMode != AuthMode.Windows)
            {
                TryAuthenticate(); // Forms authentication.
            }

            var fullUrl        = string.Concat(webServer.TrimEnd('/'), "/", url.TrimStart('/'));
            var httpWebRequest = (HttpWebRequest)WebRequest.Create(fullUrl);

            if (indexPrivatePages && authMode == AuthMode.Windows)
            {
                // Windows authentication: attach the configured account to the request itself.
                var userName = cmsConfiguration.Search.GetValue(LuceneSearchConstants.ConfigurationKeys.LuceneAuthorizationWindows_UserName);
                var password = cmsConfiguration.Search.GetValue(LuceneSearchConstants.ConfigurationKeys.LuceneAuthorizationWindows_Password);
                httpWebRequest.Credentials = new NetworkCredential(userName, password);
            }

            httpWebRequest.AllowAutoRedirect = true;
            httpWebRequest.Timeout           = (int)fetchTimeout.TotalMilliseconds;
            httpWebRequest.CookieContainer   = new CookieContainer();

            if (authorizationCookies != null)
            {
                // Copy each authorization cookie into this request's own container.
                foreach (Cookie authCookie in authorizationCookies)
                {
                    Cookie cookie = new Cookie(authCookie.Name, authCookie.Value, authCookie.Path, authCookie.Domain);
                    httpWebRequest.CookieContainer.Add(cookie);
                }
            }

            HttpWebResponse httpWebResponse = null;
            var             response        = new PageData();

            // Start with the request URI so the result is identifiable even if the request fails.
            response.AbsoluteUri  = httpWebRequest.RequestUri.AbsoluteUri;
            response.AbsolutePath = httpWebRequest.RequestUri.AbsolutePath;

            try
            {
                httpWebResponse       = (HttpWebResponse)httpWebRequest.GetResponse();

                // The response URI may differ from the request URI after redirects.
                response.StatusCode   = httpWebResponse.StatusCode;
                response.AbsolutePath = httpWebResponse.ResponseUri.AbsolutePath;
                response.AbsoluteUri  = httpWebResponse.ResponseUri.AbsoluteUri;

                using (Stream responseStream = httpWebResponse.GetResponseStream())
                {
                    if (responseStream != null)
                    {
                        using (var streamReader = new StreamReader(responseStream, Encoding.UTF8))
                        {
                            response.Content = new HtmlDocument();
                            response.Content.LoadHtml(streamReader.ReadToEnd());
                        }
                    }
                }
            }
            catch (Exception ex)
            {
                Log.ErrorFormat("Lucene web crawler: Failed to fetch page by url {0}.", ex, url);

                var webException = ex as WebException;
                if (webException != null)
                {
                    // WebException.Response is null when no HTTP response was received
                    // (e.g. timeout or connection failure), so it must not be dereferenced blindly.
                    // The error response also holds a connection and has to be disposed.
                    using (var errorResponse = webException.Response as HttpWebResponse)
                    {
                        if (errorResponse != null)
                        {
                            response.StatusCode = errorResponse.StatusCode;
                        }
                    }
                }
            }
            finally
            {
                if (httpWebResponse != null)
                {
                    httpWebResponse.Close();
                }
            }

            return(response);
        }