/// <summary>
/// Downloads a single page from the configured web server and parses its body as HTML.
/// </summary>
/// <param name="url">Path of the page, relative to <c>webServer</c>; leading slashes are ignored.</param>
/// <returns>
/// A <see cref="PageData"/> whose <c>AbsoluteUri</c>/<c>AbsolutePath</c> are taken from the request URI
/// and, when a response is received, replaced with the final response URI (auto-redirects are followed).
/// <c>StatusCode</c> is set from the response, or from the error response when the server answered with
/// a failure status. <c>Content</c> is only set when the body was read successfully. When the request
/// fails without any HTTP response (e.g. timeout), <c>StatusCode</c> keeps its initial value.
/// </returns>
public PageData FetchPage(string url)
{
    // NOTE(review): TryAuthenticate presumably populates authorizationCookies - confirm against its definition.
    if (indexPrivatePages && authorizationCookies == null)
    {
        TryAuthenticate();
    }

    // Join server root and relative path with exactly one slash between them.
    var fullUrl = string.Concat(webServer.TrimEnd('/'), "/", url.TrimStart('/'));

    var httpWebRequest = (HttpWebRequest)WebRequest.Create(fullUrl);
    httpWebRequest.AllowAutoRedirect = true;
    httpWebRequest.Timeout = 60 * 1000; // 60 seconds, in milliseconds.
    httpWebRequest.CookieContainer = new CookieContainer();

    if (authorizationCookies != null)
    {
        // Copy each authorization cookie into this request's own container.
        foreach (Cookie authCookie in authorizationCookies)
        {
            Cookie cookie = new Cookie(authCookie.Name, authCookie.Value, authCookie.Path, authCookie.Domain);
            httpWebRequest.CookieContainer.Add(cookie);
        }
    }

    HttpWebResponse httpWebResponse = null;

    // Seed the result with the request URI so it is populated even when the request fails.
    var response = new PageData();
    response.AbsoluteUri = httpWebRequest.RequestUri.AbsoluteUri;
    response.AbsolutePath = httpWebRequest.RequestUri.AbsolutePath;

    try
    {
        httpWebResponse = (HttpWebResponse)httpWebRequest.GetResponse();

        response.StatusCode = httpWebResponse.StatusCode;

        // The response URI may differ from the request URI after redirects.
        response.AbsolutePath = httpWebResponse.ResponseUri.AbsolutePath;
        response.AbsoluteUri = httpWebResponse.ResponseUri.AbsoluteUri;

        using (Stream responseStream = httpWebResponse.GetResponseStream())
        {
            if (responseStream != null)
            {
                using (var streamReader = new StreamReader(responseStream, Encoding.UTF8))
                {
                    response.Content = new HtmlDocument();
                    response.Content.LoadHtml(streamReader.ReadToEnd());
                }
            }
        }
    }
    catch (SystemException ex)
    {
        Log.ErrorFormat("Lucene web crawler: Failed to fetch page by url {0}.", ex, url);

        var webException = ex as WebException;
        if (webException != null)
        {
            // WebException.Response is null when no HTTP response was received at all
            // (timeout, DNS failure, connection refused); dereferencing it here would
            // throw from inside the handler and defeat the purpose of this catch.
            var errorResponse = webException.Response as HttpWebResponse;
            if (errorResponse != null)
            {
                response.StatusCode = errorResponse.StatusCode;

                // The error response holds the connection until it is closed.
                errorResponse.Close();
            }
        }
    }
    finally
    {
        if (httpWebResponse != null)
        {
            httpWebResponse.Close();
        }
    }

    return response;
}
/// <summary>
/// Downloads a single page from the configured web server and parses its body as HTML.
/// When private pages are indexed, the request is authenticated either with previously obtained
/// authorization cookies or, in Windows mode, with network credentials read from the search configuration.
/// </summary>
/// <param name="url">Path of the page, relative to <c>webServer</c>; leading slashes are ignored.</param>
/// <returns>
/// A <see cref="PageData"/> whose <c>AbsoluteUri</c>/<c>AbsolutePath</c> are taken from the request URI
/// and, when a response is received, replaced with the final response URI (auto-redirects are followed).
/// <c>StatusCode</c> is set from the response, or from the error response when the server answered with
/// a failure status. <c>Content</c> is only set when the body was read successfully. When the request
/// fails without any HTTP response (e.g. timeout), <c>StatusCode</c> keeps its initial value.
/// </returns>
public PageData FetchPage(string url)
{
    if (indexPrivatePages && authorizationCookies == null && authMode != AuthMode.Windows)
    {
        // Forms authentication.
        // NOTE(review): TryAuthenticate presumably populates authorizationCookies - confirm against its definition.
        TryAuthenticate();
    }

    // Join server root and relative path with exactly one slash between them.
    var fullUrl = string.Concat(webServer.TrimEnd('/'), "/", url.TrimStart('/'));

    var httpWebRequest = (HttpWebRequest)WebRequest.Create(fullUrl);

    if (indexPrivatePages && authMode == AuthMode.Windows)
    {
        // Windows authentication: credentials come from the search configuration section.
        var userName = cmsConfiguration.Search.GetValue(LuceneSearchConstants.ConfigurationKeys.LuceneAuthorizationWindows_UserName);
        var password = cmsConfiguration.Search.GetValue(LuceneSearchConstants.ConfigurationKeys.LuceneAuthorizationWindows_Password);
        httpWebRequest.Credentials = new NetworkCredential(userName, password);
    }

    httpWebRequest.AllowAutoRedirect = true;
    httpWebRequest.Timeout = (int)fetchTimeout.TotalMilliseconds;
    httpWebRequest.CookieContainer = new CookieContainer();

    if (authorizationCookies != null)
    {
        // Copy each authorization cookie into this request's own container.
        foreach (Cookie authCookie in authorizationCookies)
        {
            Cookie cookie = new Cookie(authCookie.Name, authCookie.Value, authCookie.Path, authCookie.Domain);
            httpWebRequest.CookieContainer.Add(cookie);
        }
    }

    HttpWebResponse httpWebResponse = null;

    // Seed the result with the request URI so it is populated even when the request fails.
    var response = new PageData();
    response.AbsoluteUri = httpWebRequest.RequestUri.AbsoluteUri;
    response.AbsolutePath = httpWebRequest.RequestUri.AbsolutePath;

    try
    {
        httpWebResponse = (HttpWebResponse)httpWebRequest.GetResponse();

        response.StatusCode = httpWebResponse.StatusCode;

        // The response URI may differ from the request URI after redirects.
        response.AbsolutePath = httpWebResponse.ResponseUri.AbsolutePath;
        response.AbsoluteUri = httpWebResponse.ResponseUri.AbsoluteUri;

        using (Stream responseStream = httpWebResponse.GetResponseStream())
        {
            if (responseStream != null)
            {
                using (var streamReader = new StreamReader(responseStream, Encoding.UTF8))
                {
                    response.Content = new HtmlDocument();
                    response.Content.LoadHtml(streamReader.ReadToEnd());
                }
            }
        }
    }
    catch (Exception ex)
    {
        Log.ErrorFormat("Lucene web crawler: Failed to fetch page by url {0}.", ex, url);

        var webException = ex as WebException;
        if (webException != null)
        {
            // WebException.Response is null when no HTTP response was received at all
            // (timeout, DNS failure, connection refused); dereferencing it here would
            // throw from inside the handler and defeat the purpose of this catch.
            var errorResponse = webException.Response as HttpWebResponse;
            if (errorResponse != null)
            {
                response.StatusCode = errorResponse.StatusCode;

                // The error response holds the connection until it is closed.
                errorResponse.Close();
            }
        }
    }
    finally
    {
        if (httpWebResponse != null)
        {
            httpWebResponse.Close();
        }
    }

    return response;
}