/// <summary>
/// Issues the HTTP request for <paramref name="uri"/> through <see cref="HttpWebRequest"/>, honoring the
/// cache settings, and either stores the response in the cache (copying it to <paramref name="path"/>)
/// or loads it in-memory into <paramref name="doc"/>.
/// </summary>
/// <param name="uri">The resource to request.</param>
/// <param name="method">The HTTP method assigned to the request.</param>
/// <param name="path">Optional destination file that receives a copy of the cached file. May be null.</param>
/// <param name="doc">Optional document loaded from the response stream when the cache is not used and the content is HTML. May be null.</param>
/// <param name="proxy">Optional proxy. When supplied, credentials are assigned to both the proxy and the request.</param>
/// <param name="creds">Credentials used together with <paramref name="proxy"/>; default credentials are used when null.</param>
/// <returns>
/// <see cref="HttpStatusCode.NotModified"/> when the content is served from the cache (cache-only mode, a 304
/// response, or a failed request for which a cached file exists); <see cref="HttpStatusCode.ResetContent"/>
/// when the <c>PreRequest</c> handler returns false; otherwise the status code of the response.
/// </returns>
/// <exception cref="HtmlWebException">
/// The cache-only mode is active and no cached file exists, or the server answered NotModified while the cache is disabled.
/// </exception>
/// <exception cref="WebException">The request failed without a response and no cached file is available.</exception>
private HttpStatusCode Get(Uri uri, string method, string path, HtmlDocument doc, IWebProxy proxy, ICredentials creds)
{
    string cachePath = null;
    bool oldFile = false;

    HttpWebRequest req = WebRequest.Create(uri) as HttpWebRequest;
    req.Method = method;
    req.UserAgent = UserAgent;
    if (proxy != null)
    {
        if (creds != null)
        {
            proxy.Credentials = creds;
            req.Credentials = creds;
        }
        else
        {
            proxy.Credentials = CredentialCache.DefaultCredentials;
            req.Credentials = CredentialCache.DefaultCredentials;
        }
        req.Proxy = proxy;
    }

    _fromCache = false;
    _requestDuration = 0;
    int tc = Environment.TickCount;
    if (UsingCache)
    {
        cachePath = GetCachePath(req.RequestUri);
        if (File.Exists(cachePath))
        {
            // NOTE(review): the conditional GET uses the cached file's last *access* time;
            // confirm this is intended rather than its last write time.
            req.IfModifiedSince = File.GetLastAccessTime(cachePath);
            oldFile = true;
        }
    }

    if (_cacheOnly)
    {
        // File.Exists(null) is false, so cache-only without a cache path also ends up here.
        if (!File.Exists(cachePath))
        {
            throw new HtmlWebException("File was not found at cache path: '" + cachePath + "'");
        }

        if (path != null)
        {
            IOLibrary.CopyAlways(cachePath, path);
            // touch the file
            if (cachePath != null)
            {
                File.SetLastWriteTime(path, File.GetLastWriteTime(cachePath));
            }
        }
        _fromCache = true;
        return HttpStatusCode.NotModified;
    }

    if (_useCookies)
    {
        req.CookieContainer = new CookieContainer();
    }

    if (PreRequest != null)
    {
        // allow our user to change the request at will
        if (!PreRequest(req))
        {
            return HttpStatusCode.ResetContent;
        }
    }

    HttpWebResponse resp;
    try
    {
        resp = req.GetResponse() as HttpWebResponse;
    }
    catch (WebException we)
    {
        _requestDuration = Environment.TickCount - tc;
        // Protocol-level failures still carry a response that is processed below.
        resp = (HttpWebResponse)we.Response;
        if (resp == null)
        {
            if (oldFile)
            {
                // No response at all: fall back to the previously cached copy.
                if (path != null)
                {
                    IOLibrary.CopyAlways(cachePath, path);
                    // touch the file
                    File.SetLastWriteTime(path, File.GetLastWriteTime(cachePath));
                }
                return HttpStatusCode.NotModified;
            }
            throw;
        }
    }
    catch (Exception)
    {
        _requestDuration = Environment.TickCount - tc;
        throw;
    }

    // From here on the response is always closed, whichever way the method exits
    // (NotModified return, exception from a handler or from loading/saving, or normal completion).
    try
    {
        // allow our user to get some info from the response
        if (PostResponse != null)
        {
            PostResponse(req, resp);
        }

        _requestDuration = Environment.TickCount - tc;
        _responseUri = resp.ResponseUri;

        bool html = IsHtmlContent(resp.ContentType);

        // NOTE(review): Content-Encoding usually names a compression scheme (e.g. "gzip"), for which
        // Encoding.GetEncoding throws ArgumentException; confirm that a charset is expected here.
        Encoding respenc = !string.IsNullOrEmpty(resp.ContentEncoding)
            ? Encoding.GetEncoding(resp.ContentEncoding)
            : null;
        if (OverrideEncoding != null)
        {
            respenc = OverrideEncoding;
        }

        if (resp.StatusCode == HttpStatusCode.NotModified)
        {
            if (UsingCache)
            {
                _fromCache = true;
                if (path != null)
                {
                    IOLibrary.CopyAlways(cachePath, path);
                    // touch the file
                    File.SetLastWriteTime(path, File.GetLastWriteTime(cachePath));
                }
                return resp.StatusCode;
            }

            // this should *never* happen...
            throw new HtmlWebException("Server has send a NotModifed code, without cache enabled.");
        }

        Stream s = resp.GetResponseStream();
        if (s != null)
        {
            if (UsingCache)
            {
                // NOTE: LastModified does not contain milliseconds, so we remove them to the file
                SaveStream(s, cachePath, RemoveMilliseconds(resp.LastModified), _streamBufferSize);

                // save headers
                SaveCacheHeaders(req.RequestUri, resp);

                if (path != null)
                {
                    // copy and touch the file
                    IOLibrary.CopyAlways(cachePath, path);
                    File.SetLastWriteTime(path, File.GetLastWriteTime(cachePath));
                }
            }
            else if (doc != null && html)
            {
                // try to work in-memory
                if (respenc == null)
                {
                    doc.Load(s, true);
                }
                else
                {
                    doc.Load(s, respenc);
                }
            }
        }

        // The status code is read before the finally block closes the response.
        return resp.StatusCode;
    }
    finally
    {
        if (resp != null)
        {
            resp.Close();
        }
    }
}
/// <summary>
/// Issues the HTTP request for <paramref name="uri"/> through <see cref="HttpClient"/>, honoring the
/// cache settings, and either stores the response in the cache (copying it to <paramref name="path"/>)
/// or loads it in-memory into <paramref name="doc"/>.
/// </summary>
/// <param name="uri">The resource to request.</param>
/// <param name="method">The HTTP method used to build the request message.</param>
/// <param name="path">Optional destination file that receives a copy of the cached file. May be null.</param>
/// <param name="doc">Optional document loaded from the response stream when the cache is not used and the content is HTML. May be null.</param>
/// <param name="proxy">Optional proxy. When supplied, credentials are assigned to both the proxy and the handler.</param>
/// <param name="creds">Credentials used together with <paramref name="proxy"/>; default credentials are used when null.</param>
/// <returns>
/// <see cref="HttpStatusCode.NotModified"/> when the content is served from the cache (cache-only mode, a 304
/// response, or a failed request for which a cached file exists); <see cref="HttpStatusCode.ResetContent"/>
/// when the <c>PreRequest</c> handler returns false; otherwise the status code of the response.
/// </returns>
/// <exception cref="HtmlWebException">
/// The cache-only mode is active and no cached file exists, or the server answered NotModified while the cache is disabled.
/// </exception>
private HttpStatusCode Get(Uri uri, string method, string path, HtmlDocument doc, IWebProxy proxy, ICredentials creds)
{
    string cachePath = null;
    bool oldFile = false;

    using (var request = new HttpRequestMessage(new HttpMethod(method), uri))
    using (var handler = new HttpClientHandler())
    using (var client = new HttpClient(handler))
    {
        client.DefaultRequestHeaders.Add("User-Agent", UserAgent);

        if (proxy != null)
        {
            if (creds != null)
            {
                proxy.Credentials = creds;
                handler.Credentials = creds;
            }
            else
            {
                proxy.Credentials = CredentialCache.DefaultCredentials;
                handler.Credentials = CredentialCache.DefaultCredentials;
            }
            handler.Proxy = proxy;
            handler.UseProxy = true;
        }

        _fromCache = false;
        _requestDuration = 0;
        int tc = Environment.TickCount;
        if (UsingCache)
        {
            cachePath = GetCachePath(request.RequestUri);
            if (File.Exists(cachePath))
            {
                // NOTE(review): the conditional GET uses the cached file's last *access* time;
                // confirm this is intended rather than its last write time.
                client.DefaultRequestHeaders.IfModifiedSince = File.GetLastAccessTime(cachePath);
                oldFile = true;
            }
        }

        if (_cacheOnly)
        {
            // File.Exists(null) is false, so cache-only without a cache path also ends up here.
            if (!File.Exists(cachePath))
            {
                throw new HtmlWebException("File was not found at cache path: '" + cachePath + "'");
            }

            if (path != null)
            {
                IOLibrary.CopyAlways(cachePath, path);
                // touch the file
                if (cachePath != null)
                {
                    File.SetLastWriteTime(path, File.GetLastWriteTime(cachePath));
                }
            }
            _fromCache = true;
            return HttpStatusCode.NotModified;
        }

        if (_useCookies)
        {
            handler.CookieContainer = new CookieContainer();
        }

        if (PreRequest != null)
        {
            // allow our user to change the request at will
            if (!PreRequest(handler, request))
            {
                return HttpStatusCode.ResetContent;
            }
        }

        HttpResponseMessage response;
        try
        {
            response = client.SendAsync(request).Result;
        }
        catch (HttpRequestException)
        {
            _requestDuration = Environment.TickCount - tc;
            if (oldFile)
            {
                if (path != null)
                {
                    IOLibrary.CopyAlways(cachePath, path);
                    // touch the file
                    File.SetLastWriteTime(path, File.GetLastWriteTime(cachePath));
                }
                return HttpStatusCode.NotModified;
            }
            throw;
        }
        catch (AggregateException ae)
        {
            // Blocking on Task.Result surfaces request failures wrapped in an AggregateException,
            // so the stale-cache fallback has to be applied here as well.
            _requestDuration = Environment.TickCount - tc;
            if (oldFile && ae.GetBaseException() is HttpRequestException)
            {
                if (path != null)
                {
                    IOLibrary.CopyAlways(cachePath, path);
                    // touch the file
                    File.SetLastWriteTime(path, File.GetLastWriteTime(cachePath));
                }
                return HttpStatusCode.NotModified;
            }
            throw;
        }
        catch (Exception)
        {
            _requestDuration = Environment.TickCount - tc;
            throw;
        }

        // Dispose the response on every exit path (return or exception).
        using (response)
        {
            // allow our user to get some info from the response
            if (PostResponse != null)
            {
                PostResponse(request, response);
            }

            _requestDuration = Environment.TickCount - tc;
            _responseUri = response.RequestMessage.RequestUri;

            // ContentType is null when the response carries no Content-Type header;
            // pass an empty string instead of dereferencing null.
            var contentType = response.Content.Headers.ContentType;
            bool html = IsHtmlContent(contentType != null ? contentType.MediaType : string.Empty);

            // NOTE(review): Content-Encoding usually names a compression scheme (e.g. "gzip"), for which
            // Encoding.GetEncoding throws ArgumentException; confirm that a charset is expected here.
            var encoding = response.Content.Headers.ContentEncoding.FirstOrDefault();
            Encoding respenc = !string.IsNullOrEmpty(encoding) ? Encoding.GetEncoding(encoding) : null;

            // A caller-supplied encoding takes precedence over the one announced by the server.
            if (OverrideEncoding != null)
            {
                respenc = OverrideEncoding;
            }

            if (response.StatusCode == HttpStatusCode.NotModified)
            {
                if (UsingCache)
                {
                    _fromCache = true;
                    if (path != null)
                    {
                        IOLibrary.CopyAlways(cachePath, path);
                        // touch the file
                        File.SetLastWriteTime(path, File.GetLastWriteTime(cachePath));
                    }
                    return response.StatusCode;
                }

                // this should *never* happen...
                throw new HtmlWebException("Server has send a NotModifed code, without cache enabled.");
            }

            Stream s = response.Content.ReadAsStreamAsync().Result;
            if (s != null)
            {
                if (UsingCache)
                {
                    // NOTE: LastModified does not contain milliseconds, so we remove them to the file
                    SaveStream(s, cachePath, RemoveMilliseconds(response.Content.Headers.LastModified), _streamBufferSize);

                    // save headers
                    SaveCacheHeaders(request.RequestUri, response);

                    if (path != null)
                    {
                        // copy and touch the file
                        IOLibrary.CopyAlways(cachePath, path);
                        File.SetLastWriteTime(path, File.GetLastWriteTime(cachePath));
                    }
                }
                else if (doc != null && html)
                {
                    // try to work in-memory
                    if (respenc != null)
                    {
                        doc.Load(s, respenc);
                    }
                    else
                    {
                        doc.Load(s, true);
                    }
                }
            }

            return response.StatusCode;
        }
    }
}
/// <summary>
/// Loads an HTML document from an Internet resource or, for a file URI, from the local file system.
/// </summary>
/// <param name="url">The requested URL, such as "http://Myserver/Mypath/Myfile.asp". The http, https and file schemes are supported.</param>
/// <param name="method">The HTTP method used to open the connection, such as GET, POST, PUT, or PROPFIND. Not used for the file scheme.</param>
/// <returns>A new HTML document.</returns>
/// <exception cref="HtmlWebException">The scheme of <paramref name="url"/> is neither http, https nor file.</exception>
public HtmlDocument Load(string url, string method)
{
    Uri uri = new Uri(url);
    HtmlDocument doc;

#if NET20 || NET40 || NET451
    if ((uri.Scheme == Uri.UriSchemeHttps) || (uri.Scheme == Uri.UriSchemeHttp))
#else
    // The Uri.UriScheme* constants are not accessible on this target, so literals are used.
    if ((uri.Scheme == "https") || (uri.Scheme == "http"))
#endif
    {
        doc = LoadUrl(uri, method, null, null);
    }
#if NET20 || NET40 || NET451
    else if (uri.Scheme == Uri.UriSchemeFile)
#else
    else if (uri.Scheme == "file")
#endif
    {
        doc = new HtmlDocument();
        doc.OptionAutoCloseOnEnd = true;

        // An explicit override wins over encoding detection.
        if (OverrideEncoding != null)
        {
            doc.Load(url, OverrideEncoding);
        }
        else
        {
            doc.DetectEncodingAndLoad(url, _autoDetectEncoding);
        }
    }
    else
    {
        throw new HtmlWebException("Unsupported uri scheme: '" + uri.Scheme + "'.");
    }

    // Give subscribers a chance to post-process the document before it is returned.
    if (PreHandleDocument != null)
    {
        PreHandleDocument(doc);
    }

    return doc;
}