/// <summary>
/// Downloads <paramref name="url"/> through <c>HtmlClient.GetHtmlTextReaderAsync</c>, parses the
/// result into an <see cref="XmlDocument"/>, and asserts that the text of the document's
/// <c>title</c> element equals <paramref name="expectedTitle"/>. The reader's encoding and
/// encoding confidence are sampled before and after parsing and written to the console.
/// </summary>
/// <param name="url">Address of the page to fetch; also passed to the parser as <c>BaseUrl</c>.</param>
/// <param name="expectedTitle">Title text the parsed document is expected to contain.</param>
/// <param name="options">Client options for the request; <c>HtmlClient.Options</c> is used when this is null.</param>
public async Task TestGetTextReaderAsync_ForEncoding(string url, string expectedTitle, ClientOptions options = null)
{
    ClientOptions optionsToUse = options ?? HtmlClient.Options;
    XmlDocument doc1 = new XmlDocument();

    System.Text.Encoding initialEncoding = null;
    EncodingConfidence initialConfidence = EncodingConfidence.Tentative;
    System.Text.Encoding finalEncoding = null;
    EncodingConfidence finalConfidence = EncodingConfidence.Tentative;

    // Get the Html asynchronously and parse it into an Xml document.
    using (HtmlTextReader textReader = await HtmlClient.GetHtmlTextReaderAsync(url, optionsToUse))
    {
        // Sample the encoding state before the parser has consumed anything...
        initialEncoding = textReader.CurrentEncoding;
        initialConfidence = textReader.CurrentEncodingConfidence;

        HtmlParser.DefaultParser.Parse(doc1, textReader, new ParserOptions { BaseUrl = url });

        // ...and again afterwards, so the log line shows whether parsing changed it.
        finalEncoding = textReader.CurrentEncoding;
        finalConfidence = textReader.CurrentEncodingConfidence;
    }

    // SelectSingleNode returns null when nothing matches. Keep the title as null in that case
    // so the diagnostics below are still printed and the assertion reports the mismatch,
    // rather than the test dying with a NullReferenceException.
    XmlNode titleNode = doc1.SelectSingleNode("//title/text()");
    string title1 = titleNode == null ? null : titleNode.InnerText;

    Console.WriteLine("Crawled: " + url + ", title: " + title1 + ", default: " + optionsToUse.DefaultEncoding.WebName + " (detect=" + optionsToUse.DetectEncoding + "), inital: " + initialEncoding.WebName + " (" + initialConfidence + "), final: " + finalEncoding.WebName + " (" + finalConfidence + ")");

    // The title only matches when the page text was decoded as expected.
    Assert.AreEqual(expectedTitle, title1);
}
/// <summary>
/// Fetches the page at <paramref name="url"/> via <c>HtmlClient.GetHtmlTextReaderAsync</c> and
/// parses it with <c>this.Parser</c> into a newly created <see cref="XmlDocument"/>.
/// </summary>
/// <param name="url">Address of the page to load; also passed to the parser as <c>BaseUrl</c>.</param>
/// <returns>The document that the parser populated.</returns>
public async Task<XmlDocument> LoadXHtmlDocAsync(string url)
{
    XmlDocument result = new XmlDocument();

    // The reader is disposed as soon as parsing finishes (or throws).
    using (HtmlTextReader source = await HtmlClient.GetHtmlTextReaderAsync(url))
    {
        this.Parser.Parse(result, source, new ParserOptions { BaseUrl = url });
    }

    return result;
}
/// <summary>
/// Fetches the page at <paramref name="url"/> via <c>HtmlClient.GetHtmlTextReaderAsync</c> and
/// parses it into <paramref name="doc"/> through an <c>XmlDomBuilder</c> wrapped around that document.
/// </summary>
/// <param name="doc">Document that receives the parsed content.</param>
/// <param name="url">Address of the page to load.</param>
/// <param name="options">
/// Loader options; a fresh <c>LoaderOptions</c> is created when this is null. If its
/// <c>ParserOptions.BaseUrl</c> is null or empty, it is set to <paramref name="url"/>.
/// </param>
internal static async Task LoadWebPageAsync(XmlDocument doc, string url, LoaderOptions options)
{
    LoaderOptions effective = options;
    if (effective == null)
    {
        effective = new LoaderOptions();
    }

    // Fall back to the page address as the base URL when none was supplied.
    // NOTE(review): this writes into the caller's options instance when one was passed in;
    // confirm callers do not reuse the same options object for a different url.
    if (string.IsNullOrEmpty(effective.ParserOptions.BaseUrl))
    {
        effective.ParserOptions.BaseUrl = url;
    }

    XmlDomBuilder builder = new XmlDomBuilder(doc);
    HtmlStreamParser<XmlNode> streamParser = new HtmlStreamParser<XmlNode>();

    // Get the Html asynchronously and parse it into the Xml document.
    using (HtmlTextReader source = await HtmlClient.GetHtmlTextReaderAsync(url, effective))
    {
        streamParser.Parse(builder, source, effective.ParserOptions);
    }
}
/// <summary>
/// Opens <paramref name="url"/> via <c>HtmlClient.GetHtmlTextReaderAsync</c> and drains the
/// reader's <c>BaseReader</c> one <c>Read()</c> call at a time, counting the successful reads.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <returns>The number of <c>Read()</c> calls that returned a non-negative value.</returns>
public async Task<int> DownloadPageUsingGetAsTextReaderAsync(string url)
{
    int total = 0;

    using (HtmlTextReader reader = await HtmlClient.GetHtmlTextReaderAsync(url))
    {
        // A negative result from Read() ends the loop; every other result counts as one read.
        while (reader.BaseReader.Read() >= 0)
        {
            total++;
        }
    }

    return total;
}