Ejemplo n.º 1
0
        /// <summary>
        /// Fetches <paramref name="url"/> through <c>HtmlClient</c>, parses the response into an
        /// <see cref="XmlDocument"/>, logs the encoding reported by the reader before and after
        /// parsing, and asserts that the document title equals <paramref name="expectedTitle"/>.
        /// </summary>
        /// <param name="url">Address of the page to fetch; also passed to the parser as the base URL.</param>
        /// <param name="expectedTitle">Title text the parsed document is expected to contain.</param>
        /// <param name="options">Client options for the request; when null, <c>HtmlClient.Options</c> is used.</param>
        public async Task TestGetTextReaderAsync_ForEncoding(string url, string expectedTitle, ClientOptions options = null)
        {
            ClientOptions optionsToUse = options ?? HtmlClient.Options;
            XmlDocument   doc1         = new XmlDocument();

            System.Text.Encoding initialEncoding   = null;
            EncodingConfidence   initialConfidence = EncodingConfidence.Tentative;

            System.Text.Encoding finalEncoding   = null;
            EncodingConfidence   finalConfidence = EncodingConfidence.Tentative;

            // Get the Html asynchronously and Parse it into an Xml Document
            using (HtmlTextReader textReader = await HtmlClient.GetHtmlTextReaderAsync(url, optionsToUse)) {
                // Snapshot the encoding state before parsing so it can be compared with the state afterwards.
                initialEncoding   = textReader.CurrentEncoding;
                initialConfidence = textReader.CurrentEncodingConfidence;

                HtmlParser.DefaultParser.Parse(doc1, textReader, new ParserOptions {
                    BaseUrl = url
                });

                finalEncoding   = textReader.CurrentEncoding;
                finalConfidence = textReader.CurrentEncodingConfidence;
            }

            // SelectSingleNode returns null when nothing matches (e.g. a page without a title);
            // keep the title null in that case so the assertion below reports the mismatch
            // instead of the test dying with a NullReferenceException.
            XmlNode titleNode = doc1.SelectSingleNode("//title/text()");
            string  title1    = titleNode == null ? null : titleNode.InnerText;

            Console.WriteLine("Crawled: " + url + ", title: " + title1 + ", default: " + optionsToUse.DefaultEncoding.WebName + " (detect=" + optionsToUse.DetectEncoding + "), inital: " + initialEncoding.WebName + " (" + initialConfidence + "), final: " + finalEncoding.WebName + " (" + finalConfidence + ")");

            // Compare the parsed title with the expected one; a mismatch suggests the page text was decoded incorrectly.
            Assert.AreEqual(expectedTitle, title1);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Fetches the page at <paramref name="url"/> and parses it into a new
        /// <see cref="XmlDocument"/> using this instance's <c>Parser</c>.
        /// </summary>
        /// <param name="url">Address of the page to fetch; also supplied to the parser as the base URL.</param>
        /// <returns>The document that the parser was asked to populate.</returns>
        public async Task <XmlDocument> LoadXHtmlDocAsync(string url)
        {
            XmlDocument result = new XmlDocument();

            // Download the markup asynchronously, then parse it while the reader is still open.
            using (HtmlTextReader reader = await HtmlClient.GetHtmlTextReaderAsync(url))
            {
                ParserOptions parseSettings = new ParserOptions();
                parseSettings.BaseUrl = url;

                this.Parser.Parse(result, reader, parseSettings);
            }

            return result;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Fetches the page at <paramref name="url"/> and parses it into <paramref name="doc"/>
        /// through an <c>XmlDomBuilder</c> and an <c>HtmlStreamParser</c>.
        /// </summary>
        /// <param name="doc">Document handed to the DOM builder that receives the parse output.</param>
        /// <param name="url">Address of the page to fetch; used as the base URL when the options carry none.</param>
        /// <param name="options">Loader options; when null, a default <c>LoaderOptions</c> instance is created.</param>
        internal static async Task LoadWebPageAsync(XmlDocument doc, string url, LoaderOptions options)
        {
            LoaderOptions effective = options;
            if (effective == null)
            {
                effective = new LoaderOptions();
            }

            // Fall back to the page address when no base URL was configured.
            // Note: this writes to the options object itself, so a caller-supplied instance is modified.
            string baseUrl = effective.ParserOptions.BaseUrl;
            if (string.IsNullOrEmpty(baseUrl))
            {
                baseUrl = url;
            }
            effective.ParserOptions.BaseUrl = baseUrl;

            XmlDomBuilder builder = new XmlDomBuilder(doc);
            HtmlStreamParser <XmlNode> streamParser = new HtmlStreamParser <XmlNode>();

            // Download the markup asynchronously, then feed it to the parser while the reader is open.
            using (HtmlTextReader reader = await HtmlClient.GetHtmlTextReaderAsync(url, effective))
            {
                streamParser.Parse(builder, reader, effective.ParserOptions);
            }
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Fetches the page at <paramref name="url"/> and reads it one character at a time
        /// from the reader's <c>BaseReader</c> until a negative value is returned.
        /// </summary>
        /// <param name="url">Address of the page to download.</param>
        /// <returns>The number of successful single-character reads.</returns>
        public async Task <int> DownloadPageUsingGetAsTextReaderAsync(string url)
        {
            int total = 0;

            using (HtmlTextReader htmlReader = await HtmlClient.GetHtmlTextReaderAsync(url))
            {
                int next;

                // A negative result from Read() ends the loop; every other result counts as one character.
                while ((next = htmlReader.BaseReader.Read()) >= 0)
                {
                    total++;
                }
            }

            return total;
        }