Example #1
0
        /// <summary>
        /// Opens the content addressed by <paramref name="url"/> and returns an <see cref="HtmlTextReader"/>
        /// over it, so clients can stream the returned content through a TextReader instead of buffering it
        /// into a string first. File URLs are read straight from disk; every other URL is requested through
        /// the shared HttpClient and only the response headers are read before the reader is created.
        /// </summary>
        /// <param name="url">
        /// Absolute URL of the content to read. It is parsed with <see cref="Uri"/>, so a malformed or
        /// relative value makes the Uri constructor throw.
        /// </param>
        /// <param name="options">
        /// Options supplying the user agent, the default encoding and the encoding-detection flag.
        /// When null, <c>HtmlClient.Options</c> is used instead.
        /// </param>
        /// <returns>
        /// A reader positioned at the start of the content. <c>OriginatingUrl</c> is always set to
        /// <paramref name="url"/>; for HTTP responses that carry content, the content headers are also
        /// copied into <c>OriginatingHttpHeaders</c> (multiple values of one header joined with ";").
        /// </returns>
        /// <exception cref="HttpRequestException">The HTTP response did not have a success status code.</exception>
        public static async Task <HtmlTextReader> GetHtmlTextReaderAsync(string url, ClientOptions options)
        {
            HtmlTextReader reader;
            ClientOptions  optionsToUse = options ?? HtmlClient.Options;
            Uri            uri          = new Uri(url);

            // See if the url pointed to a file. If so, return a reader with a file stream
            // under the hood.
            if (uri.IsFile)
            {
                // LocalPath is the unescaped operating-system path (and keeps the host of UNC paths);
                // AbsolutePath stays percent-encoded, so e.g. "my%20page.html" would not be found on disk.
                FileStream fs     = File.OpenRead(uri.LocalPath);
                HtmlStream stream = new HtmlStream(fs);

                // Use optionsToUse (not options): the caller is allowed to pass null options.
                reader = new HtmlTextReader(stream, optionsToUse.DefaultEncoding, EncodingConfidence.Tentative);
                reader.OriginatingUrl = url;
                return(reader);
            }

            // Set a user agent if one was specified. Remove first so the header is replaced
            // rather than accumulating one value per call.
            if (!string.IsNullOrEmpty(optionsToUse.UserAgent))
            {
                HttpClient.DefaultRequestHeaders.Remove("User-Agent");
                HttpClient.DefaultRequestHeaders.Add("User-Agent", optionsToUse.UserAgent);
            }

            // Get the Http response (only read the headers at this point) and ensure success
            HttpResponseMessage responseMessage = await HttpClient.GetAsync(uri, HttpCompletionOption.ResponseHeadersRead).ConfigureAwait(false);

            responseMessage.EnsureSuccessStatusCode();

            HttpContent content = responseMessage.Content;

            // If there is no content to return, return an empty HtmlTextReader. There are
            // no content headers to copy in that case, so finish here.
            if (content == null)
            {
                reader = new HtmlTextReader(String.Empty);
                reader.OriginatingUrl = url;
                return(reader);
            }

            reader = await content.GetHtmlTextReaderAsync(optionsToUse.DefaultEncoding, optionsToUse.DetectEncoding).ConfigureAwait(false);

            // Store some metadata on the reader. Could be used by a parser.
            reader.OriginatingUrl = url;
            foreach (var header in content.Headers)
            {
                reader.OriginatingHttpHeaders.Add(new KeyValuePair <string, string>(header.Key, string.Join(";", header.Value)));
            }

            return(reader);
        }