Exemple #1
0
        /// <summary>
        /// Builds a page from downloaded bytes together with a preliminary decoding of them.
        /// </summary>
        /// <param name="wb">Downloaded resource; supplies the location, character set and raw data.</param>
        /// <param name="prelimContent">
        /// Preliminary decoded content; supplies the encoding that was used and the document text.
        /// </param>
        internal HtmlPage(WebBytes wb, WebString prelimContent)
            : base(wb.Location)
        {
            // Remember the character set reported by the download and the encoding of the first decode.
            HeadersCharset = wb.CharacterSet;
            EncHeaders     = prelimContent.UsedEncoding;

            // Hand the raw data and the preliminary document text to DetermineEncoding.
            var rawData    = wb.Data;
            var prelimText = prelimContent.Document;
            DetermineEncoding(rawData, prelimText);
        }
Exemple #2
0
        /// <summary>
        /// Fetches <paramref name="uri"/> and, when the response is HTML, follows meta refresh
        /// redirects up to <c>MaxRefreshes</c>.
        /// </summary>
        /// <param name="uri">Address to fetch.</param>
        /// <param name="useCookies">
        /// Passed on to <c>SetupRequest</c>. If the fetch fails with a cookie error the request is retried
        /// once with this set to <c>false</c>.
        /// </param>
        /// <param name="redirects">Number of meta refreshes followed so far.</param>
        /// <param name="previousPage">
        /// Document text of the page that redirected here; compared against the current document to
        /// decide whether to keep following refreshes.
        /// </param>
        /// <returns>
        /// A result wrapping the bytes and decoded page for HTML content, or just the bytes when the
        /// content is not HTML or the fetch failed.
        /// </returns>
        FetcherResult Load(Uri uri, bool useCookies, int redirects, string previousPage)
        {
            var req = SetupRequest(uri, useCookies);
            var wb  = WebBytes.Create(req, MaxSizeHtml, MaxSizeNonHtml);

            if (wb.ContentIsHtml)
            {
                var page = WebString.Create(wb, EncHelp.Windows1252);

                Uri refreshUrl;
                if (redirects < MaxRefreshes &&
                    TryGetMetaRefresh(page, out refreshUrl) &&
                    VerifyRefresh(page.Location, refreshUrl))
                {
                    // If during the redirects we get a different page/HTML string, refrain from following more
                    // redirects. It probably means we've arrived, but only more real world testing will tell us
                    // if that's true.
                    // Otherwise keep trying to follow the redirects.
                    if (redirects == 0 || page.Document == previousPage)
                    {
                        return Load(refreshUrl, useCookies, redirects + 1, page.Document);
                    }
                }

                return new FetcherResult(wb, page);
            }

            // Silently handle cookie exceptions, Mono/.NET can be very strict with which cookies it accepts.
            // The retry switches cookies off, so it is only attempted when they were on: retrying a request
            // that already had cookies disabled would recurse without end. The null-conditional access
            // keeps a failure without an exception object from throwing here.
            if (useCookies && !wb.Success && wb.Exception?.InnerException is CookieException)
            {
                return Load(uri, false, redirects, previousPage);
            }

            // If either not HTML or there was an error in getting the resource, return as-is.
            return new FetcherResult(wb);
        }
Exemple #3
0
        /// <summary>
        /// Creates an <see cref="HtmlPage"/> for the given request.
        /// </summary>
        /// <param name="request">Request to read from; must not be null.</param>
        /// <returns>The page created from <c>WebBytes.ReadOnlyHtml(request)</c>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="request"/> is null.</exception>
        public static HtmlPage Create(System.Net.WebRequest request)
        {
            if (request == null)
            {
                // nameof keeps the reported parameter name in sync with the signature.
                throw new ArgumentNullException(nameof(request));
            }

            return Create(WebBytes.ReadOnlyHtml(request));
        }
Exemple #4
0
        /// <summary>
        /// Creates a <see cref="WebString"/> for the given request.
        /// </summary>
        /// <param name="request">Request to read from; must not be null.</param>
        /// <param name="fallbackEnc">Encoding handed on as the fallback; must not be null.</param>
        /// <returns>The string created from <c>WebBytes.Create(request)</c> and the fallback encoding.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="request"/> or <paramref name="fallbackEnc"/> is null.
        /// </exception>
        public static WebString Create(WebRequest request, Encoding fallbackEnc)
        {
            if (request == null)
            {
                throw new ArgumentNullException(nameof(request));
            }

            // Reject a missing fallback before WebBytes.Create runs, so the request is not processed
            // only for the call to fail afterwards with the very same exception.
            if (fallbackEnc == null)
            {
                throw new ArgumentNullException(nameof(fallbackEnc));
            }

            return Create(WebBytes.Create(request), fallbackEnc);
        }
Exemple #5
0
        /// <summary>
        /// Wraps downloaded bytes together with the page created from them.
        /// </summary>
        /// <param name="bytes">Downloaded resource; must not be null.</param>
        /// <exception cref="ArgumentNullException"><paramref name="bytes"/> is null.</exception>
        public FetcherResult(WebBytes bytes)
        {
            var missing = bytes == null;
            if (missing)
            {
                throw new ArgumentNullException(nameof(bytes));
            }

            Bytes = bytes;

            // The page is always created via HtmlPage.Create from the same bytes.
            var htmlPage = HtmlPage.Create(bytes);
            Page = htmlPage;
        }
Exemple #6
0
        /// <summary>
        /// Creates an <see cref="HtmlPage"/> from already downloaded bytes.
        /// </summary>
        /// <param name="wb">Downloaded resource; must not be null.</param>
        /// <returns>
        /// A page built from the location, data and character set when the content is HTML; a page
        /// carrying a <c>NotHtmlException</c> when the download succeeded but is not HTML; otherwise a
        /// page built from <paramref name="wb"/> itself.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="wb"/> is null.</exception>
        public static HtmlPage Create(WebBytes wb)
        {
            if (wb == null)
            {
                throw new ArgumentNullException(nameof(wb));
            }

            if (wb.ContentIsHtml)
            {
                return new HtmlPage(wb.Location, wb.Data, wb.CharacterSet);
            }

            if (wb.Success)
            {
                // Downloaded fine, but the content type is not one we treat as HTML.
                var ex = new NotHtmlException("Content isn't (X)HTML. Content-Type: " + wb.ContentType);
                return new HtmlPage(wb.Location, ex);
            }

            // The download did not succeed; let the page be built from the failed result.
            return new HtmlPage(wb);
        }
Exemple #7
0
        /// <summary>
        /// Creates a <see cref="WebString"/> from already downloaded bytes.
        /// </summary>
        /// <param name="wb">Downloaded resource; must not be null.</param>
        /// <param name="fallbackEnc">
        /// Encoding to use when <c>EncHelp.GetEncoding</c> returns null for the resource's character set;
        /// must not be null.
        /// </param>
        /// <returns>
        /// A string built from the location, data and chosen encoding when the download succeeded;
        /// otherwise a string built from <paramref name="wb"/> itself.
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="wb"/> or <paramref name="fallbackEnc"/> is null.
        /// </exception>
        public static WebString Create(WebBytes wb, Encoding fallbackEnc)
        {
            if (wb == null)
            {
                throw new ArgumentNullException(nameof(wb));
            }

            if (fallbackEnc == null)
            {
                throw new ArgumentNullException(nameof(fallbackEnc));
            }

            if (!wb.Success)
            {
                // The download did not succeed; let the string be built from the failed result.
                return new WebString(wb);
            }

            // Prefer the encoding resolved from the declared character set, else the caller's fallback.
            var enc = EncHelp.GetEncoding(wb.CharacterSet) ?? fallbackEnc;
            return new WebString(wb.Location, wb.Data, enc);
        }
Exemple #8
0
 /// <summary>
 /// Wraps downloaded bytes together with a page built from them and a preliminary decode.
 /// </summary>
 /// <param name="bytes">Downloaded resource.</param>
 /// <param name="page">Preliminary decoded content passed on to the <c>HtmlPage</c> constructor.</param>
 internal FetcherResult(WebBytes bytes, WebString page)
 {
     Bytes = bytes;

     // Build the page from both the raw bytes and the preliminary decode.
     var htmlPage = new HtmlPage(bytes, page);
     Page = htmlPage;
 }