コード例 #1
0
        /// <summary>
        /// Issues a GET request for the given URL with the request version set to HTTP/2.
        /// Any exception raised along the way is logged as a warning and returned inside
        /// the result instead of being thrown to the caller.
        /// </summary>
        /// <param name="url">The URL to fetch.</param>
        /// <param name="acceptHeaders">Values handed to <c>AddAcceptHeaders</c> for the outgoing request.</param>
        /// <returns>
        /// A result whose <c>Result</c> holds the response body on success,
        /// or whose <c>Error</c> holds the caught exception on failure.
        /// </returns>
        private async Task<HttpFetchResult> TryFetchWithHttp2Client(Uri url, params string[] acceptHeaders)
        {
            try
            {
                using var request = new HttpRequestMessage(HttpMethod.Get, url);
                request.Version = new Version(2, 0);
                request.AddAcceptHeaders(acceptHeaders);

                using var response = await http.SendAsync(request);
                response.EnsureSuccessStatusCode();

                var body = await response.Content.ReadAsStringAsync();
                logger.LogInformation("Successfully fetched {url} via HTTP/2 fallback", url);

                var success = new HttpFetchResult { Result = body };
                return success;
            }
            catch (Exception fetchError)
            {
                // Report the failure through the result object; callers inspect Error rather than catching.
                logger.LogWarning(fetchError, "Unable to fetch {url} using HTTP/2 fallback.", url);

                var failure = new HttpFetchResult { Error = fetchError };
                return failure;
            }
        }
コード例 #2
0
        /// <summary>
        /// Fetches the URL with " curl/7.64.1" appended to the user agent, which some sites
        /// (e.g. Facebook) require for programmatic fetching to work.
        /// The user agent is reset to the default once the attempt completes, whether it succeeded or failed.
        /// </summary>
        /// <param name="url">The URL to fetch.</param>
        /// <param name="acceptHeaders">Values handed to <c>AddAcceptHeaders</c> for the outgoing request.</param>
        /// <returns>
        /// A fetch result whose <c>Result</c> holds the response body on success,
        /// or whose <c>Error</c> holds the caught exception on failure.
        /// </returns>
        private async Task<HttpFetchResult> TryFetchWithCurlUserAgent(Uri url, string[] acceptHeaders)
        {
            try
            {
                // Append CURL to the user agent.
                // NOTE(review): this changes the user agent on the shared client rather than on this
                // request only, so overlapping fetches may observe it - confirm against SetUserAgent.
                http.SetUserAgent(userAgent + " curl/7.64.1");

                using var httpRequest = new HttpRequestMessage(HttpMethod.Get, url);
                httpRequest.AddAcceptHeaders(acceptHeaders);

                // Dispose the response so its content stream and connection are released promptly.
                using var httpResponse = await http.SendAsync(httpRequest);

                httpResponse.EnsureSuccessStatusCode();
                var content = await httpResponse.Content.ReadAsStringAsync();

                return new HttpFetchResult
                {
                    Result = content
                };
            }
            catch (Exception fetchError)
            {
                // Failures are reported through the result object instead of being rethrown.
                logger.LogWarning(fetchError, "Unable to fetch {url} using CURL user agent fallback.", url);
                return new HttpFetchResult
                {
                    Error = fetchError
                };
            }
            finally
            {
                // Reset the user agent back to the default user agent.
                http.SetUserAgent(userAgent);
            }
        }
コード例 #3
0
        /// <summary>
        /// Attempts to fetch a resource at the specified URL.
        /// A 403 Forbidden response triggers a retry using the CURL user agent fallback.
        /// A failure caused by an invalid ContentType character set triggers a retry that forces UTF-8.
        /// Any other failure triggers a retry using HTTP/2.
        /// The result of whichever fallback ran is returned as-is.
        /// </summary>
        /// <param name="url">The URL to fetch.</param>
        /// <param name="acceptHeaders">Values handed to <c>AddAcceptHeaders</c> for the outgoing request.</param>
        /// <returns>
        /// A fetch result whose <c>Result</c> holds the response body when the first attempt succeeds;
        /// otherwise the result produced by the fallback fetch.
        /// </returns>
        private async Task<HttpFetchResult> TryFetch(Uri url, params string[] acceptHeaders)
        {
            try
            {
                using var httpRequest = new HttpRequestMessage(HttpMethod.Get, url);
                httpRequest.AddAcceptHeaders(acceptHeaders);

                // Dispose the response on every exit path, including the throw paths below.
                using var httpResponse = await http.SendAsync(httpRequest);

                // If it's a 403, we have special handling for this.
                if (httpResponse.StatusCode == System.Net.HttpStatusCode.Forbidden)
                {
                    var errorMessage = !string.IsNullOrWhiteSpace(httpResponse.ReasonPhrase) ?
                                       httpResponse.ReasonPhrase :
                                       "Web server's response was 403 Forbidden.";
                    throw new HttpForbiddenException(errorMessage);
                }

                httpResponse.EnsureSuccessStatusCode();
                var content = await httpResponse.Content.ReadAsStringAsync();

                return new HttpFetchResult
                {
                    Result = content
                };
            }
            catch (InvalidOperationException invalidOpError) when (invalidOpError.Message.Contains("The character set provided in ContentType is invalid."))
            {
                // Invalid encoding? Sometimes webpages have incorrectly set their charset / content type.
                // See if we can just parse the thing using UTF-8.
                logger.LogWarning(invalidOpError, "Unable to parse using HTTP client due to invalid ContentType. Attempting to parse using UTF-8.");
                return await TryFetchWithForcedUtf8(url, acceptHeaders);
            }
            catch (HttpForbiddenException forbiddenError)
            {
                // The template has a {url} placeholder, so the URL must be supplied as an argument.
                logger.LogWarning(forbiddenError, "Received 403 Forbidden when fetching {url}. Attempting fetch with CURL user agent fallback.", url);
                return await TryFetchWithCurlUserAgent(url, acceptHeaders); // TODO: should we always try this when we encounter exception, not just forbidden exception?
            }
            catch (Exception httpException)
            {
                logger.LogWarning(httpException, "Failed to fetch {url}. Falling back to HTTP/2 fetch.", url);
                return await TryFetchWithHttp2Client(url, acceptHeaders);
            }
        }