Esempio n. 1
0
        /// <summary>
        /// Resolves the URL that a header request for <paramref name="url"/> ends up at.
        /// For a site that redirects some queries, this allows you to get the 'real' URL for a given short URL.
        /// </summary>
        /// <param name="url">The URL to resolve.  Cannot be null.</param>
        /// <param name="shortDescrip">A short description that can be used in status updates.</param>
        /// <param name="caching">Not used by this method; present for signature compatibility.</param>
        /// <param name="shouldCache">Not used by this method; present for signature compatibility.</param>
        /// <param name="suppressNotifications">Indicates whether notification messages should be sent to output.</param>
        /// <param name="token">Cancellation token.</param>
        /// <returns>
        /// The absolute URI reported by the header request, or null when no response URI was obtained.
        /// </returns>
        /// <exception cref="System.ArgumentNullException">
        /// Presumably raised by URL validation inside GetRedirectedHeaderRequestUri (not visible here) — confirm.
        /// </exception>
        /// <exception cref="System.ArgumentException">
        /// Presumably raised by URL validation inside GetRedirectedHeaderRequestUri (not visible here) — confirm.
        /// </exception>
        public async Task <string> GetRedirectUrl(string url, string shortDescrip,
                                                  CachingMode caching, ShouldCache shouldCache, SuppressNotifications suppressNotifications, CancellationToken token)
        {
            // The continuation only unwraps the result, so there is no need to resume on the
            // caller's synchronization context (matches the other page loaders in this class).
            Uri responseUri = await GetRedirectedHeaderRequestUri(url, shortDescrip, suppressNotifications, token).ConfigureAwait(false);

            // A null response URI is passed through to the caller as a null string.
            return(responseUri?.AbsoluteUri);
        }
Esempio n. 2
0
        /// <summary>
        /// Resolves the URL that a header request for <paramref name="url"/> ends up at.
        /// For a site that redirects some queries, this allows you to get the 'real' URL for a given short URL.
        /// </summary>
        /// <param name="url">The URL to resolve.  Cannot be null.</param>
        /// <param name="shortDescrip">A short description used in log and status messages.  May be null.</param>
        /// <param name="caching">Not used by this method; present for signature compatibility.</param>
        /// <param name="shouldCache">Not used by this method; present for signature compatibility.</param>
        /// <param name="suppressNotifications">Indicates whether notification messages should be sent to output.</param>
        /// <param name="token">Cancellation token.</param>
        /// <returns>
        /// The absolute URI reported by the header request, or an empty string when no response URI was obtained.
        /// </returns>
        /// <exception cref="System.ArgumentNullException">
        /// Presumably raised by URL validation inside GetRedirectedHeaderRequestUri (not visible here) — confirm.
        /// </exception>
        /// <exception cref="System.ArgumentException">
        /// Presumably raised by URL validation inside GetRedirectedHeaderRequestUri (not visible here) — confirm.
        /// </exception>
        public async Task <string> GetRedirectUrlAsync(string url, string?shortDescrip,
                                                       CachingMode caching, ShouldCache shouldCache, SuppressNotifications suppressNotifications, CancellationToken token)
        {
            logger.LogInformation($"Requested URL redirect for \"{shortDescrip}\"");

            // Nothing below depends on the caller's synchronization context.
            Uri?responseUri = await GetRedirectedHeaderRequestUri(url, shortDescrip, suppressNotifications, token).ConfigureAwait(false);

            // Callers get an empty string rather than null when the lookup produced nothing.
            string result = responseUri?.AbsoluteUri ?? string.Empty;

            if (string.IsNullOrEmpty(result))
            {
                // Must be an interpolated string: without the '$' the log would contain the
                // literal text "{shortDescrip}" instead of the description.
                logger.LogDebug($"Redirect request failed for \"{shortDescrip}\".");
            }
            else
            {
                logger.LogDebug($"Redirect request succeeded. Using {result}");
            }

            return(result);
        }
Esempio n. 3
0
        /// <summary>
        /// Asynchronously downloads the content of a specific page as a string, retrying
        /// timed-out attempts up to the configured retry limit.
        /// </summary>
        /// <param name="uri">The address to request.</param>
        /// <param name="url">The string form of the address, used for notifications and as the cache key.</param>
        /// <param name="shortDescrip">A short description that can be used in status updates.</param>
        /// <param name="shouldCache">Indicates whether the result of this page load should be cached.</param>
        /// <param name="suppressNotifications">Indicates whether notification messages should be sent to output.</param>
        /// <param name="token">Cancellation token.</param>
        /// <returns>Returns the page content, or null if the page could not be loaded.</returns>
        /// <exception cref="OperationCanceledException">If cancellation is requested through <paramref name="token"/>.</exception>
        /// <exception cref="HttpRequestException">Rethrown, after an error notification, if the request itself fails.</exception>
        private async Task <string?> GetUrlContent(Uri uri, string url, string shortDescrip,
                                                   ShouldCache shouldCache, SuppressNotifications suppressNotifications, CancellationToken token)
        {
            string?  result  = null;
            int      tries   = 0;
            DateTime expires = CacheInfo.DefaultExpiration;

            NotifyStatusChange(PageRequestStatusType.Requested, url, shortDescrip, null, suppressNotifications);

            // Limit to no more than N parallel requests
            await ss.WaitAsync(token).ConfigureAwait(false);

            try
            {
                Cookie?cookie = ForumCookies.GetCookie(uri);
                if (cookie != null)
                {
                    ClientHandler.CookieContainer.Add(uri, cookie);
                }

                // The header is only added once; an existing Authorization header is left alone.
                string?authorization = ForumAuthentications.GetAuthorization(uri);
                if (authorization != null && !httpClient.DefaultRequestHeaders.Contains("Authorization"))
                {
                    httpClient.DefaultRequestHeaders.Add("Authorization", authorization);
                }

                Task <HttpResponseMessage>?getResponseTask = null;

                do
                {
                    token.ThrowIfCancellationRequested();

                    if (tries > 0)
                    {
                        // Delay any additional attempts after the first.
                        await Task.Delay(retryDelay, token).ConfigureAwait(false);

                        // Notify the user if we're making another attempt to load the page.
                        NotifyStatusChange(PageRequestStatusType.Retry, url, shortDescrip, null, suppressNotifications);
                    }

                    tries++;

                    try
                    {
                        getResponseTask = httpClient.GetAsync(uri, token).TimeoutAfter(timeout, token);
                        logger.LogDebug($"Get URI {uri} task ID: {getResponseTask.Id}");

                        using (var response = await getResponseTask.ConfigureAwait(false))
                        {
                            if (response.IsSuccessStatusCode)
                            {
                                result = await response.Content.ReadAsStringAsync().ConfigureAwait(false);

                                // Get expires value
                                // Cannot get Expires value until we move to .NET Standard 2.0.

                                // If we get a successful result, we're done.
                                break;
                            }
                            else if (PageLoadFailed(response))
                            {
                                NotifyStatusChange(PageRequestStatusType.Failed, url,
                                                   GetFailureMessage(response, shortDescrip, url), null, suppressNotifications);
                                return(null);
                            }
                            else if (PageWasMoved(response))
                            {
                                // A redirect target is carried by the Location response header.
                                // Content-Location is kept as a fallback, since that is what this
                                // code historically read.
                                Uri?movedTo = response.Headers.Location ?? response.Content?.Headers.ContentLocation;

                                if (movedTo != null)
                                {
                                    // The header value may be relative; resolve it against the address
                                    // we just requested before asking for AbsoluteUri.
                                    uri = movedTo.IsAbsoluteUri ? movedTo : new Uri(uri, movedTo);
                                    url = uri.AbsoluteUri;
                                }

                                // With no target supplied, the next attempt simply retries the same address.
                            }
                        }
                    }
                    catch (OperationCanceledException)
                    {
                        if (token.IsCancellationRequested)
                        {
                            // user request
                            throw;
                        }
                        else
                        {
                            // timeout via cancellation
                            logger.LogDebug($"Attempt to load {shortDescrip} timed out/self-cancelled (TA). Tries={tries}");
                        }
                    }
                    catch (TimeoutException)
                    {
                        logger.LogDebug($"Attempt to load {shortDescrip} timed out. Tries={tries}");
                    }
                    catch (HttpRequestException e)
                    {
                        NotifyStatusChange(PageRequestStatusType.Error, url, shortDescrip, e, suppressNotifications);
                        throw;
                    }
                } while (tries < retryLimit);

                logger.LogDebug($"Finished getting URI {uri} task ID: {getResponseTask?.Id ?? 0}");

                // Out of attempts with nothing to show for it: abandon whatever is still in flight.
                if (result == null && tries >= retryLimit)
                {
                    httpClient.CancelPendingRequests();
                }
            }
            catch (OperationCanceledException)
            {
                // If it's not a user-requested cancellation, generate a failure message.
                if (!token.IsCancellationRequested)
                {
                    NotifyStatusChange(PageRequestStatusType.Failed, url, shortDescrip, null, suppressNotifications);
                }

                throw;
            }
            finally
            {
                // Always give the slot back, whichever way the request ended.
                ss.Release();
            }

            token.ThrowIfCancellationRequested();

            if (result == null)
            {
                NotifyStatusChange(PageRequestStatusType.Failed, url, shortDescrip, null, suppressNotifications);
                return(null);
            }

            // Note that url may have been updated by a redirect above, so the content is
            // cached under the address it was actually loaded from.
            if (shouldCache == ShouldCache.Yes)
            {
                Cache.Add(url, result, expires);
            }

            NotifyStatusChange(PageRequestStatusType.Loaded, url, shortDescrip, null, suppressNotifications);

            return(result);
        }
Esempio n. 4
0
        /// <summary>
        /// Fetches the text of the requested page, preferring a cached copy when one is found.
        /// </summary>
        /// <param name="url">The URL to load.</param>
        /// <param name="shortDescrip">The short description of the page (for notifications).</param>
        /// <param name="caching">The caching mode, handed to the cache lookup.</param>
        /// <param name="shouldCache">Whether a freshly loaded page should be cached.</param>
        /// <param name="suppressNotifications">Whether to suppress notifications.</param>
        /// <param name="token">The cancellation token.</param>
        /// <returns>
        /// The cached content if the lookup reports a hit; otherwise the loaded content,
        /// or an empty string when loading produced nothing.
        /// </returns>
        private async Task <string> GetPageContent(string url, string shortDescrip, CachingMode caching, ShouldCache shouldCache,
                                                   SuppressNotifications suppressNotifications, CancellationToken token)
        {
            var (requestUri, verifiedUrl) = GetVerifiedUrl(url);
            var (isCached, pageText) = GetCachedContent(verifiedUrl, caching);

            if (isCached)
            {
                // Cache hit: report it and skip the network request entirely.
                NotifyStatusChange(PageRequestStatusType.LoadedFromCache, verifiedUrl, shortDescrip, null, suppressNotifications);
                return pageText;
            }

            var downloaded = await GetUrlContent(requestUri, verifiedUrl, shortDescrip, shouldCache, suppressNotifications, token)
                                   .ConfigureAwait(false);

            // Callers always receive a non-null string.
            return downloaded ?? string.Empty;
        }
Esempio n. 5
0
        /// <summary>
        /// Loads the requested page and parses its content as XML.
        /// </summary>
        /// <param name="url">The URL of the page to load.  Cannot be null.</param>
        /// <param name="shortDescrip">A short description used in log messages and status updates.</param>
        /// <param name="caching">Indicator of whether to query the cache for the requested page.</param>
        /// <param name="shouldCache">Indicates whether the result of this page load should be cached.</param>
        /// <param name="suppressNotifications">Indicates whether notification messages should be sent to output.</param>
        /// <param name="token">Cancellation token.</param>
        /// <returns>The parsed XML document, or null when no content was obtained.</returns>
        public async Task <XDocument?> GetXmlDocumentAsync(string url, string shortDescrip, CachingMode caching, ShouldCache shouldCache,
                                                           SuppressNotifications suppressNotifications, CancellationToken token)
        {
            logger.LogInformation($"Requested XML document \"{shortDescrip}\"");

            string pageText = await GetPageContent(url, shortDescrip, caching, shouldCache, suppressNotifications, token)
                                    .ConfigureAwait(false);

            // Nothing came back, so there is nothing to parse.
            if (string.IsNullOrEmpty(pageText))
            {
                return null;
            }

            logger.LogInformation($"\"{shortDescrip}\" successfully loaded.");
            XDocument parsed = XDocument.Parse(pageText);
            logger.LogDebug($"\"{shortDescrip}\" successfully parsed into XDocument.");

            return parsed;
        }
Esempio n. 6
0
        /// <summary>
        /// Loads the requested web page and parses its content into an HTML document.
        /// </summary>
        /// <param name="url">The URL of the page to load.  Cannot be null.</param>
        /// <param name="shortDescrip">A short description used in log messages and status updates.</param>
        /// <param name="caching">Indicator of whether to query the cache for the requested page.</param>
        /// <param name="shouldCache">Indicates whether the result of this page load should be cached.</param>
        /// <param name="suppressNotifications">Indicates whether notification messages should be sent to output.</param>
        /// <param name="token">Cancellation token.</param>
        /// <returns>
        /// The parsed HTML document, or null when no content was obtained.
        /// </returns>
        public async Task <HtmlDocument?> GetHtmlDocumentAsync(string url, string shortDescrip, CachingMode caching, ShouldCache shouldCache,
                                                               SuppressNotifications suppressNotifications, CancellationToken token)
        {
            logger.LogInformation($"Requested HTML document \"{shortDescrip}\"");

            string pageText = await GetPageContent(url, shortDescrip, caching, shouldCache, suppressNotifications, token)
                                    .ConfigureAwait(false);

            // Nothing came back, so there is nothing to parse.
            if (string.IsNullOrEmpty(pageText))
            {
                return null;
            }

            logger.LogInformation($"\"{shortDescrip}\" successfully loaded from web.");

            var document = new HtmlDocument();

            // Parsing is handed to Task.Run so it does not run inline on the current thread.
            await Task.Run(() => document.LoadHtml(pageText), token).ConfigureAwait(false);

            logger.LogDebug($"\"{shortDescrip}\" successfully parsed into HtmlDocument.");

            return document;
        }
Esempio n. 7
0
        /// <summary>
        /// Loads the requested page and parses its content as XML.
        /// </summary>
        /// <param name="url">The URL of the page to load.  Cannot be null.</param>
        /// <param name="shortDescrip">A short description that can be used in status updates.</param>
        /// <param name="caching">Indicator of whether to query the cache for the requested page.</param>
        /// <param name="shouldCache">Indicates whether the result of this page load should be cached.</param>
        /// <param name="suppressNotifications">Indicates whether notification messages should be sent to output.</param>
        /// <param name="token">Cancellation token.</param>
        /// <returns>The parsed XML document, or null when no content was obtained.</returns>
        public async Task <XDocument> GetXmlPage(string url, string shortDescrip, CachingMode caching, ShouldCache shouldCache,
                                                 SuppressNotifications suppressNotifications, CancellationToken token)
        {
            string pageText = await GetPageContent(url, shortDescrip, caching, shouldCache, suppressNotifications, token)
                                    .ConfigureAwait(false);

            // Empty content means there is nothing to parse; report that as null.
            return string.IsNullOrEmpty(pageText)
                ? null
                : XDocument.Parse(pageText);
        }
Esempio n. 8
0
        /// <summary>
        /// Loads the requested web page and parses its content into an HTML document.
        /// </summary>
        /// <param name="url">The URL of the page to load.  Cannot be null.</param>
        /// <param name="shortDescrip">A short description that can be used in status updates.</param>
        /// <param name="caching">Indicator of whether to query the cache for the requested page.</param>
        /// <param name="shouldCache">Indicates whether the result of this page load should be cached.</param>
        /// <param name="suppressNotifications">Indicates whether notification messages should be sent to output.</param>
        /// <param name="token">Cancellation token.</param>
        /// <returns>
        /// The parsed HTML document, or null when no content was obtained.
        /// </returns>
        public async Task <HtmlDocument> GetPage(string url, string shortDescrip, CachingMode caching, ShouldCache shouldCache,
                                                 SuppressNotifications suppressNotifications, CancellationToken token)
        {
            string pageText = await GetPageContent(url, shortDescrip, caching, shouldCache, suppressNotifications, token)
                                    .ConfigureAwait(false);

            // Nothing came back, so there is nothing to parse.
            if (string.IsNullOrEmpty(pageText))
            {
                return null;
            }

            var document = new HtmlDocument();

            // Parsing is handed to Task.Run so it does not run inline on the current thread.
            await Task.Run(() => document.LoadHtml(pageText), token).ConfigureAwait(false);

            return document;
        }
Esempio n. 9
0
        /// <summary>
        /// Sends a HEAD request for the requested URL, and returns the request URL reported by the response.
        /// For a site that redirects some queries, this allows you to get the 'real' URL for a given short URL.
        /// </summary>
        /// <param name="url">The URL of the page to query.  Cannot be null or empty.</param>
        /// <param name="shortDescrip">A short description that can be used in status updates.</param>
        /// <param name="caching">Not used by this method; present for signature compatibility.</param>
        /// <param name="shouldCache">Not used by this method; present for signature compatibility.</param>
        /// <param name="suppressNotifications">Indicates whether notification messages should be sent to output.</param>
        /// <param name="token">Cancellation token.</param>
        /// <returns>
        /// Returns the URL that the response says was requested, or null if no request was completed
        /// (for example because cancellation was already requested).
        /// </returns>
        /// <exception cref="System.ArgumentNullException">If url is null or empty.</exception>
        /// <exception cref="System.ArgumentException">If url is not a well-formed absolute URL.</exception>
        /// <exception cref="HttpRequestException">Rethrown, after an error notification, if the request itself fails.</exception>
        public async Task <string> GetHeaderUrl(string url, string shortDescrip,
                                                CachingMode caching, ShouldCache shouldCache, SuppressNotifications suppressNotifications, CancellationToken token)
        {
            if (string.IsNullOrEmpty(url))
            {
                throw new ArgumentNullException(nameof(url));
            }

            if (!Uri.IsWellFormedUriString(url, UriKind.Absolute))
            {
                throw new ArgumentException($"Url is not valid: {url}", nameof(url));
            }

            Uri    uri         = new Uri(url);
            string resolvedUrl = null;

            NotifyStatusChange(PageRequestStatusType.Requested, url, shortDescrip, null, suppressNotifications);

            // Limit to no more than N parallel requests
            await ss.WaitAsync(token).ConfigureAwait(false);

            try
            {
                Cookie cookie = ForumCookies.GetCookie(uri);
                if (cookie != null)
                {
                    ClientHandler.CookieContainer.Add(uri, cookie);
                }

                int tries = 0;

                // NOTE(review): every failure below currently propagates to the caller, so the
                // retry branch is only reached if a retryable catch is added later.
                while (tries < retryLimit && !token.IsCancellationRequested)
                {
                    if (tries > 0)
                    {
                        // If we have to retry loading the page, give it a short delay.
                        // The token is observed so a cancellation does not have to wait it out.
                        await Task.Delay(TimeSpan.FromSeconds(4), token).ConfigureAwait(false);

                        NotifyStatusChange(PageRequestStatusType.Retry, url, shortDescrip, null, suppressNotifications);
                    }
                    tries++;

                    try
                    {
                        // A request message can only be sent once, so build (and dispose) one per attempt.
                        using (HttpRequestMessage request = new HttpRequestMessage(HttpMethod.Head, uri))
                        using (HttpResponseMessage response = await client.SendAsync(request, token).ConfigureAwait(false))
                        {
                            // As long as we got a response (whether 200 or 404), we can extract what
                            // the server thinks the URL should be.
                            resolvedUrl = response.RequestMessage.RequestUri.AbsoluteUri;
                            break;
                        }
                    }
                    catch (HttpRequestException e)
                    {
                        NotifyStatusChange(PageRequestStatusType.Error, url, shortDescrip, e, suppressNotifications);
                        throw;
                    }
                }
            }
            finally
            {
                // Always give the slot back, whichever way the request ended.
                ss.Release();
            }

            if (resolvedUrl != null)
            {
                // Only report a load when a response was actually received.
                NotifyStatusChange(PageRequestStatusType.Loaded, url, shortDescrip, null, suppressNotifications);
                return(resolvedUrl);
            }

            // No response was obtained.  A user-requested cancellation is not reported as a failure.
            if (!token.IsCancellationRequested)
            {
                NotifyStatusChange(PageRequestStatusType.Failed, url, shortDescrip, null, suppressNotifications);
            }

            return(null);
        }
Esempio n. 10
0
        /// <summary>
        /// Asynchronously load a specific page, from the cache if allowed and available,
        /// otherwise from the web with timed-out attempts retried up to the configured retry limit.
        /// </summary>
        /// <param name="url">The URL of the page to load.  Cannot be null.</param>
        /// <param name="shortDescrip">A short description that can be used in status updates.</param>
        /// <param name="caching">Indicator of whether to query the cache for the requested page.</param>
        /// <param name="shouldCache">Indicates whether the result of this page load should be cached.</param>
        /// <param name="suppressNotifications">Indicates whether notification messages should be sent to output.</param>
        /// <param name="token">Cancellation token.</param>
        /// <returns>Returns an HTML document, or null if the page could not be loaded.</returns>
        /// <exception cref="ArgumentNullException">If url is null or empty.</exception>
        /// <exception cref="ArgumentException">If url is not a valid absolute url.</exception>
        /// <exception cref="OperationCanceledException">If cancellation is requested through <paramref name="token"/>.</exception>
        /// <exception cref="HttpRequestException">Rethrown, after an error notification, if the request itself fails.</exception>
        public async Task <HtmlDocument> GetPage(string url, string shortDescrip,
                                                 CachingMode caching, ShouldCache shouldCache, SuppressNotifications suppressNotifications, CancellationToken token)
        {
            if (string.IsNullOrEmpty(url))
            {
                throw new ArgumentNullException(nameof(url));
            }

            if (!Uri.IsWellFormedUriString(url, UriKind.Absolute))
            {
                throw new ArgumentException($"Url is not valid: {url}", nameof(url));
            }

            Uri uri = new Uri(url);

            // From here on the unescaped form is what is used for cache keys and notifications.
            url = Uri.UnescapeDataString(url);
            HtmlDocument htmldoc = null;
            string       result  = null;
            int          tries   = 0;

            // Try to load from cache first, if allowed.
            if (caching == CachingMode.UseCache)
            {
                htmldoc = await Cache.GetAsync(url).ConfigureAwait(false);

                if (htmldoc != null)
                {
                    NotifyStatusChange(PageRequestStatusType.LoadedFromCache, url, shortDescrip, null, suppressNotifications);
                    return(htmldoc);
                }
            }

            NotifyStatusChange(PageRequestStatusType.Requested, url, shortDescrip, null, suppressNotifications);

            // Limit to no more than N parallel requests
            await ss.WaitAsync(token).ConfigureAwait(false);

            try
            {
                Cookie cookie = ForumCookies.GetCookie(uri);
                if (cookie != null)
                {
                    ClientHandler.CookieContainer.Add(uri, cookie);
                }

                HttpResponseMessage        response;
                Task <HttpResponseMessage> getResponseTask = null;

                do
                {
                    token.ThrowIfCancellationRequested();

                    if (tries > 0)
                    {
                        // Delay any additional attempts after the first.
                        await Task.Delay(retryDelay, token).ConfigureAwait(false);

                        // Notify the user if we're re-trying to load the page.
                        NotifyStatusChange(PageRequestStatusType.Retry, url, shortDescrip, null, suppressNotifications);
                    }

                    tries++;

                    try
                    {
                        getResponseTask = client.GetAsync(uri, token).TimeoutAfter(timeout, token);
                        Debug.WriteLine($"Get URI {uri} task ID: {getResponseTask.Id}");

                        using (response = await getResponseTask.ConfigureAwait(false))
                        {
                            if (response.IsSuccessStatusCode)
                            {
                                result = await response.Content.ReadAsStringAsync().ConfigureAwait(false);

                                // If we get a successful result, we're done.
                                break;
                            }
                            else if (PageLoadFailed(response))
                            {
                                NotifyStatusChange(PageRequestStatusType.Failed, url,
                                                   GetFailureMessage(response, shortDescrip, url), null, suppressNotifications);
                                return(null);
                            }
                            else if (PageWasMoved(response))
                            {
                                // A redirect target is carried by the Location response header.
                                // Content-Location is kept as a fallback, since that is what this
                                // code historically read.
                                Uri movedTo = response.Headers.Location ?? response.Content?.Headers.ContentLocation;

                                if (movedTo != null)
                                {
                                    // The header value may be relative; resolve it against the address
                                    // we just requested before asking for AbsoluteUri.
                                    uri = movedTo.IsAbsoluteUri ? movedTo : new Uri(uri, movedTo);
                                    url = uri.AbsoluteUri;
                                }

                                // With no target supplied, the next attempt simply retries the same address.
                            }
                        }
                    }
                    catch (OperationCanceledException)
                    {
                        if (token.IsCancellationRequested)
                        {
                            // user request
                            throw;
                        }
                        else
                        {
                            // timeout via cancellation
                            Debug.WriteLine($"Attempt to load {shortDescrip} timed out/self-cancelled (TA). Tries={tries}");
                        }
                    }
                    catch (TimeoutException)
                    {
                        Debug.WriteLine($"Attempt to load {shortDescrip} timed out. Tries={tries}");
                    }
                    catch (HttpRequestException e)
                    {
                        NotifyStatusChange(PageRequestStatusType.Error, url, shortDescrip, e, suppressNotifications);
                        throw;
                    }
                } while (tries < retryLimit);

                // Null-safe: the task reference stays null if no request was ever started.
                Debug.WriteLine($"Finished getting URI {uri} task ID: {getResponseTask?.Id ?? 0}");

                // Out of attempts with nothing to show for it: abandon whatever is still in flight.
                if (result == null && tries >= retryLimit)
                {
                    client.CancelPendingRequests();
                }
            }
            catch (OperationCanceledException)
            {
                // If it's not a user-requested cancellation, generate a failure message.
                if (!token.IsCancellationRequested)
                {
                    NotifyStatusChange(PageRequestStatusType.Failed, url, shortDescrip, null, suppressNotifications);
                }

                throw;
            }
            finally
            {
                // Always give the slot back, whichever way the request ended.
                ss.Release();
            }

            token.ThrowIfCancellationRequested();

            if (result == null)
            {
                NotifyStatusChange(PageRequestStatusType.Failed, url, shortDescrip, null, suppressNotifications);
                return(null);
            }

            // Note that url may have been updated by a redirect above, so the content is
            // cached under the address it was actually loaded from.
            if (shouldCache == ShouldCache.Yes)
            {
                Cache.Add(url, result);
            }

            // Parsing is handed to Task.Run so it does not run inline on the current thread.
            htmldoc = new HtmlDocument();
            await Task.Run(() => htmldoc.LoadHtml(result)).ConfigureAwait(false);

            NotifyStatusChange(PageRequestStatusType.Loaded, url, shortDescrip, null, suppressNotifications);

            return(htmldoc);
        }