/// <summary>
/// Downloads the page at <paramref name="uri"/> and hands its text to <c>GetSongsFromPageText</c>.
/// Web errors, cancellation and unexpected failures are reported through the returned
/// <see cref="PageReadResult"/> instead of being thrown.
/// </summary>
/// <param name="uri">Address of the page to read. Must not be null.</param>
/// <param name="cancellationToken">Token passed to the web request.</param>
/// <returns>The result produced by <c>GetSongsFromPageText</c>, or a result describing the failure.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="uri"/> is null.</exception>
public async Task<PageReadResult> GetSongsFromPageAsync(Uri uri, CancellationToken cancellationToken)
{
    if (uri == null)
    {
        throw new ArgumentNullException(nameof(uri), "uri cannot be null in ScoreSaberReader.GetSongsFromPageAsync");
    }

    IWebResponseMessage response = null;
    try
    {
        response = await WebUtils.WebClient.GetAsync(uri, cancellationToken).ConfigureAwait(false);
        response.EnsureSuccessStatusCode();
        var pageText = await response.Content.ReadAsStringAsync().ConfigureAwait(false);
        return GetSongsFromPageText(pageText, uri);
    }
    catch (WebClientException ex)
    {
        return PageReadResult.FromWebClientException(ex, uri);
    }
    catch (OperationCanceledException ex)
    {
        // Cancellation is an expected outcome: report it as Cancelled rather than
        // logging it at Error level as an "uncaught error" with PageErrorType.Unknown.
        return new PageReadResult(uri, null, new FeedReaderException("Page read was cancelled.", ex, FeedReaderFailureCode.Cancelled), PageErrorType.Cancelled);
    }
    catch (Exception ex)
    {
        // uri was validated above, so no null-conditional is needed here.
        string message = $"Uncaught error getting page {uri}: {ex.Message}";
        Logger?.Error(message);
        return new PageReadResult(uri, null, new FeedReaderException(message, ex, FeedReaderFailureCode.PageFailed), PageErrorType.Unknown);
    }
    finally
    {
        response?.Dispose();
    }
}
/// <summary>
/// Requests the page at <paramref name="uri"/> through <c>WebUtils.GetBeatSaverAsync</c> and
/// collects the songs that <c>ParseSongsFromPage</c> yields from the response text.
/// </summary>
/// <param name="uri">Address of the page to read. Must not be null.</param>
/// <param name="cancellationToken">Token passed to the web request.</param>
/// <returns>
/// A result holding the parsed songs, or a failure result when the request raised a
/// <c>WebClientException</c> or was cancelled. Exceptions raised while parsing are not caught here.
/// </returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="uri"/> is null.</exception>
public static async Task<PageReadResult> GetSongsFromPageAsync(Uri uri, CancellationToken cancellationToken)
{
    if (uri == null)
    {
        throw new ArgumentNullException(nameof(uri), "uri cannot be null in BeatSaverReader.GetSongsFromPageAsync.");
    }

    // Download phase: every catch clause returns, so 'body' is always assigned past this point.
    string body;
    IWebResponseMessage webResponse = null;
    try
    {
        webResponse = await WebUtils.GetBeatSaverAsync(uri, cancellationToken).ConfigureAwait(false);
        webResponse.EnsureSuccessStatusCode();
        body = await webResponse.Content.ReadAsStringAsync().ConfigureAwait(false);
    }
    catch (WebClientException webError)
    {
        return PageReadResult.FromWebClientException(webError, uri);
    }
    catch (OperationCanceledException cancelled)
    {
        return new PageReadResult(uri, null, cancelled, PageErrorType.Cancelled);
    }
    finally
    {
        webResponse?.Dispose();
    }

    // Parse phase: runs after the response has been released.
    var parsedSongs = new List<ScrapedSong>();
    foreach (var parsed in ParseSongsFromPage(body, uri))
    {
        parsedSongs.Add(parsed);
    }

    return new PageReadResult(uri, parsedSongs);
}
/// <summary>
/// Downloads the page at <paramref name="uri"/>, parses the songs on it and applies the
/// feed's <c>Settings.Filter</c> and <c>Settings.StopWhenAny</c> callbacks.
/// Download and parsing failures are reported through the returned <see cref="PageReadResult"/>.
/// </summary>
/// <param name="uri">Address of the page to read.</param>
/// <param name="cancellationToken">Token passed to the web request.</param>
/// <exception cref="InvalidFeedSettingsException">Thrown when the feed's settings aren't valid.</exception>
/// <returns>
/// The accepted songs (unique by hash), the first and last song parsed from the page, the number
/// of songs on the page and whether this is the last page; or a result describing the failure.
/// </returns>
public async Task<PageReadResult> GetSongsAsync(Uri uri, CancellationToken cancellationToken)
{
    string pageText = "";
    Dictionary<string, ScrapedSong> songs = new Dictionary<string, ScrapedSong>();
    // Null-conditional like every other Logger call in this method, so a missing logger cannot fault the read.
    Logger?.Debug($"Getting songs from '{uri}'");
    IWebResponseMessage? response = null;
    try
    {
        response = await WebUtils.WebClient.GetAsync(uri, cancellationToken).ConfigureAwait(false);
        response.EnsureSuccessStatusCode();
        pageText = await response.Content.ReadAsStringAsync().ConfigureAwait(false);
    }
    catch (OperationCanceledException ex)
    {
        return PageReadResult.CancelledResult(uri, ex);
    }
    catch (WebClientException ex)
    {
        // 0 means the exception carried no response, so there is no status code to describe.
        int statusCode = ex.Response?.StatusCode ?? 0;
        string errorText;
        switch (statusCode)
        {
            case 0:
                errorText = string.Empty;
                break;
            case 404:
                errorText = $"{uri} was not found.";
                break;
            case 408:
                errorText = $"Timeout getting first page in ScoreSaberReader: {uri}: {ex.Message}";
                break;
            default:
                errorText = $"Site Error getting first page in ScoreSaberReader: {uri}: {ex.Message}";
                break;
        }

        // Skip the status line when there is nothing to say; the message and stack trace are logged below.
        if (errorText.Length > 0)
        {
            Logger?.Debug(errorText);
        }

        // No need for a stacktrace if it's one of these errors.
        if (!(statusCode == 404 || statusCode == 408 || statusCode == 500))
        {
            Logger?.Debug($"{ex.Message}\n{ex.StackTrace}");
        }

        return PageReadResult.FromWebClientException(ex, uri);
    }
    catch (Exception ex)
    {
        string message = $"Uncaught error getting the first page in ScoreSaberReader.GetSongsFromScoreSaberAsync(): {ex.Message}";
        return new PageReadResult(uri, new List<ScrapedSong>(), null, null, 0, new FeedReaderException(message, ex, FeedReaderFailureCode.SourceFailed), PageErrorType.Unknown);
    }
    finally
    {
        response?.Dispose();
    }

    bool isLastPage;
    ScrapedSong? firstSong = null;
    ScrapedSong? lastSong = null;
    int songsOnPage = 0;
    try
    {
        // A null parse result is treated as an empty page (count 0, last page) instead of
        // letting the foreach below throw and surface as a misleading parsing error.
        List<ScrapedSong> diffs = GetSongsFromPageText(pageText, uri, Settings.StoreRawData || StoreRawData) ?? new List<ScrapedSong>();
        firstSong = diffs.FirstOrDefault();
        lastSong = diffs.LastOrDefault();
        songsOnPage = diffs.Count;
        // A page holding fewer than SongsPerPage entries is taken to be the final one.
        isLastPage = songsOnPage < SongsPerPage;
        foreach (ScrapedSong diff in diffs)
        {
            // The filter only runs for hashes not collected yet.
            if (!songs.ContainsKey(diff.Hash) && (Settings.Filter == null || Settings.Filter(diff)))
            {
                songs.Add(diff.Hash, diff);
            }

            // StopWhenAny marks this as the last page but the rest of the page is still processed.
            if (Settings.StopWhenAny != null && Settings.StopWhenAny(diff))
            {
                isLastPage = true;
            }
        }
    }
    catch (JsonReaderException ex)
    {
        string message = "Unable to parse JSON from text";
        Logger?.Debug($"{message}: {ex.Message}\n{ex.StackTrace}");
        return new PageReadResult(uri, null, firstSong, lastSong, songsOnPage, new FeedReaderException(message, ex, FeedReaderFailureCode.PageFailed), PageErrorType.ParsingError);
    }
    catch (Exception ex)
    {
        string message = $"Unhandled exception from GetSongsFromPageText() while parsing {uri}";
        Logger?.Debug($"{message}: {ex.Message}\n{ex.StackTrace}");
        return new PageReadResult(uri, null, firstSong, lastSong, songsOnPage, new FeedReaderException(message, ex, FeedReaderFailureCode.PageFailed), PageErrorType.ParsingError);
    }

    return new PageReadResult(uri, songs.Values.ToList(), firstSong, lastSong, songsOnPage, isLastPage);
}
/// <summary>
/// Downloads the Beat Saver page at <paramref name="uri"/>, parses its JSON and applies the
/// feed's <c>Settings.Filter</c> and <c>Settings.StopWhenAny</c> callbacks.
/// Download and parsing failures are reported through the returned <see cref="PageReadResult"/>.
/// </summary>
/// <param name="uri">Address of the page to read.</param>
/// <param name="cancellationToken">Token passed to the web request.</param>
/// <exception cref="InvalidFeedSettingsException">Thrown when the feed's settings aren't valid.</exception>
/// <returns>
/// The accepted songs, the first and last song parsed from the page, the number of songs on the
/// page and whether this is the last page; or a result describing the failure.
/// </returns>
public async Task<PageReadResult> GetSongsAsync(Uri uri, CancellationToken cancellationToken)
{
    string pageText;
    JObject result;
    List<ScrapedSong> newSongs;
    // Null-conditional like every other Logger call in this method, so a missing logger cannot fault the read.
    Logger?.Debug($"Getting songs from '{uri}'");
    bool isLastPage = false;
    IWebResponseMessage? response = null;
    ScrapedSong? firstSong = null;
    ScrapedSong? lastSong = null;
    int songsOnPage = 0;
    try
    {
        response = await WebUtils.GetBeatSaverAsync(uri, cancellationToken).ConfigureAwait(false);
        response.EnsureSuccessStatusCode();
#pragma warning disable CS8602 // Dereference of a possibly null reference.
        pageText = await response.Content.ReadAsStringAsync().ConfigureAwait(false);
#pragma warning restore CS8602 // Dereference of a possibly null reference.
        result = JObject.Parse(pageText);
        if (result?["docs"] == null)
        {
            // Without a "docs" entry there is nothing to parse songs from.
            Logger?.Warning($"Error checking Beat Saver's {Name} feed.");
            return new PageReadResult(uri, null, null, null, 0, new FeedReaderException($"Error getting page in BeatSaverFeed.GetSongsFromPageAsync()", null, FeedReaderFailureCode.PageFailed), PageErrorType.ParsingError);
        }

        newSongs = new List<ScrapedSong>();
        var scrapedSongs = BeatSaverReader.ParseSongsFromJson(result, uri, Settings.StoreRawData || StoreRawData);
        firstSong = scrapedSongs.FirstOrDefault();
        lastSong = scrapedSongs.LastOrDefault();
        songsOnPage = scrapedSongs.Count;
        foreach (var song in scrapedSongs)
        {
            if (Settings.Filter == null || Settings.Filter(song))
            {
                newSongs.Add(song);
            }

            // Unlike the filter, a StopWhenAny match ends processing of this page immediately.
            if (Settings.StopWhenAny != null && Settings.StopWhenAny(song))
            {
                isLastPage = true;
                break;
            }
        }

        // An empty page marks the end of the feed.
        if (scrapedSongs.Count == 0)
        {
            isLastPage = true;
        }
    }
    catch (WebClientException ex)
    {
        // Fall back to a generic label when the exception has no response attached, so the
        // log line never starts with a bare " getting a response from ...".
        string errorText;
        if (ex.Response == null)
        {
            errorText = "Error";
        }
        else if (ex.Response.StatusCode == 408)
        {
            errorText = "Timeout";
        }
        else
        {
            errorText = "Site Error";
        }

        string message = $"{errorText} getting a response from {uri}: {ex.Message}";
        Logger?.Debug(message);
        Logger?.Debug($"{ex.Message}\n{ex.StackTrace}");
        return PageReadResult.FromWebClientException(ex, uri);
    }
    catch (JsonReaderException ex)
    {
        string message = $"Unable to parse JSON from text on page {uri}";
        Logger?.Debug(message);
        Logger?.Debug($"{ex.Message}\n{ex.StackTrace}");
        return new PageReadResult(uri, null, firstSong, lastSong, songsOnPage, new FeedReaderException(message, ex, FeedReaderFailureCode.SourceFailed), PageErrorType.ParsingError);
    }
    catch (OperationCanceledException ex)
    {
        return PageReadResult.CancelledResult(uri, ex);
    }
    catch (Exception ex)
    {
        string message = $"Uncaught error getting page {uri} in BeatSaverFeed.GetSongsFromPageAsync(): {ex.Message}";
        return new PageReadResult(uri, null, firstSong, lastSong, songsOnPage, new FeedReaderException(message, ex, FeedReaderFailureCode.SourceFailed), PageErrorType.ParsingError);
    }
    finally
    {
        response?.Dispose();
    }

    return new PageReadResult(uri, newSongs, firstSong, lastSong, songsOnPage, isLastPage);
}
// TODO: Abort early when bsaber.com is down (check if all items in block failed?)
// TODO: Make cancellationToken actually do something.
/// <summary>
/// Gets all songs from the feed defined by the provided settings.
/// Pages are downloaded and parsed through a dataflow block running up to <c>MaxConcurrency</c>
/// pages at once, and reading stops at the first page that yields no songs, at the configured
/// page limit, or once the configured song limit is reached.
/// </summary>
/// <param name="settings">Feed settings; must be a <c>BeastSaberFeedSettings</c>.</param>
/// <param name="cancellationToken">Token checked between pages and passed to the web requests and dataflow calls.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="settings"/> is null.</exception>
/// <exception cref="InvalidCastException">Thrown when the passed IFeedSettings isn't a BeastSaberFeedSettings.</exception>
/// <exception cref="ArgumentException">Thrown when trying to access a feed that requires a username and the username wasn't provided.</exception>
/// <exception cref="OperationCanceledException">May propagate from the pause wait or the dataflow send/receive calls when cancelled mid-read.</exception>
/// <returns>
/// The collected songs keyed by hash together with every page result received, or
/// <c>FeedResult.CancelledResult</c> when cancellation was already requested on entry.
/// </returns>
public async Task<FeedResult> GetSongsFromFeedAsync(IFeedSettings settings, CancellationToken cancellationToken)
{
    if (cancellationToken.IsCancellationRequested)
    {
        return FeedResult.CancelledResult;
    }

    if (settings == null)
    {
        throw new ArgumentNullException(nameof(settings), "settings cannot be null for BeastSaberReader.GetSongsFromFeedAsync.");
    }

    Dictionary<string, ScrapedSong> retDict = new Dictionary<string, ScrapedSong>();
    if (!(settings is BeastSaberFeedSettings _settings))
    {
        throw new InvalidCastException(INVALIDFEEDSETTINGSMESSAGE);
    }

    // Feed index 2 is the only feed that can be read without a username.
    if (_settings.FeedIndex != 2 && string.IsNullOrWhiteSpace(_username))
    {
        throw new ArgumentException("Cannot access this feed without a valid username.");
    }

    int pageIndex = settings.StartingPage;
    int maxPages = _settings.MaxPages;
    bool useMaxSongs = _settings.MaxSongs != 0;
    bool useMaxPages = maxPages != 0;
    if (useMaxPages && pageIndex > 1)
    {
        // MaxPages counts pages read, so shift the limit when not starting at page 1.
        maxPages = maxPages + pageIndex - 1;
    }

    // Downloads one page and parses it; every failure is turned into a PageReadResult.
    var processPageBlock = new TransformBlock<Uri, PageReadResult>(async feedUri =>
    {
        string pageText = "";
        ContentType contentType = ContentType.Unknown;
        IWebResponseMessage response = null;
        try
        {
            response = await WebUtils.WebClient.GetAsync(feedUri, cancellationToken).ConfigureAwait(false);
            if ((response?.StatusCode ?? 500) == 500)
            {
                // Clear the reference so the finally block cannot dispose the first response twice.
                response?.Dispose();
                response = null;
                Logger?.Warning($"Internal server error on {feedUri}, retrying in 20 seconds");
                // Pass the token so a cancellation request does not have to wait out the back-off.
                await Task.Delay(20000, cancellationToken).ConfigureAwait(false);
                response = await WebUtils.WebClient.GetAsync(feedUri, cancellationToken).ConfigureAwait(false);
            }

            response.EnsureSuccessStatusCode();
            // A missing content type falls back to Unknown instead of failing the page.
            // Invariant casing keeps the dictionary lookup independent of the current culture.
            string contentTypeStr = response.Content?.ContentType?.ToLowerInvariant();
            if (contentTypeStr == null || !ContentDictionary.TryGetValue(contentTypeStr, out contentType))
            {
                contentType = ContentType.Unknown;
            }

            pageText = await response.Content.ReadAsStringAsync().ConfigureAwait(false);
        }
        catch (WebClientException ex)
        {
            return PageReadResult.FromWebClientException(ex, feedUri);
        }
        catch (OperationCanceledException ex)
        {
            // Keep the caught exception as the inner exception rather than a fresh, empty one.
            return new PageReadResult(feedUri, null, new FeedReaderException("Page read was cancelled.", ex, FeedReaderFailureCode.Cancelled), PageErrorType.Cancelled);
        }
        catch (Exception ex)
        {
            string message = $"Error downloading {feedUri} in TransformBlock.";
            Logger?.Debug(message);
            Logger?.Debug($"{ex.Message}\n{ex.StackTrace}");
            return new PageReadResult(feedUri, null, new FeedReaderException(message, ex, FeedReaderFailureCode.PageFailed), PageErrorType.Unknown);
        }
        finally
        {
            response?.Dispose();
            response = null;
        }

        List<ScrapedSong> newSongs = null;
        try
        {
            newSongs = GetSongsFromPageText(pageText, feedUri, contentType);
        }
        catch (JsonReaderException ex)
        {
            // TODO: Probably don't need a logger message here, caller can deal with it.
            string message = $"Error parsing page text for {feedUri} in TransformBlock.";
            Logger?.Debug(message);
            Logger?.Debug($"{ex.Message}\n{ex.StackTrace}");
            return new PageReadResult(feedUri, null, new FeedReaderException(message, ex, FeedReaderFailureCode.PageFailed), PageErrorType.ParsingError);
        }
        catch (XmlException ex)
        {
            // TODO: Probably don't need a logger message here, caller can deal with it.
            string message = $"Error parsing page text for {feedUri} in TransformBlock.";
            Logger?.Debug(message);
            Logger?.Debug($"{ex.Message}\n{ex.StackTrace}");
            return new PageReadResult(feedUri, null, new FeedReaderException(message, ex, FeedReaderFailureCode.PageFailed), PageErrorType.ParsingError);
        }
        catch (Exception ex)
        {
            // TODO: Probably don't need a logger message here, caller can deal with it.
            string message = $"Uncaught error parsing page text for {feedUri} in TransformBlock.";
            Logger?.Debug(message);
            Logger?.Debug($"{ex.Message}\n{ex.StackTrace}");
            return new PageReadResult(feedUri, null, new FeedReaderException(message, ex, FeedReaderFailureCode.PageFailed), PageErrorType.Unknown);
        }

        return new PageReadResult(feedUri, newSongs);
    }, new ExecutionDataflowBlockOptions
    {
        MaxDegreeOfParallelism = MaxConcurrency,
        BoundedCapacity = MaxConcurrency,
        CancellationToken = cancellationToken
    });

    // The flag is false whenever the loop below exits, so a single pass is all that is needed.
    bool continueLooping = !cancellationToken.IsCancellationRequested;
    int itemsInBlock = 0;
    List<PageReadResult> pageResults = new List<PageReadResult>(maxPages + 2);
    while (continueLooping)
    {
        if (Utilities.IsPaused)
        {
            await Utilities.WaitUntil(() => !Utilities.IsPaused, 500, cancellationToken).ConfigureAwait(false);
        }

        if (cancellationToken.IsCancellationRequested)
        {
            continueLooping = false;
            break;
        }

        var feedUrl = GetPageUri(Feeds[_settings.Feed].BaseUrl, pageIndex);
        await processPageBlock.SendAsync(feedUrl, cancellationToken).ConfigureAwait(false); // TODO: Need check with SongsPerPage
        itemsInBlock++;
        pageIndex++;

        if ((pageIndex > maxPages && useMaxPages) || cancellationToken.IsCancellationRequested)
        {
            continueLooping = false;
        }

        // TODO: Better http error handling, what if only a single page is broken and returns 0 songs?
        // Drain results when output is ready, when the block is full, or when no more pages will be queued.
        while (processPageBlock.OutputCount > 0 || itemsInBlock == MaxConcurrency || !continueLooping)
        {
            if (cancellationToken.IsCancellationRequested)
            {
                continueLooping = false;
                break;
            }

            if (itemsInBlock <= 0)
            {
                break;
            }

            await processPageBlock.OutputAvailableAsync(cancellationToken).ConfigureAwait(false);
            while (processPageBlock.TryReceive(out PageReadResult pageResult))
            {
                if (pageResult != null)
                {
                    pageResults.Add(pageResult);
                }

                if (Utilities.IsPaused)
                {
                    await Utilities.WaitUntil(() => !Utilities.IsPaused, 500, cancellationToken).ConfigureAwait(false);
                }

                itemsInBlock--;
                if (pageResult == null || pageResult.Count == 0) // TODO: This will trigger if a single page has an error.
                {
                    Logger?.Debug("Received no new songs, last page reached.");
                    processPageBlock.Complete();
                    itemsInBlock = 0;
                    continueLooping = false;
                    break;
                }

                // Count is known to be non-zero here because of the check above.
                Logger?.Debug($"Receiving {pageResult.Count} potential songs from {pageResult.Uri}");

                // TODO: Process PageReadResults for better error feedback.
                foreach (var song in pageResult.Songs)
                {
                    if (!retDict.ContainsKey(song.Hash))
                    {
                        // MaxSongs == 0 means no song limit.
                        if (retDict.Count < settings.MaxSongs || settings.MaxSongs == 0)
                        {
                            retDict.Add(song.Hash, song);
                        }

                        if (retDict.Count >= settings.MaxSongs && useMaxSongs)
                        {
                            continueLooping = false;
                        }
                    }
                }

                // Resume queuing pages while both the page limit and the song limit allow it.
                if (!useMaxPages || pageIndex <= maxPages)
                {
                    if (retDict.Count < settings.MaxSongs)
                    {
                        continueLooping = true;
                    }
                }
            }
        }
    }

    return new FeedResult(retDict, pageResults);
}
/// <summary>
/// Downloads the page at <paramref name="pageUri"/>, parses it according to the response's
/// content type and applies the feed's <c>Settings.Filter</c> and <c>Settings.StopWhenAny</c> callbacks.
/// Download and parsing failures are reported through the returned <see cref="PageReadResult"/>.
/// </summary>
/// <param name="pageUri">Address of the page to read.</param>
/// <param name="cancellationToken">Token passed to the web requests and the retry delay.</param>
/// <exception cref="InvalidFeedSettingsException">Thrown when the feed's settings aren't valid.</exception>
/// <returns>
/// The accepted songs, the first and last song parsed from the page, the number of songs on the
/// page and whether this is the last page; or a result describing the failure.
/// </returns>
public async Task<PageReadResult> GetSongsAsync(Uri pageUri, CancellationToken cancellationToken)
{
    string pageText = "";
    bool isLastPage = false;
    // Null-conditional like every other Logger call in this method, so a missing logger cannot fault the read.
    Logger?.Debug($"Getting songs from '{pageUri}'");
    ContentType contentType = ContentType.Unknown;
    IWebResponseMessage? response = null;
    try
    {
        response = await WebUtils.WebClient.GetAsync(pageUri, cancellationToken).ConfigureAwait(false);
        if ((response?.StatusCode ?? 500) == 500)
        {
            // Clear the reference so the finally block cannot dispose the first response twice.
            response?.Dispose();
            response = null;
            Logger?.Warning($"Internal server error on {pageUri}, retrying in 20 seconds");
            // Pass the token so a cancellation request does not have to wait out the back-off.
            await Task.Delay(20000, cancellationToken).ConfigureAwait(false);
            response = await WebUtils.WebClient.GetAsync(pageUri, cancellationToken).ConfigureAwait(false);
        }

        if (response == null)
        {
            throw new WebClientException($"Response was null for '{pageUri}'.");
        }

        response.EnsureSuccessStatusCode();
        if (response.Content == null)
        {
            throw new WebClientException($"Response content was null for '{pageUri}'.");
        }

        // Invariant casing keeps the dictionary lookup independent of the current culture.
        string? contentTypeStr = response.Content.ContentType?.ToLowerInvariant();
        if (contentTypeStr == null || !ContentDictionary.TryGetValue(contentTypeStr, out contentType))
        {
            contentType = ContentType.Unknown;
        }

        pageText = await response.Content.ReadAsStringAsync().ConfigureAwait(false);
    }
    catch (WebClientException ex)
    {
        return PageReadResult.FromWebClientException(ex, pageUri);
    }
    catch (OperationCanceledException ex)
    {
        // Keep the caught exception as the inner exception rather than a fresh, empty one.
        return new PageReadResult(pageUri, null, null, null, 0, new FeedReaderException("Page read was cancelled.", ex, FeedReaderFailureCode.Cancelled), PageErrorType.Cancelled);
    }
    catch (Exception ex)
    {
        string message = $"Error downloading {pageUri} in TransformBlock.";
        Logger?.Debug(message);
        Logger?.Debug($"{ex.Message}\n{ex.StackTrace}");
        return new PageReadResult(pageUri, null, null, null, 0, new FeedReaderException(message, ex, FeedReaderFailureCode.PageFailed), PageErrorType.Unknown);
    }
    finally
    {
        response?.Dispose();
    }

    List<ScrapedSong> newSongs;
    ScrapedSong? firstSong = null;
    ScrapedSong? lastSong = null;
    int songsOnPage = 0;
    try
    {
        var scrapedSongs = GetSongsFromPageText(pageText, pageUri, contentType, Settings.StoreRawData || StoreRawData);
        // An empty page marks the end of the feed.
        isLastPage = scrapedSongs.Count == 0;
        firstSong = scrapedSongs.FirstOrDefault();
        lastSong = scrapedSongs.LastOrDefault();
        songsOnPage = scrapedSongs.Count;
        newSongs = new List<ScrapedSong>();
        foreach (var song in scrapedSongs)
        {
            if (Settings.Filter == null || Settings.Filter(song))
            {
                newSongs.Add(song);
            }

            // StopWhenAny marks this as the last page but the rest of the page is still processed.
            if (Settings.StopWhenAny != null && Settings.StopWhenAny(song))
            {
                isLastPage = true;
            }
        }
    }
    catch (JsonReaderException ex)
    {
        // TODO: Probably don't need a logger message here, caller can deal with it.
        string message = $"Error parsing page text for {pageUri} in TransformBlock.";
        Logger?.Debug(message);
        Logger?.Debug($"{ex.Message}\n{ex.StackTrace}");
        return new PageReadResult(pageUri, null, firstSong, lastSong, songsOnPage, new FeedReaderException(message, ex, FeedReaderFailureCode.PageFailed), PageErrorType.ParsingError);
    }
    catch (XmlException ex)
    {
        // TODO: Probably don't need a logger message here, caller can deal with it.
        string message = $"Error parsing page text for {pageUri} in TransformBlock.";
        Logger?.Debug(message);
        Logger?.Debug($"{ex.Message}\n{ex.StackTrace}");
        return new PageReadResult(pageUri, null, firstSong, lastSong, songsOnPage, new FeedReaderException(message, ex, FeedReaderFailureCode.PageFailed), PageErrorType.ParsingError);
    }
    catch (Exception ex)
    {
        // TODO: Probably don't need a logger message here, caller can deal with it.
        string message = $"Uncaught error parsing page text for {pageUri} in TransformBlock.";
        Logger?.Debug(message);
        Logger?.Debug($"{ex.Message}\n{ex.StackTrace}");
        return new PageReadResult(pageUri, null, firstSong, lastSong, songsOnPage, new FeedReaderException(message, ex, FeedReaderFailureCode.PageFailed), PageErrorType.Unknown);
    }

    return new PageReadResult(pageUri, newSongs, firstSong, lastSong, songsOnPage, isLastPage);
}