/// <summary>
/// Reads a quest and returns the posts extracted from it, as directed by
/// the quest's configuration. The work is done in four stages: acquire the
/// forum adapter, determine the post range, start the page loads, and
/// extract the posts from the loaded pages.
/// </summary>
/// <param name="quest">The quest to read.</param>
/// <param name="token">The cancellation token.</param>
/// <returns>Returns a list of posts extracted from the quest.</returns>
/// <exception cref="InvalidOperationException">
/// Thrown when any of the four stages produces a null result.
/// </exception>
public async Task<List<PostComponents>> ReadQuestAsync(IQuest quest, CancellationToken token)
{
    // Each stage feeds the next one; a null result at any stage aborts the read.
    IForumAdapter forumAdapter =
        await GetForumAdapterAsync(quest, token).ConfigureAwait(false)
        ?? throw new InvalidOperationException("Unable to acquire forum adapter for the quest.");

    ThreadRangeInfo postRange =
        await GetStartInfoAsync(quest, forumAdapter, token).ConfigureAwait(false)
        ?? throw new InvalidOperationException("Unable to determine post range for the quest.");

    List<Task<HtmlDocument>> pageLoads =
        await LoadQuestPagesAsync(quest, forumAdapter, postRange, token).ConfigureAwait(false)
        ?? throw new InvalidOperationException("Unable to load pages for the quest.");

    List<PostComponents> extractedPosts =
        await GetPostsFromPagesAsync(quest, forumAdapter, postRange, pageLoads, token).ConfigureAwait(false)
        ?? throw new InvalidOperationException("Unable to extract posts from quest pages.");

    return extractedPosts;
}
/// <summary>
/// Starts loading the HTML pages that are relevant to a quest's tally.
/// </summary>
/// <param name="quest">The quest being loaded.</param>
/// <param name="adapter">The quest's forum adapter, used to form the URLs to load.</param>
/// <param name="threadRangeInfo">The range info that determines which pages to load.</param>
/// <param name="token">The cancellation token.</param>
/// <returns>
/// Returns a list of tasks that are handling the async loading of the requested pages.
/// The list is empty when there are no pages to scan.
/// </returns>
private async Task<List<Task<HtmlDocument>>> LoadQuestPagesAsync(
    IQuest quest, IForumAdapter adapter, ThreadRangeInfo threadRangeInfo, CancellationToken token)
{
    var scanInfo = await GetPagesToScanAsync(quest, adapter, threadRangeInfo, token).ConfigureAwait(false);

    // Tuple layout: (first page number, last page number, number of pages to scan).
    int startPage = scanInfo.Item1;
    int endPage = scanInfo.Item2;
    int pageCount = scanInfo.Item3;

    List<Task<HtmlDocument>> pageLoads = new List<Task<HtmlDocument>>();

    IPageProvider pageProvider = ViewModels.ViewModelService.MainViewModel.PageProvider;

    if (pageCount <= 0)
    {
        // Nothing to scan, so no load tasks are started.
        return pageLoads;
    }

    // Start one load task per page in the range, beginning with the first page number.
    foreach (int pageNum in Enumerable.Range(startPage, pageCount))
    {
        string pageUrl = adapter.GetUrlForPage(pageNum, quest.PostsPerPage);

        // The last page of the range is requested with ShouldCache.No; every other page with ShouldCache.Yes.
        ShouldCache shouldCache = ShouldCache.Yes;
        if (pageNum == endPage)
        {
            shouldCache = ShouldCache.No;
        }

        pageLoads.Add(pageProvider.GetPage(pageUrl, $"Page {pageNum}",
            CachingMode.UseCache, shouldCache, SuppressNotifications.No, token));
    }

    return pageLoads;
}
/// <summary>
/// Determines the last page number of the tally. The answer comes from the
/// thread range info when it already carries a page count, from the quest's
/// end post when the tally has a fixed end, and otherwise from the thread
/// info read out of the supplied first page.
/// </summary>
/// <param name="quest">The quest being tallied.</param>
/// <param name="adapter">The forum adapter that handles the quest's thread.</param>
/// <param name="threadRangeInfo">The range of posts that are wanted in the tally.</param>
/// <param name="firstPage">The task loading the first page of the tally; only awaited when the page count must be read from it.</param>
/// <returns>Returns the last page number of the tally.</returns>
/// <exception cref="InvalidOperationException">Thrown when the first page is needed but loaded as null.</exception>
private async Task<int> GetLastPageNumber(IQuest quest, IForumAdapter2 adapter,
    ThreadRangeInfo threadRangeInfo, Task<HtmlDocument?> firstPage)
{
    // Cheapest answer: the range info already knows the page count.
    int knownPageCount = threadRangeInfo.Pages;
    if (knownPageCount > 0)
    {
        return knownPageCount;
    }

    bool readsToThreadEnd = quest.ReadToEndOfThread || threadRangeInfo.IsThreadmarkSearchResult;
    if (!readsToThreadEnd)
    {
        // Fixed end post: the last page is simply the page holding that post.
        return ThreadInfo.GetPageNumberOfPost(quest.EndPost, quest);
    }

    // Open-ended tally (end post 0, or threadmark based): the thread's page
    // count has to be read from the first page once it has loaded.
    HtmlDocument? loadedFirstPage = await firstPage.ConfigureAwait(false);

    if (loadedFirstPage is null)
    {
        throw new InvalidOperationException($"Unable to load first page of {quest.ThreadName}");
    }

    return adapter.GetThreadInfo(loadedFirstPage).Pages;
}
/// <summary>
/// Builds the list of page loading tasks for the pages that are intended
/// to be tallied.
/// </summary>
/// <param name="quest">The quest for which the tally is being run.</param>
/// <param name="adapter">The forum adapter that handles the quest's thread.</param>
/// <param name="threadRangeInfo">The range of posts that are wanted in the tally.</param>
/// <param name="pageProvider">The page provider handed to the page loading helpers.</param>
/// <param name="token">A cancellation token.</param>
/// <returns>
/// Returns a list of page loading tasks, with the first page's task at index 0
/// followed by the tasks for the remaining pages.
/// </returns>
private async Task<List<Task<HtmlDocument?>>> LoadQuestPagesAsync(
    IQuest quest, IForumAdapter2 adapter, ThreadRangeInfo threadRangeInfo,
    IPageProvider pageProvider, CancellationToken token)
{
    int startPage = threadRangeInfo.GetStartPage(quest);

    // Start the first page load but keep it as a task: it is only awaited
    // if the last page number has to be read from it.
    Task<HtmlDocument?> firstPageTask = GetFirstPage(startPage, quest, adapter, pageProvider, token);

    int endPage = await GetLastPageNumber(quest, adapter, threadRangeInfo, firstPageTask).ConfigureAwait(false);

    // The first page's task leads the list; the rest of the range follows it.
    var pageTasks = new List<Task<HtmlDocument?>> { firstPageTask };
    pageTasks.AddRange(GetRemainingPages(startPage, endPage, quest, adapter, pageProvider, token));

    return pageTasks;
}
/// <summary>
/// Reads a quest's thread and returns its title together with the posts
/// that survive filtering. Each step of the pipeline is logged at debug level.
/// </summary>
/// <param name="quest">The quest to read.</param>
/// <param name="pageProvider">The page provider to use to read this quest.</param>
/// <param name="token">The cancellation token.</param>
/// <returns>
/// Returns a tuple of the thread title (taken from the thread info) and the
/// filtered list of posts extracted from the quest.
/// </returns>
private async Task<(string threadTitle, List<Post> posts)> ReadQuestAsyncImpl(
    IQuest quest, IPageProvider pageProvider, CancellationToken token)
{
    logger.LogDebug($"Reading quest {quest.DisplayName} with ForumReader.");

    // Step 1: create the adapter for the quest's forum.
    IForumAdapter2 forumAdapter = await forumAdapterFactory
        .CreateForumAdapterAsync(quest, pageProvider, token)
        .ConfigureAwait(false);
    logger.LogDebug($"Forum adapter created for {quest.DisplayName}.");

    // Step 2: sync the quest with the adapter.
    SyncQuestWithForumAdapter(quest, forumAdapter);
    logger.LogDebug($"Quest {quest.DisplayName} synced with forum adapter.");

    // Step 3: work out the range of the thread to read.
    ThreadRangeInfo postRange = await GetStartInfoAsync(quest, forumAdapter, pageProvider, token)
        .ConfigureAwait(false);
    logger.LogDebug($"Range info acquired for {quest.DisplayName}. ({postRange})");

    // Step 4: start the page loads.
    List<Task<HtmlDocument?>> pageLoads = await LoadQuestPagesAsync(quest, forumAdapter, postRange, pageProvider, token)
        .ConfigureAwait(false);
    logger.LogDebug($"Got {pageLoads.Count} pages loading {quest.DisplayName}.");

    // Step 5: pull the thread info and raw posts out of the pages.
    var extracted = await GetPostsFromPagesAsync(pageLoads, quest, forumAdapter, postRange)
        .ConfigureAwait(false);
    logger.LogDebug($"Got {extracted.posts.Count} posts for quest {quest.DisplayName}.");

    // Step 6: apply the quest's filters.
    List<Post> keptPosts = FilterPosts(extracted.posts, quest, extracted.threadInfo, postRange);
    logger.LogDebug($"Filtered to {keptPosts.Count} posts for quest {quest.DisplayName}.");

    return (extracted.threadInfo.Title, keptPosts);
}
/// <summary>
/// Asks the forum adapter for the quest's thread range info (page and post numbers).
/// The adapter is given the main view model's page provider for this.
/// </summary>
/// <param name="quest">The quest we're getting thread info for.</param>
/// <param name="adapter">The quest's forum adapter.</param>
/// <param name="token">The cancellation token.</param>
/// <returns>Returns the quest's thread range info.</returns>
private async Task<ThreadRangeInfo> GetStartInfoAsync(IQuest quest, IForumAdapter adapter, CancellationToken token)
{
    IPageProvider provider = ViewModels.ViewModelService.MainViewModel.PageProvider;

    return await adapter.GetStartingPostNumberAsync(quest, provider, token).ConfigureAwait(false);
}
/// <summary>
/// Gets a list of posts from the provided pages from a quest.
/// Pages are processed in the order their load tasks complete. As a side
/// effect, the main view model's vote counter title is set to the thread
/// title read from the first page.
/// </summary>
/// <param name="quest">The quest being tallied.</param>
/// <param name="adapter">The quest's forum adapter.</param>
/// <param name="rangeInfo">The thread range info for the tally.</param>
/// <param name="pages">
/// The pages that are being loaded. Tasks are removed from this list as they
/// complete, so the list is empty when the method returns normally.
/// </param>
/// <param name="token">The cancellation token. It is not consulted directly here; cancellation is detected from the page tasks.</param>
/// <returns>
/// Returns a list of PostComponents comprising the posts from the threads that fall
/// within the specified range, de-duplicated and ordered by post number.
/// Returns an empty list when no pages were supplied.
/// </returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="pages"/> is null.</exception>
/// <exception cref="OperationCanceledException">Thrown when a page load task was canceled.</exception>
/// <exception cref="Exception">
/// Thrown, with Data["Application"] set to true, when a page load completed with a null page.
/// </exception>
private async Task<List<PostComponents>> GetPostsFromPagesAsync(
    IQuest quest, IForumAdapter adapter, ThreadRangeInfo rangeInfo,
    List<Task<HtmlDocument>> pages, CancellationToken token)
{
    if (pages == null)
    {
        throw new ArgumentNullException(nameof(pages));
    }

    List<PostComponents> postsList = new List<PostComponents>();

    // With no pages in range there is no first page to read thread info from,
    // and no posts to filter. Previously this case failed in pages.First().
    if (pages.Count == 0)
    {
        return postsList;
    }

    // Remember the first page's task before the list is drained below.
    var firstPageTask = pages[0];

    while (pages.Count > 0)
    {
        var finishedPage = await Task.WhenAny(pages).ConfigureAwait(false);
        pages.Remove(finishedPage);

        if (finishedPage.IsCanceled)
        {
            throw new OperationCanceledException();
        }

        // This will throw any pending exceptions that occurred while trying to load the page.
        // This removes the need to check for finishedPage.IsFaulted.
        var page = await finishedPage.ConfigureAwait(false);

        if (page == null)
        {
            Exception ae = new Exception("Not all pages loaded.  Rerun tally.");
            ae.Data["Application"] = true;
            throw ae;
        }

        // Keep only vote posts that fall inside the requested range.
        var posts = from post in adapter.GetPosts(page, quest)
                    where post != null && post.IsVote && post.IsAfterStart(rangeInfo) &&
                          (quest.ReadToEndOfThread || rangeInfo.IsThreadmarkSearchResult || post.Number <= quest.EndPost)
                    select post;

        postsList.AddRange(posts);
    }

    // The first page's task has already completed successfully in the loop above,
    // so this await returns immediately without blocking.
    var firstPage = await firstPageTask.ConfigureAwait(false);

    ThreadInfo threadInfo = adapter.GetThreadInfo(firstPage);

    ViewModelService.MainViewModel.VoteCounter.Title = threadInfo.Title;

    // Get all posts that are not filtered out, either explicitly, or (for the thread author) implicitly.
    postsList = postsList
        .Where(p =>
            ((quest.UseCustomUsernameFilters && !quest.UsernameFilter.Match(p.Author)) ||
             (!quest.UseCustomUsernameFilters && p.Author != threadInfo.Author)) &&
            (!quest.UseCustomPostFilters ||
             !(quest.PostsToFilter.Contains(p.Number) || quest.PostsToFilter.Contains(p.IDValue))))
        .Distinct()
        .OrderBy(p => p.Number)
        .ToList();

    return postsList;
}
/// <summary>
/// Works out which page numbers will be loaded for the quest.
/// The result is a tuple of (first page number, last page number, pages to scan),
/// where pages to scan is last - first + 1.
/// </summary>
/// <param name="quest">The quest being tallied.</param>
/// <param name="adapter">The forum adapter for the quest.</param>
/// <param name="threadRangeInfo">The thread range info, as provided by the adapter.</param>
/// <param name="token">The cancellation token.</param>
/// <returns>Returns a tuple of the page number info that was determined.</returns>
/// <exception cref="InvalidOperationException">
/// Thrown when the first page has to be loaded to find the page count, and it loads as null.
/// </exception>
private async Task<Tuple<int, int, int>> GetPagesToScanAsync(
    IQuest quest, IForumAdapter adapter, ThreadRangeInfo threadRangeInfo, CancellationToken token)
{
    IPageProvider pageProvider = ViewModels.ViewModelService.MainViewModel.PageProvider;

    int startPage = threadRangeInfo.GetStartPage(quest);
    int endPage;

    if (threadRangeInfo.Pages > 0)
    {
        // The range info already carries the thread's page count.
        endPage = threadRangeInfo.Pages;
    }
    else if (!quest.ReadToEndOfThread && !threadRangeInfo.IsThreadmarkSearchResult)
    {
        // Fixed end post: the last page is the page that holds it.
        endPage = quest.GetPageNumberOf(quest.EndPost);
    }
    else
    {
        // Open-ended tally (end post 0, or threadmark based): load the first
        // page to learn how many pages the thread has. The cache is bypassed
        // since the thread may have changed since the last load.
        string url = adapter.GetUrlForPage(startPage, quest.PostsPerPage);

        HtmlDocument firstPage = await pageProvider
            .GetPage(url, $"Page {startPage}", CachingMode.BypassCache,
                     ShouldCache.Yes, SuppressNotifications.No, token)
            .ConfigureAwait(false);

        if (firstPage == null)
        {
            throw new InvalidOperationException($"Unable to load web page: {url}");
        }

        endPage = adapter.GetThreadInfo(firstPage).Pages;
    }

    // Both ends of the range are inclusive, hence the +1.
    return Tuple.Create(startPage, endPage, endPage - startPage + 1);
}
/// <summary>
/// Awaits each page in list order and gathers every post the adapter finds on it.
/// The thread info is taken from the first page that loaded successfully.
/// </summary>
/// <param name="loadingPages">The pages that are being loaded for the tally.</param>
/// <param name="quest">The quest being tallied.</param>
/// <param name="adapter">The forum adapter that handles the quest's thread.</param>
/// <param name="threadRangeInfo">The range info; supplies the page number of the first page in the list.</param>
/// <returns>
/// Returns the thread info and all posts extracted from all pages provided.
/// When no page supplied thread info, a placeholder ThreadInfo("Unknown", "Unknown", 0) is returned.
/// </returns>
/// <exception cref="InvalidOperationException">
/// Thrown, with Data["Application"] set to true, after all pages have been awaited
/// if any of them loaded as null.
/// </exception>
private async Task<(ThreadInfo threadInfo, List<Post> posts)> GetPostsFromPagesAsync(
    List<Task<HtmlDocument?>> loadingPages, IQuest quest, IForumAdapter2 adapter,
    ThreadRangeInfo threadRangeInfo)
{
    var collectedPosts = new List<Post>();
    ThreadInfo? info = null;
    bool anyPageMissing = false;

    // Pages are assumed to be listed consecutively from the start page,
    // so the page number simply advances with each task.
    int currentPageNumber = threadRangeInfo.GetStartPage(quest);

    foreach (Task<HtmlDocument?> pendingPage in loadingPages)
    {
        HtmlDocument? document = await pendingPage.ConfigureAwait(false);

        if (document is null)
        {
            // Keep going so the remaining pages are still awaited; fail afterwards.
            anyPageMissing = true;
        }
        else
        {
            info ??= adapter.GetThreadInfo(document);
            collectedPosts.AddRange(adapter.GetPosts(document, quest, currentPageNumber));
        }

        currentPageNumber++;
    }

    if (anyPageMissing)
    {
        var failure = new InvalidOperationException("Unable to load all pages.");
        failure.Data["Application"] = true;
        throw failure;
    }

    return (info ?? new ThreadInfo("Unknown", "Unknown", 0), collectedPosts);
}
/// <summary>
/// Applies the tally's filters to a list of posts: vote presence, the valid
/// post range, username filtering and explicit post filtering. One post is
/// kept per thread post number, and the result is ordered by that number.
/// </summary>
/// <param name="postsList">The list of posts to filter.</param>
/// <param name="quest">The quest with relevant options.</param>
/// <param name="threadInfo">Thread info provides the thread author.</param>
/// <param name="rangeInfo">Range info provides information on the range of valid posts.</param>
/// <returns>Returns a list of posts that satisfy the filtering criteria.</returns>
private List<Post> FilterPosts(List<Post> postsList, IQuest quest, ThreadInfo threadInfo, ThreadRangeInfo rangeInfo)
{
    // The post lies between the start and end points of the tally.
    bool IsWithinRange(Post post) =>
        PostIsAfterStart(post, rangeInfo) && PostIsBeforeEnd(post, quest, rangeInfo);

    // With custom username filters, the author must not match the filter;
    // without them, the author must not be the thread author.
    bool IsAuthorAllowed(Post post) =>
        quest.UseCustomUsernameFilters
            ? !quest.UsernameFilter.Match(post.Origin.Author)
            : !string.Equals(post.Origin.Author, threadInfo.Author, StringComparison.Ordinal);

    // With custom post filters, neither the post number nor the post ID may be listed.
    bool IsPostAllowed(Post post) =>
        !quest.UseCustomPostFilters ||
        !(quest.PostsToFilter.Contains(post.Origin.ThreadPostNumber) ||
          quest.PostsToFilter.Contains(post.Origin.ID.Value));

    return postsList
        .Where(post => post.HasVote && IsWithinRange(post) && IsAuthorAllowed(post) && IsPostAllowed(post))
        // Group to deal with sticky posts that should only be processed once.
        .GroupBy(post => post.Origin.ThreadPostNumber)
        .OrderBy(postNumGroup => postNumGroup.Key)
        .Select(postNumGroup => postNumGroup.First())
        .ToList();
}
/// <summary>
/// Tells whether a post falls before the endpoint of the tally. Every post
/// qualifies when the quest reads to the end of the thread or the range is a
/// threadmark search result; otherwise the post's thread post number must not
/// exceed the quest's end post.
/// </summary>
/// <param name="post">The post to check.</param>
/// <param name="quest">Quest options.</param>
/// <param name="rangeInfo">Specific range information.</param>
/// <returns>Returns true if the post comes before the end of the tally.</returns>
private bool PostIsBeforeEnd(Post post, IQuest quest, ThreadRangeInfo rangeInfo)
{
    // Open-ended tallies have no upper bound.
    if (quest.ReadToEndOfThread || rangeInfo.IsThreadmarkSearchResult)
    {
        return true;
    }

    return post.Origin.ThreadPostNumber <= quest.EndPost;
}
/// <summary>
/// Tells whether a post falls after the startpoint of the tally. When the
/// range is defined by number, the post's thread post number must be at or
/// past the range's number; otherwise the post's ID must be greater than the
/// range's ID.
/// </summary>
/// <param name="post">The post to check.</param>
/// <param name="rangeInfo">The range which shows where the tally starts.</param>
/// <returns>Returns true if the post comes after the start of the tally.</returns>
private bool PostIsAfterStart(Post post, ThreadRangeInfo rangeInfo)
{
    if (rangeInfo.ByNumber)
    {
        // Number-based start is inclusive.
        return post.Origin.ThreadPostNumber >= rangeInfo.Number;
    }

    // ID-based start is exclusive.
    return post.Origin.ID > rangeInfo.ID;
}
/// <summary>
/// Asks the forum adapter for the quest's thread range info (page and post numbers).
/// The supplied page provider is handed to the adapter for this.
/// </summary>
/// <param name="quest">The quest we're getting thread info for.</param>
/// <param name="adapter">The quest's forum adapter.</param>
/// <param name="pageProvider">The page provider passed on to the adapter.</param>
/// <param name="token">The cancellation token.</param>
/// <returns>Returns the quest's thread range info.</returns>
private async Task<ThreadRangeInfo> GetStartInfoAsync(IQuest quest, IForumAdapter2 adapter,
    IPageProvider pageProvider, CancellationToken token)
{
    return await adapter.GetQuestRangeInfoAsync(quest, pageProvider, token).ConfigureAwait(false);
}