/// <summary>
/// Decides how the current page must be handled:
/// either it is time to collect file information (the page represents a file's content),
/// or the traversal has to continue (the page represents a folder list).
/// For the root page, a cache lookup keyed by the last commit hash is attempted first.
/// </summary>
/// <param name="parser">Holds the full DOM (IDocument) of the page plus helpers to query it.</param>
/// <param name="root">Indicates whether the URL is the main/master/first/root one.</param>
/// <returns>A task that completes once the page has been handled.</returns>
protected virtual async Task DeterminePageContentAsync(GitHubParser parser, bool root)
{
    if (root)
    {
        // The cache is consulted here, keyed by the last commit hash.
        // Using the URL as the key would be faster, because obtaining the last commit
        // requires loading the first page; however, once a new commit is pushed a
        // URL key would keep returning an outdated result.
        this.lastCommitHash = parser.GetLastCommitHash();

        var cachedFiles = this.cache.Get<ConcurrentBag<ItemFileInformationResponse>>(this.lastCommitHash);
        if (cachedFiles != default)
        {
            // Cache hit: reuse the stored result and skip parsing entirely.
            this.usedCache = true;
            this.temporaryFiles = cachedFiles;
            return;
        }
    }

    var detectedType = parser.DiscoverPageType();

    if (detectedType == GitHubParser.GitHubPageType.FileContent)
    {
        this.GetSynthesizedFileInformation(parser);
    }
    else if (detectedType == GitHubParser.GitHubPageType.FolderList)
    {
        await this.IterateFolderListAsync(parser);
    }

    // Any other page type is intentionally left unhandled.
}
/// <summary>
/// Verifies that <c>DiscoverPageType</c> reports <c>FileContent</c> for markup
/// containing a blob wrapper with a file-line container table.
/// </summary>
public async Task ShouldDiscoverPageType()
{
    // Arrange: a minimal fragment of a file-content page.
    var markup = @" <div itemprop=""text"" class=""Box-body p-0 blob-wrapper data type-c gist-border-0""> <table class=""highlight tab-size js-file-line-container"" data-tab-size=""8"" data-paste-markdown-skip=""""></table> <details class=""details-reset details-overlay BlobToolbar position-absolute js-file-line-actions dropdown d-none"" aria-hidden=""true""> </details> </div>";
    var page = await context.OpenAsync(request => request.Content(markup));
    var sut = new GitHubParser(page, "github.com");

    // Act
    var discovered = sut.DiscoverPageType();

    // Assert
    discovered.Should().Be(GitHubParser.GitHubPageType.FileContent);
}