/// <summary>
/// Routes the parsed page to the right handler:
/// a page that shows a file's content has its file information collected,
/// while a page that shows a folder listing keeps the traversal going.
/// For the root page, a cached result is looked up first.
/// </summary>
/// <param name="parser">Holds the page's DOM elements (IDocument) together with helpers to query it.</param>
/// <param name="root">Indicates whether the URL is the main/master/first/root one.</param>
/// <returns>A task that completes once the page has been handled.</returns>
protected virtual async Task DeterminePageContentAsync(GitHubParser parser, bool root)
{
    if (root)
    {
        // The cache is keyed by the last commit hash rather than by URL.
        // A URL key would be faster (learning the last commit requires loading the first page),
        // but it would keep returning an outdated result after a new commit is pushed.
        this.lastCommitHash = parser.GetLastCommitHash();

        var cachedFiles = this.cache.Get<ConcurrentBag<ItemFileInformationResponse>>(this.lastCommitHash);
        var cacheHit = cachedFiles != null;

        if (cacheHit)
        {
            // Reuse the cached files and record that the cache supplied them; nothing else to do.
            this.usedCache = true;
            this.temporaryFiles = cachedFiles;
            return;
        }
    }

    var detectedType = parser.DiscoverPageType();

    if (detectedType == GitHubParser.GitHubPageType.FileContent)
    {
        this.GetSynthesizedFileInformation(parser);
    }
    else if (detectedType == GitHubParser.GitHubPageType.FolderList)
    {
        await this.IterateFolderListAsync(parser);
    }

    // Any other page type is intentionally left unhandled, as before.
}
public async Task ShouldGetLastCommitHash()
{
    // Checks that GetLastCommitHash returns the full commit hash for a "Latest commit" box fixture.

    // Arrange
    const string expectedHash = "1cf47cd55b5c745bb8dfb734c8f96614c6b30273";
    const string latestCommitMarkup = @" <div class=""Box-header Box-header--blue position-relative""> <h2 class=""sr-only"">Latest commit</h2> <div class=""js-details-container Details d-flex rounded-top-1 flex-items-center flex-wrap"" data-issue-and-pr-hovercards-enabled=""""> <div class=""flex-1 d-flex flex-items-center ml-3 min-width-0""> <div class=""css-truncate css-truncate-overflow text-gray""> <span class=""commit-author user-mention"">Paulo Justino</span> <span class=""d-none d-sm-inline""> <a data-pjax=""true"" title=""Documentation"" class=""link-gray-dark"" href=""/paulojsilva/web-scraping-nolayer/commit/1cf47cd55b5c745bb8dfb734c8f96614c6b30273"">Documentation</a> </span> </div> <span class=""hidden-text-expander ml-2 d-inline-block d-inline-block d-lg-none""> <button type=""button"" class=""hx_bg-black-fade-15 text-gray-dark ellipsis-expander js-details-target"" aria-expanded=""false"">…</button> </span> <div class=""d-flex flex-auto flex-justify-end ml-3 flex-items-baseline""> <a href=""/paulojsilva/web-scraping-nolayer/commit/1cf47cd55b5c745bb8dfb734c8f96614c6b30273"" class=""f6 link-gray text-mono ml-2 d-none d-lg-inline"" data-pjax="""">1cf47cd</a> <a href=""/paulojsilva/web-scraping-nolayer/commit/1cf47cd55b5c745bb8dfb734c8f96614c6b30273"" class=""link-gray ml-2"" data-pjax=""""> <relative-time datetime=""2021-02-02T14:54:52Z"" class=""no-wrap"" title=""2 de fev. de 2021 11:54 BRT"">12 hours ago</relative-time> </a> </div> </div> </div> </div>";

    var dom = await context.OpenAsync(request => request.Content(latestCommitMarkup));
    var gitHubParser = new GitHubParser(dom, "github.com");

    // Act
    var actualHash = gitHubParser.GetLastCommitHash();

    // Assert
    actualHash.Should().Be(expectedHash);
}