예제 #1
0
        /// <summary>
        /// Decides how the current page must be handled:
        /// either it represents a file's content, so the file information is collected,
        /// or it represents a folder list, so the process continues through that list.
        /// For the root page, a cached result keyed by the last commit hash is tried first.
        /// </summary>
        /// <param name="parser">Contains the full DOM elements (IDocument) and helpers to use them.</param>
        /// <param name="root">Indicates whether the URL is the main/first/root one.</param>
        /// <returns>A task that completes once the page has been handled.</returns>
        protected virtual async Task DeterminePageContentAsync(GitHubParser parser, bool root)
        {
            if (root)
            {
                // The cache key is the last commit hash.
                // Using the URL as the key would be faster, because reading the last commit
                // requires loading the first page. However, after a new commit is pushed,
                // a URL key would return an outdated result.
                this.lastCommitHash = parser.GetLastCommitHash();

                var cachedFiles = this.cache.Get<ConcurrentBag<ItemFileInformationResponse>>(this.lastCommitHash);
                if (cachedFiles != null)
                {
                    // Cache hit: reuse the stored files and skip parsing the page.
                    this.usedCache      = true;
                    this.temporaryFiles = cachedFiles;
                    return;
                }
            }

            var discoveredType = parser.DiscoverPageType();

            if (discoveredType == GitHubParser.GitHubPageType.FileContent)
            {
                this.GetSynthesizedFileInformation(parser);
            }
            else if (discoveredType == GitHubParser.GitHubPageType.FolderList)
            {
                await this.IterateFolderListAsync(parser);
            }

            // Any other page type is intentionally left unhandled.
        }
예제 #2
0
        /// <summary>
        /// Verifies that <c>GetLastCommitHash</c> returns the full commit hash found in the
        /// commit links of a "Latest commit" HTML fragment.
        /// </summary>
        public async Task ShouldGetLastCommitHash()
        {
            // Arrange: the hash the commit links in the fragment below point to.
            const string expectedHash = "1cf47cd55b5c745bb8dfb734c8f96614c6b30273";

            var latestCommitMarkup = @"
            <div class=""Box-header Box-header--blue position-relative"">
                <h2 class=""sr-only"">Latest commit</h2>
                <div class=""js-details-container Details d-flex rounded-top-1 flex-items-center flex-wrap"" data-issue-and-pr-hovercards-enabled="""">
              <div class=""flex-1 d-flex flex-items-center ml-3 min-width-0"">
                <div class=""css-truncate css-truncate-overflow text-gray"">
                    <span class=""commit-author user-mention"">Paulo Justino</span>
                    <span class=""d-none d-sm-inline"">
                      <a data-pjax=""true"" title=""Documentation"" class=""link-gray-dark"" href=""/paulojsilva/web-scraping-nolayer/commit/1cf47cd55b5c745bb8dfb734c8f96614c6b30273"">Documentation</a>
                    </span>
                </div>
                <span class=""hidden-text-expander ml-2 d-inline-block d-inline-block d-lg-none"">
                  <button type=""button"" class=""hx_bg-black-fade-15 text-gray-dark ellipsis-expander js-details-target"" aria-expanded=""false"">…</button>
                </span>
                <div class=""d-flex flex-auto flex-justify-end ml-3 flex-items-baseline"">
        
                  <a href=""/paulojsilva/web-scraping-nolayer/commit/1cf47cd55b5c745bb8dfb734c8f96614c6b30273"" class=""f6 link-gray text-mono ml-2 d-none d-lg-inline"" data-pjax="""">1cf47cd</a>
                  <a href=""/paulojsilva/web-scraping-nolayer/commit/1cf47cd55b5c745bb8dfb734c8f96614c6b30273"" class=""link-gray ml-2"" data-pjax="""">
                    <relative-time datetime=""2021-02-02T14:54:52Z"" class=""no-wrap"" title=""2 de fev. de 2021 11:54 BRT"">12 hours ago</relative-time>
                  </a>
                </div>
              </div>
                </div>
              </div>";

            // Load the fragment into a document through the shared context
            // (the context field is declared outside this method).
            var page = await context.OpenAsync(request => request.Content(latestCommitMarkup));
            var gitHubParser = new GitHubParser(page, "github.com");

            // Act
            var actualHash = gitHubParser.GetLastCommitHash();

            // Assert
            actualHash.Should().Be(expectedHash);
        }