예제 #1
0
        /// <summary>
        /// Calls <c>ProcessAsync</c> once for every item returned by
        /// <c>parser.GetFolderListItens()</c>, running the calls through
        /// <c>ParallelForEachAsync</c>.
        /// </summary>
        /// <remarks>
        /// Each item's <c>Endpoint</c> is appended to <c>host</c> to build the argument
        /// handed to <c>ProcessAsync</c>. The value of
        /// <c>settings.Value.MaxDegreeOfParallelism</c> is passed as the maximum degree
        /// of parallelism.
        /// NOTE(review): the recursion presumably comes from ProcessAsync calling back
        /// into this method for each visited page - confirm against ProcessAsync.
        /// </remarks>
        /// <param name="parser">Parser that supplies the folder list items to visit.</param>
        /// <returns>A task that completes once the parallel iteration has finished.</returns>
        protected async Task IterateFolderListAsync(GitHubParser parser)
        {
            var folderLinks = parser.GetFolderListItens();
            var parallelism = this.settings.Value.MaxDegreeOfParallelism;

            await folderLinks.ParallelForEachAsync(
                async link =>
                {
                    // Endpoint is appended to the host to form the address to process.
                    var target = host + link.Endpoint;
                    await this.ProcessAsync(target);
                },
                maxDegreeOfParallelism: parallelism);
        }
예제 #2
0
        // Verifies that GitHubParser.GetFolderListItens() returns one Folder-typed entry
        // for each of the two "Directory" rows in the HTML fixture below.
        public async Task ShouldGetFolderListItens()
        {
            // Given
            // Fixture: a file-list grid with two rows whose icons are labelled "Directory";
            // their links end in "Dom/GitHub" and "Services" respectively.
            var source = @"
            <div class=""js-details-container Details"">
            <div role=""grid"" aria-labelledby=""files"" class=""Details-content--hidden-not-important js-navigation-container js-active-navigation-container d-block"" data-pjax="""">
                <div role=""row"" class=""Box-row Box-row--focus-gray py-2 d-flex position-relative js-navigation-item"">
                  <div role=""gridcell"" class=""mr-3 flex-shrink-0"" style=""width: 16px;"">
                      <svg aria-label=""Directory"" class=""octicon octicon-file-directory text-color-icon-directory"" height=""16"" viewBox=""0 0 16 16"" version=""1.1"" width=""16"" role=""img""><path fill-rule=""evenodd"" d=""M1.75 1A1.75 1.75 0 000 2.75v10.5C0 14.216.784 15 1.75 15h12.5A1.75 1.75 0 0016 13.25v-8.5A1.75 1.75 0 0014.25 3h-6.5a.25.25 0 01-.2-.1l-.9-1.2c-.33-.44-.85-.7-1.4-.7h-3.5z""></path></svg>
                  </div>
                  <div role=""rowheader"" class=""flex-auto min-width-0 col-md-2 mr-3"">
                    <span class=""css-truncate css-truncate-target d-block width-fit""><a class=""js-navigation-open link-gray-dark"" title=""This path skips through empty directories"" data-pjax=""#repo-content-pjax-container"" href=""/paulojsilva/web-scraping-nolayer/tree/main/Layers/Domain/Dom/GitHub""><span class=""text-gray-light"">Dom/</span>GitHub</a></span>
                  </div>
                  <div role=""gridcell"" class=""flex-auto min-width-0 d-none d-md-block col-5 mr-3"">
                      <span class=""css-truncate css-truncate-target d-block width-fit"">
                            <a data-pjax=""true"" title=""first commit"" class=""link-gray"" href=""/paulojsilva/web-scraping-nolayer/commit/c95e6cb33ab2d712c4cc93767808061ee9469f3a"">first commit</a>
                      </span>
                  </div>
                  <div role=""gridcell"" class=""text-gray-light text-right"" style=""width:100px;"">
                      <time-ago datetime=""2021-02-02T13:14:42Z"" class=""no-wrap"" title=""2 de fev. de 2021 10:14 BRT"">14 hours ago</time-ago>
                  </div>
                </div>
                <div role=""row"" class=""Box-row Box-row--focus-gray py-2 d-flex position-relative js-navigation-item navigation-focus"">
                  <div role=""gridcell"" class=""mr-3 flex-shrink-0"" style=""width: 16px;"">
                      <svg aria-label=""Directory"" class=""octicon octicon-file-directory text-color-icon-directory"" height=""16"" viewBox=""0 0 16 16"" version=""1.1"" width=""16"" role=""img""><path fill-rule=""evenodd"" d=""M1.75 1A1.75 1.75 0 000 2.75v10.5C0 14.216.784 15 1.75 15h12.5A1.75 1.75 0 0016 13.25v-8.5A1.75 1.75 0 0014.25 3h-6.5a.25.25 0 01-.2-.1l-.9-1.2c-.33-.44-.85-.7-1.4-.7h-3.5z""></path></svg>
                  </div>
                  <div role=""rowheader"" class=""flex-auto min-width-0 col-md-2 mr-3"">
                    <span class=""css-truncate css-truncate-target d-block width-fit""><a class=""js-navigation-open link-gray-dark"" title=""Services"" data-pjax=""#repo-content-pjax-container"" href=""/paulojsilva/web-scraping-nolayer/tree/main/Layers/Domain/Services"">Services</a></span>
                  </div>
                  <div role=""gridcell"" class=""flex-auto min-width-0 d-none d-md-block col-5 mr-3"">
                      <span class=""css-truncate css-truncate-target d-block width-fit"">
                            <a data-pjax=""true"" title=""first commit"" class=""link-gray"" href=""/paulojsilva/web-scraping-nolayer/commit/c95e6cb33ab2d712c4cc93767808061ee9469f3a"">first commit</a>
                      </span>
                  </div>
                  <div role=""gridcell"" class=""text-gray-light text-right"" style=""width:100px;"">
                      <time-ago datetime=""2021-02-02T13:14:42Z"" class=""no-wrap"" title=""2 de fev. de 2021 10:14 BRT"">14 hours ago</time-ago>
                  </div>
                </div>
            </div>
          </div>";

            // `context` is declared outside this method; the fixture is loaded through it
            // as the content of the document handed to the parser.
            var document = await context.OpenAsync(req => req.Content(source));

            var parser = new GitHubParser(document, "github.com");

            // When
            var itens = parser.GetFolderListItens();

            // Then
            itens.Should().HaveCount(2);

            // Contain(predicate) reports a missing entry as an assertion failure. The previous
            // First/Last(...).Should().NotBeNull() could never fail on the assertion itself:
            // First/Last throw InvalidOperationException when nothing matches.
            // The suffix check is ordinal so the result does not depend on the current culture.
            itens.Should().Contain(i => i.Type == GitHubLinkAccess.GitHubLinkAccessType.Folder && i.Endpoint.EndsWith("Dom/GitHub", System.StringComparison.Ordinal));
            itens.Should().Contain(i => i.Type == GitHubLinkAccess.GitHubLinkAccessType.Folder && i.Endpoint.EndsWith("Services", System.StringComparison.Ordinal));
        }