/// <summary>
/// Hands every folder entry found by the parser back to ProcessAsync, which is how
/// nested folders and files end up being visited recursively.
/// </summary>
/// <param name="parser">Wraps the page's DOM elements (IDocument) and the helpers used to query it</param>
/// <returns>A Task representing the parallel traversal of the folder entries</returns>
protected async Task IterateFolderListAsync(GitHubParser parser)
{
    var folderLinks = parser.GetFolderListItens();

    // Concurrency is capped by the configured setting rather than a hard-coded value.
    var parallelism = this.settings.Value.MaxDegreeOfParallelism;

    await folderLinks.ParallelForEachAsync(
        async folderLink =>
        {
            // Endpoints are combined with the host prefix before being processed.
            var target = host + folderLink.Endpoint;
            await this.ProcessAsync(target);
        },
        maxDegreeOfParallelism: parallelism);
}
/// <summary>
/// Checks that GetFolderListItens returns one folder link for each directory row
/// of a GitHub file-listing grid.
/// </summary>
/// <returns>A Task representing the asynchronous test run</returns>
public async Task ShouldGetFolderListItens()
{
    // Given: a file-listing grid with two "Directory" rows, linking to
    // .../Layers/Domain/Dom/GitHub and .../Layers/Domain/Services.
    // The literal is kept verbatim, including the line breaks embedded in the
    // time-ago title attributes.
    var source = @" <div class=""js-details-container Details""> <div role=""grid"" aria-labelledby=""files"" class=""Details-content--hidden-not-important js-navigation-container js-active-navigation-container d-block"" data-pjax=""""> <div role=""row"" class=""Box-row Box-row--focus-gray py-2 d-flex position-relative js-navigation-item""> <div role=""gridcell"" class=""mr-3 flex-shrink-0"" style=""width: 16px;""> <svg aria-label=""Directory"" class=""octicon octicon-file-directory text-color-icon-directory"" height=""16"" viewBox=""0 0 16 16"" version=""1.1"" width=""16"" role=""img""><path fill-rule=""evenodd"" d=""M1.75 1A1.75 1.75 0 000 2.75v10.5C0 14.216.784 15 1.75 15h12.5A1.75 1.75 0 0016 13.25v-8.5A1.75 1.75 0 0014.25 3h-6.5a.25.25 0 01-.2-.1l-.9-1.2c-.33-.44-.85-.7-1.4-.7h-3.5z""></path></svg> </div> <div role=""rowheader"" class=""flex-auto min-width-0 col-md-2 mr-3""> <span class=""css-truncate css-truncate-target d-block width-fit""><a class=""js-navigation-open link-gray-dark"" title=""This path skips through empty directories"" data-pjax=""#repo-content-pjax-container"" href=""/paulojsilva/web-scraping-nolayer/tree/main/Layers/Domain/Dom/GitHub""><span class=""text-gray-light"">Dom/</span>GitHub</a></span> </div> <div role=""gridcell"" class=""flex-auto min-width-0 d-none d-md-block col-5 mr-3""> <span class=""css-truncate css-truncate-target d-block width-fit""> <a data-pjax=""true"" title=""first commit"" class=""link-gray"" href=""/paulojsilva/web-scraping-nolayer/commit/c95e6cb33ab2d712c4cc93767808061ee9469f3a"">first commit</a> </span> </div> <div role=""gridcell"" class=""text-gray-light text-right"" style=""width:100px;""> <time-ago datetime=""2021-02-02T13:14:42Z"" class=""no-wrap"" title=""2 de fev. 
de 2021 10:14 BRT"">14 hours ago</time-ago> </div> </div> <div role=""row"" class=""Box-row Box-row--focus-gray py-2 d-flex position-relative js-navigation-item navigation-focus""> <div role=""gridcell"" class=""mr-3 flex-shrink-0"" style=""width: 16px;""> <svg aria-label=""Directory"" class=""octicon octicon-file-directory text-color-icon-directory"" height=""16"" viewBox=""0 0 16 16"" version=""1.1"" width=""16"" role=""img""><path fill-rule=""evenodd"" d=""M1.75 1A1.75 1.75 0 000 2.75v10.5C0 14.216.784 15 1.75 15h12.5A1.75 1.75 0 0016 13.25v-8.5A1.75 1.75 0 0014.25 3h-6.5a.25.25 0 01-.2-.1l-.9-1.2c-.33-.44-.85-.7-1.4-.7h-3.5z""></path></svg> </div> <div role=""rowheader"" class=""flex-auto min-width-0 col-md-2 mr-3""> <span class=""css-truncate css-truncate-target d-block width-fit""><a class=""js-navigation-open link-gray-dark"" title=""Services"" data-pjax=""#repo-content-pjax-container"" href=""/paulojsilva/web-scraping-nolayer/tree/main/Layers/Domain/Services"">Services</a></span> </div> <div role=""gridcell"" class=""flex-auto min-width-0 d-none d-md-block col-5 mr-3""> <span class=""css-truncate css-truncate-target d-block width-fit""> <a data-pjax=""true"" title=""first commit"" class=""link-gray"" href=""/paulojsilva/web-scraping-nolayer/commit/c95e6cb33ab2d712c4cc93767808061ee9469f3a"">first commit</a> </span> </div> <div role=""gridcell"" class=""text-gray-light text-right"" style=""width:100px;""> <time-ago datetime=""2021-02-02T13:14:42Z"" class=""no-wrap"" title=""2 de fev. 
de 2021 10:14 BRT"">14 hours ago</time-ago> </div> </div> </div> </div>";

    var document = await context.OpenAsync(req => req.Content(source));
    var parser = new GitHubParser(document, "github.com");

    // When
    var itens = parser.GetFolderListItens();

    // Then
    itens.Should().HaveCount(2);

    // ContainSingle reports a missing or duplicated link as an assertion failure;
    // the previous First/Last calls threw InvalidOperationException before the
    // NotBeNull check could ever run. Endpoints are compared ordinally so the
    // outcome does not depend on the machine's culture.
    itens.Should().ContainSingle(i =>
        i.Type == GitHubLinkAccess.GitHubLinkAccessType.Folder
        && i.Endpoint.EndsWith("Dom/GitHub", System.StringComparison.Ordinal));
    itens.Should().ContainSingle(i =>
        i.Type == GitHubLinkAccess.GitHubLinkAccessType.Folder
        && i.Endpoint.EndsWith("Services", System.StringComparison.Ordinal));
}