/// <summary>
/// Extracts the information about a file (number of lines, size and size unit)
/// from the HTML of its GitHub page and stores it on <paramref name="file"/>.
/// </summary>
/// <param name="file">Item whose <c>NumberOfLines</c>, <c>Size</c> and <c>SizeUnit</c> are filled in.</param>
/// <param name="dataStr">HTML of the file's page.</param>
/// <exception cref="ArgumentNullException"><paramref name="file"/> or <paramref name="dataStr"/> is null.</exception>
/// <exception cref="InvalidOperationException">
/// The file information header is missing, or the size text is not "&lt;number&gt; &lt;unit&gt;".
/// </exception>
public static void GetFileInfo(DirectoryItem file, string dataStr)
{
    if (file == null)
    {
        throw new ArgumentNullException(nameof(file));
    }

    if (dataStr == null)
    {
        throw new ArgumentNullException(nameof(dataStr));
    }

    var doc = new HtmlDocument();
    doc.LoadHtml(dataStr);

    var div = doc.DocumentNode.SelectSingleNode(
        "//div[contains(@class,'Box mt-3 position-relative')]/div[contains(@class,'Box-header')]/div");
    if (div == null)
    {
        // SelectSingleNode returns null when nothing matches; fail with context
        // instead of a bare NullReferenceException further down.
        throw new InvalidOperationException("File information header not found in the page HTML.");
    }

    // The HTML in GitHub informs the number of lines and the size of a file.
    // If there is no <span> divider, the page only displays the size of the file
    // (it is a binary file). Otherwise it is a text file, and both the number of
    // lines and the size are shown.
    var divider = div.SelectSingleNode("span");

    if (divider == null)
    {
        // Size only. Split on any whitespace so repeated spaces or line breaks
        // inside the header do not produce empty tokens.
        var sizeParts = div.InnerText.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);
        if (sizeParts.Length < 2)
        {
            throw new InvalidOperationException(
                $"Unexpected file size text: '{div.InnerText.Trim()}'.");
        }

        // The page uses '.' as the decimal separator, so parse culture-independently;
        // the current culture could otherwise read "1.23" as 123.
        file.Size = Convert.ToDecimal(sizeParts[0], System.Globalization.CultureInfo.InvariantCulture);
        file.SizeUnit = sizeParts[1].ToLowerInvariant();
        return;
    }

    // Text file: lines and size are both present. Any exception from the
    // extractor propagates unchanged to the caller.
    var linesAndSize = LinesAndSizeExtractor.Extract(div.InnerText);
    file.NumberOfLines = linesAndSize.NumberOfLines;
    file.Size = linesAndSize.Size;
    file.SizeUnit = linesAndSize.Unit;
}
/// <summary>
/// Returns all directories and files in a repository. Starting at
/// <paramref name="pageUrl"/>, every directory page of the current level is
/// downloaded in parallel, its rows are parsed, and the directories found form
/// the next level to visit.
/// </summary>
/// <param name="pageUrl">
/// URL of the repository root folder; it is appended to
/// <c>GHProjectDownloaderService.GitHubUrl</c> to build the request address.
/// </param>
/// <param name="directoryItemObjects">List that receives every item found (directories and files).</param>
/// <returns>A task that completes when every directory page has been downloaded and parsed.</returns>
/// <exception cref="ArgumentNullException"><paramref name="directoryItemObjects"/> is null.</exception>
/// <exception cref="DomainException">
/// A page contains no rows, or a row is missing its type label or its link.
/// </exception>
public static async Task GetDirItems(string pageUrl, List<DirectoryItem> directoryItemObjects)
{
    if (directoryItemObjects == null)
    {
        throw new ArgumentNullException(nameof(directoryItemObjects));
    }

    // Directories of the level being processed; start with the root repository folder.
    var currentDirs = new List<DirectoryItem>
    {
        new DirectoryItem { Url = pageUrl, Type = DirObjectType.DIRECTORY }
    };

    while (currentDirs.Count > 0)
    {
        // Download all folder pages of this level in parallel.
        var downloads = new List<Task<string>>(currentDirs.Count);
        foreach (var dir in currentDirs)
        {
            downloads.Add(GhderHttpClient.Instance.GetStringAsync(GHProjectDownloaderService.GitHubUrl + dir.Url));
        }

        // WhenAll yields the results in the same order as the tasks, so
        // htmlPages[i] is the page of currentDirs[i].
        string[] htmlPages = await Task.WhenAll(downloads);

        var nextDirs = new List<DirectoryItem>();

        for (int i = 0; i < htmlPages.Length; i++)
        {
            var currentUrl = currentDirs[i].Url;

            var doc = new HtmlDocument();
            doc.LoadHtml(htmlPages[i]);

            // List all rows in a directory. Each row is a div and each cell is a div.
            var rows = doc.DocumentNode.SelectNodes("//div[contains(@class,'js-navigation-item')]");
            if (rows == null)
            {
                throw new DomainException($"No Rows found in: {currentUrl}");
            }

            foreach (var row in rows)
            {
                // Inside a folder, the first item of the directory is a link to the
                // upper directory. If the first cell of the row does not have an svg,
                // it is that upper directory link.
                var svg = row.SelectSingleNode("div[1]/svg");
                if (svg == null)
                {
                    continue;
                }

                // "directory" or "file"
                var typeLabel = svg.Attributes["aria-label"]?.Value;
                if (typeLabel == null)
                {
                    throw new DomainException($"Row without a type label in: {currentUrl}");
                }

                var itemType = string.Equals(typeLabel, "file", StringComparison.OrdinalIgnoreCase)
                    ? DirObjectType.FILE
                    : DirObjectType.DIRECTORY;

                // Look the link up once; it provides both the url and the name.
                var link = row.SelectSingleNode("div[2]/span/a");
                var href = link?.Attributes["href"]?.Value;
                if (href == null)
                {
                    throw new DomainException($"Row without a link in: {currentUrl}");
                }

                var name = link.InnerText;
                var newItem = new DirectoryItem
                {
                    Type = itemType,
                    Url = href,
                    Name = name,
                    Extension = GetExtention(name)
                };

                // All folder objects (files and directories) go to the caller's list.
                directoryItemObjects.Add(newItem);

                // Only folders are visited on the next pass: folders can have subfolders.
                if (newItem.Type == DirObjectType.DIRECTORY)
                {
                    nextDirs.Add(newItem);
                }
            }
        }

        currentDirs = nextDirs;
    }
}