/// <summary>
/// Lists all files and folders reachable from the root Uri of this instance.
/// The root itself is reported first as a directory entry; after that, pages are
/// pulled from the pending queue one at a time and every entry found on a page
/// is yielded as soon as it has been parsed.
/// </summary>
/// <param name="options">Listing preferences and parameters</param>
/// <returns>The root entry followed by every entry discovered while crawling</returns>
public IEnumerable<ListingInfo> GetListing(ListingOptions options)
{
    // Seed the work queue with the starting point.
    toVisit.Enqueue(Uri);

    // The root is reported as a directory whose parent is itself.
    var rootEntry = new ListingInfo() { IsDirectory = true, Parent = Uri, Uri = Uri };
    yield return rootEntry;

    while (toVisit.TryDequeue(out Uri pending))
    {
        foreach (ListingInfo entry in processPage(pending, options))
        {
            yield return entry;

            // Only directories are candidates for a later visit.
            if (entry.IsDirectory)
            {
                enqueueDirectory(entry, options);
            }
        }
    }
}
/// <summary>
/// Queues a directory entry for a later visit, unless the optional
/// <c>ShouldFetch</c> callback on <paramref name="options"/> cancels it.
/// </summary>
/// <param name="l">Directory entry whose Uri should be queued.</param>
/// <param name="options">Listing preferences; supplies the optional <c>ShouldFetch</c> callback.</param>
private void enqueueDirectory(ListingInfo l, ListingOptions options)
{
    // No callback registered: nothing can veto the directory.
    if (options.ShouldFetch == null)
    {
        toVisit.Enqueue(l.Uri);
        return;
    }

    // Let the callback decide; it signals a veto through args.Cancel.
    var fetchArgs = new ShouldFetchEventArgs(new Link(l.Uri, l.Uri));
    options.ShouldFetch(this, fetchArgs);

    if (!fetchArgs.Cancel)
    {
        toVisit.Enqueue(l.Uri);
    }
}
/// <summary>
/// Fetches a single listing page and hands its markup to the matching parser:
/// table rows go to <c>processRows</c>, the children of a <c>pre</c> element go to
/// <c>processPreLines</c>.
/// </summary>
/// <param name="uri">Address of the page to fetch.</param>
/// <param name="options">Listing preferences; <c>AllowCaching</c> is forwarded to the fetch.</param>
/// <returns>
/// The entries produced by the selected parser, or an empty sequence when the page
/// was already visited or could not be fetched.
/// </returns>
/// <exception cref="NotImplementedException">
/// The page contains neither table rows nor a <c>pre</c> element.
/// </exception>
private IEnumerable<ListingInfo> processPage(Uri uri, ListingOptions options)
{
    var url = uri.ToString();

    // Pages are tracked by their string form; never process the same one twice.
    if (visited.Contains(url))
    {
        return Array.Empty<ListingInfo>();
    }

    HtmlAgilityPack.HtmlDocument doc;
    try
    {
        doc = FetchHelper.FetchResourceDocument(uri, enableCaching: options.AllowCaching);
    }
    catch (Exception ex)
    {
        Console.WriteLine("Err: " + ex.Message);

        // Send the page that just failed to the end of the queue so it is retried
        // later. This must be the 'uri' parameter: re-queueing the root 'Uri'
        // property would drop every failed non-root page, because the root is
        // normally already marked as visited. NOTE: there is no retry limit here.
        toVisit.Enqueue(uri);

        // Back off before the caller moves on to the next queued page.
        Thread.Sleep(5000);
        return Array.Empty<ListingInfo>();
    }

    // Add to the 'seen' list only after a successful fetch, so failed pages stay retryable.
    visited.Add(url);

    var rows = doc.DocumentNode.SelectNodes("//table/tr");
    var pre = doc.DocumentNode.SelectSingleNode("//pre");

    if (rows != null)
    {
        return processRows(rows, uri, options);
    }

    if (pre != null)
    {
        // The pre-based parser walks the child nodes by index, so materialize them.
        var nodes = pre.ChildNodes.ToArray();
        return processPreLines(nodes, uri, options);
    }

    throw new NotImplementedException("This version of the apache directory listing is not supported yet");
}
private IEnumerable <ListingInfo> processPreLines(HtmlAgilityPack.HtmlNode[] nodes, Uri uri, ListingOptions options) { // [9] is header's <hr> for (int i = 10; i < nodes.Length; i += 2) { if (nodes[i].Name == "hr") { break; } var text = nodes[i + 1]; var textParts = text.InnerText.Split(' ', StringSplitOptions.RemoveEmptyEntries); var href = nodes[i].GetAttributeValue("href", ""); var lnkText = nodes[i].InnerText; bool isParent = lnkText == "Parent Directory"; if (isParent) { textParts = new string[] { " ", " ", "-" } } ; bool isDir = href.EndsWith("/"); if (isParent && options.NoParent) { continue; } DateTime.TryParse(textParts[0] + " " + textParts[1], out DateTime lastModified); string size = textParts[2]; long numSize = processTextualSize(size); //bool isDir = dir || isParent; string fileName = ""; if (!isDir) { fileName = href; } yield return(new ListingInfo() { Parent = uri, Uri = new Uri(uri, href), LastModified = lastModified, Size = size, FileSize = numSize, IsDirectory = isDir, FileName = fileName, FileExtension = fileName.Split('.')[^ 1]