예제 #1
0
        /// <summary>
        /// Lists all files and folders reachable from the root Uri of this instance.
        /// The root itself is reported first as a directory entry; after that, pages are
        /// taken from the toVisit queue one at a time and every entry found on a page is
        /// yielded. Directory entries are additionally handed to enqueueDirectory so they
        /// can be visited later.
        /// </summary>
        /// <param name="options">Listing preferences and parameters</param>
        /// <returns>The root entry followed by every entry found, produced lazily</returns>
        public IEnumerable<ListingInfo> GetListing(ListingOptions options)
        {
            // Seed the work queue with the starting point.
            toVisit.Enqueue(Uri);

            // The root is always reported, with itself as parent.
            var rootEntry = new ListingInfo
            {
                IsDirectory = true,
                Parent = Uri,
                Uri = Uri
            };
            yield return rootEntry;

            // Keep going until no page is left to visit.
            while (toVisit.TryDequeue(out Uri current))
            {
                foreach (ListingInfo entry in processPage(current, options))
                {
                    yield return entry;

                    // Only directories are followed further.
                    if (entry.IsDirectory)
                    {
                        enqueueDirectory(entry, options);
                    }
                }
            }
        }
예제 #2
0
        /// <summary>
        /// Adds the entry's Uri to the toVisit queue, unless an options.ShouldFetch
        /// handler is set and it marks the request as cancelled.
        /// </summary>
        /// <param name="l">Listing entry whose Uri should be queued</param>
        /// <param name="options">Listing preferences; ShouldFetch may be null</param>
        private void enqueueDirectory(ListingInfo l, ListingOptions options)
        {
            bool cancelled = false;

            if (options.ShouldFetch != null)
            {
                // Give the handler a chance to veto this directory.
                var fetchArgs = new ShouldFetchEventArgs(new Link(l.Uri, l.Uri));
                options.ShouldFetch(this, fetchArgs);
                cancelled = fetchArgs.Cancel;
            }

            if (!cancelled)
            {
                toVisit.Enqueue(l.Uri);
            }
        }
예제 #3
0
        /// <summary>
        /// Fetches a single listing page and turns it into listing entries.
        /// A page whose URL is already in the visited set yields nothing. When fetching
        /// fails, the error is written to the console, the same page is put back at the
        /// end of the toVisit queue, the thread sleeps for five seconds, and nothing is
        /// yielded for this attempt.
        /// </summary>
        /// <param name="uri">Address of the page to process</param>
        /// <param name="options">Listing preferences; AllowCaching is passed to the fetch helper</param>
        /// <returns>
        /// Entries produced by processRows (when "//table/tr" matches) or by
        /// processPreLines (when a "//pre" node exists); an empty array when the page is
        /// skipped or the fetch failed.
        /// </returns>
        /// <exception cref="NotImplementedException">
        /// The document has neither table rows nor a pre element.
        /// </exception>
        private IEnumerable<ListingInfo> processPage(Uri uri, ListingOptions options)
        {
            var url = uri.ToString();

            if (visited.Contains(url))
            {
                return Array.Empty<ListingInfo>();
            }

            HtmlAgilityPack.HtmlDocument doc;
            try
            {
                doc = FetchHelper.FetchResourceDocument(uri, enableCaching: options.AllowCaching);
            }
            catch (Exception ex)
            {
                Console.WriteLine("Err: " + ex.Message);
                // Send the page that failed to the end of the queue so it is retried
                // later. (Previously the root Uri property was enqueued here, so the
                // failed page was silently dropped.)
                // NOTE(review): a page that fails permanently is retried without limit;
                // consider adding a retry cap.
                toVisit.Enqueue(uri);
                Thread.Sleep(5000);
                return Array.Empty<ListingInfo>();
            }

            // Add to the 'seen' list only after a successful fetch, so that a failed
            // page is not skipped when it is retried.
            visited.Add(url);

            // Table-based listing layout.
            var rows = doc.DocumentNode.SelectNodes("//table/tr");
            if (rows != null)
            {
                return processRows(rows, uri, options);
            }

            // <pre>-based listing layout.
            var pre = doc.DocumentNode.SelectSingleNode("//pre");
            if (pre != null)
            {
                var nodes = pre.ChildNodes.ToArray(); // processPreLines is index-based
                return processPreLines(nodes, uri, options);
            }

            throw new NotImplementedException("This version of the apache directory listing is not supported yet");
        }
예제 #4
0
        private IEnumerable <ListingInfo> processPreLines(HtmlAgilityPack.HtmlNode[] nodes, Uri uri, ListingOptions options)
        {
            // [9] is header's <hr>
            for (int i = 10; i < nodes.Length; i += 2)
            {
                if (nodes[i].Name == "hr")
                {
                    break;
                }

                var text      = nodes[i + 1];
                var textParts = text.InnerText.Split(' ', StringSplitOptions.RemoveEmptyEntries);

                var  href     = nodes[i].GetAttributeValue("href", "");
                var  lnkText  = nodes[i].InnerText;
                bool isParent = lnkText == "Parent Directory";
                if (isParent)
                {
                    textParts = new string[] { " ", " ", "-" }
                }
                ;

                bool isDir = href.EndsWith("/");

                if (isParent && options.NoParent)
                {
                    continue;
                }

                DateTime.TryParse(textParts[0] + " " + textParts[1], out DateTime lastModified);

                string size    = textParts[2];
                long   numSize = processTextualSize(size);

                //bool isDir = dir || isParent;
                string fileName = "";
                if (!isDir)
                {
                    fileName = href;
                }

                yield return(new ListingInfo()
                {
                    Parent = uri,
                    Uri = new Uri(uri, href),
                    LastModified = lastModified,
                    Size = size,
                    FileSize = numSize,
                    IsDirectory = isDir,
                    FileName = fileName,
                    FileExtension = fileName.Split('.')[^ 1]