/// <summary>
/// Creates a tree whose node set initially contains only the supplied root.
/// </summary>
/// <param name="root">The node stored as the root of the tree and as the first known node.</param>
public DirectoryTree(DirectoryTreeNode root)
{
    // The collection initializer performs the same single Add(root) call.
    m_nodes = new SortedSet<DirectoryTreeNode> { root };
    m_root = root;
}
/// <summary>
/// Adds a new link to a current node.
/// </summary>
/// <param name="origin">The current node. May be null only while the tree has no root.</param>
/// <param name="path">The path to the linked node; must be an absolute URI.</param>
/// <param name="status">The status code when reaching the link</param>
/// <returns>
/// The newly discovered node, or null when the target node was already known
/// (in which case <paramref name="origin"/> is merely linked to the existing node).
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="path"/> is null, or <paramref name="origin"/> is null although the tree has a root.
/// </exception>
/// <exception cref="ArgumentException"><paramref name="path"/> is not an absolute URI.</exception>
public DirectoryTreeNode AddLink(DirectoryTreeNode origin, string path, HttpStatusCode status)
{
    if (path == null)
    {
        throw new ArgumentNullException("path");
    }

    if (origin == null && m_root != null)
    {
        throw new ArgumentNullException("origin");
    }

    Uri outUri;
    if (!Uri.TryCreate(path, UriKind.Absolute, out outUri))
    {
        throw new ArgumentException("Incorrect type of Uri");
    }

    DirectoryTreeNode newNode = new DirectoryTreeNode(outUri, status);

    if (!m_nodes.Contains(newNode))
    {
        // First time this node is seen: link it (when there is an origin) and register it.
        if (origin != null)
        {
            origin.Links.Add(path, newNode);
        }

        m_nodes.Add(newNode);
        return newNode;
    }

    // The node is already known. Without an origin there is nothing to link;
    // previously this path dereferenced the null origin.
    if (origin == null)
    {
        return null;
    }

    if (origin.Links.ContainsKey(path))
    {
        Console.WriteLine(string.Format(@"Path '{0}' already exists.", path));
        return null;
    }

    // Look the stored instance up with the same comparer Contains() used above.
    // Comparing with '==' against the freshly constructed newNode would be a
    // reference comparison (unless the operator is overloaded) and never match.
    IComparer<DirectoryTreeNode> comparer = m_nodes.Comparer;
    DirectoryTreeNode existingNode = m_nodes.First(x => comparer.Compare(x, newNode) == 0);
    origin.Links.Add(path, existingNode);
    return null;
}
/// <summary>
/// Fetches <paramref name="url"/> into a file beneath <paramref name="path"/>, registers the resulting
/// node in the directory tree, and then recursively scrapes every root-relative link (one starting
/// with '/') that GetLinks reports for the saved file.
/// </summary>
/// <param name="onNewLinkFound">Callback invoked with the absolute URI of the node returned by GetUrl.</param>
/// <param name="token">Cancellation token; once cancellation is requested no further URLs are processed.</param>
/// <param name="current">Tree node passed to AddLink as the origin of the fetched node.</param>
/// <param name="url">Absolute URL to fetch. A URL whose path is just "/" is rejected.</param>
/// <param name="path">Local base directory beneath which the URL's directory structure is recreated.</param>
/// <returns>A task that completes when this URL and all links followed from it have been processed.</returns>
async Task Scrape(Action<string> onNewLinkFound, CancellationToken token, DirectoryTreeNode current, Uri url, string path)
{
    if (token.IsCancellationRequested)
    {
        return;
    }

    try
    {
        if (url.AbsolutePath == "/")
        {
            Console.Error.WriteLine("Attempting to retrieve root again.");
            return;
        }

        string[] directory = url.AbsolutePath.Split('/').Where(x => x != "").ToArray();
        if (directory.Length == 0)
        {
            // A path made only of slashes (e.g. "//") has no usable segment;
            // directory.Last() below would otherwise throw.
            Console.Error.WriteLine("The path '{0}' is not well formed.", url.AbsolutePath);
            return;
        }

        int depth = directory.Length;
        string dataPath = "";

        // Recreate every parent directory of the target file (all segments but the last).
        foreach (string dir in directory.Take(directory.Length - 1))
        {
            dataPath = Path.Combine(dataPath, dir);
            // NOTE(review): AbsolutePath is the escaped form of the path; if 'path' or a segment
            // contains characters that get escaped, LocalPath may be what is wanted - confirm.
            string currentLevel = (new Uri(Path.Combine(path, dataPath))).AbsolutePath;
            if (!FileOrDirectoryExists(currentLevel))
            {
                Console.WriteLine("Creating Dir:@{1}:{0}", currentLevel, new System.Diagnostics.StackTrace(true).GetFrame(0).GetFileLineNumber());
                Directory.CreateDirectory(currentLevel);
            }
        }

        // If the file doesn't have an extension append html
        string filename = directory.Last();
        IfNecessaryAppendHtml(ref filename);
        dataPath = Path.Combine(path, Path.Combine(dataPath, filename));
        if (File.Exists(dataPath))
        {
            Console.Error.WriteLine("{0} already exists", dataPath);
            return;
        }

        // For htm/html files the depth handed to GetLinks excludes the file name itself.
        string extension = filename.Split('.').LastOrDefault(x => x != "");
        if (extension != null && extension.StartsWith("htm", StringComparison.Ordinal))
        {
            depth = directory.Length - 1;
        }

        DirectoryTreeNode node = await GetUrl(url, dataPath);
        onNewLinkFound(node.Path.AbsoluteUri);

        // NOTE(review): the node returned by AddLink was never used; links found below are still
        // attached to 'current' rather than to the page just fetched - confirm this is intended.
        m_directoryTree.AddLink(current, node.Path.AbsoluteUri, node.Status);

        Uri data;
        if (!Uri.TryCreate(dataPath, UriKind.RelativeOrAbsolute, out data))
        {
            Console.Error.WriteLine("The path '{0}' is not well formed.", dataPath);
            return;
        }

        List<string> resources = GetLinks(data, depth);
        foreach (string resource in resources)
        {
            // Stop walking sibling links as soon as cancellation is requested.
            if (token.IsCancellationRequested)
            {
                return;
            }

            if (string.IsNullOrEmpty(resource) || resource[0] != '/')
            {
                // File that is an outside resource
                continue;
            }

            // Root-relative link: resolve it against the scheme and authority of the current URL.
            Uri nextUrl;
            string urlInput = string.Format("{0}{1}{2}{3}", url.Scheme, Uri.SchemeDelimiter, url.Authority, resource);
            if (Uri.TryCreate(urlInput, UriKind.Absolute, out nextUrl))
            {
                await Scrape(onNewLinkFound, token, current, nextUrl, path);
            }
            else
            {
                Console.Error.WriteLine("Link '{0}' was of incorrect form.", urlInput);
            }
        }
    }
    catch (WebException e)
    {
        Console.WriteLine("WebExceptionMessage :" + e.Message);

        // Response is not guaranteed to be an HttpWebResponse; a hard cast could throw from
        // inside this handler and hide the original failure.
        HttpWebResponse response = e.Response as HttpWebResponse;
        if (e.Status == WebExceptionStatus.ProtocolError && response != null)
        {
            Console.WriteLine("Status Code : {0}", response.StatusCode);
            Console.WriteLine("Status Description : {0}", response.StatusDescription);
        }
    }
}
/// <summary>
/// Creates an empty tree: the root is null and the node set contains no nodes.
/// </summary>
public DirectoryTree()
{
    m_nodes = new SortedSet<DirectoryTreeNode>();
    m_root = null;
}