/// <summary>
        /// Creates a tree whose root is <paramref name="root"/>; the root is also
        /// the first entry of the set of known nodes.
        /// </summary>
        /// <param name="root">The node stored as the root of the tree.</param>
        public DirectoryTree(DirectoryTreeNode root)
        {
            // The node set starts out holding exactly the root.
            m_nodes = new SortedSet<DirectoryTreeNode> { root };
            m_root = root;
        }
        /// <summary>
        /// Adds a new link to a current node.
        /// </summary>
        /// <remarks>
        /// If an equal node is already part of the tree, the existing instance is linked from
        /// <paramref name="origin"/> (unless <paramref name="origin"/> already has a link for
        /// <paramref name="path"/>) and <c>null</c> is returned.
        /// </remarks>
        /// <param name="origin">The current node. May be <c>null</c> only while the tree has no root.</param>
        /// <param name="path">The path to the linked node; must be an absolute URI.</param>
        /// <param name="status">The status code when reaching the link</param>
        /// <returns>The new discovered node, or <c>null</c> when the node was already known.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="path"/> is <c>null</c>, or <paramref name="origin"/> is <c>null</c> while the tree has a root.
        /// </exception>
        /// <exception cref="ArgumentException"><paramref name="path"/> is not an absolute URI.</exception>
        public DirectoryTreeNode AddLink(DirectoryTreeNode origin, string path, HttpStatusCode status)
        {
            if (path == null)
                throw new ArgumentNullException("path");
            if (origin == null && m_root != null)
                throw new ArgumentNullException("origin");

            Uri outUri;
            if (!Uri.TryCreate(path, UriKind.Absolute, out outUri))
                throw new ArgumentException("Incorrect type of Uri");

            DirectoryTreeNode newNode = new DirectoryTreeNode(outUri, status);
            if (!m_nodes.Contains(newNode))
            {
                // Brand new node: link it from the origin (if there is one) and register it.
                if (origin != null)
                    origin.Links.Add(path, newNode);

                m_nodes.Add(newNode);
                return newNode;
            }

            // The node is already known. Without an origin there is nothing to link from
            // (origin can legitimately be null while the tree has no root).
            if (origin == null)
                return null;

            if (origin.Links.ContainsKey(path))
            {
                Console.WriteLine(string.Format(@"Path '{0}' already exists.", path));
                return null;
            }

            // Look the stored instance up with the set's own comparer, i.e. the same notion of
            // equality Contains() used above. A plain '==' against the freshly created newNode
            // would be a reference comparison (unless the type overloads the operator) and
            // would never match.
            DirectoryTreeNode existingNode = m_nodes.First(x => m_nodes.Comparer.Compare(x, newNode) == 0);
            origin.Links.Add(path, existingNode);
            return null;
        }
Beispiel #3
0
		/// <summary>
		/// Retrieves <paramref name="url"/>, records the result in the directory tree and then
		/// recursively scrapes every root-relative link ("/...") found in the retrieved data.
		/// </summary>
		/// <param name="onNewLinkFound">Callback invoked with the absolute URI of the retrieved node.</param>
		/// <param name="token">Checked on entry; when cancellation was requested the call returns immediately.</param>
		/// <param name="current">The tree node passed to the directory tree as the origin of the link to <paramref name="url"/>.</param>
		/// <param name="url">The URL to retrieve.</param>
		/// <param name="path">The local directory below which the directories and the target file for <paramref name="url"/> are placed.</param>
		async Task Scrape(Action<string> onNewLinkFound, CancellationToken token, DirectoryTreeNode current, Uri url, string path)
		{
			if (token.IsCancellationRequested)
				return;

			try
			{
				string[] directory = url.AbsolutePath.Split('/').Where(x => x != "").ToArray();
				if (directory.Length == 0)
				{
					// "/" and paths made only of slashes (e.g. "//") have no segments at all.
					Console.Error.WriteLine("Attempting to retrieve root again.");
					return;
				}

				// Make sure every directory level above the file (all segments but the last) exists locally.
				string dataPath = "";
				foreach (string dir in directory.Take(directory.Length - 1))
				{
					dataPath = Path.Combine(dataPath, dir);
					string currentLevel = (new Uri(Path.Combine(path, dataPath))).AbsolutePath;
					if (!FileOrDirectoryExists(currentLevel))
					{
						Console.WriteLine("Creating Dir:@{1}:{0}", currentLevel, new System.Diagnostics.StackTrace(true).GetFrame(0).GetFileLineNumber());
						Directory.CreateDirectory(currentLevel);
					}
				}

				// If the file doesn't have an extension append html
				string filename = directory.Last();
				IfNecessaryAppendHtml(ref filename);
				dataPath = Path.Combine(path, Path.Combine(dataPath, filename));
				if (File.Exists(dataPath))
				{
					Console.Error.WriteLine("{0} already exists", dataPath);
					return;
				}

				// For htm/html files the file name itself does not count as a level.
				// NOTE(review): depth is only handed to GetLinks; its exact meaning there is not visible here.
				int depth = directory.Length;
				if (filename.Split('.').Where(x => x != "").Last().StartsWith("htm", StringComparison.Ordinal))
					depth = directory.Length - 1;

				DirectoryTreeNode node = await GetUrl(url, dataPath);
				if (node == null)
				{
					// Nothing was retrieved, so there is nothing to report or to follow.
					Console.Error.WriteLine("Unable to retrieve '{0}'.", url);
					return;
				}

				onNewLinkFound(node.Path.AbsoluteUri);
				DirectoryTreeNode directoryTreeNode = m_directoryTree.AddLink(current, node.Path.AbsoluteUri, node.Status);

				// Links found on this page originate from the node that was just added.
				// AddLink returns null when the node was already known; fall back to the caller's node then.
				DirectoryTreeNode origin = directoryTreeNode ?? current;

				Uri data;
				if (!Uri.TryCreate(dataPath, UriKind.RelativeOrAbsolute, out data))
				{
					Console.Error.WriteLine("The path '{0}' is not well formed.", dataPath);
					return;
				}

				List<string> resources = GetLinks(data, depth);

				foreach (string resource in resources)
				{
					// Only root-relative links are followed; anything else is an outside resource.
					if (string.IsNullOrEmpty(resource) || resource[0] != '/')
						continue;

					Uri nextUrl;
					string urlInput = string.Format("{0}{1}{2}{3}", url.Scheme, Uri.SchemeDelimiter, url.Authority, resource);
					if (Uri.TryCreate(urlInput, UriKind.Absolute, out nextUrl))
					{
						await Scrape(onNewLinkFound, token, origin, nextUrl, path);
					}
					else
					{
						Console.Error.WriteLine("Link '{0}' was of incorrect form.", urlInput);
					}
				}
			}
			catch (WebException e)
			{
				Console.WriteLine("WebExceptionMessage :" + e.Message);

				// The response is only an HttpWebResponse for HTTP(S) requests; do not assume it.
				HttpWebResponse response = e.Response as HttpWebResponse;
				if (e.Status == WebExceptionStatus.ProtocolError && response != null)
				{
					Console.WriteLine("Status Code : {0}", response.StatusCode);
					Console.WriteLine("Status Description : {0}", response.StatusDescription);
				}
			}
		}
 /// <summary>
 /// Creates an empty tree: the root is <c>null</c> and the set of known nodes is empty.
 /// </summary>
 public DirectoryTree()
 {
     m_nodes = new SortedSet<DirectoryTreeNode>();
     m_root = null;
 }