// Builds a page whose head carries a base element pointing at baseUrl and whose
// body contains a single anchor with the given href, runs it through
// LinkExtractor with http://y.com as the document URL, and asserts that the
// extracted links (compared by their string form, order ignored) are exactly
// the one expected value.
public void AHrefWithBase(string baseUrl, string href, string expected) {
    const string pageTemplate = "<html><head><base href='{0}'></head><body><div><a href='{1}'>a</a></div></body></html>";

    var documentUrl = new Uri("http://y.com");
    var markup = string.Format(pageTemplate, baseUrl, href);

    var extractor = new LinkExtractor();
    var extracted = extractor.ExtractLinks(documentUrl, Read(markup), MediaTypeNames.Text.Html);

    // Compare textual forms so the assertion does not depend on the link type's equality.
    var actual = extracted.Select(link => link.ToString());
    Assert.AreElementsEqualIgnoringOrder(new[] { expected }, actual);
}
// Builds a minimal page containing one anchor with the given href, runs it
// through LinkExtractor using url as the document URL, and asserts that the
// extracted links (compared by their string form, order ignored) are exactly
// the one expected value.
public void SimpleAHref(string url, string href, string expected) {
    const string pageTemplate = "<html><body><div><a href='{0}'>a</a></div></body></html>";

    var documentUrl = new Uri(url);
    var markup = string.Format(pageTemplate, href);

    var extractor = new LinkExtractor();
    var extracted = extractor.ExtractLinks(documentUrl, Read(markup), MediaTypeNames.Text.Html);

    // Compare textual forms so the assertion does not depend on the link type's equality.
    var actual = extracted.Select(link => link.ToString());
    Assert.AreElementsEqualIgnoringOrder(new[] { expected }, actual);
}
// Program entry point: downloads arguments.Url and, unless link following is
// disabled (LinkFollowingRule.None), keeps downloading the links extracted from
// each fetched document in FIFO (queue) order.
//
// Every URL is recorded in the `seen` set at the moment it is queued, so a URL
// referenced from several pages (or several times on one page) is downloaded at
// most once, and a URL whose download failed is not retried.
//
// A failed download is reported in red and skipped; the crawl continues with
// the next queued URL. The console cursor is hidden for the duration of the run
// and is restored even if the crawl terminates with an exception.
private static void Main(ApplicationArguments arguments) {
    EnsureConsoleWindowHandle();

    if (arguments.IgnoreCertificateValidation) {
        // NOTE(review): this accepts every server certificate process-wide. It is
        // opt-in via the arguments, but it disables TLS validation entirely.
        ServicePointManager.ServerCertificateValidationCallback = (x1, x2, x3, x4) => true;
    }

    Console.Title = "LightGet";
    Console.CursorVisible = false;
    try {
        var loggerForDownloader = new LoggerForDownloader { Prefix = ConsoleIndent };
        var mapper = new UrlToPathMapper(arguments.OutputPathFormat, arguments.Url);
        var directory = new DirectoryInfo(Directory.GetCurrentDirectory());
        var downloader = new Downloader(
            loggerForDownloader,
            (url, fileName) => new FileInfo(Path.Combine(directory.FullName, mapper.GetPath(url, fileName)))
        );
        var extractor = new LinkExtractor();
        var credentials = arguments.User != null
            ? new NetworkCredential(arguments.User, arguments.Password)
            : null;

        // URLs that have been queued at any point (whether or not they were
        // downloaded successfully). Membership is checked before enqueueing.
        var seen = new HashSet<Uri>();
        var queue = new Queue<Uri>();

        seen.Add(arguments.Url);
        queue.Enqueue(arguments.Url);

        while (queue.Count > 0) {
            var url = queue.Dequeue();
            ConsoleUI.WriteLine(ConsoleColor.White, url);

            DownloaderResult result;
            try {
                result = Download(downloader, url, credentials);
            }
            catch (Exception ex) {
                // Best effort: report the failure and move on to the next URL.
                ConsoleUI.WriteLine(ConsoleColor.Red, ex.Message);
                continue;
            }
            finally {
                // Blank separator line after each attempt, successful or not.
                Console.WriteLine();
            }

            if (arguments.FollowLinks == LinkFollowingRule.None)
                break;

            using (var reader = result.File.OpenText()) {
                var links = extractor.ExtractLinks(result.Url, reader, result.ContentType);
                foreach (var link in links) {
                    if (seen.Contains(link) || !ShouldFollow(link, arguments))
                        continue;

                    // Mark as seen now, not after download, so the same link found
                    // again before it is dequeued is not queued a second time.
                    seen.Add(link);
                    queue.Enqueue(link);
                }
            }
        }
    }
    finally {
        // Always give the cursor back to the user's console.
        Console.CursorVisible = true;
    }
}