/// <summary>
/// Normalizes the URL of <paramref name="linkItem"/> and, when normalization
/// yields a non-null result, records the link in <c>parsedHtml.Links</c>.
/// </summary>
/// <param name="linkItem">
/// The link to register. Its <c>Url</c> is overwritten with the normalized
/// value before it is added.
/// </param>
private void AddLink(LinkItem linkItem)
{
    string normalized = LinkExtractor.NormalizeUrl(linkItem.Url);

    // A null result from NormalizeUrl means the link is not recorded at all.
    if (normalized == null)
    {
        return;
    }

    linkItem.Url = normalized;
    parsedHtml.Links.Add(linkItem);
}
/// <summary>
/// Runs the parse / link-extraction pipeline over every file matching
/// <c>*.htm</c> found directly in <paramref name="directory"/>, dumping the
/// intermediate and final results for each file.
/// </summary>
/// <param name="directory">Path of the folder whose files are processed.</param>
public static void Process(string directory)
{
    DirectoryInfo sourceFolder = new DirectoryInfo(directory);

    foreach (FileInfo htmlFile in sourceFolder.GetFiles("*.htm"))
    {
        string fullPath = htmlFile.FullName;

        // The reader stays open for the whole per-file pipeline, as before.
        using (StreamReader reader = File.OpenText(fullPath))
        {
            string markup = reader.ReadToEnd();

            // Stage 1: parse the raw markup into nodes and dump them.
            List<HtmlNode> parsedNodes = markupParser.Parse(markup);
            Dump(fullPath, markup, parsedNodes);

            // Stage 2: extract links with a fresh extractor per file, then
            // dump the populated result.
            LinkExtractor extractor = new LinkExtractor();
            ParsedHtml result = new ParsedHtml
            {
                Html = markup,
                Nodes = parsedNodes
            };
            extractor.Process(result, true);
            Dump(result);
        }
    }
}
/// <summary>
/// Creates a parser and sets up its two collaborators: a
/// <c>MarkupParser</c> and a <c>LinkExtractor</c>.
/// </summary>
public HtmlParser()
{
    // Construction order is kept: markup parser first, then link extractor.
    markupParser = new MarkupParser();
    linkExtractor = new LinkExtractor();
}