예제 #1
0
        /// <summary>
        /// Normalizes the URL of <paramref name="linkItem"/> via
        /// <c>LinkExtractor.NormalizeUrl</c> and, if the result is non-null, writes it
        /// back to the item and appends the item to <c>parsedHtml.Links</c>.
        /// Items whose URL normalizes to <c>null</c> are silently skipped.
        /// </summary>
        /// <param name="linkItem">The link whose URL is normalized and, on success, recorded.</param>
        private void AddLink(LinkItem linkItem)
        {
            string normalized = LinkExtractor.NormalizeUrl(linkItem.Url);
            if (normalized == null)
            {
                // Nothing usable came back from normalization; leave the item untouched.
                return;
            }

            linkItem.Url = normalized;
            parsedHtml.Links.Add(linkItem);
        }
예제 #2
0
        /// <summary>
        /// Processes every file matching <c>*.htm</c> in <paramref name="directory"/>:
        /// the file's text is parsed into nodes and dumped, then a fresh
        /// <c>LinkExtractor</c> is run over the parsed result, which is dumped as well.
        /// </summary>
        /// <param name="directory">Path of the folder to scan for <c>*.htm</c> files.</param>
        public static void Process(string directory)
        {
            DirectoryInfo folder = new DirectoryInfo(directory);

            foreach (FileInfo file in folder.GetFiles("*.htm"))
            {
                string path = file.FullName;

                // Read the whole file in one call so the file handle is released
                // before parsing and dumping, instead of staying open for all of it.
                string html = File.ReadAllText(path);

                List<HtmlNode> nodes = markupParser.Parse(html);
                Dump(path, html, nodes);

                // A new extractor per file, as in the original; it may carry per-run state.
                LinkExtractor linkExtractor = new LinkExtractor();

                ParsedHtml parsedHtml = new ParsedHtml();
                parsedHtml.Html = html;
                parsedHtml.Nodes = nodes;

                linkExtractor.Process(parsedHtml, true);
                Dump(parsedHtml);
            }
        }
예제 #3
0
 /// <summary>
 /// Creates an <c>HtmlParser</c>, instantiating the <c>MarkupParser</c> and
 /// <c>LinkExtractor</c> it holds in <c>markupParser</c> and <c>linkExtractor</c>.
 /// </summary>
 public HtmlParser()
 {
     // Markup parser is created first, then the link extractor.
     markupParser = new MarkupParser();
     linkExtractor = new LinkExtractor();
 }