Пример #1
0
        /// <summary>
        /// Parses the HTML held in <c>stream</c> with <c>SimpleHtmlParser</c>, serializes the
        /// resulting document into an in-memory buffer, and collects the value of every
        /// non-empty text or whitespace node found while re-reading that buffer as XML.
        /// </summary>
        /// <returns>The collected node values, in the order the XML reader reports them.</returns>
        public List <string> ExtractTextsHtmlParserSharp()
        {
            // Rewind so parsing always starts at the beginning of the input.
            stream.Seek(0, SeekOrigin.Begin);

            var simpleHtmlparser = new SimpleHtmlParser();

            // The StreamReader is deliberately not disposed: disposing it would also close
            // `stream`, which is declared outside this method (presumably reused by
            // sibling methods - verify against the enclosing class).
            var document = simpleHtmlparser.Parse(new StreamReader(stream));

            var texts = new List <string>();

            using (var memoryStream = new MemoryStream())
            {
                document.Save(memoryStream);
                memoryStream.Seek(0, SeekOrigin.Begin);

                // The default DtdProcessing.Prohibit makes the reader throw on any DTD;
                // Parse lets a DOCTYPE in the serialized document through.
                // NOTE(review): confirm the input is trusted, since DTD parsing enables
                // entity expansion.
                var settings = new XmlReaderSettings {
                    DtdProcessing = DtdProcessing.Parse
                };

                using (var reader = XmlReader.Create(memoryStream, settings))
                {
                    while (reader.Read())
                    {
                        if (reader.NodeType != XmlNodeType.Text && reader.NodeType != XmlNodeType.Whitespace)
                        {
                            continue;
                        }

                        var value = reader.Value;
                        if (value == "")
                        {
                            continue;
                        }

                        texts.Add(value);
                    }
                }
            }

            return(texts);
        }
Пример #2
0
        /// <summary>
        /// Parses the HTML held in <c>stream</c> with <c>SimpleHtmlParser</c>, serializes the
        /// resulting document into an in-memory buffer, and collects the <c>href</c> attribute
        /// of every <c>a</c> element found while re-reading that buffer as XML.
        /// </summary>
        /// <returns>
        /// The <c>href</c> values in the order the XML reader reports the elements;
        /// <c>a</c> elements without an <c>href</c> attribute are skipped.
        /// </returns>
        public List <string> ExtractLinksHtmlParserSharp()
        {
            // Rewind so parsing always starts at the beginning of the input.
            stream.Seek(0, SeekOrigin.Begin);
            var links = new List <string>();

            var simpleHtmlparser = new SimpleHtmlParser();

            // The StreamReader is deliberately not disposed: disposing it would also close
            // `stream`, which is declared outside this method (presumably reused by
            // sibling methods - verify against the enclosing class).
            var document = simpleHtmlparser.Parse(new StreamReader(stream));

            using (var memoryStream = new MemoryStream())
            {
                document.Save(memoryStream);
                memoryStream.Seek(0, SeekOrigin.Begin);

                // The default DtdProcessing.Prohibit makes the reader throw on any DTD;
                // Parse lets a DOCTYPE in the serialized document through.
                // NOTE(review): confirm the input is trusted, since DTD parsing enables
                // entity expansion.
                var settings = new XmlReaderSettings {
                    DtdProcessing = DtdProcessing.Parse
                };

                using (var reader = XmlReader.Create(memoryStream, settings))
                {
                    while (reader.Read())
                    {
                        if (reader.NodeType != XmlNodeType.Element || reader.Name != "a")
                        {
                            continue;
                        }

                        // GetAttribute returns null when the attribute is absent.
                        var hrefAttributeValue = reader.GetAttribute("href");
                        if (hrefAttributeValue != null)
                        {
                            links.Add(hrefAttributeValue);
                        }
                    }
                }
            }

            return(links);
        }
Пример #3
0
        /// <summary>
        /// Feeds the fragment <c>&lt;svg x=y/&gt;</c> to <c>SimpleHtmlParser</c> and discards the result.
        /// The method name suggests this input once made the parser hang - TODO confirm;
        /// the method itself only requires that <c>Parse</c> returns.
        /// </summary>
        public void SvgHang()
        {
            new SimpleHtmlParser().Parse(new StringReader("<svg x=y/>"));
        }
Пример #4
0
 /// <summary>
 /// Runs <c>SimpleHtmlParser.Parse</c> over the fragment <c>&lt;svg x=y/&gt;</c>, ignoring the result.
 /// Judging by the name, this guards against a parser hang on that input - TODO confirm.
 /// </summary>
 public void SvgHang()
 {
     var htmlParser  = new SimpleHtmlParser();
     var svgFragment = new StringReader("<svg x=y/>");

     htmlParser.Parse(svgFragment);
 }