/// <summary>
/// Parses the HTML held in the <c>stream</c> field with <see cref="SimpleHtmlParser"/>,
/// saves the resulting document into an in-memory buffer and reads it back with an
/// <see cref="XmlReader"/>, collecting the value of every non-empty text or whitespace node.
/// </summary>
/// <returns>The collected node values, in the order the reader encounters them.</returns>
public List<string> ExtractTextsHtmlParserSharp()
{
    stream.Seek(0, SeekOrigin.Begin);

    // The StreamReader is intentionally not disposed: disposing it would also close
    // the underlying stream, which this method rewinds on entry and so appears to be
    // shared across calls (assumption - confirm against the owning class).
    var simpleHtmlparser = new SimpleHtmlParser();
    var document = simpleHtmlparser.Parse(new StreamReader(stream));

    var texts = new List<string>();

    using (var memoryStream = new MemoryStream())
    {
        document.Save(memoryStream);
        memoryStream.Seek(0, SeekOrigin.Begin);

        // DTD processing is enabled so that a DOCTYPE in the saved document does not make the reader throw.
        var settings = new XmlReaderSettings { DtdProcessing = DtdProcessing.Parse };
        using (var reader = XmlReader.Create(memoryStream, settings))
        {
            while (reader.Read())
            {
                if (reader.NodeType != XmlNodeType.Text && reader.NodeType != XmlNodeType.Whitespace)
                {
                    continue;
                }

                var value = reader.Value;
                if (value == "")
                {
                    continue;
                }

                texts.Add(value);
            }
        }
    }

    return texts;
}
/// <summary>
/// Parses the HTML held in the <c>stream</c> field with <see cref="SimpleHtmlParser"/>,
/// saves the resulting document into an in-memory buffer and reads it back with an
/// <see cref="XmlReader"/>, collecting the <c>href</c> attribute of every <c>a</c> element
/// that has one.
/// </summary>
/// <returns>The collected <c>href</c> values, in the order the reader encounters them.</returns>
public List<string> ExtractLinksHtmlParserSharp()
{
    stream.Seek(0, SeekOrigin.Begin);

    var links = new List<string>();

    // The StreamReader is intentionally not disposed: disposing it would also close
    // the underlying stream, which this method rewinds on entry and so appears to be
    // shared across calls (assumption - confirm against the owning class).
    var simpleHtmlparser = new SimpleHtmlParser();
    var document = simpleHtmlparser.Parse(new StreamReader(stream));

    using (var memoryStream = new MemoryStream())
    {
        document.Save(memoryStream);
        memoryStream.Seek(0, SeekOrigin.Begin);

        // DTD processing is enabled so that a DOCTYPE in the saved document does not make the reader throw.
        var settings = new XmlReaderSettings { DtdProcessing = DtdProcessing.Parse };
        using (var reader = XmlReader.Create(memoryStream, settings))
        {
            while (reader.Read())
            {
                // Only anchor elements are of interest.
                if (reader.NodeType != XmlNodeType.Element || reader.Name != "a")
                {
                    continue;
                }

                var hrefAttributeValue = reader.GetAttribute("href");
                if (hrefAttributeValue == null)
                {
                    // Anchor without an href attribute: nothing to record.
                    continue;
                }

                links.Add(hrefAttributeValue);
            }
        }
    }

    return links;
}
/// <summary>
/// Runs <see cref="SimpleHtmlParser"/> over the fragment <c>&lt;svg x=y/&gt;</c> and discards the result.
/// </summary>
/// <remarks>
/// Judging by the method name, this input is presumably one that caused (or was suspected of
/// causing) the parser to hang; the method only needs the call to return.
/// </remarks>
public void SvgHang()
{
    var input = new StringReader("<svg x=y/>");
    var htmlParser = new SimpleHtmlParser();

    htmlParser.Parse(input);
}