/// <summary>
/// Reads <paramref name="stream"/> as text through an <c>SgmlReader</c> configured
/// for the "HTML" doctype with lower-case folding, and loads what the reader
/// produces into an <see cref="XDocument"/>.
/// </summary>
/// <param name="stream">Source stream; it is wrapped in a <see cref="StreamReader"/> that is disposed before this method returns.</param>
/// <param name="enc">Encoding handed to the <see cref="StreamReader"/>.</param>
/// <returns>The document returned by <c>XDocument.Load</c>.</returns>
public static XDocument StreamToXDocument(Stream stream, Encoding enc)
{
    using (var textReader = new StreamReader(stream, enc))
    using (var htmlReader = new SgmlReader())
    {
        // Same configuration order as before: doctype, case folding, then the input.
        htmlReader.DocType = "HTML";
        htmlReader.CaseFolding = CaseFolding.ToLower;
        htmlReader.InputStream = textReader;

        return XDocument.Load(htmlReader);
    }
}
public void Test_MoveToNextAttribute()
{
    // Make sure we can do MoveToElement after reading multiple attributes.
    var input = new StringReader("<test id='10' x='20'><a/><!--comment-->test</test>");
    var reader = new SgmlReader();
    reader.InputStream = input;

    // The first Read() must succeed before attributes are walked.
    Assert.IsTrue(reader.Read());

    // Trace the name of every attribute the reader steps onto.
    while (reader.MoveToNextAttribute())
    {
        Trace.WriteLine(reader.Name);
    }

    // Nothing more to trace when the reader cannot move back to the element.
    if (!reader.MoveToElement())
    {
        return;
    }

    Trace.WriteLine(reader.ReadInnerXml());
}
public void Test_for_illegal_char_value()
{
    // Input is a bare ampersand followed by text, with no terminating ';'.
    const string source = "&test";

    var reader = new SgmlReader();
    reader.DocType = "HTML";
    reader.WhitespaceHandling = true;
    reader.StripDocType = true;
    reader.InputStream = new StringReader(source);
    reader.CaseFolding = CaseFolding.ToLower;

    // Load the reader output as an element and inspect its text value.
    var element = System.Xml.Linq.XElement.Load(reader);
    string value = element.Value;

    Assert.IsFalse(string.IsNullOrEmpty(value), "element has no value");

    // The value must not end with char 65535, i.e. a -1 cast to char.
    char lastChar = value[value.Length - 1];
    Assert.AreNotEqual((char)65535, lastChar, "unexpected -1 as last char");
}
/// <summary>
/// Feeds <paramref name="source"/> to an <c>SgmlReader</c>, lets
/// <paramref name="callback"/> copy the reader into an <see cref="XmlTextWriter"/>,
/// verifies that the written text can be loaded by <see cref="XmlDocument"/>, and
/// returns that text.
/// </summary>
/// <param name="caseFolding">Value assigned to the reader's <c>CaseFolding</c> property.</param>
/// <param name="doctype">Value assigned to the reader's <c>DocType</c> property.</param>
/// <param name="format">
/// When true the writer uses <see cref="Formatting.Indented"/> and the reader's
/// <c>WhitespaceHandling</c> is set to false; when false it is the other way round.
/// </param>
/// <param name="source">Markup text to parse.</param>
/// <param name="callback">Invoked with the reader and the writer to produce the output.</param>
/// <returns>The written output, trimmed and with every "\r" removed.</returns>
private static string RunTest(CaseFolding caseFolding, string doctype, bool format, string source, XmlReaderTestCallback callback)
{
    // Base URI of the form rsrc://<assembly simple name>/<namespace>/ used to resolve
    // relative references against resources embedded in the test assembly.
    var pseudoBaseUri = new Uri(string.Format(
        "rsrc://{0}/{1}/",
        typeof(Tests).Assembly.FullName.Split(',')[0],
        typeof(Tests).Namespace));

    // initialize sgml reader
    XmlReader reader = new SgmlReader(
        new StringReader(source),
        pseudoBaseUri,
        pseudoUri => // Stream opener (Uri --> Stream callback)
        {
            // Turn the URI path segments into a dotted manifest resource name.
            var resourcePath = string.Join(
                ".",
                pseudoUri.PathAndQuery.Split(
                    new[] { '/' },
                    StringSplitOptions.RemoveEmptyEntries));
            return new StreamInformation
            {
                Stream = typeof(Tests).Assembly.GetManifestResourceStream(resourcePath),
                DefaultEncoding = Encoding.UTF8
            };
        })
    {
        CaseFolding = caseFolding,
        DocType = doctype,
        WhitespaceHandling = format == false
    };

    // check if we need to use the LoggingXmlReader
    if (_debug)
    {
        reader = new LoggingXmlReader(reader, Console.Out);
    }

    // initialize xml writer; the using blocks dispose both writers even when the
    // callback throws.
    string actual;
    using (var stringWriter = new StringWriter())
    using (var xmlTextWriter = new XmlTextWriter(stringWriter))
    {
        if (format)
        {
            xmlTextWriter.Formatting = Formatting.Indented;
        }

        callback(reader, xmlTextWriter);

        // Close explicitly before reading the text back, as the original code did.
        xmlTextWriter.Close();

        // reproduce the parsed document
        actual = stringWriter.ToString();
    }

    // ensure that output can be parsed again
    try
    {
        using (var stringReader = new StringReader(actual))
        {
            var doc = new XmlDocument();
            doc.Load(stringReader);
        }
    }
    catch (Exception e)
    {
        // Report why the output was rejected instead of discarding the exception.
        Assert.Fail("unable to parse sgml reader output ({0}):\n{1}", e.Message, actual);
    }

    return actual.Trim().Replace("\r", "");
}