// Smoke test: feeds an HTML fragment (nested <p>/<html> wrappers, inline formatting tags,
// a self-closing <span>, <br /> paragraphs and HTML comments) to HtmlToXml.Process and
// checks that the returned string is not the empty string.
// NOTE(review): the meaning of the three boolean arguments is not visible from this test — confirm against HtmlToXml.Process.
public void Mondo_Web_HtmlToXml_Process()
{
    string html = "<p><html><p>Come enjoy this unique and exciting concert dedicated to <strong>Romanian </strong><span class=\"blah blah blah blah\" /> culture! The concert program will feature masterworks by Béla Bartók and George Enescu. </p><p><br /></p><p><strong>Experience </strong>the fire, drama and fine lyricism of Romanian Art Songs and Romanian Folk Dances by Béla Bartók, and Sonata No. 3 'in <strong><em><u>Romanian </u></em></strong>Folk Character' for <em>violin </em>and piano, one of the most popular and critically respected works by George Enescu. All of these pieces take Romanian folk dances and songs as their point of departure. </p><p><br /></p><p>Distinguished and versatile violinist Mikhail Shmidt, acclaimed soprano Marcy Stonikas, and sought-after pianist Oana <strike>Rusu </strike>Tomai will perform.</p><p><br /></p><!-- br--> <p>This concert is presented by the <em>Romanian-American Society of Washington State</em>.</p><p><br /></p><!-- br--> <p>All seating is general admission.</p></html></p>";

    HtmlToXml converter = new HtmlToXml();
    string converted = converter.Process(html, true, true, false);

    // Only guards against an empty result; the exact output is not pinned here.
    Assert.AreNotEqual("", converted);
}
/// <summary>
/// Opens <paramref name="file"/>, reads it through an <c>HtmlReader_v2</c>, converts it with
/// <c>HtmlToXml</c> and saves the generated document to <paramref name="xmlFile"/>.
/// </summary>
/// <param name="file">Path of the input file, opened with <c>zfile.OpenText</c>.</param>
/// <param name="xmlFile">Destination passed to <c>Save</c> on the generated document.</param>
/// <param name="traceHtmlReaderFile">
/// When not null, a trace writer is created on this path with <c>zFile.CreateText</c> and
/// indented JSON writer settings are stored for the duration of the call.
/// </param>
/// <param name="encoding">Forwarded to <c>zfile.OpenText</c>; defaults to null.</param>
public static void FileHtmlToXml_v2(string file, string xmlFile, string traceHtmlReaderFile, Encoding encoding = null)
{
    try
    {
        if (traceHtmlReaderFile != null)
        {
            __srTraceHtmlReader = zFile.CreateText(traceHtmlReaderFile);
            __traceJsonSettings = new JsonWriterSettings { Indent = true };
        }

        using (StreamReader input = zfile.OpenText(file, encoding))
        {
            HtmlReader_v2 reader = new HtmlReader_v2(input);
            // The trace callback is assigned whether or not a trace file was requested.
            reader.Trace = TraceHtmlReader;

            HtmlToXml converter = new HtmlToXml(reader);
            var document = converter.GenerateXDocument();
            document.Save(xmlFile);
        }
    }
    finally
    {
        // Always release the trace writer and reset the static trace state, even on failure.
        if (__srTraceHtmlReader != null)
        {
            __srTraceHtmlReader.Close();
            __srTraceHtmlReader = null;
        }

        __traceJsonSettings = null;
    }
}
/// <summary>
/// Opens <paramref name="file"/>, converts it with <c>HtmlToXml</c> and saves the generated
/// document to <paramref name="xmlFile"/>.
/// </summary>
/// <param name="file">Path of the input file, opened with <c>zfile.OpenText</c>.</param>
/// <param name="xmlFile">Destination passed to <c>Save</c> on the generated document.</param>
/// <param name="traceHtmlReaderFile">
/// Value assigned to the static <c>TraceHtmlReaderFile</c> member of both <c>HtmlReader</c>
/// and <c>HtmlReader_v2</c>.
/// </param>
/// <param name="htmlReaderVersion">Value assigned to the static <c>HtmlToXml.HtmlReaderVersion</c>; defaults to 2.</param>
/// <param name="encoding">Forwarded to <c>zfile.OpenText</c>; defaults to null.</param>
public static void FileHtmlToXml(string file, string xmlFile, string traceHtmlReaderFile, int htmlReaderVersion = 2, Encoding encoding = null)
{
    using (StreamReader input = zfile.OpenText(file, encoding))
    {
        // Static configuration is written only after the input file was opened successfully,
        // and it is not restored when the method returns.
        HtmlReader.TraceHtmlReaderFile = traceHtmlReaderFile;
        HtmlReader_v2.TraceHtmlReaderFile = traceHtmlReaderFile;
        HtmlToXml.HtmlReaderVersion = htmlReaderVersion;

        HtmlToXml converter = new HtmlToXml(input);
        var document = converter.GenerateXDocument();
        document.Save(xmlFile);
    }
}