/// <summary>
/// Tidies markup held in a string and writes the resulting text to a stream.
/// </summary>
/// <param name="input">The markup to parse.</param>
/// <param name="output">
/// Stream that receives the tidied text. It is wrapped in a
/// <see cref="StreamWriter"/> that is disposed before this method returns,
/// which also closes the stream.
/// </param>
/// <param name="messages">Optional message collection, forwarded to the string overload.</param>
public void Parse(string input, Stream output, TidyMessageCollection messages = null)
{
    string tidied = Parse(input, messages);

    using (StreamWriter sink = new StreamWriter(output))
    {
        sink.Write(tidied);
    }
}
/// <summary>
/// Parses the input stream and returns the printed result as a string.
/// </summary>
/// <param name="input">Stream containing the markup to parse.</param>
/// <param name="messages">Optional message collection, forwarded to the stream-to-stream overload.</param>
/// <returns>Everything the stream-to-stream overload wrote into an in-memory buffer.</returns>
public string Parse(Stream input, TidyMessageCollection messages = null)
{
    MemoryStream buffer = new MemoryStream();
    Parse(input, buffer, messages);

    // Rewind so the reader starts at the first byte that was written.
    buffer.Position = 0;

    using (StreamReader reader = new StreamReader(buffer))
    {
        string text = reader.ReadToEnd();
        return text;
    }
}
/// <summary>
/// Parses the input stream and returns the result as a DOM document node.
/// The tree is printed to <paramref name="output"/> by ParseInternal only when
/// output is non-null, no errors were counted and slide bursting is off.
/// </summary>
/// <param name="input">Stream containing the markup to parse.</param>
/// <param name="output">Stream to print to, or null to skip printing.</param>
/// <param name="messages">
/// Message collection handed to the lexer. When null, an empty collection is
/// substituted, matching what Parse(Stream, Stream, ...) does.
/// </param>
/// <returns>
/// The Adapter of the parsed root node cast to <c>IDocument</c>, or null when
/// ParseInternal returns null.
/// </returns>
internal virtual IDocument ParseDom(Stream input, Stream output, TidyMessageCollection messages)
{
    // ParseInternal assigns this collection to the lexer and then reads
    // lexer.Messages.Errors, so never hand it a null reference.
    Node document = ParseInternal(input, output, messages ?? new TidyMessageCollection());

    return document != null ? (IDocument)document.Adapter : null;
}
/// <summary>
/// Parses the input stream with XML/XHTML output options switched on and
/// loads the resulting text into an <see cref="XElement"/>.
/// </summary>
/// <remarks>
/// The option values assigned here are written to <c>Options</c> and are not
/// restored afterwards.
/// </remarks>
/// <param name="input">Stream containing the markup to parse.</param>
/// <param name="messages">Optional message collection, forwarded to the stream overload.</param>
/// <returns>The element produced by <see cref="XElement.Parse(string)"/> from the parsed text.</returns>
public XElement ParseXml(Stream input, TidyMessageCollection messages = null)
{
    // Configure the output so that it can be fed to an XML parser.
    Options.DocType = DocType.Strict;
    Options.QuoteNbsp = false;
    Options.XmlOut = true;
    Options.Xhtml = true;

    string xhtml = Parse(input, messages);
    XElement root = XElement.Parse(xhtml);
    return root;
}
/// <summary>
/// Parses markup supplied as a string and returns the tidied text.
/// </summary>
/// <param name="input">The markup to parse.</param>
/// <param name="messages">Optional message collection, forwarded to the stream overload.</param>
/// <returns>The text returned by the stream-based Parse overload.</returns>
public string Parse(string input, TidyMessageCollection messages = null)
{
    var memoryStream = new MemoryStream();

    // NOTE(review): the text is encoded with StreamWriter's default encoding
    // (UTF-8); confirm this agrees with the character encoding the parser
    // is configured to read.
    using (var writer = new StreamWriter(memoryStream))
    {
        writer.Write(input);

        // StreamWriter buffers internally: without a flush the memory stream
        // may still be empty (or hold only part of the text) at this point.
        writer.Flush();

        // Writing leaves the position at the end of the data; rewind so the
        // parser reads from the first byte.
        memoryStream.Position = 0;

        return Parse(memoryStream, messages);
    }
}
/// <summary>
/// Internal routine that actually does the parsing. Builds a lexer over
/// <paramref name="input"/>, parses it into a node tree, applies the cleanup
/// passes selected by the options and, when no errors were counted, either
/// creates slides or prints the tree to <paramref name="output"/>.
/// </summary>
/// <param name="input">
/// Stream to read the markup from. When null, nothing is parsed and null is returned.
/// </param>
/// <param name="output">
/// Stream the tree is printed to. Only used when the error count is zero,
/// BurstSlides is off and the stream is non-null.
/// </param>
/// <param name="messages">
/// Collection assigned to the lexer's Messages property; its Errors count is
/// read afterwards. NOTE(review): not null-checked here. Parse(Stream, Stream, ...)
/// substitutes an empty collection before calling, ParseDom does not. Confirm
/// that Lexer tolerates a null value.
/// </param>
/// <returns>
/// The root node of the parsed document (returned even when errors were
/// counted), or null when <paramref name="input"/> is null or a
/// CheckNodeIntegrity call fails.
/// </returns>
internal Node ParseInternal(Stream input, Stream output, TidyMessageCollection messages)
{
    Node document = null;
    Out o = new OutImpl(); /* normal output stream */

    /* ensure config is self-consistent */
    _options.Adjust();

    if (input != null)
    {
        var lexer = new Lexer(new ClsStreamInImpl(input, _options.CharEncoding, _options.TabSize), _options)
        {
            Messages = messages
        };

        /*
         * store pointer to lexer in input stream
         * to allow character encoding errors to be
         * reported
         */
        lexer.Input.Lexer = lexer;

        /* Tidy doesn't alter the doctype for generic XML docs */
        Node doctype;
        if (_options.XmlTags)
        {
            document = ParserImpl.ParseXmlDocument(lexer);
        }
        else
        {
            /* NOTE(review): document is dereferenced without a null check; presumably ParseDocument never returns null - confirm */
            document = ParserImpl.ParseDocument(lexer);

            /* bail out before running any cleanup pass on a broken tree */
            if (!document.CheckNodeIntegrity())
            {
                Report.BadTree(lexer);
                return(null);
            }

            var cleaner = new Clean(_options.TagTable);

            /* simplifies <b><b> ... </b> ...</b> etc. */
            cleaner.NestedEmphasis(document);

            /* cleans up <dir>indented text</dir> etc. */
            cleaner.List2Bq(document);
            cleaner.Bq2Div(document);

            /* replaces i by em and b by strong */
            if (_options.LogicalEmphasis)
            {
                cleaner.EmFromI(document);
            }

            if (_options.Word2000 && cleaner.IsWord2000(document, _options.TagTable))
            {
                /* prune Word2000's <![if ...]> ... <![endif]> */
                cleaner.DropSections(lexer, document);

                /* drop style & class attributes and empty p, span elements */
                cleaner.CleanWord2000(lexer, document);
            }

            /* replaces presentational markup by style rules */
            if (_options.MakeClean || _options.DropFontTags)
            {
                cleaner.CleanTree(lexer, document);
            }

            /* re-check the tree after the cleanup passes have modified it */
            if (!document.CheckNodeIntegrity())
            {
                Report.BadTree(lexer);
                return(null);
            }

            /* looked up before the doctype fix-up below; passed to ReportVersion further down */
            doctype = document.FindDocType();

            if (document.Content != null)
            {
                if (_options.Xhtml)
                {
                    lexer.SetXhtmlDocType(document);
                }
                else
                {
                    lexer.FixDocType(document);
                }

                if (_options.TidyMark)
                {
                    lexer.AddGenerator(document);
                }
            }

            /* ensure presence of initial <?XML version="1.0"?> */
            if (_options.XmlOut && _options.XmlPi)
            {
                lexer.FixXmlPi(document);
            }

            if (document.Content != null)
            {
                Report.ReportVersion(lexer, doctype);
                Report.ReportNumWarnings(lexer);
            }
        }

        if (lexer.Messages.Errors > 0)
        {
            Report.NeedsAuthorIntervention(lexer);
        }

        o.State = StreamIn.FSM_ASCII;
        o.Encoding = _options.CharEncoding;

        /* slides and printed output are only produced for error-free input */
        if (lexer.Messages.Errors == 0)
        {
            PPrint pprint;
            if (_options.BurstSlides)
            {
                /*
                 * remove doctype to avoid potential clash with
                 * markup introduced when bursting into slides
                 */
                /* discard the document type */
                doctype = document.FindDocType();

                if (doctype != null)
                {
                    Node.DiscardElement(doctype);
                }

                /* slides use transitional features */
                lexer.Versions |= HtmlVersion.Html40Loose;

                /* and patch up doctype to match */
                if (_options.Xhtml)
                {
                    lexer.SetXhtmlDocType(document);
                }
                else
                {
                    lexer.FixDocType(document);
                }

                /* find the body element which may be implicit */
                Node body = document.FindBody(_options.TagTable);

                if (body != null)
                {
                    pprint = new PPrint(_options);
                    Report.ReportNumberOfSlides(lexer, pprint.CountSlides(body));
                    pprint.CreateSlides(lexer, document);
                }
                else
                {
                    Report.MissingBody(lexer);
                }
            }
            else if (output != null)
            {
                /* regular printing; this branch is not reached when BurstSlides is set */
                pprint = new PPrint(_options);
                o.Output = output;

                if (_options.XmlTags)
                {
                    pprint.PrintXmlTree(o, 0, 0, lexer, document);
                }
                else
                {
                    pprint.PrintTree(o, 0, 0, lexer, document);
                }

                pprint.FlushLine(o, 0);
            }
        }

        Report.ErrorSummary(lexer);
    }

    return(document);
}
/// <summary>
/// Parses the input stream and prints the result to the output stream.
/// </summary>
/// <param name="input">Stream containing the markup to parse.</param>
/// <param name="output">Stream that receives the printed result.</param>
/// <param name="messages">
/// Collection for the parser's messages. When null, a new empty collection is
/// created and used instead.
/// </param>
public void Parse(Stream input, Stream output, TidyMessageCollection messages = null)
{
    TidyMessageCollection collector = messages ?? new TidyMessageCollection();

    ParseInternal(input, output, collector);
}