/// <summary>
/// Converts the HTML read from <paramref name="reader"/> and writes the result to <paramref name="writer"/>.
/// </summary>
/// <remarks>
/// <para>Converts the contents of <paramref name="reader"/> from the <see cref="InputFormat"/> to the
/// <see cref="OutputFormat"/> and uses the <paramref name="writer"/> to write the resulting text.</para>
/// <para>A non-empty <c>Header</c> is written first and a non-empty <c>Footer</c> last; each one is run
/// through a <c>TextToHtml</c> converter when its format is <c>HeaderFooterFormat.Text</c> and is
/// written verbatim otherwise.</para>
/// <para>Start tags are handed to <c>HtmlTagCallback</c> (or <c>DefaultHtmlTagCallback</c> when none is set).
/// End tags are handed to the callback only when the context of the matching start tag requested it or
/// when no matching start tag was found; otherwise they are written directly unless flagged for deletion.
/// When <c>FilterHtml</c> is enabled, script start tags are never passed to the callback.</para>
/// </remarks>
/// <param name="reader">The text reader.</param>
/// <param name="writer">The text writer.</param>
/// <exception cref="System.ArgumentNullException">
/// <para><paramref name="reader"/> is <c>null</c>.</para>
/// <para>-or-</para>
/// <para><paramref name="writer"/> is <c>null</c>.</para>
/// </exception>
public override void Convert (TextReader reader, TextWriter writer)
{
	if (reader == null)
		throw new ArgumentNullException (nameof (reader));

	if (writer == null)
		throw new ArgumentNullException (nameof (writer));

	// Emit the optional header before any converted content.
	if (!string.IsNullOrEmpty (Header)) {
		if (HeaderFormat != HeaderFooterFormat.Text) {
			writer.Write (Header);
		} else {
			var headerConverter = new TextToHtml ();

			using (var headerReader = new StringReader (Header))
				headerConverter.Convert (headerReader, writer);
		}
	}

	using (var htmlWriter = new HtmlWriter (writer)) {
		var callback = HtmlTagCallback ?? DefaultHtmlTagCallback;
		// Contexts of the start tags that have been seen but not yet closed.
		var openTags = new List<HtmlToHtmlTagContext> ();
		var tokenizer = new HtmlTokenizer (reader);
		HtmlToken token;

		while (tokenizer.ReadNextToken (out token)) {
			// Anything that is not a tag is copied through as-is unless suppressed.
			if (token.Kind != HtmlTokenKind.Tag) {
				if (!SuppressContent (openTags))
					htmlWriter.WriteToken (token);

				continue;
			}

			var tag = (HtmlTagToken) token;

			if (tag.IsEndTag) {
				HtmlToHtmlTagContext opened = Pop (openTags, tag.Name);

				if (opened == null) {
					// No matching start tag on the stack: let the callback decide what to do.
					if (!SuppressContent (openTags)) {
						var unmatched = new HtmlToHtmlTagContext (tag);

						callback (unmatched, htmlWriter);
					}
				} else if (!SuppressContent (openTags)) {
					if (opened.InvokeCallbackForEndTag) {
						// Carry the decisions made for the start tag over to the end tag's context.
						var closing = new HtmlToHtmlTagContext (tag) {
							InvokeCallbackForEndTag = opened.InvokeCallbackForEndTag,
							SuppressInnerContent = opened.SuppressInnerContent,
							DeleteEndTag = opened.DeleteEndTag,
							DeleteTag = opened.DeleteTag
						};

						callback (closing, htmlWriter);
					} else if (!opened.DeleteEndTag) {
						htmlWriter.WriteEndTag (tag.Name);
					}
				}

				continue;
			}

			if (tag.IsEmptyElement) {
				// Self-closing tags never go on the stack.
				if (SuppressContent (openTags))
					continue;

				var selfClosing = new HtmlToHtmlTagContext (tag);

				if (!(FilterHtml && selfClosing.TagId == HtmlTagId.Script))
					callback (selfClosing, htmlWriter);

				continue;
			}

			var context = new HtmlToHtmlTagContext (tag);

			if (FilterHtml && context.TagId == HtmlTagId.Script) {
				// Filtered script element: flag the tag, its content and its end tag, and skip the callback.
				context.SuppressInnerContent = true;
				context.DeleteEndTag = true;
				context.DeleteTag = true;
			} else if (!SuppressContent (openTags)) {
				callback (context, htmlWriter);
			}

			openTags.Add (context);
		}

		htmlWriter.Flush ();
	}

	// Emit the optional footer after all converted content.
	if (!string.IsNullOrEmpty (Footer)) {
		if (FooterFormat != HeaderFooterFormat.Text) {
			writer.Write (Footer);
		} else {
			var footerConverter = new TextToHtml ();

			using (var footerReader = new StringReader (Footer))
				footerConverter.Convert (footerReader, writer);
		}
	}
}
/// <summary>
/// Tokenizes the file at <paramref name="path"/> and compares a textual dump of the token stream
/// against the contents of the sibling <c>.tokens</c> file.
/// </summary>
/// <remarks>
/// Carriage returns are stripped from the expected text and skipped in data tokens. When the
/// <c>.tokens</c> file does not exist, the expected text is empty and the generated dump is written
/// to that file before the final comparison is made.
/// </remarks>
/// <param name="path">The path of the HTML file to tokenize.</param>
static void VerifyHtmlTokenizerOutput (string path)
{
	var baselinePath = Path.ChangeExtension (path, ".tokens");
	var expected = string.Empty;

	if (File.Exists (baselinePath))
		expected = File.ReadAllText (baselinePath).Replace ("\r", "");

	var dump = new StringBuilder ();

	using (var input = File.OpenText (path)) {
		var tokenizer = new HtmlTokenizer (input);
		HtmlToken token;

		Assert.AreEqual (HtmlTokenizerState.Data, tokenizer.TokenizerState);

		while (tokenizer.ReadNextToken (out token)) {
			// Every dumped line starts with the token kind.
			dump.AppendFormat ("{0}: ", token.Kind);

			switch (token.Kind) {
			case HtmlTokenKind.Tag:
				var tag = (HtmlTagToken) token;

				dump.AppendFormat ("<{0}{1}", tag.IsEndTag ? "/" : "", tag.Name);

				foreach (var attribute in tag.Attributes) {
					if (attribute.Value == null)
						dump.AppendFormat (" {0}", attribute.Name);
					else
						dump.AppendFormat (" {0}={1}", attribute.Name, Quote (attribute.Value));
				}

				if (tag.IsEmptyElement)
					dump.Append ("/>");
				else
					dump.Append (">");

				dump.Append ('\n');
				break;
			case HtmlTokenKind.Comment:
				var comment = (HtmlCommentToken) token;

				dump.Append (comment.Comment);
				dump.Append ('\n');
				break;
			case HtmlTokenKind.DocType:
				var doctype = (HtmlDocTypeToken) token;

				if (doctype.ForceQuirksMode)
					dump.Append ("<!-- force quirks mode -->");

				dump.Append ("<!DOCTYPE");

				if (doctype.Name != null)
					dump.AppendFormat (" {0}", doctype.Name.ToUpperInvariant ());

				var publicId = doctype.PublicIdentifier;
				var systemId = doctype.SystemIdentifier;

				if (publicId == null) {
					if (systemId != null)
						dump.AppendFormat (" SYSTEM {0}", Quote (systemId));
				} else {
					dump.AppendFormat (" PUBLIC {0}", Quote (publicId));

					if (systemId != null)
						dump.AppendFormat (" {0}", Quote (systemId));
				}

				dump.Append (">");
				dump.Append ('\n');
				break;
			case HtmlTokenKind.ScriptData:
			case HtmlTokenKind.CData:
			case HtmlTokenKind.Data:
				var text = (HtmlDataToken) token;

				// Escape control characters so each data token stays on a single dumped line.
				for (int index = 0; index < text.Data.Length; index++) {
					var ch = text.Data[index];

					if (ch == '\r')
						continue;

					if (ch == '\f')
						dump.Append ("\\f");
					else if (ch == '\t')
						dump.Append ("\\t");
					else if (ch == '\n')
						dump.Append ("\\n");
					else
						dump.Append (ch);
				}

				dump.Append ('\n');
				break;
			default:
				Assert.Fail ("Unhandled token type: {0}", token.Kind);
				break;
			}
		}

		Assert.AreEqual (HtmlTokenizerState.EndOfFile, tokenizer.TokenizerState);
	}

	var actual = dump.ToString ();

	// Record the dump when no .tokens file exists yet.
	if (!File.Exists (baselinePath))
		File.WriteAllText (baselinePath, actual);

	Assert.AreEqual (expected, actual, "The token stream does not match the expected tokens.");
}