/// <summary>
/// Runs the supplied HTML through Tidy configured for XHTML output and
/// returns the result re-serialised via <see cref="XElement"/>.
/// </summary>
/// <param name="html">The HTML markup to clean up. It is encoded as UTF-8 before being handed to Tidy.</param>
/// <returns>
/// The string produced by <c>XElement.Parse(...).ToString()</c> on the tidied markup.
/// </returns>
/// <remarks>
/// Messages reported by Tidy are collected but not returned to the caller.
/// <c>XElement.Parse</c> throws if the tidied output is not a well-formed XML element.
/// </remarks>
public string DoTidy(string html)
{
    Tidy.Core.Tidy document = new Tidy.Core.Tidy();
    TidyMessageCollection messageCollection = new TidyMessageCollection();

    // Output shape: XHTML without a DOCTYPE, encoded as UTF-8.
    document.Options.DocType = DocType.Omit;
    document.Options.Xhtml = true;
    document.Options.CharEncoding = CharEncoding.Utf8;

    // Clean-up and formatting switches.
    document.Options.LogicalEmphasis = true;
    document.Options.MakeClean = false;
    document.Options.QuoteNbsp = false;
    document.Options.SmartIndent = false;
    document.Options.IndentContent = false;
    document.Options.TidyMark = false;
    document.Options.DropFontTags = false;
    document.Options.QuoteAmpersand = true;
    document.Options.DropEmptyParas = true;

    string tidyXhtml;

    // Wrap the encoded markup directly; the stream starts at position 0, so no
    // separate write-and-rewind step is needed. Both streams are disposed
    // deterministically once parsing has finished.
    using (MemoryStream input = new MemoryStream(Encoding.UTF8.GetBytes(html)))
    using (MemoryStream output = new MemoryStream())
    {
        document.Parse(input, output, messageCollection);

        // ToArray() copies the whole buffer, so take the copy exactly once.
        byte[] tidiedBytes = output.ToArray();
        tidyXhtml = Encoding.UTF8.GetString(tidiedBytes, 0, tidiedBytes.Length);
    }

    return XElement.Parse(tidyXhtml).ToString();
}
/// <summary>
/// Decodes the given slice of <paramref name="buffer"/> as UTF-8, runs it through
/// Tidy, and writes the tidied bytes to the underlying stream.
/// </summary>
/// <param name="buffer">The buffer containing the bytes to process.</param>
/// <param name="offset">Zero-based index in <paramref name="buffer"/> of the first byte to process.</param>
/// <param name="count">The number of bytes to process, starting at <paramref name="offset"/>.</param>
/// <remarks>
/// Each call is tidied on its own: the slice is decoded and parsed independently
/// of any earlier or later call.
/// </remarks>
public override void Write(byte[] buffer, int offset, int count)
{
    // Decode only the requested slice. Decoding the whole buffer would feed
    // bytes outside [offset, offset + count) into Tidy and on to the output.
    string html = Encoding.UTF8.GetString(buffer, offset, count);
    byte[] inputBytes = Encoding.UTF8.GetBytes(html);

    using (var input = new MemoryStream(inputBytes))
    using (var output = new MemoryStream())
    {
        _tidy.Parse(input, output, new TidyMessageCollection());

        // The output is round-tripped through a string as before, so the bytes
        // forwarded to the underlying stream are always a UTF-8 encoding.
        string result = Encoding.UTF8.GetString(output.ToArray());
        byte[] outdata = Encoding.UTF8.GetBytes(result);
        _stream.Write(outdata, 0, outdata.Length);
    }
}
/// <summary>
/// Tidies the supplied HTML with smart indentation and returns the result
/// with its space-based indentation converted to tabs.
/// </summary>
/// <param name="dirtyHtml">The HTML markup to clean up.</param>
/// <param name="messages">Receives the message collection that was passed to Tidy for this parse.</param>
/// <returns>
/// The tidied markup, with every occurrence of eight consecutive spaces replaced
/// by a single tab character.
/// </returns>
/// <remarks>
/// Input and output are converted with <see cref="Encoding.Default"/>.
/// </remarks>
public static string PrettyPrint(string dirtyHtml, out TidyMessageCollection messages)
{
    // Tidy indents with this many spaces; each such run becomes one tab below.
    const int indentWidth = 8;

    var formatter = new Tidy.Core.Tidy();
    formatter.Options.SmartIndent = true;
    formatter.Options.IndentAttributes = false;
    formatter.Options.WrapLen = 0;
    formatter.Options.Spaces = indentWidth;

    messages = new TidyMessageCollection();

    byte[] sourceBytes = Encoding.Default.GetBytes(dirtyHtml);
    using (var source = new MemoryStream(sourceBytes))
    {
        using (var sink = new MemoryStream())
        {
            formatter.Parse(source, sink, messages);

            string tidied = Encoding.Default.GetString(sink.ToArray());
            string indentUnit = new string(' ', indentWidth);
            return tidied.Replace(indentUnit, "\t");
        }
    }
}