/// <summary>
/// Runs the supplied markup through TidyNet and returns the part of the tidied
/// document that sits between the body tags.
/// </summary>
/// <param name="html">The markup to clean up.</param>
/// <returns>
/// The text that follows the first body-tag match in the tidied output (up to the
/// next body-tag match, if any), or the literal <c>[tidy error]</c> when the tidied
/// output contains no body tag at all.
/// </returns>
public static string TidyHtml(string html)
{
    TidyNet.Tidy tidy = new TidyNet.Tidy();

    // Output options.
    tidy.Options.DocType = TidyNet.DocType.Strict;
    tidy.Options.DropFontTags = true;
    tidy.Options.LogicalEmphasis = true;
    if (GlobalSettings.EditXhtmlMode == "true")
    {
        tidy.Options.Xhtml = true;
        tidy.Options.XmlOut = true;
    }
    else
    {
        tidy.Options.XmlOut = false;
        tidy.Options.Xhtml = false;
    }
    tidy.Options.MakeClean = true;
    tidy.Options.TidyMark = false;

    // The character encoding comes from configuration; the original author's note
    // says this is set "to avoid entity encoding".
    // NOTE(review): the input bytes and the output decoding below are always UTF-8,
    // whatever value is configured here - confirm the configured encoding is compatible.
    tidy.Options.CharEncoding = (TidyNet.CharEncoding)Enum.Parse(typeof(TidyNet.CharEncoding), UmbracoSettings.TidyCharEncoding);

    TidyNet.TidyMessageCollection tmc = new TidyNet.TidyMessageCollection();
    byte[] byteArray = System.Text.Encoding.UTF8.GetBytes(html);
    string tidyed;

    // Dispose both streams deterministically, even if Parse throws.
    using (MemoryStream input = new MemoryStream())
    using (MemoryStream output = new MemoryStream())
    {
        input.Write(byteArray, 0, byteArray.Length);
        input.Position = 0; // rewind so Parse reads from the start

        tidy.Parse(input, output, tmc);
        tidyed = System.Text.Encoding.UTF8.GetString(output.ToArray());
    }

    // Only return the body: split on opening/closing body tags and take the
    // segment after the first match.
    string regex = @"</{0,1}body[^>]*>";
    System.Text.RegularExpressions.RegexOptions options =
        System.Text.RegularExpressions.RegexOptions.IgnorePatternWhitespace
        | System.Text.RegularExpressions.RegexOptions.Multiline
        | System.Text.RegularExpressions.RegexOptions.IgnoreCase;
    System.Text.RegularExpressions.Regex reg = new System.Text.RegularExpressions.Regex(regex, options);
    string[] s = reg.Split(tidyed);

    // A single segment means no body tag was found in the output.
    if (s.Length > 1)
    {
        return s[1];
    }

    return "[tidy error]";
}
/// <summary>
/// Feeds the given markup to TidyNet and converts the messages it reports into
/// <see cref="Tidy.Result"/> entries.
/// </summary>
/// <param name="html">The markup to analyze. Null or empty input yields an empty array.</param>
/// <param name="isPart">
/// When true, <paramref name="html"/> is treated as a fragment and is wrapped in a
/// minimal doctype/html/head/body skeleton before being parsed.
/// </param>
/// <returns>One entry per TidyNet message for which <c>Tidy.Result.Parse</c> returned a non-null value.</returns>
public static Tidy.Result[] Analyze(string html, bool isPart)
{
    List<Tidy.Result> findings = new List<Tidy.Result>();
    if (string.IsNullOrEmpty(html))
    {
        return findings.ToArray();
    }

    string document = html;
    int wrapperLineCount = 0;
    if (isPart)
    {
        StringBuilder wrapped = new StringBuilder();
        wrapped.AppendLine(@"<!doctype html>");
        wrapped.AppendLine("<html><head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" /><title>Tidier</title></head><body>");
        // Two wrapper lines now precede the caller's markup.
        wrapperLineCount = 2;
        wrapped.AppendLine(html);
        wrapped.Append("</body></html>");
        document = wrapped.ToString();
    }

    byte[] payload = Encoding.UTF8.GetBytes(document);
    TidyNet.TidyMessageCollection messages = new TidyNet.TidyMessageCollection();

    using (MemoryStream source = new MemoryStream(payload))
    using (MemoryStream sink = new MemoryStream())
    {
        // The tidied output is discarded; only the collected messages are used.
        new TidyNet.Tidy().Parse(source, sink, messages);
    }

    // Lines are split from the caller's original markup, not the wrapped document;
    // the wrapper line count is handed to Tidy.Result.Parse alongside them
    // (presumably to offset reported line numbers - confirm in Tidy.Result.Parse).
    Utils.Text.Line[] originalLines = Utils.Text.Line.Split(html);
    foreach (TidyNet.TidyMessage message in messages)
    {
        Tidy.Result parsed = Tidy.Result.Parse(originalLines, message, wrapperLineCount);
        if (parsed == null)
        {
            continue;
        }

        findings.Add(parsed);
    }

    return findings.ToArray();
}
/// <summary>
/// Runs the supplied markup through TidyNet and returns the part of the tidied
/// document that sits between the body tags.
/// </summary>
/// <param name="html">The markup to clean up.</param>
/// <returns>
/// The text that follows the first body-tag match in the tidied output (up to the
/// next body-tag match, if any), or the literal <c>[tidy error]</c> when the tidied
/// output contains no body tag at all.
/// </returns>
public static string TidyHtml(string html)
{
    TidyNet.Tidy tidy = new TidyNet.Tidy();

    // Output options.
    tidy.Options.DocType = TidyNet.DocType.Strict;
    tidy.Options.DropFontTags = true;
    tidy.Options.LogicalEmphasis = true;
    if (GlobalSettings.EditXhtmlMode == "true")
    {
        tidy.Options.Xhtml = true;
        tidy.Options.XmlOut = true;
    }
    else
    {
        tidy.Options.XmlOut = false;
        tidy.Options.Xhtml = false;
    }
    tidy.Options.MakeClean = true;
    tidy.Options.TidyMark = false;

    // The character encoding comes from configuration; the original author's note
    // says this is set "to avoid entity encoding".
    // NOTE(review): the input bytes and the output decoding below are always UTF-8,
    // whatever value is configured here - confirm the configured encoding is compatible.
    tidy.Options.CharEncoding = (TidyNet.CharEncoding)Enum.Parse(typeof(TidyNet.CharEncoding), UmbracoConfig.For.UmbracoSettings().Content.TidyCharEncoding);

    TidyNet.TidyMessageCollection tmc = new TidyNet.TidyMessageCollection();
    byte[] byteArray = System.Text.Encoding.UTF8.GetBytes(html);
    string tidyed;

    // Dispose both streams deterministically, even if Parse throws.
    using (MemoryStream input = new MemoryStream())
    using (MemoryStream output = new MemoryStream())
    {
        input.Write(byteArray, 0, byteArray.Length);
        input.Position = 0; // rewind so Parse reads from the start

        tidy.Parse(input, output, tmc);
        tidyed = System.Text.Encoding.UTF8.GetString(output.ToArray());
    }

    // Only return the body: split on opening/closing body tags and take the
    // segment after the first match.
    string regex = @"</{0,1}body[^>]*>";
    System.Text.RegularExpressions.RegexOptions options =
        System.Text.RegularExpressions.RegexOptions.IgnorePatternWhitespace
        | System.Text.RegularExpressions.RegexOptions.Multiline
        | System.Text.RegularExpressions.RegexOptions.IgnoreCase;
    System.Text.RegularExpressions.Regex reg = new System.Text.RegularExpressions.Regex(regex, options);
    string[] s = reg.Split(tidyed);

    // A single segment means no body tag was found in the output.
    if (s.Length > 1)
    {
        return s[1];
    }

    return "[tidy error]";
}
/// <summary>
/// Converts the supplied markup by running it through TidyNet with the XHTML and
/// XML output options enabled, and returns the complete tidied document.
/// </summary>
/// <param name="strHtml">The markup to convert.</param>
/// <returns>The bytes TidyNet wrote to its output, decoded as UTF-8.</returns>
public static string Html2XHtml(string strHtml)
{
    TidyNet.Tidy tidy = new TidyNet.Tidy();

    // Output options.
    tidy.Options.Xhtml = true;
    tidy.Options.XmlOut = true;
    tidy.Options.MakeClean = true;
    tidy.Options.CharEncoding = TidyNet.CharEncoding.UTF8;

    // Dispose both streams deterministically, even if Parse throws.
    using (System.IO.MemoryStream input = new System.IO.MemoryStream(System.Text.Encoding.UTF8.GetBytes(strHtml)))
    using (System.IO.MemoryStream output = new System.IO.MemoryStream())
    {
        // The collected messages are not inspected; a throwaway collection is passed.
        tidy.Parse(input, output, new TidyNet.TidyMessageCollection());
        return System.Text.Encoding.UTF8.GetString(output.ToArray());
    }
}
/// <summary>
/// Feeds the given markup to TidyNet and converts the messages it reports into
/// <see cref="Tidy.Result"/> entries.
/// </summary>
/// <param name="html">The markup to analyze. Null or empty input yields an empty array.</param>
/// <param name="isPart">
/// When true, <paramref name="html"/> is treated as a fragment and is wrapped in a
/// minimal doctype/html/head/body skeleton before being parsed.
/// </param>
/// <returns>One entry per TidyNet message for which <c>Tidy.Result.Parse</c> returned a non-null value.</returns>
public static Tidy.Result[] Analyze(string html, bool isPart)
{
    List<Tidy.Result> collected = new List<Tidy.Result>();
    if (string.IsNullOrEmpty(html))
    {
        return collected.ToArray();
    }

    string markup = html;
    int prefixLines = 0;
    if (isPart)
    {
        StringBuilder page = new StringBuilder();
        page.AppendLine(@"<!doctype html>");
        page.AppendLine("<html><head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=UTF-8\" /><title>Tidier</title></head><body>");
        // Two wrapper lines now precede the caller's markup.
        prefixLines = 2;
        page.AppendLine(html);
        page.Append("</body></html>");
        markup = page.ToString();
    }

    byte[] bytes = Encoding.UTF8.GetBytes(markup);
    TidyNet.TidyMessageCollection reported = new TidyNet.TidyMessageCollection();

    using (MemoryStream reader = new MemoryStream(bytes))
    using (MemoryStream discard = new MemoryStream())
    {
        // The tidied output is discarded; only the collected messages are used.
        TidyNet.Tidy parser = new TidyNet.Tidy();
        parser.Parse(reader, discard, reported);
    }

    // Lines are split from the caller's original markup, not the wrapped document;
    // the prefix line count is handed to Tidy.Result.Parse alongside them
    // (presumably to offset reported line numbers - confirm in Tidy.Result.Parse).
    Utils.Text.Line[] lines = Utils.Text.Line.Split(html);
    foreach (TidyNet.TidyMessage entry in reported)
    {
        Tidy.Result item = Tidy.Result.Parse(lines, entry, prefixLines);
        if (item != null)
        {
            collected.Add(item);
        }
    }

    return collected.ToArray();
}