/// <summary>
/// Converts HTML to plain text.
/// </summary>
/// <remarks>
/// Warning: the returned text may still contain angle brackets that look like HTML,
/// because character entities in the input are present in the output as literal characters.
/// For example, if the original HTML is:
/// <code>
/// &lt;p&gt;&amp;lt;hello&amp;gt;&lt;/p&gt;
/// </code>
/// then the converted text will be:
/// <code>
/// &lt;hello&gt;
/// </code>
/// So if you intend to output the stripped text as HTML, you must HTML-encode it first.
/// </remarks>
/// <param name="input">Input HTML. May be <c>null</c>.</param>
/// <returns>
/// Plain text, which may contain angle brackets; <c>null</c> when <paramref name="input"/> is <c>null</c>.
/// </returns>
public static string RenderToPlainText(string input)
{
    if (input == null)
    {
        return null;
    }

    // Both the reader and the writer are IDisposable; scope them so they are always released,
    // even if Sanitize throws.
    using (var inputReader = new StringReader(input))
    using (var resultWriter = new StringWriter())
    {
        // Pre-size the output buffer to the input length to limit reallocations while writing.
        resultWriter.GetStringBuilder().EnsureCapacity(input.Length);
        // Use "\n" as the writer's line terminator regardless of the platform default.
        resultWriter.NewLine = "\n";

        var textRenderer = new PlainTextRenderer();
        textRenderer.Sanitize(inputReader, resultWriter);

        return resultWriter.ToString();
    }
}
/// <summary>
/// Produces an HTML-encoded excerpt of <paramref name="value"/> in which every occurrence of the
/// search words is wrapped in <c>&lt;span class="wordHighlight"&gt;</c>.
/// </summary>
/// <remarks>
/// The input is first converted to plain text with <c>PlainTextRenderer.RenderToPlainText</c>
/// (a <c>null</c> result is treated as an empty string). The search pattern is split into words
/// with <c>StringUtils.SmartSplit</c>; the words are matched literally and case-insensitively.
/// For every distinct matched string, at most <c>MAX_MATCH_CYCLES</c> occurrences contribute an
/// excerpt (the first occurrence is always included). The excerpt boundaries come from
/// <c>DefineStartPosition</c>, <c>DefineLength</c> and <c>CutText</c>, which are defined elsewhere.
/// </remarks>
/// <param name="value">HTML text to search in. May be <c>null</c>.</param>
/// <param name="searchPattern">Search words. When blank, no highlighting is performed.</param>
/// <returns>
/// HTML-encoded text. When the pattern is blank or nothing matches, the whole plain text is
/// returned HTML-encoded; otherwise the concatenated excerpts with highlighted matches.
/// </returns>
public static string Highlight(string value, string searchPattern)
{
    value = PlainTextRenderer.RenderToPlainText(value) ?? "";
    if (StringUtils.IsBlank(searchPattern))
    {
        // The text may contain angle braces read from html entities < and >, so encode it.
        return HttpUtility.HtmlEncode(value);
    }

    string[] words = StringUtils.SmartSplit(searchPattern);
    string alternatives = string.Join("|", words).Trim();

    // Build an alternation in which every character is wrapped in its own character class,
    // so regex metacharacters in the user's input are matched literally. '\' and '^' keep
    // a special meaning inside a class and therefore need an explicit escape.
    var pattern = new StringBuilder(alternatives.Length * 3);
    foreach (char c in alternatives)
    {
        switch (c)
        {
            case '|':
                pattern.Append(c);
                break;
            case '\\':
                pattern.Append("[\\\\]");
                break;
            case '^':
                pattern.Append("[\\^]");
                break;
            default:
                pattern.Append('[').Append(c).Append(']');
                break;
        }
    }

    var rx = new Regex(pattern.ToString(), RegexOptions.IgnorePatternWhitespace | RegexOptions.Multiline | RegexOptions.IgnoreCase);
    MatchCollection matches = rx.Matches(value);
    if (matches.Count == 0)
    {
        // The text may contain angle braces read from html entities < and >, so encode it.
        return HttpUtility.HtmlEncode(value);
    }

    // First pass: collect the excerpts surrounding the matches.
    var excerpt = new StringBuilder();
    var foundMatches = new Dictionary<string, int>();
    for (int i = 0; i < matches.Count; i++)
    {
        Match match = matches[i];

        int seen;
        if (!foundMatches.TryGetValue(match.Value, out seen))
        {
            foundMatches.Add(match.Value, 1);
        }
        else if (seen < MAX_MATCH_CYCLES)
        {
            // Store the incremented count. Assigning the result of a post-increment back to
            // the same slot would leave the counter unchanged and the cap would never apply.
            foundMatches[match.Value] = seen + 1;
        }
        else
        {
            // This matched string has already produced the maximum number of excerpts.
            continue;
        }

        int start = DefineStartPosition(match);
        int length = DefineLength(value, match, start);
        if (start == 0 && length == value.Length)
        {
            // The excerpt would span the whole text: use the text itself and stop collecting.
            excerpt.Length = 0;
            excerpt.Append(value);
            break;
        }

        excerpt.Append(CutText(value, start, length));
    }

    string text = excerpt.ToString();

    // Second pass: encode the excerpt and wrap every match in the highlighting span.
    var result = new StringBuilder(text.Length * 2);
    matches = rx.Matches(text);
    int offset = 0;
    for (int i = 0; i < matches.Count; i++)
    {
        Match match = matches[i];
        result.Append(HttpUtility.HtmlEncode(text.Substring(offset, match.Index - offset)));
        result.Append("<span class=\"wordHighlight\">").Append(HttpUtility.HtmlEncode(match.Value)).Append("</span>");
        offset = match.Index + match.Length;
    }

    // Append whatever follows the last match. Doing this after the loop also covers the case
    // where the excerpt contains no match at all, so the excerpt is never silently dropped.
    result.Append(HttpUtility.HtmlEncode(text.Substring(offset)));

    return result.ToString();
}
/// <summary>
/// Converts HTML to plain text.
/// </summary>
/// <remarks>
/// Warning: the returned text may still contain angle brackets that look like HTML,
/// because character entities in the input are present in the output as literal characters.
/// For example, if the original HTML is:
/// <code>
/// &lt;p&gt;&amp;lt;hello&amp;gt;&lt;/p&gt;
/// </code>
/// then the converted text will be:
/// <code>
/// &lt;hello&gt;
/// </code>
/// So if you intend to output the stripped text as HTML, you must HTML-encode it first.
/// </remarks>
/// <param name="input">Input HTML. May be <c>null</c>.</param>
/// <returns>
/// Plain text, which may contain angle brackets; <c>null</c> when <paramref name="input"/> is <c>null</c>.
/// </returns>
public static string RenderToPlainText(string input)
{
    if (input == null)
    {
        return null;
    }

    // Both the reader and the writer are IDisposable; scope them so they are always released,
    // even if Sanitize throws.
    using (var inputReader = new StringReader(input))
    using (var resultWriter = new StringWriter())
    {
        // Pre-size the output buffer to the input length to limit reallocations while writing.
        resultWriter.GetStringBuilder().EnsureCapacity(input.Length);
        // Use "\n" as the writer's line terminator regardless of the platform default.
        resultWriter.NewLine = "\n";

        var textRenderer = new PlainTextRenderer();
        textRenderer.Sanitize(inputReader, resultWriter);

        return resultWriter.ToString();
    }
}