/// <summary>
/// Removes stray spaces and line breaks that sit directly inside a formatting tag pair,
/// e.g. "Hi &lt;i&gt; bad&lt;/i&gt; man!" becomes "Hi &lt;i&gt;bad&lt;/i&gt; man!".
/// </summary>
/// <param name="text">Text to clean. It is first passed through <c>HtmlUtil.FixUpperTags</c>.</param>
/// <param name="openTag">
/// Opening tag to process. "&lt;i&gt;", "&lt;b&gt;" and "&lt;u&gt;" map to their closing tags;
/// any other value containing "&lt;font " maps to "&lt;/font&gt;". For anything else the
/// closing tag is treated as an empty string.
/// </param>
/// <returns>The cleaned text, trimmed of leading and trailing white space.</returns>
public static string RemoveSpaceBeforeAfterTag(string text, string openTag)
{
    // NOTE(review): presumably normalizes upper-case tags to lower case - confirm in HtmlUtil.
    text = HtmlUtil.FixUpperTags(text);

    var closeTag = string.Empty;
    switch (openTag)
    {
        case "<i>":
            closeTag = "</i>";
            break;
        case "<b>":
            closeTag = "</b>";
            break;
        case "<u>":
            closeTag = "</u>";
            break;
    }

    if (closeTag.Length == 0 && openTag.Contains("<font ", StringComparison.Ordinal))
    {
        closeTag = "</font>";
    }

    // Open tag patterns
    var open1 = openTag + " ";
    var open2 = Environment.NewLine + openTag + " ";
    var open3 = openTag + Environment.NewLine;

    // Closing tag patterns
    var close1 = "! " + closeTag + Environment.NewLine;
    var close2 = "? " + closeTag + Environment.NewLine;
    var close3 = " " + closeTag;
    var close4 = " " + closeTag + Environment.NewLine;
    var close5 = Environment.NewLine + closeTag;

    // "! </i>\r\n" -> "!</i>\r\n"
    if (text.Contains(close1, StringComparison.Ordinal))
    {
        text = text.Replace(close1, "!" + closeTag + Environment.NewLine);
    }

    // "? </i>\r\n" -> "?</i>\r\n"
    if (text.Contains(close2, StringComparison.Ordinal))
    {
        text = text.Replace(close2, "?" + closeTag + Environment.NewLine);
    }

    // Text ends with " </i>" -> drop the space before the closing tag.
    if (text.EndsWith(close3, StringComparison.Ordinal))
    {
        text = text.Substring(0, text.Length - close3.Length) + closeTag;
    }

    // " </i>\r\n" -> "</i>\r\n"
    if (text.Contains(close4, StringComparison.Ordinal))
    {
        text = text.Replace(close4, closeTag + Environment.NewLine);
    }

    // Text starts with "<i> " -> drop the space after the opening tag.
    if (text.StartsWith(open1, StringComparison.Ordinal))
    {
        text = openTag + text.Substring(open1.Length);
    }

    // Text starts with "<i>\r\n" -> drop the line break after the opening tag.
    if (text.StartsWith(open3, StringComparison.Ordinal))
    {
        text = text.Remove(openTag.Length, Environment.NewLine.Length);
    }

    // Text ends with "\r\n</i>" -> drop the line break before the closing tag.
    // The offset is taken from the matched suffix itself. Deriving it from
    // openTag.Length + 1 is only right when the closing tag is exactly one character
    // longer than the opening tag, which is false for "<font ...>" tags.
    if (text.EndsWith(close5, StringComparison.Ordinal))
    {
        text = text.Remove(text.Length - close5.Length, Environment.NewLine.Length);
    }

    // "\r\n<i> " -> "\r\n<i>"
    if (text.Contains(open2, StringComparison.Ordinal))
    {
        text = text.Replace(open2, Environment.NewLine + openTag);
    }

    // Hi <i> bad</i> man! -> Hi <i>bad</i> man!
    text = text.Replace(" " + openTag + " ", " " + openTag);
    text = text.Replace(Environment.NewLine + openTag + " ", Environment.NewLine + openTag);

    // Hi <i>bad </i> man! -> Hi <i>bad</i> man!
    text = text.Replace(" " + closeTag + " ", closeTag + " ");
    text = text.Replace(" " + closeTag + Environment.NewLine, closeTag + Environment.NewLine);

    // Trimming can expose a leading "<i> " again, so re-check once.
    text = text.Trim();
    if (text.StartsWith(open1, StringComparison.Ordinal))
    {
        text = openTag + text.Substring(open1.Length);
    }

    return text;
}
/// <summary>
/// Re-flows <paramref name="text"/> into (at most) two lines by choosing a split position
/// near the middle of the tag-free text.
/// </summary>
/// <param name="text">Text to break. Returned unchanged when null or shorter than 3 characters.</param>
/// <param name="maximumLength">Maximum allowed length of a line; candidate split positions that violate it are discarded.</param>
/// <param name="mergeLinesShorterThan">
/// When the tag-free, single-line text is shorter than this value, the result of
/// <c>RemoveLineBreaks</c> is returned without inserting a new break.
/// </param>
/// <param name="language">Passed through to <c>CanBreak</c> when evaluating candidate split positions.</param>
/// <returns>
/// The original text for two-line dialogs, the unbroken text when it is short enough or has
/// fewer than two visible characters, otherwise the text with a line break inserted and
/// trailing white space trimmed.
/// </returns>
public static string AutoBreakLine(string text, int maximumLength, int mergeLinesShorterThan, string language)
{
    if (text == null || text.Length < 3)
    {
        return text;
    }

    // Do not auto-break dialogs: two lines that both start with '-' where the first line
    // (after trimming trailing quotes) ends a sentence or ends with "--" / '–'.
    if (Contains(text, '-') && text.Contains(Environment.NewLine))
    {
        var noTagLines = HtmlUtil.RemoveHtmlTags(text, true).SplitToLines();
        if (noTagLines.Length == 2)
        {
            var arr0 = noTagLines[0].Trim().TrimEnd('"', '\'').TrimEnd();
            if (arr0.StartsWith('-') && noTagLines[1].TrimStart().StartsWith('-') && arr0.Length > 1 && (Contains(".?!)]", arr0[arr0.Length - 1]) || arr0.EndsWith("--", StringComparison.Ordinal) || arr0.EndsWith('–')))
            {
                return text;
            }
        }
    }

    string s = RemoveLineBreaks(text);
    if (HtmlUtil.RemoveHtmlTags(s, true).Length < mergeLinesShorterThan)
    {
        return s;
    }

    // Kept so the unbroken text (tags included) can be returned if nothing can be split.
    string singleLine = s;

    // Strip font/u/b/i tags from s, remembering each tag by the index (in the tag-free
    // text) at which it was found so it can be re-inserted after the split is chosen.
    var htmlTags = new Dictionary<int, string>();
    var sb = new StringBuilder();
    int six = 0;
    while (six < s.Length)
    {
        var letter = s[six];
        bool tagFound = false;
        if (letter == '<')
        {
            string tagString = s.Substring(six);
            tagFound = tagString.StartsWith("<font", StringComparison.OrdinalIgnoreCase)
                       || tagString.StartsWith("</font", StringComparison.OrdinalIgnoreCase)
                       || tagString.StartsWith("<u", StringComparison.OrdinalIgnoreCase)
                       || tagString.StartsWith("</u", StringComparison.OrdinalIgnoreCase)
                       || tagString.StartsWith("<b", StringComparison.OrdinalIgnoreCase)
                       || tagString.StartsWith("</b", StringComparison.OrdinalIgnoreCase)
                       || tagString.StartsWith("<i", StringComparison.OrdinalIgnoreCase)
                       || tagString.StartsWith("</i", StringComparison.OrdinalIgnoreCase);
        }

        int endIndex = -1;
        if (tagFound)
        {
            endIndex = s.IndexOf('>', six + 1);
        }

        if (tagFound && endIndex > 0)
        {
            string tag = s.Substring(six, endIndex - six + 1);
            s = s.Remove(six, tag.Length);

            // Consecutive tags at the same position are concatenated in encounter order.
            string existing;
            if (htmlTags.TryGetValue(six, out existing))
            {
                htmlTags[six] = existing + tag;
            }
            else
            {
                htmlTags.Add(six, tag);
            }
        }
        else
        {
            sb.Append(letter);
            six++;
        }
    }

    s = sb.ToString();

    // With fewer than two visible characters there is nothing to split, and the
    // last-resort branch below would call Insert(mid - 1, ...) with a negative index.
    if (s.Length < 2)
    {
        return singleLine;
    }

    int splitPos = -1;
    int mid = s.Length / 2;

    // Try to find " - " with an uppercase letter after it (dialog), searching outwards from the middle.
    if (s.Contains(" - "))
    {
        for (int j = 0; j <= (maximumLength / 2) + 5; j++)
        {
            if (mid + j + 4 < s.Length)
            {
                if (s[mid + j] == '-' && s[mid + j + 1] == ' ' && s[mid + j - 1] == ' ')
                {
                    string rest = s.Substring(mid + j + 1).TrimStart();
                    if (rest.Length > 0 && char.IsUpper(rest[0]))
                    {
                        splitPos = mid + j;
                        break;
                    }
                }
            }

            if (mid - (j + 1) > 4)
            {
                if (s[mid - j] == '-' && s[mid - j + 1] == ' ' && s[mid - j - 1] == ' ')
                {
                    string rest = s.Substring(mid - j + 1).TrimStart();
                    if (rest.Length > 0 && char.IsUpper(rest[0]))
                    {
                        if (mid - j > 5 && s[mid - j - 1] == ' ')
                        {
                            // To the left of the middle, the dash must also follow a sentence end.
                            if (Contains("!?.", s[mid - j - 2]))
                            {
                                splitPos = mid - j;
                                break;
                            }

                            var first = s.Substring(0, mid - j - 1);
                            if (first.EndsWith(".\"", StringComparison.Ordinal) || first.EndsWith("!\"", StringComparison.Ordinal) || first.EndsWith("?\"", StringComparison.Ordinal))
                            {
                                splitPos = mid - j;
                                break;
                            }
                        }
                    }
                }
            }
        }
    }

    if (splitPos == maximumLength + 1 && s[maximumLength] != ' ') // only allow space for last char (as it does not count)
    {
        splitPos = -1;
    }

    // Next preference: break right after sentence-ending punctuation within 15 characters of the middle.
    if (splitPos < 0)
    {
        const string expectedChars1 = ".!?0123456789";
        const string expectedChars2 = ".!?";
        for (int j = 0; j < 15; j++)
        {
            if (mid + j + 1 < s.Length && mid + j > 0)
            {
                if (Contains(expectedChars2, s[mid + j]) && !IsPartOfNumber(s, mid + j) && CanBreak(s, mid + j + 1, language))
                {
                    splitPos = mid + j + 1;
                    if (Contains(expectedChars1, s[splitPos]))
                    {
                        // Do not break double/triple end-of-sentence runs like "!!!" or "...".
                        splitPos++;

                        // NOTE(review): this re-tests the same character as the check above, so it is
                        // always true here and splitPos advances by two. Possibly s[splitPos] was
                        // intended - confirm before changing.
                        if (Contains(expectedChars1, s[mid + j + 1]))
                        {
                            splitPos++;
                        }
                    }

                    break;
                }

                if (Contains(expectedChars2, s[mid - j]) && !IsPartOfNumber(s, mid - j) && CanBreak(s, mid - j, language))
                {
                    splitPos = mid - j;
                    splitPos++;
                    break;
                }
            }
        }
    }

    if (splitPos > maximumLength) // too long first line
    {
        if (splitPos != maximumLength + 1 || s[maximumLength] != ' ') // allow for maxlength+1 char to be space (does not count)
        {
            splitPos = -1;
        }
    }
    else if (splitPos >= 0 && s.Length - splitPos > maximumLength) // too long second line
    {
        splitPos = -1;
    }

    // Next preference: break at punctuation, comma or space within 25 characters of the middle.
    if (splitPos < 0)
    {
        const string expectedChars1 = ".!?, ";
        const string expectedChars2 = " .!?";
        const string expectedChars3 = ".!?";
        for (int j = 0; j < 25; j++)
        {
            if (mid + j + 1 < s.Length && mid + j > 0)
            {
                if (Contains(expectedChars1, s[mid + j]) && !IsPartOfNumber(s, mid + j) && s.Length > mid + j + 2 && CanBreak(s, mid + j, language))
                {
                    splitPos = mid + j;
                    if (Contains(expectedChars2, s[mid + j + 1]))
                    {
                        splitPos++;
                        if (Contains(expectedChars2, s[mid + j + 2]))
                        {
                            splitPos++;
                        }
                    }

                    break;
                }

                if (Contains(expectedChars1, s[mid - j]) && !IsPartOfNumber(s, mid - j) && s.Length > mid + j + 2 && CanBreak(s, mid - j, language))
                {
                    splitPos = mid - j;

                    // Step back over up to three sentence-ending characters.
                    if (Contains(expectedChars3, s[splitPos]))
                    {
                        splitPos--;
                    }

                    if (Contains(expectedChars3, s[splitPos]))
                    {
                        splitPos--;
                    }

                    if (Contains(expectedChars3, s[splitPos]))
                    {
                        splitPos--;
                    }

                    break;
                }
            }
        }
    }

    // Last resort: no acceptable position was found. A line break is inserted one character
    // before the middle, the tags are re-inserted, and the break is then replaced with "-";
    // the split itself happens at mid below. The tag map is reset because the tags are
    // already back in s.
    if (splitPos < 0)
    {
        splitPos = mid;
        s = s.Insert(mid - 1, Environment.NewLine);
        s = ReInsertHtmlTags(s, htmlTags);
        htmlTags = new Dictionary<int, string>();
        s = s.Replace(Environment.NewLine, "-");
    }

    if (splitPos < s.Length - 2)
    {
        s = s.Substring(0, splitPos) + Environment.NewLine + s.Substring(splitPos);
    }

    s = ReInsertHtmlTags(s, htmlTags);

    // If a closing tag ended up as the first thing on the second line, move it in front of the line break.
    var idx = s.IndexOf(Environment.NewLine + "</", StringComparison.Ordinal);
    if (idx > 2)
    {
        var endIdx = s.IndexOf('>', idx + 2);
        if (endIdx > idx)
        {
            var tag = s.Substring(idx + Environment.NewLine.Length, endIdx - (idx + Environment.NewLine.Length) + 1);
            s = s.Insert(idx, tag);
            s = s.Remove(idx + tag.Length + Environment.NewLine.Length, tag.Length);
        }
    }

    // Remove spaces directly around the line break.
    s = s.Replace(" " + Environment.NewLine, Environment.NewLine);
    s = s.Replace(Environment.NewLine + " ", Environment.NewLine);
    return s.TrimEnd();
}
/// <summary>
/// Counts the words in <paramref name="source"/> after passing it through
/// <c>HtmlUtil.RemoveHtmlTags</c>. Words are the non-empty pieces obtained by splitting
/// on space, line feed and carriage return.
/// </summary>
/// <param name="source">Text to count words in; may contain tags.</param>
/// <returns>The number of non-empty, white-space separated pieces.</returns>
public static int CountWords(this string source)
{
    // The second argument is forwarded as-is; its meaning is defined by HtmlUtil.
    var plainText = HtmlUtil.RemoveHtmlTags(source, true);
    var separators = new[] { ' ', '\n', '\r' };
    var words = plainText.Split(separators, StringSplitOptions.RemoveEmptyEntries);
    return words.Length;
}