Пример #1
0
        /// <summary>
        /// Removes superfluous spaces and line breaks that directly follow an opening
        /// formatting tag or directly precede its closing tag, e.g.
        /// "Hi &lt;i&gt; bad &lt;/i&gt; man!" becomes "Hi &lt;i&gt;bad&lt;/i&gt; man!".
        /// </summary>
        /// <param name="text">The text to clean. It is first passed through <c>HtmlUtil.FixUpperTags</c>.</param>
        /// <param name="openTag">
        /// The opening tag to tidy up. "&lt;i&gt;", "&lt;b&gt;" and "&lt;u&gt;" map to their closing
        /// counterparts; any value containing "&lt;font " maps to "&lt;/font&gt;". For any other
        /// value the closing tag is the empty string.
        /// </param>
        /// <returns>The cleaned text, trimmed of leading and trailing white space.</returns>
        public static string RemoveSpaceBeforeAfterTag(string text, string openTag)
        {
            text = HtmlUtil.FixUpperTags(text);
            var closeTag = string.Empty;

            switch (openTag)
            {
                case "<i>":
                    closeTag = "</i>";
                    break;

                case "<b>":
                    closeTag = "</b>";
                    break;

                case "<u>":
                    closeTag = "</u>";
                    break;
            }

            if (closeTag.Length == 0 && openTag.Contains("<font ", StringComparison.Ordinal))
            {
                closeTag = "</font>";
            }

            var newLine = Environment.NewLine;

            // Open tag patterns
            var open1 = openTag + " ";
            var open2 = newLine + openTag + " ";
            var open3 = openTag + newLine;

            // Closing tag patterns
            var close1 = "! " + closeTag + newLine;
            var close2 = "? " + closeTag + newLine;
            var close3 = " " + closeTag;
            var close4 = " " + closeTag + newLine;
            var close5 = newLine + closeTag;

            // string.Replace is an ordinal no-op when the pattern is absent, so no Contains guard is needed.
            // e.g.: "! </i>\r\nFoobar" -> "!</i>\r\nFoobar"
            text = text.Replace(close1, "!" + closeTag + newLine);
            text = text.Replace(close2, "?" + closeTag + newLine);

            // e.g.: "Foobar </i>" (at the very end) -> "Foobar</i>"
            if (text.EndsWith(close3, StringComparison.Ordinal))
            {
                text = text.Substring(0, text.Length - close3.Length) + closeTag;
            }

            text = text.Replace(close4, closeTag + newLine);

            // e.g.: "<i> Foobar" (at the very start) -> "<i>Foobar"
            if (text.StartsWith(open1, StringComparison.Ordinal))
            {
                text = openTag + text.Substring(open1.Length);
            }

            // e.g.: "<i>\r\nFoobar" (at the very start) -> "<i>Foobar"
            if (text.StartsWith(open3, StringComparison.Ordinal))
            {
                text = text.Remove(openTag.Length, newLine.Length);
            }

            // e.g.: "Foobar\r\n</i>" (at the very end) -> "Foobar</i>"
            // The line break starts closeTag.Length + newLine.Length characters before the end.
            // (Deriving the offset from openTag.Length is only right for one-letter tags and
            // breaks for "<font ...>", whose opening tag is longer than "</font>".)
            if (text.EndsWith(close5, StringComparison.Ordinal))
            {
                text = text.Remove(text.Length - closeTag.Length - newLine.Length, newLine.Length);
            }

            text = text.Replace(open2, newLine + openTag);

            // Hi <i> bad</i> man! -> Hi <i>bad</i> man!
            text = text.Replace(" " + openTag + " ", " " + openTag);
            text = text.Replace(newLine + openTag + " ", newLine + openTag);

            // Hi <i>bad </i> man! -> Hi <i>bad</i> man!
            text = text.Replace(" " + closeTag + " ", closeTag + " ");
            text = text.Replace(" " + closeTag + newLine, closeTag + newLine);

            // Trimming may expose an opening tag followed by a space at the start again.
            text = text.Trim();
            if (text.StartsWith(open1, StringComparison.Ordinal))
            {
                text = openTag + text.Substring(open1.Length);
            }

            return text;
        }
Пример #2
0
        /// <summary>
        /// Re-breaks a text into (at most) two lines, looking for a natural split position
        /// close to the middle of the text.
        /// </summary>
        /// <remarks>
        /// Steps, in order:
        /// 1. Null/very short text and two-line dialogs are returned unchanged.
        /// 2. Line breaks are removed via RemoveLineBreaks; if the tag-free result is shorter than
        ///    <paramref name="mergeLinesShorterThan"/> it is returned as is.
        /// 3. Formatting tags are stripped and remembered by position.
        /// 4. A split position is searched: first a " - " dialog marker, then sentence-ending
        ///    punctuation within 15 characters of the middle, then punctuation/comma/space within
        ///    25 characters of the middle. If nothing is found, the middle is used.
        /// 5. The line break is inserted, tags are re-inserted and white space around the break is removed.
        /// </remarks>
        /// <param name="text">The text to break.</param>
        /// <param name="maximumLength">
        /// Maximum accepted length of either line when validating a split position found by the
        /// dialog/punctuation searches; also bounds the range of the dialog search.
        /// </param>
        /// <param name="mergeLinesShorterThan">Tag-free texts shorter than this are returned without a line break.</param>
        /// <param name="language">Passed through to CanBreak (semantics defined there).</param>
        /// <returns>The re-broken text with trailing white space removed, or the input unchanged (see step 1).</returns>
        public static string AutoBreakLine(string text, int maximumLength, int mergeLinesShorterThan, string language)
        {
            if (text == null || text.Length < 3)
            {
                return(text);
            }

            // do not autobreak dialogs: exactly two tag-free lines that both start with '-',
            // where the first line (ignoring trailing quotes) ends with one of .?!)] or "--" or '–'
            if (Contains(text, '-') && text.Contains(Environment.NewLine))
            {
                var noTagLines = HtmlUtil.RemoveHtmlTags(text, true).SplitToLines();
                if (noTagLines.Length == 2)
                {
                    var arr0 = noTagLines[0].Trim().TrimEnd('"', '\'').TrimEnd();
                    if (arr0.StartsWith('-') && noTagLines[1].TrimStart().StartsWith('-') && arr0.Length > 1 && (Contains(".?!)]", arr0[arr0.Length - 1]) || arr0.EndsWith("--", StringComparison.Ordinal) || arr0.EndsWith('–')))
                    {
                        return(text);
                    }
                }
            }

            // RemoveLineBreaks is defined elsewhere - presumably it joins the text into a single line.
            string s = RemoveLineBreaks(text);

            // Short enough (ignoring tags): return without any line break.
            if (HtmlUtil.RemoveHtmlTags(s, true).Length < mergeLinesShorterThan)
            {
                return(s);
            }

            // Strip formatting tags from s while remembering them. The dictionary key is the
            // index in the tag-free text at which the tag was found; tags found at the same
            // index are concatenated in order of appearance.
            var htmlTags = new Dictionary <int, string>();
            var sb       = new StringBuilder();
            int six      = 0;

            while (six < s.Length)
            {
                var  letter   = s[six];
                bool tagFound = false;
                if (letter == '<')
                {
                    // Prefix match only (case-insensitive), so anything starting with
                    // "<b", "<i", "<u" or "<font" (e.g. "<br>") is treated as a tag as well.
                    string tagString = s.Substring(six);
                    tagFound = tagString.StartsWith("<font", StringComparison.OrdinalIgnoreCase) ||
                               tagString.StartsWith("</font", StringComparison.OrdinalIgnoreCase) ||
                               tagString.StartsWith("<u", StringComparison.OrdinalIgnoreCase) ||
                               tagString.StartsWith("</u", StringComparison.OrdinalIgnoreCase) ||
                               tagString.StartsWith("<b", StringComparison.OrdinalIgnoreCase) ||
                               tagString.StartsWith("</b", StringComparison.OrdinalIgnoreCase) ||
                               tagString.StartsWith("<i", StringComparison.OrdinalIgnoreCase) ||
                               tagString.StartsWith("</i", StringComparison.OrdinalIgnoreCase);
                }

                int endIndex = -1;
                if (tagFound)
                {
                    endIndex = s.IndexOf('>', six + 1);
                }

                if (tagFound && endIndex > 0)
                {
                    // Cut the tag out of s; six is NOT advanced, so the character that now
                    // sits at six is examined in the next iteration.
                    string tag = s.Substring(six, endIndex - six + 1);
                    s = s.Remove(six, tag.Length);
                    if (htmlTags.ContainsKey(six))
                    {
                        htmlTags[six] = htmlTags[six] + tag;
                    }
                    else
                    {
                        htmlTags.Add(six, tag);
                    }
                }
                else
                {
                    // Ordinary character (or a '<' without a closing '>'): keep it.
                    sb.Append(letter);
                    six++;
                }
            }
            s = sb.ToString();

            // splitPos is the index in the tag-free text where the line break will be inserted (-1 = not found yet).
            int splitPos = -1;
            int mid      = s.Length / 2;

            // try to find " - " with uppercase letter after (dialog),
            // scanning outwards from the middle: first to the right (mid + j), then to the left (mid - j)
            if (s.Contains(" - "))
            {
                for (int j = 0; j <= (maximumLength / 2) + 5; j++)
                {
                    if (mid + j + 4 < s.Length)
                    {
                        if (s[mid + j] == '-' && s[mid + j + 1] == ' ' && s[mid + j - 1] == ' ')
                        {
                            string rest = s.Substring(mid + j + 1).TrimStart();
                            if (rest.Length > 0 && char.IsUpper(rest[0]))
                            {
                                splitPos = mid + j;
                                break;
                            }
                        }
                    }
                    if (mid - (j + 1) > 4)
                    {
                        if (s[mid - j] == '-' && s[mid - j + 1] == ' ' && s[mid - j - 1] == ' ')
                        {
                            string rest = s.Substring(mid - j + 1).TrimStart();
                            if (rest.Length > 0 && char.IsUpper(rest[0]))
                            {
                                // To the left of the middle the dash is only accepted when the text
                                // before it ends a sentence: '!', '?' or '.', optionally followed by '"'.
                                if (mid - j > 5 && s[mid - j - 1] == ' ')
                                {
                                    if (Contains("!?.", s[mid - j - 2]))
                                    {
                                        splitPos = mid - j;
                                        break;
                                    }
                                    var first = s.Substring(0, mid - j - 1);
                                    if (first.EndsWith(".\"", StringComparison.Ordinal) || first.EndsWith("!\"", StringComparison.Ordinal) || first.EndsWith("?\"", StringComparison.Ordinal))
                                    {
                                        splitPos = mid - j;
                                        break;
                                    }
                                }
                            }
                        }
                    }
                }
            }

            if (splitPos == maximumLength + 1 && s[maximumLength] != ' ') // only allow space for last char (as it does not count)
            {
                splitPos = -1;
            }

            // Second attempt: break after sentence-ending punctuation within 15 characters of the middle.
            if (splitPos < 0)
            {
                const string expectedChars1 = ".!?0123456789";
                const string expectedChars2 = ".!?";
                for (int j = 0; j < 15; j++)
                {
                    if (mid + j + 1 < s.Length && mid + j > 0)
                    {
                        if (Contains(expectedChars2, s[mid + j]) && !IsPartOfNumber(s, mid + j) && CanBreak(s, mid + j + 1, language))
                        {
                            splitPos = mid + j + 1;
                            if (Contains(expectedChars1, s[splitPos]))
                            { // do not break double/triple end lines like "!!!" or "..."
                                splitPos++;
                                // NOTE(review): this tests the same character as the enclosing check
                                // (s[mid + j + 1]), so it is always true here - possibly s[splitPos]
                                // was intended. Confirm before changing.
                                if (Contains(expectedChars1, s[mid + j + 1]))
                                {
                                    splitPos++;
                                }
                            }
                            break;
                        }
                        if (Contains(expectedChars2, s[mid - j]) && !IsPartOfNumber(s, mid - j) && CanBreak(s, mid - j, language))
                        {
                            // Break right after the punctuation character found to the left of the middle.
                            splitPos = mid - j;
                            splitPos++;
                            break;
                        }
                    }
                }
            }

            // Discard a split position that would make either line longer than maximumLength.
            if (splitPos > maximumLength)                                     // too long first line
            {
                if (splitPos != maximumLength + 1 || s[maximumLength] != ' ') // allow for maxlength+1 char to be space (does not count)
                {
                    splitPos = -1;
                }
            }
            else if (splitPos >= 0 && s.Length - splitPos > maximumLength) // too long second line
            {
                splitPos = -1;
            }

            // Third attempt: break at punctuation, comma or space within 25 characters of the middle.
            if (splitPos < 0)
            {
                const string expectedChars1 = ".!?, ";
                const string expectedChars2 = " .!?";
                const string expectedChars3 = ".!?";
                for (int j = 0; j < 25; j++)
                {
                    if (mid + j + 1 < s.Length && mid + j > 0)
                    {
                        if (Contains(expectedChars1, s[mid + j]) && !IsPartOfNumber(s, mid + j) && s.Length > mid + j + 2 && CanBreak(s, mid + j, language))
                        {
                            // Move past up to two following space/punctuation characters so they stay on the first line.
                            splitPos = mid + j;
                            if (Contains(expectedChars2, s[mid + j + 1]))
                            {
                                splitPos++;
                                if (Contains(expectedChars2, s[mid + j + 2]))
                                {
                                    splitPos++;
                                }
                            }
                            break;
                        }
                        // NOTE(review): the length guard below uses mid + j although this branch indexes mid - j - confirm intent.
                        if (Contains(expectedChars1, s[mid - j]) && !IsPartOfNumber(s, mid - j) && s.Length > mid + j + 2 && CanBreak(s, mid - j, language))
                        {
                            // Step back over up to three consecutive '.', '!' or '?' characters.
                            splitPos = mid - j;
                            if (Contains(expectedChars3, s[splitPos]))
                            {
                                splitPos--;
                            }
                            if (Contains(expectedChars3, s[splitPos]))
                            {
                                splitPos--;
                            }
                            if (Contains(expectedChars3, s[splitPos]))
                            {
                                splitPos--;
                            }
                            break;
                        }
                    }
                }
            }

            // Fallback: nothing suitable found. Split at the middle; a temporary line break is
            // inserted one character before the middle, the tags are re-inserted (and the tag
            // map emptied so they are not inserted twice), and the temporary break is replaced
            // by "-". The actual line break is then inserted below at splitPos.
            if (splitPos < 0)
            {
                splitPos = mid;
                s        = s.Insert(mid - 1, Environment.NewLine);
                s        = ReInsertHtmlTags(s, htmlTags);
                htmlTags = new Dictionary <int, string>();
                s        = s.Replace(Environment.NewLine, "-");
            }
            // Insert the line break unless the split position is within the last two characters.
            if (splitPos < s.Length - 2)
            {
                s = s.Substring(0, splitPos) + Environment.NewLine + s.Substring(splitPos);
            }

            // ReInsertHtmlTags is defined elsewhere - presumably it restores the tags recorded in htmlTags.
            s = ReInsertHtmlTags(s, htmlTags);
            var idx = s.IndexOf(Environment.NewLine + "</", StringComparison.Ordinal);

            // If a closing tag directly follows the line break, move it in front of the break
            // so it ends the first line (only the first such occurrence is handled).
            if (idx > 2)
            {
                var endIdx = s.IndexOf('>', idx + 2);
                if (endIdx > idx)
                {
                    var tag = s.Substring(idx + Environment.NewLine.Length, endIdx - (idx + Environment.NewLine.Length) + 1);
                    s = s.Insert(idx, tag);
                    s = s.Remove(idx + tag.Length + Environment.NewLine.Length, tag.Length);
                }
            }
            // Drop a single space on either side of the line break and trailing white space.
            s = s.Replace(" " + Environment.NewLine, Environment.NewLine);
            s = s.Replace(Environment.NewLine + " ", Environment.NewLine);
            return(s.TrimEnd());
        }
Пример #3
0
 /// <summary>
 /// Counts the words in <paramref name="source"/> after passing it through
 /// <c>HtmlUtil.RemoveHtmlTags</c>. A word is any non-empty run of characters
 /// delimited by a space, a line feed or a carriage return.
 /// </summary>
 /// <param name="source">The text whose words are counted.</param>
 /// <returns>The number of non-empty tokens found.</returns>
 public static int CountWords(this string source)
 {
     var plainText = HtmlUtil.RemoveHtmlTags(source, true);
     var separators = new[] { ' ', '\n', '\r' };
     var words = plainText.Split(separators, StringSplitOptions.RemoveEmptyEntries);
     return words.Length;
 }