public void StripableTextFontDontTouch()
 {
     var st = new StripableText("{MAN} Hi, how are you today!");
     Assert.AreEqual(st.Pre, "");
     Assert.AreEqual(st.Post, "!");
     Assert.AreEqual(st.StrippedText, "{MAN} Hi, how are you today");
 }
 public void StripableOnlyPre3()
 {
     var st = new StripableText("<i>");
     Assert.AreEqual(st.Pre, "<i>");
     Assert.AreEqual(st.Post, "");
     Assert.AreEqual(st.StrippedText, "");
 }
 public void StripableTextItalic2()
 {
     var st = new StripableText("<i>O</i>");
     Assert.AreEqual(st.Pre, "<i>");
     Assert.AreEqual(st.Post, "</i>");
     Assert.AreEqual(st.StrippedText, "O");
 }
 public void StripableTextItalic3()
 {
     var st = new StripableText("<i>Hi!");
     Assert.AreEqual(st.Pre, "<i>");
     Assert.AreEqual(st.Post, "!");
     Assert.AreEqual(st.StrippedText, "Hi");
 }
 public void StripableTextFont()
 {
     var st = new StripableText("<font color=\"red\">Hi!</font>");
     Assert.AreEqual(st.Pre, "<font color=\"red\">");
     Assert.AreEqual(st.Post, "!</font>");
     Assert.AreEqual(st.StrippedText, "Hi");
 }
 public void StripableTextAss()
 {
     var st = new StripableText("{\\an9}Hi!");
     Assert.AreEqual(st.Pre, "{\\an9}");
     Assert.AreEqual(st.Post, "!");
     Assert.AreEqual(st.StrippedText, "Hi");
 }
        private void FixSpanishInvertedLetter(char mark, string inverseMark, Paragraph p, Paragraph last, ref bool wasLastLineClosed, string fixAction, ref int fixCount)
        {
            if (p.Text.Contains(mark))
            {
                bool skip = false;
                if (last != null && p.Text.Contains(mark) && !p.Text.Contains(inverseMark) && last.Text.Contains(inverseMark) && !last.Text.Contains(mark))
                    skip = true;

                if (!skip && Utilities.CountTagInText(p.Text, mark) == Utilities.CountTagInText(p.Text, inverseMark) &&
                    HtmlUtil.RemoveHtmlTags(p.Text).TrimStart(inverseMark[0]).Contains(inverseMark) == false &&
                    HtmlUtil.RemoveHtmlTags(p.Text).TrimEnd(mark).Contains(mark) == false)
                {
                    skip = true;
                }

                if (!skip)
                {
                    int startIndex = 0;
                    int markIndex = p.Text.IndexOf(mark);
                    if (!wasLastLineClosed && ((p.Text.IndexOf('!') > 0 && p.Text.IndexOf('!') < markIndex) ||
                                               (p.Text.IndexOf('?') > 0 && p.Text.IndexOf('?') < markIndex) ||
                                               (p.Text.IndexOf('.') > 0 && p.Text.IndexOf('.') < markIndex)))
                        wasLastLineClosed = true;
                    while (markIndex > 0 && startIndex < p.Text.Length)
                    {
                        int inverseMarkIndex = p.Text.IndexOf(inverseMark, startIndex, StringComparison.Ordinal);
                        if (wasLastLineClosed && (inverseMarkIndex < 0 || inverseMarkIndex > markIndex))
                        {
                            if (AllowFix(p, fixAction))
                            {
                                int j = markIndex - 1;

                                while (j > startIndex && (p.Text[j] == '.' || p.Text[j] == '!' || p.Text[j] == '?'))
                                    j--;

                                while (j > startIndex &&
                                       (p.Text[j] != '.' || IsSpanishAbbreviation(p.Text, j)) &&
                                       p.Text[j] != '!' &&
                                       p.Text[j] != '?' &&
                                       !(j > 3 && p.Text.Substring(j - 3, 3) == Environment.NewLine + "-") &&
                                       !(j > 4 && p.Text.Substring(j - 4, 4) == Environment.NewLine + " -") &&
                                       !(j > 6 && p.Text.Substring(j - 6, 6) == Environment.NewLine + "<i>-"))
                                    j--;

                                if (@".!?".Contains(p.Text[j]))
                                {
                                    j++;
                                }
                                if (j + 3 < p.Text.Length && p.Text.Substring(j + 1, 2) == Environment.NewLine)
                                {
                                    j += 3;
                                }
                                else if (j + 2 < p.Text.Length && p.Text.Substring(j, 2) == Environment.NewLine)
                                {
                                    j += 2;
                                }
                                if (j >= startIndex)
                                {
                                    string part = p.Text.Substring(j, markIndex - j + 1);

                                    string speaker = string.Empty;
                                    int speakerEnd = part.IndexOf(')');
                                    if (part.StartsWith('(') && speakerEnd > 0 && speakerEnd < part.IndexOf(mark))
                                    {
                                        while (Environment.NewLine.Contains(part[speakerEnd + 1]))
                                            speakerEnd++;
                                        speaker = part.Substring(0, speakerEnd + 1);
                                        part = part.Substring(speakerEnd + 1);
                                    }
                                    speakerEnd = part.IndexOf(']');
                                    if (part.StartsWith('[') && speakerEnd > 0 && speakerEnd < part.IndexOf(mark))
                                    {
                                        while (Environment.NewLine.Contains(part[speakerEnd + 1]))
                                            speakerEnd++;
                                        speaker = part.Substring(0, speakerEnd + 1);
                                        part = part.Substring(speakerEnd + 1);
                                    }

                                    var st = new StripableText(part);
                                    if (j == 0 && mark == '!' && st.Pre == "¿" && Utilities.CountTagInText(p.Text, mark) == 1 && HtmlUtil.RemoveHtmlTags(p.Text).EndsWith(mark))
                                    {
                                        p.Text = inverseMark + p.Text;
                                    }
                                    else if (j == 0 && mark == '?' && st.Pre == "¡" && Utilities.CountTagInText(p.Text, mark) == 1 && HtmlUtil.RemoveHtmlTags(p.Text).EndsWith(mark))
                                    {
                                        p.Text = inverseMark + p.Text;
                                    }
                                    else
                                    {
                                        string temp = inverseMark;
                                        int addToIndex = 0;
                                        while (p.Text.Length > markIndex + 1 && p.Text[markIndex + 1] == mark &&
                                            Utilities.CountTagInText(p.Text, mark) > Utilities.CountTagInText(p.Text + temp, inverseMark))
                                        {
                                            temp += inverseMark;
                                            st.Post += mark;
                                            markIndex++;
                                            addToIndex++;
                                        }

                                        p.Text = p.Text.Remove(j, markIndex - j + 1).Insert(j, speaker + st.Pre + temp + st.StrippedText + st.Post);
                                        markIndex += addToIndex;
                                    }
                                }
                            }
                        }
                        else if (last != null && !wasLastLineClosed && inverseMarkIndex == p.Text.IndexOf(mark) && !last.Text.Contains(inverseMark))
                        {
                            string lastOldtext = last.Text;
                            int idx = last.Text.Length - 2;
                            while (idx > 0 && (last.Text.Substring(idx, 2) != ". ") && (last.Text.Substring(idx, 2) != "! ") && (last.Text.Substring(idx, 2) != "? "))
                                idx--;

                            last.Text = last.Text.Insert(idx, inverseMark);
                            fixCount++;
                            AddFixToListView(last, fixAction, lastOldtext, last.Text);
                        }

                        startIndex = markIndex + 2;
                        if (startIndex < p.Text.Length)
                            markIndex = p.Text.IndexOf(mark, startIndex);
                        else
                            markIndex = -1;
                        wasLastLineClosed = true;
                    }
                }
                if (p.Text.EndsWith(mark + "...", StringComparison.Ordinal) && p.Text.Length > 4)
                {
                    p.Text = p.Text.Remove(p.Text.Length - 4, 4) + "..." + mark;
                }
            }
            else if (Utilities.CountTagInText(p.Text, inverseMark) == 1)
            {
                int idx = p.Text.IndexOf(inverseMark, StringComparison.Ordinal);
                while (idx < p.Text.Length && !@".!?".Contains(p.Text[idx]))
                {
                    idx++;
                }
                if (idx < p.Text.Length)
                {
                    p.Text = p.Text.Insert(idx, mark.ToString(CultureInfo.InvariantCulture));
                    if (p.Text.Contains("¡¿") && p.Text.Contains("!?"))
                        p.Text = p.Text.Replace("!?", "?!");
                    if (p.Text.Contains("¿¡") && p.Text.Contains("?!"))
                        p.Text = p.Text.Replace("?!", "!?");
                }
            }
        }
        public static string FixStartWithUppercaseLetterAfterParagraph(Paragraph p, Paragraph prev, Encoding encoding, string language)
        {
            if (p.Text != null && p.Text.Length > 1)
            {
                string text = p.Text;
                string pre = string.Empty;
                if (text.Length > 4 && text.StartsWith("<i> ", StringComparison.Ordinal))
                {
                    pre = "<i> ";
                    text = text.Substring(4);
                }
                if (text.Length > 3 && text.StartsWith("<i>", StringComparison.Ordinal))
                {
                    pre = "<i>";
                    text = text.Substring(3);
                }
                if (text.Length > 4 && text.StartsWith("<I> ", StringComparison.Ordinal))
                {
                    pre = "<I> ";
                    text = text.Substring(4);
                }
                if (text.Length > 3 && text.StartsWith("<I>", StringComparison.Ordinal))
                {
                    pre = "<I>";
                    text = text.Substring(3);
                }
                if (text.Length > 2 && text.StartsWith('♪'))
                {
                    pre = pre + "♪";
                    text = text.Substring(1);
                }
                if (text.Length > 2 && text.StartsWith(' '))
                {
                    pre = pre + " ";
                    text = text.Substring(1);
                }
                if (text.Length > 2 && text.StartsWith('♫'))
                {
                    pre = pre + "♫";
                    text = text.Substring(1);
                }
                if (text.Length > 2 && text.StartsWith(' '))
                {
                    pre = pre + " ";
                    text = text.Substring(1);
                }

                var firstLetter = text[0];

                string prevText = " .";
                if (prev != null)
                    prevText = HtmlUtil.RemoveHtmlTags(prev.Text);

                bool isPrevEndOfLine = FixCommonErrorsHelper.IsPreviousTextEndOfParagraph(prevText);
                if (prevText == " .")
                    isPrevEndOfLine = true;
                if ((!text.StartsWith("www.", StringComparison.Ordinal) && !text.StartsWith("http:", StringComparison.Ordinal) && !text.StartsWith("https:", StringComparison.Ordinal)) &&
                    (char.IsLower(firstLetter) || IsTurkishLittleI(firstLetter, encoding, language)) &&
                    !char.IsDigit(firstLetter) &&
                    isPrevEndOfLine)
                {
                    bool isMatchInKnowAbbreviations = language == "en" &&
                        (prevText.EndsWith(" o.r.", StringComparison.Ordinal) ||
                         prevText.EndsWith(" a.m.", StringComparison.Ordinal) ||
                         prevText.EndsWith(" p.m.", StringComparison.Ordinal));

                    if (!isMatchInKnowAbbreviations)
                    {
                        if (IsTurkishLittleI(firstLetter, encoding, language))
                            p.Text = pre + GetTurkishUppercaseLetter(firstLetter, encoding) + text.Substring(1);
                        else if (language == "en" && (text.StartsWith("l ", StringComparison.Ordinal) || text.StartsWith("l-I", StringComparison.Ordinal) || text.StartsWith("ls ", StringComparison.Ordinal) || text.StartsWith("lnterested") ||
                                                      text.StartsWith("lsn't ", StringComparison.Ordinal) || text.StartsWith("ldiot", StringComparison.Ordinal) || text.StartsWith("ln", StringComparison.Ordinal) || text.StartsWith("lm", StringComparison.Ordinal) ||
                                                      text.StartsWith("ls", StringComparison.Ordinal) || text.StartsWith("lt", StringComparison.Ordinal) || text.StartsWith("lf ", StringComparison.Ordinal) || text.StartsWith("lc", StringComparison.Ordinal) || text.StartsWith("l'm ", StringComparison.Ordinal)) || text.StartsWith("l am ", StringComparison.Ordinal)) // l > I
                            p.Text = pre + "I" + text.Substring(1);
                        else
                            p.Text = pre + char.ToUpper(firstLetter) + text.Substring(1);
                    }
                }
            }

            if (p.Text != null && p.Text.Contains(Environment.NewLine))
            {
                var arr = p.Text.SplitToLines();
                if (arr.Length == 2 && arr[1].Length > 1)
                {
                    string text = arr[1];
                    string pre = string.Empty;
                    if (text.Length > 4 && text.StartsWith("<i> ", StringComparison.Ordinal))
                    {
                        pre = "<i> ";
                        text = text.Substring(4);
                    }
                    if (text.Length > 3 && text.StartsWith("<i>", StringComparison.Ordinal))
                    {
                        pre = "<i>";
                        text = text.Substring(3);
                    }
                    if (text.Length > 4 && text.StartsWith("<I> ", StringComparison.Ordinal))
                    {
                        pre = "<I> ";
                        text = text.Substring(4);
                    }
                    if (text.Length > 3 && text.StartsWith("<I>", StringComparison.Ordinal))
                    {
                        pre = "<I>";
                        text = text.Substring(3);
                    }
                    if (text.Length > 2 && text.StartsWith('♪'))
                    {
                        pre = pre + "♪";
                        text = text.Substring(1);
                    }
                    if (text.Length > 2 && text.StartsWith(' '))
                    {
                        pre = pre + " ";
                        text = text.Substring(1);
                    }
                    if (text.Length > 2 && text.StartsWith('♫'))
                    {
                        pre = pre + "♫";
                        text = text.Substring(1);
                    }
                    if (text.Length > 2 && text.StartsWith(' '))
                    {
                        pre = pre + " ";
                        text = text.Substring(1);
                    }

                    char firstLetter = text[0];
                    string prevText = HtmlUtil.RemoveHtmlTags(arr[0]);
                    bool isPrevEndOfLine = FixCommonErrorsHelper.IsPreviousTextEndOfParagraph(prevText);
                    if ((!text.StartsWith("www.", StringComparison.Ordinal) && !text.StartsWith("http:", StringComparison.Ordinal) && !text.StartsWith("https:", StringComparison.Ordinal)) &&
                        (char.IsLower(firstLetter) || IsTurkishLittleI(firstLetter, encoding, language)) &&
                        !prevText.EndsWith("...", StringComparison.Ordinal) &&
                        isPrevEndOfLine)
                    {
                        bool isMatchInKnowAbbreviations = language == "en" &&
                            (prevText.EndsWith(" o.r.", StringComparison.Ordinal) ||
                             prevText.EndsWith(" a.m.", StringComparison.Ordinal) ||
                             prevText.EndsWith(" p.m.", StringComparison.Ordinal));

                        if (!isMatchInKnowAbbreviations)
                        {
                            if (IsTurkishLittleI(firstLetter, encoding, language))
                                text = pre + GetTurkishUppercaseLetter(firstLetter, encoding) + text.Substring(1);
                            else if (language == "en" && (text.StartsWith("l ", StringComparison.Ordinal) || text.StartsWith("l-I", StringComparison.Ordinal) || text.StartsWith("ls ") || text.StartsWith("lnterested") ||
                                                     text.StartsWith("lsn't ", StringComparison.Ordinal) || text.StartsWith("ldiot", StringComparison.Ordinal) || text.StartsWith("ln", StringComparison.Ordinal) || text.StartsWith("lm", StringComparison.Ordinal) ||
                                                     text.StartsWith("ls", StringComparison.Ordinal) || text.StartsWith("lt", StringComparison.Ordinal) || text.StartsWith("lf ", StringComparison.Ordinal) || text.StartsWith("lc", StringComparison.Ordinal) || text.StartsWith("l'm ", StringComparison.Ordinal)) || text.StartsWith("l am ", StringComparison.Ordinal)) // l > I
                                text = pre + "I" + text.Substring(1);
                            else
                                text = pre + char.ToUpper(firstLetter) + text.Substring(1);
                            p.Text = arr[0] + Environment.NewLine + text;
                        }
                    }

                    arr = p.Text.SplitToLines();
                    if ((arr[0].StartsWith('-') || arr[0].StartsWith("<i>-", StringComparison.Ordinal)) &&
                        (arr[1].StartsWith('-') || arr[1].StartsWith("<i>-", StringComparison.Ordinal)) &&
                        !arr[0].StartsWith("--", StringComparison.Ordinal) && !arr[0].StartsWith("<i>--", StringComparison.Ordinal) &&
                        !arr[1].StartsWith("--", StringComparison.Ordinal) && !arr[1].StartsWith("<i>--", StringComparison.Ordinal))
                    {
                        if (isPrevEndOfLine && arr[1].StartsWith("<i>- ", StringComparison.Ordinal) && arr[1].Length > 6)
                        {
                            p.Text = arr[0] + Environment.NewLine + "<i>- " + char.ToUpper(arr[1][5]) + arr[1].Remove(0, 6);
                        }
                        else if (isPrevEndOfLine && arr[1].StartsWith("- ", StringComparison.Ordinal) && arr[1].Length > 3)
                        {
                            p.Text = arr[0] + Environment.NewLine + "- " + char.ToUpper(arr[1][2]) + arr[1].Remove(0, 3);
                        }
                        arr = p.Text.SplitToLines();

                        prevText = " .";
                        if (prev != null && p.StartTime.TotalMilliseconds - 10000 < prev.EndTime.TotalMilliseconds)
                            prevText = HtmlUtil.RemoveHtmlTags(prev.Text);
                        bool isPrevLineEndOfLine = FixCommonErrorsHelper.IsPreviousTextEndOfParagraph(prevText);
                        if (isPrevLineEndOfLine && arr[0].StartsWith("<i>- ", StringComparison.Ordinal) && arr[0].Length > 6)
                        {
                            p.Text = "<i>- " + char.ToUpper(arr[0][5]) + arr[0].Remove(0, 6) + Environment.NewLine + arr[1];
                        }
                        else if (isPrevLineEndOfLine && arr[0].StartsWith("- ", StringComparison.Ordinal) && arr[0].Length > 3)
                        {
                            p.Text = "- " + char.ToUpper(arr[0][2]) + arr[0].Remove(0, 3) + Environment.NewLine + arr[1];
                        }
                    }
                }
            }

            if (p.Text.Length > 4)
            {
                int len = 0;
                int indexOfNewLine = p.Text.IndexOf(Environment.NewLine + " -", 1, StringComparison.Ordinal);
                if (indexOfNewLine < 0)
                {
                    indexOfNewLine = p.Text.IndexOf(Environment.NewLine + "- <i> ♪", 1, StringComparison.Ordinal);
                    len = "- <i> ♪".Length;
                }
                if (indexOfNewLine < 0)
                {
                    indexOfNewLine = p.Text.IndexOf(Environment.NewLine + "-", 1, StringComparison.Ordinal);
                    len = "-".Length;
                }
                if (indexOfNewLine < 0)
                {
                    indexOfNewLine = p.Text.IndexOf(Environment.NewLine + "<i>-", 1, StringComparison.Ordinal);
                    len = "<i>-".Length;
                }
                if (indexOfNewLine < 0)
                {
                    indexOfNewLine = p.Text.IndexOf(Environment.NewLine + "<i> -", 1, StringComparison.Ordinal);
                    len = "<i> -".Length;
                }
                if (indexOfNewLine < 0)
                {
                    indexOfNewLine = p.Text.IndexOf(Environment.NewLine + "♪ -", 1, StringComparison.Ordinal);
                    len = "♪ -".Length;
                }
                if (indexOfNewLine < 0)
                {
                    indexOfNewLine = p.Text.IndexOf(Environment.NewLine + "♪ <i> -", 1, StringComparison.Ordinal);
                    len = "♪ <i> -".Length;
                }
                if (indexOfNewLine < 0)
                {
                    indexOfNewLine = p.Text.IndexOf(Environment.NewLine + "♪ <i>-", 1, StringComparison.Ordinal);
                    len = "♪ <i>-".Length;
                }

                if (indexOfNewLine > 0)
                {
                    string text = p.Text.Substring(indexOfNewLine + len);
                    var st = new StripableText(text);

                    if (st.StrippedText.Length > 0 && IsTurkishLittleI(st.StrippedText[0], encoding, language) && !st.Pre.EndsWith('[') && !st.Pre.Contains("..."))
                    {
                        text = st.Pre + GetTurkishUppercaseLetter(st.StrippedText[0], encoding) + st.StrippedText.Substring(1) + st.Post;
                        p.Text = p.Text.Remove(indexOfNewLine + len).Insert(indexOfNewLine + len, text);
                    }
                    else if (st.StrippedText.Length > 0 && st.StrippedText[0] != char.ToUpper(st.StrippedText[0]) && !st.Pre.EndsWith('[') && !st.Pre.Contains("..."))
                    {
                        text = st.Pre + char.ToUpper(st.StrippedText[0]) + st.StrippedText.Substring(1) + st.Post;
                        p.Text = p.Text.Remove(indexOfNewLine + len).Insert(indexOfNewLine + len, text);
                    }
                }
            }
            return p.Text;
        }
        public void FixUppercaseIInsideWords()
        {
            string fixAction = _language.FixUppercaseIInsideLowercaseWord;
            int uppercaseIsInsideLowercaseWords = 0;
            // bool isLineContinuation = false;
            for (int i = 0; i < Subtitle.Paragraphs.Count; i++)
            {
                Paragraph p = Subtitle.Paragraphs[i];
                string oldText = p.Text;

                Match match = ReAfterLowercaseLetter.Match(p.Text);
                while (match.Success)
                {
                    if (!(match.Index > 1 && p.Text.Substring(match.Index - 1, 2) == "Mc") // irish names, McDonalds etc.
                        && p.Text[match.Index + 1] == 'I'
                        && AllowFix(p, fixAction))
                    {
                        p.Text = p.Text.Substring(0, match.Index + 1) + "l";
                        if (match.Index + 2 < oldText.Length)
                            p.Text += oldText.Substring(match.Index + 2);

                        uppercaseIsInsideLowercaseWords++;
                        AddFixToListView(p, fixAction, oldText, p.Text);
                    }
                    match = match.NextMatch();
                }

                var st = new StripableText(p.Text);
                match = ReBeforeLowercaseLetter.Match(st.StrippedText);
                while (match.Success)
                {
                    string word = GetWholeWord(st.StrippedText, match.Index);
                    if (!IsName(word))
                    {
                        if (AllowFix(p, fixAction))
                        {
                            if (word.Equals("internal", StringComparison.OrdinalIgnoreCase) ||
                                word.Equals("island", StringComparison.OrdinalIgnoreCase) ||
                                word.Equals("islands", StringComparison.OrdinalIgnoreCase))
                            {
                            }
                            else if (match.Index == 0)
                            {  // first letter in paragraph

                                //too risky! - perhaps if periods is fixed at the same time... or too complicated!?
                                //if (isLineContinuation)
                                //{
                                //    st.StrippedText = st.StrippedText.Remove(match.Index, 1).Insert(match.Index, "l");
                                //    p.Text = st.MergedString;
                                //    uppercaseIsInsideLowercaseWords++;
                                //    AddFixToListView(p, fixAction, oldText, p.Text);
                                //}
                            }
                            else
                            {
                                if (match.Index > 2 && st.StrippedText[match.Index - 1] == ' ')
                                {
                                    if ((Utilities.AllLettersAndNumbers + @",").Contains(st.StrippedText[match.Index - 2])
                                        && match.Length >= 2 && Utilities.LowercaseVowels.Contains(char.ToLower(match.Value[1])))
                                    {
                                        st.StrippedText = st.StrippedText.Remove(match.Index, 1).Insert(match.Index, "l");
                                        p.Text = st.MergedString;
                                        uppercaseIsInsideLowercaseWords++;
                                        AddFixToListView(p, fixAction, oldText, p.Text);
                                    }
                                }
                                else if (match.Index > Environment.NewLine.Length + 1 && Environment.NewLine.Contains(st.StrippedText[match.Index - 1]))
                                {
                                    if ((Utilities.AllLettersAndNumbers + @",").Contains(st.StrippedText[match.Index - Environment.NewLine.Length + 1])
                                        && match.Length >= 2 && Utilities.LowercaseVowels.Contains(match.Value[1]))
                                    {
                                        st.StrippedText = st.StrippedText.Remove(match.Index, 1).Insert(match.Index, "l");
                                        p.Text = st.MergedString;
                                        uppercaseIsInsideLowercaseWords++;
                                        AddFixToListView(p, fixAction, oldText, p.Text);
                                    }
                                }
                                else if (match.Index > 1 && ((st.StrippedText[match.Index - 1] == '\"') || (st.StrippedText[match.Index - 1] == '\'') ||
                                                             (st.StrippedText[match.Index - 1] == '>') || (st.StrippedText[match.Index - 1] == '-')))
                                {
                                }
                                else
                                {
                                    var before = '\0';
                                    var after = '\0';
                                    if (match.Index > 0)
                                        before = st.StrippedText[match.Index - 1];
                                    if (match.Index < st.StrippedText.Length - 2)
                                        after = st.StrippedText[match.Index + 1];
                                    if (before != '\0' && char.IsUpper(before) && after != '\0' && char.IsLower(after) &&
                                        !Utilities.LowercaseVowels.Contains(char.ToLower(before)) && !Utilities.LowercaseVowels.Contains(after))
                                    {
                                        st.StrippedText = st.StrippedText.Remove(match.Index, 1).Insert(match.Index, "i");
                                        p.Text = st.MergedString;
                                        uppercaseIsInsideLowercaseWords++;
                                        AddFixToListView(p, fixAction, oldText, p.Text);
                                    }
                                    else if (@"‘’¡¿„“()[]♪'. @".Contains(before) && !Utilities.LowercaseVowels.Contains(char.ToLower(after)))
                                    {
                                    }
                                    else
                                    {
                                        st.StrippedText = st.StrippedText.Remove(match.Index, 1).Insert(match.Index, "l");
                                        p.Text = st.MergedString;
                                        uppercaseIsInsideLowercaseWords++;
                                        AddFixToListView(p, fixAction, oldText, p.Text);
                                    }
                                }
                            }
                        }
                    }
                    match = match.NextMatch();
                }

                //isLineContinuation = p.Text.Length > 0 && Utilities.GetLetters(true, true, false).Contains(p.Text[p.Text.Length - 1].ToString());
            }
            UpdateFixStatus(uppercaseIsInsideLowercaseWords, _language.FixUppercaseIInsindeLowercaseWords, _language.XUppercaseIsFoundInsideLowercaseWords);
        }
        public void FixMissingPeriodsAtEndOfLine()
        {
            string fixAction = _language.FixMissingPeriodAtEndOfLine;
            int missigPeriodsAtEndOfLine = 0;
            for (int i = 0; i < Subtitle.Paragraphs.Count; i++)
            {
                Paragraph p = Subtitle.Paragraphs[i];
                Paragraph next = Subtitle.GetParagraphOrDefault(i + 1);
                string nextText = string.Empty;
                if (next != null)
                    nextText = HtmlUtil.RemoveHtmlTags(next.Text).TrimStart('-', '"', '„').TrimStart();
                string tempNoHtml = HtmlUtil.RemoveHtmlTags(p.Text).TrimEnd();

                if (IsOneLineUrl(p.Text) || p.Text.Contains(new[] { '♪', '♫' }) || p.Text.EndsWith('\''))
                {
                    // ignore urls
                }
                else if (!string.IsNullOrEmpty(nextText) && next != null &&
                    next.Text.Length > 0 &&
                    Utilities.UppercaseLetters.Contains(nextText[0]) &&
                    tempNoHtml.Length > 0 &&
                    !@",.!?:;>-])♪♫…".Contains(tempNoHtml[tempNoHtml.Length - 1]))
                {
                    string tempTrimmed = tempNoHtml.TrimEnd().TrimEnd('\'', '"', '“', '”').TrimEnd();
                    if (tempTrimmed.Length > 0 && !@")]*#¶.!?".Contains(tempTrimmed[tempTrimmed.Length - 1]) && p.Text != p.Text.ToUpper())
                    {
                        //don't end the sentence if the next word is an I word as they're always capped.
                        if (!next.Text.StartsWith("I ", StringComparison.Ordinal) && !next.Text.StartsWith("I'", StringComparison.Ordinal))
                        {
                            //test to see if the first word of the next line is a name
                            if (!IsName(next.Text.Split(new[] { ' ', '.', ',', '-', '?', '!', ':', ';', '"', '(', ')', '[', ']', '{', '}', '|', '<', '>', '/', '+', '\r', '\n' })[0]) && AllowFix(p, fixAction))
                            {
                                string oldText = p.Text;
                                if (p.Text.EndsWith('>'))
                                {
                                    int lastLessThan = p.Text.LastIndexOf('<');
                                    if (lastLessThan > 0)
                                        p.Text = p.Text.Insert(lastLessThan, ".");
                                }
                                else
                                {
                                    if (p.Text.EndsWith('“') && tempNoHtml.StartsWith('„'))
                                        p.Text = p.Text.TrimEnd('“') + ".“";
                                    else if (p.Text.EndsWith('"') && tempNoHtml.StartsWith('"'))
                                        p.Text = p.Text.TrimEnd('"') + ".\"";
                                    else
                                        p.Text += ".";
                                }
                                if (p.Text != oldText)
                                {
                                    missigPeriodsAtEndOfLine++;
                                    AddFixToListView(p, fixAction, oldText, p.Text);
                                }
                            }
                        }
                    }
                }
                else if (next != null && !string.IsNullOrEmpty(p.Text) && Utilities.AllLettersAndNumbers.Contains(p.Text[p.Text.Length - 1]))
                {
                    if (p.Text != p.Text.ToUpper())
                    {
                        var st = new StripableText(next.Text);
                        if (st.StrippedText.Length > 0 && st.StrippedText != st.StrippedText.ToUpper() &&
                            Utilities.UppercaseLetters.Contains(st.StrippedText[0]))
                        {
                            if (AllowFix(p, fixAction))
                            {
                                int j = p.Text.Length - 1;
                                while (j >= 0 && !@".!?¿¡".Contains(p.Text[j]))
                                    j--;
                                string endSign = ".";
                                if (j >= 0 && p.Text[j] == '¿')
                                    endSign = "?";
                                if (j >= 0 && p.Text[j] == '¡')
                                    endSign = "!";

                                string oldText = p.Text;
                                missigPeriodsAtEndOfLine++;
                                p.Text += endSign;
                                AddFixToListView(p, fixAction, oldText, p.Text);
                            }
                        }
                    }
                }

                if (p.Text.Length > 4)
                {
                    int indexOfNewLine = p.Text.IndexOf(Environment.NewLine + " -", 3, StringComparison.Ordinal);
                    if (indexOfNewLine < 0)
                        indexOfNewLine = p.Text.IndexOf(Environment.NewLine + "-", 3, StringComparison.Ordinal);
                    if (indexOfNewLine < 0)
                        indexOfNewLine = p.Text.IndexOf(Environment.NewLine + "<i>-", 3, StringComparison.Ordinal);
                    if (indexOfNewLine < 0)
                        indexOfNewLine = p.Text.IndexOf(Environment.NewLine + "<i> -", 3, StringComparison.Ordinal);
                    if (indexOfNewLine > 0 && Configuration.Settings.General.UppercaseLetters.Contains(char.ToUpper(p.Text[indexOfNewLine - 1])) && AllowFix(p, fixAction))
                    {
                        string oldText = p.Text;

                        string text = p.Text.Substring(0, indexOfNewLine);
                        var st = new StripableText(text);
                        if (st.Pre.TrimEnd().EndsWith('¿')) // Spanish ¿
                            p.Text = p.Text.Insert(indexOfNewLine, "?");
                        else if (st.Pre.TrimEnd().EndsWith('¡')) // Spanish ¡
                            p.Text = p.Text.Insert(indexOfNewLine, "!");
                        else
                            p.Text = p.Text.Insert(indexOfNewLine, ".");

                        missigPeriodsAtEndOfLine++;
                        AddFixToListView(p, fixAction, oldText, p.Text);
                    }
                }
            }
            UpdateFixStatus(missigPeriodsAtEndOfLine, _language.AddPeriods, _language.XPeriodsAdded);
        }
        private void GeneratePreview()
        {
            Cursor = Cursors.WaitCursor;
            listViewFixes.BeginUpdate();
            listViewFixes.Items.Clear();
            foreach (Paragraph p in _subtitle.Paragraphs)
            {
                string text = p.Text;
                foreach (ListViewItem item in listViewNames.Items)
                {
                    string name = item.SubItems[1].Text;

                    string textNoTags = HtmlUtil.RemoveHtmlTags(text);
                    if (textNoTags != textNoTags.ToUpper())
                    {
                        if (item.Checked && text != null && text.Contains(name, StringComparison.OrdinalIgnoreCase) && name.Length > 1 && name != name.ToLower())
                        {
                            var st = new StripableText(text);
                            st.FixCasing(new List<string> { name }, true, false, false, string.Empty);
                            text = st.MergedString;
                        }
                    }
                }
                if (text != p.Text)
                    AddToPreviewListView(p, text);
            }
            listViewFixes.EndUpdate();
            groupBoxLinesFound.Text = string.Format(Configuration.Settings.Language.ChangeCasingNames.LinesFoundX, listViewFixes.Items.Count);
            Cursor = Cursors.Default;
        }
Beispiel #12
0
        private string FixCasing(string text, string lastLine, List<string> namesEtc)
        {
            string original = text;
            if (radioButtonNormal.Checked)
            {
                if (checkBoxOnlyAllUpper.Checked && text != text.ToUpper())
                    return text;

                if (text.Length > 1)
                {
                    // first all to lower
                    text = text.ToLower().Trim();
                    text = text.FixExtraSpaces();
                    var st = new StripableText(text);
                    st.FixCasing(namesEtc, false, true, true, lastLine); // fix all casing but names (that's a seperate option)
                    text = st.MergedString;
                }
            }
            else if (radioButtonUppercase.Checked)
            {
                var st = new StripableText(text);
                text = st.Pre + st.StrippedText.ToUpper() + st.Post;
                text = HtmlUtil.FixUpperTags(text); // tags inside text
            }
            else if (radioButtonLowercase.Checked)
            {
                text = text.ToLower();
            }
            if (original != text)
                _noOfLinesChanged++;
            return text;
        }
        private string FixLowercaseIToUppercaseI(string input, string lastLine)
        {
            var sb = new StringBuilder();
            var lines = input.SplitToLines();
            for (int i = 0; i < lines.Length; i++)
            {
                string l = lines[i];

                if (i > 0)
                    lastLine = lines[i - 1];
                lastLine = HtmlUtil.RemoveHtmlTags(lastLine);

                if (string.IsNullOrEmpty(lastLine) ||
                    lastLine.EndsWith('.') ||
                    lastLine.EndsWith('!') ||
                    lastLine.EndsWith('?'))
                {
                    var st = new StripableText(l);
                    if (st.StrippedText.StartsWith('i') && !st.Pre.EndsWith('[') && !st.Pre.EndsWith('(') && !st.Pre.EndsWith("...", StringComparison.Ordinal))
                    {
                        if (string.IsNullOrEmpty(lastLine) || (!lastLine.EndsWith("...", StringComparison.Ordinal) && !EndsWithAbbreviation(lastLine, _abbreviationList)))
                            l = st.Pre + "I" + st.StrippedText.Remove(0, 1) + st.Post;
                    }
                }
                sb.AppendLine(l);
            }
            return sb.ToString().TrimEnd('\r', '\n');
        }
 public void StripableTextItalicAndMore()
 {
     var st = new StripableText("<i>...<b>Hi!</b></i>");
     Assert.AreEqual(st.Pre, "<i>...<b>");
     Assert.AreEqual(st.Post, "!</b></i>");
     Assert.AreEqual(st.StrippedText, "Hi");
 }
        private void FixStartWithUppercaseLetterAfterColon()
        {
            string fixAction = _language.StartWithUppercaseLetterAfterColon;
            int noOfFixes = 0;
            listViewFixes.BeginUpdate();
            for (int i = 0; i < Subtitle.Paragraphs.Count; i++)
            {
                Paragraph p = Subtitle.Paragraphs[i];
                Paragraph last = Subtitle.GetParagraphOrDefault(i - 1);
                string oldText = p.Text;
                int skipCount = 0;

                if (last != null)
                {
                    string lastText = HtmlUtil.RemoveHtmlTags(last.Text);
                    if (lastText.EndsWith(':') || lastText.EndsWith(';'))
                    {
                        var st = new StripableText(p.Text);
                        if (st.StrippedText.Length > 0 && st.StrippedText[0] != char.ToUpper(st.StrippedText[0]))
                            p.Text = st.Pre + char.ToUpper(st.StrippedText[0]) + st.StrippedText.Substring(1) + st.Post;
                    }
                }

                if (oldText.Contains(new[] { ':', ';' }))
                {
                    bool lastWasColon = false;
                    for (int j = 0; j < p.Text.Length; j++)
                    {
                        var s = p.Text[j];
                        if (s == ':' || s == ';')
                        {
                            lastWasColon = true;
                        }
                        else if (lastWasColon)
                        {
                            var startFromJ = p.Text.Substring(j);
                            if (skipCount > 0)
                                skipCount--;
                            else if (startFromJ.StartsWith("<i>", StringComparison.OrdinalIgnoreCase))
                                skipCount = 2;
                            else if (startFromJ.StartsWith("<b>", StringComparison.OrdinalIgnoreCase))
                                skipCount = 2;
                            else if (startFromJ.StartsWith("<u>", StringComparison.OrdinalIgnoreCase))
                                skipCount = 2;
                            else if (startFromJ.StartsWith("<font ", StringComparison.OrdinalIgnoreCase) && p.Text.Substring(j).Contains('>'))
                                skipCount = startFromJ.IndexOf('>') - startFromJ.IndexOf("<font ", StringComparison.OrdinalIgnoreCase);
                            else if (IsTurkishLittleI(s, _encoding, Language))
                            {
                                p.Text = p.Text.Remove(j, 1).Insert(j, GetTurkishUppercaseLetter(s, _encoding).ToString(CultureInfo.InvariantCulture));
                                lastWasColon = false;
                            }
                            else if (char.IsLower(s))
                            {
                                // iPhone
                                bool change = true;
                                if (s == 'i' && p.Text.Length > j + 1)
                                {
                                    if (p.Text[j + 1] == char.ToUpper(p.Text[j + 1]))
                                        change = false;
                                }
                                if (change)
                                    p.Text = p.Text.Remove(j, 1).Insert(j, char.ToUpper(s).ToString(CultureInfo.InvariantCulture));
                                lastWasColon = false;
                            }
                            else if (!(" " + Environment.NewLine).Contains(s))
                                lastWasColon = false;
                        }
                    }
                }

                if (oldText != p.Text)
                {
                    noOfFixes++;
                    AddFixToListView(p, fixAction, oldText, p.Text);
                }
            }
            listViewFixes.EndUpdate();
            if (noOfFixes > 0)
            {
                _totalFixes += noOfFixes;
                LogStatus(_language.StartWithUppercaseLetterAfterColon, noOfFixes.ToString(CultureInfo.InvariantCulture));
            }
        }
Beispiel #16
0
        public string RemoveInterjections(string text)
        {
            string oldText = text;

            var arr = Configuration.Settings.Tools.Interjections.Split(new[] { ';' }, StringSplitOptions.RemoveEmptyEntries);
            if (_interjectionList == null)
            {
                _interjectionList = new List<string>();
                foreach (string s in arr)
                {
                    if (s.Length > 0)
                    {
                        if (!_interjectionList.Contains(s))
                            _interjectionList.Add(s);

                        string lower = s.ToLower();
                        if (!_interjectionList.Contains(lower))
                            _interjectionList.Add(lower);

                        string upper = s.ToUpper();
                        if (!_interjectionList.Contains(upper))
                            _interjectionList.Add(upper);

                        string pascalCasing = char.ToUpper(s[0]) + s.Substring(1);
                        if (!_interjectionList.Contains(pascalCasing))
                            _interjectionList.Add(pascalCasing);
                    }
                }
                _interjectionList.Sort(CompareLength);
            }

            bool doRepeat = true;
            while (doRepeat)
            {
                doRepeat = false;
                foreach (string s in _interjectionList)
                {
                    if (text.Contains(s))
                    {
                        var regex = new Regex("\\b" + s + "\\b");
                        var match = regex.Match(text);
                        if (match.Success)
                        {
                            int index = match.Index;
                            string temp = text.Remove(index, s.Length);
                            if (temp.Remove(0, index) == " —" && temp.EndsWith("—  —"))
                            {
                                temp = temp.TrimEnd('—').TrimEnd();
                                if (temp.TrimEnd().EndsWith(Environment.NewLine + "—"))
                                    temp = temp.TrimEnd().TrimEnd('—').TrimEnd();
                            }
                            else if (temp.Remove(0, index) == " —" && temp.EndsWith("-  —"))
                            {
                                temp = temp.TrimEnd('—').TrimEnd();
                                if (temp.TrimEnd().EndsWith(Environment.NewLine + "-"))
                                    temp = temp.TrimEnd().TrimEnd('-').TrimEnd();
                            }
                            else if (index == 2 && temp.StartsWith("-  —", StringComparison.Ordinal))
                            {
                                temp = temp.Remove(2, 2);
                            }
                            else if (index == 2 && temp.StartsWith("- —", StringComparison.Ordinal))
                            {
                                temp = temp.Remove(2, 1);
                            }
                            else if (index == 0 && temp.StartsWith(" —", StringComparison.Ordinal))
                            {
                                temp = temp.Remove(0, 2);
                            }
                            else if (index == 0 && temp.StartsWith('—'))
                            {
                                temp = temp.Remove(0, 1);
                            }
                            string pre = string.Empty;
                            if (index > 0)
                                doRepeat = true;

                            bool removeAfter = true;

                            if (temp.Length > index - s.Length + 3 && index > s.Length)
                            {
                                if (temp.Substring(index - s.Length + 1, 3) == ", !")
                                {
                                    temp = temp.Remove(index - s.Length + 1, 2);
                                    removeAfter = false;
                                }
                                else if (temp.Substring(index - s.Length + 1, 3) == ", ?")
                                {
                                    temp = temp.Remove(index - s.Length + 1, 2);
                                    removeAfter = false;
                                }
                                else if (temp.Substring(index - s.Length + 1, 3) == ", .")
                                {
                                    temp = temp.Remove(index - s.Length + 1, 2);
                                    removeAfter = false;
                                }
                            }
                            if (removeAfter && temp.Length > index - s.Length + 2 && index > s.Length)
                            {
                                if (temp.Substring(index - s.Length, 3) == ", !")
                                {
                                    temp = temp.Remove(index - s.Length, 2);
                                    removeAfter = false;
                                }
                                else if (temp.Substring(index - s.Length, 3) == ", ?")
                                {
                                    temp = temp.Remove(index - s.Length, 2);
                                    removeAfter = false;
                                }
                                else if (temp.Substring(index - s.Length, 3) == ", .")
                                {
                                    temp = temp.Remove(index - s.Length, 2);
                                    removeAfter = false;
                                }
                                else if (index > 0 && temp.Substring(index - s.Length).StartsWith(", -—"))
                                {
                                    temp = temp.Remove(index - s.Length, 3);
                                    removeAfter = false;
                                }
                                else if (index > 0 && temp.Substring(index - s.Length).StartsWith(", --"))
                                {
                                    temp = temp.Remove(index - s.Length, 2);
                                    removeAfter = false;
                                }
                            }
                            if (removeAfter && temp.Length > index - s.Length + 2 && index > s.Length)
                            {
                                if (temp.Substring(index - s.Length + 1, 2) == "-!")
                                {
                                    temp = temp.Remove(index - s.Length + 1, 1);
                                    removeAfter = false;
                                }
                                else if (temp.Substring(index - s.Length + 1, 2) == "-?")
                                {
                                    temp = temp.Remove(index - s.Length + 1, 1);
                                    removeAfter = false;
                                }
                                else if (temp.Substring(index - s.Length + 1, 2) == "-.")
                                {
                                    temp = temp.Remove(index - s.Length + 1, 1);
                                    removeAfter = false;
                                }
                            }

                            if (index > 3 && index - 2 < temp.Length && temp.Substring(index - 2).StartsWith(",  —", StringComparison.Ordinal))
                            {
                                temp = temp.Remove(index - 2, 1);
                                index--;
                            }
                            else if (index > 3 && index - 2 < temp.Length && temp.Substring(index - 2).StartsWith(", —", StringComparison.Ordinal))
                            {
                                temp = temp.Remove(index - 2, 1);
                                index--;
                            }

                            if (removeAfter)
                            {
                                if (index == 0)
                                {
                                    if (!string.IsNullOrEmpty(temp) && temp.StartsWith('-'))
                                        temp = temp.Remove(0, 1).Trim();
                                }
                                else if (index == 3 && !string.IsNullOrEmpty(temp) && temp.StartsWith("<i>-", StringComparison.Ordinal))
                                {
                                    temp = temp.Remove(3, 1);
                                }
                                else if (index > 0 && temp.Length > index)
                                {
                                    pre = text.Substring(0, index);
                                    temp = temp.Remove(0, index);
                                    if (pre.EndsWith('-') && temp.StartsWith('-'))
                                        temp = temp.Remove(0, 1);
                                    if (pre.EndsWith("- ") && temp.StartsWith('-'))
                                        temp = temp.Remove(0, 1);
                                }

                                while (temp.Length > 0 && (temp.StartsWith(' ') || temp.StartsWith(',') || temp.StartsWith('.') || temp.StartsWith('!') || temp.StartsWith('?')))
                                {
                                    temp = temp.Remove(0, 1);
                                    doRepeat = true;
                                }
                                if (temp.Length > 0 && s[0].ToString(CultureInfo.InvariantCulture) != s[0].ToString(CultureInfo.InvariantCulture).ToLower())
                                {
                                    temp = char.ToUpper(temp[0]) + temp.Substring(1);
                                    doRepeat = true;
                                }

                                if (pre.EndsWith(' ') && temp.StartsWith('-'))
                                    temp = temp.Remove(0, 1);

                                if (pre.EndsWith(',') && temp.StartsWith('—'))
                                    pre = pre.TrimEnd(',') + " ";
                                temp = pre + temp;
                            }

                            if (temp.EndsWith(Environment.NewLine + "- "))
                                temp = temp.Remove(temp.Length - 4, 4);

                            var st = new StripableText(temp);
                            if (st.StrippedText.Length == 0)
                                return string.Empty;

                            if (!temp.Contains(Environment.NewLine) && text.Contains(Environment.NewLine) && temp.StartsWith('-'))
                                temp = temp.Remove(0, 1).Trim();

                            text = temp;
                        }
                    }
                }
            }
            var lines = text.SplitToLines();
            if (text != oldText && lines.Length == 2)
            {
                if (lines[0] == "-" && lines[1] == "-")
                    return string.Empty;
                if (lines[0].StartsWith('-') && lines[0].Length > 1 && lines[1].Trim() == "-")
                    return lines[0].Remove(0, 1).Trim();
                if (lines[1].StartsWith('-') && lines[1].Length > 1 && lines[0].Trim() == "-")
                    return lines[1].Remove(0, 1).Trim();
                if (lines[1].StartsWith("<i>-", StringComparison.Ordinal) && lines[1].Length > 4 && lines[0].Trim() == "-")
                    return "<i>" + lines[1].Remove(0, 4).Trim();
                if (lines[0].Length > 1 && (lines[1] == "-") || lines[1] == "." || lines[1] == "!" || lines[1] == "?")
                {
                    if (oldText.Contains(Environment.NewLine + "-") && lines[0].StartsWith('-'))
                        lines[0] = lines[0].Remove(0, 1);
                    return lines[0].Trim();
                }
                if (HtmlUtil.RemoveHtmlTags(lines[0], false).Trim() == "-")
                {
                    if (HtmlUtil.RemoveHtmlTags(lines[1], false).Trim() == "-")
                        return string.Empty;
                    if (lines[1].StartsWith('-') && lines[1].Length > 1)
                        return lines[1].Remove(0, 1).Trim();
                    if (lines[1].StartsWith("<i>-", StringComparison.Ordinal) && lines[1].Length > 4)
                        return "<i>" + lines[1].Remove(0, 4).Trim();
                    return lines[1];
                }
                if (HtmlUtil.RemoveHtmlTags(lines[1], false).Trim() == "-")
                {
                    if (HtmlUtil.RemoveHtmlTags(lines[0], false).Trim() == "-")
                        return string.Empty;
                    if (lines[0].StartsWith('-') && lines[0].Length > 1)
                        return lines[0].Remove(0, 1).Trim();
                    if (lines[0].StartsWith("<i>-", StringComparison.Ordinal) && lines[0].Length > 4)
                        return "<i>" + lines[0].Remove(0, 4).Trim();
                    return lines[0];
                }
            }
            if (lines.Length == 2 && string.IsNullOrWhiteSpace(lines[1].Replace(".", string.Empty).Replace("?", string.Empty).Replace("!", string.Empty).Replace("-", string.Empty).Replace("—", string.Empty)))
            {
                text = lines[0];
                lines = text.SplitToLines();
            }
            else if (lines.Length == 2 && string.IsNullOrWhiteSpace(lines[0].Replace(".", string.Empty).Replace("?", string.Empty).Replace("!", string.Empty).Replace("-", string.Empty).Replace("—", string.Empty)))
            {
                text = lines[1];
                lines = text.SplitToLines();
            }
            if (text != oldText && lines.Length == 1 && Utilities.GetNumberOfLines(oldText) == 2)
            {
                if ((oldText.StartsWith('-') || oldText.StartsWith("<i>-", StringComparison.Ordinal)) &&
                    (oldText.Contains("." + Environment.NewLine) || oldText.Contains(".</i>" + Environment.NewLine) ||
                     oldText.Contains("!" + Environment.NewLine) || oldText.Contains("!</i>" + Environment.NewLine) ||
                     oldText.Contains("?" + Environment.NewLine) || oldText.Contains("?</i>" + Environment.NewLine)))
                {
                    if (text.StartsWith("<i>-"))
                        text = "<i>" + text.Remove(0, 4).TrimStart();
                    else
                        text = text.TrimStart('-').TrimStart();
                }
                else if ((oldText.Contains(Environment.NewLine + "-") || oldText.Contains(Environment.NewLine + "<i>-")) &&
                    (oldText.Contains("." + Environment.NewLine) || oldText.Contains(".</i>" + Environment.NewLine) ||
                     oldText.Contains("!" + Environment.NewLine) || oldText.Contains("!</i>" + Environment.NewLine) ||
                     oldText.Contains("?" + Environment.NewLine) || oldText.Contains("?</i>" + Environment.NewLine)))
                {
                    if (text.StartsWith("<i>-"))
                        text = "<i>" + text.Remove(0, 4).TrimStart();
                    else
                        text = text.TrimStart('-').TrimStart();
                }
            }
            return text;
        }
        private void FixStartWithUppercaseLetterAfterPeriodInsideParagraph()
        {
            string fixAction = _language.StartWithUppercaseLetterAfterPeriodInsideParagraph;
            int noOfFixes = 0;
            for (int i = 0; i < Subtitle.Paragraphs.Count; i++)
            {
                Paragraph p = Subtitle.Paragraphs[i];
                string oldText = p.Text;
                StripableText st = new StripableText(p.Text);
                if (p.Text.Length > 3)
                {
                    string text = st.StrippedText.Replace("  ", " ");
                    int start = text.IndexOfAny(new[] { '.', '!', '?' });
                    while (start != -1 && start < text.Length)
                    {
                        if (start > 0 && char.IsDigit(text[start - 1]))
                        {
                            // ignore periods after a number
                        }
                        else if (start + 4 < text.Length && text[start + 1] == ' ')
                        {
                            if (!IsAbbreviation(text, start))
                            {
                                var subText = new StripableText(text.Substring(start + 2));
                                if (subText.StrippedText.Length > 0 && IsTurkishLittleI(subText.StrippedText[0], _encoding, Language))
                                {
                                    if (subText.StrippedText.Length > 1 && !(subText.Pre.Contains('\'') && subText.StrippedText.StartsWith('s')))
                                    {
                                        text = text.Substring(0, start + 2) + subText.Pre + GetTurkishUppercaseLetter(subText.StrippedText[0], _encoding) + subText.StrippedText.Substring(1) + subText.Post;
                                        if (AllowFix(p, fixAction))
                                        {
                                            p.Text = st.Pre + text + st.Post;
                                        }
                                    }
                                }
                                else if (subText.StrippedText.Length > 0 && Configuration.Settings.General.UppercaseLetters.Contains(char.ToUpper(subText.StrippedText[0])))
                                {
                                    if (subText.StrippedText.Length > 1 && !(subText.Pre.Contains('\'') && subText.StrippedText.StartsWith('s')))
                                    {
                                        text = text.Substring(0, start + 2) + subText.Pre + char.ToUpper(subText.StrippedText[0]) + subText.StrippedText.Substring(1) + subText.Post;
                                        if (AllowFix(p, fixAction))
                                        {
                                            p.Text = st.Pre + text + st.Post;
                                        }
                                    }
                                }
                            }
                        }
                        start += 4;
                        if (start < text.Length)
                            start = text.IndexOfAny(new[] { '.', '!', '?' }, start);
                    }
                }

                if (oldText != p.Text)
                {
                    noOfFixes++;
                    AddFixToListView(p, fixAction, oldText, p.Text);
                }
            }
            if (noOfFixes > 0)
            {
                _totalFixes += noOfFixes;
                LogStatus(_language.StartWithUppercaseLetterAfterPeriodInsideParagraph, noOfFixes.ToString(CultureInfo.InvariantCulture));
            }
        }
Beispiel #18
0
        public string RemoveColon(string text)
        {
            if (!Settings.RemoveTextBeforeColon || text.IndexOf(':') < 0)
                return text;

            string preAssTag = string.Empty;
            if (text.StartsWith("{\\", StringComparison.Ordinal) && text.IndexOf('}') > 0)
            {
                int indexOfEndBracket = text.IndexOf('}') + 1;
                preAssTag = text.Substring(0, indexOfEndBracket);
                text = text.Remove(0, indexOfEndBracket).TrimStart();
            }

            // House 7x01 line 52: and she would like you to do three things:
            // Okay or remove???
            var noTagText = HtmlUtil.RemoveHtmlTags(text);
            if (noTagText.IndexOf(':') > 0 && noTagText.IndexOf(':') == noTagText.Length - 1 && noTagText != noTagText.ToUpper() && noTagText.Length > 10)
                return text;

            string newText = string.Empty;
            var lines = text.Trim().SplitToLines();
            int noOfNames = 0;
            int count = 0;
            bool removedInFirstLine = false;
            bool removedInSecondLine = false;
            foreach (string line in lines)
            {
                int indexOfColon = line.IndexOf(':');
                if (indexOfColon > 0 && IsNotInsideBrackets(text, indexOfColon))
                {
                    var pre = line.Substring(0, indexOfColon);
                    var noTagPre = HtmlUtil.RemoveHtmlTags(pre, true);
                    if (Settings.RemoveTextBeforeColonOnlyUppercase && noTagPre != noTagPre.ToUpper())
                    {
                        newText = (newText + Environment.NewLine + line).Trim();
                    }
                    else
                    {
                        var st = new StripableText(pre);
                        if (count == 1 && newText.Length > 1 && removedInFirstLine && Utilities.CountTagInText(line, ':') == 1 &&
                            ".?!".IndexOf(newText[newText.Length - 1]) < 0 && newText.LineEndsWithHtmlTag(true) &&
                            line != line.ToUpper())
                        {
                            if (pre.Contains("<i>") && line.Contains("</i>"))
                                newText = newText + Environment.NewLine + "<i>" + line;
                            else if (pre.Contains("<b>") && line.Contains("</b>"))
                                newText = newText + Environment.NewLine + "<b>" + line;
                            else if (pre.Contains("<u>") && line.Contains("</u>"))
                                newText = newText + Environment.NewLine + "<u>" + line;
                            else if (pre.Contains('[') && line.Contains(']'))
                                newText = newText + Environment.NewLine + "[" + line;
                            else if (pre.Contains('(') && line.EndsWith(')'))
                                newText = newText + Environment.NewLine + "(" + line;
                            else
                                newText = newText + Environment.NewLine + line;
                        }
                        else if (count == 1 && newText.Length > 1 && indexOfColon > 15 && line.Substring(0, indexOfColon).Contains(' ') && Utilities.CountTagInText(line, ':') == 1 &&
                            ".?!".IndexOf(newText[newText.Length - 1]) < 0 && newText.LineEndsWithHtmlTag(true) &&
                            line != line.ToUpper())
                        {
                            if (pre.Contains("<i>") && line.Contains("</i>"))
                                newText = newText + Environment.NewLine + "<i>" + line;
                            else if (pre.Contains("<b>") && line.Contains("</b>"))
                                newText = newText + Environment.NewLine + "<b>" + line;
                            else if (pre.Contains("<u>") && line.Contains("</u>"))
                                newText = newText + Environment.NewLine + "<u>" + line;
                            else if (pre.Contains('[') && line.Contains(']'))
                                newText = newText + Environment.NewLine + "[" + line;
                            else if (pre.Contains('(') && line.EndsWith(')'))
                                newText = newText + Environment.NewLine + "(" + line;
                            else
                                newText = newText + Environment.NewLine + line;
                        }
                        else if (Utilities.CountTagInText(line, ':') == 1)
                        {
                            bool remove = true;
                            if (indexOfColon > 0 && indexOfColon < line.Length - 1)
                            {
                                remove = !Utilities.IsBetweenNumbers(line, indexOfColon);
                            }

                            if (!DoRemove(pre))
                                remove = false;

                            if (remove && Settings.ColonSeparateLine)
                            {
                                if (indexOfColon == line.Length - 1 || line.Substring(indexOfColon + 1).StartsWith(Environment.NewLine, StringComparison.Ordinal))
                                    remove = true;
                                else
                                    remove = false;
                            }

                            if (remove)
                            {
                                var content = line.Substring(indexOfColon + 1).Trim();
                                if (content.Length > 0)
                                {
                                    if (pre.Contains("<i>") && content.Contains("</i>"))
                                        newText = newText + Environment.NewLine + "<i>" + content;
                                    else if (pre.Contains("<b>") && content.Contains("</b>"))
                                        newText = newText + Environment.NewLine + "<b>" + content;
                                    else if (pre.Contains('[') && content.Contains(']'))
                                        newText = newText + Environment.NewLine + "[" + content;
                                    else if (pre.Contains('(') && content.EndsWith(')'))
                                        newText = newText + Environment.NewLine + "(" + content;
                                    else
                                        newText = newText + Environment.NewLine + content;

                                    if (count == 0)
                                        removedInFirstLine = true;
                                    else if (count == 1)
                                        removedInSecondLine = true;
                                }
                                newText = newText.Trim();

                                if (text.StartsWith('(') && newText.EndsWith(')') && !newText.Contains('('))
                                    newText = newText.TrimEnd(')');
                                else if (text.StartsWith('[') && newText.EndsWith(']') && !newText.Contains('['))
                                    newText = newText.TrimEnd(']');
                                else if (newText.EndsWith("</i>", StringComparison.Ordinal) && text.StartsWith("<i>", StringComparison.Ordinal) && !newText.StartsWith("<i>", StringComparison.Ordinal))
                                    newText = "<i>" + newText;
                                else if (newText.EndsWith("</b>", StringComparison.Ordinal) && text.StartsWith("<b>", StringComparison.Ordinal) && !newText.StartsWith("<b>", StringComparison.Ordinal))
                                    newText = "<b>" + newText;
                                else if (newText.EndsWith("</u>", StringComparison.Ordinal) && text.StartsWith("<u>", StringComparison.Ordinal) && !newText.StartsWith("<u>", StringComparison.Ordinal))
                                    newText = "<u>" + newText;

                                if (!IsHIDescription(st.StrippedText))
                                    noOfNames++;
                            }
                            else
                            {
                                newText = (newText + Environment.NewLine + line).Trim();
                                if (newText.EndsWith("</i>", StringComparison.Ordinal) && text.StartsWith("<i>", StringComparison.Ordinal) && !newText.StartsWith("<i>", StringComparison.Ordinal))
                                    newText = "<i>" + newText;
                                else if (newText.EndsWith("</b>", StringComparison.Ordinal) && text.StartsWith("<b>", StringComparison.Ordinal) && !newText.StartsWith("<b>", StringComparison.Ordinal))
                                    newText = "<b>" + newText;
                                else if ((newText.EndsWith("</u>", StringComparison.Ordinal) && text.StartsWith("<u>", StringComparison.Ordinal) && !newText.StartsWith("<u>", StringComparison.Ordinal)))
                                    newText = "<u>" + newText;
                            }
                        }
                        else
                        {
                            string s2 = line;
                            for (int k = 0; k < 2; k++)
                            {
                                if (s2.Contains(':'))
                                {
                                    int colonIndex = s2.IndexOf(':');
                                    string start = s2.Substring(0, colonIndex);

                                    if (!Settings.RemoveTextBeforeColonOnlyUppercase || start == start.ToUpper())
                                    {
                                        int endIndex = start.LastIndexOfAny(new[] { '.', '!', '?' });
                                        if (colonIndex > 0 && colonIndex < s2.Length - 1)
                                        {
                                            if (char.IsDigit(s2[colonIndex - 1]) && char.IsDigit(s2[colonIndex + 1]))
                                                endIndex = 0;
                                        }
                                        if (endIndex < 0)
                                            s2 = s2.Remove(0, colonIndex - endIndex);
                                        else if (endIndex > 0)
                                            s2 = s2.Remove(endIndex + 1, colonIndex - endIndex);
                                    }

                                    if (count == 0)
                                        removedInFirstLine = true;
                                    else if (count == 1)
                                        removedInSecondLine = true;
                                }
                            }
                            newText = newText + Environment.NewLine + s2;
                            newText = newText.Trim();
                        }
                    }
                }
                else
                {
                    newText = (newText + Environment.NewLine + line).Trim();

                    if (newText.EndsWith("</i>", StringComparison.Ordinal) && text.StartsWith("<i>", StringComparison.Ordinal) && !newText.StartsWith("<i>", StringComparison.Ordinal))
                        newText = "<i>" + newText;
                    else if (newText.EndsWith("</b>", StringComparison.Ordinal) && text.StartsWith("<b>", StringComparison.Ordinal) && !newText.StartsWith("<b>", StringComparison.Ordinal))
                        newText = "<b>" + newText;
                }
                count++;
            }
            newText = newText.Trim();
            if (noOfNames > 0 && Utilities.GetNumberOfLines(newText) == 2)
            {
                int indexOfDialogChar = newText.IndexOf('-');
                bool insertDash = true;
                var arr = newText.SplitToLines();
                if (arr.Length == 2 && arr[0].Length > 1 && arr[1].Length > 1)
                {
                    string arr0 = new StripableText(arr[0]).StrippedText;
                    string arr1 = new StripableText(arr[1]).StrippedText;

                    //line continuation?
                    if (arr0.Length > 0 && arr1.Length > 1 && (Utilities.LowercaseLetters + ",").Contains(arr0.Substring(arr0.Length - 1), StringComparison.Ordinal) &&
                        Utilities.LowercaseLetters.Contains(arr1[0]))
                    {
                        if (new StripableText(arr[1]).Pre.Contains("...") == false)
                            insertDash = false;
                    }

                    string tempArr0QuoteTrimmed = arr[0].TrimEnd('"');
                    if (arr0.Length > 0 && arr1.Length > 1 &&
                        !(tempArr0QuoteTrimmed.EndsWith('.') || tempArr0QuoteTrimmed.EndsWith('!') || tempArr0QuoteTrimmed.EndsWith('?') || tempArr0QuoteTrimmed.EndsWith("</i>", StringComparison.Ordinal)) &&
                        !(new StripableText(arr[1]).Pre.Contains('-')))
                    {
                        insertDash = false;
                    }

                    if (removedInFirstLine && !removedInSecondLine && !text.StartsWith('-') && !text.StartsWith("<i>-", StringComparison.Ordinal))
                    {
                        if (!insertDash || (!arr[1].StartsWith('-') && !arr[1].StartsWith("<i>-", StringComparison.Ordinal)))
                            insertDash = false;
                    }
                }

                if (insertDash)
                {
                    if (indexOfDialogChar < 0 || indexOfDialogChar > 4)
                    {
                        var st = new StripableText(newText, string.Empty, string.Empty);
                        newText = st.Pre + "- " + st.StrippedText + st.Post;
                    }

                    int indexOfNewLine = newText.IndexOf(Environment.NewLine, StringComparison.Ordinal);
                    string second = newText.Substring(indexOfNewLine).Trim();
                    indexOfDialogChar = second.IndexOf('-');
                    if (indexOfDialogChar < 0 || indexOfDialogChar > 6)
                    {
                        var st = new StripableText(second, String.Empty, String.Empty);
                        second = st.Pre + "- " + st.StrippedText + st.Post;
                        newText = newText.Remove(indexOfNewLine) + Environment.NewLine + second;
                    }
                }
            }
            else if (!newText.Contains(Environment.NewLine) && newText.Contains('-'))
            {
                var st = new StripableText(newText);
                if (st.Pre.Contains('-'))
                    newText = st.Pre.Replace("-", string.Empty) + st.StrippedText + st.Post;
            }
            else if (Utilities.GetNumberOfLines(newText) == 2 && removedInFirstLine == false && removedInSecondLine)
            {
                string noTags = HtmlUtil.RemoveHtmlTags(newText, true).Trim();
                bool insertDash = noTags.StartsWith('-') && Utilities.CountTagInText(noTags, '-') == 1;
                if (insertDash)
                {
                    if (newText.Contains(Environment.NewLine + "<i>"))
                        newText = newText.Replace(Environment.NewLine + "<i>", Environment.NewLine + "<i>- ");
                    else
                        newText = newText.Replace(Environment.NewLine, Environment.NewLine + "- ");
                }
            }
            if (text.Contains("<i>", StringComparison.Ordinal) && !newText.Contains("<i>", StringComparison.Ordinal) && newText.EndsWith("</i>", StringComparison.Ordinal))
                newText = "<i>" + newText;

            if (string.IsNullOrWhiteSpace(newText))
                return string.Empty;

            return preAssTag + newText;
        }
        public string FixOcrErrorsViaHardcodedRules(string input, string lastLine, HashSet<string> abbreviationList)
        {
            if (!Configuration.Settings.Tools.OcrFixUseHardcodedRules)
                return input;

            input = input.Replace(",...", "...");

            if (input.StartsWith("..") && !input.StartsWith("...", StringComparison.Ordinal))
                input = "." + input;

            string pre = string.Empty;
            if (input.StartsWith("- ", StringComparison.Ordinal))
            {
                pre = "- ";
                input = input.Remove(0, 2);
            }
            else if (input.StartsWith('-'))
            {
                pre = "-";
                input = input.Remove(0, 1);
            }

            bool hasDotDot = input.Contains("..") || input.Contains(". .");
            if (hasDotDot)
            {
                if (input.Length > 5 && input.StartsWith("..") && Utilities.AllLettersAndNumbers.Contains(input[2]))
                    input = "..." + input.Remove(0, 2);
                if (input.Length > 7 && input.StartsWith("<i>..") && Utilities.AllLettersAndNumbers.Contains(input[5]))
                    input = "<i>..." + input.Remove(0, 5);

                if (input.Length > 5 && input.StartsWith(".. ") && Utilities.AllLettersAndNumbers.Contains(input[3]))
                    input = "..." + input.Remove(0, 3);
                if (input.Length > 7 && input.StartsWith("<i>.. ") && Utilities.AllLettersAndNumbers.Contains(input[6]))
                    input = "<i>..." + input.Remove(0, 6);
                if (input.Contains(Environment.NewLine + ".. "))
                    input = input.Replace(Environment.NewLine + ".. ", Environment.NewLine + "...");
                if (input.Contains(Environment.NewLine + "<i>.. "))
                    input = input.Replace(Environment.NewLine + "<i>.. ", Environment.NewLine + "<i>...");

                if (input.StartsWith(". ..", StringComparison.Ordinal))
                    input = "..." + input.Remove(0, 4);
                if (input.StartsWith(".. .", StringComparison.Ordinal))
                    input = "..." + input.Remove(0, 4);
                if (input.StartsWith(". . ."))
                    input = "..." + input.Remove(0, 5);
                if (input.StartsWith("... ", StringComparison.Ordinal))
                    input = input.Remove(3, 1);
            }

            input = pre + input;

            if (hasDotDot)
            {
                if (input.StartsWith("<i>. ..", StringComparison.Ordinal))
                    input = "<i>..." + input.Remove(0, 7);
                if (input.StartsWith("<i>.. .", StringComparison.Ordinal))
                    input = "<i>..." + input.Remove(0, 7);

                if (input.StartsWith("<i>. . .", StringComparison.Ordinal))
                    input = "<i>..." + input.Remove(0, 8);
                if (input.StartsWith("<i>... ", StringComparison.Ordinal))
                    input = input.Remove(6, 1);
                if (input.StartsWith(". . <i>.", StringComparison.Ordinal))
                    input = "<i>..." + input.Remove(0, 8);

                if (input.StartsWith("...<i>", StringComparison.Ordinal) && (input.IndexOf("</i>", StringComparison.Ordinal) > input.IndexOf(' ')))
                    input = "<i>..." + input.Remove(0, 6);

                if (input.EndsWith(". ..", StringComparison.Ordinal))
                    input = input.Remove(input.Length - 4, 4) + "...";
                if (input.EndsWith(".. .", StringComparison.Ordinal))
                    input = input.Remove(input.Length - 4, 4) + "...";
                if (input.EndsWith(". . .", StringComparison.Ordinal))
                    input = input.Remove(input.Length - 5, 5) + "...";
                if (input.EndsWith(". ..."))
                    input = input.Remove(input.Length - 5, 5) + "...";

                if (input.EndsWith(". ..</i>", StringComparison.Ordinal))
                    input = input.Remove(input.Length - 8, 8) + "...</i>";
                if (input.EndsWith(".. .</i>", StringComparison.Ordinal))
                    input = input.Remove(input.Length - 8, 8) + "...</i>";
                if (input.EndsWith(". . .</i>", StringComparison.Ordinal))
                    input = input.Remove(input.Length - 9, 9) + "...</i>";
                if (input.EndsWith(". ...</i>", StringComparison.Ordinal))
                    input = input.Remove(input.Length - 9, 9) + "...</i>";

                if (input.EndsWith(".</i> . .", StringComparison.Ordinal))
                    input = input.Remove(input.Length - 9, 9) + "...</i>";
                if (input.EndsWith(".</i>..", StringComparison.Ordinal))
                    input = input.Remove(input.Length - 7, 7) + "...</i>";
                input = input.Replace(".</i> . ." + Environment.NewLine, "...</i>" + Environment.NewLine);

                input = input.Replace(".. ?", "..?");
                input = input.Replace("..?", "...?");
                input = input.Replace("....?", "...?");

                input = input.Replace(".. !", "..!");
                input = input.Replace("..!", "...!");
                input = input.Replace("....!", "...!");

                input = input.Replace("... ?", "...?");
                input = input.Replace("... !", "...!");

                input = input.Replace("....", "...");
                input = input.Replace("....", "...");

                if (input.StartsWith("- ...") && lastLine != null && lastLine.EndsWith("...") && !(input.Contains(Environment.NewLine + "-")))
                    input = input.Remove(0, 2);
                if (input.StartsWith("-...") && lastLine != null && lastLine.EndsWith("...") && !(input.Contains(Environment.NewLine + "-")))
                    input = input.Remove(0, 1);
            }

            if (input.Length > 2 && input[0] == '-' && Utilities.UppercaseLetters.Contains(input[1]))
            {
                input = input.Insert(1, " ");
            }

            if (input.Length > 5 && input.StartsWith("<i>-", StringComparison.Ordinal) && Utilities.UppercaseLetters.Contains(input[4]))
            {
                input = input.Insert(4, " ");
            }

            int idx = input.IndexOf(Environment.NewLine + "-", StringComparison.Ordinal);
            if (idx > 0 && idx + Environment.NewLine.Length + 1 < input.Length && Utilities.UppercaseLetters.Contains(input[idx + Environment.NewLine.Length + 1]))
            {
                input = input.Insert(idx + Environment.NewLine.Length + 1, " ");
            }

            idx = input.IndexOf(Environment.NewLine + "<i>-", StringComparison.Ordinal);
            if (idx > 0 && Utilities.UppercaseLetters.Contains(input[idx + Environment.NewLine.Length + 4]))
            {
                input = input.Insert(idx + Environment.NewLine.Length + 4, " ");
            }

            if (string.IsNullOrEmpty(lastLine) ||
                lastLine.EndsWith('.') ||
                lastLine.EndsWith('!') ||
                lastLine.EndsWith('?') ||
                lastLine.EndsWith(']') ||
                lastLine.EndsWith('♪'))
            {
                lastLine = HtmlUtil.RemoveHtmlTags(lastLine);
                var st = new StripableText(input);
                if (lastLine == null || (!lastLine.EndsWith("...") && !EndsWithAbbreviation(lastLine, abbreviationList)))
                {
                    if (st.StrippedText.Length > 0 && !char.IsUpper(st.StrippedText[0]) && !st.Pre.EndsWith('[') && !st.Pre.EndsWith('(') && !st.Pre.EndsWith("..."))
                    {
                        if (!HtmlUtil.StartsWithUrl(st.StrippedText))
                        {
                            var uppercaseLetter = char.ToUpper(st.StrippedText[0]);
                            if (st.StrippedText.Length > 1 && uppercaseLetter == 'L' && @"abcdfghjklmnpqrstvwxz".Contains(st.StrippedText[1]))
                                uppercaseLetter = 'I';
                            if ((st.StrippedText.StartsWith("lo ") || st.StrippedText == "lo.") && _threeLetterIsoLanguageName == "ita")
                                uppercaseLetter = 'I';
                            if ((st.StrippedText.StartsWith("k ", StringComparison.Ordinal) || st.StrippedText.StartsWith("m ", StringComparison.Ordinal) || st.StrippedText.StartsWith("n ") || st.StrippedText.StartsWith("r ") || st.StrippedText.StartsWith("s ") || st.StrippedText.StartsWith("t ")) &&
                                st.Pre.EndsWith('\'') && _threeLetterIsoLanguageName == "nld")
                                uppercaseLetter = st.StrippedText[0];
                            if ((st.StrippedText.StartsWith("l-I'll ", StringComparison.Ordinal) || st.StrippedText.StartsWith("l-l'll ", StringComparison.Ordinal)) && _threeLetterIsoLanguageName == "eng")
                            {
                                uppercaseLetter = 'I';
                                st.StrippedText = "I-I" + st.StrippedText.Remove(0, 3);
                            }
                            st.StrippedText = uppercaseLetter + st.StrippedText.Substring(1);
                            input = st.Pre + st.StrippedText + st.Post;
                        }
                    }
                }
            }

            // lines ending with ". should often end at ... (of no other quotes exists near by)
            if ((lastLine == null || !lastLine.Contains('"')) &&
                input.EndsWith("\".") && input.IndexOf('"') == input.LastIndexOf('"') && input.Length > 3)
            {
                var lastChar = input[input.Length - 3];
                if (!char.IsDigit(lastChar))
                {
                    int position = input.Length - 2;
                    input = input.Remove(position).Insert(position, "...");
                }
            }

            // change '<number><space>1' to '<number>1'
            if (input.Contains('1'))
            {
                Match match = RegExNumber1.Match(input);
                while (match.Success)
                {
                    bool doFix = true;

                    if (match.Index + 4 < input.Length && input[match.Index + 3] == '/' && char.IsDigit(input[match.Index + 4]))
                        doFix = false;

                    if (doFix)
                    {
                        input = input.Substring(0, match.Index + 1) + input.Substring(match.Index + 2);
                        match = RegExNumber1.Match(input);
                    }
                    else
                    {
                        match = RegExNumber1.Match(input, match.Index + 1);
                    }
                }
            }

            // change '' to "
            input = input.Replace("''", "\"");

            // change 'sequeI of' to 'sequel of'
            if (input.Contains('I'))
            {
                var match = RegExUppercaseI.Match(input);
                while (match.Success)
                {
                    bool doFix = true;
                    if (match.Index >= 1 && input.Substring(match.Index - 1).StartsWith("Mc"))
                        doFix = false;
                    if (match.Index >= 2 && input.Substring(match.Index - 2).StartsWith("Mac"))
                        doFix = false;

                    if (doFix)
                        input = input.Substring(0, match.Index + 1) + "l" + input.Substring(match.Index + 2);

                    if (match.Index + 1 < input.Length)
                        match = RegExUppercaseI.Match(input, match.Index + 1);
                    else
                        break; // end while
                }
            }

            // change 'NlCE' to 'NICE'
            if (input.Contains('l'))
            {
                var match = RegExLowercaseL.Match(input);
                while (match.Success)
                {
                    input = input.Substring(0, match.Index + 1) + "I" + input.Substring(match.Index + 2);
                    match = RegExLowercaseL.Match(input);
                }
            }

            return input;
        }
Beispiel #20
0
        public string RemoveTextFromHearImpaired(string text)
        {
            if (Settings.RemoveWhereContains)
            {
                foreach (var removeIfTextContain in Settings.RemoveIfTextContains)
                {
                    if (text.Contains(removeIfTextContain))
                        return string.Empty;
                }
            }

            string oldText = text;
            text = RemoveColon(text);
            string pre = " >-\"'‘`´♪¿¡.…—";
            string post = " -\"'`´♪.!?:…—";
            if (Settings.RemoveTextBetweenCustomTags)
            {
                pre = pre.Replace(Settings.CustomStart, string.Empty);
                post = post.Replace(Settings.CustomEnd, string.Empty);
            }
            var st = new StripableText(text, pre, post);
            var sb = new StringBuilder();
            var parts = st.StrippedText.Trim().SplitToLines();
            int lineNumber = 0;
            bool removedDialogInFirstLine = false;
            int noOfNamesRemoved = 0;
            int noOfNamesRemovedNotInLineOne = 0;
            foreach (string s in parts)
            {
                var stSub = new StripableText(s, pre, post);
                string tempStrippedtext = stSub.StrippedText;
                if (lineNumber == parts.Length - 1 && st.Post.Contains('?'))
                    tempStrippedtext += "?";
                else if (stSub.Post.Contains('?'))
                    tempStrippedtext += "?";
                if (!StartAndEndsWithHearImpariedTags(tempStrippedtext))
                {
                    if (removedDialogInFirstLine && stSub.Pre.Contains("- ", StringComparison.Ordinal))
                        stSub.Pre = stSub.Pre.Replace("- ", string.Empty);

                    string newText = stSub.StrippedText;

                    newText = RemoveHearImpairedTags(newText);

                    if (stSub.StrippedText.Length - newText.Length > 2)
                    {
                        string removedText = GetRemovedString(stSub.StrippedText, newText);
                        if (!IsHIDescription(removedText))
                        {
                            noOfNamesRemoved++;
                            if (lineNumber > 0)
                                noOfNamesRemovedNotInLineOne++;
                        }
                    }
                    sb.AppendLine(stSub.Pre + newText + stSub.Post);
                }
                else
                {
                    if (!IsHIDescription(stSub.StrippedText))
                    {
                        noOfNamesRemoved++;
                        if (lineNumber > 0)
                            noOfNamesRemovedNotInLineOne++;
                    }

                    if (st.Pre.Contains("- ") && lineNumber == 0)
                    {
                        st.Pre = st.Pre.Replace("- ", string.Empty);
                        removedDialogInFirstLine = true;
                    }
                    else if (st.Pre == "-" && lineNumber == 0)
                    {
                        st.Pre = string.Empty;
                        removedDialogInFirstLine = true;
                    }

                    if (st.Pre.Contains("<i>") && stSub.Post.Contains("</i>"))
                        st.Pre = st.Pre.Replace("<i>", string.Empty);

                    if (s.Contains("<i>") && !s.Contains("</i>") && st.Post.Contains("</i>"))
                        st.Post = st.Post.Replace("</i>", string.Empty);
                }
                lineNumber++;
            }

            text = st.Pre + sb.ToString().Trim() + st.Post;
            text = text.Replace("  ", " ").Trim();
            text = text.Replace("<i></i>", string.Empty);
            text = text.Replace("<i> </i>", " ");
            text = text.Replace("<b></b>", string.Empty);
            text = text.Replace("<b> </b>", " ");
            text = text.Replace("<u></u>", string.Empty);
            text = text.Replace("<u> </u>", " ");
            text = RemoveEmptyFontTag(text);
            text = text.Replace("  ", " ").Trim();
            text = RemoveColon(text);
            text = RemoveLineIfAllUppercase(text);
            text = RemoveHearImpairedtagsInsideLine(text);
            if (Settings.RemoveInterjections)
                text = RemoveInterjections(text);

            st = new StripableText(text, " >-\"'‘`´♪¿¡.…—", " -\"'`´♪.!?:…—");
            text = st.StrippedText;
            if (StartAndEndsWithHearImpariedTags(text))
            {
                text = RemoveStartEndTags(text);
            }

            text = RemoveHearImpairedTags(text);

            // fix 3 lines to two liners - if only two lines
            if (noOfNamesRemoved >= 1 && Utilities.GetNumberOfLines(text) == 3)
            {
                char[] chars = { '!', '?', '.' };
                string[] a = HtmlUtil.RemoveHtmlTags(text).Replace(" ", string.Empty).Split(chars, StringSplitOptions.RemoveEmptyEntries);
                if (a.Length == 2)
                {
                    var temp = new StripableText(text);
                    temp.StrippedText = temp.StrippedText.Replace(Environment.NewLine, " ");
                    int splitIndex = temp.StrippedText.LastIndexOfAny(chars);
                    if (splitIndex > 0)
                    {
                        text = temp.Pre + temp.StrippedText.Insert(splitIndex + 1, Environment.NewLine) + temp.Post;
                    }
                }
            }

            if (!text.StartsWith('-') && noOfNamesRemoved >= 1 && Utilities.GetNumberOfLines(text) == 2)
            {
                string[] arr = text.SplitToLines();
                string part0 = arr[0].Trim().Replace("</i>", string.Empty).Trim();
                if (!part0.EndsWith(',') && (!part0.EndsWith('-') || noOfNamesRemovedNotInLineOne > 0))
                {
                    if (part0.Length > 0 && @".!?".Contains(part0[part0.Length - 1]))
                    {
                        if (noOfNamesRemovedNotInLineOne > 0)
                        {
                            if (!st.Pre.Contains('-'))
                                text = "- " + text.Replace(Environment.NewLine, Environment.NewLine + "- ");
                            if (!text.Contains(Environment.NewLine + "-") && !text.Contains(Environment.NewLine + "<i>-"))
                                text = text.Replace(Environment.NewLine, Environment.NewLine + "- ");
                        }
                    }
                }
            }

            if (!string.IsNullOrEmpty(text) || (st.Pre.Contains('♪') || st.Post.Contains('♪')))
                text = st.Pre + text + st.Post;

            if (oldText.TrimStart().StartsWith("- ", StringComparison.Ordinal) && text != null && !text.Contains(Environment.NewLine) &&
                (oldText.Contains(Environment.NewLine + "- ", StringComparison.Ordinal) ||
                 oldText.Contains(Environment.NewLine + " - ", StringComparison.Ordinal) ||
                 oldText.Contains(Environment.NewLine + "<i>- ", StringComparison.Ordinal) ||
                 oldText.Contains(Environment.NewLine + "<i> - ", StringComparison.Ordinal)))
            {
                text = text.TrimStart().TrimStart('-').TrimStart();
            }
            if (oldText.TrimStart().StartsWith("-", StringComparison.Ordinal) &&
                !oldText.TrimStart().StartsWith("--", StringComparison.Ordinal) &&
                text != null && !text.Contains(Environment.NewLine) &&
                (oldText.Contains(Environment.NewLine + "-", StringComparison.Ordinal) && !oldText.Contains(Environment.NewLine + "--", StringComparison.Ordinal) ||
                 oldText.Contains(Environment.NewLine + " - ", StringComparison.Ordinal) ||
                 oldText.Contains(Environment.NewLine + "<i>- ", StringComparison.Ordinal) ||
                 oldText.Contains(Environment.NewLine + "<i> - ", StringComparison.Ordinal)))
            {
                text = text.TrimStart().TrimStart('-').TrimStart();
            }

            if (oldText.TrimStart().StartsWith("<i>- ", StringComparison.Ordinal) &&
                text != null && text.StartsWith("<i>- ", StringComparison.Ordinal) && !text.Contains(Environment.NewLine, StringComparison.Ordinal) &&
                (oldText.Contains(Environment.NewLine + "- ", StringComparison.Ordinal) ||
                 oldText.Contains(Environment.NewLine + " - ", StringComparison.Ordinal) ||
                 oldText.Contains(Environment.NewLine + "<i>- ", StringComparison.Ordinal) ||
                 oldText.Contains(Environment.NewLine + "<i> - ", StringComparison.Ordinal)))
            {
                text = text.Remove(3, 2);
            }

            if (text != null && !text.Contains(Environment.NewLine, StringComparison.Ordinal) &&
                (oldText.Contains(':') && !text.Contains(':') ||
                 oldText.Contains('[') && !text.Contains('[') ||
                 oldText.Contains('(') && !text.Contains('(') ||
                 oldText.Contains('{') && !text.Contains('{')) &&
                (oldText.Contains(Environment.NewLine + "- ", StringComparison.Ordinal) ||
                 oldText.Contains(Environment.NewLine + " - ", StringComparison.Ordinal) ||
                 oldText.Contains(Environment.NewLine + "<i>- ", StringComparison.Ordinal) ||
                 oldText.Contains(Environment.NewLine + "<i> - ", StringComparison.Ordinal)))
            {
                text = text.TrimStart().TrimStart('-').TrimStart();
            }

            if (oldText != text)
            {
                // insert spaces before "-"
                text = text.Replace(Environment.NewLine + "- <i>", Environment.NewLine + "<i>- ");
                text = text.Replace(Environment.NewLine + "-<i>", Environment.NewLine + "<i>- ");
                if (text.StartsWith('-') && text.Length > 2 && text[1] != ' ' && text[1] != '-')
                    text = text.Insert(1, " ");
                if (text.StartsWith("<i>-", StringComparison.Ordinal) && text.Length > 5 && text[4] != ' ' && text[4] != '-')
                    text = text.Insert(4, " ");
                if (text.Contains(Environment.NewLine + "-", StringComparison.Ordinal))
                {
                    int index = text.IndexOf(Environment.NewLine + "-", StringComparison.Ordinal);
                    if (index + 4 < text.Length && text[index + Environment.NewLine.Length + 1] != ' ' && text[index + Environment.NewLine.Length + 1] != '-')
                        text = text.Insert(index + Environment.NewLine.Length + 1, " ");
                }
                if (text.Contains(Environment.NewLine + "<i>-", StringComparison.Ordinal))
                {
                    int index = text.IndexOf(Environment.NewLine + "<i>-", StringComparison.Ordinal);
                    if (index + 5 < text.Length && text[index + Environment.NewLine.Length + 4] != ' ' && text[index + Environment.NewLine.Length + 4] != '-')
                        text = text.Insert(index + Environment.NewLine.Length + 4, " ");
                }
            }
            return text.Trim();
        }
        public static string FixHyphensRemove(Subtitle subtitle, int i)
        {
            Paragraph p = subtitle.Paragraphs[i];
            string text = p.Text;

            if (text.TrimStart().StartsWith('-') || text.TrimStart().StartsWith("<i>-", StringComparison.OrdinalIgnoreCase) || text.TrimStart().StartsWith("<i> -", StringComparison.OrdinalIgnoreCase) || text.Contains(Environment.NewLine + '-') || text.Contains(Environment.NewLine + " -") || text.Contains(Environment.NewLine + "<i>-") || text.Contains(Environment.NewLine + "<i> -") || text.Contains(Environment.NewLine + "<I>-") || text.Contains(Environment.NewLine + "<I> -"))
            {
                Paragraph prev = subtitle.GetParagraphOrDefault(i - 1);

                if (prev == null || !HtmlUtil.RemoveHtmlTags(prev.Text).TrimEnd().EndsWith('-') || HtmlUtil.RemoveHtmlTags(prev.Text).TrimEnd().EndsWith("--", StringComparison.Ordinal))
                {
                    string[] noTaglines = HtmlUtil.RemoveHtmlTags(p.Text).SplitToLines();
                    int startHyphenCount = noTaglines.Count(line => line.TrimStart().StartsWith('-'));
                    if (startHyphenCount == 1)
                    {
                        bool remove = true;
                        string[] noTagparts = HtmlUtil.RemoveHtmlTags(text).SplitToLines();
                        if (noTagparts.Length == 2)
                        {
                            if (noTagparts[0].TrimStart().StartsWith('-') && noTagparts[1].Contains(": "))
                            {
                                remove = false;
                            }

                            if (noTagparts[1].TrimStart().StartsWith('-') && noTagparts[0].Contains(": "))
                            {
                                remove = false;
                            }
                        }

                        if (remove)
                        {
                            int idx = text.IndexOf('-');
                            StripableText st = new StripableText(text);
                            if (idx < 5 && st.Pre.Length >= idx)
                            {
                                text = text.Remove(idx, 1).TrimStart();
                                idx = text.IndexOf('-');
                                st = new StripableText(text);
                                if (idx < 5 && idx >= 0 && st.Pre.Length >= idx)
                                {
                                    text = text.Remove(idx, 1).TrimStart();
                                    st = new StripableText(text);
                                }

                                idx = text.IndexOf('-');
                                if (idx < 5 && idx >= 0 && st.Pre.Length >= idx)
                                {
                                    text = text.Remove(idx, 1).TrimStart();
                                }

                                text = RemoveSpacesBeginLine(text);
                            }
                            else
                            {
                                int indexOfNewLine = text.IndexOf(Environment.NewLine, StringComparison.Ordinal);
                                if (indexOfNewLine > 0)
                                {
                                    idx = text.IndexOf('-', indexOfNewLine);
                                    if (idx >= 0 && indexOfNewLine + 5 > indexOfNewLine)
                                    {
                                        text = text.Remove(idx, 1).TrimStart().Replace(Environment.NewLine + " ", Environment.NewLine);

                                        idx = text.IndexOf('-', indexOfNewLine);
                                        if (idx >= 0 && indexOfNewLine + 5 > indexOfNewLine)
                                        {
                                            text = text.Remove(idx, 1).TrimStart();

                                            text = RemoveSpacesBeginLine(text);
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
            else if (text.StartsWith("<font ", StringComparison.Ordinal))
            {
                Paragraph prev = subtitle.GetParagraphOrDefault(i - 1);
                if (prev == null || !HtmlUtil.RemoveHtmlTags(prev.Text).TrimEnd().EndsWith('-') || HtmlUtil.RemoveHtmlTags(prev.Text).TrimEnd().EndsWith("--", StringComparison.Ordinal))
                {
                    StripableText st = new StripableText(text);
                    if (st.Pre.EndsWith('-') || st.Pre.EndsWith("- ", StringComparison.Ordinal))
                    {
                        text = st.Pre.TrimEnd('-', ' ') + st.StrippedText + st.Post;
                    }
                }
            }

            return text;
        }
Beispiel #22
0
        private void MergeLinesWithContinuation()
        {
            var temp = new Subtitle();
            bool skipNext = false;
            for (int i = 0; i < _subtitle.Paragraphs.Count; i++)
            {
                Paragraph p = _subtitle.Paragraphs[i];
                if (!skipNext)
                {
                    Paragraph next = _subtitle.GetParagraphOrDefault(i + 1);

                    bool merge = !(p.Text.Contains(Environment.NewLine) || next == null);

                    if (merge && (p.Text.TrimEnd().EndsWith('!') || p.Text.TrimEnd().EndsWith('.')))
                    {
                        var st = new StripableText(p.Text);
                        if (st.StrippedText.Length > 0 && Utilities.UppercaseLetters.Contains(st.StrippedText[0].ToString(CultureInfo.InvariantCulture)))
                            merge = false;
                    }

                    if (merge && (p.Text.Length >= Configuration.Settings.General.SubtitleLineMaximumLength - 5 || next.Text.Length >= Configuration.Settings.General.SubtitleLineMaximumLength - 5))
                        merge = false;

                    if (merge)
                    {
                        temp.Paragraphs.Add(new Paragraph { Text = p.Text + Environment.NewLine + next.Text });
                        skipNext = true;
                    }
                    else
                    {
                        temp.Paragraphs.Add(new Paragraph(p));
                    }
                }
                else
                {
                    skipNext = false;
                }
            }
            _subtitle = temp;
        }