Exemple #1
0
        private static string ToUpperFirstLetter(string text, IFixCallbacks callbacks)
        {
            if (string.IsNullOrEmpty(text) || !char.IsLetter(text[0]) || char.IsUpper(text[0]))
            {
                return(text);
            }
            // Skip words like iPhone, iPad...
            if (text[0] == 'i' && text.Length > 1 && char.IsUpper(text[1]))
            {
                return(text);
            }
            if (Helper.IsTurkishLittleI(text[0], callbacks.Encoding, callbacks.Language))
            {
                text = Helper.GetTurkishUppercaseLetter(text[0], callbacks.Encoding) + text.Substring(1);
            }
            else
            {
                text = char.ToUpper(text[0]) + text.Substring(1); // text.CapitalizeFirstLetter();
            }

            return(text);
        }
        private static string ToUpperFirstLetter(string textBefore, string text, IFixCallbacks callbacks)
        {
            if (string.IsNullOrEmpty(text) || !char.IsLetter(text[0]) || char.IsUpper(text[0]))
            {
                return(text);
            }

            if (textBefore != null && textBefore.EndsWith("...", System.StringComparison.Ordinal))
            {
                if (callbacks.Language == "en" && text.StartsWith("i "))
                {
                }
                else
                {
                    return(text); // too hard to say if uppercase after "..."
                }
            }

            if (textBefore != null && textBefore.EndsWith(" - ", System.StringComparison.Ordinal) && !textBefore.EndsWith(". - ", System.StringComparison.Ordinal))
            {
                return(text);
            }

            // Skip words like iPhone, iPad...
            if (text[0] == 'i' && text.Length > 1 && char.IsUpper(text[1]))
            {
                return(text);
            }

            if (Helper.IsTurkishLittleI(text[0], callbacks.Encoding, callbacks.Language))
            {
                return(Helper.GetTurkishUppercaseLetter(text[0], callbacks.Encoding) + text.Substring(1));
            }

            text = char.ToUpper(text[0]) + text.Substring(1);
            return(text);
        }
Exemple #3
0
        private static string DoFix(Paragraph p, Paragraph prev, Encoding encoding, string language)
        {
            if (p.Text != null && p.Text.Length > 1)
            {
                string text = p.Text;
                string pre  = string.Empty;
                if (text.Length > 4 && text.StartsWith("<i> ", StringComparison.Ordinal))
                {
                    pre  = "<i> ";
                    text = text.Substring(4);
                }
                if (text.Length > 3 && text.StartsWith("<i>", StringComparison.Ordinal))
                {
                    pre  = "<i>";
                    text = text.Substring(3);
                }
                if (text.Length > 4 && text.StartsWith("<I> ", StringComparison.Ordinal))
                {
                    pre  = "<I> ";
                    text = text.Substring(4);
                }
                if (text.Length > 3 && text.StartsWith("<I>", StringComparison.Ordinal))
                {
                    pre  = "<I>";
                    text = text.Substring(3);
                }
                if (text.Length > 2 && text.StartsWith('♪'))
                {
                    pre  = pre + "♪";
                    text = text.Substring(1);
                }
                if (text.Length > 2 && text.StartsWith(' '))
                {
                    pre  = pre + " ";
                    text = text.Substring(1);
                }
                if (text.Length > 2 && text.StartsWith('♫'))
                {
                    pre  = pre + "♫";
                    text = text.Substring(1);
                }
                if (text.Length > 2 && text.StartsWith(' '))
                {
                    pre  = pre + " ";
                    text = text.Substring(1);
                }

                var firstLetter = text[0];

                string prevText = " .";
                if (prev != null)
                {
                    prevText = HtmlUtil.RemoveHtmlTags(prev.Text);
                }

                bool isPrevEndOfLine = Helper.IsPreviousTextEndOfParagraph(prevText);
                if (prevText == " .")
                {
                    isPrevEndOfLine = true;
                }
                if ((!text.StartsWith("www.", StringComparison.Ordinal) && !text.StartsWith("http:", StringComparison.Ordinal) && !text.StartsWith("https:", StringComparison.Ordinal)) &&
                    (char.IsLower(firstLetter) || Helper.IsTurkishLittleI(firstLetter, encoding, language)) &&
                    !char.IsDigit(firstLetter) &&
                    isPrevEndOfLine)
                {
                    bool isMatchInKnowAbbreviations = language == "en" &&
                                                      (prevText.EndsWith(" o.r.", StringComparison.Ordinal) ||
                                                       prevText.EndsWith(" a.m.", StringComparison.Ordinal) ||
                                                       prevText.EndsWith(" p.m.", StringComparison.Ordinal));

                    if (!isMatchInKnowAbbreviations)
                    {
                        if (Helper.IsTurkishLittleI(firstLetter, encoding, language))
                        {
                            p.Text = pre + Helper.GetTurkishUppercaseLetter(firstLetter, encoding) + text.Substring(1);
                        }
                        else if (language == "en" && (text.StartsWith("l ", StringComparison.Ordinal) || text.StartsWith("l-I", StringComparison.Ordinal) || text.StartsWith("ls ", StringComparison.Ordinal) || text.StartsWith("lnterested") ||
                                                      text.StartsWith("lsn't ", StringComparison.Ordinal) || text.StartsWith("ldiot", StringComparison.Ordinal) || text.StartsWith("ln", StringComparison.Ordinal) || text.StartsWith("lm", StringComparison.Ordinal) ||
                                                      text.StartsWith("ls", StringComparison.Ordinal) || text.StartsWith("lt", StringComparison.Ordinal) || text.StartsWith("lf ", StringComparison.Ordinal) || text.StartsWith("lc", StringComparison.Ordinal) || text.StartsWith("l'm ", StringComparison.Ordinal)) || text.StartsWith("l am ", StringComparison.Ordinal)) // l > I
                        {
                            p.Text = pre + "I" + text.Substring(1);
                        }
                        else
                        {
                            p.Text = pre + char.ToUpper(firstLetter) + text.Substring(1);
                        }
                    }
                }
            }

            if (p.Text != null && p.Text.Contains(Environment.NewLine))
            {
                var arr = p.Text.SplitToLines();
                if (arr.Length == 2 && arr[1].Length > 1)
                {
                    string text = arr[1];
                    string pre  = string.Empty;
                    if (text.Length > 4 && text.StartsWith("<i> ", StringComparison.Ordinal))
                    {
                        pre  = "<i> ";
                        text = text.Substring(4);
                    }
                    if (text.Length > 3 && text.StartsWith("<i>", StringComparison.Ordinal))
                    {
                        pre  = "<i>";
                        text = text.Substring(3);
                    }
                    if (text.Length > 4 && text.StartsWith("<I> ", StringComparison.Ordinal))
                    {
                        pre  = "<I> ";
                        text = text.Substring(4);
                    }
                    if (text.Length > 3 && text.StartsWith("<I>", StringComparison.Ordinal))
                    {
                        pre  = "<I>";
                        text = text.Substring(3);
                    }
                    if (text.Length > 2 && text.StartsWith('♪'))
                    {
                        pre  = pre + "♪";
                        text = text.Substring(1);
                    }
                    if (text.Length > 2 && text.StartsWith(' '))
                    {
                        pre  = pre + " ";
                        text = text.Substring(1);
                    }
                    if (text.Length > 2 && text.StartsWith('♫'))
                    {
                        pre  = pre + "♫";
                        text = text.Substring(1);
                    }
                    if (text.Length > 2 && text.StartsWith(' '))
                    {
                        pre  = pre + " ";
                        text = text.Substring(1);
                    }

                    char   firstLetter     = text[0];
                    string prevText        = HtmlUtil.RemoveHtmlTags(arr[0]);
                    bool   isPrevEndOfLine = Helper.IsPreviousTextEndOfParagraph(prevText);
                    if ((!text.StartsWith("www.", StringComparison.Ordinal) && !text.StartsWith("http:", StringComparison.Ordinal) && !text.StartsWith("https:", StringComparison.Ordinal)) &&
                        (char.IsLower(firstLetter) || Helper.IsTurkishLittleI(firstLetter, encoding, language)) &&
                        !prevText.EndsWith("...", StringComparison.Ordinal) &&
                        isPrevEndOfLine)
                    {
                        bool isMatchInKnowAbbreviations = language == "en" &&
                                                          (prevText.EndsWith(" o.r.", StringComparison.Ordinal) ||
                                                           prevText.EndsWith(" a.m.", StringComparison.Ordinal) ||
                                                           prevText.EndsWith(" p.m.", StringComparison.Ordinal));

                        if (!isMatchInKnowAbbreviations)
                        {
                            if (Helper.IsTurkishLittleI(firstLetter, encoding, language))
                            {
                                text = pre + Helper.GetTurkishUppercaseLetter(firstLetter, encoding) + text.Substring(1);
                            }
                            else if (language == "en" && (text.StartsWith("l ", StringComparison.Ordinal) || text.StartsWith("l-I", StringComparison.Ordinal) || text.StartsWith("ls ") || text.StartsWith("lnterested") ||
                                                          text.StartsWith("lsn't ", StringComparison.Ordinal) || text.StartsWith("ldiot", StringComparison.Ordinal) || text.StartsWith("ln", StringComparison.Ordinal) || text.StartsWith("lm", StringComparison.Ordinal) ||
                                                          text.StartsWith("ls", StringComparison.Ordinal) || text.StartsWith("lt", StringComparison.Ordinal) || text.StartsWith("lf ", StringComparison.Ordinal) || text.StartsWith("lc", StringComparison.Ordinal) || text.StartsWith("l'm ", StringComparison.Ordinal)) || text.StartsWith("l am ", StringComparison.Ordinal)) // l > I
                            {
                                text = pre + "I" + text.Substring(1);
                            }
                            else
                            {
                                text = pre + char.ToUpper(firstLetter) + text.Substring(1);
                            }
                            p.Text = arr[0] + Environment.NewLine + text;
                        }
                    }

                    arr = p.Text.SplitToLines();
                    if ((arr[0].StartsWith('-') || arr[0].StartsWith("<i>-", StringComparison.Ordinal)) &&
                        (arr[1].StartsWith('-') || arr[1].StartsWith("<i>-", StringComparison.Ordinal)) &&
                        !arr[0].StartsWith("--", StringComparison.Ordinal) && !arr[0].StartsWith("<i>--", StringComparison.Ordinal) &&
                        !arr[1].StartsWith("--", StringComparison.Ordinal) && !arr[1].StartsWith("<i>--", StringComparison.Ordinal))
                    {
                        if (isPrevEndOfLine && arr[1].StartsWith("<i>- ", StringComparison.Ordinal) && arr[1].Length > 6)
                        {
                            p.Text = arr[0] + Environment.NewLine + "<i>- " + char.ToUpper(arr[1][5]) + arr[1].Remove(0, 6);
                        }
                        else if (isPrevEndOfLine && arr[1].StartsWith("- ", StringComparison.Ordinal) && arr[1].Length > 3)
                        {
                            p.Text = arr[0] + Environment.NewLine + "- " + char.ToUpper(arr[1][2]) + arr[1].Remove(0, 3);
                        }
                        arr = p.Text.SplitToLines();

                        prevText = " .";
                        if (prev != null && p.StartTime.TotalMilliseconds - 10000 < prev.EndTime.TotalMilliseconds)
                        {
                            prevText = HtmlUtil.RemoveHtmlTags(prev.Text);
                        }
                        bool isPrevLineEndOfLine = Helper.IsPreviousTextEndOfParagraph(prevText);
                        if (isPrevLineEndOfLine && arr[0].StartsWith("<i>- ", StringComparison.Ordinal) && arr[0].Length > 6)
                        {
                            p.Text = "<i>- " + char.ToUpper(arr[0][5]) + arr[0].Remove(0, 6) + Environment.NewLine + arr[1];
                        }
                        else if (isPrevLineEndOfLine && arr[0].StartsWith("- ", StringComparison.Ordinal) && arr[0].Length > 3)
                        {
                            p.Text = "- " + char.ToUpper(arr[0][2]) + arr[0].Remove(0, 3) + Environment.NewLine + arr[1];
                        }
                    }
                }
            }

            if (p.Text != null && p.Text.Length > 4)
            {
                int len            = 0;
                int indexOfNewLine = p.Text.IndexOf(Environment.NewLine + " -", 1, StringComparison.Ordinal);
                if (indexOfNewLine < 0)
                {
                    indexOfNewLine = p.Text.IndexOf(Environment.NewLine + "- <i> ♪", 1, StringComparison.Ordinal);
                    len            = "- <i> ♪".Length;
                }
                if (indexOfNewLine < 0)
                {
                    indexOfNewLine = p.Text.IndexOf(Environment.NewLine + "-", 1, StringComparison.Ordinal);
                    len            = "-".Length;
                }
                if (indexOfNewLine < 0)
                {
                    indexOfNewLine = p.Text.IndexOf(Environment.NewLine + "<i>-", 1, StringComparison.Ordinal);
                    len            = "<i>-".Length;
                }
                if (indexOfNewLine < 0)
                {
                    indexOfNewLine = p.Text.IndexOf(Environment.NewLine + "<i> -", 1, StringComparison.Ordinal);
                    len            = "<i> -".Length;
                }
                if (indexOfNewLine < 0)
                {
                    indexOfNewLine = p.Text.IndexOf(Environment.NewLine + "♪ -", 1, StringComparison.Ordinal);
                    len            = "♪ -".Length;
                }
                if (indexOfNewLine < 0)
                {
                    indexOfNewLine = p.Text.IndexOf(Environment.NewLine + "♪ <i> -", 1, StringComparison.Ordinal);
                    len            = "♪ <i> -".Length;
                }
                if (indexOfNewLine < 0)
                {
                    indexOfNewLine = p.Text.IndexOf(Environment.NewLine + "♪ <i>-", 1, StringComparison.Ordinal);
                    len            = "♪ <i>-".Length;
                }

                if (indexOfNewLine > 0)
                {
                    string text = p.Text.Substring(indexOfNewLine + len);
                    var    st   = new StripableText(text);

                    if (st.StrippedText.Length > 0 && Helper.IsTurkishLittleI(st.StrippedText[0], encoding, language) && !st.Pre.EndsWith('[') && !st.Pre.Contains("..."))
                    {
                        text   = st.Pre + Helper.GetTurkishUppercaseLetter(st.StrippedText[0], encoding) + st.StrippedText.Substring(1) + st.Post;
                        p.Text = p.Text.Remove(indexOfNewLine + len).Insert(indexOfNewLine + len, text);
                    }
                    else if (st.StrippedText.Length > 0 && st.StrippedText[0] != char.ToUpper(st.StrippedText[0]) && !st.Pre.EndsWith('[') && !st.Pre.Contains("..."))
                    {
                        text   = st.Pre + char.ToUpper(st.StrippedText[0]) + st.StrippedText.Substring(1) + st.Post;
                        p.Text = p.Text.Remove(indexOfNewLine + len).Insert(indexOfNewLine + len, text);
                    }
                }
            }
            return(p.Text);
        }
Exemple #4
0
        public void Fix(Subtitle subtitle, IFixCallbacks callbacks)
        {
            var    language  = Configuration.Settings.Language.FixCommonErrors;
            string fixAction = language.StartWithUppercaseLetterAfterPeriodInsideParagraph;
            int    noOfFixes = 0;

            for (int i = 0; i < subtitle.Paragraphs.Count; i++)
            {
                Paragraph p       = subtitle.Paragraphs[i];
                string    oldText = p.Text;
                var       st      = new StripableText(p.Text);
                if (p.Text.Length > 3)
                {
                    string text  = st.StrippedText.Replace("  ", " ");
                    int    start = text.IndexOfAny(ExpectedChars);
                    while (start >= 0 && start < text.Length)
                    {
                        if (start > 0 && char.IsDigit(text[start - 1]))
                        {
                            // ignore periods after a number
                        }
                        else if (start + 4 < text.Length && text[start + 1] == ' ')
                        {
                            if (!IsAbbreviation(text, start, callbacks))
                            {
                                var subText = new StripableText(text.Substring(start + 2));
                                if (subText.StrippedText.Length > 0 && Helper.IsTurkishLittleI(subText.StrippedText[0], callbacks.Encoding, callbacks.Language))
                                {
                                    if (subText.StrippedText.Length > 1 && !(subText.Pre.Contains('\'') && subText.StrippedText.StartsWith('s')))
                                    {
                                        text = text.Substring(0, start + 2) + subText.Pre + Helper.GetTurkishUppercaseLetter(subText.StrippedText[0], callbacks.Encoding) + subText.StrippedText.Substring(1) + subText.Post;
                                        if (callbacks.AllowFix(p, fixAction))
                                        {
                                            p.Text = st.Pre + text + st.Post;
                                        }
                                    }
                                }
                                else if (subText.StrippedText.Length > 0 && Configuration.Settings.General.UppercaseLetters.Contains(char.ToUpper(subText.StrippedText[0])))
                                {
                                    if (subText.StrippedText.Length > 1 && !(subText.Pre.Contains('\'') && subText.StrippedText.StartsWith('s')))
                                    {
                                        text = text.Substring(0, start + 2) + subText.Pre + char.ToUpper(subText.StrippedText[0]) + subText.StrippedText.Substring(1) + subText.Post;
                                        if (callbacks.AllowFix(p, fixAction))
                                        {
                                            p.Text = st.Pre + text + st.Post;
                                        }
                                    }
                                }
                            }
                        }
                        start += 4;
                        if (start < text.Length)
                        {
                            start = text.IndexOfAny(ExpectedChars, start);
                        }
                    }
                }

                if (oldText != p.Text)
                {
                    noOfFixes++;
                    callbacks.AddFixToListView(p, fixAction, oldText, p.Text);
                }
            }
            callbacks.UpdateFixStatus(noOfFixes, language.StartWithUppercaseLetterAfterPeriodInsideParagraph, noOfFixes.ToString(CultureInfo.InvariantCulture));
        }
        public void Fix(Subtitle subtitle, IFixCallbacks callbacks)
        {
            string fixAction = Language.StartWithUppercaseLetterAfterColon;
            int    noOfFixes = 0;

            for (int i = 0; i < subtitle.Paragraphs.Count; i++)
            {
                var       p         = new Paragraph(subtitle.Paragraphs[i]);
                Paragraph last      = subtitle.GetParagraphOrDefault(i - 1);
                string    oldText   = p.Text;
                int       skipCount = 0;

                if (last != null)
                {
                    string lastText = HtmlUtil.RemoveHtmlTags(last.Text);
                    if (lastText.EndsWith(':') || lastText.EndsWith(';'))
                    {
                        var st = new StrippableText(p.Text);
                        if (st.StrippedText.Length > 0 && st.StrippedText[0] != char.ToUpper(st.StrippedText[0]))
                        {
                            p.Text = st.Pre + char.ToUpper(st.StrippedText[0]) + st.StrippedText.Substring(1) + st.Post;
                        }
                    }
                }

                if (oldText.Contains(ExpectedChars))
                {
                    bool lastWasColon = false;
                    for (int j = 0; j < p.Text.Length; j++)
                    {
                        var s = p.Text[j];
                        if (s == ':' || s == ';')
                        {
                            lastWasColon = true;
                        }
                        else if (lastWasColon)
                        {
                            // skip whitespace index
                            if (j + 2 < p.Text.Length && p.Text[j] == ' ')
                            {
                                s = p.Text[++j];
                            }

                            var startFromJ = p.Text.Substring(j);
                            if (startFromJ.Length > 3 && startFromJ[0] == '<' && startFromJ[2] == '>' && (startFromJ[1] == 'i' || startFromJ[1] == 'b' || startFromJ[1] == 'u'))
                            {
                                skipCount = 2;
                            }
                            else if (startFromJ.StartsWith("<font ", StringComparison.OrdinalIgnoreCase) && p.Text.Substring(j).Contains('>'))
                            {
                                skipCount = (j + startFromJ.IndexOf('>', 6)) - j;
                            }
                            else if (Helper.IsTurkishLittleI(s, callbacks.Encoding, callbacks.Language))
                            {
                                p.Text       = p.Text.Remove(j, 1).Insert(j, Helper.GetTurkishUppercaseLetter(s, callbacks.Encoding).ToString(CultureInfo.InvariantCulture));
                                lastWasColon = false;
                            }
                            else if (char.IsLower(s))
                            {
                                // iPhone
                                bool change = true;
                                if (s == 'i' && p.Text.Length > j + 1)
                                {
                                    if (p.Text[j + 1] == char.ToUpper(p.Text[j + 1]))
                                    {
                                        change = false;
                                    }
                                }
                                if (change)
                                {
                                    p.Text = p.Text.Remove(j, 1).Insert(j, char.ToUpper(s).ToString(CultureInfo.InvariantCulture));
                                }

                                lastWasColon = false;
                            }
                            else if (!(" " + Environment.NewLine).Contains(s))
                            {
                                lastWasColon = false;
                            }

                            // move the: 'j' pointer and reset skipCount to 0
                            if (skipCount > 0)
                            {
                                j        += skipCount;
                                skipCount = 0;
                            }
                        }
                    }
                }

                if (oldText != p.Text && callbacks.AllowFix(p, fixAction))
                {
                    noOfFixes++;
                    subtitle.Paragraphs[i].Text = p.Text;
                    callbacks.AddFixToListView(subtitle.Paragraphs[i], fixAction, oldText, p.Text);
                }
            }
            callbacks.UpdateFixStatus(noOfFixes, Language.StartWithUppercaseLetterAfterColon);
        }