Exemplo n.º 1
0
 public void StripableTextFontDontTouch()
 {
     var st = new StripableText("{MAN} Hi, how are you today!");
     Assert.AreEqual(st.Pre, "");
     Assert.AreEqual(st.Post, "!");
     Assert.AreEqual(st.StrippedText, "{MAN} Hi, how are you today");
 }
Exemplo n.º 2
0
 public void StripableOnlyPre3()
 {
     var st = new StripableText("<i>");
     Assert.AreEqual(st.Pre, "<i>");
     Assert.AreEqual(st.Post, "");
     Assert.AreEqual(st.StrippedText, "");
 }
Exemplo n.º 3
0
 public void StripableTextAss()
 {
     var st = new StripableText("{\\an9}Hi!");
     Assert.AreEqual(st.Pre, "{\\an9}");
     Assert.AreEqual(st.Post, "!");
     Assert.AreEqual(st.StrippedText, "Hi");
 }
Exemplo n.º 4
0
 public void StripableTextItalic2()
 {
     var st = new StripableText("<i>O</i>");
     Assert.AreEqual(st.Pre, "<i>");
     Assert.AreEqual(st.Post, "</i>");
     Assert.AreEqual(st.StrippedText, "O");
 }
Exemplo n.º 5
0
 public void StripableTextFont()
 {
     var st = new StripableText("<font color=\"red\">Hi!</font>");
     Assert.AreEqual(st.Pre, "<font color=\"red\">");
     Assert.AreEqual(st.Post, "!</font>");
     Assert.AreEqual(st.StrippedText, "Hi");
 }
Exemplo n.º 6
0
        private void GeneratePreview()
        {
            Cursor = Cursors.WaitCursor;
            listViewFixes.BeginUpdate();
            listViewFixes.Items.Clear();
            foreach (Paragraph p in _subtitle.Paragraphs)
            {
                string text = p.Text;
                foreach (ListViewItem item in listViewNames.Items)
                {
                    string name = item.SubItems[1].Text;

                    string textNoTags = HtmlUtil.RemoveHtmlTags(text);
                    if (textNoTags != textNoTags.ToUpper())
                    {
                        if (item.Checked && text != null && text.Contains(name, StringComparison.OrdinalIgnoreCase) && name.Length > 1 && name != name.ToLower())
                        {
                            var st = new StripableText(text);
                            st.FixCasing(new List <string> {
                                name
                            }, true, false, false, string.Empty);
                            text = st.MergedString;
                        }
                    }
                }
                if (text != p.Text)
                {
                    AddToPreviewListView(p, text);
                }
            }
            listViewFixes.EndUpdate();
            groupBoxLinesFound.Text = string.Format(Configuration.Settings.Language.ChangeCasingNames.LinesFoundX, listViewFixes.Items.Count);
            Cursor = Cursors.Default;
        }
Exemplo n.º 7
0
 public void StripableTextItalic3()
 {
     var st = new StripableText("<i>Hi!");
     Assert.AreEqual(st.Pre, "<i>");
     Assert.AreEqual(st.Post, "!");
     Assert.AreEqual(st.StrippedText, "Hi");
 }
Exemplo n.º 8
0
        public void StripableOnlyText()
        {
            var st = new StripableText("H");

            Assert.AreEqual(st.Pre, "");
            Assert.AreEqual(st.Post, "");
            Assert.AreEqual(st.StrippedText, "H");
        }
Exemplo n.º 9
0
        public void StripableTextItalic()
        {
            var st = new StripableText("<i>Hi!</i>");

            Assert.AreEqual(st.Pre, "<i>");
            Assert.AreEqual(st.Post, "!</i>");
            Assert.AreEqual(st.StrippedText, "Hi");
        }
Exemplo n.º 10
0
        public void StripableTextFontDontTouch()
        {
            var st = new StripableText("{MAN} Hi, how are you today!");

            Assert.AreEqual(st.Pre, "");
            Assert.AreEqual(st.Post, "!");
            Assert.AreEqual(st.StrippedText, "{MAN} Hi, how are you today");
        }
Exemplo n.º 11
0
        public void StripableTextItalic2()
        {
            var st = new StripableText("<i>O</i>");

            Assert.AreEqual(st.Pre, "<i>");
            Assert.AreEqual(st.Post, "</i>");
            Assert.AreEqual(st.StrippedText, "O");
        }
Exemplo n.º 12
0
        public void StripableTextFont()
        {
            var st = new StripableText("<font color=\"red\">Hi!</font>");

            Assert.AreEqual(st.Pre, "<font color=\"red\">");
            Assert.AreEqual(st.Post, "!</font>");
            Assert.AreEqual(st.StrippedText, "Hi");
        }
Exemplo n.º 13
0
        public void StripableTextAss()
        {
            var st = new StripableText("{\\an9}Hi!");

            Assert.AreEqual(st.Pre, "{\\an9}");
            Assert.AreEqual(st.Post, "!");
            Assert.AreEqual(st.StrippedText, "Hi");
        }
Exemplo n.º 14
0
        public void Fix(Subtitle subtitle, IFixCallbacks callbacks)
        {
            var    language  = Configuration.Settings.Language.FixCommonErrors;
            string fixAction = language.StartWithUppercaseLetterAfterPeriodInsideParagraph;
            int    noOfFixes = 0;

            for (int i = 0; i < subtitle.Paragraphs.Count; i++)
            {
                Paragraph p       = subtitle.Paragraphs[i];
                string    oldText = p.Text;
                if (p.Text.Length > 3 && callbacks.AllowFix(p, fixAction))
                {
                    var    st    = new StripableText(p.Text);
                    string text  = st.StrippedText;
                    int    start = text.IndexOfAny(ExpectedChars);
                    while (start > 0 && start < text.Length)
                    {
                        char charAtPosition = text[start];
                        // Allow fixing lowercase letter after recursive ??? or !!!.
                        if (charAtPosition != '.') // Dot is not include 'cause I don't capitalize word after the ellipses (...), right?
                        {
                            while (start + 1 < text.Length && text[start + 1] == charAtPosition)
                            {
                                start++;
                            }
                        }
                        if ((start + 3 < text.Length) && (text[start + 1] == ' ') && !IsAbbreviation(text, start, callbacks))
                        {
                            var subText = new StripableText(text.Substring(start + 2));
                            text = text.Substring(0, start + 2) + subText.CombineWithPrePost(ToUpperFirstLetter(subText.StrippedText, callbacks));
                        }
                        // Try to reach the last dot if char at *start is '.'.
                        if (charAtPosition == '.')
                        {
                            while (start + 1 < text.Length && text[start + 1] == '.')
                            {
                                start++;
                            }
                        }
                        start += 3;
                        if (start < text.Length)
                        {
                            start = text.IndexOfAny(ExpectedChars, start);
                        }
                    }
                    text = st.CombineWithPrePost(text);
                    if (oldText != text)
                    {
                        p.Text = text;
                        noOfFixes++;
                        callbacks.AddFixToListView(p, fixAction, oldText, p.Text);
                    }
                }
            }
            callbacks.UpdateFixStatus(noOfFixes, language.StartWithUppercaseLetterAfterPeriodInsideParagraph, noOfFixes.ToString(CultureInfo.InvariantCulture));
        }
 public void Fix(Subtitle subtitle, IFixCallbacks callbacks)
 {
     var language = Configuration.Settings.Language.FixCommonErrors;
     string fixAction = language.StartWithUppercaseLetterAfterPeriodInsideParagraph;
     int noOfFixes = 0;
     for (int i = 0; i < subtitle.Paragraphs.Count; i++)
     {
         Paragraph p = subtitle.Paragraphs[i];
         string oldText = p.Text;
         if (p.Text.Length > 3 && callbacks.AllowFix(p, fixAction))
         {
             var st = new StripableText(p.Text);
             string text = st.StrippedText;
             int start = text.IndexOfAny(ExpectedChars);
             while (start > 0 && start < text.Length)
             {
                 char charAtPosition = text[start];
                 // Allow fixing lowercase letter after recursive ??? or !!!.
                 if (charAtPosition != '.') // Dot is not include 'cause I don't capitalize word after the ellipses (...), right?
                 {
                     while (start + 1 < text.Length && text[start + 1] == charAtPosition)
                     {
                         start++;
                     }
                 }
                 if ((start + 3 < text.Length) && (text[start + 1] == ' ') && !IsAbbreviation(text, start, callbacks))
                 {
                     var subText = new StripableText(text.Substring(start + 2));
                     text = text.Substring(0, start + 2) + subText.CombineWithPrePost(ToUpperFirstLetter(subText.StrippedText, callbacks));
                 }
                 // Try to reach the last dot if char at *start is '.'.
                 if (charAtPosition == '.')
                 {
                     while (start + 1 < text.Length && text[start + 1] == '.')
                     {
                         start++;
                     }
                 }
                 start += 3;
                 if (start < text.Length)
                     start = text.IndexOfAny(ExpectedChars, start);
             }
             text = st.CombineWithPrePost(text);
             if (oldText != text)
             {
                 p.Text = text;
                 noOfFixes++;
                 callbacks.AddFixToListView(p, fixAction, oldText, p.Text);
             }
         }
     }
     callbacks.UpdateFixStatus(noOfFixes, language.StartWithUppercaseLetterAfterPeriodInsideParagraph, noOfFixes.ToString(CultureInfo.InvariantCulture));
 }
Exemplo n.º 16
0
        private string FixCasing(string text, string lastLine, List <string> namesEtc)
        {
            string original = text;

            if (radioButtonNormal.Checked)
            {
                if (checkBoxOnlyAllUpper.Checked && text != text.ToUpper())
                {
                    return(text);
                }

                if (text.Length > 1)
                {
                    // first all to lower
                    text = text.ToLower().Trim();
                    while (text.Contains("  "))
                    {
                        text = text.Replace("  ", " ");
                    }
                    text = text.Replace(" " + Environment.NewLine, Environment.NewLine);
                    text = text.Replace(Environment.NewLine + " ", Environment.NewLine);

                    var st = new StripableText(text);
                    st.FixCasing(namesEtc, false, true, true, lastLine); // fix all casing but names (that's a seperate option)
                    text = st.MergedString;
                }
            }
            else if (radioButtonUppercase.Checked)
            {
                StripableText st = new StripableText(text);
                text = st.Pre + st.StrippedText.ToUpper() + st.Post;
                text = text.Replace("<I>", "<i>");
                text = text.Replace("</I>", "</i>");
                text = text.Replace("<B>", "<b>");
                text = text.Replace("</B>", "</b>");
                text = text.Replace("<U>", "<u>");
                text = text.Replace("<U>", "</u>");
                text = text.Replace("<FONT COLOR>", "<font color>");
                text = text.Replace("</FONT>", "</font>");
            }
            else if (radioButtonLowercase.Checked)
            {
                text = text.ToLower();
            }
            if (original != text)
            {
                _noOfLinesChanged++;
            }
            return(text);
        }
Exemplo n.º 17
0
        private void MergeLinesWithContinuation()
        {
            var  temp     = new Subtitle();
            bool skipNext = false;

            for (int i = 0; i < _subtitle.Paragraphs.Count; i++)
            {
                Paragraph p = _subtitle.Paragraphs[i];
                if (!skipNext)
                {
                    Paragraph next = _subtitle.GetParagraphOrDefault(i + 1);

                    bool merge = !(p.Text.Contains(Environment.NewLine) || next == null) && Configuration.Settings.Tools.ListViewSyntaxMoreThanXLinesX > 1;

                    if (merge && (p.Text.TrimEnd().EndsWith('!') || p.Text.TrimEnd().EndsWith('.')))
                    {
                        var st = new StripableText(p.Text);
                        if (st.StrippedText.Length > 0 && char.IsUpper(st.StrippedText[0]))
                        {
                            merge = false;
                        }
                    }

                    if (merge && (p.Text.Length >= Configuration.Settings.General.SubtitleLineMaximumLength - 5 || next.Text.Length >= Configuration.Settings.General.SubtitleLineMaximumLength - 5))
                    {
                        merge = false;
                    }

                    if (merge)
                    {
                        temp.Paragraphs.Add(new Paragraph {
                            Text = p.Text + Environment.NewLine + next.Text
                        });
                        skipNext = true;
                    }
                    else
                    {
                        temp.Paragraphs.Add(new Paragraph(p));
                    }
                }
                else
                {
                    skipNext = false;
                }
            }
            _subtitle = temp;
        }
Exemplo n.º 18
0
        private void MergeLinesWithContinuation()
        {
            var  temp     = new Subtitle();
            bool skipNext = false;

            for (int i = 0; i < _subtitle.Paragraphs.Count; i++)
            {
                Paragraph p = _subtitle.Paragraphs[i];
                if (!skipNext)
                {
                    Paragraph next = _subtitle.GetParagraphOrDefault(i + 1);

                    bool merge = !(p.Text.Contains(Environment.NewLine) || next == null);

                    if (merge && (p.Text.TrimEnd().EndsWith('!') || p.Text.TrimEnd().EndsWith('.')))
                    {
                        var st = new StripableText(p.Text);
                        if (st.StrippedText.Length > 0 && Utilities.UppercaseLetters.Contains(st.StrippedText[0].ToString(CultureInfo.InvariantCulture)))
                        {
                            merge = false;
                        }
                    }

                    if (merge && (p.Text.Length >= Configuration.Settings.General.SubtitleLineMaximumLength - 5 || next.Text.Length >= Configuration.Settings.General.SubtitleLineMaximumLength - 5))
                    {
                        merge = false;
                    }

                    if (merge)
                    {
                        temp.Paragraphs.Add(new Paragraph {
                            Text = p.Text + Environment.NewLine + next.Text
                        });
                        skipNext = true;
                    }
                    else
                    {
                        temp.Paragraphs.Add(new Paragraph(p));
                    }
                }
                else
                {
                    skipNext = false;
                }
            }
            _subtitle = temp;
        }
Exemplo n.º 19
0
        private string FixCasing(string text, string lastLine, List <string> namesEtc)
        {
            string original = text;

            if (radioButtonNormal.Checked)
            {
                if (checkBoxOnlyAllUpper.Checked && text != text.ToUpper())
                {
                    return(text);
                }

                if (text.Length > 1)
                {
                    // first all to lower
                    text = text.ToLower().Trim();
                    text = text.FixExtraSpaces();
                    var st = new StripableText(text);
                    st.FixCasing(namesEtc, false, true, true, lastLine); // fix all casing but names (that's a seperate option)
                    text = st.MergedString;
                }
            }
            else if (radioButtonUppercase.Checked)
            {
                var st = new StripableText(text);
                text = st.Pre + st.StrippedText.ToUpper() + st.Post;
                text = HtmlUtil.FixUpperTags(text); // tags inside text
            }
            else if (radioButtonLowercase.Checked)
            {
                text = text.ToLower();
            }
            if (original != text)
            {
                _noOfLinesChanged++;
            }
            return(text);
        }
Exemplo n.º 20
0
        public static string FixHyphensRemove(Subtitle subtitle, int i)
        {
            Paragraph p = subtitle.Paragraphs[i];
            string text = p.Text;

            if (text.TrimStart().StartsWith('-') ||
                text.TrimStart().StartsWith("<i>-", StringComparison.OrdinalIgnoreCase) ||
                text.TrimStart().StartsWith("<i> -", StringComparison.OrdinalIgnoreCase) ||
                text.Contains(Environment.NewLine + '-') ||
                text.Contains(Environment.NewLine + " -") ||
                text.Contains(Environment.NewLine + "<i>-") ||
                text.Contains(Environment.NewLine + "<i> -") ||
                text.Contains(Environment.NewLine + "<I>-") ||
                text.Contains(Environment.NewLine + "<I> -"))
            {
                var prev = subtitle.GetParagraphOrDefault(i - 1);

                if (prev == null || !HtmlUtil.RemoveHtmlTags(prev.Text).TrimEnd().EndsWith('-') || HtmlUtil.RemoveHtmlTags(prev.Text).TrimEnd().EndsWith("--", StringComparison.Ordinal))
                {
                    var noTaglines = HtmlUtil.RemoveHtmlTags(p.Text).SplitToLines();
                    int startHyphenCount = noTaglines.Count(line => line.TrimStart().StartsWith('-'));
                    if (startHyphenCount == 1)
                    {
                        bool remove = true;
                        var noTagparts = HtmlUtil.RemoveHtmlTags(text).SplitToLines();
                        if (noTagparts.Length == 2)
                        {
                            if (noTagparts[0].TrimStart().StartsWith('-') && noTagparts[1].Contains(": "))
                                remove = false;
                            if (noTagparts[1].TrimStart().StartsWith('-') && noTagparts[0].Contains(": "))
                                remove = false;
                        }

                        if (remove)
                        {
                            int idx = text.IndexOf('-');
                            var st = new StripableText(text);
                            if (idx < 5 && st.Pre.Length >= idx)
                            {
                                text = text.Remove(idx, 1).TrimStart();
                                idx = text.IndexOf('-');
                                st = new StripableText(text);
                                if (idx < 5 && idx >= 0 && st.Pre.Length >= idx)
                                {
                                    text = text.Remove(idx, 1).TrimStart();
                                    st = new StripableText(text);
                                }
                                idx = text.IndexOf('-');
                                if (idx < 5 && idx >= 0 && st.Pre.Length >= idx)
                                    text = text.Remove(idx, 1).TrimStart();

                                text = RemoveSpacesBeginLine(text);
                            }
                            else
                            {
                                int indexOfNewLine = text.IndexOf(Environment.NewLine, StringComparison.Ordinal);
                                if (indexOfNewLine > 0)
                                {
                                    idx = text.IndexOf('-', indexOfNewLine);
                                    if (idx >= 0 && indexOfNewLine + 5 > indexOfNewLine)
                                    {
                                        text = text.Remove(idx, 1).TrimStart().Replace(Environment.NewLine + " ", Environment.NewLine);

                                        idx = text.IndexOf('-', indexOfNewLine);
                                        if (idx >= 0 && indexOfNewLine + 5 > indexOfNewLine)
                                        {
                                            text = text.Remove(idx, 1).TrimStart();

                                            text = RemoveSpacesBeginLine(text);
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
            else if (text.StartsWith("<font ", StringComparison.Ordinal))
            {
                var prev = subtitle.GetParagraphOrDefault(i - 1);
                if (prev == null || !HtmlUtil.RemoveHtmlTags(prev.Text).TrimEnd().EndsWith('-') || HtmlUtil.RemoveHtmlTags(prev.Text).TrimEnd().EndsWith("--", StringComparison.Ordinal))
                {
                    var st = new StripableText(text);
                    if (st.Pre.EndsWith('-') || st.Pre.EndsWith("- ", StringComparison.Ordinal))
                    {
                        text = st.Pre.TrimEnd('-', ' ') + st.StrippedText + st.Post;
                    }
                }
            }
            return text;
        }
        public void Fix(Subtitle subtitle, IFixCallbacks callbacks)
        {
            var    language  = Configuration.Settings.Language.FixCommonErrors;
            string fixAction = language.FixMissingPeriodAtEndOfLine;
            int    missigPeriodsAtEndOfLine = 0;

            for (int i = 0; i < subtitle.Paragraphs.Count; i++)
            {
                Paragraph p        = subtitle.Paragraphs[i];
                Paragraph next     = subtitle.GetParagraphOrDefault(i + 1);
                string    nextText = string.Empty;
                if (next != null)
                {
                    nextText = HtmlUtil.RemoveHtmlTags(next.Text).TrimStart('-', '"', '„').TrimStart();
                }
                string tempNoHtml = HtmlUtil.RemoveHtmlTags(p.Text).TrimEnd();

                if (IsOneLineUrl(p.Text) || p.Text.Contains(new[] { '♪', '♫' }) || p.Text.EndsWith('\''))
                {
                    // ignore urls
                }
                else if (!string.IsNullOrEmpty(nextText) && next != null &&
                         next.Text.Length > 0 &&
                         Utilities.UppercaseLetters.Contains(nextText[0]) &&
                         tempNoHtml.Length > 0 &&
                         !@",.!?:;>-])♪♫…".Contains(tempNoHtml[tempNoHtml.Length - 1]))
                {
                    string tempTrimmed = tempNoHtml.TrimEnd().TrimEnd('\'', '"', '“', '”').TrimEnd();
                    if (tempTrimmed.Length > 0 && !@")]*#¶.!?".Contains(tempTrimmed[tempTrimmed.Length - 1]) && p.Text != p.Text.ToUpper())
                    {
                        //don't end the sentence if the next word is an I word as they're always capped.
                        if (!next.Text.StartsWith("I ", StringComparison.Ordinal) && !next.Text.StartsWith("I'", StringComparison.Ordinal))
                        {
                            //test to see if the first word of the next line is a name
                            if (!callbacks.IsName(next.Text.Split(WordSplitChars)[0]) && callbacks.AllowFix(p, fixAction))
                            {
                                string oldText = p.Text;
                                if (p.Text.EndsWith('>'))
                                {
                                    int lastLessThan = p.Text.LastIndexOf('<');
                                    if (lastLessThan > 0)
                                    {
                                        p.Text = p.Text.Insert(lastLessThan, ".");
                                    }
                                }
                                else
                                {
                                    if (p.Text.EndsWith('“') && tempNoHtml.StartsWith('„'))
                                    {
                                        p.Text = p.Text.TrimEnd('“') + ".“";
                                    }
                                    else if (p.Text.EndsWith('"') && tempNoHtml.StartsWith('"'))
                                    {
                                        p.Text = p.Text.TrimEnd('"') + ".\"";
                                    }
                                    else
                                    {
                                        p.Text += ".";
                                    }
                                }
                                if (p.Text != oldText)
                                {
                                    missigPeriodsAtEndOfLine++;
                                    callbacks.AddFixToListView(p, fixAction, oldText, p.Text);
                                }
                            }
                        }
                    }
                }
                else if (next != null && !string.IsNullOrEmpty(p.Text) && Utilities.AllLettersAndNumbers.Contains(p.Text[p.Text.Length - 1]))
                {
                    if (p.Text != p.Text.ToUpper())
                    {
                        var st = new StripableText(next.Text);
                        if (st.StrippedText.Length > 0 && st.StrippedText != st.StrippedText.ToUpper() &&
                            Utilities.UppercaseLetters.Contains(st.StrippedText[0]))
                        {
                            if (callbacks.AllowFix(p, fixAction))
                            {
                                int j = p.Text.Length - 1;
                                while (j >= 0 && !@".!?¿¡".Contains(p.Text[j]))
                                {
                                    j--;
                                }
                                string endSign = ".";
                                if (j >= 0 && p.Text[j] == '¿')
                                {
                                    endSign = "?";
                                }
                                if (j >= 0 && p.Text[j] == '¡')
                                {
                                    endSign = "!";
                                }

                                string oldText = p.Text;
                                missigPeriodsAtEndOfLine++;
                                p.Text += endSign;
                                callbacks.AddFixToListView(p, fixAction, oldText, p.Text);
                            }
                        }
                    }
                }

                if (p.Text.Length > 4)
                {
                    int indexOfNewLine = p.Text.IndexOf(Environment.NewLine + " -", 3, StringComparison.Ordinal);
                    if (indexOfNewLine < 0)
                    {
                        indexOfNewLine = p.Text.IndexOf(Environment.NewLine + "-", 3, StringComparison.Ordinal);
                    }
                    if (indexOfNewLine < 0)
                    {
                        indexOfNewLine = p.Text.IndexOf(Environment.NewLine + "<i>-", 3, StringComparison.Ordinal);
                    }
                    if (indexOfNewLine < 0)
                    {
                        indexOfNewLine = p.Text.IndexOf(Environment.NewLine + "<i> -", 3, StringComparison.Ordinal);
                    }
                    if (indexOfNewLine > 0 && Configuration.Settings.General.UppercaseLetters.Contains(char.ToUpper(p.Text[indexOfNewLine - 1])) && callbacks.AllowFix(p, fixAction))
                    {
                        string oldText = p.Text;

                        string text = p.Text.Substring(0, indexOfNewLine);
                        var    st   = new StripableText(text);
                        if (st.Pre.TrimEnd().EndsWith('¿')) // Spanish ¿
                        {
                            p.Text = p.Text.Insert(indexOfNewLine, "?");
                        }
                        else if (st.Pre.TrimEnd().EndsWith('¡')) // Spanish ¡
                        {
                            p.Text = p.Text.Insert(indexOfNewLine, "!");
                        }
                        else
                        {
                            p.Text = p.Text.Insert(indexOfNewLine, ".");
                        }

                        missigPeriodsAtEndOfLine++;
                        callbacks.AddFixToListView(p, fixAction, oldText, p.Text);
                    }
                }
            }
            callbacks.UpdateFixStatus(missigPeriodsAtEndOfLine, language.AddPeriods, language.XPeriodsAdded);
        }
        public string RemoveTextFromHearImpaired(string text)
        {
            if (Settings.RemoveWhereContains)
            {
                foreach (var removeIfTextContain in Settings.RemoveIfTextContains)
                {
                    if (text.Contains(removeIfTextContain))
                        return string.Empty;
                }
            }

            string oldText = text;
            text = RemoveColon(text);
            string pre = " >-\"'‘`´♪¿¡.…—";
            string post = " -\"'`´♪.!?:…—";
            if (Settings.RemoveTextBetweenCustomTags)
            {
                pre = pre.Replace(Settings.CustomStart, string.Empty);
                post = post.Replace(Settings.CustomEnd, string.Empty);
            }
            var st = new StripableText(text, pre, post);
            var sb = new StringBuilder();
            var parts = st.StrippedText.Trim().SplitToLines();
            int lineNumber = 0;
            bool removedDialogInFirstLine = false;
            int noOfNamesRemoved = 0;
            int noOfNamesRemovedNotInLineOne = 0;
            foreach (string s in parts)
            {
                var stSub = new StripableText(s, pre, post);
                string strippedText = stSub.StrippedText;
                if (lineNumber == parts.Length - 1 && st.Post.Contains('?'))
                    strippedText += "?";
                else if (stSub.Post.Contains('?'))
                    strippedText += "?";
                if (!StartsAndEndsWithHearImpariedTags(strippedText))
                {
                    if (removedDialogInFirstLine && stSub.Pre.Contains("- "))
                        stSub.Pre = stSub.Pre.Replace("- ", string.Empty);

                    string newText = stSub.StrippedText;

                    newText = RemoveHearImpairedTags(newText);

                    if (stSub.StrippedText.Length - newText.Length > 2)
                    {
                        string removedText = GetRemovedString(stSub.StrippedText, newText);
                        if (!IsHIDescription(removedText))
                        {
                            noOfNamesRemoved++;
                            if (lineNumber > 0)
                                noOfNamesRemovedNotInLineOne++;
                        }
                    }
                    sb.AppendLine(stSub.Pre + newText + stSub.Post);
                }
                else
                {
                    if (!IsHIDescription(stSub.StrippedText))
                    {
                        noOfNamesRemoved++;
                        if (lineNumber > 0)
                            noOfNamesRemovedNotInLineOne++;
                    }

                    if (lineNumber == 0)
                    {
                        if (st.Pre.Contains("- "))
                        {
                            st.Pre = st.Pre.Replace("- ", string.Empty);
                            removedDialogInFirstLine = true;
                        }
                        else if (st.Pre == "-")
                        {
                            st.Pre = string.Empty;
                            removedDialogInFirstLine = true;
                        }
                    }

                    if (st.Pre.Contains("<i>") && stSub.Post.Contains("</i>"))
                        st.Pre = st.Pre.Replace("<i>", string.Empty);

                    if (s.Contains("<i>") && !s.Contains("</i>") && st.Post.Contains("</i>"))
                        st.Post = st.Post.Replace("</i>", string.Empty);
                }
                lineNumber++;
            }

            text = st.Pre + sb.ToString().Trim() + st.Post;
            text = text.Replace("  ", " ").Trim();
            text = text.Replace("<i></i>", string.Empty);
            text = text.Replace("<i> </i>", " ");
            text = text.Replace("<b></b>", string.Empty);
            text = text.Replace("<b> </b>", " ");
            text = text.Replace("<u></u>", string.Empty);
            text = text.Replace("<u> </u>", " ");
            text = RemoveEmptyFontTag(text);
            text = text.Replace("  ", " ").Trim();
            text = RemoveColon(text);
            text = RemoveLineIfAllUppercase(text);
            text = RemoveHearImpairedtagsInsideLine(text);
            if (Settings.RemoveInterjections)
                text = RemoveInterjections(text);

            st = new StripableText(text, " >-\"'‘`´♪¿¡.…—", " -\"'`´♪.!?:…—");
            text = st.StrippedText;
            if (StartsAndEndsWithHearImpariedTags(text))
            {
                text = RemoveStartEndTags(text);
            }

            text = RemoveHearImpairedTags(text);

            // fix 3 lines to two liners - if only two lines
            if (noOfNamesRemoved >= 1 && Utilities.GetNumberOfLines(text) == 3)
            {
                var splitChars = new[] { '.', '?', '!' };
                var splitParts = HtmlUtil.RemoveHtmlTags(text).Replace(" ", string.Empty).Split(splitChars, StringSplitOptions.RemoveEmptyEntries);
                if (splitParts.Length == 2)
                {
                    var temp = new StripableText(text);
                    temp.StrippedText = temp.StrippedText.Replace(Environment.NewLine, " ");
                    int splitIndex = temp.StrippedText.LastIndexOfAny(splitChars);
                    if (splitIndex > 0)
                    {
                        text = temp.Pre + temp.StrippedText.Insert(splitIndex + 1, Environment.NewLine) + temp.Post;
                    }
                }
            }

            if (!text.StartsWith('-') && noOfNamesRemoved >= 1 && Utilities.GetNumberOfLines(text) == 2)
            {
                var lines = text.SplitToLines();
                var part0 = lines[0].Trim().Replace("</i>", string.Empty).Trim();
                if (!part0.EndsWith(',') && (!part0.EndsWith('-') || noOfNamesRemovedNotInLineOne > 0))
                {
                    if (part0.Length > 0 && ".?!".Contains(part0[part0.Length - 1]))
                    {
                        if (noOfNamesRemovedNotInLineOne > 0)
                        {
                            if (!st.Pre.Contains('-'))
                                text = "- " + text.Replace(Environment.NewLine, Environment.NewLine + "- ");
                            if (!text.Contains(Environment.NewLine + "-") && !text.Contains(Environment.NewLine + "<i>-"))
                                text = text.Replace(Environment.NewLine, Environment.NewLine + "- ");
                        }
                    }
                }
            }

            if (!string.IsNullOrEmpty(text) || (st.Pre.Contains('♪') || st.Post.Contains('♪')))
                text = st.Pre + text + st.Post;

            if (oldText.TrimStart().StartsWith("- ", StringComparison.Ordinal) &&
                text != null && !text.Contains(Environment.NewLine) &&
                (oldText.Contains(Environment.NewLine + "- ") ||
                 oldText.Contains(Environment.NewLine + " - ") ||
                 oldText.Contains(Environment.NewLine + "<i>- ") ||
                 oldText.Contains(Environment.NewLine + "<i> - ")))
            {
                text = text.TrimStart().TrimStart('-').TrimStart();
            }
            if (oldText.TrimStart().StartsWith('-') && !oldText.TrimStart().StartsWith("--", StringComparison.Ordinal) &&
                text != null && !text.Contains(Environment.NewLine) &&
                (oldText.Contains(Environment.NewLine + "-") && !oldText.Contains(Environment.NewLine + "--") ||
                 oldText.Contains(Environment.NewLine + " - ") ||
                 oldText.Contains(Environment.NewLine + "<i>- ") ||
                 oldText.Contains(Environment.NewLine + "<i> - ")))
            {
                text = text.TrimStart().TrimStart('-').TrimStart();
            }

            if (oldText.TrimStart().StartsWith("<i>- ", StringComparison.Ordinal) &&
                text != null && text.StartsWith("<i>- ", StringComparison.Ordinal) && !text.Contains(Environment.NewLine) &&
                (oldText.Contains(Environment.NewLine + "- ") ||
                 oldText.Contains(Environment.NewLine + " - ") ||
                 oldText.Contains(Environment.NewLine + "<i>- ") ||
                 oldText.Contains(Environment.NewLine + "<i> - ")))
            {
                text = text.Remove(3, 2);
            }

            if (text != null && !text.Contains(Environment.NewLine) &&
                (oldText.Contains(':') && !text.Contains(':') ||
                 oldText.Contains('[') && !text.Contains('[') ||
                 oldText.Contains('(') && !text.Contains('(') ||
                 oldText.Contains('{') && !text.Contains('{')) &&
                (oldText.Contains(Environment.NewLine + "- ") ||
                 oldText.Contains(Environment.NewLine + " - ") ||
                 oldText.Contains(Environment.NewLine + "<i>- ") ||
                 oldText.Contains(Environment.NewLine + "<i> - ")))
            {
                text = text.TrimStart().TrimStart('-').TrimStart();
            }

            if (oldText != text)
            {
                // insert spaces before "-"
                text = text.Replace(Environment.NewLine + "- <i>", Environment.NewLine + "<i>- ");
                text = text.Replace(Environment.NewLine + "-<i>", Environment.NewLine + "<i>- ");
                if (text.Length > 2 && text[0] == '-' && text[1] != ' ' && text[1] != '-')
                    text = text.Insert(1, " ");
                if (text.Length > 5 && text.StartsWith("<i>-", StringComparison.Ordinal) && text[4] != ' ' && text[4] != '-')
                    text = text.Insert(4, " ");
                int index = text.IndexOf(Environment.NewLine + "-", StringComparison.Ordinal);
                if (index >= 0 && text.Length - index > 4)
                {
                    index += Environment.NewLine.Length + 1;
                    if (text[index] != ' ' && text[index] != '-')
                        text = text.Insert(index, " ");
                }
                index = text.IndexOf(Environment.NewLine + "<i>-", StringComparison.Ordinal);
                if (index >= 0 && text.Length - index > 5)
                {
                    index += Environment.NewLine.Length + 4;
                    if (text[index] != ' ' && text[index] != '-')
                        text = text.Insert(index, " ");
                }
            }
            return text.Trim();
        }
        public static string FixHyphensRemove(Subtitle subtitle, int i)
        {
            Paragraph p    = subtitle.Paragraphs[i];
            string    text = p.Text;

            if (text.TrimStart().StartsWith('-') ||
                text.TrimStart().StartsWith("<i>-", StringComparison.OrdinalIgnoreCase) ||
                text.TrimStart().StartsWith("<i> -", StringComparison.OrdinalIgnoreCase) ||
                text.Contains(Environment.NewLine + '-') ||
                text.Contains(Environment.NewLine + " -") ||
                text.Contains(Environment.NewLine + "<i>-") ||
                text.Contains(Environment.NewLine + "<i> -") ||
                text.Contains(Environment.NewLine + "<I>-") ||
                text.Contains(Environment.NewLine + "<I> -"))
            {
                var prev = subtitle.GetParagraphOrDefault(i - 1);

                if (prev == null || !HtmlUtil.RemoveHtmlTags(prev.Text).TrimEnd().EndsWith('-') || HtmlUtil.RemoveHtmlTags(prev.Text).TrimEnd().EndsWith("--", StringComparison.Ordinal))
                {
                    var noTaglines       = HtmlUtil.RemoveHtmlTags(p.Text).SplitToLines();
                    int startHyphenCount = noTaglines.Count(line => line.TrimStart().StartsWith('-'));
                    if (startHyphenCount == 1)
                    {
                        bool remove     = true;
                        var  noTagparts = HtmlUtil.RemoveHtmlTags(text).SplitToLines();
                        if (noTagparts.Length == 2)
                        {
                            if (noTagparts[0].TrimStart().StartsWith('-') && noTagparts[1].Contains(": "))
                            {
                                remove = false;
                            }
                            if (noTagparts[1].TrimStart().StartsWith('-') && noTagparts[0].Contains(": "))
                            {
                                remove = false;
                            }
                        }

                        if (remove)
                        {
                            int idx = text.IndexOf('-');
                            var st  = new StripableText(text);
                            if (idx < 5 && st.Pre.Length >= idx)
                            {
                                text = text.Remove(idx, 1).TrimStart();
                                idx  = text.IndexOf('-');
                                st   = new StripableText(text);
                                if (idx < 5 && idx >= 0 && st.Pre.Length >= idx)
                                {
                                    text = text.Remove(idx, 1).TrimStart();
                                    st   = new StripableText(text);
                                }
                                idx = text.IndexOf('-');
                                if (idx < 5 && idx >= 0 && st.Pre.Length >= idx)
                                {
                                    text = text.Remove(idx, 1).TrimStart();
                                }

                                text = RemoveSpacesBeginLine(text);
                            }
                            else
                            {
                                int indexOfNewLine = text.IndexOf(Environment.NewLine, StringComparison.Ordinal);
                                if (indexOfNewLine > 0)
                                {
                                    idx = text.IndexOf('-', indexOfNewLine);
                                    if (idx >= 0 && indexOfNewLine + 5 > indexOfNewLine)
                                    {
                                        text = text.Remove(idx, 1).TrimStart().Replace(Environment.NewLine + " ", Environment.NewLine);

                                        idx = text.IndexOf('-', indexOfNewLine);
                                        if (idx >= 0 && indexOfNewLine + 5 > indexOfNewLine)
                                        {
                                            text = text.Remove(idx, 1).TrimStart();

                                            text = RemoveSpacesBeginLine(text);
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
            else if (text.StartsWith("<font ", StringComparison.Ordinal))
            {
                var prev = subtitle.GetParagraphOrDefault(i - 1);
                if (prev == null || !HtmlUtil.RemoveHtmlTags(prev.Text).TrimEnd().EndsWith('-') || HtmlUtil.RemoveHtmlTags(prev.Text).TrimEnd().EndsWith("--", StringComparison.Ordinal))
                {
                    var st = new StripableText(text);
                    if (st.Pre.EndsWith('-') || st.Pre.EndsWith("- ", StringComparison.Ordinal))
                    {
                        text = st.Pre.TrimEnd('-', ' ') + st.StrippedText + st.Post;
                    }
                }
            }
            return(text);
        }
Exemplo n.º 24
0
 public void StripableTextItalicAndMore()
 {
     var st = new StripableText("<i>...<b>Hi!</b></i>");
     Assert.AreEqual(st.Pre, "<i>...<b>");
     Assert.AreEqual(st.Post, "!</b></i>");
     Assert.AreEqual(st.StrippedText, "Hi");
 }
Exemplo n.º 25
0
        private static string DoFix(Paragraph p, Paragraph prev, Encoding encoding, string language)
        {
            if (p.Text != null && p.Text.Length > 1)
            {
                string text = p.Text;
                string pre  = string.Empty;
                if (text.Length > 4 && text.StartsWith("<i> ", StringComparison.Ordinal))
                {
                    pre  = "<i> ";
                    text = text.Substring(4);
                }
                if (text.Length > 3 && text.StartsWith("<i>", StringComparison.Ordinal))
                {
                    pre  = "<i>";
                    text = text.Substring(3);
                }
                if (text.Length > 4 && text.StartsWith("<I> ", StringComparison.Ordinal))
                {
                    pre  = "<I> ";
                    text = text.Substring(4);
                }
                if (text.Length > 3 && text.StartsWith("<I>", StringComparison.Ordinal))
                {
                    pre  = "<I>";
                    text = text.Substring(3);
                }
                if (text.Length > 2 && text.StartsWith('♪'))
                {
                    pre  = pre + "♪";
                    text = text.Substring(1);
                }
                if (text.Length > 2 && text.StartsWith(' '))
                {
                    pre  = pre + " ";
                    text = text.Substring(1);
                }
                if (text.Length > 2 && text.StartsWith('♫'))
                {
                    pre  = pre + "♫";
                    text = text.Substring(1);
                }
                if (text.Length > 2 && text.StartsWith(' '))
                {
                    pre  = pre + " ";
                    text = text.Substring(1);
                }

                var firstLetter = text[0];

                string prevText = " .";
                if (prev != null)
                {
                    prevText = HtmlUtil.RemoveHtmlTags(prev.Text);
                }

                bool isPrevEndOfLine = Helper.IsPreviousTextEndOfParagraph(prevText);
                if (prevText == " .")
                {
                    isPrevEndOfLine = true;
                }
                if ((!text.StartsWith("www.", StringComparison.Ordinal) && !text.StartsWith("http:", StringComparison.Ordinal) && !text.StartsWith("https:", StringComparison.Ordinal)) &&
                    (char.IsLower(firstLetter) || Helper.IsTurkishLittleI(firstLetter, encoding, language)) &&
                    !char.IsDigit(firstLetter) &&
                    isPrevEndOfLine)
                {
                    bool isMatchInKnowAbbreviations = language == "en" &&
                                                      (prevText.EndsWith(" o.r.", StringComparison.Ordinal) ||
                                                       prevText.EndsWith(" a.m.", StringComparison.Ordinal) ||
                                                       prevText.EndsWith(" p.m.", StringComparison.Ordinal));

                    if (!isMatchInKnowAbbreviations)
                    {
                        if (Helper.IsTurkishLittleI(firstLetter, encoding, language))
                        {
                            p.Text = pre + Helper.GetTurkishUppercaseLetter(firstLetter, encoding) + text.Substring(1);
                        }
                        else if (language == "en" && (text.StartsWith("l ", StringComparison.Ordinal) || text.StartsWith("l-I", StringComparison.Ordinal) || text.StartsWith("ls ", StringComparison.Ordinal) || text.StartsWith("lnterested") ||
                                                      text.StartsWith("lsn't ", StringComparison.Ordinal) || text.StartsWith("ldiot", StringComparison.Ordinal) || text.StartsWith("ln", StringComparison.Ordinal) || text.StartsWith("lm", StringComparison.Ordinal) ||
                                                      text.StartsWith("ls", StringComparison.Ordinal) || text.StartsWith("lt", StringComparison.Ordinal) || text.StartsWith("lf ", StringComparison.Ordinal) || text.StartsWith("lc", StringComparison.Ordinal) || text.StartsWith("l'm ", StringComparison.Ordinal)) || text.StartsWith("l am ", StringComparison.Ordinal)) // l > I
                        {
                            p.Text = pre + "I" + text.Substring(1);
                        }
                        else
                        {
                            p.Text = pre + char.ToUpper(firstLetter) + text.Substring(1);
                        }
                    }
                }
            }

            if (p.Text != null && p.Text.Contains(Environment.NewLine))
            {
                var arr = p.Text.SplitToLines();
                if (arr.Length == 2 && arr[1].Length > 1)
                {
                    string text = arr[1];
                    string pre  = string.Empty;
                    if (text.Length > 4 && text.StartsWith("<i> ", StringComparison.Ordinal))
                    {
                        pre  = "<i> ";
                        text = text.Substring(4);
                    }
                    if (text.Length > 3 && text.StartsWith("<i>", StringComparison.Ordinal))
                    {
                        pre  = "<i>";
                        text = text.Substring(3);
                    }
                    if (text.Length > 4 && text.StartsWith("<I> ", StringComparison.Ordinal))
                    {
                        pre  = "<I> ";
                        text = text.Substring(4);
                    }
                    if (text.Length > 3 && text.StartsWith("<I>", StringComparison.Ordinal))
                    {
                        pre  = "<I>";
                        text = text.Substring(3);
                    }
                    if (text.Length > 2 && text.StartsWith('♪'))
                    {
                        pre  = pre + "♪";
                        text = text.Substring(1);
                    }
                    if (text.Length > 2 && text.StartsWith(' '))
                    {
                        pre  = pre + " ";
                        text = text.Substring(1);
                    }
                    if (text.Length > 2 && text.StartsWith('♫'))
                    {
                        pre  = pre + "♫";
                        text = text.Substring(1);
                    }
                    if (text.Length > 2 && text.StartsWith(' '))
                    {
                        pre  = pre + " ";
                        text = text.Substring(1);
                    }

                    char   firstLetter     = text[0];
                    string prevText        = HtmlUtil.RemoveHtmlTags(arr[0]);
                    bool   isPrevEndOfLine = Helper.IsPreviousTextEndOfParagraph(prevText);
                    if ((!text.StartsWith("www.", StringComparison.Ordinal) && !text.StartsWith("http:", StringComparison.Ordinal) && !text.StartsWith("https:", StringComparison.Ordinal)) &&
                        (char.IsLower(firstLetter) || Helper.IsTurkishLittleI(firstLetter, encoding, language)) &&
                        !prevText.EndsWith("...", StringComparison.Ordinal) &&
                        isPrevEndOfLine)
                    {
                        bool isMatchInKnowAbbreviations = language == "en" &&
                                                          (prevText.EndsWith(" o.r.", StringComparison.Ordinal) ||
                                                           prevText.EndsWith(" a.m.", StringComparison.Ordinal) ||
                                                           prevText.EndsWith(" p.m.", StringComparison.Ordinal));

                        if (!isMatchInKnowAbbreviations)
                        {
                            if (Helper.IsTurkishLittleI(firstLetter, encoding, language))
                            {
                                text = pre + Helper.GetTurkishUppercaseLetter(firstLetter, encoding) + text.Substring(1);
                            }
                            else if (language == "en" && (text.StartsWith("l ", StringComparison.Ordinal) || text.StartsWith("l-I", StringComparison.Ordinal) || text.StartsWith("ls ") || text.StartsWith("lnterested") ||
                                                          text.StartsWith("lsn't ", StringComparison.Ordinal) || text.StartsWith("ldiot", StringComparison.Ordinal) || text.StartsWith("ln", StringComparison.Ordinal) || text.StartsWith("lm", StringComparison.Ordinal) ||
                                                          text.StartsWith("ls", StringComparison.Ordinal) || text.StartsWith("lt", StringComparison.Ordinal) || text.StartsWith("lf ", StringComparison.Ordinal) || text.StartsWith("lc", StringComparison.Ordinal) || text.StartsWith("l'm ", StringComparison.Ordinal)) || text.StartsWith("l am ", StringComparison.Ordinal)) // l > I
                            {
                                text = pre + "I" + text.Substring(1);
                            }
                            else
                            {
                                text = pre + char.ToUpper(firstLetter) + text.Substring(1);
                            }
                            p.Text = arr[0] + Environment.NewLine + text;
                        }
                    }

                    arr = p.Text.SplitToLines();
                    if ((arr[0].StartsWith('-') || arr[0].StartsWith("<i>-", StringComparison.Ordinal)) &&
                        (arr[1].StartsWith('-') || arr[1].StartsWith("<i>-", StringComparison.Ordinal)) &&
                        !arr[0].StartsWith("--", StringComparison.Ordinal) && !arr[0].StartsWith("<i>--", StringComparison.Ordinal) &&
                        !arr[1].StartsWith("--", StringComparison.Ordinal) && !arr[1].StartsWith("<i>--", StringComparison.Ordinal))
                    {
                        if (isPrevEndOfLine && arr[1].StartsWith("<i>- ", StringComparison.Ordinal) && arr[1].Length > 6)
                        {
                            p.Text = arr[0] + Environment.NewLine + "<i>- " + char.ToUpper(arr[1][5]) + arr[1].Remove(0, 6);
                        }
                        else if (isPrevEndOfLine && arr[1].StartsWith("- ", StringComparison.Ordinal) && arr[1].Length > 3)
                        {
                            p.Text = arr[0] + Environment.NewLine + "- " + char.ToUpper(arr[1][2]) + arr[1].Remove(0, 3);
                        }
                        arr = p.Text.SplitToLines();

                        prevText = " .";
                        if (prev != null && p.StartTime.TotalMilliseconds - 10000 < prev.EndTime.TotalMilliseconds)
                        {
                            prevText = HtmlUtil.RemoveHtmlTags(prev.Text);
                        }
                        bool isPrevLineEndOfLine = Helper.IsPreviousTextEndOfParagraph(prevText);
                        if (isPrevLineEndOfLine && arr[0].StartsWith("<i>- ", StringComparison.Ordinal) && arr[0].Length > 6)
                        {
                            p.Text = "<i>- " + char.ToUpper(arr[0][5]) + arr[0].Remove(0, 6) + Environment.NewLine + arr[1];
                        }
                        else if (isPrevLineEndOfLine && arr[0].StartsWith("- ", StringComparison.Ordinal) && arr[0].Length > 3)
                        {
                            p.Text = "- " + char.ToUpper(arr[0][2]) + arr[0].Remove(0, 3) + Environment.NewLine + arr[1];
                        }
                    }
                }
            }

            if (p.Text != null && p.Text.Length > 4)
            {
                int len            = 0;
                int indexOfNewLine = p.Text.IndexOf(Environment.NewLine + " -", 1, StringComparison.Ordinal);
                if (indexOfNewLine < 0)
                {
                    indexOfNewLine = p.Text.IndexOf(Environment.NewLine + "- <i> ♪", 1, StringComparison.Ordinal);
                    len            = "- <i> ♪".Length;
                }
                if (indexOfNewLine < 0)
                {
                    indexOfNewLine = p.Text.IndexOf(Environment.NewLine + "-", 1, StringComparison.Ordinal);
                    len            = "-".Length;
                }
                if (indexOfNewLine < 0)
                {
                    indexOfNewLine = p.Text.IndexOf(Environment.NewLine + "<i>-", 1, StringComparison.Ordinal);
                    len            = "<i>-".Length;
                }
                if (indexOfNewLine < 0)
                {
                    indexOfNewLine = p.Text.IndexOf(Environment.NewLine + "<i> -", 1, StringComparison.Ordinal);
                    len            = "<i> -".Length;
                }
                if (indexOfNewLine < 0)
                {
                    indexOfNewLine = p.Text.IndexOf(Environment.NewLine + "♪ -", 1, StringComparison.Ordinal);
                    len            = "♪ -".Length;
                }
                if (indexOfNewLine < 0)
                {
                    indexOfNewLine = p.Text.IndexOf(Environment.NewLine + "♪ <i> -", 1, StringComparison.Ordinal);
                    len            = "♪ <i> -".Length;
                }
                if (indexOfNewLine < 0)
                {
                    indexOfNewLine = p.Text.IndexOf(Environment.NewLine + "♪ <i>-", 1, StringComparison.Ordinal);
                    len            = "♪ <i>-".Length;
                }

                if (indexOfNewLine > 0)
                {
                    string text = p.Text.Substring(indexOfNewLine + len);
                    var    st   = new StripableText(text);

                    if (st.StrippedText.Length > 0 && Helper.IsTurkishLittleI(st.StrippedText[0], encoding, language) && !st.Pre.EndsWith('[') && !st.Pre.Contains("..."))
                    {
                        text   = st.Pre + Helper.GetTurkishUppercaseLetter(st.StrippedText[0], encoding) + st.StrippedText.Substring(1) + st.Post;
                        p.Text = p.Text.Remove(indexOfNewLine + len).Insert(indexOfNewLine + len, text);
                    }
                    else if (st.StrippedText.Length > 0 && st.StrippedText[0] != char.ToUpper(st.StrippedText[0]) && !st.Pre.EndsWith('[') && !st.Pre.Contains("..."))
                    {
                        text   = st.Pre + char.ToUpper(st.StrippedText[0]) + st.StrippedText.Substring(1) + st.Post;
                        p.Text = p.Text.Remove(indexOfNewLine + len).Insert(indexOfNewLine + len, text);
                    }
                }
            }
            return(p.Text);
        }
        public void Fix(Subtitle subtitle, IFixCallbacks callbacks)
        {
            var language = Configuration.Settings.Language.FixCommonErrors;
            string fixAction = language.StartWithUppercaseLetterAfterColon;
            int noOfFixes = 0;
            for (int i = 0; i < subtitle.Paragraphs.Count; i++)
            {
                var p = new Paragraph(subtitle.Paragraphs[i]);
                Paragraph last = subtitle.GetParagraphOrDefault(i - 1);
                string oldText = p.Text;
                int skipCount = 0;

                if (last != null)
                {
                    string lastText = HtmlUtil.RemoveHtmlTags(last.Text);
                    if (lastText.EndsWith(':') || lastText.EndsWith(';'))
                    {
                        var st = new StripableText(p.Text);
                        if (st.StrippedText.Length > 0 && st.StrippedText[0] != char.ToUpper(st.StrippedText[0]))
                            p.Text = st.Pre + char.ToUpper(st.StrippedText[0]) + st.StrippedText.Substring(1) + st.Post;
                    }
                }

                if (oldText.Contains(new[] { ':', ';' }))
                {
                    bool lastWasColon = false;
                    for (int j = 0; j < p.Text.Length; j++)
                    {
                        var s = p.Text[j];
                        if (s == ':' || s == ';')
                        {
                            lastWasColon = true;
                        }
                        else if (lastWasColon)
                        {
                            var startFromJ = p.Text.Substring(j);
                            if (skipCount > 0)
                                skipCount--;
                            else if (startFromJ.StartsWith("<i>", StringComparison.OrdinalIgnoreCase))
                                skipCount = 2;
                            else if (startFromJ.StartsWith("<b>", StringComparison.OrdinalIgnoreCase))
                                skipCount = 2;
                            else if (startFromJ.StartsWith("<u>", StringComparison.OrdinalIgnoreCase))
                                skipCount = 2;
                            else if (startFromJ.StartsWith("<font ", StringComparison.OrdinalIgnoreCase) && p.Text.Substring(j).Contains('>'))
                                skipCount = startFromJ.IndexOf('>') - startFromJ.IndexOf("<font ", StringComparison.OrdinalIgnoreCase);
                            else if (Helper.IsTurkishLittleI(s, callbacks.Encoding, callbacks.Language))
                            {
                                p.Text = p.Text.Remove(j, 1).Insert(j, Helper.GetTurkishUppercaseLetter(s, callbacks.Encoding).ToString(CultureInfo.InvariantCulture));
                                lastWasColon = false;
                            }
                            else if (char.IsLower(s))
                            {
                                // iPhone
                                bool change = true;
                                if (s == 'i' && p.Text.Length > j + 1)
                                {
                                    if (p.Text[j + 1] == char.ToUpper(p.Text[j + 1]))
                                        change = false;
                                }
                                if (change)
                                    p.Text = p.Text.Remove(j, 1).Insert(j, char.ToUpper(s).ToString(CultureInfo.InvariantCulture));
                                lastWasColon = false;
                            }
                            else if (!(" " + Environment.NewLine).Contains(s))
                                lastWasColon = false;
                        }
                    }
                }

                if (oldText != p.Text && callbacks.AllowFix(p, fixAction))
                {
                    noOfFixes++;
                    subtitle.Paragraphs[i].Text = p.Text;
                    callbacks.AddFixToListView(p, fixAction, oldText, p.Text);
                }
            }
            callbacks.UpdateFixStatus(noOfFixes, language.StartWithUppercaseLetterAfterColon, noOfFixes.ToString(CultureInfo.InvariantCulture));
        }
Exemplo n.º 27
0
        public static string FixStartWithUppercaseLetterAfterParagraph(Paragraph p, Paragraph prev, Encoding encoding, string language)
        {
            if (p.Text != null && p.Text.Length > 1)
            {
                string text = p.Text;
                string pre = string.Empty;
                if (text.Length > 4 && text.StartsWith("<i> ", StringComparison.Ordinal))
                {
                    pre = "<i> ";
                    text = text.Substring(4);
                }
                if (text.Length > 3 && text.StartsWith("<i>", StringComparison.Ordinal))
                {
                    pre = "<i>";
                    text = text.Substring(3);
                }
                if (text.Length > 4 && text.StartsWith("<I> ", StringComparison.Ordinal))
                {
                    pre = "<I> ";
                    text = text.Substring(4);
                }
                if (text.Length > 3 && text.StartsWith("<I>", StringComparison.Ordinal))
                {
                    pre = "<I>";
                    text = text.Substring(3);
                }
                if (text.Length > 2 && text.StartsWith('♪'))
                {
                    pre = pre + "♪";
                    text = text.Substring(1);
                }
                if (text.Length > 2 && text.StartsWith(' '))
                {
                    pre = pre + " ";
                    text = text.Substring(1);
                }
                if (text.Length > 2 && text.StartsWith('♫'))
                {
                    pre = pre + "♫";
                    text = text.Substring(1);
                }
                if (text.Length > 2 && text.StartsWith(' '))
                {
                    pre = pre + " ";
                    text = text.Substring(1);
                }

                var firstLetter = text[0];

                string prevText = " .";
                if (prev != null)
                    prevText = HtmlUtil.RemoveHtmlTags(prev.Text);

                bool isPrevEndOfLine = FixCommonErrorsHelper.IsPreviousTextEndOfParagraph(prevText);
                if (prevText == " .")
                    isPrevEndOfLine = true;
                if ((!text.StartsWith("www.", StringComparison.Ordinal) && !text.StartsWith("http:", StringComparison.Ordinal) && !text.StartsWith("https:", StringComparison.Ordinal)) &&
                    (char.IsLower(firstLetter) || IsTurkishLittleI(firstLetter, encoding, language)) &&
                    !char.IsDigit(firstLetter) &&
                    isPrevEndOfLine)
                {
                    bool isMatchInKnowAbbreviations = language == "en" &&
                        (prevText.EndsWith(" o.r.", StringComparison.Ordinal) ||
                         prevText.EndsWith(" a.m.", StringComparison.Ordinal) ||
                         prevText.EndsWith(" p.m.", StringComparison.Ordinal));

                    if (!isMatchInKnowAbbreviations)
                    {
                        if (IsTurkishLittleI(firstLetter, encoding, language))
                            p.Text = pre + GetTurkishUppercaseLetter(firstLetter, encoding) + text.Substring(1);
                        else if (language == "en" && (text.StartsWith("l ", StringComparison.Ordinal) || text.StartsWith("l-I", StringComparison.Ordinal) || text.StartsWith("ls ", StringComparison.Ordinal) || text.StartsWith("lnterested") ||
                                                      text.StartsWith("lsn't ", StringComparison.Ordinal) || text.StartsWith("ldiot", StringComparison.Ordinal) || text.StartsWith("ln", StringComparison.Ordinal) || text.StartsWith("lm", StringComparison.Ordinal) ||
                                                      text.StartsWith("ls", StringComparison.Ordinal) || text.StartsWith("lt", StringComparison.Ordinal) || text.StartsWith("lf ", StringComparison.Ordinal) || text.StartsWith("lc", StringComparison.Ordinal) || text.StartsWith("l'm ", StringComparison.Ordinal)) || text.StartsWith("l am ", StringComparison.Ordinal)) // l > I
                            p.Text = pre + "I" + text.Substring(1);
                        else
                            p.Text = pre + char.ToUpper(firstLetter) + text.Substring(1);
                    }
                }
            }

            if (p.Text != null && p.Text.Contains(Environment.NewLine))
            {
                var arr = p.Text.SplitToLines();
                if (arr.Length == 2 && arr[1].Length > 1)
                {
                    string text = arr[1];
                    string pre = string.Empty;
                    if (text.Length > 4 && text.StartsWith("<i> ", StringComparison.Ordinal))
                    {
                        pre = "<i> ";
                        text = text.Substring(4);
                    }
                    if (text.Length > 3 && text.StartsWith("<i>", StringComparison.Ordinal))
                    {
                        pre = "<i>";
                        text = text.Substring(3);
                    }
                    if (text.Length > 4 && text.StartsWith("<I> ", StringComparison.Ordinal))
                    {
                        pre = "<I> ";
                        text = text.Substring(4);
                    }
                    if (text.Length > 3 && text.StartsWith("<I>", StringComparison.Ordinal))
                    {
                        pre = "<I>";
                        text = text.Substring(3);
                    }
                    if (text.Length > 2 && text.StartsWith('♪'))
                    {
                        pre = pre + "♪";
                        text = text.Substring(1);
                    }
                    if (text.Length > 2 && text.StartsWith(' '))
                    {
                        pre = pre + " ";
                        text = text.Substring(1);
                    }
                    if (text.Length > 2 && text.StartsWith('♫'))
                    {
                        pre = pre + "♫";
                        text = text.Substring(1);
                    }
                    if (text.Length > 2 && text.StartsWith(' '))
                    {
                        pre = pre + " ";
                        text = text.Substring(1);
                    }

                    char firstLetter = text[0];
                    string prevText = HtmlUtil.RemoveHtmlTags(arr[0]);
                    bool isPrevEndOfLine = FixCommonErrorsHelper.IsPreviousTextEndOfParagraph(prevText);
                    if ((!text.StartsWith("www.", StringComparison.Ordinal) && !text.StartsWith("http:", StringComparison.Ordinal) && !text.StartsWith("https:", StringComparison.Ordinal)) &&
                        (char.IsLower(firstLetter) || IsTurkishLittleI(firstLetter, encoding, language)) &&
                        !prevText.EndsWith("...", StringComparison.Ordinal) &&
                        isPrevEndOfLine)
                    {
                        bool isMatchInKnowAbbreviations = language == "en" &&
                            (prevText.EndsWith(" o.r.", StringComparison.Ordinal) ||
                             prevText.EndsWith(" a.m.", StringComparison.Ordinal) ||
                             prevText.EndsWith(" p.m.", StringComparison.Ordinal));

                        if (!isMatchInKnowAbbreviations)
                        {
                            if (IsTurkishLittleI(firstLetter, encoding, language))
                                text = pre + GetTurkishUppercaseLetter(firstLetter, encoding) + text.Substring(1);
                            else if (language == "en" && (text.StartsWith("l ", StringComparison.Ordinal) || text.StartsWith("l-I", StringComparison.Ordinal) || text.StartsWith("ls ") || text.StartsWith("lnterested") ||
                                                     text.StartsWith("lsn't ", StringComparison.Ordinal) || text.StartsWith("ldiot", StringComparison.Ordinal) || text.StartsWith("ln", StringComparison.Ordinal) || text.StartsWith("lm", StringComparison.Ordinal) ||
                                                     text.StartsWith("ls", StringComparison.Ordinal) || text.StartsWith("lt", StringComparison.Ordinal) || text.StartsWith("lf ", StringComparison.Ordinal) || text.StartsWith("lc", StringComparison.Ordinal) || text.StartsWith("l'm ", StringComparison.Ordinal)) || text.StartsWith("l am ", StringComparison.Ordinal)) // l > I
                                text = pre + "I" + text.Substring(1);
                            else
                                text = pre + char.ToUpper(firstLetter) + text.Substring(1);
                            p.Text = arr[0] + Environment.NewLine + text;
                        }
                    }

                    arr = p.Text.SplitToLines();
                    if ((arr[0].StartsWith('-') || arr[0].StartsWith("<i>-", StringComparison.Ordinal)) &&
                        (arr[1].StartsWith('-') || arr[1].StartsWith("<i>-", StringComparison.Ordinal)) &&
                        !arr[0].StartsWith("--", StringComparison.Ordinal) && !arr[0].StartsWith("<i>--", StringComparison.Ordinal) &&
                        !arr[1].StartsWith("--", StringComparison.Ordinal) && !arr[1].StartsWith("<i>--", StringComparison.Ordinal))
                    {
                        if (isPrevEndOfLine && arr[1].StartsWith("<i>- ", StringComparison.Ordinal) && arr[1].Length > 6)
                        {
                            p.Text = arr[0] + Environment.NewLine + "<i>- " + char.ToUpper(arr[1][5]) + arr[1].Remove(0, 6);
                        }
                        else if (isPrevEndOfLine && arr[1].StartsWith("- ", StringComparison.Ordinal) && arr[1].Length > 3)
                        {
                            p.Text = arr[0] + Environment.NewLine + "- " + char.ToUpper(arr[1][2]) + arr[1].Remove(0, 3);
                        }
                        arr = p.Text.SplitToLines();

                        prevText = " .";
                        if (prev != null && p.StartTime.TotalMilliseconds - 10000 < prev.EndTime.TotalMilliseconds)
                            prevText = HtmlUtil.RemoveHtmlTags(prev.Text);
                        bool isPrevLineEndOfLine = FixCommonErrorsHelper.IsPreviousTextEndOfParagraph(prevText);
                        if (isPrevLineEndOfLine && arr[0].StartsWith("<i>- ", StringComparison.Ordinal) && arr[0].Length > 6)
                        {
                            p.Text = "<i>- " + char.ToUpper(arr[0][5]) + arr[0].Remove(0, 6) + Environment.NewLine + arr[1];
                        }
                        else if (isPrevLineEndOfLine && arr[0].StartsWith("- ", StringComparison.Ordinal) && arr[0].Length > 3)
                        {
                            p.Text = "- " + char.ToUpper(arr[0][2]) + arr[0].Remove(0, 3) + Environment.NewLine + arr[1];
                        }
                    }
                }
            }

            if (p.Text.Length > 4)
            {
                int len = 0;
                int indexOfNewLine = p.Text.IndexOf(Environment.NewLine + " -", 1, StringComparison.Ordinal);
                if (indexOfNewLine < 0)
                {
                    indexOfNewLine = p.Text.IndexOf(Environment.NewLine + "- <i> ♪", 1, StringComparison.Ordinal);
                    len = "- <i> ♪".Length;
                }
                if (indexOfNewLine < 0)
                {
                    indexOfNewLine = p.Text.IndexOf(Environment.NewLine + "-", 1, StringComparison.Ordinal);
                    len = "-".Length;
                }
                if (indexOfNewLine < 0)
                {
                    indexOfNewLine = p.Text.IndexOf(Environment.NewLine + "<i>-", 1, StringComparison.Ordinal);
                    len = "<i>-".Length;
                }
                if (indexOfNewLine < 0)
                {
                    indexOfNewLine = p.Text.IndexOf(Environment.NewLine + "<i> -", 1, StringComparison.Ordinal);
                    len = "<i> -".Length;
                }
                if (indexOfNewLine < 0)
                {
                    indexOfNewLine = p.Text.IndexOf(Environment.NewLine + "♪ -", 1, StringComparison.Ordinal);
                    len = "♪ -".Length;
                }
                if (indexOfNewLine < 0)
                {
                    indexOfNewLine = p.Text.IndexOf(Environment.NewLine + "♪ <i> -", 1, StringComparison.Ordinal);
                    len = "♪ <i> -".Length;
                }
                if (indexOfNewLine < 0)
                {
                    indexOfNewLine = p.Text.IndexOf(Environment.NewLine + "♪ <i>-", 1, StringComparison.Ordinal);
                    len = "♪ <i>-".Length;
                }

                if (indexOfNewLine > 0)
                {
                    string text = p.Text.Substring(indexOfNewLine + len);
                    var st = new StripableText(text);

                    if (st.StrippedText.Length > 0 && IsTurkishLittleI(st.StrippedText[0], encoding, language) && !st.Pre.EndsWith('[') && !st.Pre.Contains("..."))
                    {
                        text = st.Pre + GetTurkishUppercaseLetter(st.StrippedText[0], encoding) + st.StrippedText.Substring(1) + st.Post;
                        p.Text = p.Text.Remove(indexOfNewLine + len).Insert(indexOfNewLine + len, text);
                    }
                    else if (st.StrippedText.Length > 0 && st.StrippedText[0] != char.ToUpper(st.StrippedText[0]) && !st.Pre.EndsWith('[') && !st.Pre.Contains("..."))
                    {
                        text = st.Pre + char.ToUpper(st.StrippedText[0]) + st.StrippedText.Substring(1) + st.Post;
                        p.Text = p.Text.Remove(indexOfNewLine + len).Insert(indexOfNewLine + len, text);
                    }
                }
            }
            return p.Text;
        }
        public string RemoveInterjections(string text)
        {
            if (_interjectionList == null)
            {
                var interjectionList = new HashSet<string>();
                foreach (var s in Configuration.Settings.Tools.Interjections.Split(new[] { ';' }, StringSplitOptions.RemoveEmptyEntries))
                {
                    if (s.Length > 0)
                    {
                        interjectionList.Add(s);
                        var upper = s.ToUpper();
                        interjectionList.Add(upper);
                        var lower = s.ToLower();
                        interjectionList.Add(lower);
                        interjectionList.Add(lower.CapitalizeFirstLetter());
                    }
                }
                _interjectionList = new List<string>(interjectionList);
                interjectionList.Clear();
                interjectionList.TrimExcess();
                _interjectionList.Sort(CompareLength);
            }

            string oldText = text;
            bool doRepeat = true;
            while (doRepeat)
            {
                doRepeat = false;
                foreach (string s in _interjectionList)
                {
                    if (text.Contains(s))
                    {
                        var regex = new Regex("\\b" + Regex.Escape(s) + "\\b");
                        var match = regex.Match(text);
                        if (match.Success)
                        {
                            int index = match.Index;
                            string temp = text.Remove(index, s.Length);
                            if (temp.Remove(0, index) == " —" && temp.EndsWith("—  —", StringComparison.Ordinal))
                            {
                                temp = temp.Remove(temp.Length - 3);
                                if (temp.EndsWith(Environment.NewLine + "—", StringComparison.Ordinal))
                                    temp = temp.Remove(temp.Length - 1).TrimEnd();
                            }
                            else if (temp.Remove(0, index) == " —" && temp.EndsWith("-  —", StringComparison.Ordinal))
                            {
                                temp = temp.Remove(temp.Length - 3);
                                if (temp.EndsWith(Environment.NewLine + "-", StringComparison.Ordinal))
                                    temp = temp.Remove(temp.Length - 1).TrimEnd();
                            }
                            else if (index == 2 && temp.StartsWith("-  —", StringComparison.Ordinal))
                            {
                                temp = temp.Remove(2, 2);
                            }
                            else if (index == 2 && temp.StartsWith("- —", StringComparison.Ordinal))
                            {
                                temp = temp.Remove(2, 1);
                            }
                            else if (index == 0 && temp.StartsWith(" —", StringComparison.Ordinal))
                            {
                                temp = temp.Remove(0, 2);
                            }
                            else if (index == 0 && temp.StartsWith('—'))
                            {
                                temp = temp.Remove(0, 1);
                            }
                            string pre = string.Empty;
                            if (index > 0)
                                doRepeat = true;

                            bool removeAfter = true;

                            if (index > s.Length)
                            {
                                if (temp.Length > index - s.Length + 3)
                                {
                                    int subIndex = index - s.Length + 1;
                                    string subTemp = temp.Substring(subIndex, 3);
                                    if (subTemp == ", !" || subTemp == ", ?" || subTemp == ", .")
                                    {
                                        temp = temp.Remove(subIndex, 2);
                                        removeAfter = false;
                                    }
                                }
                                if (removeAfter && temp.Length > index - s.Length + 2)
                                {
                                    int subIndex = index - s.Length;
                                    string subTemp = temp.Substring(subIndex, 3);
                                    if (subTemp == ", !" || subTemp == ", ?" || subTemp == ", .")
                                    {
                                        temp = temp.Remove(subIndex, 2);
                                        removeAfter = false;
                                    }
                                    else
                                    {
                                        subTemp = temp.Substring(subIndex);
                                        if (subTemp.StartsWith(", -—", StringComparison.Ordinal))
                                        {
                                            temp = temp.Remove(subIndex, 3);
                                            removeAfter = false;
                                        }
                                        else if (subTemp.StartsWith(", --", StringComparison.Ordinal))
                                        {
                                            temp = temp.Remove(subIndex, 2);
                                            removeAfter = false;
                                        }
                                    }
                                }
                                if (removeAfter && temp.Length > index - s.Length + 2)
                                {
                                    int subIndex = index - s.Length + 1;
                                    string subTemp = temp.Substring(subIndex, 2);
                                    if (subTemp == "-!" || subTemp == "-?" || subTemp == "-.")
                                    {
                                        temp = temp.Remove(subIndex, 1);
                                        removeAfter = false;
                                    }
                                }
                            }

                            if (index > 3 && index - 2 < temp.Length)
                            {
                                string subTemp = temp.Substring(index - 2);
                                if (subTemp.StartsWith(",  —", StringComparison.Ordinal) || subTemp.StartsWith(", —", StringComparison.Ordinal))
                                {
                                    temp = temp.Remove(index - 2, 1);
                                    index--;
                                }
                            }

                            if (removeAfter)
                            {
                                if (index == 0)
                                {
                                    if (temp.StartsWith('-'))
                                        temp = temp.Remove(0, 1).Trim();
                                }
                                else if (index == 3 && temp.StartsWith("<i>-", StringComparison.Ordinal))
                                {
                                    temp = temp.Remove(3, 1);
                                }
                                else if (index > 0 && temp.Length > index)
                                {
                                    pre = text.Substring(0, index);
                                    temp = temp.Remove(0, index);
                                    if (temp.StartsWith('-') && pre.EndsWith('-'))
                                        temp = temp.Remove(0, 1);
                                    if (temp.StartsWith('-') && pre.EndsWith("- ", StringComparison.Ordinal))
                                        temp = temp.Remove(0, 1);
                                }

                                while (temp.Length > 0 && " ,.?!".Contains(temp[0]))
                                {
                                    temp = temp.Remove(0, 1);
                                    doRepeat = true;
                                }
                                if (temp.Length > 0 && s[0].ToString(CultureInfo.InvariantCulture) != s[0].ToString(CultureInfo.InvariantCulture).ToLower())
                                {
                                    temp = char.ToUpper(temp[0]) + temp.Substring(1);
                                    doRepeat = true;
                                }

                                if (temp.StartsWith('-') && pre.EndsWith(' '))
                                    temp = temp.Remove(0, 1);

                                if (temp.StartsWith('—') && pre.EndsWith(','))
                                    pre = pre.TrimEnd(',') + " ";
                                temp = pre + temp;
                            }

                            if (temp.EndsWith(Environment.NewLine + "- ", StringComparison.Ordinal))
                                temp = temp.Remove(temp.Length - 2).TrimEnd();

                            var st = new StripableText(temp);
                            if (st.StrippedText.Length == 0)
                                return string.Empty;

                            if (temp.StartsWith('-') && !temp.Contains(Environment.NewLine) && text.Contains(Environment.NewLine))
                                temp = temp.Remove(0, 1).Trim();

                            text = temp;
                        }
                    }
                }
            }
            var lines = text.SplitToLines();
            if (lines.Length == 2 && text != oldText)
            {
                if (lines[0] == "-" && lines[1] == "-")
                    return string.Empty;
                if (lines[0].Length > 1 && lines[0][0] == '-' && lines[1].Trim() == "-")
                    return lines[0].Remove(0, 1).Trim();
                if (lines[1].Length > 1 && lines[1][0] == '-' && lines[0].Trim() == "-")
                    return lines[1].Remove(0, 1).Trim();
                if (lines[1].Length > 4 && lines[1].StartsWith("<i>-", StringComparison.Ordinal) && lines[0].Trim() == "-")
                    return "<i>" + lines[1].Remove(0, 4).Trim();
                if (lines[0].Length > 1 && lines[1] == "-" || lines[1] == "." || lines[1] == "!" || lines[1] == "?")
                {
                    if (lines[0].StartsWith('-') && oldText.Contains(Environment.NewLine + "-"))
                        lines[0] = lines[0].Remove(0, 1);
                    return lines[0].Trim();
                }
                var noTags0 = HtmlUtil.RemoveHtmlTags(lines[0], false).Trim();
                var noTags1 = HtmlUtil.RemoveHtmlTags(lines[1], false).Trim();
                if (noTags0 == "-")
                {
                    if (noTags1 == noTags0)
                        return string.Empty;
                    if (lines[1].Length > 1 && lines[1][0] == '-')
                        return lines[1].Remove(0, 1).Trim();
                    if (lines[1].Length > 4 && lines[1].StartsWith("<i>-", StringComparison.Ordinal))
                        return "<i>" + lines[1].Remove(0, 4).Trim();
                    return lines[1];
                }
                if (noTags1 == "-")
                {
                    if (lines[0].Length > 1 && lines[0][0] == '-')
                        return lines[0].Remove(0, 1).Trim();
                    if (lines[0].Length > 4 && lines[0].StartsWith("<i>-", StringComparison.Ordinal))
                        return "<i>" + lines[0].Remove(0, 4).Trim();
                    return lines[0];
                }
            }
            if (lines.Length == 2)
            {
                if (string.IsNullOrWhiteSpace(lines[1].Replace(".", string.Empty).Replace("?", string.Empty).Replace("!", string.Empty).Replace("-", string.Empty).Replace("—", string.Empty)))
                {
                    text = lines[0];
                    lines = text.SplitToLines();
                }
                else if (string.IsNullOrWhiteSpace(lines[0].Replace(".", string.Empty).Replace("?", string.Empty).Replace("!", string.Empty).Replace("-", string.Empty).Replace("—", string.Empty)))
                {
                    text = lines[1];
                    lines = text.SplitToLines();
                }
            }
            if (lines.Length == 1 && text != oldText && Utilities.GetNumberOfLines(oldText) == 2)
            {
                if ((oldText.StartsWith('-') || oldText.StartsWith("<i>-", StringComparison.Ordinal)) &&
                    (oldText.Contains("." + Environment.NewLine) || oldText.Contains(".</i>" + Environment.NewLine) ||
                     oldText.Contains("!" + Environment.NewLine) || oldText.Contains("!</i>" + Environment.NewLine) ||
                     oldText.Contains("?" + Environment.NewLine) || oldText.Contains("?</i>" + Environment.NewLine)))
                {
                    if (text.StartsWith("<i>-", StringComparison.Ordinal))
                        text = "<i>" + text.Remove(0, 4).TrimStart();
                    else
                        text = text.TrimStart('-').TrimStart();
                }
                else if ((oldText.Contains(Environment.NewLine + "-") || oldText.Contains(Environment.NewLine + "<i>-")) &&
                    (oldText.Contains("." + Environment.NewLine) || oldText.Contains(".</i>" + Environment.NewLine) ||
                     oldText.Contains("!" + Environment.NewLine) || oldText.Contains("!</i>" + Environment.NewLine) ||
                     oldText.Contains("?" + Environment.NewLine) || oldText.Contains("?</i>" + Environment.NewLine)))
                {
                    if (text.StartsWith("<i>-", StringComparison.Ordinal))
                        text = "<i>" + text.Remove(0, 4).TrimStart();
                    else
                        text = text.TrimStart('-').TrimStart();
                }
            }
            return text;
        }
Exemplo n.º 29
0
        private void GeneratePreview()
        {
            Cursor = Cursors.WaitCursor;
            listViewFixes.BeginUpdate();
            listViewFixes.Items.Clear();
            foreach (Paragraph p in _subtitle.Paragraphs)
            {
                string text = p.Text;
                foreach (ListViewItem item in listViewNames.Items)
                {
                    string name = item.SubItems[1].Text;

                    string textNoTags = HtmlUtil.RemoveHtmlTags(text);
                    if (textNoTags != textNoTags.ToUpper())
                    {
                        if (item.Checked && text != null && text.Contains(name, StringComparison.OrdinalIgnoreCase) && name.Length > 1 && name != name.ToLower())
                        {
                            var st = new StripableText(text);
                            st.FixCasing(new List<string> { name }, true, false, false, string.Empty);
                            text = st.MergedString;
                        }
                    }
                }
                if (text != p.Text)
                    AddToPreviewListView(p, text);
            }
            listViewFixes.EndUpdate();
            groupBoxLinesFound.Text = string.Format(Configuration.Settings.Language.ChangeCasingNames.LinesFoundX, listViewFixes.Items.Count);
            Cursor = Cursors.Default;
        }
        private string FixCasing(string text, string lastLine, List<string> namesEtc)
        {
            string original = text;
            if (radioButtonNormal.Checked)
            {
                if (checkBoxOnlyAllUpper.Checked && text != text.ToUpper())
                    return text;

                if (text.Length > 1)
                {
                    // first all to lower
                    text = text.ToLower().Trim();
                    text = text.FixExtraSpaces();
                    var st = new StripableText(text);
                    st.FixCasing(namesEtc, false, true, true, lastLine); // fix all casing but names (that's a seperate option)
                    text = st.MergedString;
                }
            }
            else if (radioButtonUppercase.Checked)
            {
                var st = new StripableText(text);
                text = st.Pre + st.StrippedText.ToUpper() + st.Post;
                text = HtmlUtil.FixUpperTags(text); // tags inside text
            }
            else if (radioButtonLowercase.Checked)
            {
                text = text.ToLower();
            }
            if (original != text)
                _noOfLinesChanged++;
            return text;
        }
Exemplo n.º 31
0
        private string FixLowercaseIToUppercaseI(string input, string lastLine)
        {
            var sb = new StringBuilder();
            string[] lines = input.Split(Environment.NewLine.ToCharArray(), StringSplitOptions.RemoveEmptyEntries);
            for (int i = 0; i < lines.Length; i++)
            {
                string l = lines[i];

                if (i > 0)
                    lastLine = lines[i - 1];
                lastLine = Utilities.RemoveHtmlTags(lastLine);

                if (string.IsNullOrEmpty(lastLine) ||
                    lastLine.EndsWith(".", StringComparison.Ordinal) ||
                    lastLine.EndsWith("!", StringComparison.Ordinal) ||
                    lastLine.EndsWith("?", StringComparison.Ordinal))
                {
                    var st = new StripableText(l);
                    if (st.StrippedText.StartsWith("i", StringComparison.Ordinal) && !st.Pre.EndsWith("[", StringComparison.Ordinal) && !st.Pre.EndsWith("(", StringComparison.Ordinal) && !st.Pre.EndsWith("...", StringComparison.Ordinal))
                    {
                        if (string.IsNullOrEmpty(lastLine) || (!lastLine.EndsWith("...", StringComparison.Ordinal) && !EndsWithAbbreviation(lastLine, _abbreviationList)))
                            l = st.Pre + "I" + st.StrippedText.Remove(0, 1) + st.Post;
                    }
                }
                sb.AppendLine(l);
            }
            return sb.ToString().TrimEnd('\r').TrimEnd('\n').TrimEnd('\r').TrimEnd('\n');
        }
Exemplo n.º 32
0
        public string FixOcrErrorsViaHardcodedRules(string input, string lastLine, HashSet<string> abbreviationList)
        {
            if (!Configuration.Settings.Tools.OcrFixUseHardcodedRules)
                return input;

            input = input.Replace(",...", "...");

            if (input.StartsWith("..") && !input.StartsWith("..."))
                input = "." + input;

            string pre = string.Empty;
            if (input.StartsWith("- ", StringComparison.Ordinal))
            {
                pre = "- ";
                input = input.Remove(0, 2);
            }
            else if (input.StartsWith("-", StringComparison.Ordinal))
            {
                pre = "-";
                input = input.Remove(0, 1);
            }

            bool hasDotDot = input.Contains("..") || input.Contains(". .");
            if (hasDotDot)
            {
                if (input.Length > 5 && input.StartsWith("..") && Utilities.AllLettersAndNumbers.Contains(input.Substring(2, 1)))
                    input = "..." + input.Remove(0, 2);
                if (input.Length > 7 && input.StartsWith("<i>..") && Utilities.AllLettersAndNumbers.Contains(input.Substring(5, 1)))
                    input = "<i>..." + input.Remove(0, 5);

                if (input.Length > 5 && input.StartsWith(".. ") && Utilities.AllLettersAndNumbers.Contains(input.Substring(3, 1)))
                    input = "..." + input.Remove(0, 3);
                if (input.Length > 7 && input.StartsWith("<i>.. ") && Utilities.AllLettersAndNumbers.Contains(input.Substring(6, 1)))
                    input = "<i>..." + input.Remove(0, 6);
                if (input.Contains(Environment.NewLine + ".. "))
                    input = input.Replace(Environment.NewLine + ".. ", Environment.NewLine + "...");
                if (input.Contains(Environment.NewLine + "<i>.. "))
                    input = input.Replace(Environment.NewLine + "<i>.. ", Environment.NewLine + "<i>...");

                if (input.StartsWith(". .."))
                    input = "..." + input.Remove(0, 4);
                if (input.StartsWith(".. ."))
                    input = "..." + input.Remove(0, 4);
                if (input.StartsWith(". . ."))
                    input = "..." + input.Remove(0, 5);
                if (input.StartsWith("... "))
                    input = input.Remove(3, 1);
            }

            input = pre + input;

            if (hasDotDot)
            {
                if (input.StartsWith("<i>. .."))
                    input = "<i>..." + input.Remove(0, 7);
                if (input.StartsWith("<i>.. ."))
                    input = "<i>..." + input.Remove(0, 7);

                if (input.StartsWith("<i>. . ."))
                    input = "<i>..." + input.Remove(0, 8);
                if (input.StartsWith("<i>... "))
                    input = input.Remove(6, 1);
                if (input.StartsWith(". . <i>."))
                    input = "<i>..." + input.Remove(0, 8);

                if (input.StartsWith("...<i>") && (input.IndexOf("</i>", StringComparison.Ordinal) > input.IndexOf(" ", StringComparison.Ordinal)))
                    input = "<i>..." + input.Remove(0, 6);

                if (input.EndsWith(". .."))
                    input = input.Remove(input.Length - 4, 4) + "...";
                if (input.EndsWith(".. ."))
                    input = input.Remove(input.Length - 4, 4) + "...";
                if (input.EndsWith(". . ."))
                    input = input.Remove(input.Length - 5, 5) + "...";
                if (input.EndsWith(". ..."))
                    input = input.Remove(input.Length - 5, 5) + "...";

                if (input.EndsWith(". ..</i>"))
                    input = input.Remove(input.Length - 8, 8) + "...</i>";
                if (input.EndsWith(".. .</i>"))
                    input = input.Remove(input.Length - 8, 8) + "...</i>";
                if (input.EndsWith(". . .</i>"))
                    input = input.Remove(input.Length - 9, 9) + "...</i>";
                if (input.EndsWith(". ...</i>"))
                    input = input.Remove(input.Length - 9, 9) + "...</i>";

                if (input.EndsWith(".</i> . ."))
                    input = input.Remove(input.Length - 9, 9) + "...</i>";
                if (input.EndsWith(".</i>.."))
                    input = input.Remove(input.Length - 7, 7) + "...</i>";
                input = input.Replace(".</i> . ." + Environment.NewLine, "...</i>" + Environment.NewLine);

                input = input.Replace(".. ?", "..?");
                input = input.Replace("..?", "...?");
                input = input.Replace("....?", "...?");

                input = input.Replace(".. !", "..!");
                input = input.Replace("..!", "...!");
                input = input.Replace("....!", "...!");

                input = input.Replace("... ?", "...?");
                input = input.Replace("... !", "...!");

                input = input.Replace("....", "...");
                input = input.Replace("....", "...");

                if (input.StartsWith("- ...") && lastLine != null && lastLine.EndsWith("...") && !(input.Contains(Environment.NewLine + "-")))
                    input = input.Remove(0, 2);
                if (input.StartsWith("-...") && lastLine != null && lastLine.EndsWith("...") && !(input.Contains(Environment.NewLine + "-")))
                    input = input.Remove(0, 1);
            }

            if (input.Length > 2 && input[0] == '-' && Utilities.UppercaseLetters.Contains(input[1].ToString()))
            {
                input = input.Insert(1, " ");
            }

            if (input.Length > 5 && input.StartsWith("<i>-") && Utilities.UppercaseLetters.Contains(input[4].ToString()))
            {
                input = input.Insert(4, " ");
            }

            int idx = input.IndexOf(Environment.NewLine + "-", StringComparison.Ordinal);
            if (idx > 0 && idx + Environment.NewLine.Length + 1 < input.Length && Utilities.UppercaseLetters.Contains(input[idx + Environment.NewLine.Length + 1].ToString()))
            {
                input = input.Insert(idx + Environment.NewLine.Length + 1, " ");
            }

            idx = input.IndexOf(Environment.NewLine + "<i>-", StringComparison.Ordinal);
            if (idx > 0 && Utilities.UppercaseLetters.Contains(input[idx + Environment.NewLine.Length + 4].ToString()))
            {
                input = input.Insert(idx + Environment.NewLine.Length + 4, " ");
            }

            if (string.IsNullOrEmpty(lastLine) ||
                lastLine.EndsWith(".") ||
                lastLine.EndsWith("!") ||
                lastLine.EndsWith("?") ||
                lastLine.EndsWith("]") ||
                lastLine.EndsWith("♪"))
            {
                lastLine = Utilities.RemoveHtmlTags(lastLine);
                var st = new StripableText(input);
                if (lastLine == null || (!lastLine.EndsWith("...") && !EndsWithAbbreviation(lastLine, abbreviationList)))
                {
                    if (st.StrippedText.Length > 0 && st.StrippedText[0].ToString() != st.StrippedText[0].ToString().ToUpper() && !st.Pre.EndsWith("[") && !st.Pre.EndsWith("(") && !st.Pre.EndsWith("..."))
                    {
                        string uppercaseLetter = st.StrippedText[0].ToString().ToUpper();
                        if (st.StrippedText.Length > 1 && uppercaseLetter == "L" && "abcdfghjklmnpqrstvwxz".Contains(st.StrippedText[1].ToString()))
                            uppercaseLetter = "I";
                        if ((st.StrippedText.StartsWith("lo ") || st.StrippedText == "lo.") && _threeLetterIsoLanguageName == "ita")
                            uppercaseLetter = "I";
                        if ((st.StrippedText.StartsWith("k ") || st.StrippedText.StartsWith("m ") || st.StrippedText.StartsWith("n ") || st.StrippedText.StartsWith("r ") || st.StrippedText.StartsWith("s ") || st.StrippedText.StartsWith("t ")) &&
                            st.Pre.EndsWith("'") && _threeLetterIsoLanguageName == "nld")
                            uppercaseLetter = st.StrippedText.Substring(0, 1);
                        if ((st.StrippedText.StartsWith("l-I'll ") || st.StrippedText.StartsWith("l-l'll ")) && _threeLetterIsoLanguageName == "eng")
                        {
                            uppercaseLetter = "I";
                            st.StrippedText = "I-I" + st.StrippedText.Remove(0, 3);
                        }
                        st.StrippedText = st.StrippedText.Remove(0, 1).Insert(0, uppercaseLetter);
                        input = st.Pre + st.StrippedText + st.Post;
                    }
                }
            }

            // lines ending with ". should often end at ... (of no other quotes exists near by)
            if ((lastLine == null || !lastLine.Contains("\"")) && input != null &&
                input.EndsWith("\".") && input.IndexOf('"') == input.LastIndexOf('"') && input.Length > 3)
            {
                string lastChar = input.Substring(input.Length - 3, 1);
                if (!"0123456789".Contains(lastChar))
                {
                    int position = input.Length - 2;
                    input = input.Remove(position).Insert(position, "...");
                }
            }

            // change '<number><space>1' to '<number>1'
            if (input.Contains("1"))
            {
                Match match = RegExNumber1.Match(input);
                while (match.Success)
                {
                    bool doFix = true;

                    if (match.Index + 4 < input.Length && input[match.Index + 3] == '/' && "0123456789".Contains(input[match.Index + 4].ToString()))
                        doFix = false;

                    if (doFix)
                    {
                        input = input.Substring(0, match.Index + 1) + input.Substring(match.Index + 2);
                        match = RegExNumber1.Match(input);
                    }
                    else
                    {
                        match = RegExNumber1.Match(input, match.Index + 1);
                    }
                }
            }

            // change '' to "
            input = input.Replace("''", "\"");

            // change 'sequeI of' to 'sequel of'
            if (input.Contains("I"))
            {
                var match = RegExUppercaseI.Match(input);
                while (match.Success)
                {
                    bool doFix = true;
                    if (match.Index >= 1 && input.Substring(match.Index - 1).StartsWith("Mc"))
                        doFix = false;
                    if (match.Index >= 2 && input.Substring(match.Index - 2).StartsWith("Mac"))
                        doFix = false;

                    if (doFix)
                        input = input.Substring(0, match.Index + 1) + "l" + input.Substring(match.Index + 2);

                    if (match.Index + 1 < input.Length)
                        match = RegExUppercaseI.Match(input, match.Index + 1);
                    else
                        break; // end while
                }
            }

            // change 'NlCE' to 'NICE'
            if (input.Contains("l"))
            {
                var match = RegExLowercaseL.Match(input);
                while (match.Success)
                {
                    input = input.Substring(0, match.Index + 1) + "I" + input.Substring(match.Index + 2);
                    match = RegExLowercaseL.Match(input);
                }
            }

            return input;
        }
Exemplo n.º 33
0
        public void FixMissingPeriodsAtEndOfLine()
        {
            string fixAction = _language.FixMissingPeriodAtEndOfLine;
            int missigPeriodsAtEndOfLine = 0;
            for (int i = 0; i < Subtitle.Paragraphs.Count; i++)
            {
                Paragraph p = Subtitle.Paragraphs[i];
                Paragraph next = Subtitle.GetParagraphOrDefault(i + 1);
                string nextText = string.Empty;
                if (next != null)
                    nextText = HtmlUtil.RemoveHtmlTags(next.Text).TrimStart('-', '"', '„').TrimStart();
                string tempNoHtml = HtmlUtil.RemoveHtmlTags(p.Text).TrimEnd();

                if (IsOneLineUrl(p.Text) || p.Text.Contains(new[] { '♪', '♫' }) || p.Text.EndsWith('\''))
                {
                    // ignore urls
                }
                else if (!string.IsNullOrEmpty(nextText) && next != null &&
                    next.Text.Length > 0 &&
                    Utilities.UppercaseLetters.Contains(nextText[0]) &&
                    tempNoHtml.Length > 0 &&
                    !@",.!?:;>-])♪♫…".Contains(tempNoHtml[tempNoHtml.Length - 1]))
                {
                    string tempTrimmed = tempNoHtml.TrimEnd().TrimEnd('\'', '"', '“', '”').TrimEnd();
                    if (tempTrimmed.Length > 0 && !@")]*#¶.!?".Contains(tempTrimmed[tempTrimmed.Length - 1]) && p.Text != p.Text.ToUpper())
                    {
                        //don't end the sentence if the next word is an I word as they're always capped.
                        if (!next.Text.StartsWith("I ", StringComparison.Ordinal) && !next.Text.StartsWith("I'", StringComparison.Ordinal))
                        {
                            //test to see if the first word of the next line is a name
                            if (!IsName(next.Text.Split(new[] { ' ', '.', ',', '-', '?', '!', ':', ';', '"', '(', ')', '[', ']', '{', '}', '|', '<', '>', '/', '+', '\r', '\n' })[0]) && AllowFix(p, fixAction))
                            {
                                string oldText = p.Text;
                                if (p.Text.EndsWith('>'))
                                {
                                    int lastLessThan = p.Text.LastIndexOf('<');
                                    if (lastLessThan > 0)
                                        p.Text = p.Text.Insert(lastLessThan, ".");
                                }
                                else
                                {
                                    if (p.Text.EndsWith('“') && tempNoHtml.StartsWith('„'))
                                        p.Text = p.Text.TrimEnd('“') + ".“";
                                    else if (p.Text.EndsWith('"') && tempNoHtml.StartsWith('"'))
                                        p.Text = p.Text.TrimEnd('"') + ".\"";
                                    else
                                        p.Text += ".";
                                }
                                if (p.Text != oldText)
                                {
                                    missigPeriodsAtEndOfLine++;
                                    AddFixToListView(p, fixAction, oldText, p.Text);
                                }
                            }
                        }
                    }
                }
                else if (next != null && !string.IsNullOrEmpty(p.Text) && Utilities.AllLettersAndNumbers.Contains(p.Text[p.Text.Length - 1]))
                {
                    if (p.Text != p.Text.ToUpper())
                    {
                        var st = new StripableText(next.Text);
                        if (st.StrippedText.Length > 0 && st.StrippedText != st.StrippedText.ToUpper() &&
                            Utilities.UppercaseLetters.Contains(st.StrippedText[0]))
                        {
                            if (AllowFix(p, fixAction))
                            {
                                int j = p.Text.Length - 1;
                                while (j >= 0 && !@".!?¿¡".Contains(p.Text[j]))
                                    j--;
                                string endSign = ".";
                                if (j >= 0 && p.Text[j] == '¿')
                                    endSign = "?";
                                if (j >= 0 && p.Text[j] == '¡')
                                    endSign = "!";

                                string oldText = p.Text;
                                missigPeriodsAtEndOfLine++;
                                p.Text += endSign;
                                AddFixToListView(p, fixAction, oldText, p.Text);
                            }
                        }
                    }
                }

                if (p.Text.Length > 4)
                {
                    int indexOfNewLine = p.Text.IndexOf(Environment.NewLine + " -", 3, StringComparison.Ordinal);
                    if (indexOfNewLine < 0)
                        indexOfNewLine = p.Text.IndexOf(Environment.NewLine + "-", 3, StringComparison.Ordinal);
                    if (indexOfNewLine < 0)
                        indexOfNewLine = p.Text.IndexOf(Environment.NewLine + "<i>-", 3, StringComparison.Ordinal);
                    if (indexOfNewLine < 0)
                        indexOfNewLine = p.Text.IndexOf(Environment.NewLine + "<i> -", 3, StringComparison.Ordinal);
                    if (indexOfNewLine > 0 && Configuration.Settings.General.UppercaseLetters.Contains(char.ToUpper(p.Text[indexOfNewLine - 1])) && AllowFix(p, fixAction))
                    {
                        string oldText = p.Text;

                        string text = p.Text.Substring(0, indexOfNewLine);
                        var st = new StripableText(text);
                        if (st.Pre.TrimEnd().EndsWith('¿')) // Spanish ¿
                            p.Text = p.Text.Insert(indexOfNewLine, "?");
                        else if (st.Pre.TrimEnd().EndsWith('¡')) // Spanish ¡
                            p.Text = p.Text.Insert(indexOfNewLine, "!");
                        else
                            p.Text = p.Text.Insert(indexOfNewLine, ".");

                        missigPeriodsAtEndOfLine++;
                        AddFixToListView(p, fixAction, oldText, p.Text);
                    }
                }
            }
            UpdateFixStatus(missigPeriodsAtEndOfLine, _language.AddPeriods, _language.XPeriodsAdded);
        }
Exemplo n.º 34
0
        private void FixStartWithUppercaseLetterAfterColon()
        {
            string fixAction = _language.StartWithUppercaseLetterAfterColon;
            int noOfFixes = 0;
            listViewFixes.BeginUpdate();
            for (int i = 0; i < Subtitle.Paragraphs.Count; i++)
            {
                Paragraph p = Subtitle.Paragraphs[i];
                Paragraph last = Subtitle.GetParagraphOrDefault(i - 1);
                string oldText = p.Text;
                int skipCount = 0;

                if (last != null)
                {
                    string lastText = HtmlUtil.RemoveHtmlTags(last.Text);
                    if (lastText.EndsWith(':') || lastText.EndsWith(';'))
                    {
                        var st = new StripableText(p.Text);
                        if (st.StrippedText.Length > 0 && st.StrippedText[0] != char.ToUpper(st.StrippedText[0]))
                            p.Text = st.Pre + char.ToUpper(st.StrippedText[0]) + st.StrippedText.Substring(1) + st.Post;
                    }
                }

                if (oldText.Contains(new[] { ':', ';' }))
                {
                    bool lastWasColon = false;
                    for (int j = 0; j < p.Text.Length; j++)
                    {
                        var s = p.Text[j];
                        if (s == ':' || s == ';')
                        {
                            lastWasColon = true;
                        }
                        else if (lastWasColon)
                        {
                            var startFromJ = p.Text.Substring(j);
                            if (skipCount > 0)
                                skipCount--;
                            else if (startFromJ.StartsWith("<i>", StringComparison.OrdinalIgnoreCase))
                                skipCount = 2;
                            else if (startFromJ.StartsWith("<b>", StringComparison.OrdinalIgnoreCase))
                                skipCount = 2;
                            else if (startFromJ.StartsWith("<u>", StringComparison.OrdinalIgnoreCase))
                                skipCount = 2;
                            else if (startFromJ.StartsWith("<font ", StringComparison.OrdinalIgnoreCase) && p.Text.Substring(j).Contains('>'))
                                skipCount = startFromJ.IndexOf('>') - startFromJ.IndexOf("<font ", StringComparison.OrdinalIgnoreCase);
                            else if (IsTurkishLittleI(s, _encoding, Language))
                            {
                                p.Text = p.Text.Remove(j, 1).Insert(j, GetTurkishUppercaseLetter(s, _encoding).ToString(CultureInfo.InvariantCulture));
                                lastWasColon = false;
                            }
                            else if (char.IsLower(s))
                            {
                                // iPhone
                                bool change = true;
                                if (s == 'i' && p.Text.Length > j + 1)
                                {
                                    if (p.Text[j + 1] == char.ToUpper(p.Text[j + 1]))
                                        change = false;
                                }
                                if (change)
                                    p.Text = p.Text.Remove(j, 1).Insert(j, char.ToUpper(s).ToString(CultureInfo.InvariantCulture));
                                lastWasColon = false;
                            }
                            else if (!(" " + Environment.NewLine).Contains(s))
                                lastWasColon = false;
                        }
                    }
                }

                if (oldText != p.Text)
                {
                    noOfFixes++;
                    AddFixToListView(p, fixAction, oldText, p.Text);
                }
            }
            listViewFixes.EndUpdate();
            if (noOfFixes > 0)
            {
                _totalFixes += noOfFixes;
                LogStatus(_language.StartWithUppercaseLetterAfterColon, noOfFixes.ToString(CultureInfo.InvariantCulture));
            }
        }
Exemplo n.º 35
0
        public void FixUppercaseIInsideWords()
        {
            string fixAction = _language.FixUppercaseIInsideLowercaseWord;
            int uppercaseIsInsideLowercaseWords = 0;
            // bool isLineContinuation = false;
            for (int i = 0; i < Subtitle.Paragraphs.Count; i++)
            {
                Paragraph p = Subtitle.Paragraphs[i];
                string oldText = p.Text;

                Match match = ReAfterLowercaseLetter.Match(p.Text);
                while (match.Success)
                {
                    if (!(match.Index > 1 && p.Text.Substring(match.Index - 1, 2) == "Mc") // irish names, McDonalds etc.
                        && p.Text[match.Index + 1] == 'I'
                        && AllowFix(p, fixAction))
                    {
                        p.Text = p.Text.Substring(0, match.Index + 1) + "l";
                        if (match.Index + 2 < oldText.Length)
                            p.Text += oldText.Substring(match.Index + 2);

                        uppercaseIsInsideLowercaseWords++;
                        AddFixToListView(p, fixAction, oldText, p.Text);
                    }
                    match = match.NextMatch();
                }

                var st = new StripableText(p.Text);
                match = ReBeforeLowercaseLetter.Match(st.StrippedText);
                while (match.Success)
                {
                    string word = GetWholeWord(st.StrippedText, match.Index);
                    if (!IsName(word))
                    {
                        if (AllowFix(p, fixAction))
                        {
                            if (word.Equals("internal", StringComparison.OrdinalIgnoreCase) ||
                                word.Equals("island", StringComparison.OrdinalIgnoreCase) ||
                                word.Equals("islands", StringComparison.OrdinalIgnoreCase))
                            {
                            }
                            else if (match.Index == 0)
                            {  // first letter in paragraph

                                //too risky! - perhaps if periods is fixed at the same time... or too complicated!?
                                //if (isLineContinuation)
                                //{
                                //    st.StrippedText = st.StrippedText.Remove(match.Index, 1).Insert(match.Index, "l");
                                //    p.Text = st.MergedString;
                                //    uppercaseIsInsideLowercaseWords++;
                                //    AddFixToListView(p, fixAction, oldText, p.Text);
                                //}
                            }
                            else
                            {
                                if (match.Index > 2 && st.StrippedText[match.Index - 1] == ' ')
                                {
                                    if ((Utilities.AllLettersAndNumbers + @",").Contains(st.StrippedText[match.Index - 2])
                                        && match.Length >= 2 && Utilities.LowercaseVowels.Contains(char.ToLower(match.Value[1])))
                                    {
                                        st.StrippedText = st.StrippedText.Remove(match.Index, 1).Insert(match.Index, "l");
                                        p.Text = st.MergedString;
                                        uppercaseIsInsideLowercaseWords++;
                                        AddFixToListView(p, fixAction, oldText, p.Text);
                                    }
                                }
                                else if (match.Index > Environment.NewLine.Length + 1 && Environment.NewLine.Contains(st.StrippedText[match.Index - 1]))
                                {
                                    if ((Utilities.AllLettersAndNumbers + @",").Contains(st.StrippedText[match.Index - Environment.NewLine.Length + 1])
                                        && match.Length >= 2 && Utilities.LowercaseVowels.Contains(match.Value[1]))
                                    {
                                        st.StrippedText = st.StrippedText.Remove(match.Index, 1).Insert(match.Index, "l");
                                        p.Text = st.MergedString;
                                        uppercaseIsInsideLowercaseWords++;
                                        AddFixToListView(p, fixAction, oldText, p.Text);
                                    }
                                }
                                else if (match.Index > 1 && ((st.StrippedText[match.Index - 1] == '\"') || (st.StrippedText[match.Index - 1] == '\'') ||
                                                             (st.StrippedText[match.Index - 1] == '>') || (st.StrippedText[match.Index - 1] == '-')))
                                {
                                }
                                else
                                {
                                    var before = '\0';
                                    var after = '\0';
                                    if (match.Index > 0)
                                        before = st.StrippedText[match.Index - 1];
                                    if (match.Index < st.StrippedText.Length - 2)
                                        after = st.StrippedText[match.Index + 1];
                                    if (before != '\0' && char.IsUpper(before) && after != '\0' && char.IsLower(after) &&
                                        !Utilities.LowercaseVowels.Contains(char.ToLower(before)) && !Utilities.LowercaseVowels.Contains(after))
                                    {
                                        st.StrippedText = st.StrippedText.Remove(match.Index, 1).Insert(match.Index, "i");
                                        p.Text = st.MergedString;
                                        uppercaseIsInsideLowercaseWords++;
                                        AddFixToListView(p, fixAction, oldText, p.Text);
                                    }
                                    else if (@"‘’¡¿„“()[]♪'. @".Contains(before) && !Utilities.LowercaseVowels.Contains(char.ToLower(after)))
                                    {
                                    }
                                    else
                                    {
                                        st.StrippedText = st.StrippedText.Remove(match.Index, 1).Insert(match.Index, "l");
                                        p.Text = st.MergedString;
                                        uppercaseIsInsideLowercaseWords++;
                                        AddFixToListView(p, fixAction, oldText, p.Text);
                                    }
                                }
                            }
                        }
                    }
                    match = match.NextMatch();
                }

                //isLineContinuation = p.Text.Length > 0 && Utilities.GetLetters(true, true, false).Contains(p.Text[p.Text.Length - 1].ToString());
            }
            UpdateFixStatus(uppercaseIsInsideLowercaseWords, _language.FixUppercaseIInsindeLowercaseWords, _language.XUppercaseIsFoundInsideLowercaseWords);
        }
        public void Fix(Subtitle subtitle, IFixCallbacks callbacks)
        {
            var language = Configuration.Settings.Language.FixCommonErrors;
            string fixAction = language.StartWithUppercaseLetterAfterPeriodInsideParagraph;
            int noOfFixes = 0;
            for (int i = 0; i < subtitle.Paragraphs.Count; i++)
            {
                Paragraph p = subtitle.Paragraphs[i];
                string oldText = p.Text;
                var st = new StripableText(p.Text);
                if (p.Text.Length > 3)
                {
                    string text = st.StrippedText.Replace("  ", " ");
                    int start = text.IndexOfAny(ExpectedChars);
                    while (start >= 0 && start < text.Length)
                    {
                        if (start > 0 && char.IsDigit(text[start - 1]))
                        {
                            // ignore periods after a number
                        }
                        else if (start + 4 < text.Length && text[start + 1] == ' ')
                        {
                            if (!IsAbbreviation(text, start, callbacks))
                            {
                                var subText = new StripableText(text.Substring(start + 2));
                                if (subText.StrippedText.Length > 0 && Helper.IsTurkishLittleI(subText.StrippedText[0], callbacks.Encoding, callbacks.Language))
                                {
                                    if (subText.StrippedText.Length > 1 && !(subText.Pre.Contains('\'') && subText.StrippedText.StartsWith('s')))
                                    {
                                        text = text.Substring(0, start + 2) + subText.Pre + Helper.GetTurkishUppercaseLetter(subText.StrippedText[0], callbacks.Encoding) + subText.StrippedText.Substring(1) + subText.Post;
                                        if (callbacks.AllowFix(p, fixAction))
                                        {
                                            p.Text = st.Pre + text + st.Post;
                                        }
                                    }
                                }
                                else if (subText.StrippedText.Length > 0 && Configuration.Settings.General.UppercaseLetters.Contains(char.ToUpper(subText.StrippedText[0])))
                                {
                                    if (subText.StrippedText.Length > 1 && !(subText.Pre.Contains('\'') && subText.StrippedText.StartsWith('s')))
                                    {
                                        text = text.Substring(0, start + 2) + subText.Pre + char.ToUpper(subText.StrippedText[0]) + subText.StrippedText.Substring(1) + subText.Post;
                                        if (callbacks.AllowFix(p, fixAction))
                                        {
                                            p.Text = st.Pre + text + st.Post;
                                        }
                                    }
                                }
                            }
                        }
                        start += 4;
                        if (start < text.Length)
                            start = text.IndexOfAny(ExpectedChars, start);
                    }
                }

                if (oldText != p.Text)
                {
                    noOfFixes++;
                    callbacks.AddFixToListView(p, fixAction, oldText, p.Text);
                }
            }
            callbacks.UpdateFixStatus(noOfFixes, language.StartWithUppercaseLetterAfterPeriodInsideParagraph, noOfFixes.ToString(CultureInfo.InvariantCulture));
        }
Exemplo n.º 37
0
        private void FixSpanishInvertedLetter(char mark, string inverseMark, Paragraph p, Paragraph last, ref bool wasLastLineClosed, string fixAction, ref int fixCount)
        {
            if (p.Text.Contains(mark))
            {
                bool skip = false;
                if (last != null && p.Text.Contains(mark) && !p.Text.Contains(inverseMark) && last.Text.Contains(inverseMark) && !last.Text.Contains(mark))
                    skip = true;

                if (!skip && Utilities.CountTagInText(p.Text, mark) == Utilities.CountTagInText(p.Text, inverseMark) &&
                    HtmlUtil.RemoveHtmlTags(p.Text).TrimStart(inverseMark[0]).Contains(inverseMark) == false &&
                    HtmlUtil.RemoveHtmlTags(p.Text).TrimEnd(mark).Contains(mark) == false)
                {
                    skip = true;
                }

                if (!skip)
                {
                    int startIndex = 0;
                    int markIndex = p.Text.IndexOf(mark);
                    if (!wasLastLineClosed && ((p.Text.IndexOf('!') > 0 && p.Text.IndexOf('!') < markIndex) ||
                                               (p.Text.IndexOf('?') > 0 && p.Text.IndexOf('?') < markIndex) ||
                                               (p.Text.IndexOf('.') > 0 && p.Text.IndexOf('.') < markIndex)))
                        wasLastLineClosed = true;
                    while (markIndex > 0 && startIndex < p.Text.Length)
                    {
                        int inverseMarkIndex = p.Text.IndexOf(inverseMark, startIndex, StringComparison.Ordinal);
                        if (wasLastLineClosed && (inverseMarkIndex < 0 || inverseMarkIndex > markIndex))
                        {
                            if (AllowFix(p, fixAction))
                            {
                                int j = markIndex - 1;

                                while (j > startIndex && (p.Text[j] == '.' || p.Text[j] == '!' || p.Text[j] == '?'))
                                    j--;

                                while (j > startIndex &&
                                       (p.Text[j] != '.' || IsSpanishAbbreviation(p.Text, j)) &&
                                       p.Text[j] != '!' &&
                                       p.Text[j] != '?' &&
                                       !(j > 3 && p.Text.Substring(j - 3, 3) == Environment.NewLine + "-") &&
                                       !(j > 4 && p.Text.Substring(j - 4, 4) == Environment.NewLine + " -") &&
                                       !(j > 6 && p.Text.Substring(j - 6, 6) == Environment.NewLine + "<i>-"))
                                    j--;

                                if (@".!?".Contains(p.Text[j]))
                                {
                                    j++;
                                }
                                if (j + 3 < p.Text.Length && p.Text.Substring(j + 1, 2) == Environment.NewLine)
                                {
                                    j += 3;
                                }
                                else if (j + 2 < p.Text.Length && p.Text.Substring(j, 2) == Environment.NewLine)
                                {
                                    j += 2;
                                }
                                if (j >= startIndex)
                                {
                                    string part = p.Text.Substring(j, markIndex - j + 1);

                                    string speaker = string.Empty;
                                    int speakerEnd = part.IndexOf(')');
                                    if (part.StartsWith('(') && speakerEnd > 0 && speakerEnd < part.IndexOf(mark))
                                    {
                                        while (Environment.NewLine.Contains(part[speakerEnd + 1]))
                                            speakerEnd++;
                                        speaker = part.Substring(0, speakerEnd + 1);
                                        part = part.Substring(speakerEnd + 1);
                                    }
                                    speakerEnd = part.IndexOf(']');
                                    if (part.StartsWith('[') && speakerEnd > 0 && speakerEnd < part.IndexOf(mark))
                                    {
                                        while (Environment.NewLine.Contains(part[speakerEnd + 1]))
                                            speakerEnd++;
                                        speaker = part.Substring(0, speakerEnd + 1);
                                        part = part.Substring(speakerEnd + 1);
                                    }

                                    var st = new StripableText(part);
                                    if (j == 0 && mark == '!' && st.Pre == "¿" && Utilities.CountTagInText(p.Text, mark) == 1 && HtmlUtil.RemoveHtmlTags(p.Text).EndsWith(mark))
                                    {
                                        p.Text = inverseMark + p.Text;
                                    }
                                    else if (j == 0 && mark == '?' && st.Pre == "¡" && Utilities.CountTagInText(p.Text, mark) == 1 && HtmlUtil.RemoveHtmlTags(p.Text).EndsWith(mark))
                                    {
                                        p.Text = inverseMark + p.Text;
                                    }
                                    else
                                    {
                                        string temp = inverseMark;
                                        int addToIndex = 0;
                                        while (p.Text.Length > markIndex + 1 && p.Text[markIndex + 1] == mark &&
                                            Utilities.CountTagInText(p.Text, mark) > Utilities.CountTagInText(p.Text + temp, inverseMark))
                                        {
                                            temp += inverseMark;
                                            st.Post += mark;
                                            markIndex++;
                                            addToIndex++;
                                        }

                                        p.Text = p.Text.Remove(j, markIndex - j + 1).Insert(j, speaker + st.Pre + temp + st.StrippedText + st.Post);
                                        markIndex += addToIndex;
                                    }
                                }
                            }
                        }
                        else if (last != null && !wasLastLineClosed && inverseMarkIndex == p.Text.IndexOf(mark) && !last.Text.Contains(inverseMark))
                        {
                            string lastOldtext = last.Text;
                            int idx = last.Text.Length - 2;
                            while (idx > 0 && (last.Text.Substring(idx, 2) != ". ") && (last.Text.Substring(idx, 2) != "! ") && (last.Text.Substring(idx, 2) != "? "))
                                idx--;

                            last.Text = last.Text.Insert(idx, inverseMark);
                            fixCount++;
                            AddFixToListView(last, fixAction, lastOldtext, last.Text);
                        }

                        startIndex = markIndex + 2;
                        if (startIndex < p.Text.Length)
                            markIndex = p.Text.IndexOf(mark, startIndex);
                        else
                            markIndex = -1;
                        wasLastLineClosed = true;
                    }
                }
                if (p.Text.EndsWith(mark + "...", StringComparison.Ordinal) && p.Text.Length > 4)
                {
                    p.Text = p.Text.Remove(p.Text.Length - 4, 4) + "..." + mark;
                }
            }
            else if (Utilities.CountTagInText(p.Text, inverseMark) == 1)
            {
                int idx = p.Text.IndexOf(inverseMark, StringComparison.Ordinal);
                while (idx < p.Text.Length && !@".!?".Contains(p.Text[idx]))
                {
                    idx++;
                }
                if (idx < p.Text.Length)
                {
                    p.Text = p.Text.Insert(idx, mark.ToString(CultureInfo.InvariantCulture));
                    if (p.Text.Contains("¡¿") && p.Text.Contains("!?"))
                        p.Text = p.Text.Replace("!?", "?!");
                    if (p.Text.Contains("¿¡") && p.Text.Contains("?!"))
                        p.Text = p.Text.Replace("?!", "!?");
                }
            }
        }
Exemplo n.º 38
0
        public void Fix(Subtitle subtitle, IFixCallbacks callbacks)
        {
            var    language  = Configuration.Settings.Language.FixCommonErrors;
            string fixAction = language.StartWithUppercaseLetterAfterPeriodInsideParagraph;
            int    noOfFixes = 0;

            for (int i = 0; i < subtitle.Paragraphs.Count; i++)
            {
                Paragraph p       = subtitle.Paragraphs[i];
                string    oldText = p.Text;
                var       st      = new StripableText(p.Text);
                if (p.Text.Length > 3)
                {
                    string text  = st.StrippedText.Replace("  ", " ");
                    int    start = text.IndexOfAny(ExpectedChars);
                    while (start >= 0 && start < text.Length)
                    {
                        if (start > 0 && char.IsDigit(text[start - 1]))
                        {
                            // ignore periods after a number
                        }
                        else if (start + 4 < text.Length && text[start + 1] == ' ')
                        {
                            if (!IsAbbreviation(text, start, callbacks))
                            {
                                var subText = new StripableText(text.Substring(start + 2));
                                if (subText.StrippedText.Length > 0 && Helper.IsTurkishLittleI(subText.StrippedText[0], callbacks.Encoding, callbacks.Language))
                                {
                                    if (subText.StrippedText.Length > 1 && !(subText.Pre.Contains('\'') && subText.StrippedText.StartsWith('s')))
                                    {
                                        text = text.Substring(0, start + 2) + subText.Pre + Helper.GetTurkishUppercaseLetter(subText.StrippedText[0], callbacks.Encoding) + subText.StrippedText.Substring(1) + subText.Post;
                                        if (callbacks.AllowFix(p, fixAction))
                                        {
                                            p.Text = st.Pre + text + st.Post;
                                        }
                                    }
                                }
                                else if (subText.StrippedText.Length > 0 && Configuration.Settings.General.UppercaseLetters.Contains(char.ToUpper(subText.StrippedText[0])))
                                {
                                    if (subText.StrippedText.Length > 1 && !(subText.Pre.Contains('\'') && subText.StrippedText.StartsWith('s')))
                                    {
                                        text = text.Substring(0, start + 2) + subText.Pre + char.ToUpper(subText.StrippedText[0]) + subText.StrippedText.Substring(1) + subText.Post;
                                        if (callbacks.AllowFix(p, fixAction))
                                        {
                                            p.Text = st.Pre + text + st.Post;
                                        }
                                    }
                                }
                            }
                        }
                        start += 4;
                        if (start < text.Length)
                        {
                            start = text.IndexOfAny(ExpectedChars, start);
                        }
                    }
                }

                if (oldText != p.Text)
                {
                    noOfFixes++;
                    callbacks.AddFixToListView(p, fixAction, oldText, p.Text);
                }
            }
            callbacks.UpdateFixStatus(noOfFixes, language.StartWithUppercaseLetterAfterPeriodInsideParagraph, noOfFixes.ToString(CultureInfo.InvariantCulture));
        }
Exemplo n.º 39
0
        private void FixStartWithUppercaseLetterAfterPeriodInsideParagraph()
        {
            string fixAction = _language.StartWithUppercaseLetterAfterPeriodInsideParagraph;
            int noOfFixes = 0;
            for (int i = 0; i < Subtitle.Paragraphs.Count; i++)
            {
                Paragraph p = Subtitle.Paragraphs[i];
                string oldText = p.Text;
                StripableText st = new StripableText(p.Text);
                if (p.Text.Length > 3)
                {
                    string text = st.StrippedText.Replace("  ", " ");
                    int start = text.IndexOfAny(new[] { '.', '!', '?' });
                    while (start != -1 && start < text.Length)
                    {
                        if (start > 0 && char.IsDigit(text[start - 1]))
                        {
                            // ignore periods after a number
                        }
                        else if (start + 4 < text.Length && text[start + 1] == ' ')
                        {
                            if (!IsAbbreviation(text, start))
                            {
                                var subText = new StripableText(text.Substring(start + 2));
                                if (subText.StrippedText.Length > 0 && IsTurkishLittleI(subText.StrippedText[0], _encoding, Language))
                                {
                                    if (subText.StrippedText.Length > 1 && !(subText.Pre.Contains('\'') && subText.StrippedText.StartsWith('s')))
                                    {
                                        text = text.Substring(0, start + 2) + subText.Pre + GetTurkishUppercaseLetter(subText.StrippedText[0], _encoding) + subText.StrippedText.Substring(1) + subText.Post;
                                        if (AllowFix(p, fixAction))
                                        {
                                            p.Text = st.Pre + text + st.Post;
                                        }
                                    }
                                }
                                else if (subText.StrippedText.Length > 0 && Configuration.Settings.General.UppercaseLetters.Contains(char.ToUpper(subText.StrippedText[0])))
                                {
                                    if (subText.StrippedText.Length > 1 && !(subText.Pre.Contains('\'') && subText.StrippedText.StartsWith('s')))
                                    {
                                        text = text.Substring(0, start + 2) + subText.Pre + char.ToUpper(subText.StrippedText[0]) + subText.StrippedText.Substring(1) + subText.Post;
                                        if (AllowFix(p, fixAction))
                                        {
                                            p.Text = st.Pre + text + st.Post;
                                        }
                                    }
                                }
                            }
                        }
                        start += 4;
                        if (start < text.Length)
                            start = text.IndexOfAny(new[] { '.', '!', '?' }, start);
                    }
                }

                if (oldText != p.Text)
                {
                    noOfFixes++;
                    AddFixToListView(p, fixAction, oldText, p.Text);
                }
            }
            if (noOfFixes > 0)
            {
                _totalFixes += noOfFixes;
                LogStatus(_language.StartWithUppercaseLetterAfterPeriodInsideParagraph, noOfFixes.ToString(CultureInfo.InvariantCulture));
            }
        }
Exemplo n.º 40
0
        public string RemoveTextFromHearImpaired(string text)
        {
            if (Settings.RemoveWhereContains && Settings.RemoveIfTextContains.Length > 0 && text.Contains(Settings.RemoveIfTextContains))
            {
                return string.Empty;
            }

            string oldText = text;
            text = RemoveColon(text);
            string pre = " >-\"'‘`´♪¿¡.…—";
            string post = " -\"'`´♪.!?:…—";
            if (Settings.RemoveTextBetweenCustomTags)
            {
                pre = pre.Replace(Settings.CustomStart, string.Empty);
                post = post.Replace(Settings.CustomEnd, string.Empty);
            }
            var st = new StripableText(text, pre, post);
            var sb = new StringBuilder();
            string[] parts = st.StrippedText.Trim().Split(Environment.NewLine.ToCharArray(), StringSplitOptions.RemoveEmptyEntries);
            int lineNumber = 0;
            bool removedDialogInFirstLine = false;
            int noOfNamesRemoved = 0;
            int noOfNamesRemovedNotInLineOne = 0;
            foreach (string s in parts)
            {
                StripableText stSub = new StripableText(s, pre, post);
                if (!StartAndEndsWithHearImpariedTags(stSub.StrippedText))
                {
                    if (removedDialogInFirstLine && stSub.Pre.Contains("- "))
                        stSub.Pre = stSub.Pre.Replace("- ", string.Empty);

                    string newText = stSub.StrippedText;

                    newText = RemoveHearImpairedTags(newText);

                    if (stSub.StrippedText.Length - newText.Length > 2)
                    {
                        string removedText = GetRemovedString(stSub.StrippedText, newText);
                        if (!IsHIDescription(removedText))
                        {
                            noOfNamesRemoved++;
                            if (lineNumber > 0)
                                noOfNamesRemovedNotInLineOne++;
                        }
                    }
                    sb.AppendLine(stSub.Pre + newText + stSub.Post);
                }
                else
                {
                    if (!IsHIDescription(stSub.StrippedText))
                    {
                        noOfNamesRemoved++;
                        if (lineNumber > 0)
                            noOfNamesRemovedNotInLineOne++;
                    }

                    if (st.Pre.Contains("- ") && lineNumber == 0)
                    {
                        st.Pre = st.Pre.Replace("- ", string.Empty);
                        removedDialogInFirstLine = true;
                    }

                    if (st.Pre.Contains("<i>") && stSub.Post.Contains("</i>"))
                        st.Pre = st.Pre.Replace("<i>", string.Empty);

                    if (s.Contains("<i>") && !s.Contains("</i>") && st.Post.Contains("</i>"))
                        st.Post = st.Post.Replace("</i>", string.Empty);
                }
                lineNumber++;
            }

            text = st.Pre + sb.ToString().Trim() + st.Post;
            text = text.Replace("<i></i>", string.Empty).Trim();
            text = RemoveColon(text);
            text = RemoveLineIfAllUppercase(text);
            text = RemoveHearImpairedtagsInsideLine(text);
            if (Settings.RemoveInterjections)
                text = RemoveInterjections(text);

            st = new StripableText(text, " >-\"'‘`´♪¿¡.…—", " -\"'`´♪.!?:…—");
            text = st.StrippedText;
            if (StartAndEndsWithHearImpariedTags(text))
            {
                text = RemoveStartEndTags(text);
            }

            text = RemoveHearImpairedTags(text);

            // fix 3 lines to two liners - if only two lines
            if (noOfNamesRemoved >= 1 && Utilities.CountTagInText(text, Environment.NewLine) == 2)
            {
                string[] a = Utilities.RemoveHtmlTags(text).Replace(" ", string.Empty).Split("!?.".ToCharArray(), StringSplitOptions.RemoveEmptyEntries);
                if (a.Length == 2)
                {
                    StripableText temp = new StripableText(text);
                    temp.StrippedText = temp.StrippedText.Replace(Environment.NewLine, " ");
                    int splitIndex = temp.StrippedText.LastIndexOf('!');
                    if (splitIndex == -1)
                        splitIndex = temp.StrippedText.LastIndexOf('?');
                    if (splitIndex == -1)
                        splitIndex = temp.StrippedText.LastIndexOf('.');
                    if (splitIndex > 0)
                    {
                        text = temp.Pre + temp.StrippedText.Insert(splitIndex + 1, Environment.NewLine) + temp.Post;
                    }
                }
            }

            if (!text.StartsWith('-') && noOfNamesRemoved >= 1 && Utilities.CountTagInText(text, Environment.NewLine) == 1)
            {
                string[] arr = text.Split(Environment.NewLine.ToCharArray());
                string part0 = arr[0].Trim().Replace("</i>", string.Empty).Trim();
                if (!part0.EndsWith(',') && (!part0.EndsWith('-') || noOfNamesRemovedNotInLineOne > 0))
                {
                    if (part0.Length > 0 && ".!?".Contains(part0.Substring(part0.Length - 1)))
                    {
                        if (noOfNamesRemovedNotInLineOne > 0)
                        {
                            if (!st.Pre.Contains('-'))
                                text = "- " + text.Replace(Environment.NewLine, Environment.NewLine + "- ");
                            if (!text.Contains(Environment.NewLine + "-") && !text.Contains(Environment.NewLine + "<i>-"))
                                text = text.Replace(Environment.NewLine, Environment.NewLine + "- ");
                        }
                    }
                }
            }

            if (!string.IsNullOrEmpty(text))
                text = st.Pre + text + st.Post;

            if (oldText.Trim().StartsWith("- ") &&
                (oldText.Contains(Environment.NewLine + "- ") || oldText.Contains(Environment.NewLine + " - ")) && !text.Contains(Environment.NewLine))
            {
                text = text.TrimStart().TrimStart('-').TrimStart();
            }

            if (oldText != text)
            {
                // insert spaces before "-"
                text = text.Replace(Environment.NewLine + "- <i>", Environment.NewLine + "<i>- ");
                text = text.Replace(Environment.NewLine + "-<i>", Environment.NewLine + "<i>- ");
                if (text.StartsWith('-') && text.Length > 2 && text[1] != ' ' && text[1] != '-')
                    text = text.Insert(1, " ");
                if (text.StartsWith("<i>-") && text.Length > 5 && text[4] != ' ' && text[4] != '-')
                    text = text.Insert(4, " ");
                if (text.Contains(Environment.NewLine + "-"))
                {
                    int index = text.IndexOf(Environment.NewLine + "-", StringComparison.Ordinal);
                    if (index + 4 < text.Length && text[index + Environment.NewLine.Length + 1] != ' ' && text[index + Environment.NewLine.Length + 1] != '-')
                        text = text.Insert(index + Environment.NewLine.Length + 1, " ");
                }
                if (text.Contains(Environment.NewLine + "<i>-"))
                {
                    int index = text.IndexOf(Environment.NewLine + "<i>-", StringComparison.Ordinal);
                    if (index + 5 < text.Length && text[index + Environment.NewLine.Length + 4] != ' ' && text[index + Environment.NewLine.Length + 4] != '-')
                        text = text.Insert(index + Environment.NewLine.Length + 4, " ");
                }
            }
            return text.Trim();
        }
Exemplo n.º 41
0
        private string FixLowercaseIToUppercaseI(string input, string lastLine)
        {
            var sb = new StringBuilder();
            var lines = input.SplitToLines();
            for (int i = 0; i < lines.Length; i++)
            {
                string l = lines[i];

                if (i > 0)
                {
                    lastLine = lines[i - 1];
                }

                lastLine = HtmlUtil.RemoveHtmlTags(lastLine);

                if (string.IsNullOrEmpty(lastLine) ||
                    lastLine.EndsWith('.') ||
                    lastLine.EndsWith('!') ||
                    lastLine.EndsWith('?'))
                {
                    var st = new StripableText(l);
                    if (st.StrippedText.StartsWith('i') && !st.Pre.EndsWith('[') && !st.Pre.EndsWith('(') && !st.Pre.EndsWith("...", StringComparison.Ordinal))
                    {
                        if (string.IsNullOrEmpty(lastLine) || (!lastLine.EndsWith("...", StringComparison.Ordinal) && !EndsWithAbbreviation(lastLine, abbreviationList)))
                        {
                            l = st.Pre + "I" + st.StrippedText.Remove(0, 1) + st.Post;
                        }
                    }
                }

                sb.AppendLine(l);
            }

            return sb.ToString().TrimEnd('\r', '\n');
        }
Exemplo n.º 42
0
        public string RemoveInterjections(string text)
        {
            string oldText = text;

            string[] arr = Configuration.Settings.Tools.Interjections.Split(";".ToCharArray(), StringSplitOptions.RemoveEmptyEntries);
            if (_interjectionList == null)
            {
                _interjectionList = new List<string>();
                foreach (string s in arr)
                {
                    if (s.Length > 0)
                    {
                        if (!_interjectionList.Contains(s))
                            _interjectionList.Add(s);

                        string lower = s.ToLower();
                        if (!_interjectionList.Contains(lower))
                            _interjectionList.Add(lower);

                        string upper = s.ToUpper();
                        if (!_interjectionList.Contains(upper))
                            _interjectionList.Add(upper);

                        string pascalCasing = s.Substring(0, 1).ToUpper() + s.Remove(0, 1);
                        if (!_interjectionList.Contains(pascalCasing))
                            _interjectionList.Add(pascalCasing);
                    }
                }
                _interjectionList.Sort(CompareLength);
            }

            bool doRepeat = true;
            while (doRepeat)
            {
                doRepeat = false;
                foreach (string s in _interjectionList)
                {
                    if (text.Contains(s))
                    {
                        var regex = new Regex("\\b" + s + "\\b");
                        var match = regex.Match(text);
                        if (match.Success)
                        {
                            int index = match.Index;
                            string temp = text.Remove(index, s.Length);

                            string pre = string.Empty;
                            if (index > 0)
                                doRepeat = true;

                            bool removeAfter = true;

                            if (temp.Length > index - s.Length + 3 && index > s.Length)
                            {
                                if (temp.Substring(index - s.Length + 1, 3) == ", !")
                                {
                                    temp = temp.Remove(index - s.Length + 1, 2);
                                    removeAfter = false;
                                }
                                else if (temp.Substring(index - s.Length + 1, 3) == ", ?")
                                {
                                    temp = temp.Remove(index - s.Length + 1, 2);
                                    removeAfter = false;
                                }
                                else if (temp.Substring(index - s.Length + 1, 3) == ", .")
                                {
                                    temp = temp.Remove(index - s.Length + 1, 2);
                                    removeAfter = false;
                                }
                            }
                            if (removeAfter && temp.Length > index - s.Length + 2 && index > s.Length)
                            {
                                if (temp.Substring(index - s.Length, 3) == ", !")
                                {
                                    temp = temp.Remove(index - s.Length, 2);
                                    removeAfter = false;
                                }
                                else if (temp.Substring(index - s.Length, 3) == ", ?")
                                {
                                    temp = temp.Remove(index - s.Length, 2);
                                    removeAfter = false;
                                }
                                else if (temp.Substring(index - s.Length, 3) == ", .")
                                {
                                    temp = temp.Remove(index - s.Length, 2);
                                    removeAfter = false;
                                }
                            }
                            if (removeAfter && temp.Length > index - s.Length + 2 && index > s.Length)
                            {
                                if (temp.Substring(index - s.Length + 1, 2) == "-!")
                                {
                                    temp = temp.Remove(index - s.Length + 1, 1);
                                    removeAfter = false;
                                }
                                else if (temp.Substring(index - s.Length + 1, 2) == "-?")
                                {
                                    temp = temp.Remove(index - s.Length + 1, 1);
                                    removeAfter = false;
                                }
                                else if (temp.Substring(index - s.Length + 1, 2) == "-.")
                                {
                                    temp = temp.Remove(index - s.Length + 1, 1);
                                    removeAfter = false;
                                }
                            }

                            if (removeAfter)
                            {
                                if (index == 0)
                                {
                                    if (!string.IsNullOrEmpty(temp) && temp.StartsWith('-'))
                                        temp = temp.Remove(0, 1).Trim();
                                }
                                else if (index == 3 && !string.IsNullOrEmpty(temp) && temp.StartsWith("<i>-"))
                                {
                                    temp = temp.Remove(3, 1);
                                }
                                else if (index > 0)
                                {
                                    pre = text.Substring(0, index);
                                    temp = temp.Remove(0, index);
                                    if (pre.EndsWith('-') && temp.StartsWith('-'))
                                        temp = temp.Remove(0, 1);
                                    if (pre.EndsWith("- ") && temp.StartsWith('-'))
                                        temp = temp.Remove(0, 1);
                                }

                                while (temp.Length > 0 && (temp.StartsWith(' ') || temp.StartsWith(',') || temp.StartsWith('.') || temp.StartsWith('!') || temp.StartsWith('?')))
                                {
                                    temp = temp.Remove(0, 1);
                                    doRepeat = true;
                                }
                                if (temp.Length > 0 && s[0].ToString(CultureInfo.InvariantCulture) != s[0].ToString(CultureInfo.InvariantCulture).ToLower())
                                {
                                    temp = temp.Remove(0, 1).Insert(0, temp[0].ToString(CultureInfo.InvariantCulture).ToUpper());
                                    doRepeat = true;
                                }

                                if (pre.EndsWith(' ') && temp.StartsWith('-'))
                                    temp = temp.Remove(0, 1);

                                temp = pre + temp;
                            }

                            if (temp.EndsWith(Environment.NewLine + "- "))
                                temp = temp.Remove(temp.Length - 4, 4);

                            var st = new StripableText(temp);
                            if (st.StrippedText.Length == 0)
                                return string.Empty;

                            if (!temp.Contains(Environment.NewLine) && text.Contains(Environment.NewLine) && temp.StartsWith('-'))
                                temp = temp.Remove(0, 1).Trim();

                            text = temp;
                        }
                    }
                }
            }
            string[] lines = text.Split(Environment.NewLine.ToCharArray(), StringSplitOptions.RemoveEmptyEntries);
            if (text != oldText && lines.Length == 2)
            {
                if (lines[0] == "-" && lines[1] == "-")
                    return string.Empty;
                if (lines[0].StartsWith('-') && lines[0].Length > 1 && lines[1].Trim() == "-")
                    return lines[0].Remove(0, 1).Trim();
                if (lines[1].StartsWith('-') && lines[1].Length > 1 && lines[0].Trim() == "-")
                    return lines[1].Remove(0, 1).Trim();
                if (lines[0].Length > 1 && (lines[1] == "-") || lines[1] == "." || lines[1] == "!" || lines[1] == "?")
                {
                    if (oldText.Contains(Environment.NewLine + "-") && lines[0].StartsWith('-'))
                        lines[0] = lines[0].Remove(0, 1);
                    return lines[0].Trim();
                }
            }

            return text;
        }
        public string RemoveColon(string text)
        {
            if (!(Settings.RemoveTextBeforeColon && text.Contains(':')))
                return text;

            string preAssTag = string.Empty;
            if (text.StartsWith("{\\", StringComparison.Ordinal))
            {
                int indexOfEndBracketSuccessor = text.IndexOf('}') + 1;
                if (indexOfEndBracketSuccessor > 0)
                {
                    preAssTag = text.Substring(0, indexOfEndBracketSuccessor);
                    text = text.Remove(0, indexOfEndBracketSuccessor).TrimStart();
                }
            }

            // House 7x01 line 52: and she would like you to do three things:
            // Okay or remove???
            string noTagText = HtmlUtil.RemoveHtmlTags(text);
            if (noTagText.Length > 10 && noTagText.IndexOf(':') == noTagText.Length - 1 && noTagText != noTagText.ToUpper())
                return text;

            string newText = string.Empty;
            var lines = text.Trim().SplitToLines();
            int noOfNames = 0;
            int count = 0;
            bool removedInFirstLine = false;
            bool removedInSecondLine = false;
            foreach (string line in lines)
            {
                int indexOfColon = line.IndexOf(':');
                if (indexOfColon <= 0 || IsInsideBrackets(line, indexOfColon))
                {
                    newText = (newText + Environment.NewLine + line).Trim();

                    if (newText.EndsWith("</i>", StringComparison.Ordinal) && text.StartsWith("<i>", StringComparison.Ordinal) && !newText.StartsWith("<i>", StringComparison.Ordinal))
                        newText = "<i>" + newText;
                    else if (newText.EndsWith("</b>", StringComparison.Ordinal) && text.StartsWith("<b>", StringComparison.Ordinal) && !newText.StartsWith("<b>", StringComparison.Ordinal))
                        newText = "<b>" + newText;
                }
                else
                {
                    var pre = line.Substring(0, indexOfColon);
                    var noTagPre = HtmlUtil.RemoveHtmlTags(pre, true);
                    if (Settings.RemoveTextBeforeColonOnlyUppercase && noTagPre != noTagPre.ToUpper())
                    {
                        string s = line;
                        string l1Trim = HtmlUtil.RemoveHtmlTags(lines[0]).TrimEnd('"');
                        if (count == 1 && lines.Length == 2 && !l1Trim.EndsWith('.') &&
                                                               !l1Trim.EndsWith('!') &&
                                                               !l1Trim.EndsWith('?'))
                        {
                            var indexOf = line.IndexOfAny(ExpectedStrings, StringComparison.Ordinal);
                            if (indexOf > 0 && indexOf < indexOfColon)
                            {
                                var toRemove = s.Substring(indexOf + 1, indexOfColon - indexOf).Trim();
                                if (toRemove.Length > 1 && toRemove == toRemove.ToUpper())
                                {
                                    s = s.Remove(indexOf + 1, indexOfColon - indexOf);
                                    s = s.Insert(indexOf + 1, " -");
                                    if (newText.StartsWith("<i>") && !newText.StartsWith("<i>-"))
                                        newText = "<i>- " + newText.Remove(0, 3);
                                    else if (!newText.StartsWith("-"))
                                        newText = "- " + newText;
                                }
                            }
                        }

                        newText = (newText + Environment.NewLine + s).Trim();
                    }
                    else
                    {
                        var newTextNoHtml = HtmlUtil.RemoveHtmlTags(newText);
                        if (Utilities.CountTagInText(line, ':') == 1)
                        {
                            if (count == 1 && newText.Length > 1 && removedInFirstLine &&
                                !".?!".Contains(newTextNoHtml[newTextNoHtml.Length - 1]) && newText.LineEndsWithHtmlTag(true) &&
                                line != line.ToUpper())
                            {
                                newText += Environment.NewLine;
                                if (pre.Contains("<i>") && line.Contains("</i>") && !line.Contains("<i>"))
                                    newText += "<i>" + line;
                                else if (pre.Contains("<b>") && line.Contains("</b>") && !line.Contains("<b>"))
                                    newText += "<b>" + line;
                                else if (pre.Contains("<u>") && line.Contains("</u>") && !line.Contains("<u>"))
                                    newText += "<u>" + line;
                                else if (pre.Contains('[') && line.Contains(']') && !line.Contains("["))
                                    newText += "[" + line;
                                else if (pre.Contains('(') && line.EndsWith(')') && !line.Contains("("))
                                    newText += "(" + line;
                                else
                                    newText += line;
                            }
                            else if (count == 1 && newText.Length > 1 && indexOfColon > 15 && line.Substring(0, indexOfColon).Contains(' ') &&
                                     !".?!".Contains(newTextNoHtml[newTextNoHtml.Length - 1]) && newText.LineEndsWithHtmlTag(true) &&
                                     line != line.ToUpper())
                            {
                                newText += Environment.NewLine;
                                if (pre.Contains("<i>") && line.Contains("</i>") && !line.Contains("<i>"))
                                    newText += "<i>" + line;
                                else if (pre.Contains("<b>") && line.Contains("</b>") && !line.Contains("<b>"))
                                    newText += "<b>" + line;
                                else if (pre.Contains("<u>") && line.Contains("</u>") && !line.Contains("<u>"))
                                    newText += "<u>" + line;
                                else if (pre.Contains('[') && line.Contains(']') && !line.Contains("["))
                                    newText += "[" + line;
                                else if (pre.Contains('(') && line.EndsWith(')') && !line.Contains("("))
                                    newText += "(" + line;
                                else
                                    newText += line;
                            }
                            else
                            {
                                var preStripable = new StripableText(pre);
                                var remove = true;

                                if (indexOfColon < line.Length - 1)
                                {
                                    if (Settings.ColonSeparateLine && !line.Substring(indexOfColon + 1).StartsWith(Environment.NewLine, StringComparison.Ordinal))
                                        remove = false;
                                    else if (Utilities.IsBetweenNumbers(line, indexOfColon))
                                        remove = false;
                                }

                                if (remove && !DoRemove(pre))
                                    remove = false;

                                string l1Trimmed = HtmlUtil.RemoveHtmlTags(lines[0]).TrimEnd('"');
                                if (count == 1 && lines.Length == 2 && !l1Trimmed.EndsWith('.') &&
                                                                       !l1Trimmed.EndsWith('!') &&
                                                                       !l1Trimmed.EndsWith('?'))
                                {
                                    remove = false;
                                }

                                if (remove)
                                {
                                    var content = line.Substring(indexOfColon + 1).Trim();
                                    if (content.Length > 0)
                                    {
                                        newText += Environment.NewLine;
                                        if (pre.Contains("<i>") && content.Contains("</i>"))
                                            newText += "<i>" + content;
                                        else if (pre.Contains("<b>") && content.Contains("</b>"))
                                            newText += "<b>" + content;
                                        else if (pre.Contains('[') && content.Contains(']'))
                                            newText += "[" + content;
                                        else if (pre.Contains('(') && content.EndsWith(')'))
                                            newText += "(" + content;
                                        else
                                            newText += content;

                                        if (count == 0)
                                            removedInFirstLine = true;
                                        else if (count == 1)
                                            removedInSecondLine = true;
                                    }
                                    newText = newText.Trim();

                                    if (text.StartsWith('(') && newText.EndsWith(')') && !newText.Contains('('))
                                        newText = newText.TrimEnd(')');
                                    else if (text.StartsWith('[') && newText.EndsWith(']') && !newText.Contains('['))
                                        newText = newText.TrimEnd(']');
                                    else if (newText.EndsWith("</i>", StringComparison.Ordinal) && text.StartsWith("<i>", StringComparison.Ordinal) && !newText.StartsWith("<i>", StringComparison.Ordinal))
                                        newText = "<i>" + newText;
                                    else if (newText.EndsWith("</b>", StringComparison.Ordinal) && text.StartsWith("<b>", StringComparison.Ordinal) && !newText.StartsWith("<b>", StringComparison.Ordinal))
                                        newText = "<b>" + newText;
                                    else if (newText.EndsWith("</u>", StringComparison.Ordinal) && text.StartsWith("<u>", StringComparison.Ordinal) && !newText.StartsWith("<u>", StringComparison.Ordinal))
                                        newText = "<u>" + newText;

                                    if (!IsHIDescription(preStripable.StrippedText))
                                        noOfNames++;
                                }
                                else
                                {
                                    string s = line;
                                    string l1Trim = HtmlUtil.RemoveHtmlTags(lines[0]).TrimEnd('"');
                                    if (count == 1 && lines.Length == 2 && !l1Trim.EndsWith('.') &&
                                                                           !l1Trim.EndsWith('!') &&
                                                                           !l1Trim.EndsWith('?'))
                                    {
                                        int indexOf = line.IndexOf(". ", StringComparison.Ordinal);
                                        if (indexOf == -1)
                                            indexOf = line.IndexOf("! ", StringComparison.Ordinal);
                                        if (indexOf == -1)
                                            indexOf = line.IndexOf("? ", StringComparison.Ordinal);
                                        if (indexOf > 0 && indexOf < indexOfColon)
                                        {
                                            s = s.Remove(indexOf + 1, indexOfColon - indexOf);
                                            s = s.Insert(indexOf + 1, " -");
                                            if (newText.StartsWith("<i>") && !newText.StartsWith("<i>-"))
                                                newText = "<i>- " + newText.Remove(0, 3);
                                            else if (!newText.StartsWith("-"))
                                                newText = "- " + newText;
                                        }
                                    }
                                    newText = (newText + Environment.NewLine + s).Trim();
                                    if (newText.EndsWith("</i>", StringComparison.Ordinal) && text.StartsWith("<i>", StringComparison.Ordinal) && !newText.StartsWith("<i>", StringComparison.Ordinal))
                                        newText = "<i>" + newText;
                                    else if (newText.EndsWith("</b>", StringComparison.Ordinal) && text.StartsWith("<b>", StringComparison.Ordinal) && !newText.StartsWith("<b>", StringComparison.Ordinal))
                                        newText = "<b>" + newText;
                                    else if ((newText.EndsWith("</u>", StringComparison.Ordinal) && text.StartsWith("<u>", StringComparison.Ordinal) && !newText.StartsWith("<u>", StringComparison.Ordinal)))
                                        newText = "<u>" + newText;
                                }
                            }
                        }
                        else
                        {
                            char[] endChars = { '.', '?', '!' };
                            string s2 = line;
                            for (int k = 0; k < 2; k++)
                            {
                                if (s2.Contains(':'))
                                {
                                    int colonIndex = s2.IndexOf(':');
                                    string start = s2.Substring(0, colonIndex);

                                    if (!Settings.RemoveTextBeforeColonOnlyUppercase || start == start.ToUpper())
                                    {
                                        int endIndex = start.LastIndexOfAny(endChars);
                                        if (colonIndex > 0 && colonIndex < s2.Length - 1)
                                        {
                                            if (char.IsDigit(s2[colonIndex - 1]) && char.IsDigit(s2[colonIndex + 1]))
                                                endIndex = 0;
                                        }
                                        if (endIndex < 0)
                                            s2 = s2.Remove(0, colonIndex - endIndex);
                                        else if (endIndex > 0)
                                            s2 = s2.Remove(endIndex + 1, colonIndex - endIndex);
                                    }

                                    if (count == 0)
                                        removedInFirstLine = true;
                                    else if (count == 1)
                                        removedInSecondLine = true;
                                }
                            }
                            newText = (newText + Environment.NewLine + s2).Trim();
                        }
                    }
                }
                count++;
            }
            newText = newText.Trim();
            if (noOfNames > 0 && Utilities.GetNumberOfLines(newText) == 2)
            {
                int indexOfDialogChar = newText.IndexOf('-');
                bool insertDash = true;
                var arr = newText.SplitToLines();
                if (arr.Length == 2 && arr[0].Length > 1 && arr[1].Length > 1)
                {
                    string arr0 = new StripableText(arr[0]).StrippedText;
                    var arr1Stripable = new StripableText(arr[1]);
                    string arr1 = arr1Stripable.StrippedText;

                    if (arr0.Length > 0 && arr1.Length > 1)
                    {
                        // line continuation?
                        if (Utilities.LowercaseLetters.Contains(arr1[0])) // second line starts with lower case letter
                        {
                            char c = arr0[arr0.Length - 1];
                            if (Utilities.LowercaseLetters.Contains(c) || c == ',') // first line ends with comma or lower case letter
                            {
                                if (!arr1Stripable.Pre.Contains("..."))
                                {
                                    insertDash = false;
                                }
                            }
                        }

                        if (insertDash)
                        {
                            string arr0QuoteTrimmed = arr[0].TrimEnd('"');
                            if (arr0QuoteTrimmed.Length > 0 && !".?!".Contains(arr0QuoteTrimmed[arr0QuoteTrimmed.Length - 1]) && !arr0QuoteTrimmed.EndsWith("</i>", StringComparison.Ordinal))
                            {
                                if (!arr1Stripable.Pre.Contains('-'))
                                {
                                    insertDash = false;
                                }
                            }
                        }
                    }

                    if (insertDash && removedInFirstLine && !removedInSecondLine && !text.StartsWith('-') && !text.StartsWith("<i>-", StringComparison.Ordinal))
                    {
                        if (!arr[1].StartsWith('-') && !arr[1].StartsWith("<i>-", StringComparison.Ordinal))
                            insertDash = false;
                    }
                }

                if (insertDash)
                {
                    if (indexOfDialogChar < 0 || indexOfDialogChar > 4)
                    {
                        var st = new StripableText(newText, string.Empty, string.Empty);
                        newText = st.Pre + "- " + st.StrippedText + st.Post;
                    }

                    int indexOfNewLine = newText.IndexOf(Environment.NewLine, StringComparison.Ordinal);
                    string second = newText.Substring(indexOfNewLine).Trim();
                    indexOfDialogChar = second.IndexOf('-');
                    if (indexOfDialogChar < 0 || indexOfDialogChar > 6)
                    {
                        var st = new StripableText(second, String.Empty, String.Empty);
                        second = st.Pre + "- " + st.StrippedText + st.Post;
                        newText = newText.Remove(indexOfNewLine) + Environment.NewLine + second;
                    }
                }
            }
            else if (newText.Contains('-') && !newText.Contains(Environment.NewLine))
            {
                var st = new StripableText(newText);
                if (st.Pre.Contains('-'))
                    newText = st.Pre.Replace("-", string.Empty) + st.StrippedText + st.Post;
            }
            else if (removedInSecondLine && !removedInFirstLine && Utilities.GetNumberOfLines(newText) == 2)
            {
                string noTags = HtmlUtil.RemoveHtmlTags(newText, true).Trim();
                bool insertDash = noTags.StartsWith('-') && Utilities.CountTagInText(noTags, '-') == 1;
                if (insertDash)
                {
                    if (newText.Contains(Environment.NewLine + "<i>"))
                        newText = newText.Replace(Environment.NewLine + "<i>", Environment.NewLine + "<i>- ");
                    else
                        newText = newText.Replace(Environment.NewLine, Environment.NewLine + "- ");
                }
            }
            if (text.Contains("<i>") && !newText.Contains("<i>") && newText.EndsWith("</i>", StringComparison.Ordinal))
                newText = "<i>" + newText;

            if (string.IsNullOrWhiteSpace(newText))
                return string.Empty;

            return preAssTag + newText;
        }
Exemplo n.º 44
0
        public string RemoveColon(string text)
        {
            if (!Settings.RemoveTextBeforeColon)
                return text;

            if (text.IndexOf(":", StringComparison.Ordinal) < 0)
                return text;

            // House 7x01 line 52: and she would like you to do three things:
            // Okay or remove???
            if (text.IndexOf(':') > 0 && text.IndexOf(':') == text.Length - 1 && text != text.ToUpper())
                return text;

            string newText = string.Empty;
            string[] parts = text.Trim().Split(Environment.NewLine.ToCharArray(), StringSplitOptions.RemoveEmptyEntries);
            int noOfNames = 0;
            int count = 0;
            bool removedInFirstLine = false;
            bool removedInSecondLine = false;
            foreach (string s in parts)
            {
                int indexOfColon = s.IndexOf(":", StringComparison.Ordinal);
                if (indexOfColon > 0)
                {
                    string pre = s.Substring(0, indexOfColon);
                    if (Settings.RemoveTextBeforeColonOnlyUppercase && pre.Replace("<i>", string.Empty) != pre.Replace("<i>", string.Empty).ToUpper())
                    {
                        newText = newText + Environment.NewLine + s;
                        newText = newText.Trim();
                    }
                    else
                    {
                        StripableText st = new StripableText(pre);
                        if (count == 1 && Utilities.CountTagInText(text, Environment.NewLine) == 1 && removedInFirstLine && Utilities.CountTagInText(s, ":") == 1 &&
                            !newText.EndsWith('.') && !newText.EndsWith('!') && !newText.EndsWith('?') && !newText.EndsWith(".</i>") && !newText.EndsWith("!</i>") && !newText.EndsWith("?</i>") &&
                            s != s.ToUpper())
                        {
                            if (pre.Contains("<i>") && s.Contains("</i>"))
                                newText = newText + Environment.NewLine + "<i>" + s;
                            else if (pre.Contains("<b>") && s.Contains("</b>"))
                                newText = newText + Environment.NewLine + "<b>" + s;
                            else if (pre.Contains('[') && s.Contains(']'))
                                newText = newText + Environment.NewLine + "[" + s;
                            else if (pre.Contains('(') && s.EndsWith(')'))
                                newText = newText + Environment.NewLine + "(" + s;
                            else
                                newText = newText + Environment.NewLine + s;
                        }
                        else if (count == 1 && Utilities.CountTagInText(text, Environment.NewLine) == 1 && indexOfColon > 15 && s.Substring(0, indexOfColon).Contains(' ') && Utilities.CountTagInText(s, ":") == 1 &&
                            !newText.EndsWith('.') && !newText.EndsWith('!') && !newText.EndsWith('?') && !newText.EndsWith(".</i>") && !newText.EndsWith("!</i>") && !newText.EndsWith("?</i>") &&
                            s != s.ToUpper())
                        {
                            if (pre.Contains("<i>") && s.Contains("</i>"))
                                newText = newText + Environment.NewLine + "<i>" + s;
                            else if (pre.Contains("<b>") && s.Contains("</b>"))
                                newText = newText + Environment.NewLine + "<b>" + s;
                            else if (pre.Contains('[') && s.Contains(']'))
                                newText = newText + Environment.NewLine + "[" + s;
                            else if (pre.Contains('(') && s.EndsWith(')'))
                                newText = newText + Environment.NewLine + "(" + s;
                            else
                                newText = newText + Environment.NewLine + s;
                        }
                        else if (Utilities.CountTagInText(s, ":") == 1)
                        {
                            bool remove = true;
                            if (indexOfColon > 0 && indexOfColon < s.Length - 1)
                            {
                                if ("1234567890".Contains(s.Substring(indexOfColon - 1, 1)) && "1234567890".Contains(s.Substring(indexOfColon + 1, 1)))
                                    remove = false;
                            }
                            if (s.StartsWith("Previously on") || s.StartsWith("<i>Previously on"))
                                remove = false;

                            if (remove && Settings.ColonSeparateLine)
                            {
                                if (indexOfColon == s.Length - 1 || s.Substring(indexOfColon + 1).StartsWith(Environment.NewLine))
                                    remove = true;
                                else
                                    remove = false;
                            }

                            if (remove)
                            {
                                string content = s.Substring(indexOfColon + 1).Trim();
                                if (content.Length > 0)
                                {
                                    if (pre.Contains("<i>") && content.Contains("</i>"))
                                        newText = newText + Environment.NewLine + "<i>" + content;
                                    else if (pre.Contains("<b>") && content.Contains("</b>"))
                                        newText = newText + Environment.NewLine + "<b>" + content;
                                    else if (pre.Contains('[') && content.Contains(']'))
                                        newText = newText + Environment.NewLine + "[" + content;
                                    else if (pre.Contains('(') && content.EndsWith(')'))
                                        newText = newText + Environment.NewLine + "(" + content;
                                    else
                                        newText = newText + Environment.NewLine + content;

                                    if (count == 0)
                                        removedInFirstLine = true;
                                    else if (count == 1)
                                        removedInSecondLine = true;
                                }
                                newText = newText.Trim();

                                if (text.StartsWith('(') && newText.EndsWith(')') && !newText.Contains('('))
                                    newText = newText.TrimEnd(')');
                                else if (newText.EndsWith("</i>") && text.StartsWith("<i>") && !newText.StartsWith("<i>"))
                                    newText = "<i>" + newText;
                                else if (newText.EndsWith("</b>") && text.StartsWith("<b>") && !newText.StartsWith("<b>"))
                                    newText = "<b>" + newText;

                                if (!IsHIDescription(st.StrippedText))
                                    noOfNames++;
                            }
                            else
                            {
                                newText = newText + Environment.NewLine + s;
                                newText = newText.Trim();
                                if (newText.EndsWith("</i>") && text.StartsWith("<i>") && !newText.StartsWith("<i>"))
                                    newText = "<i>" + newText;
                                else if (newText.EndsWith("</b>") && text.StartsWith("<b>") && !newText.StartsWith("<b>"))
                                    newText = "<b>" + newText;
                            }
                        }
                        else
                        {
                            string s2 = s;
                            for (int k = 0; k < 2; k++)
                            {
                                if (s2.Contains(':'))
                                {
                                    int colonIndex = s2.IndexOf(":", StringComparison.Ordinal);
                                    string start = s2.Substring(0, colonIndex);

                                    if (!Settings.RemoveTextBeforeColonOnlyUppercase || start == start.ToUpper())
                                    {
                                        int periodIndex = start.LastIndexOf(". ", StringComparison.Ordinal);
                                        int questIndex = start.LastIndexOf("? ", StringComparison.Ordinal);
                                        int exclaIndex = start.LastIndexOf("! ", StringComparison.Ordinal);
                                        int endIndex = periodIndex;
                                        if (endIndex == -1 || questIndex > endIndex)
                                            endIndex = questIndex;
                                        if (endIndex == -1 || exclaIndex > endIndex)
                                            endIndex = exclaIndex;
                                        if (colonIndex > 0 && colonIndex < s2.Length - 1)
                                        {
                                            if ("1234567890".Contains(s2.Substring(colonIndex - 1, 1)) && "1234567890".Contains(s2.Substring(colonIndex + 1, 1)))
                                                endIndex = -10;
                                        }
                                        if (endIndex == -1)
                                            s2 = s2.Remove(0, colonIndex - endIndex);
                                        else if (endIndex > 0)
                                            s2 = s2.Remove(endIndex + 1, colonIndex - endIndex);
                                    }

                                    if (count == 0)
                                        removedInFirstLine = true;
                                    else if (count == 1)
                                        removedInSecondLine = true;
                                }
                            }
                            newText = newText + Environment.NewLine + s2;
                            newText = newText.Trim();
                        }
                    }
                }
                else
                {
                    newText = newText + Environment.NewLine + s;
                    newText = newText.Trim();

                    if (newText.EndsWith("</i>") && text.StartsWith("<i>") && !newText.StartsWith("<i>"))
                        newText = "<i>" + newText;
                    else if (newText.EndsWith("</b>") && text.StartsWith("<b>") && !newText.StartsWith("<b>"))
                        newText = "<b>" + newText;
                }
                count++;
            }
            newText = newText.Trim();
            if (noOfNames > 0 && Utilities.CountTagInText(newText, Environment.NewLine) == 1)
            {
                int indexOfDialogChar = newText.IndexOf('-');
                bool insertDash = true;
                string[] arr = newText.Split(Environment.NewLine.ToCharArray(), StringSplitOptions.RemoveEmptyEntries);
                if (arr.Length == 2 && arr[0].Length > 1 && arr[1].Length > 1)
                {
                    string arr0 = new StripableText(arr[0]).StrippedText;
                    string arr1 = new StripableText(arr[1]).StrippedText;

                    //line continuation?
                    if (arr0.Length > 0 && arr1.Length > 1 && (Utilities.LowercaseLetters + ",").Contains(arr0.Substring(arr0.Length - 1)) &&
                        Utilities.LowercaseLetters.Contains(arr1.Substring(0, 1)))
                    {
                        if (new StripableText(arr[1]).Pre.Contains("...") == false)
                            insertDash = false;
                    }

                    if (arr0.Length > 0 && arr1.Length > 1 && !(arr[0].EndsWith('.') || arr[0].EndsWith('!') || arr[0].EndsWith('?') || arr[0].EndsWith("</i>")) &&
                        !(new StripableText(arr[1]).Pre.Contains('-')))
                    {
                        insertDash = false;
                    }

                    if (removedInFirstLine && !removedInSecondLine && !text.StartsWith('-') && !text.StartsWith("<i>-"))
                    {
                        if (!insertDash || (!arr[1].StartsWith('-') && !arr[1].StartsWith("<i>-")))
                            insertDash = false;
                    }
                }

                if (insertDash)
                {
                    if (indexOfDialogChar < 0 || indexOfDialogChar > 4)
                    {
                        StripableText st = new StripableText(newText, "", "");
                        newText = st.Pre + "- " + st.StrippedText + st.Post;
                    }

                    int indexOfNewLine = newText.IndexOf(Environment.NewLine, StringComparison.Ordinal);
                    string second = newText.Substring(indexOfNewLine).Trim();
                    indexOfDialogChar = second.IndexOf('-');
                    if (indexOfDialogChar < 0 || indexOfDialogChar > 6)
                    {
                        StripableText st = new StripableText(second, "", "");
                        second = st.Pre + "- " + st.StrippedText + st.Post;
                        newText = newText.Remove(indexOfNewLine) + Environment.NewLine + second;
                    }
                }
            }
            else if (!newText.Contains(Environment.NewLine) && newText.Contains('-'))
            {
                StripableText st = new StripableText(newText);
                if (st.Pre.Contains('-'))
                    newText = st.Pre.Replace("-", string.Empty) + st.StrippedText + st.Post;
            }
            else if (Utilities.CountTagInText(newText, Environment.NewLine) == 1 && removedInFirstLine == false && removedInSecondLine)
            {
                string noTags = Utilities.RemoveHtmlTags(newText, true).Trim();
                bool insertDash = noTags.StartsWith('-') && Utilities.CountTagInText(noTags, "-") == 1;
                if (insertDash)
                {
                    if (newText.Contains(Environment.NewLine + "<i>"))
                        newText = newText.Replace(Environment.NewLine + "<i>", Environment.NewLine + "<i>- ");
                    else
                        newText = newText.Replace(Environment.NewLine, Environment.NewLine + "- ");
                }
            }
            if (text.Contains("<i>") && !newText.Contains("<i>") && newText.EndsWith("</i>"))
                newText = "<i>" + newText;
            return newText;
        }
Exemplo n.º 45
0
        public string RemoveTextFromHearImpaired(string text)
        {
            if (Settings.RemoveWhereContains && Settings.RemoveIfTextContains.Length > 0 && text.Contains(Settings.RemoveIfTextContains))
            {
                return(string.Empty);
            }

            string oldText = text;

            text = RemoveColon(text);
            string pre  = " >-\"'‘`´♪¿¡.…—";
            string post = " -\"'`´♪.!?:…—";

            if (Settings.RemoveTextBetweenCustomTags)
            {
                pre  = pre.Replace(Settings.CustomStart, string.Empty);
                post = post.Replace(Settings.CustomEnd, string.Empty);
            }
            var st = new StripableText(text, pre, post);
            var sb = new StringBuilder();

            string[] parts      = st.StrippedText.Trim().Split(Utilities.NewLineChars, StringSplitOptions.RemoveEmptyEntries);
            int      lineNumber = 0;
            bool     removedDialogInFirstLine     = false;
            int      noOfNamesRemoved             = 0;
            int      noOfNamesRemovedNotInLineOne = 0;

            foreach (string s in parts)
            {
                StripableText stSub = new StripableText(s, pre, post);
                if (!StartAndEndsWithHearImpariedTags(stSub.StrippedText))
                {
                    if (removedDialogInFirstLine && stSub.Pre.Contains("- "))
                    {
                        stSub.Pre = stSub.Pre.Replace("- ", string.Empty);
                    }

                    string newText = stSub.StrippedText;

                    newText = RemoveHearImpairedTags(newText);

                    if (stSub.StrippedText.Length - newText.Length > 2)
                    {
                        string removedText = GetRemovedString(stSub.StrippedText, newText);
                        if (!IsHIDescription(removedText))
                        {
                            noOfNamesRemoved++;
                            if (lineNumber > 0)
                            {
                                noOfNamesRemovedNotInLineOne++;
                            }
                        }
                    }
                    sb.AppendLine(stSub.Pre + newText + stSub.Post);
                }
                else
                {
                    if (!IsHIDescription(stSub.StrippedText))
                    {
                        noOfNamesRemoved++;
                        if (lineNumber > 0)
                        {
                            noOfNamesRemovedNotInLineOne++;
                        }
                    }

                    if (st.Pre.Contains("- ") && lineNumber == 0)
                    {
                        st.Pre = st.Pre.Replace("- ", string.Empty);
                        removedDialogInFirstLine = true;
                    }

                    if (st.Pre.Contains("<i>") && stSub.Post.Contains("</i>"))
                    {
                        st.Pre = st.Pre.Replace("<i>", string.Empty);
                    }

                    if (s.Contains("<i>") && !s.Contains("</i>") && st.Post.Contains("</i>"))
                    {
                        st.Post = st.Post.Replace("</i>", string.Empty);
                    }
                }
                lineNumber++;
            }

            text = st.Pre + sb.ToString().Trim() + st.Post;
            text = text.Replace("<i></i>", string.Empty).Trim();
            text = RemoveColon(text);
            text = RemoveLineIfAllUppercase(text);
            text = RemoveHearImpairedtagsInsideLine(text);
            if (Settings.RemoveInterjections)
            {
                text = RemoveInterjections(text);
            }

            st   = new StripableText(text, " >-\"'‘`´♪¿¡.…—", " -\"'`´♪.!?:…—");
            text = st.StrippedText;
            if (StartAndEndsWithHearImpariedTags(text))
            {
                text = RemoveStartEndTags(text);
            }

            text = RemoveHearImpairedTags(text);

            // fix 3 lines to two liners - if only two lines
            if (noOfNamesRemoved >= 1 && Utilities.CountTagInText(text, Environment.NewLine) == 2)
            {
                string[] a = Utilities.RemoveHtmlTags(text).Replace(" ", string.Empty).Split(new[] { '!', '?', '.' }, StringSplitOptions.RemoveEmptyEntries);
                if (a.Length == 2)
                {
                    StripableText temp = new StripableText(text);
                    temp.StrippedText = temp.StrippedText.Replace(Environment.NewLine, " ");
                    int splitIndex = temp.StrippedText.LastIndexOf('!');
                    if (splitIndex == -1)
                    {
                        splitIndex = temp.StrippedText.LastIndexOf('?');
                    }
                    if (splitIndex == -1)
                    {
                        splitIndex = temp.StrippedText.LastIndexOf('.');
                    }
                    if (splitIndex > 0)
                    {
                        text = temp.Pre + temp.StrippedText.Insert(splitIndex + 1, Environment.NewLine) + temp.Post;
                    }
                }
            }

            if (!text.StartsWith('-') && noOfNamesRemoved >= 1 && Utilities.CountTagInText(text, Environment.NewLine) == 1)
            {
                string[] arr   = text.Split(Utilities.NewLineChars);
                string   part0 = arr[0].Trim().Replace("</i>", string.Empty).Trim();
                if (!part0.EndsWith(',') && (!part0.EndsWith('-') || noOfNamesRemovedNotInLineOne > 0))
                {
                    if (part0.Length > 0 && ".!?".Contains(part0.Substring(part0.Length - 1)))
                    {
                        if (noOfNamesRemovedNotInLineOne > 0)
                        {
                            if (!st.Pre.Contains('-'))
                            {
                                text = "- " + text.Replace(Environment.NewLine, Environment.NewLine + "- ");
                            }
                            if (!text.Contains(Environment.NewLine + "-") && !text.Contains(Environment.NewLine + "<i>-"))
                            {
                                text = text.Replace(Environment.NewLine, Environment.NewLine + "- ");
                            }
                        }
                    }
                }
            }

            if (!string.IsNullOrEmpty(text))
            {
                text = st.Pre + text + st.Post;
            }

            if (oldText.Trim().StartsWith("- ") &&
                (oldText.Contains(Environment.NewLine + "- ") || oldText.Contains(Environment.NewLine + " - ")) && !text.Contains(Environment.NewLine))
            {
                text = text.TrimStart().TrimStart('-').TrimStart();
            }

            if (oldText != text)
            {
                // insert spaces before "-"
                text = text.Replace(Environment.NewLine + "- <i>", Environment.NewLine + "<i>- ");
                text = text.Replace(Environment.NewLine + "-<i>", Environment.NewLine + "<i>- ");
                if (text.StartsWith('-') && text.Length > 2 && text[1] != ' ' && text[1] != '-')
                {
                    text = text.Insert(1, " ");
                }
                if (text.StartsWith("<i>-") && text.Length > 5 && text[4] != ' ' && text[4] != '-')
                {
                    text = text.Insert(4, " ");
                }
                if (text.Contains(Environment.NewLine + "-"))
                {
                    int index = text.IndexOf(Environment.NewLine + "-", StringComparison.Ordinal);
                    if (index + 4 < text.Length && text[index + Environment.NewLine.Length + 1] != ' ' && text[index + Environment.NewLine.Length + 1] != '-')
                    {
                        text = text.Insert(index + Environment.NewLine.Length + 1, " ");
                    }
                }
                if (text.Contains(Environment.NewLine + "<i>-"))
                {
                    int index = text.IndexOf(Environment.NewLine + "<i>-", StringComparison.Ordinal);
                    if (index + 5 < text.Length && text[index + Environment.NewLine.Length + 4] != ' ' && text[index + Environment.NewLine.Length + 4] != '-')
                    {
                        text = text.Insert(index + Environment.NewLine.Length + 4, " ");
                    }
                }
            }
            return(text.Trim());
        }
Exemplo n.º 46
0
        public string RemoveColon(string text)
        {
            if (!Settings.RemoveTextBeforeColon)
            {
                return(text);
            }

            if (text.IndexOf(":", StringComparison.Ordinal) < 0)
            {
                return(text);
            }

            // House 7x01 line 52: and she would like you to do three things:
            // Okay or remove???
            if (text.IndexOf(':') > 0 && text.IndexOf(':') == text.Length - 1 && text != text.ToUpper())
            {
                return(text);
            }

            string newText = string.Empty;

            string[] parts               = text.Trim().Split(Utilities.NewLineChars, StringSplitOptions.RemoveEmptyEntries);
            int      noOfNames           = 0;
            int      count               = 0;
            bool     removedInFirstLine  = false;
            bool     removedInSecondLine = false;

            foreach (string s in parts)
            {
                int indexOfColon = s.IndexOf(":", StringComparison.Ordinal);
                if (indexOfColon > 0)
                {
                    string pre = s.Substring(0, indexOfColon);
                    if (Settings.RemoveTextBeforeColonOnlyUppercase && pre.Replace("<i>", string.Empty) != pre.Replace("<i>", string.Empty).ToUpper())
                    {
                        newText = newText + Environment.NewLine + s;
                        newText = newText.Trim();
                    }
                    else
                    {
                        StripableText st = new StripableText(pre);
                        if (count == 1 && Utilities.CountTagInText(text, Environment.NewLine) == 1 && removedInFirstLine && Utilities.CountTagInText(s, ":") == 1 &&
                            !newText.EndsWith('.') && !newText.EndsWith('!') && !newText.EndsWith('?') && !newText.EndsWith(".</i>") && !newText.EndsWith("!</i>") && !newText.EndsWith("?</i>") &&
                            s != s.ToUpper())
                        {
                            if (pre.Contains("<i>") && s.Contains("</i>"))
                            {
                                newText = newText + Environment.NewLine + "<i>" + s;
                            }
                            else if (pre.Contains("<b>") && s.Contains("</b>"))
                            {
                                newText = newText + Environment.NewLine + "<b>" + s;
                            }
                            else if (pre.Contains('[') && s.Contains(']'))
                            {
                                newText = newText + Environment.NewLine + "[" + s;
                            }
                            else if (pre.Contains('(') && s.EndsWith(')'))
                            {
                                newText = newText + Environment.NewLine + "(" + s;
                            }
                            else
                            {
                                newText = newText + Environment.NewLine + s;
                            }
                        }
                        else if (count == 1 && Utilities.CountTagInText(text, Environment.NewLine) == 1 && indexOfColon > 15 && s.Substring(0, indexOfColon).Contains(' ') && Utilities.CountTagInText(s, ":") == 1 &&
                                 !newText.EndsWith('.') && !newText.EndsWith('!') && !newText.EndsWith('?') && !newText.EndsWith(".</i>") && !newText.EndsWith("!</i>") && !newText.EndsWith("?</i>") &&
                                 s != s.ToUpper())
                        {
                            if (pre.Contains("<i>") && s.Contains("</i>"))
                            {
                                newText = newText + Environment.NewLine + "<i>" + s;
                            }
                            else if (pre.Contains("<b>") && s.Contains("</b>"))
                            {
                                newText = newText + Environment.NewLine + "<b>" + s;
                            }
                            else if (pre.Contains('[') && s.Contains(']'))
                            {
                                newText = newText + Environment.NewLine + "[" + s;
                            }
                            else if (pre.Contains('(') && s.EndsWith(')'))
                            {
                                newText = newText + Environment.NewLine + "(" + s;
                            }
                            else
                            {
                                newText = newText + Environment.NewLine + s;
                            }
                        }
                        else if (Utilities.CountTagInText(s, ":") == 1)
                        {
                            bool remove = true;
                            if (indexOfColon > 0 && indexOfColon < s.Length - 1)
                            {
                                if ("1234567890".Contains(s.Substring(indexOfColon - 1, 1)) && "1234567890".Contains(s.Substring(indexOfColon + 1, 1)))
                                {
                                    remove = false;
                                }
                            }
                            if (s.StartsWith("Previously on") || s.StartsWith("<i>Previously on"))
                            {
                                remove = false;
                            }

                            if (remove && Settings.ColonSeparateLine)
                            {
                                if (indexOfColon == s.Length - 1 || s.Substring(indexOfColon + 1).StartsWith(Environment.NewLine))
                                {
                                    remove = true;
                                }
                                else
                                {
                                    remove = false;
                                }
                            }

                            if (remove)
                            {
                                string content = s.Substring(indexOfColon + 1).Trim();
                                if (content.Length > 0)
                                {
                                    if (pre.Contains("<i>") && content.Contains("</i>"))
                                    {
                                        newText = newText + Environment.NewLine + "<i>" + content;
                                    }
                                    else if (pre.Contains("<b>") && content.Contains("</b>"))
                                    {
                                        newText = newText + Environment.NewLine + "<b>" + content;
                                    }
                                    else if (pre.Contains('[') && content.Contains(']'))
                                    {
                                        newText = newText + Environment.NewLine + "[" + content;
                                    }
                                    else if (pre.Contains('(') && content.EndsWith(')'))
                                    {
                                        newText = newText + Environment.NewLine + "(" + content;
                                    }
                                    else
                                    {
                                        newText = newText + Environment.NewLine + content;
                                    }

                                    if (count == 0)
                                    {
                                        removedInFirstLine = true;
                                    }
                                    else if (count == 1)
                                    {
                                        removedInSecondLine = true;
                                    }
                                }
                                newText = newText.Trim();

                                if (text.StartsWith('(') && newText.EndsWith(')') && !newText.Contains('('))
                                {
                                    newText = newText.TrimEnd(')');
                                }
                                else if (newText.EndsWith("</i>") && text.StartsWith("<i>") && !newText.StartsWith("<i>"))
                                {
                                    newText = "<i>" + newText;
                                }
                                else if (newText.EndsWith("</b>") && text.StartsWith("<b>") && !newText.StartsWith("<b>"))
                                {
                                    newText = "<b>" + newText;
                                }

                                if (!IsHIDescription(st.StrippedText))
                                {
                                    noOfNames++;
                                }
                            }
                            else
                            {
                                newText = newText + Environment.NewLine + s;
                                newText = newText.Trim();
                                if (newText.EndsWith("</i>") && text.StartsWith("<i>") && !newText.StartsWith("<i>"))
                                {
                                    newText = "<i>" + newText;
                                }
                                else if (newText.EndsWith("</b>") && text.StartsWith("<b>") && !newText.StartsWith("<b>"))
                                {
                                    newText = "<b>" + newText;
                                }
                            }
                        }
                        else
                        {
                            string s2 = s;
                            for (int k = 0; k < 2; k++)
                            {
                                if (s2.Contains(':'))
                                {
                                    int    colonIndex = s2.IndexOf(":", StringComparison.Ordinal);
                                    string start      = s2.Substring(0, colonIndex);

                                    if (!Settings.RemoveTextBeforeColonOnlyUppercase || start == start.ToUpper())
                                    {
                                        int periodIndex = start.LastIndexOf(". ", StringComparison.Ordinal);
                                        int questIndex  = start.LastIndexOf("? ", StringComparison.Ordinal);
                                        int exclaIndex  = start.LastIndexOf("! ", StringComparison.Ordinal);
                                        int endIndex    = periodIndex;
                                        if (endIndex == -1 || questIndex > endIndex)
                                        {
                                            endIndex = questIndex;
                                        }
                                        if (endIndex == -1 || exclaIndex > endIndex)
                                        {
                                            endIndex = exclaIndex;
                                        }
                                        if (colonIndex > 0 && colonIndex < s2.Length - 1)
                                        {
                                            if ("1234567890".Contains(s2.Substring(colonIndex - 1, 1)) && "1234567890".Contains(s2.Substring(colonIndex + 1, 1)))
                                            {
                                                endIndex = -10;
                                            }
                                        }
                                        if (endIndex == -1)
                                        {
                                            s2 = s2.Remove(0, colonIndex - endIndex);
                                        }
                                        else if (endIndex > 0)
                                        {
                                            s2 = s2.Remove(endIndex + 1, colonIndex - endIndex);
                                        }
                                    }

                                    if (count == 0)
                                    {
                                        removedInFirstLine = true;
                                    }
                                    else if (count == 1)
                                    {
                                        removedInSecondLine = true;
                                    }
                                }
                            }
                            newText = newText + Environment.NewLine + s2;
                            newText = newText.Trim();
                        }
                    }
                }
                else
                {
                    newText = newText + Environment.NewLine + s;
                    newText = newText.Trim();

                    if (newText.EndsWith("</i>") && text.StartsWith("<i>") && !newText.StartsWith("<i>"))
                    {
                        newText = "<i>" + newText;
                    }
                    else if (newText.EndsWith("</b>") && text.StartsWith("<b>") && !newText.StartsWith("<b>"))
                    {
                        newText = "<b>" + newText;
                    }
                }
                count++;
            }
            newText = newText.Trim();
            if (noOfNames > 0 && Utilities.CountTagInText(newText, Environment.NewLine) == 1)
            {
                int      indexOfDialogChar = newText.IndexOf('-');
                bool     insertDash        = true;
                string[] arr = newText.Split(Utilities.NewLineChars, StringSplitOptions.RemoveEmptyEntries);
                if (arr.Length == 2 && arr[0].Length > 1 && arr[1].Length > 1)
                {
                    string arr0 = new StripableText(arr[0]).StrippedText;
                    string arr1 = new StripableText(arr[1]).StrippedText;

                    //line continuation?
                    if (arr0.Length > 0 && arr1.Length > 1 && (Utilities.LowercaseLetters + ",").Contains(arr0.Substring(arr0.Length - 1)) &&
                        Utilities.LowercaseLetters.Contains(arr1.Substring(0, 1)))
                    {
                        if (new StripableText(arr[1]).Pre.Contains("...") == false)
                        {
                            insertDash = false;
                        }
                    }

                    if (arr0.Length > 0 && arr1.Length > 1 && !(arr[0].EndsWith('.') || arr[0].EndsWith('!') || arr[0].EndsWith('?') || arr[0].EndsWith("</i>")) &&
                        !(new StripableText(arr[1]).Pre.Contains('-')))
                    {
                        insertDash = false;
                    }

                    if (removedInFirstLine && !removedInSecondLine && !text.StartsWith('-') && !text.StartsWith("<i>-"))
                    {
                        if (!insertDash || (!arr[1].StartsWith('-') && !arr[1].StartsWith("<i>-")))
                        {
                            insertDash = false;
                        }
                    }
                }

                if (insertDash)
                {
                    if (indexOfDialogChar < 0 || indexOfDialogChar > 4)
                    {
                        StripableText st = new StripableText(newText, "", "");
                        newText = st.Pre + "- " + st.StrippedText + st.Post;
                    }

                    int    indexOfNewLine = newText.IndexOf(Environment.NewLine, StringComparison.Ordinal);
                    string second         = newText.Substring(indexOfNewLine).Trim();
                    indexOfDialogChar = second.IndexOf('-');
                    if (indexOfDialogChar < 0 || indexOfDialogChar > 6)
                    {
                        StripableText st = new StripableText(second, "", "");
                        second  = st.Pre + "- " + st.StrippedText + st.Post;
                        newText = newText.Remove(indexOfNewLine) + Environment.NewLine + second;
                    }
                }
            }
            else if (!newText.Contains(Environment.NewLine) && newText.Contains('-'))
            {
                StripableText st = new StripableText(newText);
                if (st.Pre.Contains('-'))
                {
                    newText = st.Pre.Replace("-", string.Empty) + st.StrippedText + st.Post;
                }
            }
            else if (Utilities.CountTagInText(newText, Environment.NewLine) == 1 && removedInFirstLine == false && removedInSecondLine)
            {
                string noTags     = Utilities.RemoveHtmlTags(newText, true).Trim();
                bool   insertDash = noTags.StartsWith('-') && Utilities.CountTagInText(noTags, "-") == 1;
                if (insertDash)
                {
                    if (newText.Contains(Environment.NewLine + "<i>"))
                    {
                        newText = newText.Replace(Environment.NewLine + "<i>", Environment.NewLine + "<i>- ");
                    }
                    else
                    {
                        newText = newText.Replace(Environment.NewLine, Environment.NewLine + "- ");
                    }
                }
            }
            if (text.Contains("<i>") && !newText.Contains("<i>") && newText.EndsWith("</i>"))
            {
                newText = "<i>" + newText;
            }
            return(newText);
        }
Exemplo n.º 47
0
        private void MergeLinesWithContinuation()
        {
            var temp = new Subtitle();
            bool skipNext = false;
            for (int i = 0; i < _subtitle.Paragraphs.Count; i++)
            {
                Paragraph p = _subtitle.Paragraphs[i];
                if (!skipNext)
                {
                    Paragraph next = _subtitle.GetParagraphOrDefault(i + 1);

                    bool merge = !(p.Text.Contains(Environment.NewLine) || next == null);

                    if (merge && (p.Text.TrimEnd().EndsWith('!') || p.Text.TrimEnd().EndsWith('.')))
                    {
                        var st = new StripableText(p.Text);
                        if (st.StrippedText.Length > 0 && Utilities.UppercaseLetters.Contains(st.StrippedText[0].ToString(CultureInfo.InvariantCulture)))
                            merge = false;
                    }

                    if (merge && (p.Text.Length >= Configuration.Settings.General.SubtitleLineMaximumLength - 5 || next.Text.Length >= Configuration.Settings.General.SubtitleLineMaximumLength - 5))
                        merge = false;

                    if (merge)
                    {
                        temp.Paragraphs.Add(new Paragraph { Text = p.Text + Environment.NewLine + next.Text });
                        skipNext = true;
                    }
                    else
                    {
                        temp.Paragraphs.Add(new Paragraph(p));
                    }
                }
                else
                {
                    skipNext = false;
                }
            }
            _subtitle = temp;
        }
        public void Fix(Subtitle subtitle, IFixCallbacks callbacks)
        {
            var    language  = Configuration.Settings.Language.FixCommonErrors;
            string fixAction = language.FixUppercaseIInsideLowercaseWord;
            int    uppercaseIsInsideLowercaseWords = 0;

            for (int i = 0; i < subtitle.Paragraphs.Count; i++)
            {
                Paragraph p       = subtitle.Paragraphs[i];
                string    oldText = p.Text;

                Match match = ReAfterLowercaseLetter.Match(p.Text);
                while (match.Success)
                {
                    if (!(match.Index > 1 && p.Text.Substring(match.Index - 1, 2) == "Mc") && // irish names, McDonalds etc.
                        p.Text[match.Index + 1] == 'I' &&
                        callbacks.AllowFix(p, fixAction))
                    {
                        p.Text = p.Text.Substring(0, match.Index + 1) + "l";
                        if (match.Index + 2 < oldText.Length)
                        {
                            p.Text += oldText.Substring(match.Index + 2);
                        }

                        uppercaseIsInsideLowercaseWords++;
                        callbacks.AddFixToListView(p, fixAction, oldText, p.Text);
                    }
                    match = match.NextMatch();
                }

                var st = new StripableText(p.Text);
                match = ReBeforeLowercaseLetter.Match(st.StrippedText);
                while (match.Success)
                {
                    string word = GetWholeWord(st.StrippedText, match.Index);
                    if (!callbacks.IsName(word))
                    {
                        if (callbacks.AllowFix(p, fixAction))
                        {
                            if (word.Equals("internal", StringComparison.OrdinalIgnoreCase) ||
                                word.Equals("island", StringComparison.OrdinalIgnoreCase) ||
                                word.Equals("islands", StringComparison.OrdinalIgnoreCase))
                            {
                            }
                            else if (match.Index == 0)
                            {  // first letter in paragraph
                               //too risky! - perhaps if periods is fixed at the same time... or too complicated!?
                               //if (isLineContinuation)
                               //{
                               //    st.StrippedText = st.StrippedText.Remove(match.Index, 1).Insert(match.Index, "l");
                               //    p.Text = st.MergedString;
                               //    uppercaseIsInsideLowercaseWords++;
                               //    AddFixToListView(p, fixAction, oldText, p.Text);
                               //}
                            }
                            else
                            {
                                if (match.Index > 2 && st.StrippedText[match.Index - 1] == ' ')
                                {
                                    if ((Utilities.AllLettersAndNumbers + @",").Contains(st.StrippedText[match.Index - 2]) &&
                                        match.Length >= 2 && Utilities.LowercaseVowels.Contains(char.ToLower(match.Value[1])))
                                    {
                                        st.StrippedText = st.StrippedText.Remove(match.Index, 1).Insert(match.Index, "l");
                                        p.Text          = st.MergedString;
                                        uppercaseIsInsideLowercaseWords++;
                                        callbacks.AddFixToListView(p, fixAction, oldText, p.Text);
                                    }
                                }
                                else if (match.Index > Environment.NewLine.Length + 1 && Environment.NewLine.Contains(st.StrippedText[match.Index - 1]))
                                {
                                    if ((Utilities.AllLettersAndNumbers + @",").Contains(st.StrippedText[match.Index - Environment.NewLine.Length + 1]) &&
                                        match.Length >= 2 && Utilities.LowercaseVowels.Contains(match.Value[1]))
                                    {
                                        st.StrippedText = st.StrippedText.Remove(match.Index, 1).Insert(match.Index, "l");
                                        p.Text          = st.MergedString;
                                        uppercaseIsInsideLowercaseWords++;
                                        callbacks.AddFixToListView(p, fixAction, oldText, p.Text);
                                    }
                                }
                                else if (match.Index > 1 && ((st.StrippedText[match.Index - 1] == '\"') || (st.StrippedText[match.Index - 1] == '\'') ||
                                                             (st.StrippedText[match.Index - 1] == '>') || (st.StrippedText[match.Index - 1] == '-')))
                                {
                                }
                                else
                                {
                                    var before = '\0';
                                    var after  = '\0';
                                    if (match.Index > 0)
                                    {
                                        before = st.StrippedText[match.Index - 1];
                                    }
                                    if (match.Index < st.StrippedText.Length - 2)
                                    {
                                        after = st.StrippedText[match.Index + 1];
                                    }
                                    if (before != '\0' && char.IsUpper(before) && after != '\0' && char.IsLower(after) &&
                                        !Utilities.LowercaseVowels.Contains(char.ToLower(before)) && !Utilities.LowercaseVowels.Contains(after))
                                    {
                                        st.StrippedText = st.StrippedText.Remove(match.Index, 1).Insert(match.Index, "i");
                                        p.Text          = st.MergedString;
                                        uppercaseIsInsideLowercaseWords++;
                                        callbacks.AddFixToListView(p, fixAction, oldText, p.Text);
                                    }
                                    else if (@"‘’¡¿„“()[]♪'. @".Contains(before) && !Utilities.LowercaseVowels.Contains(char.ToLower(after)))
                                    {
                                    }
                                    else
                                    {
                                        st.StrippedText = st.StrippedText.Remove(match.Index, 1).Insert(match.Index, "l");
                                        p.Text          = st.MergedString;
                                        uppercaseIsInsideLowercaseWords++;
                                        callbacks.AddFixToListView(p, fixAction, oldText, p.Text);
                                    }
                                }
                            }
                        }
                    }
                    match = match.NextMatch();
                }
            }
            callbacks.UpdateFixStatus(uppercaseIsInsideLowercaseWords, language.FixUppercaseIInsindeLowercaseWords, language.XUppercaseIsFoundInsideLowercaseWords);
        }
Exemplo n.º 49
0
        public string RemoveInterjections(string text)
        {
            string oldText = text;

            string[] arr = Configuration.Settings.Tools.Interjections.Split(new[] { ';' }, StringSplitOptions.RemoveEmptyEntries);
            if (_interjectionList == null)
            {
                _interjectionList = new List <string>();
                foreach (string s in arr)
                {
                    if (s.Length > 0)
                    {
                        if (!_interjectionList.Contains(s))
                        {
                            _interjectionList.Add(s);
                        }

                        string lower = s.ToLower();
                        if (!_interjectionList.Contains(lower))
                        {
                            _interjectionList.Add(lower);
                        }

                        string upper = s.ToUpper();
                        if (!_interjectionList.Contains(upper))
                        {
                            _interjectionList.Add(upper);
                        }

                        string pascalCasing = s.Substring(0, 1).ToUpper() + s.Remove(0, 1);
                        if (!_interjectionList.Contains(pascalCasing))
                        {
                            _interjectionList.Add(pascalCasing);
                        }
                    }
                }
                _interjectionList.Sort(CompareLength);
            }

            bool doRepeat = true;

            while (doRepeat)
            {
                doRepeat = false;
                foreach (string s in _interjectionList)
                {
                    if (text.Contains(s))
                    {
                        var regex = new Regex("\\b" + s + "\\b");
                        var match = regex.Match(text);
                        if (match.Success)
                        {
                            int    index = match.Index;
                            string temp  = text.Remove(index, s.Length);

                            string pre = string.Empty;
                            if (index > 0)
                            {
                                doRepeat = true;
                            }

                            bool removeAfter = true;

                            if (temp.Length > index - s.Length + 3 && index > s.Length)
                            {
                                if (temp.Substring(index - s.Length + 1, 3) == ", !")
                                {
                                    temp        = temp.Remove(index - s.Length + 1, 2);
                                    removeAfter = false;
                                }
                                else if (temp.Substring(index - s.Length + 1, 3) == ", ?")
                                {
                                    temp        = temp.Remove(index - s.Length + 1, 2);
                                    removeAfter = false;
                                }
                                else if (temp.Substring(index - s.Length + 1, 3) == ", .")
                                {
                                    temp        = temp.Remove(index - s.Length + 1, 2);
                                    removeAfter = false;
                                }
                            }
                            if (removeAfter && temp.Length > index - s.Length + 2 && index > s.Length)
                            {
                                if (temp.Substring(index - s.Length, 3) == ", !")
                                {
                                    temp        = temp.Remove(index - s.Length, 2);
                                    removeAfter = false;
                                }
                                else if (temp.Substring(index - s.Length, 3) == ", ?")
                                {
                                    temp        = temp.Remove(index - s.Length, 2);
                                    removeAfter = false;
                                }
                                else if (temp.Substring(index - s.Length, 3) == ", .")
                                {
                                    temp        = temp.Remove(index - s.Length, 2);
                                    removeAfter = false;
                                }
                            }
                            if (removeAfter && temp.Length > index - s.Length + 2 && index > s.Length)
                            {
                                if (temp.Substring(index - s.Length + 1, 2) == "-!")
                                {
                                    temp        = temp.Remove(index - s.Length + 1, 1);
                                    removeAfter = false;
                                }
                                else if (temp.Substring(index - s.Length + 1, 2) == "-?")
                                {
                                    temp        = temp.Remove(index - s.Length + 1, 1);
                                    removeAfter = false;
                                }
                                else if (temp.Substring(index - s.Length + 1, 2) == "-.")
                                {
                                    temp        = temp.Remove(index - s.Length + 1, 1);
                                    removeAfter = false;
                                }
                            }

                            if (removeAfter)
                            {
                                if (index == 0)
                                {
                                    if (!string.IsNullOrEmpty(temp) && temp.StartsWith('-'))
                                    {
                                        temp = temp.Remove(0, 1).Trim();
                                    }
                                }
                                else if (index == 3 && !string.IsNullOrEmpty(temp) && temp.StartsWith("<i>-"))
                                {
                                    temp = temp.Remove(3, 1);
                                }
                                else if (index > 0)
                                {
                                    pre  = text.Substring(0, index);
                                    temp = temp.Remove(0, index);
                                    if (pre.EndsWith('-') && temp.StartsWith('-'))
                                    {
                                        temp = temp.Remove(0, 1);
                                    }
                                    if (pre.EndsWith("- ") && temp.StartsWith('-'))
                                    {
                                        temp = temp.Remove(0, 1);
                                    }
                                }

                                while (temp.Length > 0 && (temp.StartsWith(' ') || temp.StartsWith(',') || temp.StartsWith('.') || temp.StartsWith('!') || temp.StartsWith('?')))
                                {
                                    temp     = temp.Remove(0, 1);
                                    doRepeat = true;
                                }
                                if (temp.Length > 0 && s[0].ToString(CultureInfo.InvariantCulture) != s[0].ToString(CultureInfo.InvariantCulture).ToLower())
                                {
                                    temp     = temp.Remove(0, 1).Insert(0, temp[0].ToString(CultureInfo.InvariantCulture).ToUpper());
                                    doRepeat = true;
                                }

                                if (pre.EndsWith(' ') && temp.StartsWith('-'))
                                {
                                    temp = temp.Remove(0, 1);
                                }

                                temp = pre + temp;
                            }

                            if (temp.EndsWith(Environment.NewLine + "- "))
                            {
                                temp = temp.Remove(temp.Length - 4, 4);
                            }

                            var st = new StripableText(temp);
                            if (st.StrippedText.Length == 0)
                            {
                                return(string.Empty);
                            }

                            if (!temp.Contains(Environment.NewLine) && text.Contains(Environment.NewLine) && temp.StartsWith('-'))
                            {
                                temp = temp.Remove(0, 1).Trim();
                            }

                            text = temp;
                        }
                    }
                }
            }
            string[] lines = text.Split(Utilities.NewLineChars, StringSplitOptions.RemoveEmptyEntries);
            if (text != oldText && lines.Length == 2)
            {
                if (lines[0] == "-" && lines[1] == "-")
                {
                    return(string.Empty);
                }
                if (lines[0].StartsWith('-') && lines[0].Length > 1 && lines[1].Trim() == "-")
                {
                    return(lines[0].Remove(0, 1).Trim());
                }
                if (lines[1].StartsWith('-') && lines[1].Length > 1 && lines[0].Trim() == "-")
                {
                    return(lines[1].Remove(0, 1).Trim());
                }
                if (lines[0].Length > 1 && (lines[1] == "-") || lines[1] == "." || lines[1] == "!" || lines[1] == "?")
                {
                    if (oldText.Contains(Environment.NewLine + "-") && lines[0].StartsWith('-'))
                    {
                        lines[0] = lines[0].Remove(0, 1);
                    }
                    return(lines[0].Trim());
                }
            }

            return(text);
        }
        public void Fix(Subtitle subtitle, IFixCallbacks callbacks)
        {
            var language = Configuration.Settings.Language.FixCommonErrors;
            string fixAction = language.FixMissingPeriodAtEndOfLine;
            int missigPeriodsAtEndOfLine = 0;

            for (int i = 0; i < subtitle.Paragraphs.Count; i++)
            {
                Paragraph p = subtitle.Paragraphs[i];
                Paragraph next = subtitle.GetParagraphOrDefault(i + 1);
                string nextText = string.Empty;
                if (next != null)
                {
                    nextText = HtmlUtil.RemoveHtmlTags(next.Text, true).TrimStart('-', '"', '„').TrimStart();
                }
                bool isNextClose = next != null && next.StartTime.TotalMilliseconds - p.EndTime.TotalMilliseconds < 400;
                string tempNoHtml = HtmlUtil.RemoveHtmlTags(p.Text).TrimEnd();

                if (IsOneLineUrl(p.Text) || p.Text.Contains(ExpectedChars) || p.Text.EndsWith('\''))
                {
                    // ignore urls
                }
                else if (!string.IsNullOrEmpty(nextText) && next != null &&
                    next.Text.Length > 0 &&
                    Utilities.UppercaseLetters.Contains(nextText[0]) &&
                    tempNoHtml.Length > 0 &&
                    !ExpectedString1.Contains(tempNoHtml[tempNoHtml.Length - 1]))
                {
                    string tempTrimmed = tempNoHtml.TrimEnd().TrimEnd('\'', '"', '“', '”').TrimEnd();
                    if (tempTrimmed.Length > 0 && !ExpectedString2.Contains(tempTrimmed[tempTrimmed.Length - 1]) && p.Text != p.Text.ToUpper())
                    {
                        //don't end the sentence if the next word is an I word as they're always capped.
                        bool isNextCloseAndStartsWithI = isNextClose && (nextText.StartsWith("I ", StringComparison.Ordinal) ||
                                                                         nextText.StartsWith("I'", StringComparison.Ordinal));

                        if (!isNextCloseAndStartsWithI)
                        {
                            //test to see if the first word of the next line is a name
                            if (!callbacks.IsName(next.Text.Split(WordSplitChars)[0]) && callbacks.AllowFix(p, fixAction))
                            {
                                string oldText = p.Text;
                                if (p.Text.EndsWith('>'))
                                {
                                    int lastLessThan = p.Text.LastIndexOf('<');
                                    if (lastLessThan > 0)
                                        p.Text = p.Text.Insert(lastLessThan, ".");
                                }
                                else
                                {
                                    if (p.Text.EndsWith('“') && tempNoHtml.StartsWith('„'))
                                        p.Text = p.Text.TrimEnd('“') + ".“";
                                    else if (p.Text.EndsWith('"') && tempNoHtml.StartsWith('"'))
                                        p.Text = p.Text.TrimEnd('"') + ".\"";
                                    else
                                        p.Text += ".";
                                }
                                if (p.Text != oldText)
                                {
                                    missigPeriodsAtEndOfLine++;
                                    callbacks.AddFixToListView(p, fixAction, oldText, p.Text);
                                }
                            }
                        }
                    }
                }
                else if (next != null && !string.IsNullOrEmpty(p.Text) && Utilities.AllLettersAndNumbers.Contains(p.Text[p.Text.Length - 1]))
                {
                    if (p.Text != p.Text.ToUpper())
                    {
                        var st = new StripableText(next.Text);
                        if (st.StrippedText.Length > 0 && st.StrippedText != st.StrippedText.ToUpper() &&
                            Utilities.UppercaseLetters.Contains(st.StrippedText[0]))
                        {
                            if (callbacks.AllowFix(p, fixAction))
                            {
                                int j = p.Text.Length - 1;
                                while (j >= 0 && !@".!?¿¡".Contains(p.Text[j]))
                                    j--;
                                string endSign = ".";
                                if (j >= 0 && p.Text[j] == '¿')
                                    endSign = "?";
                                if (j >= 0 && p.Text[j] == '¡')
                                    endSign = "!";

                                string oldText = p.Text;
                                missigPeriodsAtEndOfLine++;
                                p.Text += endSign;
                                callbacks.AddFixToListView(p, fixAction, oldText, p.Text);
                            }
                        }
                    }
                }

                if (p.Text.Length > 4)
                {
                    int indexOfNewLine = p.Text.IndexOf(Environment.NewLine + " -", 3, StringComparison.Ordinal);
                    if (indexOfNewLine < 0)
                        indexOfNewLine = p.Text.IndexOf(Environment.NewLine + "-", 3, StringComparison.Ordinal);
                    if (indexOfNewLine < 0)
                        indexOfNewLine = p.Text.IndexOf(Environment.NewLine + "<i>-", 3, StringComparison.Ordinal);
                    if (indexOfNewLine < 0)
                        indexOfNewLine = p.Text.IndexOf(Environment.NewLine + "<i> -", 3, StringComparison.Ordinal);
                    if (indexOfNewLine > 0 && Configuration.Settings.General.UppercaseLetters.Contains(char.ToUpper(p.Text[indexOfNewLine - 1])) && callbacks.AllowFix(p, fixAction))
                    {
                        string oldText = p.Text;

                        string text = p.Text.Substring(0, indexOfNewLine);
                        var st = new StripableText(text);
                        if (st.Pre.TrimEnd().EndsWith('¿')) // Spanish ¿
                            p.Text = p.Text.Insert(indexOfNewLine, "?");
                        else if (st.Pre.TrimEnd().EndsWith('¡')) // Spanish ¡
                            p.Text = p.Text.Insert(indexOfNewLine, "!");
                        else
                            p.Text = p.Text.Insert(indexOfNewLine, ".");

                        missigPeriodsAtEndOfLine++;
                        callbacks.AddFixToListView(p, fixAction, oldText, p.Text);
                    }
                }
            }
            callbacks.UpdateFixStatus(missigPeriodsAtEndOfLine, language.AddPeriods, language.XPeriodsAdded);
        }
Exemplo n.º 51
0
        private void FixSpanishInvertedLetter(char mark, string inverseMark, Paragraph p, Paragraph last, ref bool wasLastLineClosed, string fixAction, ref int fixCount, IFixCallbacks callbacks)
        {
            if (p.Text.Contains(mark))
            {
                bool skip = false;
                if (last != null && p.Text.Contains(mark) && !p.Text.Contains(inverseMark) && last.Text.Contains(inverseMark) && !last.Text.Contains(mark))
                {
                    skip = true;
                }

                if (!skip && Utilities.CountTagInText(p.Text, mark) == Utilities.CountTagInText(p.Text, inverseMark) &&
                    HtmlUtil.RemoveHtmlTags(p.Text).TrimStart(inverseMark[0]).Contains(inverseMark) == false &&
                    HtmlUtil.RemoveHtmlTags(p.Text).TrimEnd(mark).Contains(mark) == false)
                {
                    skip = true;
                }

                if (!skip)
                {
                    int startIndex = 0;
                    int markIndex  = p.Text.IndexOf(mark);
                    if (!wasLastLineClosed && ((p.Text.IndexOf('!') > 0 && p.Text.IndexOf('!') < markIndex) ||
                                               (p.Text.IndexOf('?') > 0 && p.Text.IndexOf('?') < markIndex) ||
                                               (p.Text.IndexOf('.') > 0 && p.Text.IndexOf('.') < markIndex)))
                    {
                        wasLastLineClosed = true;
                    }
                    while (markIndex > 0 && startIndex < p.Text.Length)
                    {
                        int inverseMarkIndex = p.Text.IndexOf(inverseMark, startIndex, StringComparison.Ordinal);
                        if (wasLastLineClosed && (inverseMarkIndex < 0 || inverseMarkIndex > markIndex))
                        {
                            if (callbacks.AllowFix(p, fixAction))
                            {
                                int j = markIndex - 1;

                                while (j > startIndex && (p.Text[j] == '.' || p.Text[j] == '!' || p.Text[j] == '?'))
                                {
                                    j--;
                                }

                                while (j > startIndex &&
                                       (p.Text[j] != '.' || IsSpanishAbbreviation(p.Text, j, callbacks)) &&
                                       p.Text[j] != '!' &&
                                       p.Text[j] != '?' &&
                                       !(j > 3 && p.Text.Substring(j - 3, 3) == Environment.NewLine + "-") &&
                                       !(j > 4 && p.Text.Substring(j - 4, 4) == Environment.NewLine + " -") &&
                                       !(j > 6 && p.Text.Substring(j - 6, 6) == Environment.NewLine + "<i>-"))
                                {
                                    j--;
                                }

                                if (@".!?".Contains(p.Text[j]))
                                {
                                    j++;
                                }
                                if (j + 3 < p.Text.Length && p.Text.Substring(j + 1, 2) == Environment.NewLine)
                                {
                                    j += 3;
                                }
                                else if (j + 2 < p.Text.Length && p.Text.Substring(j, 2) == Environment.NewLine)
                                {
                                    j += 2;
                                }
                                if (j >= startIndex)
                                {
                                    string part = p.Text.Substring(j, markIndex - j + 1);

                                    string speaker    = string.Empty;
                                    int    speakerEnd = part.IndexOf(')');
                                    if (part.StartsWith('(') && speakerEnd > 0 && speakerEnd < part.IndexOf(mark))
                                    {
                                        while (Environment.NewLine.Contains(part[speakerEnd + 1]))
                                        {
                                            speakerEnd++;
                                        }
                                        speaker = part.Substring(0, speakerEnd + 1);
                                        part    = part.Substring(speakerEnd + 1);
                                    }
                                    speakerEnd = part.IndexOf(']');
                                    if (part.StartsWith('[') && speakerEnd > 0 && speakerEnd < part.IndexOf(mark))
                                    {
                                        while (Environment.NewLine.Contains(part[speakerEnd + 1]))
                                        {
                                            speakerEnd++;
                                        }
                                        speaker = part.Substring(0, speakerEnd + 1);
                                        part    = part.Substring(speakerEnd + 1);
                                    }

                                    var st = new StripableText(part);
                                    if (j == 0 && mark == '!' && st.Pre == "¿" && Utilities.CountTagInText(p.Text, mark) == 1 && HtmlUtil.RemoveHtmlTags(p.Text).EndsWith(mark))
                                    {
                                        p.Text = inverseMark + p.Text;
                                    }
                                    else if (j == 0 && mark == '?' && st.Pre == "¡" && Utilities.CountTagInText(p.Text, mark) == 1 && HtmlUtil.RemoveHtmlTags(p.Text).EndsWith(mark))
                                    {
                                        p.Text = inverseMark + p.Text;
                                    }
                                    else
                                    {
                                        string temp       = inverseMark;
                                        int    addToIndex = 0;
                                        while (p.Text.Length > markIndex + 1 && p.Text[markIndex + 1] == mark &&
                                               Utilities.CountTagInText(p.Text, mark) > Utilities.CountTagInText(p.Text + temp, inverseMark))
                                        {
                                            temp    += inverseMark;
                                            st.Post += mark;
                                            markIndex++;
                                            addToIndex++;
                                        }

                                        p.Text     = p.Text.Remove(j, markIndex - j + 1).Insert(j, speaker + st.Pre + temp + st.StrippedText + st.Post);
                                        markIndex += addToIndex;
                                    }
                                }
                            }
                        }
                        else if (last != null && !wasLastLineClosed && inverseMarkIndex == p.Text.IndexOf(mark) && !last.Text.Contains(inverseMark))
                        {
                            string lastOldtext = last.Text;
                            int    idx         = last.Text.Length - 2;
                            while (idx > 0 && (last.Text.Substring(idx, 2) != ". ") && (last.Text.Substring(idx, 2) != "! ") && (last.Text.Substring(idx, 2) != "? "))
                            {
                                idx--;
                            }

                            last.Text = last.Text.Insert(idx, inverseMark);
                            fixCount++;
                            callbacks.AddFixToListView(last, fixAction, lastOldtext, last.Text);
                        }

                        startIndex = markIndex + 2;
                        if (startIndex < p.Text.Length)
                        {
                            markIndex = p.Text.IndexOf(mark, startIndex);
                        }
                        else
                        {
                            markIndex = -1;
                        }
                        wasLastLineClosed = true;
                    }
                }
                if (p.Text.EndsWith(mark + "...", StringComparison.Ordinal) && p.Text.Length > 4)
                {
                    p.Text = p.Text.Remove(p.Text.Length - 4, 4) + "..." + mark;
                }
            }
            else if (Utilities.CountTagInText(p.Text, inverseMark) == 1)
            {
                int idx = p.Text.IndexOf(inverseMark, StringComparison.Ordinal);
                while (idx < p.Text.Length && !@".!?".Contains(p.Text[idx]))
                {
                    idx++;
                }
                if (idx < p.Text.Length)
                {
                    p.Text = p.Text.Insert(idx, mark.ToString(CultureInfo.InvariantCulture));
                    if (p.Text.Contains("¡¿") && p.Text.Contains("!?"))
                    {
                        p.Text = p.Text.Replace("!?", "?!");
                    }
                    if (p.Text.Contains("¿¡") && p.Text.Contains("?!"))
                    {
                        p.Text = p.Text.Replace("?!", "!?");
                    }
                }
            }
        }
        public void Fix(Subtitle subtitle, IFixCallbacks callbacks)
        {
            var    language  = Configuration.Settings.Language.FixCommonErrors;
            string fixAction = language.StartWithUppercaseLetterAfterColon;
            int    noOfFixes = 0;

            for (int i = 0; i < subtitle.Paragraphs.Count; i++)
            {
                var       p         = new Paragraph(subtitle.Paragraphs[i]);
                Paragraph last      = subtitle.GetParagraphOrDefault(i - 1);
                string    oldText   = p.Text;
                int       skipCount = 0;

                if (last != null)
                {
                    string lastText = HtmlUtil.RemoveHtmlTags(last.Text);
                    if (lastText.EndsWith(':') || lastText.EndsWith(';'))
                    {
                        var st = new StripableText(p.Text);
                        if (st.StrippedText.Length > 0 && st.StrippedText[0] != char.ToUpper(st.StrippedText[0]))
                        {
                            p.Text = st.Pre + char.ToUpper(st.StrippedText[0]) + st.StrippedText.Substring(1) + st.Post;
                        }
                    }
                }

                if (oldText.Contains(ExpectedChars))
                {
                    bool lastWasColon = false;
                    for (int j = 0; j < p.Text.Length; j++)
                    {
                        var s = p.Text[j];
                        if (s == ':' || s == ';')
                        {
                            lastWasColon = true;
                        }
                        else if (lastWasColon)
                        {
                            // skip whitespace index
                            if (j + 2 < p.Text.Length && p.Text[j] == ' ')
                            {
                                s = p.Text[++j];
                            }

                            var startFromJ = p.Text.Substring(j);
                            if (startFromJ.Length > 3 && startFromJ[0] == '<' && startFromJ[2] == '>' && (startFromJ[1] == 'i' || startFromJ[1] == 'b' || startFromJ[1] == 'u'))
                            {
                                skipCount = 2;
                            }
                            else if (startFromJ.StartsWith("<font ", StringComparison.OrdinalIgnoreCase) && p.Text.Substring(j).Contains('>'))
                            {
                                skipCount = (j + startFromJ.IndexOf('>', 6)) - j;
                            }
                            else if (Helper.IsTurkishLittleI(s, callbacks.Encoding, callbacks.Language))
                            {
                                p.Text       = p.Text.Remove(j, 1).Insert(j, Helper.GetTurkishUppercaseLetter(s, callbacks.Encoding).ToString(CultureInfo.InvariantCulture));
                                lastWasColon = false;
                            }
                            else if (char.IsLower(s))
                            {
                                // iPhone
                                bool change = true;
                                if (s == 'i' && p.Text.Length > j + 1)
                                {
                                    if (p.Text[j + 1] == char.ToUpper(p.Text[j + 1]))
                                    {
                                        change = false;
                                    }
                                }
                                if (change)
                                {
                                    p.Text = p.Text.Remove(j, 1).Insert(j, char.ToUpper(s).ToString(CultureInfo.InvariantCulture));
                                }
                                lastWasColon = false;
                            }
                            else if (!(" " + Environment.NewLine).Contains(s))
                            {
                                lastWasColon = false;
                            }

                            // move the: 'j' pointer and reset skipCount to 0
                            if (skipCount > 0)
                            {
                                j        += skipCount;
                                skipCount = 0;
                            }
                        }
                    }
                }

                if (oldText != p.Text && callbacks.AllowFix(p, fixAction))
                {
                    noOfFixes++;
                    subtitle.Paragraphs[i].Text = p.Text;
                    callbacks.AddFixToListView(p, fixAction, oldText, p.Text);
                }
            }
            callbacks.UpdateFixStatus(noOfFixes, language.StartWithUppercaseLetterAfterColon, noOfFixes.ToString(CultureInfo.InvariantCulture));
        }