// For "<i>Hi!</i>": the opening italic tag ends up in Pre, the trailing "!" plus the
// closing tag in Post, and only "Hi" remains as StrippedText.
public void StrippableTextItalic()
     var st = new StrippableText("<i>Hi!</i>");
     // Expected value first, actual value second, so a failing assert reports them correctly.
     Assert.AreEqual("<i>", st.Pre);
     Assert.AreEqual("!</i>", st.Post);
     Assert.AreEqual("Hi", st.StrippedText);
 // A leading "{MAN}" token is not moved into Pre: Pre stays empty, only the trailing "!"
 // is split off into Post, and the token remains part of StrippedText.
 public void StrippableTextFontDontTouch()
     var st = new StrippableText("{MAN} Hi, how are you today!");
     // Expected value first, actual value second, so a failing assert reports them correctly.
     Assert.AreEqual("", st.Pre);
     Assert.AreEqual("!", st.Post);
     Assert.AreEqual("{MAN} Hi, how are you today", st.StrippedText);
 // For a font-tagged text: the opening <font ...> tag ends up in Pre, the trailing "!"
 // plus the closing tag in Post, and only "Hi" remains as StrippedText.
 public void StrippableTextFont()
     var st = new StrippableText("<font color=\"red\">Hi!</font>");
     // Expected value first, actual value second, so a failing assert reports them correctly.
     Assert.AreEqual("<font color=\"red\">", st.Pre);
     Assert.AreEqual("!</font>", st.Post);
     Assert.AreEqual("Hi", st.StrippedText);
 // A leading "{\an9}" override block ends up in Pre, the trailing "!" in Post,
 // and only "Hi" remains as StrippedText.
 public void StrippableTextAss()
     var st = new StrippableText("{\\an9}Hi!");
     // Expected value first, actual value second, so a failing assert reports them correctly.
     Assert.AreEqual("{\\an9}", st.Pre);
     Assert.AreEqual("!", st.Post);
     Assert.AreEqual("Hi", st.StrippedText);
 // A single letter with nothing to strip: Pre and Post are empty and StrippedText is the input.
 public void StrippableOnlyText()
     var st = new StrippableText("H");
     // Expected value first, actual value second, so a failing assert reports them correctly.
     Assert.AreEqual("", st.Pre);
     Assert.AreEqual("", st.Post);
     Assert.AreEqual("H", st.StrippedText);
        // Runs StrippableText.FixCasing with the _germanNouns list on the text and then applies
        // every pattern/replacement pair from _regularExpressionList to the stripped text.
        // This is only done when the tag-free text differs from its upper-case form and
        // text is not null/empty.
        public string UppercaseNouns(string text)
            // Tags are removed so that markup does not take part in the all-uppercase comparison below.
            var textNoTags = HtmlUtil.RemoveHtmlTags(text, true);

            if (textNoTags != textNoTags.ToUpperInvariant() && !string.IsNullOrEmpty(text))
                var st = new StrippableText(text);

                st.FixCasing(_germanNouns, true, false, false, string.Empty);

                // Each key is a regex; its dictionary value is the replacement string.
                foreach (var regex in _regularExpressionList.Keys)
                    st.StrippedText = regex.Replace(st.StrippedText, _regularExpressionList[regex]);


        // Applies the selected casing mode to text. The mode is chosen by the members
        // FixNormal, FixMakeUppercase and FixMakeLowercase, checked in that order.
        //   text                 - the text to change
        //   lastLine             - passed through to StrippableText.FixCasing in normal mode
        //   nameList             - passed through to StrippableText.FixCasing in normal mode
        //   subtitleCulture      - culture used for every ToUpper/ToLower call
        //   millisecondsFromLast - passed through to StrippableText.FixCasing in normal mode
        private string Fix(string text, string lastLine, List <string> nameList, CultureInfo subtitleCulture, double millisecondsFromLast)
            // Kept so the result can be compared with the input further down.
            string original = text;

            if (FixNormal)
                // True when the only-all-uppercase option is on and the text is not entirely upper case.
                if (FixNormalOnlyAllUppercase && text != text.ToUpper(subtitleCulture))

                if (text.Length > 1)
                    // first all to lower
                    text = text.ToLower(subtitleCulture).Trim();
                    text = text.FixExtraSpaces();
                    var st = new StrippableText(text);
                    st.FixCasing(nameList, false, true, true, lastLine, millisecondsFromLast); // fix all casing but names (that's a separate option)
                    text = st.MergedString;
            else if (FixMakeUppercase)
                // Only StrippedText is upper-cased; Pre and Post are re-attached unchanged.
                var st = new StrippableText(text);
                text = st.Pre + st.StrippedText.ToUpper(subtitleCulture) + st.Post;
                text = HtmlUtil.FixUpperTags(text); // tags inside text
            else if (FixMakeLowercase)
                text = text.ToLower(subtitleCulture);
            // True when one of the modes above changed the text.
            if (original != text)

        // Builds a new Subtitle from _subtitle.Paragraphs in which a paragraph may be joined with
        // the paragraph that follows it, then replaces _subtitle with the result.
        private void MergeLinesWithContinuation()
            var temp = new Subtitle();
            // Set after a merge; the loop body is guarded by !skipNext.
            bool skipNext = false;
            for (int i = 0; i < _subtitle.Paragraphs.Count; i++)
                Paragraph p = _subtitle.Paragraphs[i];
                if (!skipNext)
                    Paragraph next = _subtitle.GetParagraphOrDefault(i + 1);

                    // Merge candidate: p has no line break, a next paragraph exists,
                    // and the ListViewSyntaxMoreThanXLinesX setting is greater than 1.
                    bool merge = !(p.Text.Contains(Environment.NewLine) || next == null) && Configuration.Settings.Tools.ListViewSyntaxMoreThanXLinesX > 1;

                    // p ends with '!' or '.' and its stripped text starts with an upper-case letter: no merge.
                    if (merge && (p.Text.TrimEnd().EndsWith('!') || p.Text.TrimEnd().EndsWith('.')))
                        var st = new StrippableText(p.Text);
                        if (st.StrippedText.Length > 0 && char.IsUpper(st.StrippedText[0]))
                            merge = false;

                    // Either text is within 5 characters of the maximum subtitle line length: no merge.
                    if (merge && (p.Text.Length >= Configuration.Settings.General.SubtitleLineMaximumLength - 5 || next.Text.Length >= Configuration.Settings.General.SubtitleLineMaximumLength - 5))
                        merge = false;

                    if (merge)
                        // The merged paragraph is p's text, a line break, then next's text.
                        temp.Paragraphs.Add(new Paragraph { Text = p.Text + Environment.NewLine + next.Text });
                        skipNext = true;
                        // Adds a copy of p.
                        temp.Paragraphs.Add(new Paragraph(p));
                    skipNext = false;
            _subtitle = temp;
        // For every paragraph in _subtitle, runs the names listed in listViewNames through
        // StrippableText.FixCasing and adds each paragraph whose text changed to the preview list.
        // Shows a wait cursor while working and updates the groupBoxLinesFound caption afterwards.
        private void GeneratePreview()
            Cursor = Cursors.WaitCursor;
            foreach (Paragraph p in _subtitle.Paragraphs)
                string text = p.Text;
                foreach (ListViewItem item in listViewNames.Items)
                    // The name is read from the item's second column (sub-item index 1).
                    string name = item.SubItems[1].Text;

                    // The all-uppercase comparison is made on the text without HTML tags.
                    string textNoTags = HtmlUtil.RemoveHtmlTags(text);
                    if (textNoTags != textNoTags.ToUpper())
                        // Requires: item is checked, the text contains the name (case-insensitive),
                        // the name is longer than one character and is not entirely lower case.
                        if (item.Checked && text != null && text.Contains(name, StringComparison.OrdinalIgnoreCase) && name.Length > 1 && name != name.ToLower())
                            var st = new StrippableText(text);
                            st.FixCasing(new List<string> { name }, true, false, false, string.Empty);
                            text = st.MergedString;
                // Only paragraphs whose text actually changed are listed.
                if (text != p.Text)
                    AddToPreviewListView(p, text);
            groupBoxLinesFound.Text = string.Format(Configuration.Settings.Language.ChangeCasingNames.LinesFoundX, listViewFixes.Items.Count);
            Cursor = Cursors.Default;
        // Applies the casing mode selected by the radio buttons (normal, uppercase, lowercase,
        // checked in that order) to text and returns the result.
        //   text     - the text to change
        //   lastLine - passed through to StrippableText.FixCasing in normal mode
        //   namesEtc - passed through to StrippableText.FixCasing in normal mode
        private string FixCasing(string text, string lastLine, List<string> namesEtc)
            // Kept so the result can be compared with the input further down.
            string original = text;
            if (radioButtonNormal.Checked)
                // With the only-all-uppercase option on, text that is not entirely upper case is returned untouched.
                if (checkBoxOnlyAllUpper.Checked && text != text.ToUpper())
                    return text;

                if (text.Length > 1)
                    // first all to lower
                    text = text.ToLower().Trim();
                    text = text.FixExtraSpaces();
                    var st = new StrippableText(text);
                    st.FixCasing(namesEtc, false, true, true, lastLine); // fix all casing but names (that's a separate option)
                    text = st.MergedString;
            else if (radioButtonUppercase.Checked)
                // Only StrippedText is upper-cased; Pre and Post are re-attached unchanged.
                var st = new StrippableText(text);
                text = st.Pre + st.StrippedText.ToUpper() + st.Post;
                text = HtmlUtil.FixUpperTags(text); // tags inside text
            else if (radioButtonLowercase.Checked)
                text = text.ToLower();
            // True when one of the modes above changed the text.
            if (original != text)
            return text;
        // Applies a fixed sequence of string rewrites to input and returns the result.
        // The rewrites normalise broken ellipses ("..", ". .", ". . ." and so on), insert a space
        // after a leading dialogue dash, upper-case the first letter of the stripped text, turn a
        // closing quote plus period into "...", and swap 'I' / 'l' where the RegexUppercaseI /
        // RegexLowercaseL patterns match.
        //   input            - the text to fix; returned unchanged when the
        //                      OcrFixUseHardcodedRules setting is off
        //   lastLine         - the previous line; may be null
        //   abbreviationList - handed to EndsWithAbbreviation when testing lastLine
        public string FixOcrErrorsViaHardcodedRules(string input, string lastLine, HashSet<string> abbreviationList)
            if (!Configuration.Settings.Tools.OcrFixUseHardcodedRules)
                return input;

            input = input.Replace(",...", "...");

            // Exactly two leading dots become three.
            if (input.StartsWith("..") && !input.StartsWith("...", StringComparison.Ordinal))
                input = "." + input;

            // A leading "- " or "-" is cut off here and stored in pre; it is put back further down
            // so the start-of-line ellipsis rules see the text without the dash.
            string pre = string.Empty;
            if (input.StartsWith("- ", StringComparison.Ordinal))
                pre = "- ";
                input = input.Remove(0, 2);
            else if (input.StartsWith('-'))
                pre = "-";
                input = input.Remove(0, 1);

            // Computed once, before any rewriting; the same flag guards the second ellipsis section below.
            bool hasDotDot = input.Contains("..") || input.Contains(". .");
            if (hasDotDot)
                // Ellipsis variants at the start of the text (optionally after "<i>") or after a line break.
                if (input.Length > 5 && input.StartsWith("..", StringComparison.Ordinal) && Utilities.AllLettersAndNumbers.Contains(input[2]))
                    input = "..." + input.Remove(0, 2);
                if (input.Length > 7 && input.StartsWith("<i>..", StringComparison.Ordinal) && Utilities.AllLettersAndNumbers.Contains(input[5]))
                    input = "<i>..." + input.Remove(0, 5);

                if (input.Length > 5 && input.StartsWith(".. ") && Utilities.AllLettersAndNumbers.Contains(input[3]))
                    input = "..." + input.Remove(0, 3);
                if (input.Length > 7 && input.StartsWith("<i>.. ", StringComparison.Ordinal) && Utilities.AllLettersAndNumbers.Contains(input[6]))
                    input = "<i>..." + input.Remove(0, 6);
                if (input.Contains(Environment.NewLine + ".. "))
                    input = input.Replace(Environment.NewLine + ".. ", Environment.NewLine + "...");
                if (input.Contains(Environment.NewLine + "<i>.. "))
                    input = input.Replace(Environment.NewLine + "<i>.. ", Environment.NewLine + "<i>...");

                if (input.StartsWith(". ..", StringComparison.Ordinal))
                    input = "..." + input.Remove(0, 4);
                if (input.StartsWith(".. .", StringComparison.Ordinal))
                    input = "..." + input.Remove(0, 4);
                if (input.StartsWith(". . ."))
                    input = "..." + input.Remove(0, 5);
                // Drops the space directly after a leading "...".
                if (input.StartsWith("... ", StringComparison.Ordinal))
                    input = input.Remove(3, 1);

            // Put the dialogue dash (if any) back in front.
            input = pre + input;

            if (hasDotDot)
                // Spaced-out ellipses directly after a leading "<i>".
                if (input.StartsWith("<i>. ..", StringComparison.Ordinal))
                    input = "<i>..." + input.Remove(0, 7);
                if (input.StartsWith("<i>.. .", StringComparison.Ordinal))
                    input = "<i>..." + input.Remove(0, 7);

                if (input.StartsWith("<i>. . .", StringComparison.Ordinal))
                    input = "<i>..." + input.Remove(0, 8);
                if (input.StartsWith("<i>... ", StringComparison.Ordinal))
                    input = input.Remove(6, 1);
                if (input.StartsWith(". . <i>.", StringComparison.Ordinal))
                    input = "<i>..." + input.Remove(0, 8);

                // Moves a leading "..." inside the italic tag when "</i>" comes after the first space.
                if (input.StartsWith("...<i>", StringComparison.Ordinal) && (input.IndexOf("</i>", StringComparison.Ordinal) > input.IndexOf(' ')))
                    input = "<i>..." + input.Remove(0, 6);

                // Spaced-out ellipses at the end of the text.
                if (input.EndsWith(". ..", StringComparison.Ordinal))
                    input = input.Remove(input.Length - 4, 4) + "...";
                if (input.EndsWith(".. .", StringComparison.Ordinal))
                    input = input.Remove(input.Length - 4, 4) + "...";
                if (input.EndsWith(". . .", StringComparison.Ordinal))
                    input = input.Remove(input.Length - 5, 5) + "...";
                if (input.EndsWith(". ...", StringComparison.Ordinal))
                    input = input.Remove(input.Length - 5, 5) + "...";

                // The same endings, followed by a closing "</i>".
                if (input.EndsWith(". ..</i>", StringComparison.Ordinal))
                    input = input.Remove(input.Length - 8, 8) + "...</i>";
                if (input.EndsWith(".. .</i>", StringComparison.Ordinal))
                    input = input.Remove(input.Length - 8, 8) + "...</i>";
                if (input.EndsWith(". . .</i>", StringComparison.Ordinal))
                    input = input.Remove(input.Length - 9, 9) + "...</i>";
                if (input.EndsWith(". ...</i>", StringComparison.Ordinal))
                    input = input.Remove(input.Length - 9, 9) + "...</i>";

                // Ellipsis split around the closing "</i>": all three dots are moved inside the tag.
                if (input.EndsWith(".</i> . .", StringComparison.Ordinal))
                    input = input.Remove(input.Length - 9, 9) + "...</i>";
                if (input.EndsWith(".</i>..", StringComparison.Ordinal))
                    input = input.Remove(input.Length - 7, 7) + "...</i>";
                input = input.Replace(".</i> . ." + Environment.NewLine, "...</i>" + Environment.NewLine);

                // Two dots before '?' become three; the third replace undoes the fourth dot
                // that the second replace adds to an already correct "...?".
                input = input.Replace(".. ?", "..?");
                input = input.Replace("..?", "...?");
                input = input.Replace("....?", "...?");

                // Same three steps for '!'.
                input = input.Replace(".. !", "..!");
                input = input.Replace("..!", "...!");
                input = input.Replace("....!", "...!");

                input = input.Replace("... ?", "...?");
                input = input.Replace("... !", "...!");

                // Run twice so that a run of five dots also shrinks to three.
                input = input.Replace("....", "...");
                input = input.Replace("....", "...");

                // Drops a leading dash before "..." when the previous line ends with "..." and
                // no later line in this text starts with a dash.
                if (input.StartsWith("- ...", StringComparison.Ordinal) && lastLine != null && lastLine.EndsWith("...", StringComparison.Ordinal) && !(input.Contains(Environment.NewLine + "-")))
                    input = input.Remove(0, 2);
                if (input.StartsWith("-...", StringComparison.Ordinal) && lastLine != null && lastLine.EndsWith("...", StringComparison.Ordinal) && !(input.Contains(Environment.NewLine + "-")))
                    input = input.Remove(0, 1);

            // "-Word" at the start becomes "- Word".
            if (input.Length > 2 && input[0] == '-' && char.IsUpper(input[1]))
                input = input.Insert(1, " ");

            // Same after a leading "<i>".
            if (input.Length > 5 && input.StartsWith("<i>-", StringComparison.Ordinal) && char.IsUpper(input[4]))
                input = input.Insert(4, " ");

            // Same for the first dash found directly after a line break.
            int nlLen = Environment.NewLine.Length;
            int idx = input.IndexOf(Environment.NewLine + "-", StringComparison.Ordinal);
            if (idx > 0 && idx + nlLen + 1 < input.Length && char.IsUpper(input[idx + nlLen + 1]))
                input = input.Insert(idx + Environment.NewLine.Length + 1, " ");

            idx = input.IndexOf(Environment.NewLine + "<i>-", StringComparison.Ordinal);
            if (idx > 0 && idx + nlLen + 4 < input.Length && char.IsUpper(input[idx + nlLen + 4]))
                input = input.Insert(idx + nlLen + 4, " ");

            // Upper-cases the first letter of the stripped text when the previous line is empty or
            // ends with one of the characters tested below.
            if (string.IsNullOrEmpty(lastLine) ||
                lastLine.EndsWith('.') ||
                lastLine.EndsWith('!') ||
                lastLine.EndsWith('?') ||
                lastLine.EndsWith(']') ||
                lastLine = HtmlUtil.RemoveHtmlTags(lastLine);
                var st = new StrippableText(input);
                // Not applied when the previous line ends with "..." or with an abbreviation.
                if (lastLine == null || (!lastLine.EndsWith("...", StringComparison.Ordinal) && !EndsWithAbbreviation(lastLine, abbreviationList)))
                    if (st.StrippedText.Length > 0 && !char.IsUpper(st.StrippedText[0]) && !st.Pre.EndsWith('[') && !st.Pre.EndsWith('(') && !st.Pre.EndsWith("..."))
                        if (!HtmlUtil.StartsWithUrl(st.StrippedText))
                            var uppercaseLetter = char.ToUpper(st.StrippedText[0]);
                            // A first letter 'l' followed by one of the listed letters is replaced by 'I'.
                            if (st.StrippedText.Length > 1 && uppercaseLetter == 'L' && @"abcdfghjklmnpqrstvwxz".Contains(st.StrippedText[1]))
                                uppercaseLetter = 'I';
                            // Language code "ita": a leading "lo " (or the whole text "lo.") gets 'I'.
                            if ((st.StrippedText.StartsWith("lo ", StringComparison.Ordinal) || st.StrippedText.Equals("lo.", StringComparison.Ordinal)) && _threeLetterIsoLanguageName.Equals("ita", StringComparison.Ordinal))
                                uppercaseLetter = 'I';
                            // Language code "nld": a single letter k/m/n/r/s/t after an apostrophe keeps its original case.
                            if ((st.StrippedText.StartsWith("k ", StringComparison.Ordinal) || st.StrippedText.StartsWith("m ", StringComparison.Ordinal) || st.StrippedText.StartsWith("n ", StringComparison.Ordinal) || st.StrippedText.StartsWith("r ", StringComparison.Ordinal) || st.StrippedText.StartsWith("s ", StringComparison.Ordinal) || st.StrippedText.StartsWith("t ", StringComparison.Ordinal)) &&
                                st.Pre.EndsWith('\'') && _threeLetterIsoLanguageName.Equals("nld", StringComparison.Ordinal))
                                uppercaseLetter = st.StrippedText[0];
                            // Language code "eng": "l-I'll " / "l-l'll " becomes "I-I'll ".
                            if ((st.StrippedText.StartsWith("l-I'll ", StringComparison.Ordinal) || st.StrippedText.StartsWith("l-l'll ", StringComparison.Ordinal)) && _threeLetterIsoLanguageName.Equals("eng", StringComparison.Ordinal))
                                uppercaseLetter = 'I';
                                st.StrippedText = "I-I" + st.StrippedText.Remove(0, 3);
                            st.StrippedText = uppercaseLetter + st.StrippedText.Substring(1);
                            input = st.Pre + st.StrippedText + st.Post;

            // lines ending with ". should often end with ... (if no other quotes exist nearby)
            if ((lastLine == null || !lastLine.Contains('"')) &&
                input.EndsWith("\".", StringComparison.Ordinal) && input.IndexOf('"') == input.LastIndexOf('"') && input.Length > 3)
                // Character just before the closing quote; a digit there leaves the text alone.
                var lastChar = input[input.Length - 3];
                if (!char.IsDigit(lastChar))
                    // Cuts off the final two characters (quote and period) and appends "...".
                    int position = input.Length - 2;
                    input = input.Remove(position).Insert(position, "...");

            // change '<number><space>1' to '<number>1'
            if (input.Contains('1'))
                var match = RegexNumber1.Match(input);
                while (match.Success)
                    // Removes the single character at the match position, then searches again from there.
                    input = input.Remove(match.Index, 1);
                    match = RegexNumber1.Match(input, match.Index);

            // change '' to "
            input = input.Replace("''", "\"");

            // change 'sequeI of' to 'sequel of'
            if (input.Contains('I'))
                var match = RegexUppercaseI.Match(input);
                while (match.Success)
                    // No fix when the text just before the match starts a "Mc" or "Mac" prefix.
                    bool doFix = true;
                    if (match.Index >= 1 && input.Substring(match.Index - 1).StartsWith("Mc", StringComparison.Ordinal))
                        doFix = false;
                    if (match.Index >= 2 && input.Substring(match.Index - 2).StartsWith("Mac", StringComparison.Ordinal))
                        doFix = false;

                    // The character one past the match start is replaced with a lower-case 'l'.
                    if (doFix)
                        input = input.Substring(0, match.Index + 1) + "l" + input.Substring(match.Index + 2);

                    // Continue one character further on while still inside the string.
                    if (match.Index + 1 < input.Length)
                        match = RegexUppercaseI.Match(input, match.Index + 1);
                        break; // end while

            // change 'NlCE' to 'NICE'
            if (input.Contains('l'))
                var match = RegexLowercaseL.Match(input);
                while (match.Success)
                    // The character one past the match start is replaced with 'I'; the search then restarts from the beginning.
                    input = input.Substring(0, match.Index + 1) + "I" + input.Substring(match.Index + 2);
                    match = RegexLowercaseL.Match(input);

            return input;
        // Goes through input line by line and replaces a leading lower-case 'i' of a line's stripped
        // text with 'I', depending on how the preceding line ends.
        // Returns the StringBuilder content with trailing '\r' / '\n' characters removed.
        //   input    - text that is split into lines with SplitToLines
        //   lastLine - the line preceding input; used for the first line only
        private string FixLowercaseIToUppercaseI(string input, string lastLine)
            var sb = new StringBuilder();
            var lines = input.SplitToLines();
            for (int i = 0; i < lines.Length; i++)
                string l = lines[i];

                // From the second line on, the previous line of this same text is the reference.
                if (i > 0)
                    lastLine = lines[i - 1];
                lastLine = HtmlUtil.RemoveHtmlTags(lastLine);

                if (string.IsNullOrEmpty(lastLine) ||
                    lastLine.EndsWith('.') ||
                    lastLine.EndsWith('!') ||
                    var st = new StrippableText(l);
                    // Skipped when Pre ends with '[', '(' or "...".
                    if (st.StrippedText.StartsWith('i') && !st.Pre.EndsWith('[') && !st.Pre.EndsWith('(') && !st.Pre.EndsWith("...", StringComparison.Ordinal))
                        // Skipped when the previous line ends with "..." or with an entry from _abbreviationList.
                        if (string.IsNullOrEmpty(lastLine) || (!lastLine.EndsWith("...", StringComparison.Ordinal) && !EndsWithAbbreviation(lastLine, _abbreviationList)))
                            l = st.Pre + "I" + st.StrippedText.Remove(0, 1) + st.Post;
            return sb.ToString().TrimEnd('\r', '\n');
 // For "<i>O</i>": the opening tag ends up in Pre, the closing tag in Post,
 // and the single letter "O" remains as StrippedText.
 public void StrippableTextItalic2()
     var st = new StrippableText("<i>O</i>");
     // Expected value first, actual value second, so a failing assert reports them correctly.
     Assert.AreEqual("<i>", st.Pre);
     Assert.AreEqual("</i>", st.Post);
     Assert.AreEqual("O", st.StrippedText);