/// <summary>
/// A text consisting only of a web address must come out of
/// <c>FixCasing</c> exactly as it went in.
/// </summary>
public void StrippableTextChangeCasing3()
{
    var noNames = new System.Collections.Generic.List<string>();
    var strippable = new StrippableText("www.nikse.dk");

    strippable.FixCasing(noNames, false, true, true, "Bye.");

    Assert.AreEqual("www.nikse.dk", strippable.MergedString);
}
/// <summary>
/// A line that opens with an ellipsis character must be left unchanged by
/// <c>FixCasing</c>: the word after the ellipsis stays lowercase.
/// </summary>
public void StrippableTextChangeEllipsis()
{
    var noNames = new System.Collections.Generic.List<string>();
    var strippable = new StrippableText("…but never could.");

    strippable.FixCasing(noNames, true, true, true, "Bye.");

    Assert.AreEqual("…but never could.", strippable.MergedString);
}
/// <summary>
/// <c>FixCasing</c> must capitalize the start of each sentence while leaving
/// the embedded web address untouched.
/// </summary>
public void StrippableTextChangeCasing2()
{
    var st = new StrippableText("this is for www.nikse.dk! thank you.");

    st.FixCasing(new System.Collections.Generic.List<string>(), false, true, true, "Bye.");

    // Expected value goes first so that a failure message labels the two values correctly.
    Assert.AreEqual("This is for www.nikse.dk! Thank you.", st.MergedString);
}
/// <summary>
/// Rebuilds the preview list. For every paragraph, each checked name from
/// <c>listViewNames</c> is applied through <c>StrippableText.FixCasing</c>;
/// paragraphs whose text changed are added via <c>AddToPreviewListView</c>.
/// Finally the group box caption is updated with the number of preview rows.
/// </summary>
private void GeneratePreview()
{
    Cursor = Cursors.WaitCursor;
    listViewFixes.BeginUpdate();
    try
    {
        listViewFixes.Items.Clear();
        foreach (var p in _subtitle.Paragraphs)
        {
            string text = p.Text;
            foreach (ListViewItem item in listViewNames.Items)
            {
                // Cheap checks first; the null check must precede any use of the text.
                if (!item.Checked || text == null)
                {
                    continue;
                }

                string name = item.SubItems[1].Text;
                if (name.Length <= 1 || name == name.ToLowerInvariant())
                {
                    continue;
                }

                // Text that equals its own upper-cased form is left alone.
                string textNoTags = HtmlUtil.RemoveHtmlTags(text, true);
                if (textNoTags == textNoTags.ToUpperInvariant())
                {
                    continue;
                }

                if (text.Contains(name, StringComparison.OrdinalIgnoreCase))
                {
                    var st = new StrippableText(text);
                    st.FixCasing(new List<string> { name }, true, false, false, string.Empty);
                    text = st.MergedString;
                }
            }

            if (text != p.Text)
            {
                AddToPreviewListView(p, text);
            }
        }
    }
    finally
    {
        // Always undo the UI state, even when building the preview throws.
        listViewFixes.EndUpdate();
        Cursor = Cursors.Default;
    }

    groupBoxLinesFound.Text = string.Format(Configuration.Settings.Language.ChangeCasingNames.LinesFoundX, listViewFixes.Items.Count);
}
/// <summary>
/// Fixes casing of <paramref name="subtitle"/> in place: first the general
/// <c>FixCasing</c> pass, then a name pass using every name from the
/// <c>NameList</c> for <paramref name="language"/>.
/// </summary>
/// <param name="subtitle">Subtitle whose paragraph texts are rewritten.</param>
/// <param name="language">Language code passed to <c>FixCasing</c> and <c>NameList</c>.</param>
private static void FixCasing(Subtitle subtitle, string language)
{
    // fix casing normal
    var fixCasing = new FixCasing(language);
    fixCasing.Fix(subtitle);

    // fix casing for names
    var nameList = new NameList(Configuration.DictionariesDirectory, language, Configuration.Settings.WordLists.UseOnlineNames, Configuration.Settings.WordLists.NamesUrl);
    var nameListInclMulti = nameList.GetAllNames();
    foreach (var paragraph in subtitle.Paragraphs)
    {
        string text = paragraph.Text;

        // Guard before the text is handed to any helper or dereferenced.
        if (string.IsNullOrEmpty(text))
        {
            continue;
        }

        // Text that equals its own upper-cased form is left alone.
        string textNoTags = HtmlUtil.RemoveHtmlTags(text, true);
        if (textNoTags == textNoTags.ToUpperInvariant())
        {
            continue;
        }

        var st = new StrippableText(text);
        st.FixCasing(nameListInclMulti, true, false, false, string.Empty);
        paragraph.Text = st.MergedString;
    }
}
/// <summary>
/// Walks every paragraph and, after each character from <c>ExpectedChars</c>
/// that is followed by a space, lets <c>ToUpperFirstLetter</c> rewrite the
/// start of the following text (unless <c>IsAbbreviation</c> says otherwise).
/// Each changed paragraph is reported through <paramref name="callbacks"/>.
/// </summary>
/// <param name="subtitle">Subtitle whose paragraphs are examined and updated.</param>
/// <param name="callbacks">Decides whether a fix is allowed and receives fix reports.</param>
public void Fix(Subtitle subtitle, IFixCallbacks callbacks)
{
    var language = Configuration.Settings.Language.FixCommonErrors;
    string fixAction = language.StartWithUppercaseLetterAfterPeriodInsideParagraph;
    int fixCount = 0;

    for (int index = 0; index < subtitle.Paragraphs.Count; index++)
    {
        Paragraph paragraph = subtitle.Paragraphs[index];
        string before = paragraph.Text;
        if (before.Length <= 3 || !callbacks.AllowFix(paragraph, fixAction))
        {
            continue;
        }

        var stripped = new StrippableText(before);
        string work = stripped.StrippedText;
        int pos = work.IndexOfAny(ExpectedChars);
        while (pos > 0 && pos < work.Length)
        {
            char mark = work[pos];

            // Advance to the last character of a run of identical marks
            // ("???", "!!!" or "..."), so the check below looks past the whole run.
            while (pos + 1 < work.Length && work[pos + 1] == mark)
            {
                pos++;
            }

            bool spaceFollows = pos + 3 < work.Length && work[pos + 1] == ' ';
            if (spaceFollows && !IsAbbreviation(work, pos, callbacks))
            {
                string head = work.Substring(0, pos + 1);
                var tail = new StrippableText(work.Substring(pos + 2));
                string fixedTail = tail.CombineWithPrePost(ToUpperFirstLetter(head, tail.StrippedText, callbacks));
                work = work.Substring(0, pos + 2) + fixedTail;
            }

            pos += 3;
            if (pos < work.Length)
            {
                pos = work.IndexOfAny(ExpectedChars, pos);
            }
        }

        string after = stripped.CombineWithPrePost(work);
        if (before != after)
        {
            paragraph.Text = after;
            fixCount++;
            callbacks.AddFixToListView(paragraph, fixAction, before, paragraph.Text);
        }
    }

    callbacks.UpdateFixStatus(fixCount, language.StartWithUppercaseLetterAfterPeriodInsideParagraph, fixCount.ToString(CultureInfo.InvariantCulture));
}
/// <summary>
/// In a two-line dialog, the first word after each leading dash must be
/// capitalized by <c>FixCasing</c>; the rest of each line stays as it was.
/// </summary>
public void StrippableTextChangeCasing4()
{
    string input = "- hi joe!" + Environment.NewLine + "- hi jane.";
    string expected = "- Hi joe!" + Environment.NewLine + "- Hi jane.";
    var strippable = new StrippableText(input);

    strippable.FixCasing(new System.Collections.Generic.List<string>(), false, true, true, "Bye.");

    Assert.AreEqual(expected, strippable.MergedString);
}
/// <summary>
/// A single letter wrapped in italic tags splits into the opening tag,
/// the letter, and the closing tag.
/// </summary>
public void StrippableTextItalic2()
{
    var strippable = new StrippableText("<i>O</i>");

    string pre = strippable.Pre;
    string post = strippable.Post;
    string core = strippable.StrippedText;

    Assert.AreEqual("<i>", pre);
    Assert.AreEqual("</i>", post);
    Assert.AreEqual("O", core);
}
/// <summary>
/// An unclosed italic tag goes to <c>Pre</c>, the trailing exclamation mark
/// to <c>Post</c>, and only the word remains as <c>StrippedText</c>.
/// </summary>
public void StrippableTextItalic3()
{
    var strippable = new StrippableText("<i>Hi!");

    string pre = strippable.Pre;
    string post = strippable.Post;
    string core = strippable.StrippedText;

    Assert.AreEqual("<i>", pre);
    Assert.AreEqual("!", post);
    Assert.AreEqual("Hi", core);
}
/// <summary>
/// A single plain letter has nothing to strip: <c>Pre</c> and <c>Post</c>
/// are empty and <c>StrippedText</c> is the letter itself.
/// </summary>
public void StrippableOnlyText()
{
    var st = new StrippableText("H");

    // Expected value goes first so that a failure message labels the two values correctly.
    Assert.AreEqual("", st.Pre);
    Assert.AreEqual("", st.Post);
    Assert.AreEqual("H", st.StrippedText);
}
/// <summary>
/// A leading <c>{\an9}</c> override tag goes to <c>Pre</c>, the trailing
/// exclamation mark to <c>Post</c>, and the word remains as <c>StrippedText</c>.
/// </summary>
public void StrippableTextAss()
{
    var st = new StrippableText("{\\an9}Hi!");

    // Expected value goes first so that a failure message labels the two values correctly.
    Assert.AreEqual("{\\an9}", st.Pre);
    Assert.AreEqual("!", st.Post);
    Assert.AreEqual("Hi", st.StrippedText);
}
/// <summary>
/// A font tag pair around a word: the opening tag is <c>Pre</c>, the
/// exclamation mark plus closing tag is <c>Post</c>.
/// </summary>
public void StrippableTextFont()
{
    var strippable = new StrippableText("<font color=\"red\">Hi!</font>");

    string pre = strippable.Pre;
    string post = strippable.Post;
    string core = strippable.StrippedText;

    Assert.AreEqual("<font color=\"red\">", pre);
    Assert.AreEqual("!</font>", post);
    Assert.AreEqual("Hi", core);
}
/// <summary>
/// A leading <c>{MAN}</c> marker is not treated as a strippable prefix:
/// <c>Pre</c> stays empty and only the final exclamation mark is split off.
/// </summary>
public void StrippableTextFontDontTouch()
{
    var strippable = new StrippableText("{MAN} Hi, how are you today!");

    string pre = strippable.Pre;
    string post = strippable.Post;
    string core = strippable.StrippedText;

    Assert.AreEqual("", pre);
    Assert.AreEqual("!", post);
    Assert.AreEqual("{MAN} Hi, how are you today", core);
}
/// <summary>
/// Input consisting of nothing but an opening italic tag ends up entirely
/// in <c>Pre</c>; <c>Post</c> and <c>StrippedText</c> are empty.
/// </summary>
public void StrippableOnlyPre3()
{
    var strippable = new StrippableText("<i>");

    string pre = strippable.Pre;
    string post = strippable.Post;
    string core = strippable.StrippedText;

    Assert.AreEqual("<i>", pre);
    Assert.AreEqual("", post);
    Assert.AreEqual("", core);
}
/// <summary>
/// A leading <c>{\an9}</c> override tag is split off as <c>Pre</c>, the
/// exclamation mark as <c>Post</c>, leaving the word as <c>StrippedText</c>.
/// </summary>
public void StrippableTextAss()
{
    var strippable = new StrippableText("{\\an9}Hi!");

    string pre = strippable.Pre;
    string post = strippable.Post;
    string core = strippable.StrippedText;

    Assert.AreEqual("{\\an9}", pre);
    Assert.AreEqual("!", post);
    Assert.AreEqual("Hi", core);
}
/// <summary>
/// Nested tags with leading dots: everything up to the word (italic tag,
/// dots, bold tag) is <c>Pre</c>; punctuation and closing tags are <c>Post</c>.
/// </summary>
public void StrippableTextItalicAndMore()
{
    var strippable = new StrippableText("<i>...<b>Hi!</b></i>");

    string pre = strippable.Pre;
    string post = strippable.Post;
    string core = strippable.StrippedText;

    Assert.AreEqual("<i>...<b>", pre);
    Assert.AreEqual("!</b></i>", post);
    Assert.AreEqual("Hi", core);
}
/// <summary>
/// Italic and font tags nested around a word: both opening tags form
/// <c>Pre</c>; the exclamation mark and both closing tags form <c>Post</c>.
/// </summary>
public void StrippableTextItalicAndFont()
{
    var st = new StrippableText("<i><font color=\"red\">Hi!</font></i>");

    // Expected value goes first so that a failure message labels the two values correctly.
    Assert.AreEqual("<i><font color=\"red\">", st.Pre);
    Assert.AreEqual("!</font></i>", st.Post);
    Assert.AreEqual("Hi", st.StrippedText);
}
/// <summary>
/// A word in italic tags: the opening tag is <c>Pre</c>, the exclamation
/// mark plus closing tag is <c>Post</c>, and the word is <c>StrippedText</c>.
/// </summary>
public void StrippableTextItalic()
{
    var st = new StrippableText("<i>Hi!</i>");

    // Expected value goes first so that a failure message labels the two values correctly.
    Assert.AreEqual("<i>", st.Pre);
    Assert.AreEqual("!</i>", st.Post);
    Assert.AreEqual("Hi", st.StrippedText);
}
/// <summary>
/// With a bracketed speaker label in front, <c>FixCasing</c> must capitalize
/// both the first word inside the brackets and the first word after them.
/// </summary>
public void StrippableTextChangeCasing7()
{
    var names = new System.Collections.Generic.List<string> { "Joe", "Jane" };
    var strippable = new StrippableText("[ newsreel narrator ] ominous clouds of war.");

    strippable.FixCasing(names, true, true, true, "Bye.");

    Assert.AreEqual("[ Newsreel narrator ] Ominous clouds of war.", strippable.MergedString);
}
/// <summary>
/// Names from the supplied list must be capitalized even when they sit
/// directly next to typographic quotation marks; other words are untouched.
/// </summary>
public void StrippableTextChangeCasing10()
{
    var names = new System.Collections.Generic.List<string> { "Thor", "Jones" };
    var strippable = new StrippableText("Uh, “thor and doctor jones”");

    strippable.FixCasing(names, true, true, true, "Bye.");

    Assert.AreEqual("Uh, “Thor and doctor Jones”", strippable.MergedString);
}
/// <summary>
/// With a "speaker:" prefix, <c>FixCasing</c> must capitalize both the
/// speaker label and the first word after the colon.
/// </summary>
public void StrippableTextChangeCasing8()
{
    var names = new System.Collections.Generic.List<string> { "Joe", "Jane" };
    var strippable = new StrippableText("andy: dad!");

    strippable.FixCasing(names, true, true, true, "Bye.");

    Assert.AreEqual("Andy: Dad!", strippable.MergedString);
}
/// <summary>
/// Two dialog lines, the second with a "speaker:" prefix: sentence starts,
/// the speaker label and the word after the colon are capitalized, while the
/// lowercase "i've" is expected to stay as it is.
/// </summary>
public void StrippableTextChangeCasing9()
{
    string input = "- quit! wait outside!" + Environment.NewLine + "- girl: miss, i've got a headache.";
    string expected = "- Quit! Wait outside!" + Environment.NewLine + "- Girl: Miss, i've got a headache.";
    var names = new System.Collections.Generic.List<string> { "Joe", "Jane" };
    var strippable = new StrippableText(input);

    strippable.FixCasing(names, true, true, true, "Bye.");

    Assert.AreEqual(expected, strippable.MergedString);
}
/// <summary>
/// Walks every paragraph and, after each character from <c>ExpectedChars</c>
/// that is directly followed by a space, lets <c>ToUpperFirstLetter</c>
/// rewrite the start of the following text (unless <c>IsAbbreviation</c>
/// says otherwise). Changed paragraphs are reported through
/// <paramref name="callbacks"/>.
/// </summary>
/// <param name="subtitle">Subtitle whose paragraphs are examined and updated.</param>
/// <param name="callbacks">Decides whether a fix is allowed and receives fix reports.</param>
public void Fix(Subtitle subtitle, IFixCallbacks callbacks)
{
    var language = Configuration.Settings.Language.FixCommonErrors;
    string fixAction = language.StartWithUppercaseLetterAfterPeriodInsideParagraph;
    int fixCount = 0;

    for (int index = 0; index < subtitle.Paragraphs.Count; index++)
    {
        Paragraph paragraph = subtitle.Paragraphs[index];
        string before = paragraph.Text;
        if (before.Length <= 3 || !callbacks.AllowFix(paragraph, fixAction))
        {
            continue;
        }

        var stripped = new StrippableText(before);
        string work = stripped.StrippedText;
        int pos = work.IndexOfAny(ExpectedChars);
        while (pos > 0 && pos < work.Length)
        {
            char mark = work[pos];
            bool isDot = mark == '.';

            // Runs such as "???" or "!!!" are skipped up front so the word after
            // them can be fixed. Dots are deliberately not skipped here, so a
            // run of dots never has a space right after the current position.
            if (!isDot)
            {
                while (pos + 1 < work.Length && work[pos + 1] == mark)
                {
                    pos++;
                }
            }

            bool spaceFollows = pos + 3 < work.Length && work[pos + 1] == ' ';
            if (spaceFollows && !IsAbbreviation(work, pos, callbacks))
            {
                var tail = new StrippableText(work.Substring(pos + 2));
                string fixedTail = tail.CombineWithPrePost(ToUpperFirstLetter(tail.StrippedText, callbacks));
                work = work.Substring(0, pos + 2) + fixedTail;
            }

            // For dots, only now move to the last dot of the run.
            if (isDot)
            {
                while (pos + 1 < work.Length && work[pos + 1] == '.')
                {
                    pos++;
                }
            }

            pos += 3;
            if (pos < work.Length)
            {
                pos = work.IndexOfAny(ExpectedChars, pos);
            }
        }

        string after = stripped.CombineWithPrePost(work);
        if (before != after)
        {
            paragraph.Text = after;
            fixCount++;
            callbacks.AddFixToListView(paragraph, fixAction, before, paragraph.Text);
        }
    }

    callbacks.UpdateFixStatus(fixCount, language.StartWithUppercaseLetterAfterPeriodInsideParagraph, fixCount.ToString(CultureInfo.InvariantCulture));
}
/// <summary>
/// Removes a leading dialog dash from text that is not a real two-speaker dialog.
/// </summary>
/// <param name="subtitle">Subtitle that contains the paragraph; used to look at the previous paragraph and passed on to <c>FixDash</c>.</param>
/// <param name="input">Text of the paragraph at index <paramref name="i"/>.</param>
/// <param name="i">Index of the paragraph inside <paramref name="subtitle"/>.</param>
/// <returns>The text with the dash removed where applicable, otherwise <paramref name="input"/> unchanged.</returns>
public static string FixHyphensRemoveForSingleLine(Subtitle subtitle, string input, int i)
{
    if (string.IsNullOrEmpty(input))
    {
        return input;
    }

    if (!IsOneSentence(input))
    {
        // More than one sentence: only the case where one of exactly two
        // lines is a lone dash is handled - the other line is returned.
        var lines = input.SplitToLines();
        if (lines.Count == 2)
        {
            if (lines[0].Trim() == "-" && lines[1].Length > 2)
            {
                return lines[1].TrimStart('-').TrimStart();
            }

            if (lines[1].Trim() == "-" && lines[0].Length > 2)
            {
                return lines[0].TrimStart('-').TrimStart();
            }
        }

        return input;
    }

    if (HasDash(input, "-"))
    {
        return FixDash(subtitle, i, input, "-");
    }

    if (HasDash(input, "‐")) // unicode dash
    {
        return FixDash(subtitle, i, input, "‐"); // unicode dash
    }

    if (!input.StartsWith("<font ", StringComparison.Ordinal))
    {
        return input;
    }

    // Keep the dash when the previous paragraph ends with exactly one '-'.
    var previous = subtitle.GetParagraphOrDefault(i - 1);
    bool mayRemoveDash = previous == null;
    if (!mayRemoveDash)
    {
        string previousPlain = HtmlUtil.RemoveHtmlTags(previous.Text).TrimEnd();
        mayRemoveDash = !previousPlain.EndsWith('-') || previousPlain.EndsWith("--", StringComparison.Ordinal);
    }

    if (mayRemoveDash)
    {
        var strippable = new StrippableText(input);
        if (strippable.Pre.EndsWith('-') || strippable.Pre.EndsWith("- ", StringComparison.Ordinal))
        {
            return strippable.Pre.TrimEnd('-', ' ') + strippable.StrippedText + strippable.Post;
        }
    }

    return input;
}
/// <summary>
/// Replaces <c>_subtitle</c> with a new subtitle in which a single-line
/// paragraph is joined (with a line break) to the paragraph after it, unless
/// one of the exclusions below applies. A paragraph that was merged into its
/// predecessor is not emitted again.
/// </summary>
private void MergeLinesWithContinuation()
{
    var result = new Subtitle();
    for (int index = 0; index < _subtitle.Paragraphs.Count; index++)
    {
        Paragraph current = _subtitle.Paragraphs[index];
        Paragraph following = _subtitle.GetParagraphOrDefault(index + 1);

        bool merge = !current.Text.Contains(Environment.NewLine)
                     && following != null
                     && Configuration.Settings.Tools.ListViewSyntaxMoreThanXLinesX > 1;

        if (merge)
        {
            // A line that ends with '!' or '.' and whose stripped text starts
            // with an uppercase letter is not merged.
            string trimmed = current.Text.TrimEnd();
            if (trimmed.EndsWith('!') || trimmed.EndsWith('.'))
            {
                var strippable = new StrippableText(current.Text);
                if (strippable.StrippedText.Length > 0 && char.IsUpper(strippable.StrippedText[0]))
                {
                    merge = false;
                }
            }
        }

        if (merge)
        {
            // Do not merge when either text is within 5 characters of the maximum line length.
            int limit = Configuration.Settings.General.SubtitleLineMaximumLength - 5;
            if (current.Text.Length >= limit || following.Text.Length >= limit)
            {
                merge = false;
            }
        }

        if (merge)
        {
            result.Paragraphs.Add(new Paragraph { Text = current.Text + Environment.NewLine + following.Text });
            index++; // the following paragraph has been consumed
        }
        else
        {
            result.Paragraphs.Add(new Paragraph(current));
        }
    }

    _subtitle = result;
}
/// <summary>
/// Returns a new <c>Subtitle</c>, constructed from
/// <paramref name="inputSubtitle"/>, with casing fixed: the general
/// <c>FixCasing</c> pass, then a name pass using the <c>NameList</c> for
/// <paramref name="language"/>, and for "de" an extra
/// <c>GermanNouns.UppercaseNouns</c> pass.
/// </summary>
/// <param name="inputSubtitle">Subtitle used to construct the working subtitle.</param>
/// <param name="language">Language code passed to <c>FixCasing</c> and <c>NameList</c>.</param>
/// <returns>The newly constructed subtitle with fixed casing.</returns>
private static Subtitle FixCasing(Subtitle inputSubtitle, string language)
{
    var subtitle = new Subtitle(inputSubtitle);

    // fix casing normal
    var fixCasing = new FixCasing(language);
    fixCasing.Fix(subtitle);

    // fix casing for names
    var nameList = new NameList(Configuration.DictionariesDirectory, language, Configuration.Settings.WordLists.UseOnlineNames, Configuration.Settings.WordLists.NamesUrl);
    var nameListInclMulti = nameList.GetAllNames();
    foreach (var paragraph in subtitle.Paragraphs)
    {
        var text = paragraph.Text;

        // Guard before the text is handed to any helper or dereferenced.
        if (string.IsNullOrEmpty(text))
        {
            continue;
        }

        // Text that equals its own upper-cased form is left alone.
        var textNoTags = HtmlUtil.RemoveHtmlTags(text, true);
        if (textNoTags == textNoTags.ToUpperInvariant())
        {
            continue;
        }

        var st = new StrippableText(text);
        st.FixCasing(nameListInclMulti, true, false, false, string.Empty);
        paragraph.Text = st.MergedString;
    }

    // fix german nouns
    if (language == "de")
    {
        var germanNouns = new GermanNouns();
        foreach (var paragraph in subtitle.Paragraphs)
        {
            paragraph.Text = germanNouns.UppercaseNouns(paragraph.Text);
        }
    }

    return subtitle;
}
/// <summary>
/// Applies the casing mode selected by the radio buttons to
/// <paramref name="text"/> and increments <c>_noOfLinesChanged</c> when the
/// result differs from the input.
/// </summary>
/// <param name="text">Text to change.</param>
/// <param name="lastLine">Passed as the last argument to <c>StrippableText.FixCasing</c> in normal mode.</param>
/// <param name="nameList">Name list handed to <c>StrippableText.FixCasing</c> in normal mode.</param>
/// <returns>The re-cased text, or the input when nothing applies.</returns>
private string FixCasing(string text, string lastLine, List<string> nameList)
{
    string result = text;

    if (radioButtonNormal.Checked)
    {
        // "Only all upper" mode: text that is not entirely upper case is
        // returned untouched and not counted.
        if (checkBoxOnlyAllUpper.Checked && text != text.ToUpper())
        {
            return text;
        }

        if (text.Length > 1)
        {
            // first all to lower
            string lowered = text.ToLower().Trim();
            lowered = lowered.FixExtraSpaces();

            var strippable = new StrippableText(lowered);
            strippable.FixCasing(nameList, false, true, true, lastLine); // fix all casing but names (that's a separate option)
            result = strippable.MergedString;
        }
    }
    else if (radioButtonUppercase.Checked)
    {
        var strippable = new StrippableText(text);
        string upper = strippable.Pre + strippable.StrippedText.ToUpper() + strippable.Post;
        result = HtmlUtil.FixUpperTags(upper); // tags inside text
    }
    else if (radioButtonLowercase.Checked)
    {
        result = text.ToLower();
    }

    if (text != result)
    {
        _noOfLinesChanged++;
    }

    return result;
}
/// <summary>Tags stripped (in this order) from the start of a line before its first letter is examined.</summary>
private static readonly string[] DoFixLeadingTags = { "<i> ", "<i>", "<I> ", "<I>" };

/// <summary>Single characters stripped (in this order) after the leading tags.</summary>
private static readonly char[] DoFixLeadingSymbols = { '♪', ' ', '♫', ' ' };

/// <summary>English line starts where a leading lowercase 'l' is replaced with an uppercase 'I'.</summary>
private static readonly string[] DoFixEnglishLowercaseLPrefixes =
{
    "l ", "l-I", "ls ", "lnterested", "lsn't ", "ldiot", "ln", "lm", "ls", "lt", "lf ", "lc", "l'm "
};

/// <summary>Markers searched for (in this order, after a line break) once the " -" marker was not found.</summary>
private static readonly string[] DoFixSecondLineMarkers =
{
    "- <i> ♪", "-", "<i>-", "<i> -", "♪ -", "♪ <i> -", "♪ <i>-"
};

/// <summary>
/// Capitalizes the first letter of a paragraph, of its second line, and of
/// dialog lines that start with a dash, when the preceding text ends a
/// sentence according to <c>Helper.IsPreviousTextEndOfParagraph</c>.
/// </summary>
/// <param name="p">Paragraph to fix; its <c>Text</c> is updated in place.</param>
/// <param name="prev">Previous paragraph, or null when there is none.</param>
/// <param name="encoding">Passed through to the Turkish letter helpers.</param>
/// <param name="language">Language code; "en" enables English-only rules.</param>
/// <returns>The (possibly updated) <c>p.Text</c>.</returns>
private static string DoFix(Paragraph p, Paragraph prev, Encoding encoding, string language)
{
    // 1) First letter of the paragraph.
    if (p.Text != null && p.Text.Length > 1)
    {
        string pre;
        string text = DoFixStripLeadingDecoration(p.Text, out pre);
        var firstLetter = text[0];

        string prevText = " .";
        if (prev != null)
        {
            prevText = HtmlUtil.RemoveHtmlTags(prev.Text);
        }

        bool isPrevEndOfLine = Helper.IsPreviousTextEndOfParagraph(prevText);
        if (prevText == " .")
        {
            isPrevEndOfLine = true;
        }

        if (!DoFixStartsWithUrl(text) &&
            (char.IsLower(firstLetter) || Helper.IsTurkishLittleI(firstLetter, encoding, language)) &&
            !char.IsDigit(firstLetter) &&
            isPrevEndOfLine &&
            !DoFixEndsWithKnownEnglishAbbreviation(prevText, language))
        {
            p.Text = DoFixUppercaseStart(pre, text, encoding, language);
        }
    }

    // 2) First letter of the second line, plus dialog lines.
    if (p.Text != null && p.Text.Contains(Environment.NewLine))
    {
        var arr = p.Text.SplitToLines();
        if (arr.Length == 2 && arr[1].Length > 1)
        {
            string pre;
            string text = DoFixStripLeadingDecoration(arr[1], out pre);
            char firstLetter = text[0];
            string prevText = HtmlUtil.RemoveHtmlTags(arr[0]);
            bool isPrevEndOfLine = Helper.IsPreviousTextEndOfParagraph(prevText);

            if (!DoFixStartsWithUrl(text) &&
                (char.IsLower(firstLetter) || Helper.IsTurkishLittleI(firstLetter, encoding, language)) &&
                !prevText.EndsWith("...", StringComparison.Ordinal) &&
                isPrevEndOfLine &&
                !DoFixEndsWithKnownEnglishAbbreviation(prevText, language))
            {
                p.Text = arr[0] + Environment.NewLine + DoFixUppercaseStart(pre, text, encoding, language);
            }

            arr = p.Text.SplitToLines();
            bool bothLinesAreDialog =
                (arr[0].StartsWith('-') || arr[0].StartsWith("<i>-", StringComparison.Ordinal)) &&
                (arr[1].StartsWith('-') || arr[1].StartsWith("<i>-", StringComparison.Ordinal)) &&
                !arr[0].StartsWith("--", StringComparison.Ordinal) &&
                !arr[0].StartsWith("<i>--", StringComparison.Ordinal) &&
                !arr[1].StartsWith("--", StringComparison.Ordinal) &&
                !arr[1].StartsWith("<i>--", StringComparison.Ordinal);
            if (bothLinesAreDialog)
            {
                // Second dialog line: judged against the end of the first line.
                if (isPrevEndOfLine && arr[1].StartsWith("<i>- ", StringComparison.Ordinal) && arr[1].Length > 6)
                {
                    p.Text = arr[0] + Environment.NewLine + "<i>- " + char.ToUpper(arr[1][5]) + arr[1].Remove(0, 6);
                }
                else if (isPrevEndOfLine && arr[1].StartsWith("- ", StringComparison.Ordinal) && arr[1].Length > 3)
                {
                    p.Text = arr[0] + Environment.NewLine + "- " + char.ToUpper(arr[1][2]) + arr[1].Remove(0, 3);
                }

                // First dialog line: judged against the previous paragraph, but
                // only when that paragraph ended less than 10 seconds before this one starts.
                arr = p.Text.SplitToLines();
                prevText = " .";
                if (prev != null && p.StartTime.TotalMilliseconds - 10000 < prev.EndTime.TotalMilliseconds)
                {
                    prevText = HtmlUtil.RemoveHtmlTags(prev.Text);
                }

                bool isPrevLineEndOfLine = Helper.IsPreviousTextEndOfParagraph(prevText);
                if (isPrevLineEndOfLine && arr[0].StartsWith("<i>- ", StringComparison.Ordinal) && arr[0].Length > 6)
                {
                    p.Text = "<i>- " + char.ToUpper(arr[0][5]) + arr[0].Remove(0, 6) + Environment.NewLine + arr[1];
                }
                else if (isPrevLineEndOfLine && arr[0].StartsWith("- ", StringComparison.Ordinal) && arr[0].Length > 3)
                {
                    p.Text = "- " + char.ToUpper(arr[0][2]) + arr[0].Remove(0, 3) + Environment.NewLine + arr[1];
                }
            }
        }
    }

    // 3) First letter after a dash marker that follows a line break.
    if (p.Text != null && p.Text.Length > 4)
    {
        // The offset stays 0 for the " -" marker; for the others it is the
        // marker length (the line break itself is not counted). The text cut
        // at that offset is split by StrippableText below.
        int len = 0;
        int indexOfNewLine = p.Text.IndexOf(Environment.NewLine + " -", 1, StringComparison.Ordinal);
        foreach (string marker in DoFixSecondLineMarkers)
        {
            if (indexOfNewLine >= 0)
            {
                break;
            }

            indexOfNewLine = p.Text.IndexOf(Environment.NewLine + marker, 1, StringComparison.Ordinal);
            len = marker.Length;
        }

        if (indexOfNewLine > 0)
        {
            int cut = indexOfNewLine + len;
            var st = new StrippableText(p.Text.Substring(cut));
            bool mayFix = st.StrippedText.Length > 0 && !st.Pre.EndsWith('[') && !st.Pre.Contains("...");
            if (mayFix && Helper.IsTurkishLittleI(st.StrippedText[0], encoding, language))
            {
                string text = st.Pre + Helper.GetTurkishUppercaseLetter(st.StrippedText[0], encoding) + st.StrippedText.Substring(1) + st.Post;
                p.Text = p.Text.Remove(cut).Insert(cut, text);
            }
            else if (mayFix && st.StrippedText[0] != char.ToUpper(st.StrippedText[0]))
            {
                string text = st.Pre + char.ToUpper(st.StrippedText[0]) + st.StrippedText.Substring(1) + st.Post;
                p.Text = p.Text.Remove(cut).Insert(cut, text);
            }
        }
    }

    return p.Text;
}

/// <summary>
/// Strips leading italic tags, music symbols and spaces from
/// <paramref name="text"/>. Every stripped piece is appended to
/// <paramref name="pre"/>, so nothing is lost when more than one piece matches.
/// </summary>
private static string DoFixStripLeadingDecoration(string text, out string pre)
{
    pre = string.Empty;
    foreach (string tag in DoFixLeadingTags)
    {
        // At least one character must remain after the tag.
        if (text.Length > tag.Length && text.StartsWith(tag, StringComparison.Ordinal))
        {
            pre += tag;
            text = text.Substring(tag.Length);
        }
    }

    foreach (char symbol in DoFixLeadingSymbols)
    {
        // At least two characters must remain after the symbol.
        if (text.Length > 2 && text[0] == symbol)
        {
            pre += symbol;
            text = text.Substring(1);
        }
    }

    return text;
}

/// <summary>True when the text starts with "www.", "http:" or "https:".</summary>
private static bool DoFixStartsWithUrl(string text)
{
    return text.StartsWith("www.", StringComparison.Ordinal) ||
           text.StartsWith("http:", StringComparison.Ordinal) ||
           text.StartsWith("https:", StringComparison.Ordinal);
}

/// <summary>True for English when the previous text ends with " o.r.", " a.m." or " p.m.".</summary>
private static bool DoFixEndsWithKnownEnglishAbbreviation(string prevText, string language)
{
    return language == "en" &&
           (prevText.EndsWith(" o.r.", StringComparison.Ordinal) ||
            prevText.EndsWith(" a.m.", StringComparison.Ordinal) ||
            prevText.EndsWith(" p.m.", StringComparison.Ordinal));
}

/// <summary>True when a leading lowercase 'l' should be replaced with an uppercase 'I'.</summary>
private static bool DoFixLooksLikeLowercaseLForI(string text, string language)
{
    // NOTE(review): "l am " is accepted for every language, matching the
    // operator precedence of the original condition - confirm this is intended.
    if (text.StartsWith("l am ", StringComparison.Ordinal))
    {
        return true;
    }

    if (language != "en")
    {
        return false;
    }

    foreach (string prefix in DoFixEnglishLowercaseLPrefixes)
    {
        if (text.StartsWith(prefix, StringComparison.Ordinal))
        {
            return true;
        }
    }

    return false;
}

/// <summary>Returns <paramref name="pre"/> followed by <paramref name="text"/> with its first letter uppercased.</summary>
private static string DoFixUppercaseStart(string pre, string text, Encoding encoding, string language)
{
    char firstLetter = text[0];
    string rest = text.Substring(1);
    if (Helper.IsTurkishLittleI(firstLetter, encoding, language))
    {
        return pre + Helper.GetTurkishUppercaseLetter(firstLetter, encoding) + rest;
    }

    if (DoFixLooksLikeLowercaseLForI(text, language))
    {
        return pre + "I" + rest; // l > I
    }

    return pre + char.ToUpper(firstLetter) + rest;
}
/// <summary>
/// Replaces an uppercase 'I' that sits inside or at the start of a lowercase
/// word with 'l' (or with 'i' in one case), for every paragraph of
/// <paramref name="subtitle"/>. Words that <paramref name="callbacks"/>
/// reports as names are skipped.
/// </summary>
/// <param name="subtitle">Subtitle whose paragraphs are examined and updated.</param>
/// <param name="callbacks">Decides whether a fix is allowed, recognises names and receives fix reports.</param>
public void Fix(Subtitle subtitle, IFixCallbacks callbacks)
{
    var language = Configuration.Settings.Language.FixCommonErrors;
    string fixAction = language.FixUppercaseIInsideLowercaseWord;
    int uppercaseIsInsideLowercaseWords = 0;
    for (int i = 0; i < subtitle.Paragraphs.Count; i++)
    {
        Paragraph p = subtitle.Paragraphs[i];
        string oldText = p.Text;
        var st = new StrippableText(p.Text);

        // Pass 1: matches of ReAfterLowercaseLetter whose second character is 'I'.
        Match match = ReAfterLowercaseLetter.Match(st.StrippedText);
        while (match.Success)
        {
            // Irish names, McDonalds etc. "Mc" occupies Index - 1 and Index, so
            // a match at index 1 (text starting with "McI...") must be covered too.
            bool isIrishName = match.Index > 0 && st.StrippedText.Substring(match.Index - 1, 2) == "Mc";
            if (!isIrishName &&
                st.StrippedText[match.Index + 1] == 'I' &&
                callbacks.AllowFix(p, fixAction) &&
                !callbacks.IsName(GetWholeWord(st.StrippedText, match.Index)))
            {
                var old = st.StrippedText;
                string rest = match.Index + 2 < old.Length ? old.Substring(match.Index + 2) : string.Empty;
                st.StrippedText = old.Substring(0, match.Index + 1) + "l" + rest;
                p.Text = st.MergedString;
                st = new StrippableText(p.Text);
                uppercaseIsInsideLowercaseWords++;
                callbacks.AddFixToListView(p, fixAction, oldText, p.Text);
                match = ReAfterLowercaseLetter.Match(st.StrippedText, match.Index);
            }
            else
            {
                match = match.NextMatch();
            }
        }

        // Pass 2: matches of ReBeforeLowercaseLetter; match.Index is the position of the 'I'.
        match = ReBeforeLowercaseLetter.Match(st.StrippedText);
        while (match.Success)
        {
            string word = GetWholeWord(st.StrippedText, match.Index);
            if (!callbacks.IsName(word) && callbacks.AllowFix(p, fixAction))
            {
                string replacement = GetReplacementForLeadingUppercaseI(st.StrippedText, match, word);
                if (replacement != null)
                {
                    // One character is swapped for one character, so the
                    // positions of later matches stay valid.
                    st.StrippedText = st.StrippedText.Remove(match.Index, 1).Insert(match.Index, replacement);
                    p.Text = st.MergedString;
                    uppercaseIsInsideLowercaseWords++;
                    callbacks.AddFixToListView(p, fixAction, oldText, p.Text);
                }
            }

            match = match.NextMatch();
        }
    }

    callbacks.UpdateFixStatus(uppercaseIsInsideLowercaseWords, language.FixUppercaseIInsindeLowercaseWords, language.XUppercaseIsFoundInsideLowercaseWords);
}

/// <summary>
/// Decides what the 'I' at <c>match.Index</c> should become.
/// </summary>
/// <returns>"l" or "i" when the letter should be replaced, or null to leave it alone.</returns>
private static string GetReplacementForLeadingUppercaseI(string text, Match match, string word)
{
    if (word.Equals("internal", StringComparison.OrdinalIgnoreCase) ||
        word.Equals("island", StringComparison.OrdinalIgnoreCase) ||
        word.Equals("islands", StringComparison.OrdinalIgnoreCase))
    {
        return null;
    }

    int index = match.Index;
    if (index == 0)
    {
        // First letter in paragraph - deliberately not fixed (considered too risky).
        return null;
    }

    if (index > 2 && text[index - 1] == ' ')
    {
        // After a space: fix only when the character before the space is a
        // letter, digit or comma and a vowel follows the 'I'.
        bool fix = (Utilities.AllLettersAndNumbers + @",").Contains(text[index - 2]) &&
                   match.Length >= 2 &&
                   Utilities.LowercaseVowels.Contains(char.ToLower(match.Value[1]));
        return fix ? "l" : null;
    }

    if (index > Environment.NewLine.Length + 1 && Environment.NewLine.Contains(text[index - 1]))
    {
        // After a line break: same rule, looking at the character in front of
        // the line break (Index - NewLine.Length - 1).
        bool fix = (Utilities.AllLettersAndNumbers + @",").Contains(text[index - Environment.NewLine.Length - 1]) &&
                   match.Length >= 2 &&
                   Utilities.LowercaseVowels.Contains(match.Value[1]);
        return fix ? "l" : null;
    }

    if (index > 1 && "\"'<>()[]{}-—,.‘’¡¿„“()[]♪@".Contains(text[index - 1]))
    {
        return null;
    }

    var before = '\0';
    var after = '\0';
    if (index > 0)
    {
        before = text[index - 1];
    }

    if (index < text.Length - 2)
    {
        after = text[index + 1];
    }

    // Uppercase consonant + 'I' + lowercase consonant: the 'I' becomes 'i'.
    if (before != '\0' && char.IsUpper(before) &&
        after != '\0' && char.IsLower(after) &&
        !Utilities.LowercaseVowels.Contains(char.ToLower(before)) &&
        !Utilities.LowercaseVowels.Contains(after))
    {
        return "i";
    }

    if (@"‘’¡¿„“()[]♪'. @".Contains(before) && !Utilities.LowercaseVowels.Contains(char.ToLower(after)))
    {
        return null;
    }

    // Irish names such as "McI..." are left alone.
    if (index >= 2 && text.Substring(index - 2, 2) == "Mc")
    {
        return null;
    }

    return "l";
}
/// <summary>
/// Adds a missing end-of-sentence sign to paragraphs (and to the first line
/// of two-line dialogs) when the following text starts with an uppercase
/// letter. Each change is reported through <paramref name="callbacks"/>.
/// </summary>
/// <param name="subtitle">Subtitle whose paragraphs are examined and updated.</param>
/// <param name="callbacks">Decides whether a fix is allowed, recognises names and receives fix reports.</param>
public void Fix(Subtitle subtitle, IFixCallbacks callbacks)
{
    var language = Configuration.Settings.Language.FixCommonErrors;
    string fixAction = language.FixMissingPeriodAtEndOfLine;
    int periodsAdded = 0;

    for (int index = 0; index < subtitle.Paragraphs.Count; index++)
    {
        Paragraph p = subtitle.Paragraphs[index];
        Paragraph next = subtitle.GetParagraphOrDefault(index + 1);
        string nextText = next == null
            ? string.Empty
            : HtmlUtil.RemoveHtmlTags(next.Text, true).TrimStart('-', '"', '„').TrimStart();
        bool isNextClose = next != null && next.StartTime.TotalMilliseconds - p.EndTime.TotalMilliseconds < 400;
        string tempNoHtml = HtmlUtil.RemoveHtmlTags(p.Text).TrimEnd();

        if (IsOneLineUrl(p.Text) || p.Text.Contains(ExpectedChars) || p.Text.EndsWith('\''))
        {
            // ignore urls
        }
        else if (!string.IsNullOrEmpty(nextText) && next != null && next.Text.Length > 0 &&
                 char.IsUpper(nextText[0]) && tempNoHtml.Length > 0 &&
                 !ExpectedString1.Contains(tempNoHtml[tempNoHtml.Length - 1]))
        {
            string tempTrimmed = tempNoHtml.TrimEnd().TrimEnd('\'', '"', '“', '”').TrimEnd();

            // Don't end the sentence if the next word is an I word as they're always capped.
            // The first word of the next line must not be a name either.
            if (tempTrimmed.Length > 0 &&
                !ExpectedString2.Contains(tempTrimmed[tempTrimmed.Length - 1]) &&
                p.Text != p.Text.ToUpper() &&
                !(isNextClose && (nextText.StartsWith("I ", StringComparison.Ordinal) || nextText.StartsWith("I'", StringComparison.Ordinal))) &&
                !callbacks.IsName(next.Text.Split(WordSplitChars)[0]) &&
                callbacks.AllowFix(p, fixAction))
            {
                string oldText = p.Text;
                if (p.Text.EndsWith('>'))
                {
                    // Put the period in front of the last '<' of the text.
                    int lastLessThan = p.Text.LastIndexOf('<');
                    if (lastLessThan > 0)
                    {
                        p.Text = p.Text.Insert(lastLessThan, ".");
                    }
                }
                else if (p.Text.EndsWith('“') && tempNoHtml.StartsWith('„'))
                {
                    p.Text = p.Text.TrimEnd('“') + ".“";
                }
                else if (p.Text.EndsWith('"') && tempNoHtml.StartsWith('"'))
                {
                    p.Text = p.Text.TrimEnd('"') + ".\"";
                }
                else
                {
                    p.Text += ".";
                }

                if (p.Text != oldText)
                {
                    periodsAdded++;
                    callbacks.AddFixToListView(p, fixAction, oldText, p.Text);
                }
            }
        }
        else if (next != null && !string.IsNullOrEmpty(p.Text) &&
                 Utilities.AllLettersAndNumbers.Contains(p.Text[p.Text.Length - 1]) &&
                 p.Text != p.Text.ToUpper())
        {
            var nextStripped = new StrippableText(next.Text);
            if (nextStripped.StrippedText.Length > 0 &&
                nextStripped.StrippedText != nextStripped.StrippedText.ToUpper() &&
                char.IsUpper(nextStripped.StrippedText[0]) &&
                callbacks.AllowFix(p, fixAction))
            {
                // The last sentence sign in the text decides the closing sign:
                // an inverted Spanish mark gets its counterpart, anything else a period.
                int signIndex = p.Text.LastIndexOfAny(new[] { '.', '!', '?', '¿', '¡' });
                string endSign = ".";
                if (signIndex >= 0 && p.Text[signIndex] == '¿')
                {
                    endSign = "?";
                }

                if (signIndex >= 0 && p.Text[signIndex] == '¡')
                {
                    endSign = "!";
                }

                string oldText = p.Text;
                periodsAdded++;
                p.Text += endSign;
                callbacks.AddFixToListView(p, fixAction, oldText, p.Text);
            }
        }

        if (p.Text.Length > 4)
        {
            // Look for a line break followed by a dash marker; first hit in this order wins.
            int indexOfNewLine = -1;
            foreach (string marker in new[] { " -", "-", "<i>-", "<i> -" })
            {
                indexOfNewLine = p.Text.IndexOf(Environment.NewLine + marker, 3, StringComparison.Ordinal);
                if (indexOfNewLine >= 0)
                {
                    break;
                }
            }

            if (indexOfNewLine > 0 &&
                char.IsUpper(char.ToUpper(p.Text[indexOfNewLine - 1])) &&
                callbacks.AllowFix(p, fixAction))
            {
                string oldText = p.Text;
                var firstLine = new StrippableText(p.Text.Substring(0, indexOfNewLine));
                string sign;
                if (firstLine.Pre.TrimEnd().EndsWith('¿')) // Spanish ¿
                {
                    sign = "?";
                }
                else if (firstLine.Pre.TrimEnd().EndsWith('¡')) // Spanish ¡
                {
                    sign = "!";
                }
                else
                {
                    sign = ".";
                }

                p.Text = p.Text.Insert(indexOfNewLine, sign);
                periodsAdded++;
                callbacks.AddFixToListView(p, fixAction, oldText, p.Text);
            }
        }
    }

    callbacks.UpdateFixStatus(periodsAdded, language.AddPeriods, language.XPeriodsAdded);
}
/// <summary>
/// Repairs unbalanced Spanish punctuation in <paramref name="p"/>: when the text contains
/// <paramref name="mark"/>, a missing <paramref name="inverseMark"/> is inserted at the start of the
/// sentence that the mark closes; when the text has no <paramref name="mark"/> but exactly one
/// <paramref name="inverseMark"/>, the closing mark is inserted before the next '.', '!' or '?'.
/// </summary>
/// <param name="mark">Closing mark to balance; the branches below compare it against '!' and '?'.</param>
/// <param name="inverseMark">Inverted counterpart of <paramref name="mark"/>, as a string.</param>
/// <param name="p">Paragraph whose Text is rewritten in place.</param>
/// <param name="last">Paragraph handled before <paramref name="p"/> (presumably the preceding one - confirm at the caller); may be null. Its Text can be modified too.</param>
/// <param name="wasLastLineClosed">In/out flag. Set to true when sentence punctuation is found before the first mark, and after every processed mark.</param>
/// <param name="fixAction">Fix description passed on to the callbacks.</param>
/// <param name="fixCount">Incremented only in the branch that modifies <paramref name="last"/>.</param>
/// <param name="callbacks">Used for AllowFix, AddFixToListView and (via IsSpanishAbbreviation) abbreviation checks.</param>
private void FixSpanishInvertedLetter(char mark, string inverseMark, Paragraph p, Paragraph last, ref bool wasLastLineClosed, string fixAction, ref int fixCount, IFixCallbacks callbacks)
{
    if (p.Text.Contains(mark))
    {
        // Skip when the previous paragraph has an inverted mark without a closing mark and this
        // paragraph has the closing mark without an inverted one.
        bool skip = last != null && !p.Text.Contains(inverseMark) && last.Text.Contains(inverseMark) && !last.Text.Contains(mark);
        // Also skip when both marks occur equally often and, in the tag-free text, no inverted mark
        // remains after trimming leading ones and no closing mark remains after trimming trailing ones.
        if (!skip && Utilities.CountTagInText(p.Text, mark) == Utilities.CountTagInText(p.Text, inverseMark) && HtmlUtil.RemoveHtmlTags(p.Text).TrimStart(inverseMark[0]).Contains(inverseMark) == false && HtmlUtil.RemoveHtmlTags(p.Text).TrimEnd(mark).Contains(mark) == false)
        {
            skip = true;
        }
        if (!skip)
        {
            int startIndex = 0;
            int markIndex = p.Text.IndexOf(mark);
            // A '!', '?' or '.' at an index above zero and before the first mark marks the carried-over line as closed.
            if (!wasLastLineClosed && ((p.Text.IndexOf('!') > 0 && p.Text.IndexOf('!') < markIndex) || (p.Text.IndexOf('?') > 0 && p.Text.IndexOf('?') < markIndex) || (p.Text.IndexOf('.') > 0 && p.Text.IndexOf('.') < markIndex)))
            {
                wasLastLineClosed = true;
            }
            // One iteration per occurrence of mark; a mark at index 0 is never processed.
            while (markIndex > 0 && startIndex < p.Text.Length)
            {
                int inverseMarkIndex = p.Text.IndexOf(inverseMark, startIndex, StringComparison.Ordinal);
                // No inverted mark between startIndex and this mark: one has to be inserted.
                if (wasLastLineClosed && (inverseMarkIndex < 0 || inverseMarkIndex > markIndex))
                {
                    if (callbacks.AllowFix(p, fixAction))
                    {
                        // Step back over sentence punctuation that sits directly before the mark.
                        int j = markIndex - 1;
                        while (j > startIndex && (p.Text[j] == '.' || p.Text[j] == '!' || p.Text[j] == '?'))
                        {
                            j--;
                        }
                        // Walk back to the start of the sentence. The scan stops at '!' or '?', at a '.'
                        // for which IsSpanishAbbreviation returns false, or just after a line break
                        // followed by a dialog dash. The Substring comparisons can only match when
                        // Environment.NewLine is two characters long.
                        while (j > startIndex && (p.Text[j] != '.' || IsSpanishAbbreviation(p.Text, j, callbacks)) && p.Text[j] != '!' && p.Text[j] != '?' && !(j > 3 && p.Text.Substring(j - 3, 3) == Environment.NewLine + "-") && !(j > 4 && p.Text.Substring(j - 4, 4) == Environment.NewLine + " -") && !(j > 6 && p.Text.Substring(j - 6, 6) == Environment.NewLine + "<i>-"))
                        {
                            j--;
                        }
                        // Move past the punctuation that ended the previous sentence.
                        if (@".!?".Contains(p.Text[j]))
                        {
                            j++;
                        }
                        // Move past a line break that follows (again compared as two characters).
                        if (j + 3 < p.Text.Length && p.Text.Substring(j + 1, 2) == Environment.NewLine)
                        {
                            j += 3;
                        }
                        else if (j + 2 < p.Text.Length && p.Text.Substring(j, 2) == Environment.NewLine)
                        {
                            j += 2;
                        }
                        if (j >= startIndex)
                        {
                            // part = the sentence from its detected start up to and including the mark.
                            string part = p.Text.Substring(j, markIndex - j + 1);
                            string speaker = string.Empty;
                            // A leading "(...)" that closes before the mark is split off (together with
                            // following line-break characters) so the inverted mark is placed after it.
                            int speakerEnd = part.IndexOf(')');
                            if (part.StartsWith('(') && speakerEnd > 0 && speakerEnd < part.IndexOf(mark))
                            {
                                while (Environment.NewLine.Contains(part[speakerEnd + 1]))
                                {
                                    speakerEnd++;
                                }
                                speaker = part.Substring(0, speakerEnd + 1);
                                part = part.Substring(speakerEnd + 1);
                            }
                            // Same treatment for a leading "[...]".
                            speakerEnd = part.IndexOf(']');
                            if (part.StartsWith('[') && speakerEnd > 0 && speakerEnd < part.IndexOf(mark))
                            {
                                while (Environment.NewLine.Contains(part[speakerEnd + 1]))
                                {
                                    speakerEnd++;
                                }
                                speaker = part.Substring(0, speakerEnd + 1);
                                part = part.Substring(speakerEnd + 1);
                            }
                            var st = new StrippableText(part);
                            // Text already opens with the other inverted mark and ends with this mark
                            // (only once in the text): prepend the inverted mark to the whole text.
                            if (j == 0 && mark == '!' && st.Pre == "¿" && Utilities.CountTagInText(p.Text, mark) == 1 && HtmlUtil.RemoveHtmlTags(p.Text).EndsWith(mark))
                            {
                                p.Text = inverseMark + p.Text;
                            }
                            else if (j == 0 && mark == '?' && st.Pre == "¡" && Utilities.CountTagInText(p.Text, mark) == 1 && HtmlUtil.RemoveHtmlTags(p.Text).EndsWith(mark))
                            {
                                p.Text = inverseMark + p.Text;
                            }
                            else
                            {
                                string temp = inverseMark;
                                int addToIndex = 0;
                                // For repeated marks directly after this one, add one inverted mark per
                                // extra mark while marks still outnumber the inverted marks.
                                while (p.Text.Length > markIndex + 1 && p.Text[markIndex + 1] == mark && Utilities.CountTagInText(p.Text, mark) > Utilities.CountTagInText(p.Text + temp, inverseMark))
                                {
                                    temp += inverseMark;
                                    st.Post += mark;
                                    markIndex++;
                                    addToIndex++;
                                }
                                // Replace the sentence with: speaker + leading noise + inverted mark(s) + text + trailing part.
                                p.Text = p.Text.Remove(j, markIndex - j + 1).Insert(j, speaker + st.Pre + temp + st.StrippedText + st.Post);
                                markIndex += addToIndex;
                            }
                        }
                    }
                }
                // NOTE(review): inverseMarkIndex locates inverseMark while IndexOf(mark) locates mark, so
                // this equality looks hard to satisfy - confirm whether the branch is reachable. If it is,
                // idx below can be negative for a last.Text shorter than two characters.
                else if (last != null && !wasLastLineClosed && inverseMarkIndex == p.Text.IndexOf(mark) && !last.Text.Contains(inverseMark))
                {
                    string lastOldtext = last.Text;
                    // Search backwards in the previous paragraph for ". ", "! " or "? ".
                    int idx = last.Text.Length - 2;
                    while (idx > 0 && (last.Text.Substring(idx, 2) != ". ") && (last.Text.Substring(idx, 2) != "! ") && (last.Text.Substring(idx, 2) != "? "))
                    {
                        idx--;
                    }
                    last.Text = last.Text.Insert(idx, inverseMark);
                    fixCount++;
                    callbacks.AddFixToListView(last, fixAction, lastOldtext, last.Text);
                }
                // Continue the search two characters after the mark just handled.
                startIndex = markIndex + 2;
                if (startIndex < p.Text.Length)
                {
                    markIndex = p.Text.IndexOf(mark, startIndex);
                }
                else
                {
                    markIndex = -1;
                }
                wasLastLineClosed = true;
            }
        }
        // Move a mark that precedes a trailing "..." behind the dots.
        if (p.Text.EndsWith(mark + "...", StringComparison.Ordinal) && p.Text.Length > 4)
        {
            p.Text = p.Text.Remove(p.Text.Length - 4, 4) + "..." + mark;
        }
    }
    else if (Utilities.CountTagInText(p.Text, inverseMark) == 1)
    {
        // No closing mark at all but exactly one inverted mark: insert the closing mark before the
        // first '.', '!' or '?' that follows the inverted mark (nothing is inserted if none follows).
        int idx = p.Text.IndexOf(inverseMark, StringComparison.Ordinal);
        while (idx < p.Text.Length && !@".!?".Contains(p.Text[idx]))
        {
            idx++;
        }
        if (idx < p.Text.Length)
        {
            p.Text = p.Text.Insert(idx, mark.ToString(CultureInfo.InvariantCulture));
            // Keep the closing pair mirrored with respect to the opening pair.
            if (p.Text.Contains("¡¿") && p.Text.Contains("!?"))
            {
                p.Text = p.Text.Replace("!?", "?!");
            }
            if (p.Text.Contains("¿¡") && p.Text.Contains("?!"))
            {
                p.Text = p.Text.Replace("?!", "!?");
            }
        }
    }
}
/// <summary>
/// Upper-cases the first letter that follows a ':' or ';', both inside a paragraph and at the start
/// of a paragraph whose predecessor (tags removed) ends with ':' or ';'.
/// </summary>
/// <param name="subtitle">Subtitle whose paragraphs are inspected; a paragraph's Text is only written back when the fix is allowed.</param>
/// <param name="callbacks">Supplies AllowFix / AddFixToListView / UpdateFixStatus plus the Encoding and Language used for the Turkish 'i' handling.</param>
public void Fix(Subtitle subtitle, IFixCallbacks callbacks)
{
    var language = Configuration.Settings.Language.FixCommonErrors;
    string fixAction = language.StartWithUppercaseLetterAfterColon;
    int fixedCount = 0;

    for (int index = 0; index < subtitle.Paragraphs.Count; index++)
    {
        // All edits are made on a copy; the real paragraph is touched only once the fix is allowed.
        var working = new Paragraph(subtitle.Paragraphs[index]);
        Paragraph previous = subtitle.GetParagraphOrDefault(index - 1);
        string originalText = working.Text;

        if (previous != null)
        {
            string previousPlain = HtmlUtil.RemoveHtmlTags(previous.Text);
            if (previousPlain.EndsWith(':') || previousPlain.EndsWith(';'))
            {
                var stripped = new StrippableText(working.Text);
                if (stripped.StrippedText.Length > 0)
                {
                    char first = stripped.StrippedText[0];
                    char firstUpper = char.ToUpper(first);
                    if (first != firstUpper)
                    {
                        working.Text = stripped.Pre + firstUpper + stripped.StrippedText.Substring(1) + stripped.Post;
                    }
                }
            }
        }

        if (originalText.Contains(ExpectedChars))
        {
            bool afterColon = false;
            for (int pos = 0; pos < working.Text.Length; pos++)
            {
                char current = working.Text[pos];
                if (current == ':' || current == ';')
                {
                    afterColon = true;
                    continue;
                }

                if (!afterColon)
                {
                    continue;
                }

                // Step over a single space, provided at least two characters follow it.
                if (pos + 2 < working.Text.Length && working.Text[pos] == ' ')
                {
                    pos++;
                    current = working.Text[pos];
                }

                string rest = working.Text.Substring(pos);
                int skip = 0; // extra characters to jump over when a tag starts here
                bool startsWithShortTag = rest.Length > 3
                                          && rest[0] == '<'
                                          && rest[2] == '>'
                                          && (rest[1] == 'i' || rest[1] == 'b' || rest[1] == 'u');

                if (startsWithShortTag)
                {
                    skip = 2;
                }
                else if (rest.StartsWith("<font ", StringComparison.OrdinalIgnoreCase) && rest.Contains('>'))
                {
                    // Distance from the '<' to the '>' that closes the font tag.
                    skip = rest.IndexOf('>', 6);
                }
                else if (Helper.IsTurkishLittleI(current, callbacks.Encoding, callbacks.Language))
                {
                    string turkishUpper = Helper.GetTurkishUppercaseLetter(current, callbacks.Encoding).ToString(CultureInfo.InvariantCulture);
                    working.Text = working.Text.Remove(pos, 1).Insert(pos, turkishUpper);
                    afterColon = false;
                }
                else if (char.IsLower(current))
                {
                    // iPhone: an 'i' is left alone when the next character equals its own upper-case form.
                    bool keepLowerI = current == 'i'
                                      && working.Text.Length > pos + 1
                                      && working.Text[pos + 1] == char.ToUpper(working.Text[pos + 1]);
                    if (!keepLowerI)
                    {
                        working.Text = working.Text.Remove(pos, 1).Insert(pos, char.ToUpper(current).ToString(CultureInfo.InvariantCulture));
                    }

                    afterColon = false;
                }
                else if (!(" " + Environment.NewLine).Contains(current))
                {
                    // Anything other than a space or line-break character ends the "after colon" state.
                    afterColon = false;
                }

                if (skip > 0)
                {
                    pos += skip;
                }
            }
        }

        if (originalText == working.Text || !callbacks.AllowFix(working, fixAction))
        {
            continue;
        }

        fixedCount++;
        subtitle.Paragraphs[index].Text = working.Text;
        callbacks.AddFixToListView(working, fixAction, originalText, working.Text);
    }

    callbacks.UpdateFixStatus(fixedCount, language.StartWithUppercaseLetterAfterColon, fixedCount.ToString(CultureInfo.InvariantCulture));
}
/// <summary>
/// Removes every whole-word occurrence of the configured interjections from the context text and
/// tidies up the punctuation, dashes, italic tags and line breaks that the removal leaves behind.
/// </summary>
/// <param name="context">
/// Supplies the Text to clean, the Interjections to remove and the OnlySeparatedLines flag. With that
/// flag set, a changed text is only returned when it is empty or when a two-line text was reduced to
/// one of its original lines; otherwise the original text is returned.
/// </param>
/// <returns>
/// The cleaned text; the unchanged context text when it is null/whitespace; or an empty string when
/// nothing but strippable characters remains.
/// </returns>
/// <remarks>
/// Fixes compared to the previous version:
/// the space clean-ups replaced a single space with a single space (a no-op, and an endless loop in the
/// final while); they now collapse double spaces as intended. The first of two identical
/// "index == 2" dash branches now matches the double-space form its Remove(2, 2) was written for, which
/// makes the second branch reachable and avoids an ArgumentOutOfRangeException for a text that is exactly
/// "- —". The one culture-sensitive StartsWith is now ordinal like all the others.
/// </remarks>
public string Invoke(InterjectionRemoveContext context)
{
    if (string.IsNullOrWhiteSpace(context.Text))
    {
        return context.Text;
    }

    string text = context.Text;
    string oldText = text;
    bool doRepeat = true;
    while (doRepeat)
    {
        doRepeat = false;
        foreach (string s in context.Interjections)
        {
            // Cheap substring test before paying for the regex.
            if (!text.Contains(s))
            {
                continue;
            }

            var regex = new Regex("\\b" + Regex.Escape(s) + "\\b");
            var match = regex.Match(text);
            if (!match.Success)
            {
                continue;
            }

            int index = match.Index;
            string temp = text.Remove(index, s.Length);

            // Drop an ellipsis that directly followed the removed word.
            if (index == 0 && temp.StartsWith("... ", StringComparison.Ordinal))
            {
                temp = temp.Remove(0, 4);
            }
            if (index == 3 && temp.StartsWith("<i>... ", StringComparison.Ordinal))
            {
                temp = temp.Remove(3, 4);
            }
            if (index > 2 && " \r\n".Contains(text.Substring(index - 1, 1)) && temp.Substring(index).StartsWith("... ", StringComparison.Ordinal))
            {
                temp = temp.Remove(index, 4);
            }
            if (index > 4 && temp.Substring(index - 4).StartsWith("\n<i>... ", StringComparison.Ordinal))
            {
                temp = temp.Remove(index, 4);
            }

            // Clean up em dashes / dialog dashes around the removed word.
            if (temp.Remove(0, index) == " —" && temp.EndsWith("— —", StringComparison.Ordinal))
            {
                temp = temp.Remove(temp.Length - 3);
                if (temp.EndsWith(Environment.NewLine + "—", StringComparison.Ordinal))
                {
                    temp = temp.Remove(temp.Length - 1).TrimEnd();
                }
            }
            else if (temp.Remove(0, index) == " —" && temp.EndsWith("- —", StringComparison.Ordinal))
            {
                temp = temp.Remove(temp.Length - 3);
                if (temp.EndsWith(Environment.NewLine + "-", StringComparison.Ordinal))
                {
                    temp = temp.Remove(temp.Length - 1).TrimEnd();
                }
            }
            else if (index == 2 && temp.StartsWith("-  —", StringComparison.Ordinal))
            {
                // "- <word> —": remove the second space and the em dash.
                temp = temp.Remove(2, 2);
            }
            else if (index == 2 && temp.StartsWith("- —", StringComparison.Ordinal))
            {
                // "- <word>—": remove only the em dash.
                temp = temp.Remove(2, 1);
            }
            else if (index == 0 && temp.StartsWith(" —", StringComparison.Ordinal))
            {
                temp = temp.Remove(0, 2);
            }
            else if (index == 0 && temp.StartsWith('—'))
            {
                temp = temp.Remove(0, 1);
            }
            else if (index > 3 && (temp.Substring(index - 2) == ". —" || temp.Substring(index - 2) == "! —" || temp.Substring(index - 2) == "? —"))
            {
                temp = temp.Remove(index - 2, 1).Replace("  ", " ");
            }
            else if (index > 3 && temp.Length == index + 1 && ".!?".Contains(temp[index - 2]) && temp[index - 1] == ' ' && ".!?".Contains(temp[index]))
            {
                temp = temp.Remove(index, 1).TrimEnd();
            }

            string pre = string.Empty;
            if (index > 0)
            {
                doRepeat = true;
            }

            // removeAfter stays true while none of the targeted punctuation clean-ups below has
            // handled the gap; it then enables the generic clean-up further down.
            bool removeAfter = true;
            if (index > 2 && temp.Length > index)
            {
                var ending = temp.Substring(index - 2, 3);
                if (ending == ", ." || ending == ", !" || ending == ", ?" || ending == ", …")
                {
                    temp = temp.Remove(index - 2, 2);
                    removeAfter = false;
                }
            }

            if (removeAfter && index > s.Length)
            {
                if (temp.Length > index - s.Length + 3)
                {
                    int subIndex = index - s.Length + 1;
                    string subTemp = temp.Substring(subIndex, 3);
                    if (subTemp == ", !" || subTemp == ", ?" || subTemp == ", .")
                    {
                        temp = temp.Remove(subIndex, 2);
                        removeAfter = false;
                    }
                    else if (subIndex > 3 && ".!?".Contains(temp.Substring(subIndex - 1, 1)))
                    {
                        subTemp = temp.Substring(subIndex);
                        if (subTemp == " ..." || subTemp.StartsWith(" ..." + Environment.NewLine, StringComparison.Ordinal))
                        {
                            temp = temp.Remove(subIndex, 4).Trim();
                            removeAfter = false;
                        }
                    }
                }

                if (removeAfter && temp.Length > index - s.Length + 2)
                {
                    int subIndex = index - s.Length;
                    string subTemp = temp.Substring(subIndex, 3);
                    if (subTemp == ", !" || subTemp == ", ?" || subTemp == ", .")
                    {
                        temp = temp.Remove(subIndex, 2);
                        removeAfter = false;
                    }
                    else
                    {
                        subTemp = temp.Substring(subIndex);
                        if (subTemp.StartsWith(", -—", StringComparison.Ordinal))
                        {
                            temp = temp.Remove(subIndex, 3);
                            removeAfter = false;
                        }
                        else if (subTemp.StartsWith(", --", StringComparison.Ordinal))
                        {
                            temp = temp.Remove(subIndex, 2);
                            removeAfter = false;
                        }
                        else if (index > 2 && subTemp.StartsWith("- —", StringComparison.Ordinal))
                        {
                            temp = temp.Remove(subIndex + 2, 2).Replace("  ", " ");
                            removeAfter = false;
                        }
                    }
                }

                if (removeAfter && temp.Length > index - s.Length + 2)
                {
                    int subIndex = index - s.Length + 1;
                    string subTemp = temp.Substring(subIndex, 2);
                    if (subTemp == "-!" || subTemp == "-?" || subTemp == "-.")
                    {
                        temp = temp.Remove(subIndex, 1);
                        removeAfter = false;
                    }

                    subTemp = temp.Substring(subIndex);
                    if (subTemp == " !" || subTemp == " ?" || subTemp == " .")
                    {
                        temp = temp.Remove(subIndex, 1);
                        removeAfter = false;
                    }
                }
            }

            if (index > 3 && index - 2 < temp.Length)
            {
                string subTemp = temp.Substring(index - 2);

                // NOTE(review): this test used to be written twice with the same literal; one operand was
                // presumably meant to cover a double-space variant - confirm against the upstream source.
                if (subTemp.StartsWith(", —", StringComparison.Ordinal))
                {
                    temp = temp.Remove(index - 2, 1);
                    index--;
                }
                if (subTemp.StartsWith("- ...", StringComparison.Ordinal))
                {
                    removeAfter = false;
                }
            }

            if (removeAfter)
            {
                if (index == 0)
                {
                    if (temp.StartsWith('-'))
                    {
                        temp = temp.Remove(0, 1).Trim();
                    }
                }
                else if (index == 3 && temp.StartsWith("<i>-", StringComparison.Ordinal))
                {
                    temp = temp.Remove(3, 1);
                }
                else if (index > 0 && temp.Length > index)
                {
                    // Split into the untouched part before the word (pre) and the remainder (temp).
                    pre = text.Substring(0, index);
                    temp = temp.Remove(0, index);
                    if (temp.StartsWith('-') && pre.EndsWith('-'))
                    {
                        temp = temp.Remove(0, 1);
                    }
                    if (temp.StartsWith('-') && pre.EndsWith("- ", StringComparison.Ordinal))
                    {
                        temp = temp.Remove(0, 1);
                    }
                }

                if (temp.StartsWith("...", StringComparison.Ordinal))
                {
                    pre = pre.Trim();
                }
                else
                {
                    // Strip the punctuation/space that belonged to the removed word.
                    while (temp.Length > 0 && " ,.?!".Contains(temp[0]))
                    {
                        temp = temp.Remove(0, 1);
                        doRepeat = true;
                    }
                }

                // The removed word started with an upper-case letter: capitalise what now comes first.
                if (temp.Length > 0 && s[0].ToString(CultureInfo.InvariantCulture) != s[0].ToString(CultureInfo.InvariantCulture).ToLowerInvariant())
                {
                    temp = char.ToUpper(temp[0]) + temp.Substring(1);
                    doRepeat = true;
                }
                if (temp.StartsWith('-') && pre.EndsWith(' '))
                {
                    temp = temp.Remove(0, 1);
                }
                if (temp.StartsWith('—') && pre.EndsWith(','))
                {
                    pre = pre.TrimEnd(',') + " ";
                }
                temp = pre + temp;
            }

            if (temp.EndsWith(Environment.NewLine + "- ", StringComparison.Ordinal))
            {
                temp = temp.Remove(temp.Length - 2).TrimEnd();
            }

            var st = new StrippableText(temp);
            if (st.StrippedText.Length == 0)
            {
                return string.Empty;
            }

            // A single remaining line of a formerly multi-line text no longer needs its dialog dash.
            if (temp.StartsWith('-') && !temp.Contains(Environment.NewLine) && text.Contains(Environment.NewLine))
            {
                temp = temp.Remove(0, 1).Trim();
            }

            text = temp;
        }
    }

    var lines = text.SplitToLines();
    if (lines.Count == 2 && text != oldText)
    {
        if (lines[0] == "-" && lines[1] == "-")
        {
            return string.Empty;
        }
        if (lines[0].Length > 1 && lines[0][0] == '-' && lines[1].Trim() == "-")
        {
            return lines[0].Remove(0, 1).Trim();
        }
        if (lines[1].Length > 1 && lines[1][0] == '-' && lines[0].Trim() == "-")
        {
            return lines[1].Remove(0, 1).Trim();
        }
        if (lines[1].Length > 4 && lines[1].StartsWith("<i>-", StringComparison.Ordinal) && lines[0].Trim() == "-")
        {
            return "<i>" + lines[1].Remove(0, 4).Trim();
        }

        // NOTE(review): '&&' binds tighter than '||', so the length check only guards the "-" case.
        // Behaviour is kept as it was - confirm whether parentheses around the '||' chain were intended.
        if (lines[0].Length > 1 && lines[1] == "-" || lines[1] == "." || lines[1] == "!" || lines[1] == "?")
        {
            if (lines[0].StartsWith('-') && oldText.Contains(Environment.NewLine + "-"))
            {
                lines[0] = lines[0].Remove(0, 1);
            }
            return lines[0].Trim();
        }

        var noTags0 = HtmlUtil.RemoveHtmlTags(lines[0]).Trim();
        var noTags1 = HtmlUtil.RemoveHtmlTags(lines[1]).Trim();
        if (noTags0 == "-")
        {
            if (noTags1 == noTags0)
            {
                return string.Empty;
            }
            if (lines[1].Length > 1 && lines[1][0] == '-')
            {
                return lines[1].Remove(0, 1).Trim();
            }
            if (lines[1].Length > 4 && lines[1].StartsWith("<i>-", StringComparison.Ordinal))
            {
                return "<i>" + lines[1].Remove(0, 4).Trim();
            }
            return lines[1];
        }
        if (noTags1 == "-")
        {
            if (lines[0].Length > 1 && lines[0][0] == '-')
            {
                return lines[0].Remove(0, 1).Trim();
            }
            if (lines[0].Length > 4 && lines[0].StartsWith("<i>-", StringComparison.Ordinal))
            {
                if (!lines[0].Contains("</i>") && lines[1].Contains("</i>"))
                {
                    return "<i>" + lines[0].Remove(0, 4).Trim() + "</i>";
                }
                return "<i>" + lines[0].Remove(0, 4).Trim();
            }
            return lines[0];
        }
    }

    // Drop a line that consists only of punctuation/dashes.
    if (lines.Count == 2)
    {
        if (string.IsNullOrWhiteSpace(lines[1].RemoveChar('.').RemoveChar('?').RemoveChar('!').RemoveChar('-').RemoveChar('—')))
        {
            text = lines[0];
            lines = text.SplitToLines();
        }
        else if (string.IsNullOrWhiteSpace(lines[0].RemoveChar('.').RemoveChar('?').RemoveChar('!').RemoveChar('-').RemoveChar('—')))
        {
            text = lines[1];
            lines = text.SplitToLines();
        }
    }

    // A two-line dialog became a single line: remove the leading dialog dash.
    if (lines.Count == 1 && text != oldText && Utilities.GetNumberOfLines(oldText) == 2)
    {
        bool oldHadDialogDash = oldText.StartsWith('-')
                                || oldText.StartsWith("<i>-", StringComparison.Ordinal)
                                || oldText.Contains(Environment.NewLine + "-")
                                || oldText.Contains(Environment.NewLine + "<i>-");
        bool oldHadSentenceEndBeforeLineBreak = oldText.Contains("." + Environment.NewLine)
                                                || oldText.Contains(".</i>" + Environment.NewLine)
                                                || oldText.Contains("!" + Environment.NewLine)
                                                || oldText.Contains("!</i>" + Environment.NewLine)
                                                || oldText.Contains("?" + Environment.NewLine)
                                                || oldText.Contains("?</i>" + Environment.NewLine);
        if (oldHadDialogDash && oldHadSentenceEndBeforeLineBreak)
        {
            if (text.StartsWith("<i>-", StringComparison.Ordinal))
            {
                text = "<i>" + text.Remove(0, 4).TrimStart();
            }
            else
            {
                text = text.TrimStart('-').TrimStart();
            }
        }
    }

    if (oldText != text)
    {
        // Re-attach italic tags that ended up alone on a line.
        text = text.Replace(Environment.NewLine + "<i>" + Environment.NewLine, Environment.NewLine + "<i>");
        text = text.Replace(Environment.NewLine + "</i>" + Environment.NewLine, "</i>" + Environment.NewLine);
        if (text.StartsWith("<i>" + Environment.NewLine, StringComparison.Ordinal))
        {
            text = text.Remove(3, Environment.NewLine.Length);
        }
        if (text.EndsWith(Environment.NewLine + "</i>", StringComparison.Ordinal))
        {
            text = text.Remove(text.Length - (Environment.NewLine.Length + 4), Environment.NewLine.Length);
        }
        text = text.Replace(Environment.NewLine + "</i>" + Environment.NewLine, "</i>" + Environment.NewLine);

        if (context.OnlySeparatedLines)
        {
            if (string.IsNullOrEmpty(text))
            {
                return text;
            }

            var oldLines = oldText.SplitToLines();
            var newLines = text.SplitToLines();
            if (oldLines.Count == 2 && newLines.Count == 1 && (oldLines[0] == newLines[0] || oldLines[1] == newLines[0]))
            {
                return text;
            }
            return oldText;
        }
    }

    // Do not leave double spaces behind unless the original text already had some.
    if (!oldText.Contains("  "))
    {
        while (text.Contains("  "))
        {
            text = text.Replace("  ", " ");
        }
    }

    return text;
}
/// <summary>
/// Runs the hearing-impaired clean-up over one subtitle text: each line is passed through the
/// removal helpers of this class, dialog dashes and italic tags are re-balanced, and the surrounding
/// "strippable" characters are put back around what is left.
/// </summary>
/// <param name="text">Subtitle text, possibly spanning several lines and containing tags.</param>
/// <returns>
/// The cleaned and trimmed text. An empty string is returned when the whole text is recognised by
/// StartsAndEndsWithHearImpairedTags or contains one of the configured Settings.RemoveIfTextContains strings.
/// </returns>
/// <remarks>
/// Fixes compared to the previous version: the two space clean-ups replaced a single space with a
/// single space (a no-op) and now collapse double spaces; the four prefix checks that are followed by
/// Remove(0, removeText.Length) now use ordinal comparison, so the number of removed characters always
/// matches what was compared.
/// </remarks>
public string RemoveTextFromHearImpaired(string text)
{
    // Characters treated as strippable noise before/after the real text.
    const string defaultPre = " >-\"'‘`´♪¿¡.…—";
    const string defaultPost = " -\"'`´♪.!?:…—";

    if (StartsAndEndsWithHearImpairedTags(HtmlUtil.RemoveHtmlTags(text, true).TrimStart(TrimStartNoiseChar)))
    {
        return string.Empty;
    }

    if (Settings.RemoveWhereContains)
    {
        foreach (var removeIfTextContain in Settings.RemoveIfTextContains)
        {
            if (text.Contains(removeIfTextContain))
            {
                return string.Empty;
            }
        }
    }

    string oldText = text;
    text = RemoveColon(text);

    string pre = defaultPre;
    string post = defaultPost;
    if (Settings.RemoveTextBetweenCustomTags)
    {
        // Custom start/end tags must not be stripped as noise.
        pre = pre.Replace(Settings.CustomStart, string.Empty);
        post = post.Replace(Settings.CustomEnd, string.Empty);
    }

    var st = new StrippableText(text, pre, post);
    var sb = new StringBuilder();
    var parts = st.StrippedText.Trim().SplitToLines();
    int lineNumber = 0;
    bool removedDialogInFirstLine = false;
    int noOfNamesRemoved = 0;
    int noOfNamesRemovedNotInLineOne = 0;
    foreach (string s in parts)
    {
        var stSub = new StrippableText(s, pre, post);
        string strippedText = stSub.StrippedText;

        // Put a stripped '?' back for the whole-line check only.
        if ((lineNumber == parts.Length - 1 && st.Post.Contains('?')) || stSub.Post.Contains('?'))
        {
            strippedText += "?";
        }

        if (!StartsAndEndsWithHearImpairedTags(strippedText))
        {
            // Line is kept; remove tagged parts inside it.
            if (removedDialogInFirstLine && stSub.Pre.Contains("- "))
            {
                stSub.Pre = stSub.Pre.Replace("- ", string.Empty);
            }

            string newText = stSub.StrippedText;
            newText = RemoveHearImpairedTags(newText);
            if (stSub.StrippedText.Length - newText.Length > 2)
            {
                string removedText = GetRemovedString(stSub.StrippedText, newText);
                if (!IsHIDescription(removedText))
                {
                    noOfNamesRemoved++;
                    if (lineNumber > 0)
                    {
                        noOfNamesRemovedNotInLineOne++;
                    }
                }
            }

            if (stSub.Pre == "<i>- " && newText.StartsWith("</i>", StringComparison.Ordinal))
            {
                sb.AppendLine("- " + newText.Remove(0, 4).Trim() + stSub.Post);
            }
            else
            {
                sb.AppendLine(stSub.Pre + newText + stSub.Post);
            }
        }
        else
        {
            // Whole line is dropped; fix up the outer pre/post text accordingly.
            if (!IsHIDescription(stSub.StrippedText))
            {
                noOfNamesRemoved++;
                if (lineNumber > 0)
                {
                    noOfNamesRemovedNotInLineOne++;
                }
            }

            if (lineNumber == 0)
            {
                if (st.Pre.Contains("- "))
                {
                    st.Pre = st.Pre.Replace("- ", string.Empty);
                    removedDialogInFirstLine = true;
                }
                else if (st.Pre == "-")
                {
                    st.Pre = string.Empty;
                    removedDialogInFirstLine = true;
                }
            }

            if (st.Pre.Contains("<i>") && stSub.Post.Contains("</i>"))
            {
                st.Pre = st.Pre.Replace("<i>", string.Empty);
            }
            if (s.Contains("<i>") && !s.Contains("</i>") && st.Post.Contains("</i>"))
            {
                st.Post = st.Post.Replace("</i>", string.Empty);
            }

            if (lineNumber == parts.Length - 1)
            {
                if (st.Post.Replace("♪", string.Empty).Replace("♫", string.Empty).Trim().Length == 0)
                {
                    st.Post = string.Empty;
                }
            }
        }

        lineNumber++;
    }

    text = st.Pre + sb.ToString().Trim() + st.Post;
    text = text.Replace("  ", " ").Trim();
    text = text.Replace("<i></i>", string.Empty);
    text = text.Replace("<i> </i>", " ");
    text = text.Replace("<b></b>", string.Empty);
    text = text.Replace("<b> </b>", " ");
    text = text.Replace("<u></u>", string.Empty);
    text = text.Replace("<u> </u>", " ");
    text = RemoveEmptyFontTag(text);
    text = text.Replace("  ", " ").Trim();
    text = RemoveColon(text);
    text = RemoveLineIfAllUppercase(text);
    text = RemoveHearImpairedtagsInsideLine(text);
    if (Settings.RemoveInterjections)
    {
        text = RemoveInterjections(text);
    }

    // Second pass always uses the default noise characters, regardless of custom tags.
    st = new StrippableText(text, defaultPre, defaultPost);
    text = st.StrippedText;
    if (StartsAndEndsWithHearImpairedTags(text))
    {
        text = RemoveStartEndTags(text);
    }

    text = RemoveHearImpairedTags(text);

    // fix 3 lines to two liners - if only two lines
    if (noOfNamesRemoved >= 1 && Utilities.GetNumberOfLines(text) == 3)
    {
        var splitChars = new[] { '.', '?', '!' };
        var splitParts = HtmlUtil.RemoveHtmlTags(text).Replace(" ", string.Empty).Split(splitChars, StringSplitOptions.RemoveEmptyEntries);
        if (splitParts.Length == 2)
        {
            var temp = new StrippableText(text);
            temp.StrippedText = temp.StrippedText.Replace(Environment.NewLine, " ");
            int splitIndex = temp.StrippedText.LastIndexOfAny(splitChars);
            if (splitIndex > 0)
            {
                text = temp.Pre + temp.StrippedText.Insert(splitIndex + 1, Environment.NewLine) + temp.Post;
            }
        }
    }

    // Names were removed from a two-line text: add dialog dashes when the first line ends a sentence.
    if (!text.StartsWith('-') && noOfNamesRemoved >= 1 && Utilities.GetNumberOfLines(text) == 2)
    {
        var lines = text.SplitToLines();
        var part0 = lines[0].Trim().Replace("</i>", string.Empty).Trim();
        if (!part0.EndsWith(',') && (!part0.EndsWith('-') || noOfNamesRemovedNotInLineOne > 0))
        {
            if (part0.Length > 0 && ".?!".Contains(part0[part0.Length - 1]))
            {
                if (noOfNamesRemovedNotInLineOne > 0)
                {
                    if (!st.Pre.Contains('-') && !text.Contains(Environment.NewLine + "-"))
                    {
                        text = "- " + text.Replace(Environment.NewLine, Environment.NewLine + "- ");
                    }
                    if (!text.Contains(Environment.NewLine + "-") && !text.Contains(Environment.NewLine + "<i>-"))
                    {
                        text = text.Replace(Environment.NewLine, Environment.NewLine + "- ");
                    }
                }
            }
        }
    }

    if (!string.IsNullOrEmpty(text) || (st.Pre.Contains('♪') || st.Post.Contains('♪')))
    {
        text = st.Pre + text + st.Post;
    }

    // The checks below remove a leading dialog dash when a dialog shrank to a single line.
    string oldTrimmedStart = oldText.TrimStart();
    bool oldHadDialogOnLaterLine = oldText.Contains(Environment.NewLine + "- ")
                                   || oldText.Contains(Environment.NewLine + " - ")
                                   || oldText.Contains(Environment.NewLine + "<i>- ")
                                   || oldText.Contains(Environment.NewLine + "<i> - ");

    if (oldTrimmedStart.StartsWith("- ", StringComparison.Ordinal) && text != null && !text.Contains(Environment.NewLine) && oldHadDialogOnLaterLine)
    {
        if (text.StartsWith("<i>-", StringComparison.Ordinal))
        {
            text = "<i>" + text.Remove(0, 4).Trim();
        }
        else
        {
            text = text.TrimStart().TrimStart('-').TrimStart();
        }
    }

    if (oldTrimmedStart.StartsWith('-') && !oldTrimmedStart.StartsWith("--", StringComparison.Ordinal) && text != null && !text.Contains(Environment.NewLine) &&
        (oldText.Contains(Environment.NewLine + "-") && !oldText.Contains(Environment.NewLine + "--") ||
         oldText.Contains(Environment.NewLine + " - ") ||
         oldText.Contains(Environment.NewLine + "<i>- ") ||
         oldText.Contains(Environment.NewLine + "<i> - ")))
    {
        text = text.TrimStart().TrimStart('-').TrimStart();
    }

    if (oldTrimmedStart.StartsWith("<i>- ", StringComparison.Ordinal) && text != null && text.StartsWith("<i>- ", StringComparison.Ordinal) && !text.Contains(Environment.NewLine) && oldHadDialogOnLaterLine)
    {
        text = text.Remove(3, 2);
    }

    if (text != null && !text.Contains(Environment.NewLine) &&
        (oldText.Contains(':') && !text.Contains(':') ||
         oldText.Contains('[') && !text.Contains('[') ||
         oldText.Contains('(') && !text.Contains('(') ||
         oldText.Contains('{') && !text.Contains('{')) &&
        oldHadDialogOnLaterLine)
    {
        text = text.TrimStart().TrimStart('-').TrimStart();
    }

    // Remove a first line that consists of nothing but an (italic) dash, together with the dash
    // that opens the second line. Ordinal comparison keeps the match aligned with removeText.Length.
    string removeText = "<i>- </i>" + Environment.NewLine + "-";
    if (text.StartsWith(removeText, StringComparison.Ordinal))
    {
        text = text.Remove(0, removeText.Length).TrimStart(' ');
    }

    removeText = "<i>-</i>" + Environment.NewLine + "-";
    if (text.StartsWith(removeText, StringComparison.Ordinal))
    {
        text = text.Remove(0, removeText.Length).TrimStart(' ');
    }

    removeText = "<i>-</i>" + Environment.NewLine + "<i>-";
    if (text.StartsWith(removeText, StringComparison.Ordinal))
    {
        text = "<i>" + text.Remove(0, removeText.Length).TrimStart(' ');
    }

    removeText = "<i>- </i>" + Environment.NewLine + "<i>-";
    if (text.StartsWith(removeText, StringComparison.Ordinal))
    {
        text = "<i>" + text.Remove(0, removeText.Length).TrimStart(' ');
    }

    if (oldText != text)
    {
        // insert spaces before "-"
        text = text.Replace(Environment.NewLine + "- <i>", Environment.NewLine + "<i>- ");
        text = text.Replace(Environment.NewLine + "-<i>", Environment.NewLine + "<i>- ");

        if (text.Length > 2 && text[0] == '-' && text[1] != ' ' && text[1] != '-')
        {
            text = text.Insert(1, " ");
        }

        if (text.Length > 5 && text.StartsWith("<i>-", StringComparison.Ordinal) && text[4] != ' ' && text[4] != '-')
        {
            text = text.Insert(4, " ");
        }

        int index = text.IndexOf(Environment.NewLine + "-", StringComparison.Ordinal);
        if (index >= 0 && text.Length - index > 4)
        {
            index += Environment.NewLine.Length + 1;
            if (text[index] != ' ' && text[index] != '-')
            {
                text = text.Insert(index, " ");
            }
        }

        index = text.IndexOf(Environment.NewLine + "<i>-", StringComparison.Ordinal);
        if (index >= 0 && text.Length - index > 5)
        {
            index += Environment.NewLine.Length + 4;
            if (text[index] != ' ' && text[index] != '-')
            {
                text = text.Insert(index, " ");
            }
        }
    }

    return text.Trim();
}
public string RemoveColon(string text) { if (!(Settings.RemoveTextBeforeColon && text.Contains(':'))) return text; string preAssTag = string.Empty; if (text.StartsWith("{\\", StringComparison.Ordinal)) { int indexOfEndBracketSuccessor = text.IndexOf('}') + 1; if (indexOfEndBracketSuccessor > 0) { preAssTag = text.Substring(0, indexOfEndBracketSuccessor); text = text.Remove(0, indexOfEndBracketSuccessor).TrimStart(); } } // House 7x01 line 52: and she would like you to do three things: // Okay or remove??? string noTagText = HtmlUtil.RemoveHtmlTags(text); if (noTagText.Length > 10 && noTagText.IndexOf(':') == noTagText.Length - 1 && noTagText != noTagText.ToUpper()) return text; string newText = string.Empty; var lines = text.Trim().SplitToLines(); int noOfNames = 0; int count = 0; bool removedInFirstLine = false; bool removedInSecondLine = false; foreach (string line in lines) { int indexOfColon = line.IndexOf(':'); bool isLastColon = count == lines.Length - 1 && !HtmlUtil.RemoveHtmlTags(line).TrimEnd(':').Contains(':'); if (indexOfColon <= 0 || IsInsideBrackets(line, indexOfColon) || (isLastColon && Utilities.CountTagInText(HtmlUtil.RemoveHtmlTags(line), ' ') > 1)) { newText = (newText + Environment.NewLine + line).Trim(); if (newText.EndsWith("</i>", StringComparison.Ordinal) && text.StartsWith("<i>", StringComparison.Ordinal) && !newText.StartsWith("<i>", StringComparison.Ordinal)) newText = "<i>" + newText; else if (newText.EndsWith("</b>", StringComparison.Ordinal) && text.StartsWith("<b>", StringComparison.Ordinal) && !newText.StartsWith("<b>", StringComparison.Ordinal)) newText = "<b>" + newText; } else { var pre = line.Substring(0, indexOfColon); var noTagPre = HtmlUtil.RemoveHtmlTags(pre, true); if (Settings.RemoveTextBeforeColonOnlyUppercase && noTagPre != noTagPre.ToUpper()) { string s = line; string l1Trim = HtmlUtil.RemoveHtmlTags(lines[0]).TrimEnd('"'); if (count == 1 && lines.Length == 2 && !l1Trim.EndsWith('.') && !l1Trim.EndsWith('!') && 
!l1Trim.EndsWith('?')) { var indexOf = line.IndexOfAny(ExpectedStrings, StringComparison.Ordinal); if (indexOf > 0 && indexOf < indexOfColon) { var toRemove = s.Substring(indexOf + 1, indexOfColon - indexOf).Trim(); if (toRemove.Length > 1 && toRemove == toRemove.ToUpper()) { s = s.Remove(indexOf + 1, indexOfColon - indexOf); s = s.Insert(indexOf + 1, " -"); if (newText.StartsWith("<i>", StringComparison.Ordinal) && !newText.StartsWith("<i>-", StringComparison.Ordinal)) newText = "<i>- " + newText.Remove(0, 3); else if (!newText.StartsWith("-")) newText = "- " + newText; } } } newText = (newText + Environment.NewLine + s).Trim(); } else { var newTextNoHtml = HtmlUtil.RemoveHtmlTags(newText); if (Utilities.CountTagInText(line, ':') == 1) { if (count == 1 && newText.Length > 1 && removedInFirstLine && !".?!".Contains(newTextNoHtml[newTextNoHtml.Length - 1]) && newText.LineEndsWithHtmlTag(true) && line != line.ToUpper()) { newText += Environment.NewLine; if (pre.Contains("<i>") && line.Contains("</i>") && !line.Contains("<i>")) newText += "<i>" + line; else if (pre.Contains("<b>") && line.Contains("</b>") && !line.Contains("<b>")) newText += "<b>" + line; else if (pre.Contains("<u>") && line.Contains("</u>") && !line.Contains("<u>")) newText += "<u>" + line; else if (pre.Contains('[') && line.Contains(']') && !line.Contains("[")) newText += "[" + line; else if (pre.Contains('(') && line.EndsWith(')') && !line.Contains("(")) newText += "(" + line; else newText += line; } else if (count == 1 && newText.Length > 1 && indexOfColon > 15 && line.Substring(0, indexOfColon).Contains(' ') && !".?!".Contains(newTextNoHtml[newTextNoHtml.Length - 1]) && newText.LineEndsWithHtmlTag(true) && line != line.ToUpper()) { newText += Environment.NewLine; if (pre.Contains("<i>") && line.Contains("</i>") && !line.Contains("<i>")) newText += "<i>" + line; else if (pre.Contains("<b>") && line.Contains("</b>") && !line.Contains("<b>")) newText += "<b>" + line; else if (pre.Contains("<u>") && 
line.Contains("</u>") && !line.Contains("<u>")) newText += "<u>" + line; else if (pre.Contains('[') && line.Contains(']') && !line.Contains("[")) newText += "[" + line; else if (pre.Contains('(') && line.EndsWith(')') && !line.Contains("(")) newText += "(" + line; else newText += line; } else { var preStrippable = new StrippableText(pre); var remove = true; if (indexOfColon < line.Length - 1) { if (Settings.ColonSeparateLine && !line.Substring(indexOfColon + 1).StartsWith(Environment.NewLine, StringComparison.Ordinal)) remove = false; else if (Utilities.IsBetweenNumbers(line, indexOfColon)) remove = false; } if (remove && !DoRemove(pre)) remove = false; string l1Trimmed = HtmlUtil.RemoveHtmlTags(lines[0]).TrimEnd('"'); if (count == 1 && lines.Length == 2 && !l1Trimmed.EndsWith('.') && !l1Trimmed.EndsWith('!') && !l1Trimmed.EndsWith('?') && !l1Trimmed.EndsWith("--", StringComparison.Ordinal) && !l1Trimmed.EndsWith("—", StringComparison.Ordinal)) { remove = false; } if (remove) { var content = line.Substring(indexOfColon + 1).Trim(); if (content.Length > 0) { newText += Environment.NewLine; if (pre.Contains("<i>") && content.Contains("</i>")) newText += "<i>" + content; else if (pre.Contains("<b>") && content.Contains("</b>")) newText += "<b>" + content; else if (pre.Contains('[') && content.Contains(']')) newText += "[" + content; else if (pre.Contains('(') && content.EndsWith(')')) newText += "(" + content; else newText += content; if (count == 0) removedInFirstLine = true; else if (count == 1) removedInSecondLine = true; } newText = newText.Trim(); if (text.StartsWith('(') && newText.EndsWith(')') && !newText.Contains('(')) newText = newText.TrimEnd(')'); else if (text.StartsWith('[') && newText.EndsWith(']') && !newText.Contains('[')) newText = newText.TrimEnd(']'); else if (newText.EndsWith("</i>", StringComparison.Ordinal) && text.StartsWith("<i>", StringComparison.Ordinal) && !newText.StartsWith("<i>", StringComparison.Ordinal)) newText = "<i>" + newText; else 
if (newText.EndsWith("</b>", StringComparison.Ordinal) && text.StartsWith("<b>", StringComparison.Ordinal) && !newText.StartsWith("<b>", StringComparison.Ordinal)) newText = "<b>" + newText; else if (newText.EndsWith("</u>", StringComparison.Ordinal) && text.StartsWith("<u>", StringComparison.Ordinal) && !newText.StartsWith("<u>", StringComparison.Ordinal)) newText = "<u>" + newText; if (!IsHIDescription(preStrippable.StrippedText)) noOfNames++; } else { string s = line; string l1Trim = HtmlUtil.RemoveHtmlTags(lines[0]).TrimEnd('"'); if (count == 1 && lines.Length == 2 && !l1Trim.EndsWith('.') && !l1Trim.EndsWith('!') && !l1Trim.EndsWith('?')) { int indexOf = line.IndexOf(". ", StringComparison.Ordinal); if (indexOf == -1) indexOf = line.IndexOf("! ", StringComparison.Ordinal); if (indexOf == -1) indexOf = line.IndexOf("? ", StringComparison.Ordinal); if (indexOf > 0 && indexOf < indexOfColon) { s = s.Remove(indexOf + 1, indexOfColon - indexOf); s = s.Insert(indexOf + 1, " -"); if (newText.StartsWith("<i>") && !newText.StartsWith("<i>-")) newText = "<i>- " + newText.Remove(0, 3); else if (!newText.StartsWith("-")) newText = "- " + newText; } } newText = (newText + Environment.NewLine + s).Trim(); if (newText.EndsWith("</i>", StringComparison.Ordinal) && text.StartsWith("<i>", StringComparison.Ordinal) && !newText.StartsWith("<i>", StringComparison.Ordinal)) newText = "<i>" + newText; else if (newText.EndsWith("</b>", StringComparison.Ordinal) && text.StartsWith("<b>", StringComparison.Ordinal) && !newText.StartsWith("<b>", StringComparison.Ordinal)) newText = "<b>" + newText; else if ((newText.EndsWith("</u>", StringComparison.Ordinal) && text.StartsWith("<u>", StringComparison.Ordinal) && !newText.StartsWith("<u>", StringComparison.Ordinal))) newText = "<u>" + newText; } } } else { char[] endChars = { '.', '?', '!' 
}; string s2 = line; for (int k = 0; k < 2; k++) { if (s2.Contains(':')) { int colonIndex = s2.IndexOf(':'); string start = s2.Substring(0, colonIndex); if (!Settings.RemoveTextBeforeColonOnlyUppercase || start == start.ToUpper()) { int endIndex = start.LastIndexOfAny(endChars); if (colonIndex > 0 && colonIndex < s2.Length - 1) { if (char.IsDigit(s2[colonIndex - 1]) && char.IsDigit(s2[colonIndex + 1])) endIndex = 0; } if (endIndex < 0) s2 = s2.Remove(0, colonIndex - endIndex); else if (endIndex > 0) s2 = s2.Remove(endIndex + 1, colonIndex - endIndex); } if (count == 0) removedInFirstLine = true; else if (count == 1) removedInSecondLine = true; } } newText = (newText + Environment.NewLine + s2).Trim(); } } } count++; } newText = newText.Trim(); if ((noOfNames > 0 || removedInFirstLine) && Utilities.GetNumberOfLines(newText) == 2) { int indexOfDialogChar = newText.IndexOf('-'); bool insertDash = true; var arr = newText.SplitToLines(); if (arr.Length == 2 && arr[0].Length > 1 && arr[1].Length > 1) { string arr0 = new StrippableText(arr[0]).StrippedText; var arr1Strippable = new StrippableText(arr[1]); string arr1 = arr1Strippable.StrippedText; if (arr0.Length > 0 && arr1.Length > 1) { // line continuation? 
if (char.IsLower(arr1[0])) // second line starts with lower case letter { char c = arr0[arr0.Length - 1]; if (char.IsLower(c) || c == ',') // first line ends with comma or lower case letter { if (!arr1Strippable.Pre.Contains("...")) { insertDash = false; } } } if (insertDash) { string arr0QuoteTrimmed = arr[0].TrimEnd('"'); if (arr0QuoteTrimmed.Length > 0 && !".?!".Contains(arr0QuoteTrimmed[arr0QuoteTrimmed.Length - 1]) && !arr0QuoteTrimmed.EndsWith("</i>", StringComparison.Ordinal) && !arr0QuoteTrimmed.EndsWith("--", StringComparison.Ordinal) && !arr0QuoteTrimmed.EndsWith("—", StringComparison.Ordinal)) { if (!arr1Strippable.Pre.Contains('-')) { insertDash = false; } } } } if (insertDash && removedInFirstLine && !removedInSecondLine && !text.StartsWith('-') && !text.StartsWith("<i>-", StringComparison.Ordinal)) { if (!arr[1].StartsWith('-') && !arr[1].StartsWith("<i>-", StringComparison.Ordinal)) insertDash = false; } } if (insertDash) { if (indexOfDialogChar < 0 || indexOfDialogChar > 4) { var st = new StrippableText(newText, string.Empty, string.Empty); newText = st.Pre + "- " + st.StrippedText + st.Post; } int indexOfNewLine = newText.IndexOf(Environment.NewLine, StringComparison.Ordinal); string second = newText.Substring(indexOfNewLine).Trim(); indexOfDialogChar = second.IndexOf(" -", StringComparison.Ordinal); if (indexOfDialogChar < 0 || indexOfDialogChar > 6) { indexOfDialogChar = second.IndexOf("- ", StringComparison.Ordinal); } if ((indexOfDialogChar < 0 || indexOfDialogChar > 6) && !second.StartsWith('-')) { var st = new StrippableText(second, String.Empty, String.Empty); second = st.Pre + "- " + st.StrippedText + st.Post; newText = newText.Remove(indexOfNewLine) + Environment.NewLine + second; } } } else if (newText.Contains('-') && !newText.Contains(Environment.NewLine)) { var st = new StrippableText(newText); if (st.Pre.Contains('-') && !st.Pre.Contains("--")) newText = st.Pre.Replace("-", string.Empty) + st.StrippedText + st.Post; } else if 
(removedInSecondLine && !removedInFirstLine && Utilities.GetNumberOfLines(newText) == 2) { string noTags = HtmlUtil.RemoveHtmlTags(newText, true).Trim(); bool insertDash = noTags.StartsWith('-') && Utilities.CountTagInText(noTags, '-') == 1; if (insertDash) { if (newText.Contains(Environment.NewLine + "<i>")) newText = newText.Replace(Environment.NewLine + "<i>", Environment.NewLine + "<i>- "); else newText = newText.Replace(Environment.NewLine, Environment.NewLine + "- "); } } if (text.Contains("<i>") && !newText.Contains("<i>") && newText.EndsWith("</i>", StringComparison.Ordinal)) newText = "<i>" + newText; if (string.IsNullOrWhiteSpace(newText)) return string.Empty; return preAssTag + newText; }
/// <summary>
/// Removes configured interjections (whole-word matches) from <paramref name="text"/> and tidies
/// up the punctuation, dashes and dialog hyphens that the removal leaves behind.
/// </summary>
/// <remarks>
/// The interjection list is built on first use from
/// <c>Configuration.Settings.Tools.Interjections</c> (a ';'-separated string). For every entry the
/// original, upper-case, lower-case and first-letter-capitalized spellings are added; the result
/// is sorted with <c>CompareLength</c> and cached in <c>_interjectionList</c>.
/// </remarks>
/// <param name="text">Subtitle text; may contain italic tags and line breaks.</param>
/// <returns>
/// The cleaned text, or <see cref="string.Empty"/> when a removal leaves nothing but strippable
/// characters (or two bare dialog hyphens).
/// </returns>
public string RemoveInterjections(string text)
{
    if (_interjectionList == null)
    {
        var interjectionList = new HashSet<string>();
        foreach (var s in Configuration.Settings.Tools.Interjections.Split(new[] { ';' }, StringSplitOptions.RemoveEmptyEntries))
        {
            if (s.Length > 0)
            {
                interjectionList.Add(s);
                var upper = s.ToUpper();
                interjectionList.Add(upper);
                var lower = s.ToLower();
                interjectionList.Add(lower);
                interjectionList.Add(lower.CapitalizeFirstLetter());
            }
        }
        _interjectionList = new List<string>(interjectionList);
        interjectionList.Clear();
        interjectionList.TrimExcess();
        _interjectionList.Sort(CompareLength);
    }

    string oldText = text;
    bool doRepeat = true;
    while (doRepeat)
    {
        // Each pass removes at most the first whole-word hit of every interjection; another pass
        // is requested whenever a removal may have exposed a further hit.
        doRepeat = false;
        foreach (string s in _interjectionList)
        {
            if (text.Contains(s))
            {
                var regex = new Regex("\\b" + Regex.Escape(s) + "\\b");
                var match = regex.Match(text);
                if (match.Success)
                {
                    int index = match.Index;
                    string temp = text.Remove(index, s.Length);

                    // Close the gap between a leading ellipsis and the following word.
                    while (index == 0 && temp.StartsWith("... ", StringComparison.Ordinal))
                    {
                        temp = temp.Remove(3, 1);
                    }
                    while (index == 3 && temp.StartsWith("<i>... ", StringComparison.Ordinal))
                    {
                        temp = temp.Remove(6, 1);
                    }
                    while (index > 2 && (" \r\n".Contains(text.Substring(index - 1, 1))) && temp.Substring(index).StartsWith("... ", StringComparison.Ordinal))
                    {
                        temp = temp.Remove(index + 3, 1);
                    }

                    // Em-dash clean-up around the removed word.
                    // NOTE(review): several literals below contain a single space where the
                    // surrounding arithmetic suggests two may have been intended (e.g. "— —");
                    // only the provably broken ones were changed - confirm the rest.
                    if (temp.Remove(0, index) == " —" && temp.EndsWith("— —", StringComparison.Ordinal))
                    {
                        temp = temp.Remove(temp.Length - 3);
                        if (temp.EndsWith(Environment.NewLine + "—", StringComparison.Ordinal))
                        {
                            temp = temp.Remove(temp.Length - 1).TrimEnd();
                        }
                    }
                    else if (temp.Remove(0, index) == " —" && temp.EndsWith("- —", StringComparison.Ordinal))
                    {
                        temp = temp.Remove(temp.Length - 3);
                        if (temp.EndsWith(Environment.NewLine + "-", StringComparison.Ordinal))
                        {
                            temp = temp.Remove(temp.Length - 1).TrimEnd();
                        }
                    }
                    else if (index == 2 && temp.StartsWith("-  —", StringComparison.Ordinal))
                    {
                        // "- Oh —" leaves "-  —" (two spaces). With a single-space literal this
                        // branch shadowed the next one and Remove(2, 2) deleted the character
                        // following the dash.
                        temp = temp.Remove(2, 2);
                    }
                    else if (index == 2 && temp.StartsWith("- —", StringComparison.Ordinal))
                    {
                        temp = temp.Remove(2, 1);
                    }
                    else if (index == 0 && temp.StartsWith(" —", StringComparison.Ordinal))
                    {
                        temp = temp.Remove(0, 2);
                    }
                    else if (index == 0 && temp.StartsWith('—'))
                    {
                        temp = temp.Remove(0, 1);
                    }
                    else if (index > 3 && (temp.Substring(index - 2) == ". —" || temp.Substring(index - 2) == "! —" || temp.Substring(index - 2) == "? —"))
                    {
                        // Collapse double spaces; replacing " " with " " was a no-op.
                        temp = temp.Remove(index - 2, 1).Replace("  ", " ");
                    }

                    string pre = string.Empty;
                    if (index > 0)
                    {
                        doRepeat = true;
                    }

                    bool removeAfter = true;

                    if (index > s.Length)
                    {
                        if (temp.Length > index - s.Length + 3)
                        {
                            int subIndex = index - s.Length + 1;
                            string subTemp = temp.Substring(subIndex, 3);
                            if (subTemp == ", !" || subTemp == ", ?" || subTemp == ", .")
                            {
                                temp = temp.Remove(subIndex, 2);
                                removeAfter = false;
                            }
                        }
                        if (removeAfter && temp.Length > index - s.Length + 2)
                        {
                            int subIndex = index - s.Length;
                            string subTemp = temp.Substring(subIndex, 3);
                            if (subTemp == ", !" || subTemp == ", ?" || subTemp == ", .")
                            {
                                temp = temp.Remove(subIndex, 2);
                                removeAfter = false;
                            }
                            else
                            {
                                subTemp = temp.Substring(subIndex);
                                if (subTemp.StartsWith(", -—", StringComparison.Ordinal))
                                {
                                    temp = temp.Remove(subIndex, 3);
                                    removeAfter = false;
                                }
                                else if (subTemp.StartsWith(", --", StringComparison.Ordinal))
                                {
                                    temp = temp.Remove(subIndex, 2);
                                    removeAfter = false;
                                }
                                else if (index > 2 && subTemp.StartsWith("- —", StringComparison.Ordinal))
                                {
                                    // NOTE(review): Remove(subIndex + 2, 2) deletes the dash and
                                    // the character after it - confirm whether the prefix was
                                    // meant to contain two spaces.
                                    temp = temp.Remove(subIndex + 2, 2).Replace("  ", " ");
                                    removeAfter = false;
                                }
                            }
                        }
                        if (removeAfter && temp.Length > index - s.Length + 2)
                        {
                            int subIndex = index - s.Length + 1;
                            string subTemp = temp.Substring(subIndex, 2);
                            if (subTemp == "-!" || subTemp == "-?" || subTemp == "-.")
                            {
                                temp = temp.Remove(subIndex, 1);
                                removeAfter = false;
                            }
                            subTemp = temp.Substring(subIndex);
                            if (subTemp == " !" || subTemp == " ?" || subTemp == " .")
                            {
                                temp = temp.Remove(subIndex, 1);
                                removeAfter = false;
                            }
                        }
                    }

                    if (index > 3 && index - 2 < temp.Length)
                    {
                        string subTemp = temp.Substring(index - 2);

                        // Drop the comma before a dash, with either two or one space in between.
                        // Both operands used to be the same single-space literal.
                        if (subTemp.StartsWith(",  —", StringComparison.Ordinal) || subTemp.StartsWith(", —", StringComparison.Ordinal))
                        {
                            temp = temp.Remove(index - 2, 1);
                            index--;
                        }
                        if (subTemp.StartsWith("- ...", StringComparison.Ordinal))
                        {
                            removeAfter = false;
                        }
                    }

                    if (removeAfter)
                    {
                        if (index == 0)
                        {
                            if (temp.StartsWith('-'))
                            {
                                temp = temp.Remove(0, 1).Trim();
                            }
                        }
                        else if (index == 3 && temp.StartsWith("<i>-", StringComparison.Ordinal))
                        {
                            temp = temp.Remove(3, 1);
                        }
                        else if (index > 0 && temp.Length > index)
                        {
                            // Split into the part before the removed word and the rest, so the
                            // rest can be cleaned separately and re-joined below.
                            pre = text.Substring(0, index);
                            temp = temp.Remove(0, index);
                            if (temp.StartsWith('-') && pre.EndsWith('-'))
                            {
                                temp = temp.Remove(0, 1);
                            }
                            if (temp.StartsWith('-') && pre.EndsWith("- ", StringComparison.Ordinal))
                            {
                                temp = temp.Remove(0, 1);
                            }
                        }

                        if (temp.StartsWith("..."))
                        {
                            pre = pre.Trim();
                        }
                        else
                        {
                            while (temp.Length > 0 && " ,.?!".Contains(temp[0]))
                            {
                                temp = temp.Remove(0, 1);
                                doRepeat = true;
                            }
                        }

                        // If the interjection started with a capital, capitalize what follows it.
                        if (temp.Length > 0 && s[0].ToString(CultureInfo.InvariantCulture) != s[0].ToString(CultureInfo.InvariantCulture).ToLower())
                        {
                            temp = char.ToUpper(temp[0]) + temp.Substring(1);
                            doRepeat = true;
                        }

                        if (temp.StartsWith('-') && pre.EndsWith(' '))
                        {
                            temp = temp.Remove(0, 1);
                        }

                        if (temp.StartsWith('—') && pre.EndsWith(','))
                        {
                            pre = pre.TrimEnd(',') + " ";
                        }
                        temp = pre + temp;
                    }

                    if (temp.EndsWith(Environment.NewLine + "- ", StringComparison.Ordinal))
                    {
                        temp = temp.Remove(temp.Length - 2).TrimEnd();
                    }

                    var st = new StrippableText(temp);
                    if (st.StrippedText.Length == 0)
                    {
                        return string.Empty;
                    }

                    // A two-line dialog collapsed to one line no longer needs its hyphen.
                    if (temp.StartsWith('-') && !temp.Contains(Environment.NewLine) && text.Contains(Environment.NewLine))
                    {
                        temp = temp.Remove(0, 1).Trim();
                    }

                    text = temp;
                }
            }
        }
    }

    var lines = text.SplitToLines();
    if (lines.Length == 2 && text != oldText)
    {
        if (lines[0] == "-" && lines[1] == "-")
        {
            return string.Empty;
        }
        if (lines[0].Length > 1 && lines[0][0] == '-' && lines[1].Trim() == "-")
        {
            return lines[0].Remove(0, 1).Trim();
        }
        if (lines[1].Length > 1 && lines[1][0] == '-' && lines[0].Trim() == "-")
        {
            return lines[1].Remove(0, 1).Trim();
        }
        if (lines[1].Length > 4 && lines[1].StartsWith("<i>-", StringComparison.Ordinal) && lines[0].Trim() == "-")
        {
            return "<i>" + lines[1].Remove(0, 4).Trim();
        }

        // '&&' binds tighter than '||': the length check applies to the "-" case only. The
        // parentheses spell out the existing evaluation order; behaviour is unchanged.
        // NOTE(review): confirm the length check was not meant to cover all four cases.
        if ((lines[0].Length > 1 && lines[1] == "-") || lines[1] == "." || lines[1] == "!" || lines[1] == "?")
        {
            if (lines[0].StartsWith('-') && oldText.Contains(Environment.NewLine + "-"))
            {
                lines[0] = lines[0].Remove(0, 1);
            }
            return lines[0].Trim();
        }

        var noTags0 = HtmlUtil.RemoveHtmlTags(lines[0]).Trim();
        var noTags1 = HtmlUtil.RemoveHtmlTags(lines[1]).Trim();
        if (noTags0 == "-")
        {
            if (noTags1 == noTags0)
            {
                return string.Empty;
            }
            if (lines[1].Length > 1 && lines[1][0] == '-')
            {
                return lines[1].Remove(0, 1).Trim();
            }
            if (lines[1].Length > 4 && lines[1].StartsWith("<i>-", StringComparison.Ordinal))
            {
                return "<i>" + lines[1].Remove(0, 4).Trim();
            }
            return lines[1];
        }
        if (noTags1 == "-")
        {
            if (lines[0].Length > 1 && lines[0][0] == '-')
            {
                return lines[0].Remove(0, 1).Trim();
            }
            if (lines[0].Length > 4 && lines[0].StartsWith("<i>-", StringComparison.Ordinal))
            {
                // The closing tag lived on the dropped second line; re-add it.
                if (!lines[0].Contains("</i>") && lines[1].Contains("</i>"))
                {
                    return "<i>" + lines[0].Remove(0, 4).Trim() + "</i>";
                }
                return "<i>" + lines[0].Remove(0, 4).Trim();
            }
            return lines[0];
        }
    }

    if (lines.Length == 2)
    {
        // Drop a line that consists solely of punctuation, dashes and white space.
        if (string.IsNullOrWhiteSpace(lines[1].Replace(".", string.Empty).Replace("?", string.Empty).Replace("!", string.Empty).Replace("-", string.Empty).Replace("—", string.Empty)))
        {
            text = lines[0];
            lines = text.SplitToLines();
        }
        else if (string.IsNullOrWhiteSpace(lines[0].Replace(".", string.Empty).Replace("?", string.Empty).Replace("!", string.Empty).Replace("-", string.Empty).Replace("—", string.Empty)))
        {
            text = lines[1];
            lines = text.SplitToLines();
        }
    }

    if (lines.Length == 1 && text != oldText && Utilities.GetNumberOfLines(oldText) == 2)
    {
        // The original was a two-line text whose first line ended a sentence; if it also carried
        // dialog hyphens, the single remaining line must lose its leading hyphen.
        if ((oldText.StartsWith('-') || oldText.StartsWith("<i>-", StringComparison.Ordinal)) &&
            (oldText.Contains("." + Environment.NewLine) || oldText.Contains(".</i>" + Environment.NewLine) ||
             oldText.Contains("!" + Environment.NewLine) || oldText.Contains("!</i>" + Environment.NewLine) ||
             oldText.Contains("?" + Environment.NewLine) || oldText.Contains("?</i>" + Environment.NewLine)))
        {
            if (text.StartsWith("<i>-", StringComparison.Ordinal))
            {
                text = "<i>" + text.Remove(0, 4).TrimStart();
            }
            else
            {
                text = text.TrimStart('-').TrimStart();
            }
        }
        else if ((oldText.Contains(Environment.NewLine + "-") || oldText.Contains(Environment.NewLine + "<i>-")) &&
                 (oldText.Contains("." + Environment.NewLine) || oldText.Contains(".</i>" + Environment.NewLine) ||
                  oldText.Contains("!" + Environment.NewLine) || oldText.Contains("!</i>" + Environment.NewLine) ||
                  oldText.Contains("?" + Environment.NewLine) || oldText.Contains("?</i>" + Environment.NewLine)))
        {
            if (text.StartsWith("<i>-", StringComparison.Ordinal))
            {
                text = "<i>" + text.Remove(0, 4).TrimStart();
            }
            else
            {
                text = text.TrimStart('-').TrimStart();
            }
        }
    }

    if (oldText != text)
    {
        // Re-attach italic tags that ended up alone on a line.
        text = text.Replace(Environment.NewLine + "<i>" + Environment.NewLine, Environment.NewLine + "<i>");
        text = text.Replace(Environment.NewLine + "</i>" + Environment.NewLine, "</i>" + Environment.NewLine);
        if (text.StartsWith("<i>" + Environment.NewLine))
        {
            text = text.Remove(3, Environment.NewLine.Length);
        }
        if (text.EndsWith(Environment.NewLine + "</i>"))
        {
            text = text.Remove(text.Length - (Environment.NewLine.Length + 4), Environment.NewLine.Length);
        }
        text = text.Replace(Environment.NewLine + "</i>" + Environment.NewLine, "</i>" + Environment.NewLine);
    }
    return text;
}
/// <summary>
/// Balances a Spanish question/exclamation pair in <paramref name="p"/>: inserts the opening
/// <paramref name="inverseMark"/> in front of a sentence that ends with <paramref name="mark"/>
/// but has no opening mark, or - when the text has exactly one opening mark and no closing
/// mark - inserts <paramref name="mark"/> before the next '.', '!' or '?'.
/// </summary>
/// <param name="mark">Closing mark to look for.</param>
/// <param name="inverseMark">Matching opening mark.</param>
/// <param name="p">Paragraph whose text is edited in place.</param>
/// <param name="last">Previous paragraph, or null.</param>
/// <param name="wasLastLineClosed">
/// Tracks whether the preceding sentence has ended; read on entry and set to true after each
/// processed mark.
/// </param>
/// <param name="fixAction">Action name passed to the callbacks.</param>
/// <param name="fixCount">Incremented when <paramref name="last"/> is modified.</param>
/// <param name="callbacks">Used to ask permission for a fix and to report changes to <paramref name="last"/>.</param>
private void FixSpanishInvertedLetter(char mark, string inverseMark, Paragraph p, Paragraph last, ref bool wasLastLineClosed, string fixAction, ref int fixCount, IFixCallbacks callbacks)
{
    if (p.Text.Contains(mark))
    {
        bool skip = false;

        // The sentence was opened in the previous paragraph and is only closed here.
        if (last != null && p.Text.Contains(mark) && !p.Text.Contains(inverseMark) && last.Text.Contains(inverseMark) && !last.Text.Contains(mark))
            skip = true;

        // Same number of opening and closing marks, and (tags removed) no opening mark other
        // than leading ones and no closing mark other than trailing ones: nothing to fix.
        if (!skip && Utilities.CountTagInText(p.Text, mark) == Utilities.CountTagInText(p.Text, inverseMark) &&
            HtmlUtil.RemoveHtmlTags(p.Text).TrimStart(inverseMark[0]).Contains(inverseMark) == false &&
            HtmlUtil.RemoveHtmlTags(p.Text).TrimEnd(mark).Contains(mark) == false)
        {
            skip = true;
        }

        if (!skip)
        {
            int startIndex = 0;
            int markIndex = p.Text.IndexOf(mark);

            // Another sentence terminator before the first mark means the sentence carried over
            // from the previous paragraph has already ended.
            if (!wasLastLineClosed && ((p.Text.IndexOf('!') > 0 && p.Text.IndexOf('!') < markIndex) ||
                                       (p.Text.IndexOf('?') > 0 && p.Text.IndexOf('?') < markIndex) ||
                                       (p.Text.IndexOf('.') > 0 && p.Text.IndexOf('.') < markIndex)))
                wasLastLineClosed = true;

            // Visit every closing mark from left to right; a mark at index 0 is never processed.
            while (markIndex > 0 && startIndex < p.Text.Length)
            {
                int inverseMarkIndex = p.Text.IndexOf(inverseMark, startIndex, StringComparison.Ordinal);

                // No opening mark between startIndex and this closing mark.
                if (wasLastLineClosed && (inverseMarkIndex < 0 || inverseMarkIndex > markIndex))
                {
                    if (callbacks.AllowFix(p, fixAction))
                    {
                        int j = markIndex - 1;

                        // Step over terminators directly in front of the mark (e.g. "!!?").
                        while (j > startIndex && (p.Text[j] == '.' || p.Text[j] == '!' || p.Text[j] == '?'))
                            j--;

                        // Walk back to the start of the sentence: stop at a terminator (a period
                        // for which IsSpanishAbbreviation returns true does not count) or just
                        // after a dialog hyphen at the start of a line. The fixed widths 3/4/6
                        // only match when Environment.NewLine is two characters long.
                        while (j > startIndex &&
                               (p.Text[j] != '.' || IsSpanishAbbreviation(p.Text, j, callbacks)) &&
                               p.Text[j] != '!' &&
                               p.Text[j] != '?' &&
                               !(j > 3 && p.Text.Substring(j - 3, 3) == Environment.NewLine + "-") &&
                               !(j > 4 && p.Text.Substring(j - 4, 4) == Environment.NewLine + " -") &&
                               !(j > 6 && p.Text.Substring(j - 6, 6) == Environment.NewLine + "<i>-"))
                            j--;

                        // Move past the terminator that ended the previous sentence.
                        if (@".!?".Contains(p.Text[j]))
                        {
                            j++;
                        }

                        // Move past a line break (again assuming a two-character newline).
                        if (j + 3 < p.Text.Length && p.Text.Substring(j + 1, 2) == Environment.NewLine)
                        {
                            j += 3;
                        }
                        else if (j + 2 < p.Text.Length && p.Text.Substring(j, 2) == Environment.NewLine)
                        {
                            j += 2;
                        }

                        if (j >= startIndex)
                        {
                            // The sentence, up to and including the closing mark.
                            string part = p.Text.Substring(j, markIndex - j + 1);

                            // Keep a leading "(...)" or "[...]" label (plus following line-break
                            // characters) in front of the inserted opening mark.
                            string speaker = string.Empty;
                            int speakerEnd = part.IndexOf(')');
                            if (part.StartsWith('(') && speakerEnd > 0 && speakerEnd < part.IndexOf(mark))
                            {
                                while (Environment.NewLine.Contains(part[speakerEnd + 1]))
                                    speakerEnd++;
                                speaker = part.Substring(0, speakerEnd + 1);
                                part = part.Substring(speakerEnd + 1);
                            }
                            speakerEnd = part.IndexOf(']');
                            if (part.StartsWith('[') && speakerEnd > 0 && speakerEnd < part.IndexOf(mark))
                            {
                                while (Environment.NewLine.Contains(part[speakerEnd + 1]))
                                    speakerEnd++;
                                speaker = part.Substring(0, speakerEnd + 1);
                                part = part.Substring(speakerEnd + 1);
                            }

                            var st = new StrippableText(part);

                            // Text already opens with the other kind of inverted mark and ends
                            // with this mark: just prepend the missing opening mark.
                            if (j == 0 && mark == '!' && st.Pre == "¿" && Utilities.CountTagInText(p.Text, mark) == 1 && HtmlUtil.RemoveHtmlTags(p.Text).EndsWith(mark))
                            {
                                p.Text = inverseMark + p.Text;
                            }
                            else if (j == 0 && mark == '?' && st.Pre == "¡" && Utilities.CountTagInText(p.Text, mark) == 1 && HtmlUtil.RemoveHtmlTags(p.Text).EndsWith(mark))
                            {
                                p.Text = inverseMark + p.Text;
                            }
                            else
                            {
                                // Repeated closing marks ("??") get one opening mark each, as
                                // long as closing marks still outnumber opening marks.
                                string temp = inverseMark;
                                int addToIndex = 0;
                                while (p.Text.Length > markIndex + 1 && p.Text[markIndex + 1] == mark &&
                                       Utilities.CountTagInText(p.Text, mark) > Utilities.CountTagInText(p.Text + temp, inverseMark))
                                {
                                    temp += inverseMark;
                                    st.Post += mark;
                                    markIndex++;
                                    addToIndex++;
                                }

                                // Rebuild the sentence with the opening mark(s) between the
                                // stripped prefix and the sentence text.
                                p.Text = p.Text.Remove(j, markIndex - j + 1).Insert(j, speaker + st.Pre + temp + st.StrippedText + st.Post);
                                markIndex += addToIndex;
                            }
                        }
                    }
                }
                // NOTE(review): this compares the position of inverseMark with the position of
                // the first mark; it is not obvious when the two can be equal - confirm the
                // intended condition. When taken, the opening mark is inserted into the
                // previous paragraph at the last ". ", "! " or "? " (or at index 0).
                else if (last != null && !wasLastLineClosed && inverseMarkIndex == p.Text.IndexOf(mark) && !last.Text.Contains(inverseMark))
                {
                    string lastOldtext = last.Text;
                    int idx = last.Text.Length - 2;
                    while (idx > 0 && (last.Text.Substring(idx, 2) != ". ") && (last.Text.Substring(idx, 2) != "! ") && (last.Text.Substring(idx, 2) != "? "))
                        idx--;

                    last.Text = last.Text.Insert(idx, inverseMark);
                    fixCount++;
                    callbacks.AddFixToListView(last, fixAction, lastOldtext, last.Text);
                }

                // Continue after this mark.
                startIndex = markIndex + 2;
                if (startIndex < p.Text.Length)
                    markIndex = p.Text.IndexOf(mark, startIndex);
                else
                    markIndex = -1;
                wasLastLineClosed = true;
            }
        }

        // Move the mark behind a trailing ellipsis: "?..." becomes "...?".
        if (p.Text.EndsWith(mark + "...", StringComparison.Ordinal) && p.Text.Length > 4)
        {
            p.Text = p.Text.Remove(p.Text.Length - 4, 4) + "..." + mark;
        }
    }
    else if (Utilities.CountTagInText(p.Text, inverseMark) == 1)
    {
        // One opening mark and no closing mark: put the closing mark in front of the next
        // sentence terminator, if there is one.
        int idx = p.Text.IndexOf(inverseMark, StringComparison.Ordinal);
        while (idx < p.Text.Length && !@".!?".Contains(p.Text[idx]))
        {
            idx++;
        }
        if (idx < p.Text.Length)
        {
            p.Text = p.Text.Insert(idx, mark.ToString(CultureInfo.InvariantCulture));

            // Make the closing pair mirror the opening pair.
            if (p.Text.Contains("¡¿") && p.Text.Contains("!?"))
                p.Text = p.Text.Replace("!?", "?!");
            if (p.Text.Contains("¿¡") && p.Text.Contains("?!"))
                p.Text = p.Text.Replace("?!", "!?");
        }
    }
}
/// <summary>
/// Returns the text of paragraph <paramref name="i"/> with a lone dialog hyphen removed.
/// </summary>
/// <remarks>
/// A hyphen is removed only when exactly one line (tags removed) starts with '-'. Nothing is
/// removed when the previous paragraph ends with a single '-' (a trailing "--" does not count),
/// or when the text has two lines and the line without the hyphen contains ": ".
/// A text starting with "&lt;font " is handled separately: a hyphen at the end of its stripped
/// prefix is dropped. The paragraph itself is not modified.
/// </remarks>
/// <param name="subtitle">Subtitle that contains the paragraph.</param>
/// <param name="i">Index of the paragraph in <c>subtitle.Paragraphs</c>.</param>
/// <returns>The paragraph text, with the hyphen removed where the rules above apply.</returns>
public static string FixHyphensRemove(Subtitle subtitle, int i)
{
    Paragraph p = subtitle.Paragraphs[i];
    string text = p.Text;

    if (text.TrimStart().StartsWith('-') ||
        text.TrimStart().StartsWith("<i>-", StringComparison.OrdinalIgnoreCase) ||
        text.TrimStart().StartsWith("<i> -", StringComparison.OrdinalIgnoreCase) ||
        text.Contains(Environment.NewLine + '-') ||
        text.Contains(Environment.NewLine + " -") ||
        text.Contains(Environment.NewLine + "<i>-") ||
        text.Contains(Environment.NewLine + "<i> -") ||
        text.Contains(Environment.NewLine + "<I>-") ||
        text.Contains(Environment.NewLine + "<I> -"))
    {
        var prev = subtitle.GetParagraphOrDefault(i - 1);

        if (prev == null || !HtmlUtil.RemoveHtmlTags(prev.Text).TrimEnd().EndsWith('-') || HtmlUtil.RemoveHtmlTags(prev.Text).TrimEnd().EndsWith("--", StringComparison.Ordinal))
        {
            // One tag-free split serves both the hyphen count and the ": " check.
            var noTagLines = HtmlUtil.RemoveHtmlTags(text).SplitToLines();
            int startHyphenCount = noTagLines.Count(line => line.TrimStart().StartsWith('-'));
            if (startHyphenCount == 1)
            {
                bool remove = true;
                if (noTagLines.Length == 2)
                {
                    // Keep the hyphen when the other line contains ": ".
                    if (noTagLines[0].TrimStart().StartsWith('-') && noTagLines[1].Contains(": "))
                    {
                        remove = false;
                    }
                    if (noTagLines[1].TrimStart().StartsWith('-') && noTagLines[0].Contains(": "))
                    {
                        remove = false;
                    }
                }

                if (remove)
                {
                    int idx = text.IndexOf('-');
                    var st = new StrippableText(text);
                    if (idx < 5 && st.Pre.Length >= idx)
                    {
                        // Hyphen is in the stripped prefix of the first line; remove up to
                        // three such hyphens.
                        text = text.Remove(idx, 1).TrimStart();
                        idx = text.IndexOf('-');
                        st = new StrippableText(text);
                        if (idx < 5 && idx >= 0 && st.Pre.Length >= idx)
                        {
                            text = text.Remove(idx, 1).TrimStart();
                            st = new StrippableText(text);
                        }
                        idx = text.IndexOf('-');
                        if (idx < 5 && idx >= 0 && st.Pre.Length >= idx)
                        {
                            text = text.Remove(idx, 1).TrimStart();
                        }

                        text = RemoveSpacesBeginLine(text);
                    }
                    else
                    {
                        int indexOfNewLine = text.IndexOf(Environment.NewLine, StringComparison.Ordinal);
                        if (indexOfNewLine > 0)
                        {
                            idx = text.IndexOf('-', indexOfNewLine);

                            // The previous guard (indexOfNewLine + 5 > indexOfNewLine) was always
                            // true, so a hyphen inside a word on a later line was removed too.
                            if (idx >= 0 && IsDialogHyphenAtLineStart(text, idx))
                            {
                                string withoutHyphen = text.Remove(idx, 1);
                                string trimmed = withoutHyphen.TrimStart();

                                // TrimStart may have shortened the text in front of the line
                                // break; shift the search position accordingly so it neither
                                // skips characters nor runs past the end of the string.
                                int searchFrom = indexOfNewLine - (withoutHyphen.Length - trimmed.Length);
                                if (searchFrom < 0)
                                {
                                    searchFrom = 0;
                                }

                                text = trimmed.Replace(Environment.NewLine + " ", Environment.NewLine);
                                if (searchFrom > text.Length)
                                {
                                    searchFrom = text.Length;
                                }

                                idx = text.IndexOf('-', searchFrom);
                                if (idx >= 0 && IsDialogHyphenAtLineStart(text, idx))
                                {
                                    text = text.Remove(idx, 1).TrimStart();
                                    text = RemoveSpacesBeginLine(text);
                                }
                            }
                        }
                    }
                }
            }
        }
    }
    else if (text.StartsWith("<font ", StringComparison.Ordinal))
    {
        var prev = subtitle.GetParagraphOrDefault(i - 1);
        if (prev == null || !HtmlUtil.RemoveHtmlTags(prev.Text).TrimEnd().EndsWith('-') || HtmlUtil.RemoveHtmlTags(prev.Text).TrimEnd().EndsWith("--", StringComparison.Ordinal))
        {
            var st = new StrippableText(text);
            if (st.Pre.EndsWith('-') || st.Pre.EndsWith("- ", StringComparison.Ordinal))
            {
                text = st.Pre.TrimEnd('-', ' ') + st.StrippedText + st.Post;
            }
        }
    }
    return text;
}

// True when the hyphen at hyphenIndex sits within the first five characters of its line and
// inside that line's stripped prefix - the same test FixHyphensRemove applies to a hyphen on the
// first line, here relative to the line that contains the hyphen.
private static bool IsDialogHyphenAtLineStart(string text, int hyphenIndex)
{
    int lineStart = text.LastIndexOf(Environment.NewLine, hyphenIndex, StringComparison.Ordinal);
    lineStart = lineStart < 0 ? 0 : lineStart + Environment.NewLine.Length;
    int offset = hyphenIndex - lineStart;
    return offset < 5 && new StrippableText(text.Substring(lineStart)).Pre.Length >= offset;
}
/// <summary>
/// Adds a missing sentence-ending mark to paragraphs, both at the very end of a paragraph and
/// at the end of a line that is followed by a dialog line ("-" after the line break).
/// Every change is offered to <paramref name="callbacks"/> first and reported afterwards; the
/// total is passed to <c>UpdateFixStatus</c>.
/// </summary>
/// <param name="subtitle">Subtitle whose paragraphs are checked and edited in place.</param>
/// <param name="callbacks">Permission, name lookup and reporting hooks.</param>
public void Fix(Subtitle subtitle, IFixCallbacks callbacks)
{
    var language = Configuration.Settings.Language.FixCommonErrors;
    string fixAction = language.FixMissingPeriodAtEndOfLine;
    int fixedCount = 0;

    // Dialog starts that may follow a line break, in the order they are tried.
    string[] dialogStarts = { " -", "-", "<i>-", "<i> -" };

    for (int index = 0; index < subtitle.Paragraphs.Count; index++)
    {
        var current = subtitle.Paragraphs[index];
        var following = subtitle.GetParagraphOrDefault(index + 1);

        // Start of the following paragraph without tags, leading hyphens or opening quotes.
        string followingText = following == null
            ? string.Empty
            : HtmlUtil.RemoveHtmlTags(following.Text, true).TrimStart('-', '"', '„').TrimStart();
        bool followsClosely = following != null && following.StartTime.TotalMilliseconds - current.EndTime.TotalMilliseconds < 400;
        string plainText = HtmlUtil.RemoveHtmlTags(current.Text).TrimEnd();

        if (IsOneLineUrl(current.Text) || current.Text.Contains(ExpectedChars) || current.Text.EndsWith('\''))
        {
            // ignore urls
        }
        else if (!string.IsNullOrEmpty(followingText) && following != null &&
                 following.Text.Length > 0 &&
                 char.IsUpper(followingText[0]) &&
                 plainText.Length > 0 &&
                 !ExpectedString1.Contains(plainText[plainText.Length - 1]))
        {
            // The following paragraph starts with a capital and this one has no expected ending.
            string withoutQuotes = plainText.TrimEnd().TrimEnd('\'', '"', '“', '”').TrimEnd();
            bool isCandidate = withoutQuotes.Length > 0 &&
                               !ExpectedString2.Contains(withoutQuotes[withoutQuotes.Length - 1]) &&
                               current.Text != current.Text.ToUpperInvariant();
            if (isCandidate)
            {
                // Don't end the sentence if the next word is an "I" word - those are always capitalized.
                bool continuesWithI = followsClosely &&
                                      (followingText.StartsWith("I ", StringComparison.Ordinal) ||
                                       followingText.StartsWith("I'", StringComparison.Ordinal));

                if (!continuesWithI && callbacks.AllowFix(current, fixAction))
                {
                    string before = current.Text;

                    // A name is capitalized anyway, so it only counts as a new sentence after a
                    // pause of more than two seconds.
                    bool nextStartsWithName = callbacks.IsName(following.Text.Split(WordSplitChars)[0]);
                    if (!nextStartsWithName || following.StartTime.TotalMilliseconds - current.EndTime.TotalMilliseconds > 2000)
                    {
                        AddPeriod(current, plainText);
                    }

                    if (current.Text != before)
                    {
                        fixedCount++;
                        callbacks.AddFixToListView(current, fixAction, before, current.Text);
                    }
                }
            }
        }
        else if (following != null && !string.IsNullOrEmpty(current.Text) &&
                 Utilities.AllLettersAndNumbers.Contains(current.Text[current.Text.Length - 1]) &&
                 current.Text != current.Text.ToUpperInvariant())
        {
            // Raw text ends in a letter or digit; require the following paragraph to start with
            // a capital without being all upper case.
            var followingStripped = new StrippableText(following.Text);
            string stripped = followingStripped.StrippedText;
            if (stripped.Length > 0 && stripped != stripped.ToUpperInvariant() && char.IsUpper(stripped[0]) &&
                callbacks.AllowFix(current, fixAction))
            {
                // Find the last sentence mark; an unclosed Spanish opening mark decides the ending.
                int markPos = current.Text.Length - 1;
                while (markPos >= 0 && !@".!?¿¡".Contains(current.Text[markPos]))
                {
                    markPos--;
                }

                string endSign = ".";
                if (markPos >= 0)
                {
                    if (current.Text[markPos] == '¿')
                    {
                        endSign = "?";
                    }
                    else if (current.Text[markPos] == '¡')
                    {
                        endSign = "!";
                    }
                }

                string before = current.Text;
                fixedCount++;
                current.Text += endSign;
                callbacks.AddFixToListView(current, fixAction, before, current.Text);
            }
        }

        if (current.Text.Length > 4)
        {
            // Look for a line break followed by a dialog start; the first pattern that occurs wins.
            int lineBreakIndex = -1;
            foreach (string dialogStart in dialogStarts)
            {
                lineBreakIndex = current.Text.IndexOf(Environment.NewLine + dialogStart, 3, StringComparison.Ordinal);
                if (lineBreakIndex >= 0)
                {
                    break;
                }
            }

            // IsUpper(ToUpper(c)) is true only for characters that have an upper-case form,
            // i.e. the line must end in a letter.
            if (lineBreakIndex > 0 && char.IsUpper(char.ToUpper(current.Text[lineBreakIndex - 1])) && callbacks.AllowFix(current, fixAction))
            {
                string before = current.Text;

                string firstPart = current.Text.Substring(0, lineBreakIndex);
                string prefix = new StrippableText(firstPart).Pre.TrimEnd();
                string endSign;
                if (prefix.EndsWith('¿')) // Spanish ¿
                {
                    endSign = "?";
                }
                else if (prefix.EndsWith('¡')) // Spanish ¡
                {
                    endSign = "!";
                }
                else
                {
                    endSign = ".";
                }

                current.Text = current.Text.Insert(lineBreakIndex, endSign);
                fixedCount++;
                callbacks.AddFixToListView(current, fixAction, before, current.Text);
            }
        }
    }

    callbacks.UpdateFixStatus(fixedCount, language.AddPeriods, language.XPeriodsAdded);
}
/// <summary>
/// Fixes an upper-case 'I' that appears inside or at the start of a lower-case word (typically
/// an OCR misread) by replacing it with 'l' - or with 'i' in one case. Every change is offered
/// to <paramref name="callbacks"/> first and reported afterwards; the total is passed to
/// <c>UpdateFixStatus</c>.
/// </summary>
/// <param name="subtitle">Subtitle whose paragraphs are checked and edited in place.</param>
/// <param name="callbacks">Permission, name lookup and reporting hooks.</param>
public void Fix(Subtitle subtitle, IFixCallbacks callbacks)
{
    var language = Configuration.Settings.Language.FixCommonErrors;
    string fixAction = language.FixUppercaseIInsideLowercaseWord;
    int uppercaseIsInsideLowercaseWords = 0;
    for (int i = 0; i < subtitle.Paragraphs.Count; i++)
    {
        Paragraph p = subtitle.Paragraphs[i];
        string oldText = p.Text;

        // Pass 1: 'I' directly after a lower-case letter. The match presumably starts at that
        // letter (the code reads the 'I' at match.Index + 1) - pattern is defined elsewhere.
        Match match = ReAfterLowercaseLetter.Match(p.Text);
        while (match.Success)
        {
            // Irish names, McDonalds etc. The guard must allow match.Index == 1 so that a name
            // at the very start of the text ("McIntyre") is recognized as well.
            bool isMcName = match.Index > 0 && p.Text.Substring(match.Index - 1, 2) == "Mc";
            if (!isMcName
                && p.Text[match.Index + 1] == 'I'
                && callbacks.AllowFix(p, fixAction))
            {
                // Replace the 'I' with 'l'; the tail is taken from the original text, which has
                // the same length as the current text.
                p.Text = p.Text.Substring(0, match.Index + 1) + "l";
                if (match.Index + 2 < oldText.Length)
                {
                    p.Text += oldText.Substring(match.Index + 2);
                }

                uppercaseIsInsideLowercaseWords++;
                callbacks.AddFixToListView(p, fixAction, oldText, p.Text);
            }
            match = match.NextMatch();
        }

        // Pass 2: 'I' directly before a lower-case letter, on the text without its
        // leading/trailing strippable characters.
        var st = new StrippableText(p.Text);
        match = ReBeforeLowercaseLetter.Match(st.StrippedText);
        while (match.Success)
        {
            string word = GetWholeWord(st.StrippedText, match.Index);
            if (!callbacks.IsName(word))
            {
                if (callbacks.AllowFix(p, fixAction))
                {
                    if (word.Equals("internal", StringComparison.OrdinalIgnoreCase) ||
                        word.Equals("island", StringComparison.OrdinalIgnoreCase) ||
                        word.Equals("islands", StringComparison.OrdinalIgnoreCase))
                    {
                        // Valid words that legitimately start with 'I'.
                    }
                    else if (match.Index == 0)
                    {
                        // First letter in paragraph: deliberately not fixed. Too risky unless
                        // it is known that this paragraph continues the previous sentence.
                    }
                    else
                    {
                        if (match.Index > 2 && st.StrippedText[match.Index - 1] == ' ')
                        {
                            // "word Ia..." - mid-sentence word starting with 'I' + vowel.
                            if ((Utilities.AllLettersAndNumbers + @",").Contains(st.StrippedText[match.Index - 2]) &&
                                match.Length >= 2 && Utilities.LowercaseVowels.Contains(char.ToLower(match.Value[1])))
                            {
                                st.StrippedText = st.StrippedText.Remove(match.Index, 1).Insert(match.Index, "l");
                                p.Text = st.MergedString;
                                uppercaseIsInsideLowercaseWords++;
                                callbacks.AddFixToListView(p, fixAction, oldText, p.Text);
                            }
                        }
                        else if (match.Index > Environment.NewLine.Length + 1 && Environment.NewLine.Contains(st.StrippedText[match.Index - 1]))
                        {
                            // Same rule across a line break: inspect the last character of the
                            // previous line, i.e. the one in front of the line break. The former
                            // "- Environment.NewLine.Length + 1" pointed at the line break
                            // (two-character newline) or at the 'I' itself (one-character newline).
                            if ((Utilities.AllLettersAndNumbers + @",").Contains(st.StrippedText[match.Index - Environment.NewLine.Length - 1]) &&
                                match.Length >= 2 && Utilities.LowercaseVowels.Contains(match.Value[1]))
                            {
                                st.StrippedText = st.StrippedText.Remove(match.Index, 1).Insert(match.Index, "l");
                                p.Text = st.MergedString;
                                uppercaseIsInsideLowercaseWords++;
                                callbacks.AddFixToListView(p, fixAction, oldText, p.Text);
                            }
                        }
                        else if (match.Index > 1 && ((st.StrippedText[match.Index - 1] == '\"') || (st.StrippedText[match.Index - 1] == '\'') ||
                                                     (st.StrippedText[match.Index - 1] == '>') || (st.StrippedText[match.Index - 1] == '-')))
                        {
                            // Directly after a quote, tag end or hyphen: leave as is.
                        }
                        else
                        {
                            var before = '\0';
                            var after = '\0';
                            if (match.Index > 0)
                            {
                                before = st.StrippedText[match.Index - 1];
                            }
                            if (match.Index < st.StrippedText.Length - 2)
                            {
                                after = st.StrippedText[match.Index + 1];
                            }

                            if (before != '\0' && char.IsUpper(before) && after != '\0' && char.IsLower(after) &&
                                !Utilities.LowercaseVowels.Contains(char.ToLower(before)) && !Utilities.LowercaseVowels.Contains(after))
                            {
                                // Upper-case consonant + 'I' + lower-case consonant: make it 'i'.
                                st.StrippedText = st.StrippedText.Remove(match.Index, 1).Insert(match.Index, "i");
                                p.Text = st.MergedString;
                                uppercaseIsInsideLowercaseWords++;
                                callbacks.AddFixToListView(p, fixAction, oldText, p.Text);
                            }
                            else if (@"‘’¡¿„“()[]♪'. @".Contains(before) && !Utilities.LowercaseVowels.Contains(char.ToLower(after)))
                            {
                                // After one of these characters and not followed by a vowel:
                                // leave as is.
                            }
                            else
                            {
                                st.StrippedText = st.StrippedText.Remove(match.Index, 1).Insert(match.Index, "l");
                                p.Text = st.MergedString;
                                uppercaseIsInsideLowercaseWords++;
                                callbacks.AddFixToListView(p, fixAction, oldText, p.Text);
                            }
                        }
                    }
                }
            }
            match = match.NextMatch();
        }
    }
    callbacks.UpdateFixStatus(uppercaseIsInsideLowercaseWords, language.FixUppercaseIInsindeLowercaseWords, language.XUppercaseIsFoundInsideLowercaseWords);
}