// Checks that a leading "{MAN}" stays inside StrippedText (Pre is empty) while the
// trailing "!" is moved to Post.
public void StripableTextFontDontTouch()
{
    var st = new StripableText("{MAN} Hi, how are you today!");

    // Expected value first, actual value second, so a failure message reports them the right way round.
    Assert.AreEqual("", st.Pre);
    Assert.AreEqual("!", st.Post);
    Assert.AreEqual("{MAN} Hi, how are you today", st.StrippedText);
}
// Checks that a text consisting only of an opening italic tag ends up entirely in Pre,
// leaving Post and StrippedText empty.
public void StripableOnlyPre3()
{
    var st = new StripableText("<i>");

    // Expected value first, actual value second, so a failure message reports them the right way round.
    Assert.AreEqual("<i>", st.Pre);
    Assert.AreEqual("", st.Post);
    Assert.AreEqual("", st.StrippedText);
}
// Checks that a leading "{\an9}" override block is moved to Pre, the trailing "!" to Post,
// and only "Hi" remains as StrippedText.
public void StripableTextAss()
{
    var st = new StripableText("{\\an9}Hi!");

    // Expected value first, actual value second, so a failure message reports them the right way round.
    Assert.AreEqual("{\\an9}", st.Pre);
    Assert.AreEqual("!", st.Post);
    Assert.AreEqual("Hi", st.StrippedText);
}
// Checks that a single character wrapped in italic tags is split into
// Pre (opening tag), StrippedText ("O") and Post (closing tag).
public void StripableTextItalic2()
{
    var st = new StripableText("<i>O</i>");

    // Expected value first, actual value second, so a failure message reports them the right way round.
    Assert.AreEqual("<i>", st.Pre);
    Assert.AreEqual("</i>", st.Post);
    Assert.AreEqual("O", st.StrippedText);
}
// Checks that an opening font tag goes to Pre, and that the trailing "!" together with
// the closing font tag goes to Post, leaving "Hi" as StrippedText.
public void StripableTextFont()
{
    var st = new StripableText("<font color=\"red\">Hi!</font>");

    // Expected value first, actual value second, so a failure message reports them the right way round.
    Assert.AreEqual("<font color=\"red\">", st.Pre);
    Assert.AreEqual("!</font>", st.Post);
    Assert.AreEqual("Hi", st.StrippedText);
}
/// <summary>
/// Rebuilds the preview list. For every paragraph, each checked name in
/// <c>listViewNames</c> that occurs in the text (ignoring case) is passed through
/// <c>StripableText.FixCasing</c>; paragraphs whose text changed are added to the preview.
/// Finally the "lines found" caption is updated with the number of preview rows.
/// </summary>
private void GeneratePreview()
{
    Cursor = Cursors.WaitCursor;
    listViewFixes.BeginUpdate();
    try
    {
        listViewFixes.Items.Clear();
        foreach (Paragraph p in _subtitle.Paragraphs)
        {
            string text = p.Text;
            foreach (ListViewItem item in listViewNames.Items)
            {
                string name = item.SubItems[1].Text;

                // Cheap checks first: unchecked items, one-letter names and all-lowercase
                // names never change anything, so skip the tag stripping for them.
                // The null check now happens before the text is used.
                if (text == null || !item.Checked || name.Length <= 1 || name == name.ToLower())
                {
                    continue;
                }

                // Text that is entirely upper case (tags ignored) is left alone.
                string textNoTags = HtmlUtil.RemoveHtmlTags(text);
                if (textNoTags == textNoTags.ToUpper())
                {
                    continue;
                }

                if (text.Contains(name, StringComparison.OrdinalIgnoreCase))
                {
                    var st = new StripableText(text);
                    st.FixCasing(new List<string> { name }, true, false, false, string.Empty);
                    text = st.MergedString;
                }
            }

            if (text != p.Text)
            {
                AddToPreviewListView(p, text);
            }
        }
    }
    finally
    {
        // Always leave the list view and the cursor in a usable state, even if a fix throws.
        listViewFixes.EndUpdate();
        Cursor = Cursors.Default;
    }

    groupBoxLinesFound.Text = string.Format(Configuration.Settings.Language.ChangeCasingNames.LinesFoundX, listViewFixes.Items.Count);
}
// Checks that an unclosed opening italic tag is moved to Pre and the trailing "!" to Post,
// leaving "Hi" as StrippedText.
public void StripableTextItalic3()
{
    var st = new StripableText("<i>Hi!");

    // Expected value first, actual value second, so a failure message reports them the right way round.
    Assert.AreEqual("<i>", st.Pre);
    Assert.AreEqual("!", st.Post);
    Assert.AreEqual("Hi", st.StrippedText);
}
// Checks that a single plain letter produces empty Pre and Post and is kept as StrippedText.
public void StripableOnlyText()
{
    var st = new StripableText("H");

    // Expected value first, actual value second, so a failure message reports them the right way round.
    Assert.AreEqual("", st.Pre);
    Assert.AreEqual("", st.Post);
    Assert.AreEqual("H", st.StrippedText);
}
// Checks that the opening italic tag goes to Pre, and that the trailing "!" plus the
// closing italic tag goes to Post, leaving "Hi" as StrippedText.
public void StripableTextItalic()
{
    var st = new StripableText("<i>Hi!</i>");

    // Expected value first, actual value second, so a failure message reports them the right way round.
    Assert.AreEqual("<i>", st.Pre);
    Assert.AreEqual("!</i>", st.Post);
    Assert.AreEqual("Hi", st.StrippedText);
}
// Checks that a leading "{MAN}" stays inside StrippedText (Pre is empty) while the
// trailing "!" is moved to Post.
public void StripableTextFontDontTouch()
{
    var st = new StripableText("{MAN} Hi, how are you today!");

    // Expected value first, actual value second, so a failure message reports them the right way round.
    Assert.AreEqual("", st.Pre);
    Assert.AreEqual("!", st.Post);
    Assert.AreEqual("{MAN} Hi, how are you today", st.StrippedText);
}
// Checks that a single character wrapped in italic tags is split into
// Pre (opening tag), StrippedText ("O") and Post (closing tag).
public void StripableTextItalic2()
{
    var st = new StripableText("<i>O</i>");

    // Expected value first, actual value second, so a failure message reports them the right way round.
    Assert.AreEqual("<i>", st.Pre);
    Assert.AreEqual("</i>", st.Post);
    Assert.AreEqual("O", st.StrippedText);
}
// Checks that an opening font tag goes to Pre, and that the trailing "!" together with
// the closing font tag goes to Post, leaving "Hi" as StrippedText.
public void StripableTextFont()
{
    var st = new StripableText("<font color=\"red\">Hi!</font>");

    // Expected value first, actual value second, so a failure message reports them the right way round.
    Assert.AreEqual("<font color=\"red\">", st.Pre);
    Assert.AreEqual("!</font>", st.Post);
    Assert.AreEqual("Hi", st.StrippedText);
}
// Checks that a leading "{\an9}" override block is moved to Pre, the trailing "!" to Post,
// and only "Hi" remains as StrippedText.
public void StripableTextAss()
{
    var st = new StripableText("{\\an9}Hi!");

    // Expected value first, actual value second, so a failure message reports them the right way round.
    Assert.AreEqual("{\\an9}", st.Pre);
    Assert.AreEqual("!", st.Post);
    Assert.AreEqual("Hi", st.StrippedText);
}
/// <summary>
/// Walks every paragraph longer than three characters that the callbacks allow fixing and,
/// for each character from <c>ExpectedChars</c> found inside the stripped text that is
/// followed by a space (and is not reported as an abbreviation), passes the text after it
/// through <c>ToUpperFirstLetter</c>. Changed paragraphs are reported via the callbacks.
/// </summary>
/// <param name="subtitle">Subtitle whose paragraphs are examined and updated in place.</param>
/// <param name="callbacks">Supplies the allow/abbreviation decisions and receives the results.</param>
public void Fix(Subtitle subtitle, IFixCallbacks callbacks)
{
    var language = Configuration.Settings.Language.FixCommonErrors;
    string fixAction = language.StartWithUppercaseLetterAfterPeriodInsideParagraph;
    int fixCount = 0;

    for (int index = 0; index < subtitle.Paragraphs.Count; index++)
    {
        Paragraph paragraph = subtitle.Paragraphs[index];
        string before = paragraph.Text;
        if (paragraph.Text.Length <= 3 || !callbacks.AllowFix(paragraph, fixAction))
        {
            continue;
        }

        var stripable = new StripableText(paragraph.Text);
        string work = stripable.StrippedText;
        int pos = work.IndexOfAny(ExpectedChars);

        // A hit at index 0 (or no hit at all) ends the scan.
        while (pos > 0 && pos < work.Length)
        {
            char mark = work[pos];
            bool isDot = mark == '.';

            // For repeated marks other than '.', e.g. "???" or "!!!", move to the last one
            // before looking at what follows. Dots are skipped later so that the text after
            // an ellipsis is not examined here.
            if (!isDot)
            {
                while (pos + 1 < work.Length && work[pos + 1] == mark)
                {
                    pos++;
                }
            }

            bool followedBySpace = pos + 3 < work.Length && work[pos + 1] == ' ';
            if (followedBySpace && !IsAbbreviation(work, pos, callbacks))
            {
                string head = work.Substring(0, pos + 2);
                var tail = new StripableText(work.Substring(pos + 2));
                work = head + tail.CombineWithPrePost(ToUpperFirstLetter(tail.StrippedText, callbacks));
            }

            // For a dot, advance to the last dot of the run.
            if (isDot)
            {
                while (pos + 1 < work.Length && work[pos + 1] == '.')
                {
                    pos++;
                }
            }

            pos += 3;
            if (pos < work.Length)
            {
                pos = work.IndexOfAny(ExpectedChars, pos);
            }
        }

        string after = stripable.CombineWithPrePost(work);
        if (before != after)
        {
            paragraph.Text = after;
            fixCount++;
            callbacks.AddFixToListView(paragraph, fixAction, before, paragraph.Text);
        }
    }

    callbacks.UpdateFixStatus(fixCount, language.StartWithUppercaseLetterAfterPeriodInsideParagraph, fixCount.ToString(CultureInfo.InvariantCulture));
}
/// <summary>
/// Walks every paragraph longer than three characters that the callbacks allow fixing and,
/// for each character from <c>ExpectedChars</c> found inside the stripped text that is
/// followed by a space (and is not reported as an abbreviation), passes the text after it
/// through <c>ToUpperFirstLetter</c>. Changed paragraphs are reported via the callbacks.
/// </summary>
/// <param name="subtitle">Subtitle whose paragraphs are examined and updated in place.</param>
/// <param name="callbacks">Supplies the allow/abbreviation decisions and receives the results.</param>
public void Fix(Subtitle subtitle, IFixCallbacks callbacks)
{
    var language = Configuration.Settings.Language.FixCommonErrors;
    string fixAction = language.StartWithUppercaseLetterAfterPeriodInsideParagraph;
    int fixCount = 0;

    for (int index = 0; index < subtitle.Paragraphs.Count; index++)
    {
        Paragraph paragraph = subtitle.Paragraphs[index];
        string before = paragraph.Text;
        if (paragraph.Text.Length <= 3 || !callbacks.AllowFix(paragraph, fixAction))
        {
            continue;
        }

        var stripable = new StripableText(paragraph.Text);
        string work = stripable.StrippedText;
        int pos = work.IndexOfAny(ExpectedChars);

        // A hit at index 0 (or no hit at all) ends the scan.
        while (pos > 0 && pos < work.Length)
        {
            char mark = work[pos];
            bool isDot = mark == '.';

            // For repeated marks other than '.', e.g. "???" or "!!!", move to the last one
            // before looking at what follows. Dots are skipped later so that the text after
            // an ellipsis is not examined here.
            if (!isDot)
            {
                while (pos + 1 < work.Length && work[pos + 1] == mark)
                {
                    pos++;
                }
            }

            bool followedBySpace = pos + 3 < work.Length && work[pos + 1] == ' ';
            if (followedBySpace && !IsAbbreviation(work, pos, callbacks))
            {
                string head = work.Substring(0, pos + 2);
                var tail = new StripableText(work.Substring(pos + 2));
                work = head + tail.CombineWithPrePost(ToUpperFirstLetter(tail.StrippedText, callbacks));
            }

            // For a dot, advance to the last dot of the run.
            if (isDot)
            {
                while (pos + 1 < work.Length && work[pos + 1] == '.')
                {
                    pos++;
                }
            }

            pos += 3;
            if (pos < work.Length)
            {
                pos = work.IndexOfAny(ExpectedChars, pos);
            }
        }

        string after = stripable.CombineWithPrePost(work);
        if (before != after)
        {
            paragraph.Text = after;
            fixCount++;
            callbacks.AddFixToListView(paragraph, fixAction, before, paragraph.Text);
        }
    }

    callbacks.UpdateFixStatus(fixCount, language.StartWithUppercaseLetterAfterPeriodInsideParagraph, fixCount.ToString(CultureInfo.InvariantCulture));
}
/// <summary>
/// Applies the casing mode selected by the radio buttons to one line of text and counts
/// the line in <c>_noOfLinesChanged</c> when the result differs from the input.
/// </summary>
/// <param name="text">Text to re-case.</param>
/// <param name="lastLine">Previous line, forwarded to <c>StripableText.FixCasing</c> in normal mode.</param>
/// <param name="namesEtc">Name list, forwarded to <c>StripableText.FixCasing</c> in normal mode.</param>
/// <returns>
/// The re-cased text. In normal mode with "only all upper" checked, text that is not
/// entirely upper case is returned unchanged.
/// </returns>
private string FixCasing(string text, string lastLine, List<string> namesEtc)
{
    string original = text;
    if (radioButtonNormal.Checked)
    {
        if (checkBoxOnlyAllUpper.Checked && text != text.ToUpper())
        {
            return text;
        }

        if (text.Length > 1)
        {
            // first all to lower
            text = text.ToLower().Trim();

            // Collapse runs of spaces into a single space. The two-space string is built
            // explicitly so it cannot be silently reduced to one space, which would make
            // this loop spin forever on any text containing a space.
            string doubleSpace = new string(' ', 2);
            while (text.Contains(doubleSpace))
            {
                text = text.Replace(doubleSpace, " ");
            }

            // No spaces directly before or after a line break.
            text = text.Replace(" " + Environment.NewLine, Environment.NewLine);
            text = text.Replace(Environment.NewLine + " ", Environment.NewLine);

            var st = new StripableText(text);
            st.FixCasing(namesEtc, false, true, true, lastLine); // fix all casing but names (that's a separate option)
            text = st.MergedString;
        }
    }
    else if (radioButtonUppercase.Checked)
    {
        var st = new StripableText(text);
        text = st.Pre + st.StrippedText.ToUpper() + st.Post;

        // Upper-casing also upper-cases tags inside the text; put them back in lower case.
        text = text.Replace("<I>", "<i>");
        text = text.Replace("</I>", "</i>");
        text = text.Replace("<B>", "<b>");
        text = text.Replace("</B>", "</b>");
        text = text.Replace("<U>", "<u>");
        text = text.Replace("</U>", "</u>"); // was replacing "<U>" a second time, leaving "</U>" untouched
        text = text.Replace("<FONT COLOR>", "<font color>");
        text = text.Replace("</FONT>", "</font>");
    }
    else if (radioButtonLowercase.Checked)
    {
        text = text.ToLower();
    }

    if (original != text)
    {
        _noOfLinesChanged++;
    }

    return text;
}
/// <summary>
/// Replaces <c>_subtitle</c> with a new subtitle in which a single-line paragraph is joined
/// with the paragraph after it (separated by a line break) when all of these hold:
/// a following paragraph exists, the configured <c>ListViewSyntaxMoreThanXLinesX</c> is
/// above 1, the paragraph is not one that ends in '!' or '.' while its stripped text starts
/// with an upper-case letter, and both texts are shorter than the maximum line length minus 5.
/// </summary>
private void MergeLinesWithContinuation()
{
    var result = new Subtitle();

    for (int i = 0; i < _subtitle.Paragraphs.Count; i++)
    {
        Paragraph current = _subtitle.Paragraphs[i];
        Paragraph following = _subtitle.GetParagraphOrDefault(i + 1);

        bool canMerge = !current.Text.Contains(Environment.NewLine)
                        && following != null
                        && Configuration.Settings.Tools.ListViewSyntaxMoreThanXLinesX > 1;

        if (canMerge)
        {
            string trimmed = current.Text.TrimEnd();
            if (trimmed.EndsWith('!') || trimmed.EndsWith('.'))
            {
                var stripable = new StripableText(current.Text);
                if (stripable.StrippedText.Length > 0 && char.IsUpper(stripable.StrippedText[0]))
                {
                    canMerge = false;
                }
            }
        }

        if (canMerge)
        {
            int limit = Configuration.Settings.General.SubtitleLineMaximumLength - 5;
            if (current.Text.Length >= limit || following.Text.Length >= limit)
            {
                canMerge = false;
            }
        }

        if (!canMerge)
        {
            result.Paragraphs.Add(new Paragraph(current));
            continue;
        }

        result.Paragraphs.Add(new Paragraph { Text = current.Text + Environment.NewLine + following.Text });
        i++; // the following paragraph has been consumed by the merge
    }

    _subtitle = result;
}
/// <summary>
/// Replaces <c>_subtitle</c> with a new subtitle in which a single-line paragraph is joined
/// with the paragraph after it (separated by a line break) when all of these hold:
/// a following paragraph exists, the paragraph is not one that ends in '!' or '.' while its
/// stripped text starts with a character from <c>Utilities.UppercaseLetters</c>, and both
/// texts are shorter than the maximum line length minus 5.
/// </summary>
private void MergeLinesWithContinuation()
{
    var result = new Subtitle();

    for (int i = 0; i < _subtitle.Paragraphs.Count; i++)
    {
        Paragraph current = _subtitle.Paragraphs[i];
        Paragraph following = _subtitle.GetParagraphOrDefault(i + 1);

        bool canMerge = !current.Text.Contains(Environment.NewLine) && following != null;

        if (canMerge)
        {
            string trimmed = current.Text.TrimEnd();
            if (trimmed.EndsWith('!') || trimmed.EndsWith('.'))
            {
                var stripable = new StripableText(current.Text);
                if (stripable.StrippedText.Length > 0 && Utilities.UppercaseLetters.Contains(stripable.StrippedText[0].ToString(CultureInfo.InvariantCulture)))
                {
                    canMerge = false;
                }
            }
        }

        if (canMerge)
        {
            int limit = Configuration.Settings.General.SubtitleLineMaximumLength - 5;
            if (current.Text.Length >= limit || following.Text.Length >= limit)
            {
                canMerge = false;
            }
        }

        if (!canMerge)
        {
            result.Paragraphs.Add(new Paragraph(current));
            continue;
        }

        result.Paragraphs.Add(new Paragraph { Text = current.Text + Environment.NewLine + following.Text });
        i++; // the following paragraph has been consumed by the merge
    }

    _subtitle = result;
}
/// <summary>
/// Applies the casing mode selected by the radio buttons to one line of text and counts
/// the line in <c>_noOfLinesChanged</c> when the result differs from the input.
/// </summary>
/// <param name="text">Text to re-case.</param>
/// <param name="lastLine">Previous line, forwarded to <c>StripableText.FixCasing</c> in normal mode.</param>
/// <param name="namesEtc">Name list, forwarded to <c>StripableText.FixCasing</c> in normal mode.</param>
/// <returns>
/// The re-cased text. In normal mode with "only all upper" checked, text that is not
/// entirely upper case is returned unchanged.
/// </returns>
private string FixCasing(string text, string lastLine, List<string> namesEtc)
{
    string cased = text;

    if (radioButtonNormal.Checked)
    {
        if (checkBoxOnlyAllUpper.Checked && text != text.ToUpper())
        {
            return text;
        }

        if (text.Length > 1)
        {
            // Lower-case and tidy the spacing first, then let StripableText fix all casing
            // except names (names are a separate option).
            var stripable = new StripableText(text.ToLower().Trim().FixExtraSpaces());
            stripable.FixCasing(namesEtc, false, true, true, lastLine);
            cased = stripable.MergedString;
        }
    }
    else if (radioButtonUppercase.Checked)
    {
        var stripable = new StripableText(text);
        string upper = stripable.Pre + stripable.StrippedText.ToUpper() + stripable.Post;
        cased = HtmlUtil.FixUpperTags(upper); // tags inside text
    }
    else if (radioButtonLowercase.Checked)
    {
        cased = text.ToLower();
    }

    if (cased != text)
    {
        _noOfLinesChanged++;
    }

    return cased;
}
/// <summary>
/// Returns the text of paragraph <paramref name="i"/> with a lone dialog hyphen removed.
/// The paragraph itself is not modified; only the returned string differs.
/// </summary>
/// <param name="subtitle">Subtitle that contains the paragraph and its predecessor.</param>
/// <param name="i">Index of the paragraph to examine.</param>
/// <returns>The paragraph text, with the hyphen removed when the conditions below are met.</returns>
public static string FixHyphensRemove(Subtitle subtitle, int i)
{
    Paragraph p = subtitle.Paragraphs[i];
    string text = p.Text;

    // Case 1: the text, or a line after a line break, starts with a hyphen (optionally after an italic tag).
    if (text.TrimStart().StartsWith('-') || text.TrimStart().StartsWith("<i>-", StringComparison.OrdinalIgnoreCase) || text.TrimStart().StartsWith("<i> -", StringComparison.OrdinalIgnoreCase) || text.Contains(Environment.NewLine + '-') || text.Contains(Environment.NewLine + " -") || text.Contains(Environment.NewLine + "<i>-") || text.Contains(Environment.NewLine + "<i> -") || text.Contains(Environment.NewLine + "<I>-") || text.Contains(Environment.NewLine + "<I> -"))
    {
        // Skip when the previous paragraph ends with a single '-' (a "--" ending does not block the fix).
        var prev = subtitle.GetParagraphOrDefault(i - 1);
        if (prev == null || !HtmlUtil.RemoveHtmlTags(prev.Text).TrimEnd().EndsWith('-') || HtmlUtil.RemoveHtmlTags(prev.Text).TrimEnd().EndsWith("--", StringComparison.Ordinal))
        {
            // Only act when exactly one tag-free line starts with a hyphen.
            var noTaglines = HtmlUtil.RemoveHtmlTags(p.Text).SplitToLines();
            int startHyphenCount = noTaglines.Count(line => line.TrimStart().StartsWith('-'));
            if (startHyphenCount == 1)
            {
                bool remove = true;
                var noTagparts = HtmlUtil.RemoveHtmlTags(text).SplitToLines();
                if (noTagparts.Length == 2)
                {
                    // Keep the hyphen when the other of the two lines contains ": ".
                    if (noTagparts[0].TrimStart().StartsWith('-') && noTagparts[1].Contains(": "))
                        remove = false;
                    if (noTagparts[1].TrimStart().StartsWith('-') && noTagparts[0].Contains(": "))
                        remove = false;
                }
                if (remove)
                {
                    int idx = text.IndexOf('-');
                    var st = new StripableText(text);

                    // Hyphen within the first five characters and not beyond the length of st.Pre:
                    // remove it, then try up to two more times on the shortened text.
                    if (idx < 5 && st.Pre.Length >= idx)
                    {
                        text = text.Remove(idx, 1).TrimStart();
                        idx = text.IndexOf('-');
                        st = new StripableText(text);
                        if (idx < 5 && idx >= 0 && st.Pre.Length >= idx)
                        {
                            text = text.Remove(idx, 1).TrimStart();
                            st = new StripableText(text);
                        }
                        idx = text.IndexOf('-');
                        if (idx < 5 && idx >= 0 && st.Pre.Length >= idx)
                            text = text.Remove(idx, 1).TrimStart();
                        text = RemoveSpacesBeginLine(text);
                    }
                    else
                    {
                        // Otherwise look for the first hyphen at or after the first line break.
                        int indexOfNewLine = text.IndexOf(Environment.NewLine, StringComparison.Ordinal);
                        if (indexOfNewLine > 0)
                        {
                            idx = text.IndexOf('-', indexOfNewLine);
                            // NOTE(review): "indexOfNewLine + 5 > indexOfNewLine" is always true, so only
                            // "idx >= 0" is effectively tested. A bound on idx may have been intended — confirm.
                            if (idx >= 0 && indexOfNewLine + 5 > indexOfNewLine)
                            {
                                text = text.Remove(idx, 1).TrimStart().Replace(Environment.NewLine + " ", Environment.NewLine);
                                idx = text.IndexOf('-', indexOfNewLine);
                                // NOTE(review): same always-true comparison as above.
                                if (idx >= 0 && indexOfNewLine + 5 > indexOfNewLine)
                                {
                                    text = text.Remove(idx, 1).TrimStart();
                                    text = RemoveSpacesBeginLine(text);
                                }
                            }
                        }
                    }
                }
            }
        }
    }
    // Case 2: the text starts with a font tag; drop a hyphen (and spaces) at the end of st.Pre.
    else if (text.StartsWith("<font ", StringComparison.Ordinal))
    {
        var prev = subtitle.GetParagraphOrDefault(i - 1);
        if (prev == null || !HtmlUtil.RemoveHtmlTags(prev.Text).TrimEnd().EndsWith('-') || HtmlUtil.RemoveHtmlTags(prev.Text).TrimEnd().EndsWith("--", StringComparison.Ordinal))
        {
            var st = new StripableText(text);
            if (st.Pre.EndsWith('-') || st.Pre.EndsWith("- ", StringComparison.Ordinal))
            {
                text = st.Pre.TrimEnd('-', ' ') + st.StrippedText + st.Post;
            }
        }
    }
    return text;
}
/// <summary>
/// Adds a missing end sign (".", or "?" / "!" after a Spanish opening mark) to paragraphs,
/// and before a line break that is followed by a dialog hyphen. Every change is subject to
/// <c>callbacks.AllowFix</c> and is reported through <c>callbacks.AddFixToListView</c>.
/// </summary>
/// <param name="subtitle">Subtitle whose paragraphs are examined and updated in place.</param>
/// <param name="callbacks">Supplies allow/name decisions and receives the results.</param>
public void Fix(Subtitle subtitle, IFixCallbacks callbacks)
{
    var language = Configuration.Settings.Language.FixCommonErrors;
    string fixAction = language.FixMissingPeriodAtEndOfLine;
    int missigPeriodsAtEndOfLine = 0;
    for (int i = 0; i < subtitle.Paragraphs.Count; i++)
    {
        Paragraph p = subtitle.Paragraphs[i];
        Paragraph next = subtitle.GetParagraphOrDefault(i + 1);

        // Tag-free start of the next paragraph, without leading hyphens/quotes and whitespace.
        string nextText = string.Empty;
        if (next != null)
        {
            nextText = HtmlUtil.RemoveHtmlTags(next.Text).TrimStart('-', '"', '„').TrimStart();
        }
        string tempNoHtml = HtmlUtil.RemoveHtmlTags(p.Text).TrimEnd();
        if (IsOneLineUrl(p.Text) || p.Text.Contains(new[] { '♪', '♫' }) || p.Text.EndsWith('\''))
        {
            // ignore one-line URLs, text containing music symbols and text ending with an apostrophe
        }
        // Branch A: the next paragraph starts with an upper-case letter and this one does not
        // already end with one of the listed punctuation characters.
        else if (!string.IsNullOrEmpty(nextText) && next != null && next.Text.Length > 0 && Utilities.UppercaseLetters.Contains(nextText[0]) && tempNoHtml.Length > 0 && !@",.!?:;>-])♪♫…".Contains(tempNoHtml[tempNoHtml.Length - 1]))
        {
            // Look behind trailing quotes as well; skip text that is entirely upper case.
            string tempTrimmed = tempNoHtml.TrimEnd().TrimEnd('\'', '"', '“', '”').TrimEnd();
            if (tempTrimmed.Length > 0 && !@")]*#¶.!?".Contains(tempTrimmed[tempTrimmed.Length - 1]) && p.Text != p.Text.ToUpper())
            {
                // don't end the sentence if the next word is an I word as they're always capped.
                if (!next.Text.StartsWith("I ", StringComparison.Ordinal) && !next.Text.StartsWith("I'", StringComparison.Ordinal))
                {
                    // test to see if the first word of the next line is a name
                    if (!callbacks.IsName(next.Text.Split(WordSplitChars)[0]) && callbacks.AllowFix(p, fixAction))
                    {
                        string oldText = p.Text;
                        if (p.Text.EndsWith('>'))
                        {
                            // Text ends with a tag: put the period in front of the last '<'.
                            int lastLessThan = p.Text.LastIndexOf('<');
                            if (lastLessThan > 0)
                            {
                                p.Text = p.Text.Insert(lastLessThan, ".");
                            }
                        }
                        else
                        {
                            // Quoted text: put the period inside the closing quote.
                            if (p.Text.EndsWith('“') && tempNoHtml.StartsWith('„'))
                            {
                                p.Text = p.Text.TrimEnd('“') + ".“";
                            }
                            else if (p.Text.EndsWith('"') && tempNoHtml.StartsWith('"'))
                            {
                                p.Text = p.Text.TrimEnd('"') + ".\"";
                            }
                            else
                            {
                                p.Text += ".";
                            }
                        }
                        if (p.Text != oldText)
                        {
                            missigPeriodsAtEndOfLine++;
                            callbacks.AddFixToListView(p, fixAction, oldText, p.Text);
                        }
                    }
                }
            }
        }
        // Branch B: this paragraph ends with a character from Utilities.AllLettersAndNumbers.
        else if (next != null && !string.IsNullOrEmpty(p.Text) && Utilities.AllLettersAndNumbers.Contains(p.Text[p.Text.Length - 1]))
        {
            if (p.Text != p.Text.ToUpper())
            {
                // The next paragraph's stripped text must start with an upper-case letter without being all upper case.
                var st = new StripableText(next.Text);
                if (st.StrippedText.Length > 0 && st.StrippedText != st.StrippedText.ToUpper() && Utilities.UppercaseLetters.Contains(st.StrippedText[0]))
                {
                    if (callbacks.AllowFix(p, fixAction))
                    {
                        // Find the last sentence mark in the text; an opening Spanish mark
                        // selects the matching closing sign instead of a period.
                        int j = p.Text.Length - 1;
                        while (j >= 0 && !@".!?¿¡".Contains(p.Text[j]))
                        {
                            j--;
                        }
                        string endSign = ".";
                        if (j >= 0 && p.Text[j] == '¿')
                        {
                            endSign = "?";
                        }
                        if (j >= 0 && p.Text[j] == '¡')
                        {
                            endSign = "!";
                        }
                        string oldText = p.Text;
                        missigPeriodsAtEndOfLine++;
                        p.Text += endSign;
                        callbacks.AddFixToListView(p, fixAction, oldText, p.Text);
                    }
                }
            }
        }

        // Independently of the branches above: add an end sign before a line break that is
        // followed by a dialog hyphen (searched from index 3, several spellings tried in turn).
        if (p.Text.Length > 4)
        {
            int indexOfNewLine = p.Text.IndexOf(Environment.NewLine + " -", 3, StringComparison.Ordinal);
            if (indexOfNewLine < 0)
            {
                indexOfNewLine = p.Text.IndexOf(Environment.NewLine + "-", 3, StringComparison.Ordinal);
            }
            if (indexOfNewLine < 0)
            {
                indexOfNewLine = p.Text.IndexOf(Environment.NewLine + "<i>-", 3, StringComparison.Ordinal);
            }
            if (indexOfNewLine < 0)
            {
                indexOfNewLine = p.Text.IndexOf(Environment.NewLine + "<i> -", 3, StringComparison.Ordinal);
            }

            // Only when the character right before the line break is a letter
            // (its upper-case form is in the configured UppercaseLetters).
            if (indexOfNewLine > 0 && Configuration.Settings.General.UppercaseLetters.Contains(char.ToUpper(p.Text[indexOfNewLine - 1])) && callbacks.AllowFix(p, fixAction))
            {
                string oldText = p.Text;
                string text = p.Text.Substring(0, indexOfNewLine);
                var st = new StripableText(text);
                if (st.Pre.TrimEnd().EndsWith('¿')) // Spanish ¿
                {
                    p.Text = p.Text.Insert(indexOfNewLine, "?");
                }
                else if (st.Pre.TrimEnd().EndsWith('¡')) // Spanish ¡
                {
                    p.Text = p.Text.Insert(indexOfNewLine, "!");
                }
                else
                {
                    p.Text = p.Text.Insert(indexOfNewLine, ".");
                }
                missigPeriodsAtEndOfLine++;
                callbacks.AddFixToListView(p, fixAction, oldText, p.Text);
            }
        }
    }
    callbacks.UpdateFixStatus(missigPeriodsAtEndOfLine, language.AddPeriods, language.XPeriodsAdded);
}
/// <summary>
/// Removes hearing-impaired annotations from one subtitle text, driven by <c>Settings</c>,
/// and then repairs dialog hyphens, italic tags and line breaks left behind.
/// </summary>
/// <param name="text">The subtitle text to clean.</param>
/// <returns>
/// The cleaned, trimmed text; <c>string.Empty</c> when "remove where contains" is enabled
/// and the text contains one of the configured strings.
/// </returns>
public string RemoveTextFromHearImpaired(string text)
{
    // Whole text is dropped when it contains any of the configured strings.
    if (Settings.RemoveWhereContains)
    {
        foreach (var removeIfTextContain in Settings.RemoveIfTextContains)
        {
            if (text.Contains(removeIfTextContain))
                return string.Empty;
        }
    }
    string oldText = text;
    text = RemoveColon(text);

    // Characters StripableText may move into Pre/Post; the custom start/end tags are
    // taken out of these sets when custom-tag removal is enabled.
    string pre = " >-\"'‘`´♪¿¡.…—";
    string post = " -\"'`´♪.!?:…—";
    if (Settings.RemoveTextBetweenCustomTags)
    {
        pre = pre.Replace(Settings.CustomStart, string.Empty);
        post = post.Replace(Settings.CustomEnd, string.Empty);
    }
    var st = new StripableText(text, pre, post);
    var sb = new StringBuilder();
    var parts = st.StrippedText.Trim().SplitToLines();
    int lineNumber = 0;
    bool removedDialogInFirstLine = false;
    int noOfNamesRemoved = 0;
    int noOfNamesRemovedNotInLineOne = 0;

    // Process the stripped text line by line.
    foreach (string s in parts)
    {
        var stSub = new StripableText(s, pre, post);
        string strippedText = stSub.StrippedText;

        // Put back a '?' that was stripped into Post before testing for enclosing tags.
        if (lineNumber == parts.Length - 1 && st.Post.Contains('?'))
            strippedText += "?";
        else if (stSub.Post.Contains('?'))
            strippedText += "?";
        if (!StartsAndEndsWithHearImpariedTags(strippedText))
        {
            // The line is kept; only the tags inside it are removed.
            if (removedDialogInFirstLine && stSub.Pre.Contains("- "))
                stSub.Pre = stSub.Pre.Replace("- ", string.Empty);
            string newText = stSub.StrippedText;
            newText = RemoveHearImpairedTags(newText);
            if (stSub.StrippedText.Length - newText.Length > 2)
            {
                // More than two characters were removed: count it unless IsHIDescription accepts the removed text.
                string removedText = GetRemovedString(stSub.StrippedText, newText);
                if (!IsHIDescription(removedText))
                {
                    noOfNamesRemoved++;
                    if (lineNumber > 0)
                        noOfNamesRemovedNotInLineOne++;
                }
            }
            sb.AppendLine(stSub.Pre + newText + stSub.Post);
        }
        else
        {
            // The whole line is enclosed in tags and is dropped (nothing is appended to sb).
            if (!IsHIDescription(stSub.StrippedText))
            {
                noOfNamesRemoved++;
                if (lineNumber > 0)
                    noOfNamesRemovedNotInLineOne++;
            }
            if (lineNumber == 0)
            {
                // The first line is gone, so its dialog hyphen goes as well.
                if (st.Pre.Contains("- "))
                {
                    st.Pre = st.Pre.Replace("- ", string.Empty);
                    removedDialogInFirstLine = true;
                }
                else if (st.Pre == "-")
                {
                    st.Pre = string.Empty;
                    removedDialogInFirstLine = true;
                }
            }

            // Drop the italic tag whose partner disappeared together with the removed line.
            if (st.Pre.Contains("<i>") && stSub.Post.Contains("</i>"))
                st.Pre = st.Pre.Replace("<i>", string.Empty);
            if (s.Contains("<i>") && !s.Contains("</i>") && st.Post.Contains("</i>"))
                st.Post = st.Post.Replace("</i>", string.Empty);
        }
        lineNumber++;
    }
    text = st.Pre + sb.ToString().Trim() + st.Post;

    // NOTE(review): as written this replaces a single space with a single space (a no-op);
    // presumably a double space was meant to be collapsed — confirm against the original file.
    text = text.Replace(" ", " ").Trim();

    // Remove formatting tags that are now empty.
    text = text.Replace("<i></i>", string.Empty);
    text = text.Replace("<i> </i>", " ");
    text = text.Replace("<b></b>", string.Empty);
    text = text.Replace("<b> </b>", " ");
    text = text.Replace("<u></u>", string.Empty);
    text = text.Replace("<u> </u>", " ");
    text = RemoveEmptyFontTag(text);

    // NOTE(review): same no-op replacement as above — confirm.
    text = text.Replace(" ", " ").Trim();
    text = RemoveColon(text);
    text = RemoveLineIfAllUppercase(text);
    text = RemoveHearImpairedtagsInsideLine(text);
    if (Settings.RemoveInterjections)
        text = RemoveInterjections(text);

    // Strip again with the full default character sets; st.Pre/st.Post are re-attached further down.
    st = new StripableText(text, " >-\"'‘`´♪¿¡.…—", " -\"'`´♪.!?:…—");
    text = st.StrippedText;
    if (StartsAndEndsWithHearImpariedTags(text))
    {
        text = RemoveStartEndTags(text);
    }
    text = RemoveHearImpairedTags(text);

    // Turn three lines into two when the tag-free text splits into exactly two parts
    // at '.', '?' or '!': join the lines and break after the last such character.
    if (noOfNamesRemoved >= 1 && Utilities.GetNumberOfLines(text) == 3)
    {
        var splitChars = new[] { '.', '?', '!' };
        var splitParts = HtmlUtil.RemoveHtmlTags(text).Replace(" ", string.Empty).Split(splitChars, StringSplitOptions.RemoveEmptyEntries);
        if (splitParts.Length == 2)
        {
            var temp = new StripableText(text);
            temp.StrippedText = temp.StrippedText.Replace(Environment.NewLine, " ");
            int splitIndex = temp.StrippedText.LastIndexOfAny(splitChars);
            if (splitIndex > 0)
            {
                text = temp.Pre + temp.StrippedText.Insert(splitIndex + 1, Environment.NewLine) + temp.Post;
            }
        }
    }

    // Two lines remain and something was removed outside line one: add dialog hyphens
    // when the first line ends a sentence.
    if (!text.StartsWith('-') && noOfNamesRemoved >= 1 && Utilities.GetNumberOfLines(text) == 2)
    {
        var lines = text.SplitToLines();
        var part0 = lines[0].Trim().Replace("</i>", string.Empty).Trim();
        if (!part0.EndsWith(',') && (!part0.EndsWith('-') || noOfNamesRemovedNotInLineOne > 0))
        {
            if (part0.Length > 0 && ".?!".Contains(part0[part0.Length - 1]))
            {
                if (noOfNamesRemovedNotInLineOne > 0)
                {
                    if (!st.Pre.Contains('-'))
                        text = "- " + text.Replace(Environment.NewLine, Environment.NewLine + "- ");
                    if (!text.Contains(Environment.NewLine + "-") && !text.Contains(Environment.NewLine + "<i>-"))
                        text = text.Replace(Environment.NewLine, Environment.NewLine + "- ");
                }
            }
        }
    }

    // Re-attach Pre/Post, unless the text is empty and neither contains a music symbol.
    if (!string.IsNullOrEmpty(text) || (st.Pre.Contains('♪') || st.Post.Contains('♪')))
        text = st.Pre + text + st.Post;

    // The original was a two-line dialog but only one line is left: drop the leading hyphen.
    if (oldText.TrimStart().StartsWith("- ", StringComparison.Ordinal) && text != null && !text.Contains(Environment.NewLine) && (oldText.Contains(Environment.NewLine + "- ") || oldText.Contains(Environment.NewLine + " - ") || oldText.Contains(Environment.NewLine + "<i>- ") || oldText.Contains(Environment.NewLine + "<i> - ")))
    {
        text = text.TrimStart().TrimStart('-').TrimStart();
    }

    // Same, for an original that started with a single '-' (no space required) but not "--".
    if (oldText.TrimStart().StartsWith('-') && !oldText.TrimStart().StartsWith("--", StringComparison.Ordinal) && text != null && !text.Contains(Environment.NewLine) && (oldText.Contains(Environment.NewLine + "-") && !oldText.Contains(Environment.NewLine + "--") || oldText.Contains(Environment.NewLine + " - ") || oldText.Contains(Environment.NewLine + "<i>- ") || oldText.Contains(Environment.NewLine + "<i> - ")))
    {
        text = text.TrimStart().TrimStart('-').TrimStart();
    }

    // Same, for a hyphen directly after an opening italic tag: remove the "- " at index 3.
    if (oldText.TrimStart().StartsWith("<i>- ", StringComparison.Ordinal) && text != null && text.StartsWith("<i>- ", StringComparison.Ordinal) && !text.Contains(Environment.NewLine) && (oldText.Contains(Environment.NewLine + "- ") || oldText.Contains(Environment.NewLine + " - ") || oldText.Contains(Environment.NewLine + "<i>- ") || oldText.Contains(Environment.NewLine + "<i> - ")))
    {
        text = text.Remove(3, 2);
    }

    // A ':', '[', '(' or '{' disappeared and only one line is left: drop the leading hyphen.
    if (text != null && !text.Contains(Environment.NewLine) && (oldText.Contains(':') && !text.Contains(':') || oldText.Contains('[') && !text.Contains('[') || oldText.Contains('(') && !text.Contains('(') || oldText.Contains('{') && !text.Contains('{')) && (oldText.Contains(Environment.NewLine + "- ") || oldText.Contains(Environment.NewLine + " - ") || oldText.Contains(Environment.NewLine + "<i>- ") || oldText.Contains(Environment.NewLine + "<i> - ")))
    {
        text = text.TrimStart().TrimStart('-').TrimStart();
    }
    if (oldText != text)
    {
        // Normalize dialog hyphens: the italic tag goes before the hyphen, and a hyphen at the
        // start of the text or of a line is followed by a space (unless a space or '-' follows already).
        text = text.Replace(Environment.NewLine + "- <i>", Environment.NewLine + "<i>- ");
        text = text.Replace(Environment.NewLine + "-<i>", Environment.NewLine + "<i>- ");
        if (text.Length > 2 && text[0] == '-' && text[1] != ' ' && text[1] != '-')
            text = text.Insert(1, " ");
        if (text.Length > 5 && text.StartsWith("<i>-", StringComparison.Ordinal) && text[4] != ' ' && text[4] != '-')
            text = text.Insert(4, " ");
        int index = text.IndexOf(Environment.NewLine + "-", StringComparison.Ordinal);
        if (index >= 0 && text.Length - index > 4)
        {
            // Move to the character right after the hyphen.
            index += Environment.NewLine.Length + 1;
            if (text[index] != ' ' && text[index] != '-')
                text = text.Insert(index, " ");
        }
        index = text.IndexOf(Environment.NewLine + "<i>-", StringComparison.Ordinal);
        if (index >= 0 && text.Length - index > 5)
        {
            // Move to the character right after the italic tag and hyphen.
            index += Environment.NewLine.Length + 4;
            if (text[index] != ' ' && text[index] != '-')
                text = text.Insert(index, " ");
        }
    }
    return text.Trim();
}
/// <summary>
/// Returns the text of paragraph <paramref name="i"/> with a lone dialog hyphen removed.
/// The paragraph itself is not modified; only the returned string differs.
/// </summary>
/// <param name="subtitle">Subtitle that contains the paragraph and its predecessor.</param>
/// <param name="i">Index of the paragraph to examine.</param>
/// <returns>The paragraph text, with the hyphen removed when the conditions below are met.</returns>
public static string FixHyphensRemove(Subtitle subtitle, int i)
{
    Paragraph p = subtitle.Paragraphs[i];
    string text = p.Text;

    // Case 1: the text, or a line after a line break, starts with a hyphen (optionally after an italic tag).
    if (text.TrimStart().StartsWith('-') || text.TrimStart().StartsWith("<i>-", StringComparison.OrdinalIgnoreCase) || text.TrimStart().StartsWith("<i> -", StringComparison.OrdinalIgnoreCase) || text.Contains(Environment.NewLine + '-') || text.Contains(Environment.NewLine + " -") || text.Contains(Environment.NewLine + "<i>-") || text.Contains(Environment.NewLine + "<i> -") || text.Contains(Environment.NewLine + "<I>-") || text.Contains(Environment.NewLine + "<I> -"))
    {
        // Skip when the previous paragraph ends with a single '-' (a "--" ending does not block the fix).
        var prev = subtitle.GetParagraphOrDefault(i - 1);
        if (prev == null || !HtmlUtil.RemoveHtmlTags(prev.Text).TrimEnd().EndsWith('-') || HtmlUtil.RemoveHtmlTags(prev.Text).TrimEnd().EndsWith("--", StringComparison.Ordinal))
        {
            // Only act when exactly one tag-free line starts with a hyphen.
            var noTaglines = HtmlUtil.RemoveHtmlTags(p.Text).SplitToLines();
            int startHyphenCount = noTaglines.Count(line => line.TrimStart().StartsWith('-'));
            if (startHyphenCount == 1)
            {
                bool remove = true;
                var noTagparts = HtmlUtil.RemoveHtmlTags(text).SplitToLines();
                if (noTagparts.Length == 2)
                {
                    // Keep the hyphen when the other of the two lines contains ": ".
                    if (noTagparts[0].TrimStart().StartsWith('-') && noTagparts[1].Contains(": "))
                    {
                        remove = false;
                    }
                    if (noTagparts[1].TrimStart().StartsWith('-') && noTagparts[0].Contains(": "))
                    {
                        remove = false;
                    }
                }
                if (remove)
                {
                    int idx = text.IndexOf('-');
                    var st = new StripableText(text);

                    // Hyphen within the first five characters and not beyond the length of st.Pre:
                    // remove it, then try up to two more times on the shortened text.
                    if (idx < 5 && st.Pre.Length >= idx)
                    {
                        text = text.Remove(idx, 1).TrimStart();
                        idx = text.IndexOf('-');
                        st = new StripableText(text);
                        if (idx < 5 && idx >= 0 && st.Pre.Length >= idx)
                        {
                            text = text.Remove(idx, 1).TrimStart();
                            st = new StripableText(text);
                        }
                        idx = text.IndexOf('-');
                        if (idx < 5 && idx >= 0 && st.Pre.Length >= idx)
                        {
                            text = text.Remove(idx, 1).TrimStart();
                        }
                        text = RemoveSpacesBeginLine(text);
                    }
                    else
                    {
                        // Otherwise look for the first hyphen at or after the first line break.
                        int indexOfNewLine = text.IndexOf(Environment.NewLine, StringComparison.Ordinal);
                        if (indexOfNewLine > 0)
                        {
                            idx = text.IndexOf('-', indexOfNewLine);
                            // NOTE(review): "indexOfNewLine + 5 > indexOfNewLine" is always true, so only
                            // "idx >= 0" is effectively tested. A bound on idx may have been intended — confirm.
                            if (idx >= 0 && indexOfNewLine + 5 > indexOfNewLine)
                            {
                                text = text.Remove(idx, 1).TrimStart().Replace(Environment.NewLine + " ", Environment.NewLine);
                                idx = text.IndexOf('-', indexOfNewLine);
                                // NOTE(review): same always-true comparison as above.
                                if (idx >= 0 && indexOfNewLine + 5 > indexOfNewLine)
                                {
                                    text = text.Remove(idx, 1).TrimStart();
                                    text = RemoveSpacesBeginLine(text);
                                }
                            }
                        }
                    }
                }
            }
        }
    }
    // Case 2: the text starts with a font tag; drop a hyphen (and spaces) at the end of st.Pre.
    else if (text.StartsWith("<font ", StringComparison.Ordinal))
    {
        var prev = subtitle.GetParagraphOrDefault(i - 1);
        if (prev == null || !HtmlUtil.RemoveHtmlTags(prev.Text).TrimEnd().EndsWith('-') || HtmlUtil.RemoveHtmlTags(prev.Text).TrimEnd().EndsWith("--", StringComparison.Ordinal))
        {
            var st = new StripableText(text);
            if (st.Pre.EndsWith('-') || st.Pre.EndsWith("- ", StringComparison.Ordinal))
            {
                text = st.Pre.TrimEnd('-', ' ') + st.StrippedText + st.Post;
            }
        }
    }
    return(text);
}
// Checks that nested opening tags with an ellipsis between them all go to Pre, and that the
// trailing "!" plus both closing tags go to Post, leaving "Hi" as StrippedText.
public void StripableTextItalicAndMore()
{
    var st = new StripableText("<i>...<b>Hi!</b></i>");

    // Expected value first, actual value second, so a failure message reports them the right way round.
    Assert.AreEqual("<i>...<b>", st.Pre);
    Assert.AreEqual("!</b></i>", st.Post);
    Assert.AreEqual("Hi", st.StrippedText);
}
/// <summary>
/// Upper-cases the first letter of a paragraph, of its second line, and of the text after a
/// dialog hyphen on a later line, when the preceding text is reported as the end of a
/// paragraph by <c>Helper.IsPreviousTextEndOfParagraph</c>. Updates <c>p.Text</c> in place.
/// </summary>
/// <param name="p">Paragraph to fix; its <c>Text</c> is modified.</param>
/// <param name="prev">Previous paragraph, or null when there is none.</param>
/// <param name="encoding">Passed to the Turkish letter helpers.</param>
/// <param name="language">Language code; "en" enables the lower-case-l-to-I replacements.</param>
/// <returns>The resulting <c>p.Text</c>.</returns>
private static string DoFix(Paragraph p, Paragraph prev, Encoding encoding, string language)
{
    // Part 1: first letter of the paragraph.
    if (p.Text != null && p.Text.Length > 1)
    {
        string text = p.Text;
        string pre = string.Empty;

        // Peel a leading italic tag, music symbols and single spaces off into "pre".
        if (text.Length > 4 && text.StartsWith("<i> ", StringComparison.Ordinal))
        {
            pre = "<i> ";
            text = text.Substring(4);
        }
        if (text.Length > 3 && text.StartsWith("<i>", StringComparison.Ordinal))
        {
            pre = "<i>";
            text = text.Substring(3);
        }
        if (text.Length > 4 && text.StartsWith("<I> ", StringComparison.Ordinal))
        {
            pre = "<I> ";
            text = text.Substring(4);
        }
        if (text.Length > 3 && text.StartsWith("<I>", StringComparison.Ordinal))
        {
            pre = "<I>";
            text = text.Substring(3);
        }
        if (text.Length > 2 && text.StartsWith('♪'))
        {
            pre = pre + "♪";
            text = text.Substring(1);
        }
        if (text.Length > 2 && text.StartsWith(' '))
        {
            pre = pre + " ";
            text = text.Substring(1);
        }
        if (text.Length > 2 && text.StartsWith('♫'))
        {
            pre = pre + "♫";
            text = text.Substring(1);
        }
        if (text.Length > 2 && text.StartsWith(' '))
        {
            pre = pre + " ";
            text = text.Substring(1);
        }
        var firstLetter = text[0];

        // " ." is the placeholder used when there is no previous paragraph; it always counts as an end of line.
        string prevText = " .";
        if (prev != null)
        {
            prevText = HtmlUtil.RemoveHtmlTags(prev.Text);
        }
        bool isPrevEndOfLine = Helper.IsPreviousTextEndOfParagraph(prevText);
        if (prevText == " .")
        {
            isPrevEndOfLine = true;
        }

        // Not a URL, starts with a lower-case (or Turkish little i) letter, and the previous text ended a sentence.
        if ((!text.StartsWith("www.", StringComparison.Ordinal) && !text.StartsWith("http:", StringComparison.Ordinal) && !text.StartsWith("https:", StringComparison.Ordinal)) && (char.IsLower(firstLetter) || Helper.IsTurkishLittleI(firstLetter, encoding, language)) && !char.IsDigit(firstLetter) && isPrevEndOfLine)
        {
            // In English, a previous text ending in " o.r.", " a.m." or " p.m." does not end a sentence.
            bool isMatchInKnowAbbreviations = language == "en" && (prevText.EndsWith(" o.r.", StringComparison.Ordinal) || prevText.EndsWith(" a.m.", StringComparison.Ordinal) || prevText.EndsWith(" p.m.", StringComparison.Ordinal));
            if (!isMatchInKnowAbbreviations)
            {
                if (Helper.IsTurkishLittleI(firstLetter, encoding, language))
                {
                    p.Text = pre + Helper.GetTurkishUppercaseLetter(firstLetter, encoding) + text.Substring(1);
                }
                // NOTE(review): the final "l am " test sits outside the language == "en" parenthesis,
                // so it applies to every language — confirm whether that is intended.
                else if (language == "en" && (text.StartsWith("l ", StringComparison.Ordinal) || text.StartsWith("l-I", StringComparison.Ordinal) || text.StartsWith("ls ", StringComparison.Ordinal) || text.StartsWith("lnterested") || text.StartsWith("lsn't ", StringComparison.Ordinal) || text.StartsWith("ldiot", StringComparison.Ordinal) || text.StartsWith("ln", StringComparison.Ordinal) || text.StartsWith("lm", StringComparison.Ordinal) || text.StartsWith("ls", StringComparison.Ordinal) || text.StartsWith("lt", StringComparison.Ordinal) || text.StartsWith("lf ", StringComparison.Ordinal) || text.StartsWith("lc", StringComparison.Ordinal) || text.StartsWith("l'm ", StringComparison.Ordinal)) || text.StartsWith("l am ", StringComparison.Ordinal)) // l > I
                {
                    // A lower-case L at the start of one of these words is replaced with an upper-case I.
                    p.Text = pre + "I" + text.Substring(1);
                }
                else
                {
                    p.Text = pre + char.ToUpper(firstLetter) + text.Substring(1);
                }
            }
        }
    }

    // Part 2: first letter of the second line of a two-line paragraph.
    if (p.Text != null && p.Text.Contains(Environment.NewLine))
    {
        var arr = p.Text.SplitToLines();
        if (arr.Length == 2 && arr[1].Length > 1)
        {
            string text = arr[1];
            string pre = string.Empty;

            // Same prefix peeling as in part 1, applied to the second line.
            if (text.Length > 4 && text.StartsWith("<i> ", StringComparison.Ordinal))
            {
                pre = "<i> ";
                text = text.Substring(4);
            }
            if (text.Length > 3 && text.StartsWith("<i>", StringComparison.Ordinal))
            {
                pre = "<i>";
                text = text.Substring(3);
            }
            if (text.Length > 4 && text.StartsWith("<I> ", StringComparison.Ordinal))
            {
                pre = "<I> ";
                text = text.Substring(4);
            }
            if (text.Length > 3 && text.StartsWith("<I>", StringComparison.Ordinal))
            {
                pre = "<I>";
                text = text.Substring(3);
            }
            if (text.Length > 2 && text.StartsWith('♪'))
            {
                pre = pre + "♪";
                text = text.Substring(1);
            }
            if (text.Length > 2 && text.StartsWith(' '))
            {
                pre = pre + " ";
                text = text.Substring(1);
            }
            if (text.Length > 2 && text.StartsWith('♫'))
            {
                pre = pre + "♫";
                text = text.Substring(1);
            }
            if (text.Length > 2 && text.StartsWith(' '))
            {
                pre = pre + " ";
                text = text.Substring(1);
            }
            char firstLetter = text[0];

            // Here the "previous text" is the first line of the same paragraph.
            string prevText = HtmlUtil.RemoveHtmlTags(arr[0]);
            bool isPrevEndOfLine = Helper.IsPreviousTextEndOfParagraph(prevText);

            // Unlike part 1, a first line ending in "..." blocks the fix.
            if ((!text.StartsWith("www.", StringComparison.Ordinal) && !text.StartsWith("http:", StringComparison.Ordinal) && !text.StartsWith("https:", StringComparison.Ordinal)) && (char.IsLower(firstLetter) || Helper.IsTurkishLittleI(firstLetter, encoding, language)) && !prevText.EndsWith("...", StringComparison.Ordinal) && isPrevEndOfLine)
            {
                bool isMatchInKnowAbbreviations = language == "en" && (prevText.EndsWith(" o.r.", StringComparison.Ordinal) || prevText.EndsWith(" a.m.", StringComparison.Ordinal) || prevText.EndsWith(" p.m.", StringComparison.Ordinal));
                if (!isMatchInKnowAbbreviations)
                {
                    if (Helper.IsTurkishLittleI(firstLetter, encoding, language))
                    {
                        text = pre + Helper.GetTurkishUppercaseLetter(firstLetter, encoding) + text.Substring(1);
                    }
                    // NOTE(review): as in part 1, the "l am " test is outside the language == "en" parenthesis — confirm.
                    else if (language == "en" && (text.StartsWith("l ", StringComparison.Ordinal) || text.StartsWith("l-I", StringComparison.Ordinal) || text.StartsWith("ls ") || text.StartsWith("lnterested") || text.StartsWith("lsn't ", StringComparison.Ordinal) || text.StartsWith("ldiot", StringComparison.Ordinal) || text.StartsWith("ln", StringComparison.Ordinal) || text.StartsWith("lm", StringComparison.Ordinal) || text.StartsWith("ls", StringComparison.Ordinal) || text.StartsWith("lt", StringComparison.Ordinal) || text.StartsWith("lf ", StringComparison.Ordinal) || text.StartsWith("lc", StringComparison.Ordinal) || text.StartsWith("l'm ", StringComparison.Ordinal)) || text.StartsWith("l am ", StringComparison.Ordinal)) // l > I
                    {
                        text = pre + "I" + text.Substring(1);
                    }
                    else
                    {
                        text = pre + char.ToUpper(firstLetter) + text.Substring(1);
                    }
                    p.Text = arr[0] + Environment.NewLine + text;
                }
            }

            // Both lines are dialog lines (single leading hyphen, optionally after an italic tag).
            arr = p.Text.SplitToLines();
            if ((arr[0].StartsWith('-') || arr[0].StartsWith("<i>-", StringComparison.Ordinal)) && (arr[1].StartsWith('-') || arr[1].StartsWith("<i>-", StringComparison.Ordinal)) && !arr[0].StartsWith("--", StringComparison.Ordinal) && !arr[0].StartsWith("<i>--", StringComparison.Ordinal) && !arr[1].StartsWith("--", StringComparison.Ordinal) && !arr[1].StartsWith("<i>--", StringComparison.Ordinal))
            {
                // Upper-case the letter after the hyphen on the second line.
                if (isPrevEndOfLine && arr[1].StartsWith("<i>- ", StringComparison.Ordinal) && arr[1].Length > 6)
                {
                    p.Text = arr[0] + Environment.NewLine + "<i>- " + char.ToUpper(arr[1][5]) + arr[1].Remove(0, 6);
                }
                else if (isPrevEndOfLine && arr[1].StartsWith("- ", StringComparison.Ordinal) && arr[1].Length > 3)
                {
                    p.Text = arr[0] + Environment.NewLine + "- " + char.ToUpper(arr[1][2]) + arr[1].Remove(0, 3);
                }
                arr = p.Text.SplitToLines();

                // For the first line, the previous paragraph only counts when it ended less than
                // 10000 ms before this one starts; otherwise the " ." placeholder is used.
                prevText = " .";
                if (prev != null && p.StartTime.TotalMilliseconds - 10000 < prev.EndTime.TotalMilliseconds)
                {
                    prevText = HtmlUtil.RemoveHtmlTags(prev.Text);
                }
                bool isPrevLineEndOfLine = Helper.IsPreviousTextEndOfParagraph(prevText);

                // Upper-case the letter after the hyphen on the first line.
                if (isPrevLineEndOfLine && arr[0].StartsWith("<i>- ", StringComparison.Ordinal) && arr[0].Length > 6)
                {
                    p.Text = "<i>- " + char.ToUpper(arr[0][5]) + arr[0].Remove(0, 6) + Environment.NewLine + arr[1];
                }
                else if (isPrevLineEndOfLine && arr[0].StartsWith("- ", StringComparison.Ordinal) && arr[0].Length > 3)
                {
                    p.Text = "- " + char.ToUpper(arr[0][2]) + arr[0].Remove(0, 3) + Environment.NewLine + arr[1];
                }
            }
        }
    }

    // Part 3: text after a dialog hyphen that follows a line break.
    if (p.Text != null && p.Text.Length > 4)
    {
        // Try several spellings of "line break + hyphen" in turn. "len" is the length of the
        // pattern tried last (without the line break); it stays 0 when the first pattern matches.
        int len = 0;
        int indexOfNewLine = p.Text.IndexOf(Environment.NewLine + " -", 1, StringComparison.Ordinal);
        if (indexOfNewLine < 0)
        {
            indexOfNewLine = p.Text.IndexOf(Environment.NewLine + "- <i> ♪", 1, StringComparison.Ordinal);
            len = "- <i> ♪".Length;
        }
        if (indexOfNewLine < 0)
        {
            indexOfNewLine = p.Text.IndexOf(Environment.NewLine + "-", 1, StringComparison.Ordinal);
            len = "-".Length;
        }
        if (indexOfNewLine < 0)
        {
            indexOfNewLine = p.Text.IndexOf(Environment.NewLine + "<i>-", 1, StringComparison.Ordinal);
            len = "<i>-".Length;
        }
        if (indexOfNewLine < 0)
        {
            indexOfNewLine = p.Text.IndexOf(Environment.NewLine + "<i> -", 1, StringComparison.Ordinal);
            len = "<i> -".Length;
        }
        if (indexOfNewLine < 0)
        {
            indexOfNewLine = p.Text.IndexOf(Environment.NewLine + "♪ -", 1, StringComparison.Ordinal);
            len = "♪ -".Length;
        }
        if (indexOfNewLine < 0)
        {
            indexOfNewLine = p.Text.IndexOf(Environment.NewLine + "♪ <i> -", 1, StringComparison.Ordinal);
            len = "♪ <i> -".Length;
        }
        if (indexOfNewLine < 0)
        {
            indexOfNewLine = p.Text.IndexOf(Environment.NewLine + "♪ <i>-", 1, StringComparison.Ordinal);
            len = "♪ <i>-".Length;
        }
        if (indexOfNewLine > 0)
        {
            // Upper-case the first stripped character of the tail, unless st.Pre ends with '[' or contains "...".
            string text = p.Text.Substring(indexOfNewLine + len);
            var st = new StripableText(text);
            if (st.StrippedText.Length > 0 && Helper.IsTurkishLittleI(st.StrippedText[0], encoding, language) && !st.Pre.EndsWith('[') && !st.Pre.Contains("..."))
            {
                text = st.Pre + Helper.GetTurkishUppercaseLetter(st.StrippedText[0], encoding) + st.StrippedText.Substring(1) + st.Post;
                p.Text = p.Text.Remove(indexOfNewLine + len).Insert(indexOfNewLine + len, text);
            }
            else if (st.StrippedText.Length > 0 && st.StrippedText[0] != char.ToUpper(st.StrippedText[0]) && !st.Pre.EndsWith('[') && !st.Pre.Contains("..."))
            {
                text = st.Pre + char.ToUpper(st.StrippedText[0]) + st.StrippedText.Substring(1) + st.Post;
                p.Text = p.Text.Remove(indexOfNewLine + len).Insert(indexOfNewLine + len, text);
            }
        }
    }
    return(p.Text);
}
/// <summary>
/// Walks all paragraphs and upper-cases the first letter that follows a ':' or ';',
/// both inside a paragraph and at the start of a paragraph whose predecessor ends
/// with one of those characters. Every accepted change is written back to
/// <paramref name="subtitle"/> and reported through <paramref name="callbacks"/>.
/// </summary>
/// <param name="subtitle">Subtitle whose paragraphs are inspected and updated.</param>
/// <param name="callbacks">Supplies encoding/language and receives fix notifications.</param>
public void Fix(Subtitle subtitle, IFixCallbacks callbacks)
{
    var language = Configuration.Settings.Language.FixCommonErrors;
    string fixAction = language.StartWithUppercaseLetterAfterColon;
    int fixCount = 0;

    for (int index = 0; index < subtitle.Paragraphs.Count; index++)
    {
        // Work on a copy; the real paragraph is only touched once the fix is allowed.
        var paragraph = new Paragraph(subtitle.Paragraphs[index]);
        Paragraph previous = subtitle.GetParagraphOrDefault(index - 1);
        string originalText = paragraph.Text;

        // Previous paragraph ends with ':' or ';' => this paragraph starts upper-case.
        if (previous != null)
        {
            string previousNoTags = HtmlUtil.RemoveHtmlTags(previous.Text);
            if (previousNoTags.EndsWith(':') || previousNoTags.EndsWith(';'))
            {
                var st = new StripableText(paragraph.Text);
                if (st.StrippedText.Length > 0)
                {
                    char first = st.StrippedText[0];
                    char firstUpper = char.ToUpper(first);
                    if (first != firstUpper)
                    {
                        paragraph.Text = st.Pre + firstUpper + st.StrippedText.Substring(1) + st.Post;
                    }
                }
            }
        }

        if (originalText.Contains(new[] { ':', ';' }))
        {
            int charsToSkip = 0;
            bool afterColon = false;
            for (int pos = 0; pos < paragraph.Text.Length; pos++)
            {
                char current = paragraph.Text[pos];
                if (current == ':' || current == ';')
                {
                    afterColon = true;
                    continue;
                }
                if (!afterColon)
                {
                    continue;
                }

                // Still inside a tag that started right after the colon.
                if (charsToSkip > 0)
                {
                    charsToSkip--;
                    continue;
                }

                string rest = paragraph.Text.Substring(pos);
                bool isShortTag = rest.StartsWith("<i>", StringComparison.OrdinalIgnoreCase) ||
                                  rest.StartsWith("<b>", StringComparison.OrdinalIgnoreCase) ||
                                  rest.StartsWith("<u>", StringComparison.OrdinalIgnoreCase);
                if (isShortTag)
                {
                    charsToSkip = 2;
                }
                else if (rest.StartsWith("<font ", StringComparison.OrdinalIgnoreCase) && rest.Contains('>'))
                {
                    charsToSkip = rest.IndexOf('>') - rest.IndexOf("<font ", StringComparison.OrdinalIgnoreCase);
                }
                else if (Helper.IsTurkishLittleI(current, callbacks.Encoding, callbacks.Language))
                {
                    string turkishUpper = Helper.GetTurkishUppercaseLetter(current, callbacks.Encoding).ToString(CultureInfo.InvariantCulture);
                    paragraph.Text = paragraph.Text.Remove(pos, 1).Insert(pos, turkishUpper);
                    afterColon = false;
                }
                else if (char.IsLower(current))
                {
                    // An 'i' whose next character equals its own upper-case form is left alone (e.g. "iPhone").
                    bool keepLowercase = current == 'i' &&
                                         paragraph.Text.Length > pos + 1 &&
                                         paragraph.Text[pos + 1] == char.ToUpper(paragraph.Text[pos + 1]);
                    if (!keepLowercase)
                    {
                        paragraph.Text = paragraph.Text.Remove(pos, 1).Insert(pos, char.ToUpper(current).ToString(CultureInfo.InvariantCulture));
                    }
                    afterColon = false;
                }
                else if (!(" " + Environment.NewLine).Contains(current))
                {
                    // Anything other than a space or line-break character ends the "after colon" state.
                    afterColon = false;
                }
            }
        }

        if (originalText != paragraph.Text && callbacks.AllowFix(paragraph, fixAction))
        {
            fixCount++;
            subtitle.Paragraphs[index].Text = paragraph.Text;
            callbacks.AddFixToListView(paragraph, fixAction, originalText, paragraph.Text);
        }
    }

    callbacks.UpdateFixStatus(fixCount, language.StartWithUppercaseLetterAfterColon, fixCount.ToString(CultureInfo.InvariantCulture));
}
// Italic tags that may precede the first letter; tried once each, in this order.
private static readonly string[] StartCasingItalicPrefixes = { "<i> ", "<i>", "<I> ", "<I>" };

// Single characters that may follow the italic tag; tried once each, in this order.
private static readonly char[] StartCasingMusicPrefixes = { '♪', ' ', '♫', ' ' };

// English line starts where a lower-case 'l' is treated as a mis-read capital 'I'.
private static readonly string[] StartCasingLowercaseLPrefixes = { "l ", "l-I", "ls ", "lnterested", "lsn't ", "ldiot", "ln", "lm", "ls", "lt", "lf ", "lc", "l'm " };

// Dialog markers searched for after a line break (after the initial " -" probe), in this order.
private static readonly string[] StartCasingDashMarkers = { "- <i> ♪", "-", "<i>-", "<i> -", "♪ -", "♪ <i> -", "♪ <i>-" };

/// <summary>
/// Upper-cases the first letter of a paragraph (and of its second line / dialog lines)
/// when the preceding text ends a paragraph. The paragraph's <c>Text</c> is updated in place.
/// </summary>
/// <param name="p">Paragraph to fix; its <c>Text</c> may be modified.</param>
/// <param name="prev">Previous paragraph, or null when there is none.</param>
/// <param name="encoding">Encoding passed on to the Turkish letter helpers.</param>
/// <param name="language">Language code; "en" enables the English-only rules.</param>
/// <returns>The resulting <c>p.Text</c> (null when the paragraph text is null).</returns>
public static string FixStartWithUppercaseLetterAfterParagraph(Paragraph p, Paragraph prev, Encoding encoding, string language)
{
    // 1) First letter of the paragraph, judged against the previous paragraph.
    if (p.Text != null && p.Text.Length > 1)
    {
        string pre;
        string text = SplitStartCasingPrefix(p.Text, out pre);
        char firstLetter = text[0];

        string prevText = " .";
        if (prev != null)
        {
            prevText = HtmlUtil.RemoveHtmlTags(prev.Text);
        }

        bool isPrevEndOfLine = FixCommonErrorsHelper.IsPreviousTextEndOfParagraph(prevText);
        if (prevText == " .")
        {
            isPrevEndOfLine = true; // no previous paragraph counts as "end of paragraph"
        }

        if (!StartsWithUrlPrefix(text) &&
            (char.IsLower(firstLetter) || IsTurkishLittleI(firstLetter, encoding, language)) &&
            !char.IsDigit(firstLetter) &&
            isPrevEndOfLine &&
            !EndsWithEnglishDottedAbbreviation(prevText, language))
        {
            p.Text = UppercaseStartOfText(pre, text, encoding, language);
        }
    }

    // 2) First letter of the second line, judged against the first line.
    if (p.Text != null && p.Text.Contains(Environment.NewLine))
    {
        var arr = p.Text.SplitToLines();
        if (arr.Length == 2 && arr[1].Length > 1)
        {
            string pre;
            string text = SplitStartCasingPrefix(arr[1], out pre);
            char firstLetter = text[0];
            string prevText = HtmlUtil.RemoveHtmlTags(arr[0]);
            bool isPrevEndOfLine = FixCommonErrorsHelper.IsPreviousTextEndOfParagraph(prevText);

            if (!StartsWithUrlPrefix(text) &&
                (char.IsLower(firstLetter) || IsTurkishLittleI(firstLetter, encoding, language)) &&
                !prevText.EndsWith("...", StringComparison.Ordinal) &&
                isPrevEndOfLine &&
                !EndsWithEnglishDottedAbbreviation(prevText, language))
            {
                p.Text = arr[0] + Environment.NewLine + UppercaseStartOfText(pre, text, encoding, language);
            }

            // Two-speaker dialog: both lines start with a single dash (optionally inside <i>).
            arr = p.Text.SplitToLines();
            if (IsSingleDashDialogLine(arr[0]) && IsSingleDashDialogLine(arr[1]))
            {
                string fixedLine;
                if (isPrevEndOfLine && TryUppercaseAfterDialogDash(arr[1], out fixedLine))
                {
                    p.Text = arr[0] + Environment.NewLine + fixedLine;
                }

                arr = p.Text.SplitToLines();
                prevText = " .";
                // Only consult the previous paragraph when it ended less than 10 seconds before this one starts.
                if (prev != null && p.StartTime.TotalMilliseconds - 10000 < prev.EndTime.TotalMilliseconds)
                {
                    prevText = HtmlUtil.RemoveHtmlTags(prev.Text);
                }

                bool isPrevLineEndOfLine = FixCommonErrorsHelper.IsPreviousTextEndOfParagraph(prevText);
                if (isPrevLineEndOfLine && TryUppercaseAfterDialogDash(arr[0], out fixedLine))
                {
                    p.Text = fixedLine + Environment.NewLine + arr[1];
                }
            }
        }
    }

    // 3) First letter after a dialog dash that follows a line break.
    //    The null check was missing here although both sections above anticipate a null text.
    if (p.Text != null && p.Text.Length > 4)
    {
        // For the initial " -" probe the offset stays 0, so the remainder starts at the line break itself.
        int len = 0;
        int indexOfNewLine = p.Text.IndexOf(Environment.NewLine + " -", 1, StringComparison.Ordinal);
        foreach (string marker in StartCasingDashMarkers)
        {
            if (indexOfNewLine >= 0)
            {
                break;
            }
            indexOfNewLine = p.Text.IndexOf(Environment.NewLine + marker, 1, StringComparison.Ordinal);
            len = marker.Length;
        }

        if (indexOfNewLine > 0)
        {
            int start = indexOfNewLine + len;
            var st = new StripableText(p.Text.Substring(start));
            if (st.StrippedText.Length > 0 &&
                !st.Pre.EndsWith("[", StringComparison.Ordinal) &&
                !st.Pre.Contains("..."))
            {
                char first = st.StrippedText[0];
                if (IsTurkishLittleI(first, encoding, language))
                {
                    p.Text = p.Text.Substring(0, start) + st.Pre + GetTurkishUppercaseLetter(first, encoding) + st.StrippedText.Substring(1) + st.Post;
                }
                else if (first != char.ToUpper(first))
                {
                    p.Text = p.Text.Substring(0, start) + st.Pre + char.ToUpper(first) + st.StrippedText.Substring(1) + st.Post;
                }
            }
        }
    }

    return p.Text;
}

// Moves a leading italic tag / music symbol / space run from the text into 'pre'; pre + result always equals the input.
private static string SplitStartCasingPrefix(string text, out string pre)
{
    pre = string.Empty;
    foreach (string tag in StartCasingItalicPrefixes)
    {
        if (text.Length > tag.Length && text.StartsWith(tag, StringComparison.Ordinal))
        {
            pre += tag; // append (not overwrite) so stacked tags are never dropped
            text = text.Substring(tag.Length);
        }
    }
    foreach (char symbol in StartCasingMusicPrefixes)
    {
        if (text.Length > 2 && text[0] == symbol)
        {
            pre += symbol;
            text = text.Substring(1);
        }
    }
    return text;
}

// True when the text starts with "www.", "http:" or "https:" and must not be re-cased.
private static bool StartsWithUrlPrefix(string text)
{
    return text.StartsWith("www.", StringComparison.Ordinal) ||
           text.StartsWith("http:", StringComparison.Ordinal) ||
           text.StartsWith("https:", StringComparison.Ordinal);
}

// True for English text ending in " o.r.", " a.m." or " p.m.", where the final dot does not end a sentence.
private static bool EndsWithEnglishDottedAbbreviation(string prevText, string language)
{
    return language == "en" &&
           (prevText.EndsWith(" o.r.", StringComparison.Ordinal) ||
            prevText.EndsWith(" a.m.", StringComparison.Ordinal) ||
            prevText.EndsWith(" p.m.", StringComparison.Ordinal));
}

// True when a leading lower-case 'l' should become a capital 'I'.
private static bool StartsWithLowercaseLMeantAsI(string text, string language)
{
    // NOTE(review): as in the original expression, "l am " is not gated by the language check - confirm this is intended.
    if (text.StartsWith("l am ", StringComparison.Ordinal))
    {
        return true;
    }
    if (language != "en")
    {
        return false;
    }
    foreach (string prefix in StartCasingLowercaseLPrefixes)
    {
        if (text.StartsWith(prefix, StringComparison.Ordinal))
        {
            return true;
        }
    }
    return false;
}

// Returns pre + text with the first letter of text upper-cased (Turkish letter, 'l' -> 'I', or plain upper-case).
private static string UppercaseStartOfText(string pre, string text, Encoding encoding, string language)
{
    char firstLetter = text[0];
    if (IsTurkishLittleI(firstLetter, encoding, language))
    {
        return pre + GetTurkishUppercaseLetter(firstLetter, encoding) + text.Substring(1);
    }
    if (StartsWithLowercaseLMeantAsI(text, language))
    {
        return pre + "I" + text.Substring(1);
    }
    return pre + char.ToUpper(firstLetter) + text.Substring(1);
}

// True when the line starts with exactly one dash, optionally wrapped in a leading <i>.
private static bool IsSingleDashDialogLine(string line)
{
    return (line.StartsWith("-", StringComparison.Ordinal) || line.StartsWith("<i>-", StringComparison.Ordinal)) &&
           !line.StartsWith("--", StringComparison.Ordinal) &&
           !line.StartsWith("<i>--", StringComparison.Ordinal);
}

// Upper-cases the letter after a leading "<i>- " or "- "; returns false (result = line) when neither form applies.
private static bool TryUppercaseAfterDialogDash(string line, out string result)
{
    if (line.StartsWith("<i>- ", StringComparison.Ordinal) && line.Length > 6)
    {
        result = "<i>- " + char.ToUpper(line[5]) + line.Remove(0, 6);
        return true;
    }
    if (line.StartsWith("- ", StringComparison.Ordinal) && line.Length > 3)
    {
        result = "- " + char.ToUpper(line[2]) + line.Remove(0, 3);
        return true;
    }
    result = line;
    return false;
}
/// <summary>
/// Removes the configured interjections from <paramref name="text"/> and cleans up the
/// punctuation, dashes and line breaks that are left behind.
/// </summary>
/// <param name="text">Text to clean; may contain two lines and "&lt;i&gt;" tags.</param>
/// <returns>
/// The cleaned text. An empty string is returned when the StripableText of an intermediate
/// result has no stripped text left, or when both resulting lines are just "-".
/// </returns>
public string RemoveInterjections(string text)
{
    // Build the lookup list once: each configured entry as-is, upper-case, lower-case and
    // capitalized; duplicates are removed by the HashSet and the list is sorted with CompareLength.
    if (_interjectionList == null)
    {
        var interjectionList = new HashSet<string>();
        foreach (var s in Configuration.Settings.Tools.Interjections.Split(new[] { ';' }, StringSplitOptions.RemoveEmptyEntries))
        {
            if (s.Length > 0)
            {
                interjectionList.Add(s);
                var upper = s.ToUpper();
                interjectionList.Add(upper);
                var lower = s.ToLower();
                interjectionList.Add(lower);
                interjectionList.Add(lower.CapitalizeFirstLetter());
            }
        }
        _interjectionList = new List<string>(interjectionList);
        interjectionList.Clear();
        interjectionList.TrimExcess();
        _interjectionList.Sort(CompareLength);
    }
    string oldText = text;
    // Keep making passes over the list for as long as a pass requests another one.
    bool doRepeat = true;
    while (doRepeat)
    {
        doRepeat = false;
        foreach (string s in _interjectionList)
        {
            // Cheap substring test first; the regex then requires word boundaries on both sides.
            if (text.Contains(s))
            {
                var regex = new Regex("\\b" + Regex.Escape(s) + "\\b");
                var match = regex.Match(text);
                if (match.Success)
                {
                    int index = match.Index;
                    // temp = text with the first whole-word occurrence cut out.
                    string temp = text.Remove(index, s.Length);
                    // Clean up dash combinations that the removal left at the end or the start.
                    if (temp.Remove(0, index) == " —" && temp.EndsWith("— —", StringComparison.Ordinal))
                    {
                        temp = temp.Remove(temp.Length - 3);
                        if (temp.EndsWith(Environment.NewLine + "—", StringComparison.Ordinal))
                            temp = temp.Remove(temp.Length - 1).TrimEnd();
                    }
                    else if (temp.Remove(0, index) == " —" && temp.EndsWith("- —", StringComparison.Ordinal))
                    {
                        temp = temp.Remove(temp.Length - 3);
                        if (temp.EndsWith(Environment.NewLine + "-", StringComparison.Ordinal))
                            temp = temp.Remove(temp.Length - 1).TrimEnd();
                    }
                    else if (index == 2 && temp.StartsWith("- —", StringComparison.Ordinal))
                    {
                        temp = temp.Remove(2, 2);
                    }
                    // NOTE(review): this condition appears identical to the previous branch, which
                    // would make this branch unreachable - verify the literals against the upstream file.
                    else if (index == 2 && temp.StartsWith("- —", StringComparison.Ordinal))
                    {
                        temp = temp.Remove(2, 1);
                    }
                    else if (index == 0 && temp.StartsWith(" —", StringComparison.Ordinal))
                    {
                        temp = temp.Remove(0, 2);
                    }
                    else if (index == 0 && temp.StartsWith('—'))
                    {
                        temp = temp.Remove(0, 1);
                    }
                    string pre = string.Empty;
                    if (index > 0)
                        doRepeat = true;
                    // removeAfter stays true until one of the punctuation clean-ups below has handled the gap.
                    bool removeAfter = true;
                    if (index > s.Length)
                    {
                        // Look for ", " directly followed by '!', '?' or '.' around the computed offset and drop the ", ".
                        if (temp.Length > index - s.Length + 3)
                        {
                            int subIndex = index - s.Length + 1;
                            string subTemp = temp.Substring(subIndex, 3);
                            if (subTemp == ", !" || subTemp == ", ?" || subTemp == ", .")
                            {
                                temp = temp.Remove(subIndex, 2);
                                removeAfter = false;
                            }
                        }
                        if (removeAfter && temp.Length > index - s.Length + 2)
                        {
                            int subIndex = index - s.Length;
                            string subTemp = temp.Substring(subIndex, 3);
                            if (subTemp == ", !" || subTemp == ", ?" || subTemp == ", .")
                            {
                                temp = temp.Remove(subIndex, 2);
                                removeAfter = false;
                            }
                            else
                            {
                                subTemp = temp.Substring(subIndex);
                                if (subTemp.StartsWith(", -—", StringComparison.Ordinal))
                                {
                                    temp = temp.Remove(subIndex, 3);
                                    removeAfter = false;
                                }
                                else if (subTemp.StartsWith(", --", StringComparison.Ordinal))
                                {
                                    temp = temp.Remove(subIndex, 2);
                                    removeAfter = false;
                                }
                            }
                        }
                        // A dash directly followed by '!', '?' or '.': drop the dash.
                        if (removeAfter && temp.Length > index - s.Length + 2)
                        {
                            int subIndex = index - s.Length + 1;
                            string subTemp = temp.Substring(subIndex, 2);
                            if (subTemp == "-!" || subTemp == "-?" || subTemp == "-.")
                            {
                                temp = temp.Remove(subIndex, 1);
                                removeAfter = false;
                            }
                        }
                    }
                    // ", —" just before the removal point: drop the comma and shift index accordingly.
                    if (index > 3 && index - 2 < temp.Length)
                    {
                        string subTemp = temp.Substring(index - 2);
                        // NOTE(review): both operands of the || appear identical - verify the literals.
                        if (subTemp.StartsWith(", —", StringComparison.Ordinal) || subTemp.StartsWith(", —", StringComparison.Ordinal))
                        {
                            temp = temp.Remove(index - 2, 1);
                            index--;
                        }
                    }
                    if (removeAfter)
                    {
                        if (index == 0)
                        {
                            if (temp.StartsWith('-'))
                                temp = temp.Remove(0, 1).Trim();
                        }
                        else if (index == 3 && temp.StartsWith("<i>-", StringComparison.Ordinal))
                        {
                            temp = temp.Remove(3, 1);
                        }
                        else if (index > 0 && temp.Length > index)
                        {
                            // Split into the part before the removal (pre) and the part after it (temp).
                            pre = text.Substring(0, index);
                            temp = temp.Remove(0, index);
                            if (temp.StartsWith('-') && pre.EndsWith('-'))
                                temp = temp.Remove(0, 1);
                            if (temp.StartsWith('-') && pre.EndsWith("- ", StringComparison.Ordinal))
                                temp = temp.Remove(0, 1);
                        }
                        // Strip spaces and punctuation that followed the removed word.
                        while (temp.Length > 0 && " ,.?!".Contains(temp[0]))
                        {
                            temp = temp.Remove(0, 1);
                            doRepeat = true;
                        }
                        // If the removed interjection's first character is not its own lower-case form,
                        // upper-case the first character of what follows.
                        if (temp.Length > 0 && s[0].ToString(CultureInfo.InvariantCulture) != s[0].ToString(CultureInfo.InvariantCulture).ToLower())
                        {
                            temp = char.ToUpper(temp[0]) + temp.Substring(1);
                            doRepeat = true;
                        }
                        if (temp.StartsWith('-') && pre.EndsWith(' '))
                            temp = temp.Remove(0, 1);
                        if (temp.StartsWith('—') && pre.EndsWith(','))
                            pre = pre.TrimEnd(',') + " ";
                        temp = pre + temp;
                    }
                    // A dangling "- " on a new last line is removed together with the line break.
                    if (temp.EndsWith(Environment.NewLine + "- ", StringComparison.Ordinal))
                        temp = temp.Remove(temp.Length - 2).TrimEnd();
                    var st = new StripableText(temp);
                    if (st.StrippedText.Length == 0)
                        return string.Empty;
                    // The text collapsed from several lines to one: a leading dialog dash is dropped.
                    if (temp.StartsWith('-') && !temp.Contains(Environment.NewLine) && text.Contains(Environment.NewLine))
                        temp = temp.Remove(0, 1).Trim();
                    text = temp;
                }
            }
        }
    }
    // Post-processing of two-line results where one line was reduced to a dash or a single punctuation mark.
    var lines = text.SplitToLines();
    if (lines.Length == 2 && text != oldText)
    {
        if (lines[0] == "-" && lines[1] == "-")
            return string.Empty;
        if (lines[0].Length > 1 && lines[0][0] == '-' && lines[1].Trim() == "-")
            return lines[0].Remove(0, 1).Trim();
        if (lines[1].Length > 1 && lines[1][0] == '-' && lines[0].Trim() == "-")
            return lines[1].Remove(0, 1).Trim();
        if (lines[1].Length > 4 && lines[1].StartsWith("<i>-", StringComparison.Ordinal) && lines[0].Trim() == "-")
            return "<i>" + lines[1].Remove(0, 4).Trim();
        // NOTE(review): && binds tighter than ||, so the Length check only guards the "-" comparison - confirm intended.
        if (lines[0].Length > 1 && lines[1] == "-" || lines[1] == "." || lines[1] == "!" || lines[1] == "?")
        {
            if (lines[0].StartsWith('-') && oldText.Contains(Environment.NewLine + "-"))
                lines[0] = lines[0].Remove(0, 1);
            return lines[0].Trim();
        }
        // Same checks again, this time comparing the lines after HtmlUtil.RemoveHtmlTags.
        var noTags0 = HtmlUtil.RemoveHtmlTags(lines[0], false).Trim();
        var noTags1 = HtmlUtil.RemoveHtmlTags(lines[1], false).Trim();
        if (noTags0 == "-")
        {
            if (noTags1 == noTags0)
                return string.Empty;
            if (lines[1].Length > 1 && lines[1][0] == '-')
                return lines[1].Remove(0, 1).Trim();
            if (lines[1].Length > 4 && lines[1].StartsWith("<i>-", StringComparison.Ordinal))
                return "<i>" + lines[1].Remove(0, 4).Trim();
            return lines[1];
        }
        if (noTags1 == "-")
        {
            if (lines[0].Length > 1 && lines[0][0] == '-')
                return lines[0].Remove(0, 1).Trim();
            if (lines[0].Length > 4 && lines[0].StartsWith("<i>-", StringComparison.Ordinal))
                return "<i>" + lines[0].Remove(0, 4).Trim();
            return lines[0];
        }
    }
    // Drop a line that consists only of '.', '?', '!', '-', '—' and white space.
    if (lines.Length == 2)
    {
        if (string.IsNullOrWhiteSpace(lines[1].Replace(".", string.Empty).Replace("?", string.Empty).Replace("!", string.Empty).Replace("-", string.Empty).Replace("—", string.Empty)))
        {
            text = lines[0];
            lines = text.SplitToLines();
        }
        else if (string.IsNullOrWhiteSpace(lines[0].Replace(".", string.Empty).Replace("?", string.Empty).Replace("!", string.Empty).Replace("-", string.Empty).Replace("—", string.Empty)))
        {
            text = lines[1];
            lines = text.SplitToLines();
        }
    }
    // A former two-line dialog that is now a single line loses its leading dialog dash,
    // provided the old first line ended with '.', '!' or '?' (optionally followed by </i>).
    if (lines.Length == 1 && text != oldText && Utilities.GetNumberOfLines(oldText) == 2)
    {
        if ((oldText.StartsWith('-') || oldText.StartsWith("<i>-", StringComparison.Ordinal)) && (oldText.Contains("." + Environment.NewLine) || oldText.Contains(".</i>" + Environment.NewLine) || oldText.Contains("!" + Environment.NewLine) || oldText.Contains("!</i>" + Environment.NewLine) || oldText.Contains("?" + Environment.NewLine) || oldText.Contains("?</i>" + Environment.NewLine)))
        {
            if (text.StartsWith("<i>-", StringComparison.Ordinal))
                text = "<i>" + text.Remove(0, 4).TrimStart();
            else
                text = text.TrimStart('-').TrimStart();
        }
        else if ((oldText.Contains(Environment.NewLine + "-") || oldText.Contains(Environment.NewLine + "<i>-")) && (oldText.Contains("." + Environment.NewLine) || oldText.Contains(".</i>" + Environment.NewLine) || oldText.Contains("!" + Environment.NewLine) || oldText.Contains("!</i>" + Environment.NewLine) || oldText.Contains("?" + Environment.NewLine) || oldText.Contains("?</i>" + Environment.NewLine)))
        {
            if (text.StartsWith("<i>-", StringComparison.Ordinal))
                text = "<i>" + text.Remove(0, 4).TrimStart();
            else
                text = text.TrimStart('-').TrimStart();
        }
    }
    return text;
}
/// <summary>
/// Rebuilds the preview list: for every paragraph, applies name casing for each checked
/// name in <c>listViewNames</c> and lists the paragraphs whose text would change.
/// </summary>
private void GeneratePreview()
{
    Cursor = Cursors.WaitCursor;
    listViewFixes.BeginUpdate();
    try
    {
        listViewFixes.Items.Clear();
        foreach (Paragraph p in _subtitle.Paragraphs)
        {
            string text = p.Text;
            if (text == null)
            {
                // Nothing to re-case. Previously the null check ran only after the text had
                // already been passed to RemoveHtmlTags / ToUpper.
                continue;
            }

            foreach (ListViewItem item in listViewNames.Items)
            {
                if (!item.Checked)
                {
                    continue;
                }

                string name = item.SubItems[1].Text;
                // Skip single-character and all-lower-case names, and names not present in the text.
                if (name.Length <= 1 || name == name.ToLower() || !text.Contains(name, StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                // Text that is entirely upper-case (ignoring tags) is left alone.
                string textNoTags = HtmlUtil.RemoveHtmlTags(text);
                if (textNoTags == textNoTags.ToUpper())
                {
                    continue;
                }

                var st = new StripableText(text);
                st.FixCasing(new List<string> { name }, true, false, false, string.Empty);
                text = st.MergedString;
            }

            if (text != p.Text)
            {
                AddToPreviewListView(p, text);
            }
        }
    }
    finally
    {
        // Always leave the list view and cursor usable, even if a fix throws.
        listViewFixes.EndUpdate();
        Cursor = Cursors.Default;
    }

    groupBoxLinesFound.Text = string.Format(Configuration.Settings.Language.ChangeCasingNames.LinesFoundX, listViewFixes.Items.Count);
}
/// <summary>
/// Applies the casing mode selected by the radio buttons (normal, uppercase or lowercase)
/// to <paramref name="text"/> and counts the line in <c>_noOfLinesChanged</c> when it changed.
/// </summary>
/// <param name="text">Text to re-case.</param>
/// <param name="lastLine">Previous line, forwarded to <c>StripableText.FixCasing</c> in normal mode.</param>
/// <param name="namesEtc">Names list, forwarded to <c>StripableText.FixCasing</c> in normal mode.</param>
/// <returns>The re-cased text, or the unchanged input when no mode applies.</returns>
private string FixCasing(string text, string lastLine, List<string> namesEtc)
{
    string result = text;

    if (radioButtonNormal.Checked)
    {
        // "Only all-uppercase" option: a line that is not fully upper-case is returned untouched and uncounted.
        if (checkBoxOnlyAllUpper.Checked && text != text.ToUpper())
        {
            return text;
        }

        if (text.Length > 1)
        {
            // first all to lower
            string lowered = text.ToLower().Trim();
            var st = new StripableText(lowered.FixExtraSpaces());
            st.FixCasing(namesEtc, false, true, true, lastLine); // fix all casing but names (that's a separate option)
            result = st.MergedString;
        }
    }
    else if (radioButtonUppercase.Checked)
    {
        var st = new StripableText(text);
        string upperCased = st.Pre + st.StrippedText.ToUpper() + st.Post;
        result = HtmlUtil.FixUpperTags(upperCased); // tags inside text
    }
    else if (radioButtonLowercase.Checked)
    {
        result = text.ToLower();
    }

    if (result != text)
    {
        _noOfLinesChanged++;
    }

    return result;
}
/// <summary>
/// Replaces a lower-case "i" at the start of a line with "I" when the preceding line
/// (tags removed) is empty or ends with '.', '!' or '?', but not with "..." or an abbreviation.
/// </summary>
/// <param name="input">Text to process; split on the characters of <c>Environment.NewLine</c>, empty lines dropped.</param>
/// <param name="lastLine">Line preceding <paramref name="input"/>; used for the first line only.</param>
/// <returns>The processed lines joined with line breaks, without a trailing line break.</returns>
private string FixLowercaseIToUppercaseI(string input, string lastLine)
{
    var output = new StringBuilder();
    string[] lines = input.Split(Environment.NewLine.ToCharArray(), StringSplitOptions.RemoveEmptyEntries);
    string previous = lastLine;

    for (int index = 0; index < lines.Length; index++)
    {
        if (index > 0)
        {
            previous = lines[index - 1];
        }
        previous = Utilities.RemoveHtmlTags(previous);

        string current = lines[index];
        bool noPrevious = string.IsNullOrEmpty(previous);
        bool afterSentenceEnd = noPrevious ||
                                previous.EndsWith(".", StringComparison.Ordinal) ||
                                previous.EndsWith("!", StringComparison.Ordinal) ||
                                previous.EndsWith("?", StringComparison.Ordinal);

        if (afterSentenceEnd)
        {
            var st = new StripableText(current);
            // The line must start with 'i' and must not directly follow an opening bracket or an ellipsis.
            bool startsWithLowercaseI = st.StrippedText.StartsWith("i", StringComparison.Ordinal) &&
                                        !st.Pre.EndsWith("[", StringComparison.Ordinal) &&
                                        !st.Pre.EndsWith("(", StringComparison.Ordinal) &&
                                        !st.Pre.EndsWith("...", StringComparison.Ordinal);

            if (startsWithLowercaseI &&
                (noPrevious || (!previous.EndsWith("...", StringComparison.Ordinal) && !EndsWithAbbreviation(previous, _abbreviationList))))
            {
                current = st.Pre + "I" + st.StrippedText.Remove(0, 1) + st.Post;
            }
        }

        output.AppendLine(current);
    }

    // Remove the line break appended after the last line.
    string joined = output.ToString();
    return joined.TrimEnd('\r').TrimEnd('\n').TrimEnd('\r').TrimEnd('\n');
}
/// <summary>
/// Applies the hard-coded OCR clean-up rules to one subtitle text: repairs broken
/// ellipses, inserts a space after dialog dashes, upper-cases the first letter after
/// a sentence end, and fixes common digit / 'I' / 'l' confusions.
/// </summary>
/// <param name="input">Text to fix.</param>
/// <param name="lastLine">Preceding text, or null/empty when there is none.</param>
/// <param name="abbreviationList">Abbreviations forwarded to <c>EndsWithAbbreviation</c>.</param>
/// <returns>
/// The fixed text; <paramref name="input"/> unchanged when
/// <c>Configuration.Settings.Tools.OcrFixUseHardcodedRules</c> is off.
/// </returns>
public string FixOcrErrorsViaHardcodedRules(string input, string lastLine, HashSet<string> abbreviationList)
{
    if (!Configuration.Settings.Tools.OcrFixUseHardcodedRules)
        return input;

    input = input.Replace(",...", "...");
    if (input.StartsWith("..") && !input.StartsWith("..."))
        input = "." + input;

    // Temporarily detach a leading dialog dash so the ellipsis rules see the text start.
    string pre = string.Empty;
    if (input.StartsWith("- ", StringComparison.Ordinal))
    {
        pre = "- ";
        input = input.Remove(0, 2);
    }
    else if (input.StartsWith("-", StringComparison.Ordinal))
    {
        pre = "-";
        input = input.Remove(0, 1);
    }

    bool hasDotDot = input.Contains("..") || input.Contains(". .");
    if (hasDotDot)
    {
        // Leading ellipsis variants (two dots, spaced dots) are normalized to "...".
        if (input.Length > 5 && input.StartsWith("..") && Utilities.AllLettersAndNumbers.Contains(input.Substring(2, 1)))
            input = "..." + input.Remove(0, 2);
        if (input.Length > 7 && input.StartsWith("<i>..") && Utilities.AllLettersAndNumbers.Contains(input.Substring(5, 1)))
            input = "<i>..." + input.Remove(0, 5);
        if (input.Length > 5 && input.StartsWith(".. ") && Utilities.AllLettersAndNumbers.Contains(input.Substring(3, 1)))
            input = "..." + input.Remove(0, 3);
        if (input.Length > 7 && input.StartsWith("<i>.. ") && Utilities.AllLettersAndNumbers.Contains(input.Substring(6, 1)))
            input = "<i>..." + input.Remove(0, 6);
        if (input.Contains(Environment.NewLine + ".. "))
            input = input.Replace(Environment.NewLine + ".. ", Environment.NewLine + "...");
        if (input.Contains(Environment.NewLine + "<i>.. "))
            input = input.Replace(Environment.NewLine + "<i>.. ", Environment.NewLine + "<i>...");
        if (input.StartsWith(". .."))
            input = "..." + input.Remove(0, 4);
        if (input.StartsWith(".. ."))
            input = "..." + input.Remove(0, 4);
        if (input.StartsWith(". . ."))
            input = "..." + input.Remove(0, 5);
        if (input.StartsWith("... "))
            input = input.Remove(3, 1);
    }

    input = pre + input;

    if (hasDotDot)
    {
        if (input.StartsWith("<i>. .."))
            input = "<i>..." + input.Remove(0, 7);
        if (input.StartsWith("<i>.. ."))
            input = "<i>..." + input.Remove(0, 7);
        if (input.StartsWith("<i>. . ."))
            input = "<i>..." + input.Remove(0, 8);
        if (input.StartsWith("<i>... "))
            input = input.Remove(6, 1);
        if (input.StartsWith(". . <i>."))
            input = "<i>..." + input.Remove(0, 8);
        if (input.StartsWith("...<i>") && (input.IndexOf("</i>", StringComparison.Ordinal) > input.IndexOf(" ", StringComparison.Ordinal)))
            input = "<i>..." + input.Remove(0, 6);

        // Trailing ellipsis variants.
        if (input.EndsWith(". .."))
            input = input.Remove(input.Length - 4, 4) + "...";
        if (input.EndsWith(".. ."))
            input = input.Remove(input.Length - 4, 4) + "...";
        if (input.EndsWith(". . ."))
            input = input.Remove(input.Length - 5, 5) + "...";
        if (input.EndsWith(". ..."))
            input = input.Remove(input.Length - 5, 5) + "...";
        if (input.EndsWith(". ..</i>"))
            input = input.Remove(input.Length - 8, 8) + "...</i>";
        if (input.EndsWith(".. .</i>"))
            input = input.Remove(input.Length - 8, 8) + "...</i>";
        if (input.EndsWith(". . .</i>"))
            input = input.Remove(input.Length - 9, 9) + "...</i>";
        if (input.EndsWith(". ...</i>"))
            input = input.Remove(input.Length - 9, 9) + "...</i>";
        if (input.EndsWith(".</i> . ."))
            input = input.Remove(input.Length - 9, 9) + "...</i>";
        if (input.EndsWith(".</i>.."))
            input = input.Remove(input.Length - 7, 7) + "...</i>";
        input = input.Replace(".</i> . ." + Environment.NewLine, "...</i>" + Environment.NewLine);

        // Ellipsis followed by '?' or '!'.
        input = input.Replace(".. ?", "..?");
        input = input.Replace("..?", "...?");
        input = input.Replace("....?", "...?");
        input = input.Replace(".. !", "..!");
        input = input.Replace("..!", "...!");
        input = input.Replace("....!", "...!");
        input = input.Replace("... ?", "...?");
        input = input.Replace("... !", "...!");

        // Applied twice so that longer dot runs shrink further.
        input = input.Replace("....", "...");
        input = input.Replace("....", "...");

        // A continuation ("- ..." after a line ending in "...") is not a dialog line: drop the dash.
        if (input.StartsWith("- ...") && lastLine != null && lastLine.EndsWith("...") && !(input.Contains(Environment.NewLine + "-")))
            input = input.Remove(0, 2);
        if (input.StartsWith("-...") && lastLine != null && lastLine.EndsWith("...") && !(input.Contains(Environment.NewLine + "-")))
            input = input.Remove(0, 1);
    }

    // Insert the missing space between a dialog dash and a following upper-case letter.
    if (input.Length > 2 && input[0] == '-' && Utilities.UppercaseLetters.Contains(input[1].ToString()))
    {
        input = input.Insert(1, " ");
    }

    if (input.Length > 5 && input.StartsWith("<i>-") && Utilities.UppercaseLetters.Contains(input[4].ToString()))
    {
        input = input.Insert(4, " ");
    }

    int idx = input.IndexOf(Environment.NewLine + "-", StringComparison.Ordinal);
    if (idx > 0 && idx + Environment.NewLine.Length + 1 < input.Length && Utilities.UppercaseLetters.Contains(input[idx + Environment.NewLine.Length + 1].ToString()))
    {
        input = input.Insert(idx + Environment.NewLine.Length + 1, " ");
    }

    idx = input.IndexOf(Environment.NewLine + "<i>-", StringComparison.Ordinal);
    // Bounds check added: without it a text ending in "<i>-" indexed past the end of the string.
    if (idx > 0 && idx + Environment.NewLine.Length + 4 < input.Length && Utilities.UppercaseLetters.Contains(input[idx + Environment.NewLine.Length + 4].ToString()))
    {
        input = input.Insert(idx + Environment.NewLine.Length + 4, " ");
    }

    // Upper-case the first letter when the previous line ended a sentence (or there is none).
    if (string.IsNullOrEmpty(lastLine) || lastLine.EndsWith(".") || lastLine.EndsWith("!") || lastLine.EndsWith("?") || lastLine.EndsWith("]") || lastLine.EndsWith("♪"))
    {
        lastLine = Utilities.RemoveHtmlTags(lastLine);
        var st = new StripableText(input);
        if (lastLine == null || (!lastLine.EndsWith("...") && !EndsWithAbbreviation(lastLine, abbreviationList)))
        {
            if (st.StrippedText.Length > 0 && st.StrippedText[0].ToString() != st.StrippedText[0].ToString().ToUpper() && !st.Pre.EndsWith("[") && !st.Pre.EndsWith("(") && !st.Pre.EndsWith("..."))
            {
                string uppercaseLetter = st.StrippedText[0].ToString().ToUpper();

                // A leading 'l' followed by one of these consonants becomes 'I' instead of 'L'.
                if (st.StrippedText.Length > 1 && uppercaseLetter == "L" && "abcdfghjklmnpqrstvwxz".Contains(st.StrippedText[1].ToString()))
                    uppercaseLetter = "I";

                // Language "ita": a leading "lo " / "lo." gets 'I'.
                if ((st.StrippedText.StartsWith("lo ") || st.StrippedText == "lo.") && _threeLetterIsoLanguageName == "ita")
                    uppercaseLetter = "I";

                // Language "nld": a single letter after an apostrophe keeps its original case.
                if ((st.StrippedText.StartsWith("k ") || st.StrippedText.StartsWith("m ") || st.StrippedText.StartsWith("n ") || st.StrippedText.StartsWith("r ") || st.StrippedText.StartsWith("s ") || st.StrippedText.StartsWith("t ")) && st.Pre.EndsWith("'") && _threeLetterIsoLanguageName == "nld")
                    uppercaseLetter = st.StrippedText.Substring(0, 1);

                // Language "eng": stuttered "l-I'll " / "l-l'll " becomes "I-I'll ".
                if ((st.StrippedText.StartsWith("l-I'll ") || st.StrippedText.StartsWith("l-l'll ")) && _threeLetterIsoLanguageName == "eng")
                {
                    uppercaseLetter = "I";
                    st.StrippedText = "I-I" + st.StrippedText.Remove(0, 3);
                }

                st.StrippedText = st.StrippedText.Remove(0, 1).Insert(0, uppercaseLetter);
                input = st.Pre + st.StrippedText + st.Post;
            }
        }
    }

    // lines ending with ". should often end at ... (if no other quotes exist nearby)
    if ((lastLine == null || !lastLine.Contains("\"")) && input != null && input.EndsWith("\".") && input.IndexOf('"') == input.LastIndexOf('"') && input.Length > 3)
    {
        string lastChar = input.Substring(input.Length - 3, 1);
        if (!"0123456789".Contains(lastChar))
        {
            int position = input.Length - 2;
            input = input.Remove(position).Insert(position, "...");
        }
    }

    // change '<number><space>1' to '<number>1'
    if (input.Contains("1"))
    {
        Match match = RegExNumber1.Match(input);
        while (match.Success)
        {
            // Skip the fix when the match is followed by "/<digit>" three characters in.
            bool doFix = true;
            if (match.Index + 4 < input.Length && input[match.Index + 3] == '/' && "0123456789".Contains(input[match.Index + 4].ToString()))
                doFix = false;

            if (doFix)
            {
                input = input.Substring(0, match.Index + 1) + input.Substring(match.Index + 2);
                match = RegExNumber1.Match(input);
            }
            else
            {
                match = RegExNumber1.Match(input, match.Index + 1);
            }
        }
    }

    // change '' to "
    input = input.Replace("''", "\"");

    // change 'sequeI of' to 'sequel of'
    if (input.Contains("I"))
    {
        var match = RegExUppercaseI.Match(input);
        while (match.Success)
        {
            // Leave the match alone when it is preceded by "Mc" / "Mac".
            bool doFix = true;
            if (match.Index >= 1 && input.Substring(match.Index - 1).StartsWith("Mc"))
                doFix = false;
            if (match.Index >= 2 && input.Substring(match.Index - 2).StartsWith("Mac"))
                doFix = false;
            if (doFix)
                input = input.Substring(0, match.Index + 1) + "l" + input.Substring(match.Index + 2);

            if (match.Index + 1 < input.Length)
                match = RegExUppercaseI.Match(input, match.Index + 1);
            else
                break; // end while
        }
    }

    // change 'NlCE' to 'NICE'
    if (input.Contains("l"))
    {
        var match = RegExLowercaseL.Match(input);
        while (match.Success)
        {
            input = input.Substring(0, match.Index + 1) + "I" + input.Substring(match.Index + 2);
            match = RegExLowercaseL.Match(input);
        }
    }

    return input;
}
/// <summary>
/// Walks all paragraphs and appends a sentence terminator where one appears to be missing:
/// at the end of a paragraph when the following paragraph starts with an uppercase letter,
/// and at the end of the first line when the next line inside the paragraph starts a dialog ("-").
/// Every change is gated by AllowFix and reported through AddFixToListView.
/// </summary>
public void FixMissingPeriodsAtEndOfLine()
{
    string fixAction = _language.FixMissingPeriodAtEndOfLine;
    int periodsAdded = 0;
    var wordSeparators = new[] { ' ', '.', ',', '-', '?', '!', ':', ';', '"', '(', ')', '[', ']', '{', '}', '|', '<', '>', '/', '+', '\r', '\n' };
    var dialogLineStarts = new[] { " -", "-", "<i>-", "<i> -" };

    for (int index = 0; index < Subtitle.Paragraphs.Count; index++)
    {
        Paragraph p = Subtitle.Paragraphs[index];
        Paragraph next = Subtitle.GetParagraphOrDefault(index + 1);

        string nextText = next == null
            ? string.Empty
            : HtmlUtil.RemoveHtmlTags(next.Text).TrimStart('-', '"', '„').TrimStart();
        string tempNoHtml = HtmlUtil.RemoveHtmlTags(p.Text).TrimEnd();

        if (IsOneLineUrl(p.Text) || p.Text.Contains(new[] { '♪', '♫' }) || p.Text.EndsWith('\''))
        {
            // one-line URLs, music lines and lines ending with an apostrophe are left untouched
        }
        else if (!string.IsNullOrEmpty(nextText) && next != null && next.Text.Length > 0 &&
                 Utilities.UppercaseLetters.Contains(nextText[0]) &&
                 tempNoHtml.Length > 0 &&
                 !@",.!?:;>-])♪♫…".Contains(tempNoHtml[tempNoHtml.Length - 1]))
        {
            string tempTrimmed = tempNoHtml.TrimEnd().TrimEnd('\'', '"', '“', '”').TrimEnd();

            // Skipped when the next paragraph starts with "I " / "I'" (always capitalised)
            // or when its first word is a known name.
            if (tempTrimmed.Length > 0 &&
                !@")]*#¶.!?".Contains(tempTrimmed[tempTrimmed.Length - 1]) &&
                p.Text != p.Text.ToUpper() &&
                !next.Text.StartsWith("I ", StringComparison.Ordinal) &&
                !next.Text.StartsWith("I'", StringComparison.Ordinal) &&
                !IsName(next.Text.Split(wordSeparators)[0]) &&
                AllowFix(p, fixAction))
            {
                string before = p.Text;
                if (p.Text.EndsWith('>'))
                {
                    // put the period in front of the trailing tag
                    int lastTagStart = p.Text.LastIndexOf('<');
                    if (lastTagStart > 0)
                        p.Text = p.Text.Insert(lastTagStart, ".");
                }
                else if (p.Text.EndsWith('“') && tempNoHtml.StartsWith('„'))
                {
                    p.Text = p.Text.TrimEnd('“') + ".“";
                }
                else if (p.Text.EndsWith('"') && tempNoHtml.StartsWith('"'))
                {
                    p.Text = p.Text.TrimEnd('"') + ".\"";
                }
                else
                {
                    p.Text += ".";
                }

                if (p.Text != before)
                {
                    periodsAdded++;
                    AddFixToListView(p, fixAction, before, p.Text);
                }
            }
        }
        else if (next != null && !string.IsNullOrEmpty(p.Text) &&
                 Utilities.AllLettersAndNumbers.Contains(p.Text[p.Text.Length - 1]))
        {
            if (p.Text != p.Text.ToUpper())
            {
                var nextStripped = new StripableText(next.Text);
                if (nextStripped.StrippedText.Length > 0 &&
                    nextStripped.StrippedText != nextStripped.StrippedText.ToUpper() &&
                    Utilities.UppercaseLetters.Contains(nextStripped.StrippedText[0]) &&
                    AllowFix(p, fixAction))
                {
                    // The last sentence mark in the paragraph decides the terminator:
                    // an opening Spanish mark is closed with its counterpart, otherwise a period is used.
                    int lastMark = p.Text.LastIndexOfAny(new[] { '.', '!', '?', '¿', '¡' });
                    string endSign = ".";
                    if (lastMark >= 0)
                    {
                        if (p.Text[lastMark] == '¿')
                            endSign = "?";
                        else if (p.Text[lastMark] == '¡')
                            endSign = "!";
                    }

                    string before = p.Text;
                    p.Text += endSign;
                    periodsAdded++;
                    AddFixToListView(p, fixAction, before, p.Text);
                }
            }
        }

        if (p.Text.Length > 4)
        {
            // locate the line break in front of the first dialog line (searched from index 3)
            int lineBreak = -1;
            foreach (string dialogStart in dialogLineStarts)
            {
                lineBreak = p.Text.IndexOf(Environment.NewLine + dialogStart, 3, StringComparison.Ordinal);
                if (lineBreak >= 0)
                    break;
            }

            if (lineBreak > 0 &&
                Configuration.Settings.General.UppercaseLetters.Contains(char.ToUpper(p.Text[lineBreak - 1])) &&
                AllowFix(p, fixAction))
            {
                string before = p.Text;
                var firstLine = new StripableText(p.Text.Substring(0, lineBreak));
                string leading = firstLine.Pre.TrimEnd();

                string terminator;
                if (leading.EndsWith('¿')) // Spanish ¿
                    terminator = "?";
                else if (leading.EndsWith('¡')) // Spanish ¡
                    terminator = "!";
                else
                    terminator = ".";

                p.Text = p.Text.Insert(lineBreak, terminator);
                periodsAdded++;
                AddFixToListView(p, fixAction, before, p.Text);
            }
        }
    }

    UpdateFixStatus(periodsAdded, _language.AddPeriods, _language.XPeriodsAdded);
}
/// <summary>
/// Capitalizes the first letter that follows a colon or semicolon, both across paragraphs
/// (previous paragraph ends with ':' or ';') and inside a paragraph.
/// Changed paragraphs are reported through AddFixToListView; the total is logged via LogStatus.
/// </summary>
private void FixStartWithUppercaseLetterAfterColon()
{
    string fixAction = _language.StartWithUppercaseLetterAfterColon;
    int noOfFixes = 0;

    listViewFixes.BeginUpdate();
    try
    {
        for (int i = 0; i < Subtitle.Paragraphs.Count; i++)
        {
            Paragraph p = Subtitle.Paragraphs[i];
            Paragraph last = Subtitle.GetParagraphOrDefault(i - 1);
            string oldText = p.Text;
            int skipCount = 0;

            if (last != null)
            {
                string lastText = HtmlUtil.RemoveHtmlTags(last.Text);
                if (lastText.EndsWith(':') || lastText.EndsWith(';'))
                {
                    var st = new StripableText(p.Text);
                    if (st.StrippedText.Length > 0 && st.StrippedText[0] != char.ToUpper(st.StrippedText[0]))
                        p.Text = st.Pre + char.ToUpper(st.StrippedText[0]) + st.StrippedText.Substring(1) + st.Post;
                }
            }

            if (oldText.Contains(new[] { ':', ';' }))
            {
                bool lastWasColon = false;
                for (int j = 0; j < p.Text.Length; j++)
                {
                    var s = p.Text[j];
                    if (s == ':' || s == ';')
                    {
                        lastWasColon = true;
                    }
                    else if (lastWasColon)
                    {
                        var startFromJ = p.Text.Substring(j);
                        if (skipCount > 0)
                        {
                            // still inside a tag that started right after the colon
                            skipCount--;
                        }
                        else if (startFromJ.StartsWith("<i>", StringComparison.OrdinalIgnoreCase) ||
                                 startFromJ.StartsWith("<b>", StringComparison.OrdinalIgnoreCase) ||
                                 startFromJ.StartsWith("<u>", StringComparison.OrdinalIgnoreCase))
                        {
                            // skip the two remaining characters of the three-character tag
                            skipCount = 2;
                        }
                        else if (startFromJ.StartsWith("<font ", StringComparison.OrdinalIgnoreCase) && startFromJ.Contains('>'))
                        {
                            // the tag starts at offset 0, so the offset of '>' is the number of characters left to skip
                            skipCount = startFromJ.IndexOf('>');
                        }
                        else if (IsTurkishLittleI(s, _encoding, Language))
                        {
                            p.Text = p.Text.Remove(j, 1).Insert(j, GetTurkishUppercaseLetter(s, _encoding).ToString(CultureInfo.InvariantCulture));
                            lastWasColon = false;
                        }
                        else if (char.IsLower(s))
                        {
                            // keep words like "iPhone": a lowercase 'i' is not changed when the next
                            // character equals its own uppercase form
                            bool change = true;
                            if (s == 'i' && p.Text.Length > j + 1)
                            {
                                if (p.Text[j + 1] == char.ToUpper(p.Text[j + 1]))
                                    change = false;
                            }
                            if (change)
                                p.Text = p.Text.Remove(j, 1).Insert(j, char.ToUpper(s).ToString(CultureInfo.InvariantCulture));
                            lastWasColon = false;
                        }
                        else if (!(" " + Environment.NewLine).Contains(s))
                        {
                            // any other non-whitespace character ends the "after colon" state
                            lastWasColon = false;
                        }
                    }
                }
            }

            if (oldText != p.Text)
            {
                noOfFixes++;
                AddFixToListView(p, fixAction, oldText, p.Text);
            }
        }
    }
    finally
    {
        // always pair BeginUpdate with EndUpdate, even if processing a paragraph throws
        listViewFixes.EndUpdate();
    }

    if (noOfFixes > 0)
    {
        _totalFixes += noOfFixes;
        LogStatus(_language.StartWithUppercaseLetterAfterColon, noOfFixes.ToString(CultureInfo.InvariantCulture));
    }
}
/// <summary>
/// Replaces an uppercase 'I' that looks like a mis-recognised lowercase 'l' (or 'i') inside words.
/// Pass 1 handles an 'I' that directly follows a lowercase letter (ReAfterLowercaseLetter matches);
/// pass 2 handles an 'I' that is followed by a lowercase letter (ReBeforeLowercaseLetter matches
/// on the stripped text). Every change is gated by AllowFix and reported through AddFixToListView.
/// </summary>
public void FixUppercaseIInsideWords()
{
    string fixAction = _language.FixUppercaseIInsideLowercaseWord;
    int uppercaseIsInsideLowercaseWords = 0;
    for (int i = 0; i < Subtitle.Paragraphs.Count; i++)
    {
        Paragraph p = Subtitle.Paragraphs[i];
        string oldText = p.Text;

        // Pass 1: lowercase letter followed by 'I'. Replacements keep the length unchanged,
        // so match indexes computed on the original text stay valid.
        Match match = ReAfterLowercaseLetter.Match(p.Text);
        while (match.Success)
        {
            // Irish names (McIntyre etc.). The 'c' is at match.Index, so index 1 is the
            // earliest position where "Mc" can precede the 'I' (text starts with "McI...").
            bool isMcName = match.Index >= 1 && p.Text.Substring(match.Index - 1, 2) == "Mc";
            if (!isMcName && p.Text[match.Index + 1] == 'I' && AllowFix(p, fixAction))
            {
                p.Text = p.Text.Substring(0, match.Index + 1) + "l";
                if (match.Index + 2 < oldText.Length)
                    p.Text += oldText.Substring(match.Index + 2);

                uppercaseIsInsideLowercaseWords++;
                AddFixToListView(p, fixAction, oldText, p.Text);
            }
            match = match.NextMatch();
        }

        // Pass 2: 'I' followed by a lowercase letter, evaluated on the stripped text.
        var st = new StripableText(p.Text);
        match = ReBeforeLowercaseLetter.Match(st.StrippedText);
        while (match.Success)
        {
            string word = GetWholeWord(st.StrippedText, match.Index);
            if (!IsName(word) && AllowFix(p, fixAction))
            {
                // null = leave the 'I' alone, otherwise the letter that replaces it
                string replacement = null;

                if (word.Equals("internal", StringComparison.OrdinalIgnoreCase) ||
                    word.Equals("island", StringComparison.OrdinalIgnoreCase) ||
                    word.Equals("islands", StringComparison.OrdinalIgnoreCase))
                {
                    // regular words that legitimately start with a capital I
                }
                else if (match.Index == 0)
                {
                    // first letter in paragraph - too risky to change without knowing
                    // whether the previous line continues into this one
                }
                else if (match.Index > 2 && st.StrippedText[match.Index - 1] == ' ')
                {
                    // word after a space: only fix when the previous word does not end a sentence
                    if ((Utilities.AllLettersAndNumbers + @",").Contains(st.StrippedText[match.Index - 2]) &&
                        match.Length >= 2 &&
                        Utilities.LowercaseVowels.Contains(char.ToLower(match.Value[1])))
                    {
                        replacement = "l";
                    }
                }
                else if (match.Index > Environment.NewLine.Length + 1 && Environment.NewLine.Contains(st.StrippedText[match.Index - 1]))
                {
                    // word after a line break: look at the character in front of the line break
                    // (mirrors the "character before the space" test above)
                    char beforeLineBreak = st.StrippedText[match.Index - Environment.NewLine.Length - 1];
                    if ((Utilities.AllLettersAndNumbers + @",").Contains(beforeLineBreak) &&
                        match.Length >= 2 &&
                        Utilities.LowercaseVowels.Contains(match.Value[1]))
                    {
                        replacement = "l";
                    }
                }
                else if (match.Index > 1 &&
                         (st.StrippedText[match.Index - 1] == '\"' ||
                          st.StrippedText[match.Index - 1] == '\'' ||
                          st.StrippedText[match.Index - 1] == '>' ||
                          st.StrippedText[match.Index - 1] == '-'))
                {
                    // directly after a quote, tag end or dash: leave as is
                }
                else
                {
                    var before = '\0';
                    var after = '\0';
                    if (match.Index > 0)
                        before = st.StrippedText[match.Index - 1];
                    // NOTE(review): when the match is the last two characters, 'after' stays '\0' - confirm this is intended
                    if (match.Index < st.StrippedText.Length - 2)
                        after = st.StrippedText[match.Index + 1];

                    if (before != '\0' && char.IsUpper(before) && after != '\0' && char.IsLower(after) &&
                        !Utilities.LowercaseVowels.Contains(char.ToLower(before)) && !Utilities.LowercaseVowels.Contains(after))
                    {
                        // uppercase consonant + I + lowercase consonant => lowercase 'i'
                        replacement = "i";
                    }
                    else if (@"‘’¡¿„“()[]♪'. @".Contains(before) && !Utilities.LowercaseVowels.Contains(char.ToLower(after)))
                    {
                        // 'I' starts a word after punctuation and is not followed by a vowel: leave as is
                    }
                    else
                    {
                        replacement = "l";
                    }
                }

                if (replacement != null)
                {
                    st.StrippedText = st.StrippedText.Remove(match.Index, 1).Insert(match.Index, replacement);
                    p.Text = st.MergedString;
                    uppercaseIsInsideLowercaseWords++;
                    AddFixToListView(p, fixAction, oldText, p.Text);
                }
            }
            match = match.NextMatch();
        }
    }
    UpdateFixStatus(uppercaseIsInsideLowercaseWords, _language.FixUppercaseIInsindeLowercaseWords, _language.XUppercaseIsFoundInsideLowercaseWords);
}
/// <summary>
/// Capitalizes the first letter following one of the ExpectedChars and a space inside a paragraph.
/// </summary>
/// <param name="subtitle">Subtitle whose paragraphs are scanned; paragraph text is updated in place.</param>
/// <param name="callbacks">Provides encoding and language, approves fixes (AllowFix) and records them.</param>
public void Fix(Subtitle subtitle, IFixCallbacks callbacks)
{
    var language = Configuration.Settings.Language.FixCommonErrors;
    string fixAction = language.StartWithUppercaseLetterAfterPeriodInsideParagraph;
    int fixCount = 0;

    for (int index = 0; index < subtitle.Paragraphs.Count; index++)
    {
        Paragraph p = subtitle.Paragraphs[index];
        string oldText = p.Text;
        var st = new StripableText(p.Text);

        if (p.Text.Length > 3)
        {
            // NOTE(review): as written both arguments are a single space, which makes this a no-op -
            // confirm whether the first argument was meant to be two spaces.
            string text = st.StrippedText.Replace(" ", " ");
            int start = text.IndexOfAny(ExpectedChars);
            while (start >= 0 && start < text.Length)
            {
                // punctuation directly after a digit is ignored
                bool followsDigit = start > 0 && char.IsDigit(text[start - 1]);

                if (!followsDigit &&
                    start + 4 < text.Length && text[start + 1] == ' ' &&
                    !IsAbbreviation(text, start, callbacks))
                {
                    var subText = new StripableText(text.Substring(start + 2));
                    string stripped = subText.StrippedText;

                    bool isTurkishI = stripped.Length > 0 &&
                                      Helper.IsTurkishLittleI(stripped[0], callbacks.Encoding, callbacks.Language);
                    bool isKnownLetter = !isTurkishI && stripped.Length > 0 &&
                                         Configuration.Settings.General.UppercaseLetters.Contains(char.ToUpper(stripped[0]));

                    // an 's' preceded by an apostrophe in the stripped-off prefix is left untouched
                    if ((isTurkishI || isKnownLetter) &&
                        stripped.Length > 1 &&
                        !(subText.Pre.Contains('\'') && stripped.StartsWith('s')))
                    {
                        string head = text.Substring(0, start + 2) + subText.Pre;
                        string tail = stripped.Substring(1) + subText.Post;
                        if (isTurkishI)
                            text = head + Helper.GetTurkishUppercaseLetter(stripped[0], callbacks.Encoding) + tail;
                        else
                            text = head + char.ToUpper(stripped[0]) + tail;

                        if (callbacks.AllowFix(p, fixAction))
                        {
                            p.Text = st.Pre + text + st.Post;
                        }
                    }
                }

                // continue searching four characters further on
                start += 4;
                if (start < text.Length)
                    start = text.IndexOfAny(ExpectedChars, start);
            }
        }

        if (oldText != p.Text)
        {
            fixCount++;
            callbacks.AddFixToListView(p, fixAction, oldText, p.Text);
        }
    }

    callbacks.UpdateFixStatus(fixCount, language.StartWithUppercaseLetterAfterPeriodInsideParagraph, fixCount.ToString(CultureInfo.InvariantCulture));
}
/// <summary>
/// Balances a closing mark with its inverted opening mark inside one paragraph:
/// inserts <paramref name="inverseMark"/> where a sentence ending in <paramref name="mark"/> has no opener,
/// or appends <paramref name="mark"/> when the paragraph contains exactly one opener and no closing mark.
/// </summary>
/// <param name="mark">Closing mark to balance (the code compares it against '!' and '?').</param>
/// <param name="inverseMark">Opening counterpart that is inserted in front of the sentence.</param>
/// <param name="p">Paragraph to fix; its Text is modified in place.</param>
/// <param name="last">Previous paragraph, or null; its Text may be modified in one branch.</param>
/// <param name="wasLastLineClosed">In/out flag; set to true once a mark has been processed in this paragraph.</param>
/// <param name="fixAction">Fix description passed to AllowFix / AddFixToListView.</param>
/// <param name="fixCount">Incremented only in the branch that edits <paramref name="last"/>.</param>
private void FixSpanishInvertedLetter(char mark, string inverseMark, Paragraph p, Paragraph last, ref bool wasLastLineClosed, string fixAction, ref int fixCount)
{
    if (p.Text.Contains(mark))
    {
        bool skip = false;

        // The opener is in the previous paragraph and the closer in this one: nothing to do.
        if (last != null && p.Text.Contains(mark) && !p.Text.Contains(inverseMark) && last.Text.Contains(inverseMark) && !last.Text.Contains(mark))
            skip = true;

        // Equal number of openers and closers, with none left after trimming them from the
        // start/end of the tag-free text: already balanced.
        if (!skip && Utilities.CountTagInText(p.Text, mark) == Utilities.CountTagInText(p.Text, inverseMark) && HtmlUtil.RemoveHtmlTags(p.Text).TrimStart(inverseMark[0]).Contains(inverseMark) == false && HtmlUtil.RemoveHtmlTags(p.Text).TrimEnd(mark).Contains(mark) == false)
        {
            skip = true;
        }

        if (!skip)
        {
            int startIndex = 0;
            int markIndex = p.Text.IndexOf(mark);

            // A sentence terminator in front of the first mark means the carried-over sentence is closed.
            if (!wasLastLineClosed && ((p.Text.IndexOf('!') > 0 && p.Text.IndexOf('!') < markIndex) || (p.Text.IndexOf('?') > 0 && p.Text.IndexOf('?') < markIndex) || (p.Text.IndexOf('.') > 0 && p.Text.IndexOf('.') < markIndex)))
                wasLastLineClosed = true;

            // Process each occurrence of the closing mark from left to right.
            while (markIndex > 0 && startIndex < p.Text.Length)
            {
                int inverseMarkIndex = p.Text.IndexOf(inverseMark, startIndex, StringComparison.Ordinal);
                if (wasLastLineClosed && (inverseMarkIndex < 0 || inverseMarkIndex > markIndex))
                {
                    // No opener between startIndex and this closing mark.
                    if (AllowFix(p, fixAction))
                    {
                        // Walk backwards: first over terminators directly in front of the mark ...
                        int j = markIndex - 1;
                        while (j > startIndex && (p.Text[j] == '.' || p.Text[j] == '!' || p.Text[j] == '?'))
                            j--;

                        // ... then to the start of the sentence: the previous terminator (periods that belong to
                        // a Spanish abbreviation do not count) or the start of a dialog line.
                        while (j > startIndex && (p.Text[j] != '.' || IsSpanishAbbreviation(p.Text, j)) && p.Text[j] != '!' && p.Text[j] != '?'
                               && !(j > 3 && p.Text.Substring(j - 3, 3) == Environment.NewLine + "-") && !(j > 4 && p.Text.Substring(j - 4, 4) == Environment.NewLine + " -") && !(j > 6 && p.Text.Substring(j - 6, 6) == Environment.NewLine + "<i>-"))
                            j--;

                        // Step past the terminator that ended the previous sentence.
                        if (@".!?".Contains(p.Text[j]))
                        {
                            j++;
                        }

                        // Step past a line break that follows (the index arithmetic assumes a two-character Environment.NewLine).
                        if (j + 3 < p.Text.Length && p.Text.Substring(j + 1, 2) == Environment.NewLine)
                        {
                            j += 3;
                        }
                        else if (j + 2 < p.Text.Length && p.Text.Substring(j, 2) == Environment.NewLine)
                        {
                            j += 2;
                        }

                        if (j >= startIndex)
                        {
                            // part = the sentence from its start up to and including the closing mark
                            string part = p.Text.Substring(j, markIndex - j + 1);

                            // A leading "(...)" or "[...]" block (plus following line-break characters) is kept in front of the opener.
                            string speaker = string.Empty;
                            int speakerEnd = part.IndexOf(')');
                            if (part.StartsWith('(') && speakerEnd > 0 && speakerEnd < part.IndexOf(mark))
                            {
                                while (Environment.NewLine.Contains(part[speakerEnd + 1]))
                                    speakerEnd++;
                                speaker = part.Substring(0, speakerEnd + 1);
                                part = part.Substring(speakerEnd + 1);
                            }
                            speakerEnd = part.IndexOf(']');
                            if (part.StartsWith('[') && speakerEnd > 0 && speakerEnd < part.IndexOf(mark))
                            {
                                while (Environment.NewLine.Contains(part[speakerEnd + 1]))
                                    speakerEnd++;
                                speaker = part.Substring(0, speakerEnd + 1);
                                part = part.Substring(speakerEnd + 1);
                            }

                            var st = new StripableText(part);
                            if (j == 0 && mark == '!' && st.Pre == "¿" && Utilities.CountTagInText(p.Text, mark) == 1 && HtmlUtil.RemoveHtmlTags(p.Text).EndsWith(mark))
                            {
                                // Text already opens with the other inverted mark: put this opener in front of it.
                                p.Text = inverseMark + p.Text;
                            }
                            else if (j == 0 && mark == '?'
                                     && st.Pre == "¡" && Utilities.CountTagInText(p.Text, mark) == 1 && HtmlUtil.RemoveHtmlTags(p.Text).EndsWith(mark))
                            {
                                p.Text = inverseMark + p.Text;
                            }
                            else
                            {
                                // Repeated closing marks ("??", "!!!") get one opener each, as long as closers outnumber openers.
                                string temp = inverseMark;
                                int addToIndex = 0;
                                while (p.Text.Length > markIndex + 1 && p.Text[markIndex + 1] == mark && Utilities.CountTagInText(p.Text, mark) > Utilities.CountTagInText(p.Text + temp, inverseMark))
                                {
                                    temp += inverseMark;
                                    st.Post += mark;
                                    markIndex++;
                                    addToIndex++;
                                }

                                // Rebuild the sentence with the opener(s) inserted after the stripped prefix.
                                p.Text = p.Text.Remove(j, markIndex - j + 1).Insert(j, speaker + st.Pre + temp + st.StrippedText + st.Post);
                                markIndex += addToIndex;
                            }
                        }
                    }
                }
                // NOTE(review): inverseMarkIndex (position of the opener) is compared with the position of the closing
                // mark; this looks like it can only be true if inverseMark begins with mark - confirm this branch is reachable.
                else if (last != null && !wasLastLineClosed && inverseMarkIndex == p.Text.IndexOf(mark) && !last.Text.Contains(inverseMark))
                {
                    // Insert the opener into the previous paragraph, searching backwards for ". ", "! " or "? ".
                    string lastOldtext = last.Text;
                    int idx = last.Text.Length - 2;
                    while (idx > 0 && (last.Text.Substring(idx, 2) != ". ") && (last.Text.Substring(idx, 2) != "! ") && (last.Text.Substring(idx, 2) != "? "))
                        idx--;
                    last.Text = last.Text.Insert(idx, inverseMark);
                    fixCount++;
                    AddFixToListView(last, fixAction, lastOldtext, last.Text);
                }

                // Advance to the next closing mark, if any.
                startIndex = markIndex + 2;
                if (startIndex < p.Text.Length)
                    markIndex = p.Text.IndexOf(mark, startIndex);
                else
                    markIndex = -1;
                wasLastLineClosed = true;
            }
        }

        // Move a closing mark in front of a trailing ellipsis behind it: "x?..." becomes "x...?".
        if (p.Text.EndsWith(mark + "...", StringComparison.Ordinal) && p.Text.Length > 4)
        {
            p.Text = p.Text.Remove(p.Text.Length - 4, 4) + "..." + mark;
        }
    }
    else if (Utilities.CountTagInText(p.Text, inverseMark) == 1)
    {
        // Exactly one opener and no closing mark: insert the closer in front of the next sentence terminator.
        int idx = p.Text.IndexOf(inverseMark, StringComparison.Ordinal);
        while (idx < p.Text.Length && !@".!?".Contains(p.Text[idx]))
        {
            idx++;
        }
        if (idx < p.Text.Length)
        {
            p.Text = p.Text.Insert(idx, mark.ToString(CultureInfo.InvariantCulture));

            // Keep combined marks mirrored: "¡¿ ... ?!" and "¿¡ ... !?".
            if (p.Text.Contains("¡¿") && p.Text.Contains("!?"))
                p.Text = p.Text.Replace("!?", "?!");
            if (p.Text.Contains("¿¡") && p.Text.Contains("?!"))
                p.Text = p.Text.Replace("?!", "!?");
        }
    }
}
/// <summary>
/// Uppercases the first letter after one of the ExpectedChars followed by a space, within each paragraph.
/// </summary>
/// <param name="subtitle">Subtitle whose paragraphs are examined; paragraph text is changed in place.</param>
/// <param name="callbacks">Source of encoding/language and sink for fix approval and reporting.</param>
public void Fix(Subtitle subtitle, IFixCallbacks callbacks)
{
    var language = Configuration.Settings.Language.FixCommonErrors;
    string fixAction = language.StartWithUppercaseLetterAfterPeriodInsideParagraph;
    int appliedFixes = 0;

    for (int paragraphIndex = 0; paragraphIndex < subtitle.Paragraphs.Count; paragraphIndex++)
    {
        Paragraph p = subtitle.Paragraphs[paragraphIndex];
        string oldText = p.Text;
        var st = new StripableText(p.Text);

        if (p.Text.Length > 3)
        {
            // NOTE(review): both arguments are a single space as written (no-op) - confirm the intended first argument.
            string text = st.StrippedText.Replace(" ", " ");

            int start = text.IndexOfAny(ExpectedChars);
            while (start >= 0 && start < text.Length)
            {
                bool precededByDigit = start > 0 && char.IsDigit(text[start - 1]);
                bool hasSpaceAndRoom = !precededByDigit && start + 4 < text.Length && text[start + 1] == ' ';

                if (hasSpaceAndRoom && !IsAbbreviation(text, start, callbacks))
                {
                    var following = new StripableText(text.Substring(start + 2));
                    string word = following.StrippedText;

                    bool turkishLittleI = word.Length > 0 &&
                                          Helper.IsTurkishLittleI(word[0], callbacks.Encoding, callbacks.Language);
                    bool ordinaryLetter = !turkishLittleI && word.Length > 0 &&
                                          Configuration.Settings.General.UppercaseLetters.Contains(char.ToUpper(word[0]));

                    if (turkishLittleI || ordinaryLetter)
                    {
                        // leave an 's' alone when the stripped-off prefix contains an apostrophe
                        bool apostropheS = following.Pre.Contains('\'') && word.StartsWith('s');
                        if (word.Length > 1 && !apostropheS)
                        {
                            string prefix = text.Substring(0, start + 2) + following.Pre;
                            string suffix = word.Substring(1) + following.Post;
                            text = turkishLittleI
                                ? prefix + Helper.GetTurkishUppercaseLetter(word[0], callbacks.Encoding) + suffix
                                : prefix + char.ToUpper(word[0]) + suffix;

                            if (callbacks.AllowFix(p, fixAction))
                            {
                                p.Text = st.Pre + text + st.Post;
                            }
                        }
                    }
                }

                // resume the search four characters after the current hit
                start += 4;
                if (start < text.Length)
                {
                    start = text.IndexOfAny(ExpectedChars, start);
                }
            }
        }

        if (oldText != p.Text)
        {
            appliedFixes++;
            callbacks.AddFixToListView(p, fixAction, oldText, p.Text);
        }
    }

    callbacks.UpdateFixStatus(appliedFixes, language.StartWithUppercaseLetterAfterPeriodInsideParagraph, appliedFixes.ToString(CultureInfo.InvariantCulture));
}
/// <summary>
/// Uppercases the first letter that follows '.', '!' or '?' plus a space inside a paragraph.
/// Changes are gated by AllowFix, reported through AddFixToListView and logged via LogStatus.
/// </summary>
private void FixStartWithUppercaseLetterAfterPeriodInsideParagraph()
{
    string fixAction = _language.StartWithUppercaseLetterAfterPeriodInsideParagraph;
    int fixCount = 0;
    var sentenceEnders = new[] { '.', '!', '?' };

    for (int index = 0; index < Subtitle.Paragraphs.Count; index++)
    {
        Paragraph p = Subtitle.Paragraphs[index];
        string oldText = p.Text;
        StripableText st = new StripableText(p.Text);

        if (p.Text.Length > 3)
        {
            // NOTE(review): both arguments are a single space as written (no-op) - confirm the intended first argument.
            string text = st.StrippedText.Replace(" ", " ");

            int start = text.IndexOfAny(sentenceEnders);
            while (start != -1 && start < text.Length)
            {
                // a terminator directly after a digit is ignored
                bool followsDigit = start > 0 && char.IsDigit(text[start - 1]);

                if (!followsDigit &&
                    start + 4 < text.Length && text[start + 1] == ' ' &&
                    !IsAbbreviation(text, start))
                {
                    var subText = new StripableText(text.Substring(start + 2));
                    string stripped = subText.StrippedText;

                    bool isTurkishI = stripped.Length > 0 && IsTurkishLittleI(stripped[0], _encoding, Language);
                    bool isKnownLetter = !isTurkishI && stripped.Length > 0 &&
                                         Configuration.Settings.General.UppercaseLetters.Contains(char.ToUpper(stripped[0]));

                    // an 's' preceded by an apostrophe in the stripped-off prefix is left untouched
                    if ((isTurkishI || isKnownLetter) &&
                        stripped.Length > 1 &&
                        !(subText.Pre.Contains('\'') && stripped.StartsWith('s')))
                    {
                        string head = text.Substring(0, start + 2) + subText.Pre;
                        string tail = stripped.Substring(1) + subText.Post;
                        if (isTurkishI)
                            text = head + GetTurkishUppercaseLetter(stripped[0], _encoding) + tail;
                        else
                            text = head + char.ToUpper(stripped[0]) + tail;

                        if (AllowFix(p, fixAction))
                        {
                            p.Text = st.Pre + text + st.Post;
                        }
                    }
                }

                // continue searching four characters further on
                start += 4;
                if (start < text.Length)
                    start = text.IndexOfAny(sentenceEnders, start);
            }
        }

        if (oldText != p.Text)
        {
            fixCount++;
            AddFixToListView(p, fixAction, oldText, p.Text);
        }
    }

    if (fixCount > 0)
    {
        _totalFixes += fixCount;
        LogStatus(_language.StartWithUppercaseLetterAfterPeriodInsideParagraph, fixCount.ToString(CultureInfo.InvariantCulture));
    }
}
/// <summary>
/// Removes hearing-impaired annotations from one subtitle text and tidies up the dialog dashes,
/// italic tags and line breaks that are left behind.
/// </summary>
/// <param name="text">Subtitle text, possibly spanning several lines.</param>
/// <returns>The cleaned, trimmed text; an empty string when the "remove if text contains" setting matches.</returns>
public string RemoveTextFromHearImpaired(string text)
{
    if (Settings.RemoveWhereContains && Settings.RemoveIfTextContains.Length > 0 && text.Contains(Settings.RemoveIfTextContains))
    {
        return string.Empty;
    }

    const string defaultPre = " >-\"'‘`´♪¿¡.…—";
    const string defaultPost = " -\"'`´♪.!?:…—";

    string oldText = text;
    text = RemoveColon(text);

    // Characters StripableText may peel off; custom start/end tags are excluded from the sets
    // so that they stay part of the stripped text.
    string pre = defaultPre;
    string post = defaultPost;
    if (Settings.RemoveTextBetweenCustomTags)
    {
        // string.Replace throws on an empty search string, so unset custom tags are skipped
        if (!string.IsNullOrEmpty(Settings.CustomStart))
            pre = pre.Replace(Settings.CustomStart, string.Empty);
        if (!string.IsNullOrEmpty(Settings.CustomEnd))
            post = post.Replace(Settings.CustomEnd, string.Empty);
    }

    var st = new StripableText(text, pre, post);
    var sb = new StringBuilder();
    string[] parts = st.StrippedText.Trim().Split(Environment.NewLine.ToCharArray(), StringSplitOptions.RemoveEmptyEntries);
    int lineNumber = 0;
    bool removedDialogInFirstLine = false;
    int noOfNamesRemoved = 0;
    int noOfNamesRemovedNotInLineOne = 0;

    foreach (string s in parts)
    {
        StripableText stSub = new StripableText(s, pre, post);
        if (!StartAndEndsWithHearImpariedTags(stSub.StrippedText))
        {
            // The line has content outside the tags: keep it, minus the tagged part.
            if (removedDialogInFirstLine && stSub.Pre.Contains("- "))
                stSub.Pre = stSub.Pre.Replace("- ", string.Empty);

            string newText = RemoveHearImpairedTags(stSub.StrippedText);
            if (stSub.StrippedText.Length - newText.Length > 2)
            {
                string removedText = GetRemovedString(stSub.StrippedText, newText);
                if (!IsHIDescription(removedText))
                {
                    noOfNamesRemoved++;
                    if (lineNumber > 0)
                        noOfNamesRemovedNotInLineOne++;
                }
            }
            sb.AppendLine(stSub.Pre + newText + stSub.Post);
        }
        else
        {
            // The whole line is a hearing-impaired tag: drop it and clean up the outer prefix/suffix.
            if (!IsHIDescription(stSub.StrippedText))
            {
                noOfNamesRemoved++;
                if (lineNumber > 0)
                    noOfNamesRemovedNotInLineOne++;
            }

            if (st.Pre.Contains("- ") && lineNumber == 0)
            {
                st.Pre = st.Pre.Replace("- ", string.Empty);
                removedDialogInFirstLine = true;
            }

            if (st.Pre.Contains("<i>") && stSub.Post.Contains("</i>"))
                st.Pre = st.Pre.Replace("<i>", string.Empty);

            if (s.Contains("<i>") && !s.Contains("</i>") && st.Post.Contains("</i>"))
                st.Post = st.Post.Replace("</i>", string.Empty);
        }
        lineNumber++;
    }

    text = st.Pre + sb.ToString().Trim() + st.Post;
    text = text.Replace("<i></i>", string.Empty).Trim();
    text = RemoveColon(text);
    text = RemoveLineIfAllUppercase(text);
    text = RemoveHearImpairedtagsInsideLine(text);
    if (Settings.RemoveInterjections)
        text = RemoveInterjections(text);

    // Second pass on the merged text, using the default character sets.
    st = new StripableText(text, defaultPre, defaultPost);
    text = st.StrippedText;
    if (StartAndEndsWithHearImpariedTags(text))
    {
        text = RemoveStartEndTags(text);
    }

    text = RemoveHearImpairedTags(text);

    // fix 3 lines to two liners - if only two sentences
    if (noOfNamesRemoved >= 1 && Utilities.CountTagInText(text, Environment.NewLine) == 2)
    {
        string[] a = Utilities.RemoveHtmlTags(text).Replace(" ", string.Empty).Split("!?.".ToCharArray(), StringSplitOptions.RemoveEmptyEntries);
        if (a.Length == 2)
        {
            StripableText temp = new StripableText(text);
            temp.StrippedText = temp.StrippedText.Replace(Environment.NewLine, " ");
            int splitIndex = temp.StrippedText.LastIndexOf('!');
            if (splitIndex == -1)
                splitIndex = temp.StrippedText.LastIndexOf('?');
            if (splitIndex == -1)
                splitIndex = temp.StrippedText.LastIndexOf('.');
            if (splitIndex > 0)
            {
                text = temp.Pre + temp.StrippedText.Insert(splitIndex + 1, Environment.NewLine) + temp.Post;
            }
        }
    }

    // Two lines left after removing names: turn them into a dialog when the first line ends a sentence.
    if (!text.StartsWith('-') && noOfNamesRemoved >= 1 && Utilities.CountTagInText(text, Environment.NewLine) == 1)
    {
        string[] arr = text.Split(Environment.NewLine.ToCharArray());
        string part0 = arr[0].Trim().Replace("</i>", string.Empty).Trim();
        if (!part0.EndsWith(',') && (!part0.EndsWith('-') || noOfNamesRemovedNotInLineOne > 0))
        {
            if (part0.Length > 0 && ".!?".Contains(part0.Substring(part0.Length - 1)))
            {
                if (noOfNamesRemovedNotInLineOne > 0)
                {
                    if (!st.Pre.Contains('-'))
                        text = "- " + text.Replace(Environment.NewLine, Environment.NewLine + "- ");
                    if (!text.Contains(Environment.NewLine + "-") && !text.Contains(Environment.NewLine + "<i>-"))
                        text = text.Replace(Environment.NewLine, Environment.NewLine + "- ");
                }
            }
        }
    }

    if (!string.IsNullOrEmpty(text))
        text = st.Pre + text + st.Post;

    // A former two-line dialog that collapsed to one line no longer needs its leading dash.
    if (oldText.Trim().StartsWith("- ") &&
        (oldText.Contains(Environment.NewLine + "- ") || oldText.Contains(Environment.NewLine + " - ")) &&
        !text.Contains(Environment.NewLine))
    {
        text = text.TrimStart().TrimStart('-').TrimStart();
    }

    if (oldText != text)
    {
        // insert spaces after "-" and move the dash inside italic tags
        text = text.Replace(Environment.NewLine + "- <i>", Environment.NewLine + "<i>- ");
        text = text.Replace(Environment.NewLine + "-<i>", Environment.NewLine + "<i>- ");
        if (text.StartsWith('-') && text.Length > 2 && text[1] != ' ' && text[1] != '-')
            text = text.Insert(1, " ");
        if (text.StartsWith("<i>-") && text.Length > 5 && text[4] != ' ' && text[4] != '-')
            text = text.Insert(4, " ");

        if (text.Contains(Environment.NewLine + "-"))
        {
            int index = text.IndexOf(Environment.NewLine + "-", StringComparison.Ordinal);
            int afterDash = index + Environment.NewLine.Length + 1;
            if (index + 4 < text.Length && text[afterDash] != ' ' && text[afterDash] != '-')
                text = text.Insert(afterDash, " ");
        }

        if (text.Contains(Environment.NewLine + "<i>-"))
        {
            int index = text.IndexOf(Environment.NewLine + "<i>-", StringComparison.Ordinal);
            int afterDash = index + Environment.NewLine.Length + 4;
            // afterDash must be inside the text: with a two-character line break "index + 5 < Length"
            // alone does not guarantee that when the text ends with the dash.
            if (index + 5 < text.Length && afterDash < text.Length && text[afterDash] != ' ' && text[afterDash] != '-')
                text = text.Insert(afterDash, " ");
        }
    }
    return text.Trim();
}
/// <summary>
/// Changes a lowercase 'i' at the start of a line to 'I' when the preceding line is empty or ends a sentence.
/// </summary>
/// <param name="input">Text to fix; processed line by line.</param>
/// <param name="lastLine">Line preceding <paramref name="input"/>; used as the predecessor of the first line.</param>
/// <returns>The lines joined with Environment.NewLine, with trailing line-break characters removed.</returns>
private string FixLowercaseIToUppercaseI(string input, string lastLine)
{
    var result = new StringBuilder();
    var lines = input.SplitToLines();

    for (int lineIndex = 0; lineIndex < lines.Length; lineIndex++)
    {
        string current = lines[lineIndex];

        // from the second line on, the predecessor is the previous line of the input itself
        if (lineIndex > 0)
        {
            lastLine = lines[lineIndex - 1];
        }
        lastLine = HtmlUtil.RemoveHtmlTags(lastLine);

        bool previousEndsSentence = string.IsNullOrEmpty(lastLine) ||
                                    lastLine.EndsWith('.') ||
                                    lastLine.EndsWith('!') ||
                                    lastLine.EndsWith('?');
        if (previousEndsSentence)
        {
            var st = new StripableText(current);

            // not after an opening bracket/parenthesis or a leading ellipsis
            bool startsWithPlainLowerI = st.StrippedText.StartsWith('i') &&
                                         !st.Pre.EndsWith('[') &&
                                         !st.Pre.EndsWith('(') &&
                                         !st.Pre.EndsWith("...", StringComparison.Ordinal);

            // an ellipsis or abbreviation at the end of the previous line does not end a sentence
            if (startsWithPlainLowerI &&
                (string.IsNullOrEmpty(lastLine) ||
                 (!lastLine.EndsWith("...", StringComparison.Ordinal) && !EndsWithAbbreviation(lastLine, abbreviationList))))
            {
                current = st.Pre + "I" + st.StrippedText.Remove(0, 1) + st.Post;
            }
        }

        result.AppendLine(current);
    }

    return result.ToString().TrimEnd('\r', '\n');
}
/// <summary>
/// Removes the configured interjections (Configuration.Settings.Tools.Interjections, ';'-separated)
/// from the text and cleans up the punctuation, dashes and casing left behind.
/// </summary>
/// <param name="text">Subtitle text to clean.</param>
/// <returns>The text without interjections; an empty string when nothing but tags/punctuation remains.</returns>
public string RemoveInterjections(string text)
{
    string oldText = text;

    if (_interjectionList == null)
    {
        // Build the list once: each interjection as configured, lower case, upper case and
        // with an uppercase first letter. The list is sorted with CompareLength.
        string[] arr = Configuration.Settings.Tools.Interjections.Split(";".ToCharArray(), StringSplitOptions.RemoveEmptyEntries);
        var list = new List<string>();
        foreach (string s in arr)
        {
            if (s.Length > 0)
            {
                if (!list.Contains(s))
                    list.Add(s);

                string lower = s.ToLower();
                if (!list.Contains(lower))
                    list.Add(lower);

                string upper = s.ToUpper();
                if (!list.Contains(upper))
                    list.Add(upper);

                string pascalCasing = s.Substring(0, 1).ToUpper() + s.Remove(0, 1);
                if (!list.Contains(pascalCasing))
                    list.Add(pascalCasing);
            }
        }
        list.Sort(CompareLength);
        _interjectionList = list;
    }

    bool doRepeat = true;
    while (doRepeat)
    {
        doRepeat = false;
        foreach (string s in _interjectionList)
        {
            if (text.Contains(s))
            {
                // The interjection is literal text (it is removed by its own length below),
                // so it must be escaped before being embedded in the pattern.
                var regex = new Regex("\\b" + Regex.Escape(s) + "\\b");
                var match = regex.Match(text);
                if (match.Success)
                {
                    int index = match.Index;
                    string temp = text.Remove(index, s.Length);
                    string pre = string.Empty;
                    if (index > 0)
                        doRepeat = true;

                    bool removeAfter = true;

                    // ", " directly in front of a terminator: drop the ", "
                    if (temp.Length > index - s.Length + 3 && index > s.Length)
                    {
                        string around = temp.Substring(index - s.Length + 1, 3);
                        if (around == ", !" || around == ", ?" || around == ", .")
                        {
                            temp = temp.Remove(index - s.Length + 1, 2);
                            removeAfter = false;
                        }
                    }
                    if (removeAfter && temp.Length > index - s.Length + 2 && index > s.Length)
                    {
                        string around = temp.Substring(index - s.Length, 3);
                        if (around == ", !" || around == ", ?" || around == ", .")
                        {
                            temp = temp.Remove(index - s.Length, 2);
                            removeAfter = false;
                        }
                    }

                    // "-" directly in front of a terminator: drop the "-"
                    if (removeAfter && temp.Length > index - s.Length + 2 && index > s.Length)
                    {
                        string around = temp.Substring(index - s.Length + 1, 2);
                        if (around == "-!" || around == "-?" || around == "-.")
                        {
                            temp = temp.Remove(index - s.Length + 1, 1);
                            removeAfter = false;
                        }
                    }

                    if (removeAfter)
                    {
                        if (index == 0)
                        {
                            if (!string.IsNullOrEmpty(temp) && temp.StartsWith('-'))
                                temp = temp.Remove(0, 1).Trim();
                        }
                        else if (index == 3 && !string.IsNullOrEmpty(temp) && temp.StartsWith("<i>-"))
                        {
                            temp = temp.Remove(3, 1);
                        }
                        else if (index > 0)
                        {
                            // split into the part before the interjection and the part after it
                            pre = text.Substring(0, index);
                            temp = temp.Remove(0, index);
                            if (pre.EndsWith('-') && temp.StartsWith('-'))
                                temp = temp.Remove(0, 1);
                            if (pre.EndsWith("- ") && temp.StartsWith('-'))
                                temp = temp.Remove(0, 1);
                        }

                        // remove punctuation/space that followed the interjection
                        while (temp.Length > 0 && (temp.StartsWith(' ') || temp.StartsWith(',') || temp.StartsWith('.') || temp.StartsWith('!') || temp.StartsWith('?')))
                        {
                            temp = temp.Remove(0, 1);
                            doRepeat = true;
                        }

                        // a capitalised interjection passes its capital on to the following text
                        if (temp.Length > 0 && s[0].ToString(CultureInfo.InvariantCulture) != s[0].ToString(CultureInfo.InvariantCulture).ToLower())
                        {
                            temp = temp.Remove(0, 1).Insert(0, temp[0].ToString(CultureInfo.InvariantCulture).ToUpper());
                            doRepeat = true;
                        }

                        if (pre.EndsWith(' ') && temp.StartsWith('-'))
                            temp = temp.Remove(0, 1);

                        temp = pre + temp;
                    }

                    // drop a dangling dialog marker at the end (length follows the platform's line break)
                    string danglingDialog = Environment.NewLine + "- ";
                    if (temp.EndsWith(danglingDialog, StringComparison.Ordinal))
                        temp = temp.Remove(temp.Length - danglingDialog.Length);

                    var st = new StripableText(temp);
                    if (st.StrippedText.Length == 0)
                        return string.Empty;

                    if (!temp.Contains(Environment.NewLine) && text.Contains(Environment.NewLine) && temp.StartsWith('-'))
                        temp = temp.Remove(0, 1).Trim();

                    text = temp;
                }
            }
        }
    }

    // Clean up two-line results where one line is only a dash or a terminator.
    string[] lines = text.Split(Environment.NewLine.ToCharArray(), StringSplitOptions.RemoveEmptyEntries);
    if (text != oldText && lines.Length == 2)
    {
        if (lines[0] == "-" && lines[1] == "-")
            return string.Empty;

        if (lines[0].StartsWith('-') && lines[0].Length > 1 && lines[1].Trim() == "-")
            return lines[0].Remove(0, 1).Trim();

        if (lines[1].StartsWith('-') && lines[1].Length > 1 && lines[0].Trim() == "-")
            return lines[1].Remove(0, 1).Trim();

        // the length check applies to every alternative for the second line
        if (lines[0].Length > 1 && (lines[1] == "-" || lines[1] == "." || lines[1] == "!" || lines[1] == "?"))
        {
            if (oldText.Contains(Environment.NewLine + "-") && lines[0].StartsWith('-'))
                lines[0] = lines[0].Remove(0, 1);
            return lines[0].Trim();
        }
    }
    return text;
}
/// <summary>
/// Removes the text in front of a colon on each line of <paramref name="text"/> when
/// Settings.RemoveTextBeforeColon is enabled, then adjusts dialog dashes and re-adds
/// opening tags that were cut off together with the removed prefix.
/// </summary>
/// <param name="text">Subtitle text; may span several lines and may start with an ASS override tag ("{\...}").</param>
/// <returns>
/// The processed text with any leading ASS tag put back in front; the input itself when the
/// setting is off, when it has no colon, or when it is a longer mixed-case sentence that merely
/// ends with a colon; an empty string when only white space is left.
/// </returns>
public string RemoveColon(string text)
{
    if (!(Settings.RemoveTextBeforeColon && text.Contains(':')))
    {
        return text;
    }

    // Kept so the "leave untouched" early return below really returns the caller's text,
    // including a leading ASS tag that is split off next.
    string originalText = text;

    string preAssTag = string.Empty;
    if (text.StartsWith("{\\", StringComparison.Ordinal))
    {
        int indexOfEndBracketSuccessor = text.IndexOf('}') + 1;
        if (indexOfEndBracketSuccessor > 0)
        {
            preAssTag = text.Substring(0, indexOfEndBracketSuccessor);
            text = text.Remove(0, indexOfEndBracketSuccessor).TrimStart();
        }
    }

    // House 7x01 line 52: and she would like you to do three things:
    // Okay or remove???
    string noTagText = HtmlUtil.RemoveHtmlTags(text);
    if (noTagText.Length > 10 && noTagText.IndexOf(':') == noTagText.Length - 1 && noTagText != noTagText.ToUpper())
    {
        // Previously this returned the tag-stripped text and silently lost the ASS tag.
        return originalText;
    }

    string newText = string.Empty;
    var lines = text.Trim().SplitToLines();
    int noOfNames = 0;
    int count = 0; // index of the line currently being processed
    bool removedInFirstLine = false;
    bool removedInSecondLine = false;
    foreach (string line in lines)
    {
        int indexOfColon = line.IndexOf(':');
        if (indexOfColon <= 0 || IsInsideBrackets(line, indexOfColon))
        {
            // No usable colon on this line: copy it over unchanged.
            newText = (newText + Environment.NewLine + line).Trim();

            if (newText.EndsWith("</i>", StringComparison.Ordinal) && text.StartsWith("<i>", StringComparison.Ordinal) && !newText.StartsWith("<i>", StringComparison.Ordinal))
            {
                newText = "<i>" + newText;
            }
            else if (newText.EndsWith("</b>", StringComparison.Ordinal) && text.StartsWith("<b>", StringComparison.Ordinal) && !newText.StartsWith("<b>", StringComparison.Ordinal))
            {
                newText = "<b>" + newText;
            }
        }
        else
        {
            var pre = line.Substring(0, indexOfColon);
            var noTagPre = HtmlUtil.RemoveHtmlTags(pre, true);
            if (Settings.RemoveTextBeforeColonOnlyUppercase && noTagPre != noTagPre.ToUpper())
            {
                // The whole prefix is not upper case, but an upper-case part directly in front
                // of the colon on the second of two lines may still be replaced by a dash.
                string s = line;
                string l1Trim = HtmlUtil.RemoveHtmlTags(lines[0]).TrimEnd('"');
                if (count == 1 && lines.Length == 2 && !l1Trim.EndsWith('.') && !l1Trim.EndsWith('!') && !l1Trim.EndsWith('?'))
                {
                    var indexOf = line.IndexOfAny(ExpectedStrings, StringComparison.Ordinal);
                    if (indexOf > 0 && indexOf < indexOfColon)
                    {
                        var toRemove = s.Substring(indexOf + 1, indexOfColon - indexOf).Trim();
                        if (toRemove.Length > 1 && toRemove == toRemove.ToUpper())
                        {
                            s = s.Remove(indexOf + 1, indexOfColon - indexOf);
                            s = s.Insert(indexOf + 1, " -");
                            if (newText.StartsWith("<i>") && !newText.StartsWith("<i>-"))
                            {
                                newText = "<i>- " + newText.Remove(0, 3);
                            }
                            else if (!newText.StartsWith("-"))
                            {
                                newText = "- " + newText;
                            }
                        }
                    }
                }
                newText = (newText + Environment.NewLine + s).Trim();
            }
            else
            {
                var newTextNoHtml = HtmlUtil.RemoveHtmlTags(newText);
                if (Utilities.CountTagInText(line, ':') == 1)
                {
                    // Second line whose preceding output does not end with '.', '?' or '!':
                    // the line is kept complete, colon prefix included. The original had two
                    // branches with this same body; their extra "prepend opening tag" arms
                    // required pre to contain a token that line did not contain, which is
                    // impossible because pre is a prefix of line, so they were dropped.
                    // The Length > 0 check prevents indexing position -1 when the output so
                    // far consists of tags only.
                    bool keepWholeLine =
                        count == 1 && newText.Length > 1 &&
                        (removedInFirstLine || (indexOfColon > 15 && pre.Contains(' '))) &&
                        newTextNoHtml.Length > 0 &&
                        !".?!".Contains(newTextNoHtml[newTextNoHtml.Length - 1]) &&
                        newText.LineEndsWithHtmlTag(true) &&
                        line != line.ToUpper();

                    if (keepWholeLine)
                    {
                        newText += Environment.NewLine + line;
                    }
                    else
                    {
                        var preStripable = new StripableText(pre);
                        var remove = true;
                        if (indexOfColon < line.Length - 1)
                        {
                            if (Settings.ColonSeparateLine && !line.Substring(indexOfColon + 1).StartsWith(Environment.NewLine, StringComparison.Ordinal))
                            {
                                remove = false;
                            }
                            else if (Utilities.IsBetweenNumbers(line, indexOfColon))
                            {
                                remove = false;
                            }
                        }
                        if (remove && !DoRemove(pre))
                        {
                            remove = false;
                        }

                        string l1Trimmed = HtmlUtil.RemoveHtmlTags(lines[0]).TrimEnd('"');
                        if (count == 1 && lines.Length == 2 && !l1Trimmed.EndsWith('.') && !l1Trimmed.EndsWith('!') && !l1Trimmed.EndsWith('?'))
                        {
                            remove = false;
                        }

                        if (remove)
                        {
                            var content = line.Substring(indexOfColon + 1).Trim();
                            if (content.Length > 0)
                            {
                                newText += Environment.NewLine;
                                // Re-open a tag/bracket that started inside the removed prefix.
                                if (pre.Contains("<i>") && content.Contains("</i>"))
                                {
                                    newText += "<i>" + content;
                                }
                                else if (pre.Contains("<b>") && content.Contains("</b>"))
                                {
                                    newText += "<b>" + content;
                                }
                                else if (pre.Contains('[') && content.Contains(']'))
                                {
                                    newText += "[" + content;
                                }
                                else if (pre.Contains('(') && content.EndsWith(')'))
                                {
                                    newText += "(" + content;
                                }
                                else
                                {
                                    newText += content;
                                }

                                if (count == 0)
                                {
                                    removedInFirstLine = true;
                                }
                                else if (count == 1)
                                {
                                    removedInSecondLine = true;
                                }
                            }
                            newText = newText.Trim();

                            if (text.StartsWith('(') && newText.EndsWith(')') && !newText.Contains('('))
                            {
                                newText = newText.TrimEnd(')');
                            }
                            else if (text.StartsWith('[') && newText.EndsWith(']') && !newText.Contains('['))
                            {
                                newText = newText.TrimEnd(']');
                            }
                            else if (newText.EndsWith("</i>", StringComparison.Ordinal) && text.StartsWith("<i>", StringComparison.Ordinal) && !newText.StartsWith("<i>", StringComparison.Ordinal))
                            {
                                newText = "<i>" + newText;
                            }
                            else if (newText.EndsWith("</b>", StringComparison.Ordinal) && text.StartsWith("<b>", StringComparison.Ordinal) && !newText.StartsWith("<b>", StringComparison.Ordinal))
                            {
                                newText = "<b>" + newText;
                            }
                            else if (newText.EndsWith("</u>", StringComparison.Ordinal) && text.StartsWith("<u>", StringComparison.Ordinal) && !newText.StartsWith("<u>", StringComparison.Ordinal))
                            {
                                newText = "<u>" + newText;
                            }

                            if (!IsHIDescription(preStripable.StrippedText))
                            {
                                noOfNames++;
                            }
                        }
                        else
                        {
                            string s = line;
                            string l1Trim = HtmlUtil.RemoveHtmlTags(lines[0]).TrimEnd('"');
                            if (count == 1 && lines.Length == 2 && !l1Trim.EndsWith('.') && !l1Trim.EndsWith('!') && !l1Trim.EndsWith('?'))
                            {
                                int indexOf = line.IndexOf(". ", StringComparison.Ordinal);
                                if (indexOf == -1)
                                {
                                    indexOf = line.IndexOf("! ", StringComparison.Ordinal);
                                }
                                if (indexOf == -1)
                                {
                                    indexOf = line.IndexOf("? ", StringComparison.Ordinal);
                                }
                                if (indexOf > 0 && indexOf < indexOfColon)
                                {
                                    s = s.Remove(indexOf + 1, indexOfColon - indexOf);
                                    s = s.Insert(indexOf + 1, " -");
                                    if (newText.StartsWith("<i>") && !newText.StartsWith("<i>-"))
                                    {
                                        newText = "<i>- " + newText.Remove(0, 3);
                                    }
                                    else if (!newText.StartsWith("-"))
                                    {
                                        newText = "- " + newText;
                                    }
                                }
                            }
                            newText = (newText + Environment.NewLine + s).Trim();

                            if (newText.EndsWith("</i>", StringComparison.Ordinal) && text.StartsWith("<i>", StringComparison.Ordinal) && !newText.StartsWith("<i>", StringComparison.Ordinal))
                            {
                                newText = "<i>" + newText;
                            }
                            else if (newText.EndsWith("</b>", StringComparison.Ordinal) && text.StartsWith("<b>", StringComparison.Ordinal) && !newText.StartsWith("<b>", StringComparison.Ordinal))
                            {
                                newText = "<b>" + newText;
                            }
                            else if (newText.EndsWith("</u>", StringComparison.Ordinal) && text.StartsWith("<u>", StringComparison.Ordinal) && !newText.StartsWith("<u>", StringComparison.Ordinal))
                            {
                                newText = "<u>" + newText;
                            }
                        }
                    }
                }
                else
                {
                    // More than one colon on the line: handle at most two of them in turn.
                    char[] endChars = { '.', '?', '!' };
                    string s2 = line;
                    for (int k = 0; k < 2; k++)
                    {
                        if (s2.Contains(':'))
                        {
                            int colonIndex = s2.IndexOf(':');
                            string start = s2.Substring(0, colonIndex);
                            if (!Settings.RemoveTextBeforeColonOnlyUppercase || start == start.ToUpper())
                            {
                                int endIndex = start.LastIndexOfAny(endChars);
                                if (colonIndex > 0 && colonIndex < s2.Length - 1)
                                {
                                    // Digit on both sides of the colon: 0 disables both removals below.
                                    if (char.IsDigit(s2[colonIndex - 1]) && char.IsDigit(s2[colonIndex + 1]))
                                    {
                                        endIndex = 0;
                                    }
                                }
                                if (endIndex < 0)
                                {
                                    // endIndex is -1 here, so this removes everything up to and including the colon.
                                    s2 = s2.Remove(0, colonIndex - endIndex);
                                }
                                else if (endIndex > 0)
                                {
                                    s2 = s2.Remove(endIndex + 1, colonIndex - endIndex);
                                }
                            }

                            if (count == 0)
                            {
                                removedInFirstLine = true;
                            }
                            else if (count == 1)
                            {
                                removedInSecondLine = true;
                            }
                        }
                    }
                    newText = (newText + Environment.NewLine + s2).Trim();
                }
            }
        }
        count++;
    }

    newText = newText.Trim();
    if (noOfNames > 0 && Utilities.GetNumberOfLines(newText) == 2)
    {
        int indexOfDialogChar = newText.IndexOf('-');
        bool insertDash = true;
        var arr = newText.SplitToLines();
        if (arr.Length == 2 && arr[0].Length > 1 && arr[1].Length > 1)
        {
            string arr0 = new StripableText(arr[0]).StrippedText;
            var arr1Stripable = new StripableText(arr[1]);
            string arr1 = arr1Stripable.StrippedText;

            if (arr0.Length > 0 && arr1.Length > 1)
            {
                // line continuation?
                if (Utilities.LowercaseLetters.Contains(arr1[0])) // second line starts with lower case letter
                {
                    char c = arr0[arr0.Length - 1];
                    if (Utilities.LowercaseLetters.Contains(c) || c == ',') // first line ends with comma or lower case letter
                    {
                        if (!arr1Stripable.Pre.Contains("..."))
                        {
                            insertDash = false;
                        }
                    }
                }

                if (insertDash)
                {
                    string arr0QuoteTrimmed = arr[0].TrimEnd('"');
                    if (arr0QuoteTrimmed.Length > 0 && !".?!".Contains(arr0QuoteTrimmed[arr0QuoteTrimmed.Length - 1]) && !arr0QuoteTrimmed.EndsWith("</i>", StringComparison.Ordinal))
                    {
                        if (!arr1Stripable.Pre.Contains('-'))
                        {
                            insertDash = false;
                        }
                    }
                }
            }

            if (insertDash && removedInFirstLine && !removedInSecondLine && !text.StartsWith('-') && !text.StartsWith("<i>-", StringComparison.Ordinal))
            {
                if (!arr[1].StartsWith('-') && !arr[1].StartsWith("<i>-", StringComparison.Ordinal))
                {
                    insertDash = false;
                }
            }
        }

        if (insertDash)
        {
            if (indexOfDialogChar < 0 || indexOfDialogChar > 4)
            {
                var st = new StripableText(newText, string.Empty, string.Empty);
                newText = st.Pre + "- " + st.StrippedText + st.Post;
            }

            int indexOfNewLine = newText.IndexOf(Environment.NewLine, StringComparison.Ordinal);
            string second = newText.Substring(indexOfNewLine).Trim();
            indexOfDialogChar = second.IndexOf('-');
            if (indexOfDialogChar < 0 || indexOfDialogChar > 6)
            {
                var st = new StripableText(second, String.Empty, String.Empty);
                second = st.Pre + "- " + st.StrippedText + st.Post;
                newText = newText.Remove(indexOfNewLine) + Environment.NewLine + second;
            }
        }
    }
    else if (newText.Contains('-') && !newText.Contains(Environment.NewLine))
    {
        // Single line left: drop dashes from its leading (Pre) part.
        var st = new StripableText(newText);
        if (st.Pre.Contains('-'))
        {
            newText = st.Pre.Replace("-", string.Empty) + st.StrippedText + st.Post;
        }
    }
    else if (removedInSecondLine && !removedInFirstLine && Utilities.GetNumberOfLines(newText) == 2)
    {
        string noTags = HtmlUtil.RemoveHtmlTags(newText, true).Trim();
        bool insertDash = noTags.StartsWith('-') && Utilities.CountTagInText(noTags, '-') == 1;
        if (insertDash)
        {
            if (newText.Contains(Environment.NewLine + "<i>"))
            {
                newText = newText.Replace(Environment.NewLine + "<i>", Environment.NewLine + "<i>- ");
            }
            else
            {
                newText = newText.Replace(Environment.NewLine, Environment.NewLine + "- ");
            }
        }
    }

    if (text.Contains("<i>") && !newText.Contains("<i>") && newText.EndsWith("</i>", StringComparison.Ordinal))
    {
        newText = "<i>" + newText;
    }

    if (string.IsNullOrWhiteSpace(newText))
    {
        return string.Empty;
    }

    return preAssTag + newText;
}
/// <summary>
/// Removes the text in front of a colon on each line of <paramref name="text"/> when
/// Settings.RemoveTextBeforeColon is enabled, then adjusts dialog dashes and re-adds
/// opening italic/bold tags that were cut off together with the removed prefix.
/// </summary>
/// <param name="text">Subtitle text; may span several lines.</param>
/// <returns>
/// The processed text, or the input itself when the setting is off, when it has no colon,
/// or when it is mixed-case text whose only colon is its last character.
/// </returns>
public string RemoveColon(string text)
{
    if (!Settings.RemoveTextBeforeColon)
    {
        return text;
    }

    if (text.IndexOf(":", StringComparison.Ordinal) < 0)
    {
        return text;
    }

    // House 7x01 line 52: and she would like you to do three things:
    // Okay or remove???
    if (text.IndexOf(':') > 0 && text.IndexOf(':') == text.Length - 1 && text != text.ToUpper())
    {
        return text;
    }

    string newText = string.Empty;
    string[] parts = text.Trim().Split(Environment.NewLine.ToCharArray(), StringSplitOptions.RemoveEmptyEntries);
    int noOfNames = 0;
    int count = 0; // index of the line currently being processed
    bool removedInFirstLine = false;
    bool removedInSecondLine = false;
    foreach (string s in parts)
    {
        int indexOfColon = s.IndexOf(":", StringComparison.Ordinal);
        if (indexOfColon > 0)
        {
            string pre = s.Substring(0, indexOfColon);
            if (Settings.RemoveTextBeforeColonOnlyUppercase && pre.Replace("<i>", string.Empty) != pre.Replace("<i>", string.Empty).ToUpper())
            {
                // Only upper-case prefixes may be removed and this one is not: keep the line.
                newText = newText + Environment.NewLine + s;
                newText = newText.Trim();
            }
            else
            {
                StripableText st = new StripableText(pre);

                // Second line of a two-line text, one colon, and the output so far does not
                // end with '.', '!' or '?' (optionally followed by "</i>"): keep the line
                // complete. The original had two branches for this with identical bodies that
                // prepended "<i>", "<b>", "[" or "(" whenever pre contained that opener. Since
                // pre is a prefix of s, the opener was already present and got duplicated
                // ("<i><i>...", "((..."), so the line is now appended unchanged.
                bool keepWholeLine =
                    count == 1 && Utilities.CountTagInText(text, Environment.NewLine) == 1 &&
                    (removedInFirstLine || (indexOfColon > 15 && pre.Contains(' '))) &&
                    Utilities.CountTagInText(s, ":") == 1 &&
                    !newText.EndsWith('.') && !newText.EndsWith('!') && !newText.EndsWith('?') &&
                    !newText.EndsWith(".</i>") && !newText.EndsWith("!</i>") && !newText.EndsWith("?</i>") &&
                    s != s.ToUpper();

                if (keepWholeLine)
                {
                    newText = newText + Environment.NewLine + s;
                }
                else if (Utilities.CountTagInText(s, ":") == 1)
                {
                    bool remove = true;
                    if (indexOfColon > 0 && indexOfColon < s.Length - 1)
                    {
                        // A digit on both sides of the colon: leave it alone.
                        if ("1234567890".Contains(s.Substring(indexOfColon - 1, 1)) && "1234567890".Contains(s.Substring(indexOfColon + 1, 1)))
                        {
                            remove = false;
                        }
                    }
                    if (s.StartsWith("Previously on") || s.StartsWith("<i>Previously on"))
                    {
                        remove = false;
                    }

                    if (remove && Settings.ColonSeparateLine)
                    {
                        // With this setting the colon must be the last thing on the line
                        // (or be directly followed by a line break).
                        remove = indexOfColon == s.Length - 1 || s.Substring(indexOfColon + 1).StartsWith(Environment.NewLine);
                    }

                    if (remove)
                    {
                        string content = s.Substring(indexOfColon + 1).Trim();
                        if (content.Length > 0)
                        {
                            // Re-open a tag/bracket that started inside the removed prefix.
                            if (pre.Contains("<i>") && content.Contains("</i>"))
                            {
                                newText = newText + Environment.NewLine + "<i>" + content;
                            }
                            else if (pre.Contains("<b>") && content.Contains("</b>"))
                            {
                                newText = newText + Environment.NewLine + "<b>" + content;
                            }
                            else if (pre.Contains('[') && content.Contains(']'))
                            {
                                newText = newText + Environment.NewLine + "[" + content;
                            }
                            else if (pre.Contains('(') && content.EndsWith(')'))
                            {
                                newText = newText + Environment.NewLine + "(" + content;
                            }
                            else
                            {
                                newText = newText + Environment.NewLine + content;
                            }

                            if (count == 0)
                            {
                                removedInFirstLine = true;
                            }
                            else if (count == 1)
                            {
                                removedInSecondLine = true;
                            }
                        }
                        newText = newText.Trim();

                        if (text.StartsWith('(') && newText.EndsWith(')') && !newText.Contains('('))
                        {
                            newText = newText.TrimEnd(')');
                        }
                        else if (newText.EndsWith("</i>") && text.StartsWith("<i>") && !newText.StartsWith("<i>"))
                        {
                            newText = "<i>" + newText;
                        }
                        else if (newText.EndsWith("</b>") && text.StartsWith("<b>") && !newText.StartsWith("<b>"))
                        {
                            newText = "<b>" + newText;
                        }

                        if (!IsHIDescription(st.StrippedText))
                        {
                            noOfNames++;
                        }
                    }
                    else
                    {
                        newText = newText + Environment.NewLine + s;
                        newText = newText.Trim();
                        if (newText.EndsWith("</i>") && text.StartsWith("<i>") && !newText.StartsWith("<i>"))
                        {
                            newText = "<i>" + newText;
                        }
                        else if (newText.EndsWith("</b>") && text.StartsWith("<b>") && !newText.StartsWith("<b>"))
                        {
                            newText = "<b>" + newText;
                        }
                    }
                }
                else
                {
                    // More than one colon on the line: handle at most two of them in turn.
                    string s2 = s;
                    for (int k = 0; k < 2; k++)
                    {
                        if (s2.Contains(':'))
                        {
                            int colonIndex = s2.IndexOf(":", StringComparison.Ordinal);
                            string start = s2.Substring(0, colonIndex);

                            if (!Settings.RemoveTextBeforeColonOnlyUppercase || start == start.ToUpper())
                            {
                                // Last ". ", "? " or "! " in front of the colon (-1 when none).
                                int periodIndex = start.LastIndexOf(". ", StringComparison.Ordinal);
                                int questIndex = start.LastIndexOf("? ", StringComparison.Ordinal);
                                int exclaIndex = start.LastIndexOf("! ", StringComparison.Ordinal);
                                int endIndex = periodIndex;
                                if (endIndex == -1 || questIndex > endIndex)
                                {
                                    endIndex = questIndex;
                                }
                                if (endIndex == -1 || exclaIndex > endIndex)
                                {
                                    endIndex = exclaIndex;
                                }
                                if (colonIndex > 0 && colonIndex < s2.Length - 1)
                                {
                                    // Digit on both sides of the colon: -10 matches neither removal below.
                                    if ("1234567890".Contains(s2.Substring(colonIndex - 1, 1)) && "1234567890".Contains(s2.Substring(colonIndex + 1, 1)))
                                    {
                                        endIndex = -10;
                                    }
                                }
                                if (endIndex == -1)
                                {
                                    // Removes everything up to and including the colon.
                                    s2 = s2.Remove(0, colonIndex - endIndex);
                                }
                                else if (endIndex > 0)
                                {
                                    s2 = s2.Remove(endIndex + 1, colonIndex - endIndex);
                                }
                            }

                            if (count == 0)
                            {
                                removedInFirstLine = true;
                            }
                            else if (count == 1)
                            {
                                removedInSecondLine = true;
                            }
                        }
                    }
                    newText = newText + Environment.NewLine + s2;
                    newText = newText.Trim();
                }
            }
        }
        else
        {
            // No colon (or colon at position 0): copy the line over unchanged.
            newText = newText + Environment.NewLine + s;
            newText = newText.Trim();

            if (newText.EndsWith("</i>") && text.StartsWith("<i>") && !newText.StartsWith("<i>"))
            {
                newText = "<i>" + newText;
            }
            else if (newText.EndsWith("</b>") && text.StartsWith("<b>") && !newText.StartsWith("<b>"))
            {
                newText = "<b>" + newText;
            }
        }
        count++;
    }

    newText = newText.Trim();
    if (noOfNames > 0 && Utilities.CountTagInText(newText, Environment.NewLine) == 1)
    {
        int indexOfDialogChar = newText.IndexOf('-');
        bool insertDash = true;
        string[] arr = newText.Split(Environment.NewLine.ToCharArray(), StringSplitOptions.RemoveEmptyEntries);
        if (arr.Length == 2 && arr[0].Length > 1 && arr[1].Length > 1)
        {
            string arr0 = new StripableText(arr[0]).StrippedText;
            string arr1 = new StripableText(arr[1]).StrippedText;

            //line continuation?
            if (arr0.Length > 0 && arr1.Length > 1 && (Utilities.LowercaseLetters + ",").Contains(arr0.Substring(arr0.Length - 1)) &&
                Utilities.LowercaseLetters.Contains(arr1.Substring(0, 1)))
            {
                if (new StripableText(arr[1]).Pre.Contains("...") == false)
                {
                    insertDash = false;
                }
            }

            if (arr0.Length > 0 && arr1.Length > 1 && !(arr[0].EndsWith('.') || arr[0].EndsWith('!') || arr[0].EndsWith('?') || arr[0].EndsWith("</i>")) &&
                !(new StripableText(arr[1]).Pre.Contains('-')))
            {
                insertDash = false;
            }

            if (removedInFirstLine && !removedInSecondLine && !text.StartsWith('-') && !text.StartsWith("<i>-"))
            {
                if (!insertDash || (!arr[1].StartsWith('-') && !arr[1].StartsWith("<i>-")))
                {
                    insertDash = false;
                }
            }
        }

        if (insertDash)
        {
            if (indexOfDialogChar < 0 || indexOfDialogChar > 4)
            {
                StripableText st = new StripableText(newText, "", "");
                newText = st.Pre + "- " + st.StrippedText + st.Post;
            }

            int indexOfNewLine = newText.IndexOf(Environment.NewLine, StringComparison.Ordinal);
            string second = newText.Substring(indexOfNewLine).Trim();
            indexOfDialogChar = second.IndexOf('-');
            if (indexOfDialogChar < 0 || indexOfDialogChar > 6)
            {
                StripableText st = new StripableText(second, "", "");
                second = st.Pre + "- " + st.StrippedText + st.Post;
                newText = newText.Remove(indexOfNewLine) + Environment.NewLine + second;
            }
        }
    }
    else if (!newText.Contains(Environment.NewLine) && newText.Contains('-'))
    {
        // Single line left: drop dashes from its leading (Pre) part.
        StripableText st = new StripableText(newText);
        if (st.Pre.Contains('-'))
        {
            newText = st.Pre.Replace("-", string.Empty) + st.StrippedText + st.Post;
        }
    }
    else if (Utilities.CountTagInText(newText, Environment.NewLine) == 1 && removedInFirstLine == false && removedInSecondLine)
    {
        string noTags = Utilities.RemoveHtmlTags(newText, true).Trim();
        bool insertDash = noTags.StartsWith('-') && Utilities.CountTagInText(noTags, "-") == 1;
        if (insertDash)
        {
            if (newText.Contains(Environment.NewLine + "<i>"))
            {
                newText = newText.Replace(Environment.NewLine + "<i>", Environment.NewLine + "<i>- ");
            }
            else
            {
                newText = newText.Replace(Environment.NewLine, Environment.NewLine + "- ");
            }
        }
    }

    if (text.Contains("<i>") && !newText.Contains("<i>") && newText.EndsWith("</i>"))
    {
        newText = "<i>" + newText;
    }

    return newText;
}
/// <summary>
/// Runs the hearing-impaired clean-up steps on one subtitle text: colon prefixes, hearing-impaired
/// tags at line level and inside lines, all-uppercase lines and (optionally) interjections,
/// followed by dialog-dash and spacing fix-ups.
/// </summary>
/// <param name="text">The subtitle text to clean.</param>
/// <returns>
/// The cleaned, trimmed text; an empty string when Settings.RemoveWhereContains is on and the
/// text contains the non-empty Settings.RemoveIfTextContains.
/// </returns>
public string RemoveTextFromHearImpaired(string text)
{
    if (Settings.RemoveWhereContains && Settings.RemoveIfTextContains.Length > 0 && text.Contains(Settings.RemoveIfTextContains))
    {
        return string.Empty;
    }

    string oldText = text;
    text = RemoveColon(text);

    // Characters StripableText may peel off the start / end of the text.
    string pre = " >-\"'‘`´♪¿¡.…—";
    string post = " -\"'`´♪.!?:…—";
    if (Settings.RemoveTextBetweenCustomTags)
    {
        // The custom delimiters must not be stripped away as decoration.
        // String.Replace throws for a null or empty search string, so skip unset delimiters.
        if (!string.IsNullOrEmpty(Settings.CustomStart))
        {
            pre = pre.Replace(Settings.CustomStart, string.Empty);
        }
        if (!string.IsNullOrEmpty(Settings.CustomEnd))
        {
            post = post.Replace(Settings.CustomEnd, string.Empty);
        }
    }

    var st = new StripableText(text, pre, post);
    var sb = new StringBuilder();
    string[] parts = st.StrippedText.Trim().Split(Utilities.NewLineChars, StringSplitOptions.RemoveEmptyEntries);
    int lineNumber = 0;
    bool removedDialogInFirstLine = false;
    int noOfNamesRemoved = 0;
    int noOfNamesRemovedNotInLineOne = 0;
    foreach (string s in parts)
    {
        StripableText stSub = new StripableText(s, pre, post);
        if (!StartAndEndsWithHearImpariedTags(stSub.StrippedText))
        {
            // The line is kept; only embedded hearing-impaired tags are removed from it.
            if (removedDialogInFirstLine && stSub.Pre.Contains("- "))
            {
                stSub.Pre = stSub.Pre.Replace("- ", string.Empty);
            }

            string newText = stSub.StrippedText;
            newText = RemoveHearImpairedTags(newText);

            if (stSub.StrippedText.Length - newText.Length > 2)
            {
                string removedText = GetRemovedString(stSub.StrippedText, newText);
                if (!IsHIDescription(removedText))
                {
                    noOfNamesRemoved++;
                    if (lineNumber > 0)
                    {
                        noOfNamesRemovedNotInLineOne++;
                    }
                }
            }
            sb.AppendLine(stSub.Pre + newText + stSub.Post);
        }
        else
        {
            // The whole line is a hearing-impaired tag and is dropped (nothing is appended).
            if (!IsHIDescription(stSub.StrippedText))
            {
                noOfNamesRemoved++;
                if (lineNumber > 0)
                {
                    noOfNamesRemovedNotInLineOne++;
                }
            }

            if (st.Pre.Contains("- ") && lineNumber == 0)
            {
                st.Pre = st.Pre.Replace("- ", string.Empty);
                removedDialogInFirstLine = true;
            }

            if (st.Pre.Contains("<i>") && stSub.Post.Contains("</i>"))
            {
                st.Pre = st.Pre.Replace("<i>", string.Empty);
            }

            if (s.Contains("<i>") && !s.Contains("</i>") && st.Post.Contains("</i>"))
            {
                st.Post = st.Post.Replace("</i>", string.Empty);
            }
        }
        lineNumber++;
    }

    text = st.Pre + sb.ToString().Trim() + st.Post;
    text = text.Replace("<i></i>", string.Empty).Trim();
    text = RemoveColon(text);
    text = RemoveLineIfAllUppercase(text);
    text = RemoveHearImpairedtagsInsideLine(text);
    if (Settings.RemoveInterjections)
    {
        text = RemoveInterjections(text);
    }

    st = new StripableText(text, " >-\"'‘`´♪¿¡.…—", " -\"'`´♪.!?:…—");
    text = st.StrippedText;
    if (StartAndEndsWithHearImpariedTags(text))
    {
        text = RemoveStartEndTags(text);
    }

    text = RemoveHearImpairedTags(text);

    // fix 3 lines to two liners - if only two lines
    if (noOfNamesRemoved >= 1 && Utilities.CountTagInText(text, Environment.NewLine) == 2)
    {
        string[] a = Utilities.RemoveHtmlTags(text).Replace(" ", string.Empty).Split(new[] { '!', '?', '.' }, StringSplitOptions.RemoveEmptyEntries);
        if (a.Length == 2)
        {
            StripableText temp = new StripableText(text);
            temp.StrippedText = temp.StrippedText.Replace(Environment.NewLine, " ");
            int splitIndex = temp.StrippedText.LastIndexOf('!');
            if (splitIndex == -1)
            {
                splitIndex = temp.StrippedText.LastIndexOf('?');
            }
            if (splitIndex == -1)
            {
                splitIndex = temp.StrippedText.LastIndexOf('.');
            }
            if (splitIndex > 0)
            {
                text = temp.Pre + temp.StrippedText.Insert(splitIndex + 1, Environment.NewLine) + temp.Post;
            }
        }
    }

    if (!text.StartsWith('-') && noOfNamesRemoved >= 1 && Utilities.CountTagInText(text, Environment.NewLine) == 1)
    {
        string[] arr = text.Split(Utilities.NewLineChars);
        string part0 = arr[0].Trim().Replace("</i>", string.Empty).Trim();
        if (!part0.EndsWith(',') && (!part0.EndsWith('-') || noOfNamesRemovedNotInLineOne > 0))
        {
            if (part0.Length > 0 && ".!?".Contains(part0.Substring(part0.Length - 1)))
            {
                if (noOfNamesRemovedNotInLineOne > 0)
                {
                    if (!st.Pre.Contains('-'))
                    {
                        text = "- " + text.Replace(Environment.NewLine, Environment.NewLine + "- ");
                    }
                    if (!text.Contains(Environment.NewLine + "-") && !text.Contains(Environment.NewLine + "<i>-"))
                    {
                        text = text.Replace(Environment.NewLine, Environment.NewLine + "- ");
                    }
                }
            }
        }
    }

    if (!string.IsNullOrEmpty(text))
    {
        text = st.Pre + text + st.Post;
    }

    if (oldText.Trim().StartsWith("- ") &&
        (oldText.Contains(Environment.NewLine + "- ") || oldText.Contains(Environment.NewLine + " - ")) &&
        !text.Contains(Environment.NewLine))
    {
        // The input had dashes on two lines but only one line is left: drop its dash.
        text = text.TrimStart().TrimStart('-').TrimStart();
    }

    if (oldText != text)
    {
        // insert spaces before "-"
        text = text.Replace(Environment.NewLine + "- <i>", Environment.NewLine + "<i>- ");
        text = text.Replace(Environment.NewLine + "-<i>", Environment.NewLine + "<i>- ");

        if (text.StartsWith('-') && text.Length > 2 && text[1] != ' ' && text[1] != '-')
        {
            text = text.Insert(1, " ");
        }

        if (text.StartsWith("<i>-") && text.Length > 5 && text[4] != ' ' && text[4] != '-')
        {
            text = text.Insert(4, " ");
        }

        if (text.Contains(Environment.NewLine + "-"))
        {
            int index = text.IndexOf(Environment.NewLine + "-", StringComparison.Ordinal);
            int afterDash = index + Environment.NewLine.Length + 1;
            if (index + 4 < text.Length && text[afterDash] != ' ' && text[afterDash] != '-')
            {
                text = text.Insert(afterDash, " ");
            }
        }

        if (text.Contains(Environment.NewLine + "<i>-"))
        {
            int index = text.IndexOf(Environment.NewLine + "<i>-", StringComparison.Ordinal);
            int afterDash = index + Environment.NewLine.Length + 4;
            // The former guard (index + 5 < text.Length) did not cover afterDash when the line
            // break is two characters long, so text ending in "<i>-" raised an
            // IndexOutOfRangeException. Checking afterDash itself gives the same result for
            // every input that did not throw.
            if (afterDash < text.Length && text[afterDash] != ' ' && text[afterDash] != '-')
            {
                text = text.Insert(afterDash, " ");
            }
        }
    }
    return text.Trim();
}
/// <summary>
/// Removes the text in front of a colon on each line of <paramref name="text"/> when
/// Settings.RemoveTextBeforeColon is enabled, then adjusts dialog dashes and re-adds
/// opening italic/bold tags that were cut off together with the removed prefix.
/// </summary>
/// <param name="text">Subtitle text; may span several lines.</param>
/// <returns>
/// The processed text, or the input itself when the setting is off, when it has no colon,
/// or when it is mixed-case text whose only colon is its last character.
/// </returns>
public string RemoveColon(string text)
{
    if (!Settings.RemoveTextBeforeColon)
    {
        return text;
    }

    if (text.IndexOf(":", StringComparison.Ordinal) < 0)
    {
        return text;
    }

    // House 7x01 line 52: and she would like you to do three things:
    // Okay or remove???
    if (text.IndexOf(':') > 0 && text.IndexOf(':') == text.Length - 1 && text != text.ToUpper())
    {
        return text;
    }

    string newText = string.Empty;
    string[] parts = text.Trim().Split(Utilities.NewLineChars, StringSplitOptions.RemoveEmptyEntries);
    int noOfNames = 0;
    int count = 0; // index of the line currently being processed
    bool removedInFirstLine = false;
    bool removedInSecondLine = false;
    foreach (string s in parts)
    {
        int indexOfColon = s.IndexOf(":", StringComparison.Ordinal);
        if (indexOfColon > 0)
        {
            string pre = s.Substring(0, indexOfColon);
            if (Settings.RemoveTextBeforeColonOnlyUppercase && pre.Replace("<i>", string.Empty) != pre.Replace("<i>", string.Empty).ToUpper())
            {
                // Only upper-case prefixes may be removed and this one is not: keep the line.
                newText = newText + Environment.NewLine + s;
                newText = newText.Trim();
            }
            else
            {
                StripableText st = new StripableText(pre);

                // Second line of a two-line text, one colon, and the output so far does not
                // end with '.', '!' or '?' (optionally followed by "</i>"): keep the line
                // complete. Two branches with identical bodies used to handle this and
                // prepended "<i>", "<b>", "[" or "(" whenever pre contained that opener.
                // pre is a prefix of s, so the opener was already there and ended up doubled
                // ("<i><i>...", "((..."); the line is now appended unchanged.
                bool keepWholeLine =
                    count == 1 && Utilities.CountTagInText(text, Environment.NewLine) == 1 &&
                    (removedInFirstLine || (indexOfColon > 15 && pre.Contains(' '))) &&
                    Utilities.CountTagInText(s, ":") == 1 &&
                    !newText.EndsWith('.') && !newText.EndsWith('!') && !newText.EndsWith('?') &&
                    !newText.EndsWith(".</i>") && !newText.EndsWith("!</i>") && !newText.EndsWith("?</i>") &&
                    s != s.ToUpper();

                if (keepWholeLine)
                {
                    newText = newText + Environment.NewLine + s;
                }
                else if (Utilities.CountTagInText(s, ":") == 1)
                {
                    bool remove = true;
                    if (indexOfColon > 0 && indexOfColon < s.Length - 1)
                    {
                        // A digit on both sides of the colon: leave it alone.
                        if ("1234567890".Contains(s.Substring(indexOfColon - 1, 1)) && "1234567890".Contains(s.Substring(indexOfColon + 1, 1)))
                        {
                            remove = false;
                        }
                    }
                    if (s.StartsWith("Previously on") || s.StartsWith("<i>Previously on"))
                    {
                        remove = false;
                    }

                    if (remove && Settings.ColonSeparateLine)
                    {
                        // With this setting the colon must be the last thing on the line
                        // (or be directly followed by a line break).
                        remove = indexOfColon == s.Length - 1 || s.Substring(indexOfColon + 1).StartsWith(Environment.NewLine);
                    }

                    if (remove)
                    {
                        string content = s.Substring(indexOfColon + 1).Trim();
                        if (content.Length > 0)
                        {
                            // Re-open a tag/bracket that started inside the removed prefix.
                            if (pre.Contains("<i>") && content.Contains("</i>"))
                            {
                                newText = newText + Environment.NewLine + "<i>" + content;
                            }
                            else if (pre.Contains("<b>") && content.Contains("</b>"))
                            {
                                newText = newText + Environment.NewLine + "<b>" + content;
                            }
                            else if (pre.Contains('[') && content.Contains(']'))
                            {
                                newText = newText + Environment.NewLine + "[" + content;
                            }
                            else if (pre.Contains('(') && content.EndsWith(')'))
                            {
                                newText = newText + Environment.NewLine + "(" + content;
                            }
                            else
                            {
                                newText = newText + Environment.NewLine + content;
                            }

                            if (count == 0)
                            {
                                removedInFirstLine = true;
                            }
                            else if (count == 1)
                            {
                                removedInSecondLine = true;
                            }
                        }
                        newText = newText.Trim();

                        if (text.StartsWith('(') && newText.EndsWith(')') && !newText.Contains('('))
                        {
                            newText = newText.TrimEnd(')');
                        }
                        else if (newText.EndsWith("</i>") && text.StartsWith("<i>") && !newText.StartsWith("<i>"))
                        {
                            newText = "<i>" + newText;
                        }
                        else if (newText.EndsWith("</b>") && text.StartsWith("<b>") && !newText.StartsWith("<b>"))
                        {
                            newText = "<b>" + newText;
                        }

                        if (!IsHIDescription(st.StrippedText))
                        {
                            noOfNames++;
                        }
                    }
                    else
                    {
                        newText = newText + Environment.NewLine + s;
                        newText = newText.Trim();
                        if (newText.EndsWith("</i>") && text.StartsWith("<i>") && !newText.StartsWith("<i>"))
                        {
                            newText = "<i>" + newText;
                        }
                        else if (newText.EndsWith("</b>") && text.StartsWith("<b>") && !newText.StartsWith("<b>"))
                        {
                            newText = "<b>" + newText;
                        }
                    }
                }
                else
                {
                    // More than one colon on the line: handle at most two of them in turn.
                    string s2 = s;
                    for (int k = 0; k < 2; k++)
                    {
                        if (s2.Contains(':'))
                        {
                            int colonIndex = s2.IndexOf(":", StringComparison.Ordinal);
                            string start = s2.Substring(0, colonIndex);

                            if (!Settings.RemoveTextBeforeColonOnlyUppercase || start == start.ToUpper())
                            {
                                // Last ". ", "? " or "! " in front of the colon (-1 when none).
                                int periodIndex = start.LastIndexOf(". ", StringComparison.Ordinal);
                                int questIndex = start.LastIndexOf("? ", StringComparison.Ordinal);
                                int exclaIndex = start.LastIndexOf("! ", StringComparison.Ordinal);
                                int endIndex = periodIndex;
                                if (endIndex == -1 || questIndex > endIndex)
                                {
                                    endIndex = questIndex;
                                }
                                if (endIndex == -1 || exclaIndex > endIndex)
                                {
                                    endIndex = exclaIndex;
                                }
                                if (colonIndex > 0 && colonIndex < s2.Length - 1)
                                {
                                    // Digit on both sides of the colon: -10 matches neither removal below.
                                    if ("1234567890".Contains(s2.Substring(colonIndex - 1, 1)) && "1234567890".Contains(s2.Substring(colonIndex + 1, 1)))
                                    {
                                        endIndex = -10;
                                    }
                                }
                                if (endIndex == -1)
                                {
                                    // Removes everything up to and including the colon.
                                    s2 = s2.Remove(0, colonIndex - endIndex);
                                }
                                else if (endIndex > 0)
                                {
                                    s2 = s2.Remove(endIndex + 1, colonIndex - endIndex);
                                }
                            }

                            if (count == 0)
                            {
                                removedInFirstLine = true;
                            }
                            else if (count == 1)
                            {
                                removedInSecondLine = true;
                            }
                        }
                    }
                    newText = newText + Environment.NewLine + s2;
                    newText = newText.Trim();
                }
            }
        }
        else
        {
            // No colon (or colon at position 0): copy the line over unchanged.
            newText = newText + Environment.NewLine + s;
            newText = newText.Trim();

            if (newText.EndsWith("</i>") && text.StartsWith("<i>") && !newText.StartsWith("<i>"))
            {
                newText = "<i>" + newText;
            }
            else if (newText.EndsWith("</b>") && text.StartsWith("<b>") && !newText.StartsWith("<b>"))
            {
                newText = "<b>" + newText;
            }
        }
        count++;
    }

    newText = newText.Trim();
    if (noOfNames > 0 && Utilities.CountTagInText(newText, Environment.NewLine) == 1)
    {
        int indexOfDialogChar = newText.IndexOf('-');
        bool insertDash = true;
        string[] arr = newText.Split(Utilities.NewLineChars, StringSplitOptions.RemoveEmptyEntries);
        if (arr.Length == 2 && arr[0].Length > 1 && arr[1].Length > 1)
        {
            string arr0 = new StripableText(arr[0]).StrippedText;
            string arr1 = new StripableText(arr[1]).StrippedText;

            //line continuation?
            if (arr0.Length > 0 && arr1.Length > 1 && (Utilities.LowercaseLetters + ",").Contains(arr0.Substring(arr0.Length - 1)) &&
                Utilities.LowercaseLetters.Contains(arr1.Substring(0, 1)))
            {
                if (new StripableText(arr[1]).Pre.Contains("...") == false)
                {
                    insertDash = false;
                }
            }

            if (arr0.Length > 0 && arr1.Length > 1 && !(arr[0].EndsWith('.') || arr[0].EndsWith('!') || arr[0].EndsWith('?') || arr[0].EndsWith("</i>")) &&
                !(new StripableText(arr[1]).Pre.Contains('-')))
            {
                insertDash = false;
            }

            if (removedInFirstLine && !removedInSecondLine && !text.StartsWith('-') && !text.StartsWith("<i>-"))
            {
                if (!insertDash || (!arr[1].StartsWith('-') && !arr[1].StartsWith("<i>-")))
                {
                    insertDash = false;
                }
            }
        }

        if (insertDash)
        {
            if (indexOfDialogChar < 0 || indexOfDialogChar > 4)
            {
                StripableText st = new StripableText(newText, "", "");
                newText = st.Pre + "- " + st.StrippedText + st.Post;
            }

            int indexOfNewLine = newText.IndexOf(Environment.NewLine, StringComparison.Ordinal);
            string second = newText.Substring(indexOfNewLine).Trim();
            indexOfDialogChar = second.IndexOf('-');
            if (indexOfDialogChar < 0 || indexOfDialogChar > 6)
            {
                StripableText st = new StripableText(second, "", "");
                second = st.Pre + "- " + st.StrippedText + st.Post;
                newText = newText.Remove(indexOfNewLine) + Environment.NewLine + second;
            }
        }
    }
    else if (!newText.Contains(Environment.NewLine) && newText.Contains('-'))
    {
        // Single line left: drop dashes from its leading (Pre) part.
        StripableText st = new StripableText(newText);
        if (st.Pre.Contains('-'))
        {
            newText = st.Pre.Replace("-", string.Empty) + st.StrippedText + st.Post;
        }
    }
    else if (Utilities.CountTagInText(newText, Environment.NewLine) == 1 && removedInFirstLine == false && removedInSecondLine)
    {
        string noTags = Utilities.RemoveHtmlTags(newText, true).Trim();
        bool insertDash = noTags.StartsWith('-') && Utilities.CountTagInText(noTags, "-") == 1;
        if (insertDash)
        {
            if (newText.Contains(Environment.NewLine + "<i>"))
            {
                newText = newText.Replace(Environment.NewLine + "<i>", Environment.NewLine + "<i>- ");
            }
            else
            {
                newText = newText.Replace(Environment.NewLine, Environment.NewLine + "- ");
            }
        }
    }

    if (text.Contains("<i>") && !newText.Contains("<i>") && newText.EndsWith("</i>"))
    {
        newText = "<i>" + newText;
    }

    return newText;
}
/// <summary>
/// Replaces <c>_subtitle</c> with a new subtitle in which a single-line paragraph is joined
/// (separated by <see cref="Environment.NewLine"/>) with the paragraph that follows it.
/// </summary>
/// <remarks>
/// A paragraph is copied unchanged instead of being joined when any of these holds:
/// it already contains a line break; it is the last paragraph; its trimmed text ends with
/// '!' or '.' and the first character of its stripped text is in
/// <c>Utilities.UppercaseLetters</c>; or either text is within five characters of the
/// configured maximum line length. A joined paragraph is created with only its
/// <c>Text</c> set, and the paragraph that was folded into it is not emitted again.
/// </remarks>
private void MergeLinesWithContinuation()
{
    var result = new Subtitle();
    for (int index = 0; index < _subtitle.Paragraphs.Count; index++)
    {
        Paragraph current = _subtitle.Paragraphs[index];
        Paragraph following = _subtitle.GetParagraphOrDefault(index + 1);

        bool canMerge = !current.Text.Contains(Environment.NewLine) && following != null;

        if (canMerge)
        {
            string trimmed = current.Text.TrimEnd();
            if (trimmed.EndsWith('!') || trimmed.EndsWith('.'))
            {
                // A finished sentence that itself started with a capital is left alone.
                string core = new StripableText(current.Text).StrippedText;
                if (core.Length > 0 && Utilities.UppercaseLetters.Contains(core[0].ToString(CultureInfo.InvariantCulture)))
                {
                    canMerge = false;
                }
            }
        }

        if (canMerge)
        {
            int threshold = Configuration.Settings.General.SubtitleLineMaximumLength - 5;
            if (current.Text.Length >= threshold || following.Text.Length >= threshold)
            {
                canMerge = false;
            }
        }

        if (!canMerge)
        {
            result.Paragraphs.Add(new Paragraph(current));
            continue;
        }

        result.Paragraphs.Add(new Paragraph { Text = current.Text + Environment.NewLine + following.Text });

        // The following paragraph is now part of the merged one, so step over it.
        index++;
    }

    _subtitle = result;
}
/// <summary>
/// Replaces an uppercase 'I' that sits inside or in front of lowercase letters with a
/// lowercase 'l' (or, in one case, 'i') in every paragraph of <paramref name="subtitle"/>.
/// </summary>
/// <param name="subtitle">Subtitle whose paragraph texts are corrected in place.</param>
/// <param name="callbacks">
/// Supplies <c>AllowFix</c> (asked before each change), <c>IsName</c> (words reported as names
/// are left alone), <c>AddFixToListView</c> (called for each change) and <c>UpdateFixStatus</c>
/// (called once at the end with the number of changes).
/// </param>
/// <remarks>
/// Pass 1 runs <c>ReAfterLowercaseLetter</c> on the raw text and turns the 'I' right after the
/// match start into 'l', except directly after "Mc". Pass 2 runs
/// <c>ReBeforeLowercaseLetter</c> on the stripped text and decides per match from the
/// characters surrounding it. Both regexes are defined outside this method. Every replacement
/// keeps the text length, so match indexes stay valid while iterating.
/// </remarks>
public void Fix(Subtitle subtitle, IFixCallbacks callbacks)
{
    var language = Configuration.Settings.Language.FixCommonErrors;
    string fixAction = language.FixUppercaseIInsideLowercaseWord;
    int uppercaseIsInsideLowercaseWords = 0;
    for (int i = 0; i < subtitle.Paragraphs.Count; i++)
    {
        Paragraph p = subtitle.Paragraphs[i];
        string oldText = p.Text;

        // Pass 1: 'I' directly after the character at the match start.
        Match match = ReAfterLowercaseLetter.Match(p.Text);
        while (match.Success)
        {
            // Irish names, McDonalds etc. The check needs one character before the match,
            // so "> 0" is the correct bound; "> 1" missed a name at the very start of the text.
            bool followsMc = match.Index > 0 && p.Text.Substring(match.Index - 1, 2) == "Mc";
            if (!followsMc && p.Text[match.Index + 1] == 'I' && callbacks.AllowFix(p, fixAction))
            {
                p.Text = p.Text.Substring(0, match.Index + 1) + "l";
                if (match.Index + 2 < oldText.Length)
                {
                    p.Text += oldText.Substring(match.Index + 2);
                }
                uppercaseIsInsideLowercaseWords++;
                callbacks.AddFixToListView(p, fixAction, oldText, p.Text);
            }
            match = match.NextMatch();
        }

        // Pass 2: 'I' at the start of a match in the stripped text.
        var st = new StripableText(p.Text);
        match = ReBeforeLowercaseLetter.Match(st.StrippedText);
        while (match.Success)
        {
            string word = GetWholeWord(st.StrippedText, match.Index);
            if (!callbacks.IsName(word))
            {
                if (callbacks.AllowFix(p, fixAction))
                {
                    if (word.Equals("internal", StringComparison.OrdinalIgnoreCase) ||
                        word.Equals("island", StringComparison.OrdinalIgnoreCase) ||
                        word.Equals("islands", StringComparison.OrdinalIgnoreCase))
                    {
                        // Legitimate words starting with 'I' - leave untouched.
                    }
                    else if (match.Index == 0)
                    {
                        // First letter in paragraph: too risky to change - perhaps if periods
                        // were fixed at the same time, but that would be complicated.
                    }
                    else
                    {
                        if (match.Index > 2 && st.StrippedText[match.Index - 1] == ' ')
                        {
                            // Preceded by a space: fix only when the previous word did not end
                            // a sentence and the 'I' is followed by a vowel.
                            if ((Utilities.AllLettersAndNumbers + @",").Contains(st.StrippedText[match.Index - 2]) &&
                                match.Length >= 2 &&
                                Utilities.LowercaseVowels.Contains(char.ToLower(match.Value[1])))
                            {
                                st.StrippedText = st.StrippedText.Remove(match.Index, 1).Insert(match.Index, "l");
                                p.Text = st.MergedString;
                                uppercaseIsInsideLowercaseWords++;
                                callbacks.AddFixToListView(p, fixAction, oldText, p.Text);
                            }
                        }
                        else if (match.Index > Environment.NewLine.Length + 1 && Environment.NewLine.Contains(st.StrippedText[match.Index - 1]))
                        {
                            // Preceded by a line break: look at the last character of the
                            // previous line, i.e. one position before the newline sequence.
                            // (The former "- NewLine.Length + 1" pointed at the newline itself.)
                            if ((Utilities.AllLettersAndNumbers + @",").Contains(st.StrippedText[match.Index - Environment.NewLine.Length - 1]) &&
                                match.Length >= 2 &&
                                Utilities.LowercaseVowels.Contains(match.Value[1]))
                            {
                                st.StrippedText = st.StrippedText.Remove(match.Index, 1).Insert(match.Index, "l");
                                p.Text = st.MergedString;
                                uppercaseIsInsideLowercaseWords++;
                                callbacks.AddFixToListView(p, fixAction, oldText, p.Text);
                            }
                        }
                        else if (match.Index > 1 &&
                                 ((st.StrippedText[match.Index - 1] == '\"') ||
                                  (st.StrippedText[match.Index - 1] == '\'') ||
                                  (st.StrippedText[match.Index - 1] == '>') ||
                                  (st.StrippedText[match.Index - 1] == '-')))
                        {
                            // Directly after a quote, tag end or dash - leave untouched.
                        }
                        else
                        {
                            var before = '\0';
                            var after = '\0';
                            if (match.Index > 0)
                            {
                                before = st.StrippedText[match.Index - 1];
                            }
                            if (match.Index < st.StrippedText.Length - 2)
                            {
                                after = st.StrippedText[match.Index + 1];
                            }

                            if (before != '\0' && char.IsUpper(before) && after != '\0' && char.IsLower(after) &&
                                !Utilities.LowercaseVowels.Contains(char.ToLower(before)) &&
                                !Utilities.LowercaseVowels.Contains(after))
                            {
                                // Uppercase consonant + 'I' + lowercase consonant: use 'i'.
                                st.StrippedText = st.StrippedText.Remove(match.Index, 1).Insert(match.Index, "i");
                                p.Text = st.MergedString;
                                uppercaseIsInsideLowercaseWords++;
                                callbacks.AddFixToListView(p, fixAction, oldText, p.Text);
                            }
                            else if (@"‘’¡¿„“()[]♪'. @".Contains(before) && !Utilities.LowercaseVowels.Contains(char.ToLower(after)))
                            {
                                // After one of these characters and not followed by a vowel - leave untouched.
                            }
                            else
                            {
                                st.StrippedText = st.StrippedText.Remove(match.Index, 1).Insert(match.Index, "l");
                                p.Text = st.MergedString;
                                uppercaseIsInsideLowercaseWords++;
                                callbacks.AddFixToListView(p, fixAction, oldText, p.Text);
                            }
                        }
                    }
                }
            }
            match = match.NextMatch();
        }
    }
    callbacks.UpdateFixStatus(uppercaseIsInsideLowercaseWords, language.FixUppercaseIInsindeLowercaseWords, language.XUppercaseIsFoundInsideLowercaseWords);
}
/// <summary>
/// Removes configured interjections from <paramref name="text"/> and tidies the punctuation
/// and dialog dashes left behind.
/// </summary>
/// <param name="text">The subtitle text to clean.</param>
/// <returns>
/// The cleaned text; <see cref="string.Empty"/> when nothing but stripped-away content is
/// left; or a single trimmed line when a two-line text collapses to one meaningful line.
/// </returns>
/// <remarks>
/// The interjection list is built once from <c>Configuration.Settings.Tools.Interjections</c>
/// (semicolon separated) and cached in <c>_interjectionList</c>; each entry is stored as
/// written, lowercased, uppercased and with its first letter uppercased, then sorted with
/// <c>CompareLength</c>. Because the list is only built while the cache is null, later
/// changes to the setting are not picked up by this instance.
/// </remarks>
public string RemoveInterjections(string text)
{
    string oldText = text;

    if (_interjectionList == null)
    {
        // RemoveEmptyEntries guarantees every entry has at least one character.
        string[] arr = Configuration.Settings.Tools.Interjections.Split(new[] { ';' }, StringSplitOptions.RemoveEmptyEntries);
        _interjectionList = new List<string>();
        foreach (string s in arr)
        {
            if (!_interjectionList.Contains(s))
            {
                _interjectionList.Add(s);
            }
            string lower = s.ToLower();
            if (!_interjectionList.Contains(lower))
            {
                _interjectionList.Add(lower);
            }
            string upper = s.ToUpper();
            if (!_interjectionList.Contains(upper))
            {
                _interjectionList.Add(upper);
            }
            string pascalCasing = s.Substring(0, 1).ToUpper() + s.Remove(0, 1);
            if (!_interjectionList.Contains(pascalCasing))
            {
                _interjectionList.Add(pascalCasing);
            }
        }
        _interjectionList.Sort(CompareLength);
    }

    // Keep scanning until a full pass over the list makes no further change.
    bool doRepeat = true;
    while (doRepeat)
    {
        doRepeat = false;
        foreach (string s in _interjectionList)
        {
            if (text.Contains(s))
            {
                // The interjection is user-configured literal text, so escape it before
                // embedding it in a pattern; only the \b word boundaries are regex syntax.
                var regex = new Regex("\\b" + Regex.Escape(s) + "\\b");
                var match = regex.Match(text);
                if (match.Success)
                {
                    int index = match.Index;
                    string temp = text.Remove(index, s.Length);
                    string pre = string.Empty;
                    if (index > 0)
                    {
                        doRepeat = true;
                    }

                    bool removeAfter = true;

                    // Leftover ", !" / ", ?" / ", ." : drop the comma and the space.
                    if (temp.Length > index - s.Length + 3 && index > s.Length)
                    {
                        string probe = temp.Substring(index - s.Length + 1, 3);
                        if (probe == ", !" || probe == ", ?" || probe == ", .")
                        {
                            temp = temp.Remove(index - s.Length + 1, 2);
                            removeAfter = false;
                        }
                    }
                    if (removeAfter && temp.Length > index - s.Length + 2 && index > s.Length)
                    {
                        string probe = temp.Substring(index - s.Length, 3);
                        if (probe == ", !" || probe == ", ?" || probe == ", .")
                        {
                            temp = temp.Remove(index - s.Length, 2);
                            removeAfter = false;
                        }
                    }

                    // Leftover "-!" / "-?" / "-." : drop the dash.
                    if (removeAfter && temp.Length > index - s.Length + 2 && index > s.Length)
                    {
                        string probe = temp.Substring(index - s.Length + 1, 2);
                        if (probe == "-!" || probe == "-?" || probe == "-.")
                        {
                            temp = temp.Remove(index - s.Length + 1, 1);
                            removeAfter = false;
                        }
                    }

                    if (removeAfter)
                    {
                        if (index == 0)
                        {
                            if (!string.IsNullOrEmpty(temp) && temp.StartsWith('-'))
                            {
                                temp = temp.Remove(0, 1).Trim();
                            }
                        }
                        else if (index == 3 && !string.IsNullOrEmpty(temp) && temp.StartsWith("<i>-"))
                        {
                            temp = temp.Remove(3, 1);
                        }
                        else if (index > 0)
                        {
                            // Split into the part before the interjection and the remainder.
                            pre = text.Substring(0, index);
                            temp = temp.Remove(0, index);
                            if (pre.EndsWith('-') && temp.StartsWith('-'))
                            {
                                temp = temp.Remove(0, 1);
                            }
                            if (pre.EndsWith("- ") && temp.StartsWith('-'))
                            {
                                temp = temp.Remove(0, 1);
                            }
                        }

                        // Strip separators that followed the removed interjection.
                        while (temp.Length > 0 && (temp.StartsWith(' ') || temp.StartsWith(',') || temp.StartsWith('.') || temp.StartsWith('!') || temp.StartsWith('?')))
                        {
                            temp = temp.Remove(0, 1);
                            doRepeat = true;
                        }

                        // If the interjection started with an uppercase letter, the text that
                        // takes its place gets its first character uppercased.
                        if (temp.Length > 0 && s[0].ToString(CultureInfo.InvariantCulture) != s[0].ToString(CultureInfo.InvariantCulture).ToLower())
                        {
                            temp = temp.Remove(0, 1).Insert(0, temp[0].ToString(CultureInfo.InvariantCulture).ToUpper());
                            doRepeat = true;
                        }

                        if (pre.EndsWith(' ') && temp.StartsWith('-'))
                        {
                            temp = temp.Remove(0, 1);
                        }
                        temp = pre + temp;
                    }

                    // Remove a dangling "<newline>- " at the end. Use the real suffix length
                    // instead of a fixed 4, which is only right for a two-character newline.
                    string danglingDash = Environment.NewLine + "- ";
                    if (temp.EndsWith(danglingDash))
                    {
                        temp = temp.Remove(temp.Length - danglingDash.Length, danglingDash.Length);
                    }

                    var st = new StripableText(temp);
                    if (st.StrippedText.Length == 0)
                    {
                        return string.Empty;
                    }

                    // Two lines became one: the remaining line no longer needs a dialog dash.
                    if (!temp.Contains(Environment.NewLine) && text.Contains(Environment.NewLine) && temp.StartsWith('-'))
                    {
                        temp = temp.Remove(0, 1).Trim();
                    }
                    text = temp;
                }
            }
        }
    }

    string[] lines = text.Split(Utilities.NewLineChars, StringSplitOptions.RemoveEmptyEntries);
    if (text != oldText && lines.Length == 2)
    {
        if (lines[0] == "-" && lines[1] == "-")
        {
            return string.Empty;
        }
        if (lines[0].StartsWith('-') && lines[0].Length > 1 && lines[1].Trim() == "-")
        {
            return lines[0].Remove(0, 1).Trim();
        }
        if (lines[1].StartsWith('-') && lines[1].Length > 1 && lines[0].Trim() == "-")
        {
            return lines[1].Remove(0, 1).Trim();
        }

        // NOTE(review): the grouping below is what the original expression evaluated to
        // ("&&" binds tighter than "||"). The length check possibly was meant to guard all
        // four comparisons - confirm before changing.
        if ((lines[0].Length > 1 && lines[1] == "-") || lines[1] == "." || lines[1] == "!" || lines[1] == "?")
        {
            if (oldText.Contains(Environment.NewLine + "-") && lines[0].StartsWith('-'))
            {
                lines[0] = lines[0].Remove(0, 1);
            }
            return lines[0].Trim();
        }
    }
    return text;
}
/// <summary>
/// Adds a missing sentence-ending sign to paragraphs of <paramref name="subtitle"/>, and to
/// the line before a dialog dash inside a paragraph.
/// </summary>
/// <param name="subtitle">Subtitle whose paragraph texts are corrected in place.</param>
/// <param name="callbacks">
/// Supplies <c>IsName</c>, <c>AllowFix</c> (asked before each change),
/// <c>AddFixToListView</c> (called per change) and <c>UpdateFixStatus</c> (called once at the end).
/// </param>
/// <remarks>
/// Per paragraph, at most one of two end-of-paragraph rules applies, followed by an
/// independent rule for a line break that is followed by a dialog dash.
/// <c>IsOneLineUrl</c>, <c>ExpectedChars</c>, <c>ExpectedString1</c>, <c>ExpectedString2</c>
/// and <c>WordSplitChars</c> are defined outside this method.
/// </remarks>
public void Fix(Subtitle subtitle, IFixCallbacks callbacks)
{
    var language = Configuration.Settings.Language.FixCommonErrors;
    string fixAction = language.FixMissingPeriodAtEndOfLine;
    int periodsAdded = 0;

    for (int index = 0; index < subtitle.Paragraphs.Count; index++)
    {
        Paragraph current = subtitle.Paragraphs[index];
        Paragraph following = subtitle.GetParagraphOrDefault(index + 1);

        // Next paragraph without tags and without leading dash/quote characters.
        string followingPlain = following == null
            ? string.Empty
            : HtmlUtil.RemoveHtmlTags(following.Text, true).TrimStart('-', '"', '„').TrimStart();
        bool followsClosely = following != null && following.StartTime.TotalMilliseconds - current.EndTime.TotalMilliseconds < 400;
        string currentPlain = HtmlUtil.RemoveHtmlTags(current.Text).TrimEnd();

        // URLs, texts containing ExpectedChars and texts ending with an apostrophe are ignored.
        bool ignore = IsOneLineUrl(current.Text) || current.Text.Contains(ExpectedChars) || current.Text.EndsWith('\'');
        if (!ignore)
        {
            if (!string.IsNullOrEmpty(followingPlain) && following != null && following.Text.Length > 0 &&
                Utilities.UppercaseLetters.Contains(followingPlain[0]) &&
                currentPlain.Length > 0 && !ExpectedString1.Contains(currentPlain[currentPlain.Length - 1]))
            {
                string trimmedPlain = currentPlain.TrimEnd().TrimEnd('\'', '"', '“', '”').TrimEnd();

                // Do not end the sentence when a closely following paragraph starts with an
                // "I" word (always capitalised), or when its first word is a name.
                bool mayAppend = trimmedPlain.Length > 0
                                 && !ExpectedString2.Contains(trimmedPlain[trimmedPlain.Length - 1])
                                 && current.Text != current.Text.ToUpper()
                                 && !(followsClosely && (followingPlain.StartsWith("I ", StringComparison.Ordinal) || followingPlain.StartsWith("I'", StringComparison.Ordinal)))
                                 && !callbacks.IsName(following.Text.Split(WordSplitChars)[0])
                                 && callbacks.AllowFix(current, fixAction);
                if (mayAppend)
                {
                    string before = current.Text;
                    if (current.Text.EndsWith('>'))
                    {
                        // Text ends with a tag: put the period in front of the last '<'.
                        int tagStart = current.Text.LastIndexOf('<');
                        if (tagStart > 0)
                        {
                            current.Text = current.Text.Insert(tagStart, ".");
                        }
                    }
                    else if (current.Text.EndsWith('“') && currentPlain.StartsWith('„'))
                    {
                        current.Text = current.Text.TrimEnd('“') + ".“";
                    }
                    else if (current.Text.EndsWith('"') && currentPlain.StartsWith('"'))
                    {
                        current.Text = current.Text.TrimEnd('"') + ".\"";
                    }
                    else
                    {
                        current.Text += ".";
                    }

                    if (current.Text != before)
                    {
                        periodsAdded++;
                        callbacks.AddFixToListView(current, fixAction, before, current.Text);
                    }
                }
            }
            else if (following != null && !string.IsNullOrEmpty(current.Text) &&
                     Utilities.AllLettersAndNumbers.Contains(current.Text[current.Text.Length - 1]))
            {
                if (current.Text != current.Text.ToUpper())
                {
                    string followingCore = new StripableText(following.Text).StrippedText;
                    if (followingCore.Length > 0 && followingCore != followingCore.ToUpper() &&
                        Utilities.UppercaseLetters.Contains(followingCore[0]) &&
                        callbacks.AllowFix(current, fixAction))
                    {
                        // The nearest sentence sign to the left decides the closing sign:
                        // an open Spanish question/exclamation gets its matching closer.
                        string endSign = ".";
                        for (int k = current.Text.Length - 1; k >= 0; k--)
                        {
                            char c = current.Text[k];
                            if (@".!?¿¡".Contains(c))
                            {
                                if (c == '¿')
                                {
                                    endSign = "?";
                                }
                                else if (c == '¡')
                                {
                                    endSign = "!";
                                }
                                break;
                            }
                        }

                        string before = current.Text;
                        periodsAdded++;
                        current.Text += endSign;
                        callbacks.AddFixToListView(current, fixAction, before, current.Text);
                    }
                }
            }
        }

        if (current.Text.Length > 4)
        {
            // First matching "line break + dialog dash" variant, searched from index 3.
            string[] dialogMarkers = { " -", "-", "<i>-", "<i> -" };
            int breakIndex = -1;
            foreach (string marker in dialogMarkers)
            {
                breakIndex = current.Text.IndexOf(Environment.NewLine + marker, 3, StringComparison.Ordinal);
                if (breakIndex >= 0)
                {
                    break;
                }
            }

            if (breakIndex > 0 &&
                Configuration.Settings.General.UppercaseLetters.Contains(char.ToUpper(current.Text[breakIndex - 1])) &&
                callbacks.AllowFix(current, fixAction))
            {
                string before = current.Text;
                var firstLine = new StripableText(current.Text.Substring(0, breakIndex));

                string sign = firstLine.Pre.TrimEnd().EndsWith('¿')
                    ? "?" // Spanish ¿
                    : firstLine.Pre.TrimEnd().EndsWith('¡')
                        ? "!" // Spanish ¡
                        : ".";
                current.Text = current.Text.Insert(breakIndex, sign);

                periodsAdded++;
                callbacks.AddFixToListView(current, fixAction, before, current.Text);
            }
        }
    }

    callbacks.UpdateFixStatus(periodsAdded, language.AddPeriods, language.XPeriodsAdded);
}
/// <summary>
/// Adds a missing Spanish opening sign (<paramref name="inverseMark"/>) in front of a sentence
/// that ends with <paramref name="mark"/>, or a missing closing <paramref name="mark"/> after a
/// lone <paramref name="inverseMark"/>, in the text of <paramref name="p"/>.
/// </summary>
/// <param name="mark">Closing sign to look for; the code special-cases '!' and '?'.</param>
/// <param name="inverseMark">Opening sign text to insert; compared against "¿"/"¡" style prefixes.</param>
/// <param name="p">Paragraph whose <c>Text</c> is modified in place.</param>
/// <param name="last">Previous paragraph, or null. Only read, except in the branch that inserts
/// <paramref name="inverseMark"/> into <c>last.Text</c>.</param>
/// <param name="wasLastLineClosed">In/out flag. Set to true when '!', '?' or '.' occurs (at an
/// index above 0) before the first mark, and after every processed mark.</param>
/// <param name="fixAction">Fix description passed through to the callbacks.</param>
/// <param name="fixCount">Incremented only in the branch that changes <c>last.Text</c>.</param>
/// <param name="callbacks">Provides <c>AllowFix</c> and <c>AddFixToListView</c>; also passed to
/// <c>IsSpanishAbbreviation</c>.</param>
/// <remarks>
/// When the text contains <paramref name="mark"/>, nothing is done if (a) the previous paragraph
/// holds the opening sign without a closing one while this paragraph holds only the closing
/// sign, or (b) both signs occur equally often and, with tags removed, no opening sign remains
/// after trimming it from the start and no closing sign remains after trimming it from the end.
/// Otherwise each mark (at index above 0) is visited: if the previous sentence was closed and no
/// opening sign precedes the mark, the code walks backwards from the mark to the previous
/// '.', '!' or '?' (a '.' for which <c>IsSpanishAbbreviation</c> returns true does not stop the
/// walk) or to a line break followed by a dialog dash, separates a leading "(...)" or "[...]"
/// speaker label, and re-inserts the sentence with the opening sign after the
/// <c>StripableText.Pre</c> part. That branch changes <c>p.Text</c> but does not itself increment
/// <paramref name="fixCount"/> or call <c>AddFixToListView</c>. Finally a text ending in
/// mark + "..." is rewritten to end in "..." + mark.
/// When the text does not contain <paramref name="mark"/> but <c>Utilities.CountTagInText</c>
/// reports exactly one opening sign, the mark is inserted before the first '.', '!' or '?'
/// found at or after the opening sign, and "!?"/"?!" is reordered to mirror "¡¿"/"¿¡".
/// NOTE(review): the backward-scan compares <c>Substring(j - 3, 3)</c>, <c>(j - 4, 4)</c> and
/// <c>(j - 6, 6)</c> with strings built from <c>Environment.NewLine</c>, and the line-break skip
/// uses <c>Substring(..., 2)</c>; these lengths only match when the newline is two characters.
/// NOTE(review): the branch that edits <c>last.Text</c> requires
/// <c>inverseMarkIndex == p.Text.IndexOf(mark)</c>; confirm this can ever be true.
/// </remarks>
private void FixSpanishInvertedLetter(char mark, string inverseMark, Paragraph p, Paragraph last, ref bool wasLastLineClosed, string fixAction, ref int fixCount, IFixCallbacks callbacks) { if (p.Text.Contains(mark)) { bool skip = false; if (last != null && p.Text.Contains(mark) && !p.Text.Contains(inverseMark) && last.Text.Contains(inverseMark) && !last.Text.Contains(mark)) { skip = true; } if (!skip && Utilities.CountTagInText(p.Text, mark) == Utilities.CountTagInText(p.Text, inverseMark) && HtmlUtil.RemoveHtmlTags(p.Text).TrimStart(inverseMark[0]).Contains(inverseMark) == false && HtmlUtil.RemoveHtmlTags(p.Text).TrimEnd(mark).Contains(mark) == false) { skip = true; } if (!skip) { int startIndex = 0; int markIndex = p.Text.IndexOf(mark); if (!wasLastLineClosed && ((p.Text.IndexOf('!') > 0 && p.Text.IndexOf('!') < markIndex) || (p.Text.IndexOf('?') > 0 && p.Text.IndexOf('?') < markIndex) || (p.Text.IndexOf('.') > 0 && p.Text.IndexOf('.') < markIndex))) { wasLastLineClosed = true; } while (markIndex > 0 && startIndex < p.Text.Length) { int inverseMarkIndex = p.Text.IndexOf(inverseMark, startIndex, StringComparison.Ordinal); if (wasLastLineClosed && (inverseMarkIndex < 0 || inverseMarkIndex > markIndex)) { if (callbacks.AllowFix(p, fixAction)) { int j = markIndex - 1; while (j > startIndex && (p.Text[j] == '.' || p.Text[j] == '!' || p.Text[j] == '?')) { j--; } while (j > startIndex && (p.Text[j] != '.' || IsSpanishAbbreviation(p.Text, j, callbacks)) && p.Text[j] != '!' && p.Text[j] != '?' 
// (backward-scan condition continues) also stop at a line break followed by "-", " -" or "<i>-"
&& !(j > 3 && p.Text.Substring(j - 3, 3) == Environment.NewLine + "-") && !(j > 4 && p.Text.Substring(j - 4, 4) == Environment.NewLine + " -") && !(j > 6 && p.Text.Substring(j - 6, 6) == Environment.NewLine + "<i>-")) { j--; } if (@".!?".Contains(p.Text[j])) { j++; } if (j + 3 < p.Text.Length && p.Text.Substring(j + 1, 2) == Environment.NewLine) { j += 3; } else if (j + 2 < p.Text.Length && p.Text.Substring(j, 2) == Environment.NewLine) { j += 2; } if (j >= startIndex) { string part = p.Text.Substring(j, markIndex - j + 1); string speaker = string.Empty; int speakerEnd = part.IndexOf(')'); if (part.StartsWith('(') && speakerEnd > 0 && speakerEnd < part.IndexOf(mark)) { while (Environment.NewLine.Contains(part[speakerEnd + 1])) { speakerEnd++; } speaker = part.Substring(0, speakerEnd + 1); part = part.Substring(speakerEnd + 1); } speakerEnd = part.IndexOf(']'); if (part.StartsWith('[') && speakerEnd > 0 && speakerEnd < part.IndexOf(mark)) { while (Environment.NewLine.Contains(part[speakerEnd + 1])) { speakerEnd++; } speaker = part.Substring(0, speakerEnd + 1); part = part.Substring(speakerEnd + 1); } var st = new StripableText(part); if (j == 0 && mark == '!' && st.Pre == "¿" && Utilities.CountTagInText(p.Text, mark) == 1 && HtmlUtil.RemoveHtmlTags(p.Text).EndsWith(mark)) { p.Text = inverseMark + p.Text; } else if (j == 0 && mark == '?' 
// (condition continues) mirror case of the branch above: a "?" sentence already opened with "¡"
&& st.Pre == "¡" && Utilities.CountTagInText(p.Text, mark) == 1 && HtmlUtil.RemoveHtmlTags(p.Text).EndsWith(mark)) { p.Text = inverseMark + p.Text; } else { string temp = inverseMark; int addToIndex = 0; while (p.Text.Length > markIndex + 1 && p.Text[markIndex + 1] == mark && Utilities.CountTagInText(p.Text, mark) > Utilities.CountTagInText(p.Text + temp, inverseMark)) { temp += inverseMark; st.Post += mark; markIndex++; addToIndex++; } p.Text = p.Text.Remove(j, markIndex - j + 1).Insert(j, speaker + st.Pre + temp + st.StrippedText + st.Post); markIndex += addToIndex; } } } } else if (last != null && !wasLastLineClosed && inverseMarkIndex == p.Text.IndexOf(mark) && !last.Text.Contains(inverseMark)) { string lastOldtext = last.Text; int idx = last.Text.Length - 2; while (idx > 0 && (last.Text.Substring(idx, 2) != ". ") && (last.Text.Substring(idx, 2) != "! ") && (last.Text.Substring(idx, 2) != "? ")) { idx--; } last.Text = last.Text.Insert(idx, inverseMark); fixCount++; callbacks.AddFixToListView(last, fixAction, lastOldtext, last.Text); } startIndex = markIndex + 2; if (startIndex < p.Text.Length) { markIndex = p.Text.IndexOf(mark, startIndex); } else { markIndex = -1; } wasLastLineClosed = true; } } if (p.Text.EndsWith(mark + "...", StringComparison.Ordinal) && p.Text.Length > 4) { p.Text = p.Text.Remove(p.Text.Length - 4, 4) + "..." + mark; } } else if (Utilities.CountTagInText(p.Text, inverseMark) == 1) { int idx = p.Text.IndexOf(inverseMark, StringComparison.Ordinal); while (idx < p.Text.Length && !@".!?".Contains(p.Text[idx])) { idx++; } if (idx < p.Text.Length) { p.Text = p.Text.Insert(idx, mark.ToString(CultureInfo.InvariantCulture)); if (p.Text.Contains("¡¿") && p.Text.Contains("!?")) { p.Text = p.Text.Replace("!?", "?!"); } if (p.Text.Contains("¿¡") && p.Text.Contains("?!")) { p.Text = p.Text.Replace("?!", "!?"); } } } }
/// <summary>
/// Uppercases the first letter that follows a ':' or ';' - either inside a paragraph or at
/// the start of a paragraph whose predecessor ends with one of those characters.
/// </summary>
/// <param name="subtitle">Subtitle to correct; a paragraph's text is only written back when
/// <c>callbacks.AllowFix</c> accepts the changed text.</param>
/// <param name="callbacks">
/// Supplies <c>AllowFix</c>, <c>AddFixToListView</c>, <c>UpdateFixStatus</c> and the
/// <c>Encoding</c>/<c>Language</c> used for the Turkish letter check.
/// </param>
/// <remarks>
/// The in-paragraph scan only runs when the original text contains <c>ExpectedChars</c>
/// (defined outside this method). After a colon/semicolon it skips one space and a leading
/// "&lt;i&gt;", "&lt;b&gt;", "&lt;u&gt;" or "&lt;font ...&gt;" tag, then uppercases the next
/// lowercase letter. A lowercase 'i' directly followed by an uppercase letter (e.g. "iPhone")
/// is kept as is.
/// </remarks>
public void Fix(Subtitle subtitle, IFixCallbacks callbacks)
{
    var language = Configuration.Settings.Language.FixCommonErrors;
    string fixAction = language.StartWithUppercaseLetterAfterColon;
    int noOfFixes = 0;
    for (int i = 0; i < subtitle.Paragraphs.Count; i++)
    {
        // Work on a copy; the real paragraph is only updated once the fix is allowed.
        var p = new Paragraph(subtitle.Paragraphs[i]);
        Paragraph last = subtitle.GetParagraphOrDefault(i - 1);
        string oldText = p.Text;
        int skipCount = 0;

        if (last != null)
        {
            string lastText = HtmlUtil.RemoveHtmlTags(last.Text);
            if (lastText.EndsWith(':') || lastText.EndsWith(';'))
            {
                // Previous paragraph ended with a colon: uppercase this paragraph's first letter.
                var st = new StripableText(p.Text);
                if (st.StrippedText.Length > 0 && st.StrippedText[0] != char.ToUpper(st.StrippedText[0]))
                {
                    p.Text = st.Pre + char.ToUpper(st.StrippedText[0]) + st.StrippedText.Substring(1) + st.Post;
                }
            }
        }

        if (oldText.Contains(ExpectedChars))
        {
            bool lastWasColon = false;
            for (int j = 0; j < p.Text.Length; j++)
            {
                var s = p.Text[j];
                if (s == ':' || s == ';')
                {
                    lastWasColon = true;
                }
                else if (lastWasColon)
                {
                    // Skip a single space after the colon.
                    if (j + 2 < p.Text.Length && p.Text[j] == ' ')
                    {
                        s = p.Text[++j];
                    }

                    var startFromJ = p.Text.Substring(j);
                    if (startFromJ.Length > 3 && startFromJ[0] == '<' && startFromJ[2] == '>' &&
                        (startFromJ[1] == 'i' || startFromJ[1] == 'b' || startFromJ[1] == 'u'))
                    {
                        // "<i>", "<b>" or "<u>": jump to the '>' so the loop continues after the tag.
                        skipCount = 2;
                    }
                    else if (startFromJ.StartsWith("<font ", StringComparison.OrdinalIgnoreCase) && startFromJ.Contains('>'))
                    {
                        // "<font ...>": jump to the closing '>' of the opening tag.
                        skipCount = startFromJ.IndexOf('>', 6);
                    }
                    else if (Helper.IsTurkishLittleI(s, callbacks.Encoding, callbacks.Language))
                    {
                        p.Text = p.Text.Remove(j, 1).Insert(j, Helper.GetTurkishUppercaseLetter(s, callbacks.Encoding).ToString(CultureInfo.InvariantCulture));
                        lastWasColon = false;
                    }
                    else if (char.IsLower(s))
                    {
                        // Keep words such as "iPhone": an 'i' directly followed by an uppercase
                        // letter. char.IsUpper is required here - comparing the next character
                        // with its own ToUpper() is also true for spaces, digits and
                        // punctuation, which left a lone "i" after a colon in lowercase.
                        bool change = true;
                        if (s == 'i' && p.Text.Length > j + 1 && char.IsUpper(p.Text[j + 1]))
                        {
                            change = false;
                        }
                        if (change)
                        {
                            p.Text = p.Text.Remove(j, 1).Insert(j, char.ToUpper(s).ToString(CultureInfo.InvariantCulture));
                        }
                        lastWasColon = false;
                    }
                    else if (!(" " + Environment.NewLine).Contains(s))
                    {
                        // Any other non-whitespace character ends the "after colon" state.
                        lastWasColon = false;
                    }

                    // Move the 'j' pointer past a skipped tag and reset skipCount to 0.
                    if (skipCount > 0)
                    {
                        j += skipCount;
                        skipCount = 0;
                    }
                }
            }
        }

        if (oldText != p.Text && callbacks.AllowFix(p, fixAction))
        {
            noOfFixes++;
            subtitle.Paragraphs[i].Text = p.Text;
            callbacks.AddFixToListView(p, fixAction, oldText, p.Text);
        }
    }
    callbacks.UpdateFixStatus(noOfFixes, language.StartWithUppercaseLetterAfterColon, noOfFixes.ToString(CultureInfo.InvariantCulture));
}