// For "{MAN} Hi, how are you today!" nothing is split off at the start (Pre is empty, the
// "{MAN}" part stays in StrippedText) and only the trailing "!" ends up in Post.
public void StripableTextFontDontTouch()
{
    var st = new StripableText("{MAN} Hi, how are you today!");

    // Assert.AreEqual takes (expected, actual); keeping that order makes failure messages read correctly.
    Assert.AreEqual("", st.Pre);
    Assert.AreEqual("!", st.Post);
    Assert.AreEqual("{MAN} Hi, how are you today", st.StrippedText);
}
// A text consisting only of an opening italic tag is expected to land entirely in Pre,
// leaving Post and StrippedText empty.
public void StripableOnlyPre3()
{
    var st = new StripableText("<i>");

    // Assert.AreEqual takes (expected, actual); keeping that order makes failure messages read correctly.
    Assert.AreEqual("<i>", st.Pre);
    Assert.AreEqual("", st.Post);
    Assert.AreEqual("", st.StrippedText);
}
// A single letter wrapped in italic tags: the opening tag goes to Pre, the closing tag to Post,
// and only the letter remains as StrippedText.
public void StripableTextItalic2()
{
    var st = new StripableText("<i>O</i>");

    // Assert.AreEqual takes (expected, actual); keeping that order makes failure messages read correctly.
    Assert.AreEqual("<i>", st.Pre);
    Assert.AreEqual("</i>", st.Post);
    Assert.AreEqual("O", st.StrippedText);
}
// An unclosed italic tag followed by text: the tag goes to Pre and the trailing "!" to Post.
public void StripableTextItalic3()
{
    var st = new StripableText("<i>Hi!");

    // Assert.AreEqual takes (expected, actual); keeping that order makes failure messages read correctly.
    Assert.AreEqual("<i>", st.Pre);
    Assert.AreEqual("!", st.Post);
    Assert.AreEqual("Hi", st.StrippedText);
}
// A font tag with an attribute: the whole opening tag goes to Pre, and the "!" together with
// the closing tag goes to Post.
public void StripableTextFont()
{
    var st = new StripableText("<font color=\"red\">Hi!</font>");

    // Assert.AreEqual takes (expected, actual); keeping that order makes failure messages read correctly.
    Assert.AreEqual("<font color=\"red\">", st.Pre);
    Assert.AreEqual("!</font>", st.Post);
    Assert.AreEqual("Hi", st.StrippedText);
}
// A leading "{\an9}" block is expected in Pre, the trailing "!" in Post.
public void StripableTextAss()
{
    var st = new StripableText("{\\an9}Hi!");

    // Assert.AreEqual takes (expected, actual); keeping that order makes failure messages read correctly.
    Assert.AreEqual("{\\an9}", st.Pre);
    Assert.AreEqual("!", st.Post);
    Assert.AreEqual("Hi", st.StrippedText);
}
/// <summary>
/// Balances a Spanish closing mark (<paramref name="mark"/>, e.g. '?' or '!') with its inverted
/// opening mark (<paramref name="inverseMark"/>, e.g. "¿" or "¡") inside <paramref name="p"/>.
/// </summary>
/// <remarks>
/// Two main cases, as visible in the code:
/// 1) p.Text contains <paramref name="mark"/>:
///    - The paragraph is skipped when it has the closing mark but no inverted mark while
///      <paramref name="last"/> has the inverted mark but no closing mark (the sentence appears to span
///      both paragraphs), or when both marks occur equally often and, after removing HTML tags, no
///      inverted mark is left once leading inverted marks are trimmed and no closing mark is left once
///      trailing closing marks are trimmed.
///    - <paramref name="wasLastLineClosed"/> is forced to true when a '!', '?' or '.' occurs (at index
///      greater than 0) before the first closing mark.
///    - For every closing mark that is not preceded by an inverted mark (searched from the current
///      start index), and only if AllowFix(p, fixAction) returns true, the code walks backwards to the
///      start of the sentence (stopping at '.', '!', '?' or a new line followed by a dash; a '.' that
///      IsSpanishAbbreviation accepts does not stop the walk), keeps an optional leading "(...)" or
///      "[...]" part untouched, and re-assembles the sentence as
///      speaker + Pre + inverted mark(s) + StrippedText + Post using <c>StripableText</c>.
///      Repeated closing marks get additional inverted marks while the counts are still unbalanced.
///    - Special case at the start of the text: if the sentence already starts with the other
///      inverted mark ("¿" for '!' / "¡" for '?'), the text has exactly one closing mark and ends with
///      it (tags removed), the inverted mark is simply prepended to the whole text.
///    - After each processed closing mark <paramref name="wasLastLineClosed"/> is set to true.
///    - Finally a text ending in mark + "..." is rewritten to end in "..." + mark.
/// 2) p.Text has no closing mark but exactly one inverted mark: the closing mark is inserted in front
///    of the first '.', '!' or '?' found at or after the inverted mark, and "!?" / "?!" are swapped so
///    that the closing order mirrors an opening "¡¿" / "¿¡".
///
/// <paramref name="fixCount"/> is only incremented in the branch that edits <paramref name="last"/>;
/// that branch also reports the change through AddFixToListView.
///
/// NOTE(review): the branch that edits <paramref name="last"/> requires
/// <c>inverseMarkIndex == p.Text.IndexOf(mark)</c>. Since this code only runs when p.Text contains the
/// closing mark, those two indexes do not look like they can ever be equal — confirm whether this
/// branch is reachable.
/// NOTE(review): in that same branch <c>idx</c> can be negative when last.Text is shorter than two
/// characters, which would make <c>Insert</c> throw — confirm against callers.
/// </remarks>
private void FixSpanishInvertedLetter(char mark, string inverseMark, Paragraph p, Paragraph last, ref bool wasLastLineClosed, string fixAction, ref int fixCount) { if (p.Text.Contains(mark)) { bool skip = false; if (last != null && p.Text.Contains(mark) && !p.Text.Contains(inverseMark) && last.Text.Contains(inverseMark) && !last.Text.Contains(mark)) skip = true; if (!skip && Utilities.CountTagInText(p.Text, mark) == Utilities.CountTagInText(p.Text, inverseMark) && HtmlUtil.RemoveHtmlTags(p.Text).TrimStart(inverseMark[0]).Contains(inverseMark) == false && HtmlUtil.RemoveHtmlTags(p.Text).TrimEnd(mark).Contains(mark) == false) { skip = true; } if (!skip) { int startIndex = 0; int markIndex = p.Text.IndexOf(mark); if (!wasLastLineClosed && ((p.Text.IndexOf('!') > 0 && p.Text.IndexOf('!') < markIndex) || (p.Text.IndexOf('?') > 0 && p.Text.IndexOf('?') < markIndex) || (p.Text.IndexOf('.') > 0 && p.Text.IndexOf('.') < markIndex))) wasLastLineClosed = true; while (markIndex > 0 && startIndex < p.Text.Length) { int inverseMarkIndex = p.Text.IndexOf(inverseMark, startIndex, StringComparison.Ordinal); if (wasLastLineClosed && (inverseMarkIndex < 0 || inverseMarkIndex > markIndex)) { if (AllowFix(p, fixAction)) { int j = markIndex - 1; while (j > startIndex && (p.Text[j] == '.' || p.Text[j] == '!' || p.Text[j] == '?')) j--; while (j > startIndex && (p.Text[j] != '.' || IsSpanishAbbreviation(p.Text, j)) && p.Text[j] != '!' && p.Text[j] != '?' 
&& !(j > 3 && p.Text.Substring(j - 3, 3) == Environment.NewLine + "-") && !(j > 4 && p.Text.Substring(j - 4, 4) == Environment.NewLine + " -") && !(j > 6 && p.Text.Substring(j - 6, 6) == Environment.NewLine + "<i>-")) j--; if (@".!?".Contains(p.Text[j])) { j++; } if (j + 3 < p.Text.Length && p.Text.Substring(j + 1, 2) == Environment.NewLine) { j += 3; } else if (j + 2 < p.Text.Length && p.Text.Substring(j, 2) == Environment.NewLine) { j += 2; } if (j >= startIndex) { string part = p.Text.Substring(j, markIndex - j + 1); string speaker = string.Empty; int speakerEnd = part.IndexOf(')'); if (part.StartsWith('(') && speakerEnd > 0 && speakerEnd < part.IndexOf(mark)) { while (Environment.NewLine.Contains(part[speakerEnd + 1])) speakerEnd++; speaker = part.Substring(0, speakerEnd + 1); part = part.Substring(speakerEnd + 1); } speakerEnd = part.IndexOf(']'); if (part.StartsWith('[') && speakerEnd > 0 && speakerEnd < part.IndexOf(mark)) { while (Environment.NewLine.Contains(part[speakerEnd + 1])) speakerEnd++; speaker = part.Substring(0, speakerEnd + 1); part = part.Substring(speakerEnd + 1); } var st = new StripableText(part); if (j == 0 && mark == '!' && st.Pre == "¿" && Utilities.CountTagInText(p.Text, mark) == 1 && HtmlUtil.RemoveHtmlTags(p.Text).EndsWith(mark)) { p.Text = inverseMark + p.Text; } else if (j == 0 && mark == '?' 
&& st.Pre == "¡" && Utilities.CountTagInText(p.Text, mark) == 1 && HtmlUtil.RemoveHtmlTags(p.Text).EndsWith(mark)) { p.Text = inverseMark + p.Text; } else { string temp = inverseMark; int addToIndex = 0; while (p.Text.Length > markIndex + 1 && p.Text[markIndex + 1] == mark && Utilities.CountTagInText(p.Text, mark) > Utilities.CountTagInText(p.Text + temp, inverseMark)) { temp += inverseMark; st.Post += mark; markIndex++; addToIndex++; } p.Text = p.Text.Remove(j, markIndex - j + 1).Insert(j, speaker + st.Pre + temp + st.StrippedText + st.Post); markIndex += addToIndex; } } } } else if (last != null && !wasLastLineClosed && inverseMarkIndex == p.Text.IndexOf(mark) && !last.Text.Contains(inverseMark)) { string lastOldtext = last.Text; int idx = last.Text.Length - 2; while (idx > 0 && (last.Text.Substring(idx, 2) != ". ") && (last.Text.Substring(idx, 2) != "! ") && (last.Text.Substring(idx, 2) != "? ")) idx--; last.Text = last.Text.Insert(idx, inverseMark); fixCount++; AddFixToListView(last, fixAction, lastOldtext, last.Text); } startIndex = markIndex + 2; if (startIndex < p.Text.Length) markIndex = p.Text.IndexOf(mark, startIndex); else markIndex = -1; wasLastLineClosed = true; } } if (p.Text.EndsWith(mark + "...", StringComparison.Ordinal) && p.Text.Length > 4) { p.Text = p.Text.Remove(p.Text.Length - 4, 4) + "..." + mark; } } else if (Utilities.CountTagInText(p.Text, inverseMark) == 1) { int idx = p.Text.IndexOf(inverseMark, StringComparison.Ordinal); while (idx < p.Text.Length && !@".!?".Contains(p.Text[idx])) { idx++; } if (idx < p.Text.Length) { p.Text = p.Text.Insert(idx, mark.ToString(CultureInfo.InvariantCulture)); if (p.Text.Contains("¡¿") && p.Text.Contains("!?")) p.Text = p.Text.Replace("!?", "?!"); if (p.Text.Contains("¿¡") && p.Text.Contains("?!")) p.Text = p.Text.Replace("?!", "!?"); } } }
/// <summary>
/// Capitalizes the first letter of a paragraph (and of its second line / dialog lines) when the
/// preceding text ended a sentence.
/// </summary>
/// <param name="p">Paragraph to fix; its <c>Text</c> is modified in place.</param>
/// <param name="prev">Preceding paragraph, or null when there is none (treated as "sentence ended").</param>
/// <param name="encoding">Passed through to the Turkish 'i' helpers.</param>
/// <param name="language">Language code; "en" enables the English-only rules.</param>
/// <returns>The (possibly changed) <c>p.Text</c>; null when <c>p.Text</c> is null.</returns>
/// <remarks>
/// Three steps run in order:
/// 1) the first letter of the whole paragraph, judged against <paramref name="prev"/>;
/// 2) for two-line paragraphs, the first letter of line two (judged against line one) and the
///    letter after a leading "- " / "&lt;i&gt;- " on both lines;
/// 3) the first letter after a line break that is followed by a dash (several tag/music-note variants).
/// </remarks>
public static string FixStartWithUppercaseLetterAfterParagraph(Paragraph p, Paragraph prev, Encoding encoding, string language)
{
    // Guard once up front: previously steps 1 and 2 null-checked p.Text but step 3 dereferenced it
    // unconditionally, so a null text ended in a NullReferenceException.
    if (p.Text == null)
    {
        return null;
    }

    // Step 1: first letter of the paragraph.
    if (p.Text.Length > 1)
    {
        string pre;
        string text = SplitOffLeadingMarkup(p.Text, out pre);
        char firstLetter = text[0];

        // " ." is a sentinel meaning "no previous paragraph" and always counts as a finished sentence.
        string prevText = " .";
        if (prev != null)
        {
            prevText = HtmlUtil.RemoveHtmlTags(prev.Text);
        }

        bool isPrevEndOfLine = FixCommonErrorsHelper.IsPreviousTextEndOfParagraph(prevText);
        if (prevText == " .")
        {
            isPrevEndOfLine = true;
        }

        if (!StartsWithUrlPrefix(text) &&
            (char.IsLower(firstLetter) || IsTurkishLittleI(firstLetter, encoding, language)) &&
            !char.IsDigit(firstLetter) &&
            isPrevEndOfLine &&
            !EndsWithKnownEnglishAbbreviation(prevText, language))
        {
            p.Text = pre + CapitalizeFirstLetter(text, encoding, language);
        }
    }

    // Step 2: second line of a two-line paragraph, plus dialog dashes on both lines.
    if (p.Text.Contains(Environment.NewLine))
    {
        var arr = p.Text.SplitToLines();
        if (arr.Length == 2 && arr[1].Length > 1)
        {
            string pre;
            string text = SplitOffLeadingMarkup(arr[1], out pre);
            char firstLetter = text[0];
            string prevText = HtmlUtil.RemoveHtmlTags(arr[0]);
            bool isPrevEndOfLine = FixCommonErrorsHelper.IsPreviousTextEndOfParagraph(prevText);

            if (!StartsWithUrlPrefix(text) &&
                (char.IsLower(firstLetter) || IsTurkishLittleI(firstLetter, encoding, language)) &&
                !prevText.EndsWith("...", StringComparison.Ordinal) &&
                isPrevEndOfLine &&
                !EndsWithKnownEnglishAbbreviation(prevText, language))
            {
                p.Text = arr[0] + Environment.NewLine + pre + CapitalizeFirstLetter(text, encoding, language);
            }

            arr = p.Text.SplitToLines();
            bool firstLineIsDialog = (arr[0].StartsWith('-') || arr[0].StartsWith("<i>-", StringComparison.Ordinal)) &&
                                     !arr[0].StartsWith("--", StringComparison.Ordinal) &&
                                     !arr[0].StartsWith("<i>--", StringComparison.Ordinal);
            bool secondLineIsDialog = (arr[1].StartsWith('-') || arr[1].StartsWith("<i>-", StringComparison.Ordinal)) &&
                                      !arr[1].StartsWith("--", StringComparison.Ordinal) &&
                                      !arr[1].StartsWith("<i>--", StringComparison.Ordinal);
            if (firstLineIsDialog && secondLineIsDialog)
            {
                // Second dialog line: capitalize the letter right after the dash when line one ended a sentence.
                if (isPrevEndOfLine && arr[1].StartsWith("<i>- ", StringComparison.Ordinal) && arr[1].Length > 6)
                {
                    p.Text = arr[0] + Environment.NewLine + "<i>- " + char.ToUpper(arr[1][5]) + arr[1].Remove(0, 6);
                }
                else if (isPrevEndOfLine && arr[1].StartsWith("- ", StringComparison.Ordinal) && arr[1].Length > 3)
                {
                    p.Text = arr[0] + Environment.NewLine + "- " + char.ToUpper(arr[1][2]) + arr[1].Remove(0, 3);
                }

                // First dialog line: judged against the previous paragraph, but only when that paragraph
                // ended less than 10000 ms before this one starts; otherwise the sentinel applies.
                arr = p.Text.SplitToLines();
                prevText = " .";
                if (prev != null && p.StartTime.TotalMilliseconds - 10000 < prev.EndTime.TotalMilliseconds)
                {
                    prevText = HtmlUtil.RemoveHtmlTags(prev.Text);
                }

                bool isPrevLineEndOfLine = FixCommonErrorsHelper.IsPreviousTextEndOfParagraph(prevText);
                if (isPrevLineEndOfLine && arr[0].StartsWith("<i>- ", StringComparison.Ordinal) && arr[0].Length > 6)
                {
                    p.Text = "<i>- " + char.ToUpper(arr[0][5]) + arr[0].Remove(0, 6) + Environment.NewLine + arr[1];
                }
                else if (isPrevLineEndOfLine && arr[0].StartsWith("- ", StringComparison.Ordinal) && arr[0].Length > 3)
                {
                    p.Text = "- " + char.ToUpper(arr[0][2]) + arr[0].Remove(0, 3) + Environment.NewLine + arr[1];
                }
            }
        }
    }

    // Step 3: first letter after "<new line><dash variant>".
    if (p.Text.Length > 4)
    {
        // Checked in this order; the first variant that is found wins.
        string[] lineStarts = { " -", "- <i> ♪", "-", "<i>-", "<i> -", "♪ -", "♪ <i> -", "♪ <i>-" };
        int indexOfNewLine = -1;
        int len = 0;
        for (int k = 0; k < lineStarts.Length && indexOfNewLine < 0; k++)
        {
            indexOfNewLine = p.Text.IndexOf(Environment.NewLine + lineStarts[k], 1, StringComparison.Ordinal);

            // The first variant historically uses an offset of 0; all others use the variant's length.
            len = k == 0 ? 0 : lineStarts[k].Length;
        }

        if (indexOfNewLine > 0)
        {
            string text = p.Text.Substring(indexOfNewLine + len);
            var st = new StripableText(text);

            if (st.StrippedText.Length > 0 && IsTurkishLittleI(st.StrippedText[0], encoding, language) && !st.Pre.EndsWith('[') && !st.Pre.Contains("..."))
            {
                text = st.Pre + GetTurkishUppercaseLetter(st.StrippedText[0], encoding) + st.StrippedText.Substring(1) + st.Post;
                p.Text = p.Text.Remove(indexOfNewLine + len).Insert(indexOfNewLine + len, text);
            }
            else if (st.StrippedText.Length > 0 && st.StrippedText[0] != char.ToUpper(st.StrippedText[0]) && !st.Pre.EndsWith('[') && !st.Pre.Contains("..."))
            {
                text = st.Pre + char.ToUpper(st.StrippedText[0]) + st.StrippedText.Substring(1) + st.Post;
                p.Text = p.Text.Remove(indexOfNewLine + len).Insert(indexOfNewLine + len, text);
            }
        }
    }

    return p.Text;
}

// Strips a leading italic tag and leading music-note/space characters from text; the stripped part is
// returned in pre. Each prefix is only removed when at least one character remains after it.
private static string SplitOffLeadingMarkup(string text, out string pre)
{
    pre = string.Empty;

    // Note: a matching tag replaces pre (it is not appended), exactly as the checks were written before.
    foreach (string tag in new[] { "<i> ", "<i>", "<I> ", "<I>" })
    {
        if (text.Length > tag.Length && text.StartsWith(tag, StringComparison.Ordinal))
        {
            pre = tag;
            text = text.Substring(tag.Length);
        }
    }

    foreach (char c in new[] { '♪', ' ', '♫', ' ' })
    {
        if (text.Length > 2 && text[0] == c)
        {
            pre = pre + c;
            text = text.Substring(1);
        }
    }

    return text;
}

// True when text starts like a web address, which must keep its lowercase start.
private static bool StartsWithUrlPrefix(string text)
{
    return text.StartsWith("www.", StringComparison.Ordinal) ||
           text.StartsWith("http:", StringComparison.Ordinal) ||
           text.StartsWith("https:", StringComparison.Ordinal);
}

// True when the language is English and prevText ends with an abbreviation whose final period does not end a sentence.
private static bool EndsWithKnownEnglishAbbreviation(string prevText, string language)
{
    return language == "en" &&
           (prevText.EndsWith(" o.r.", StringComparison.Ordinal) ||
            prevText.EndsWith(" a.m.", StringComparison.Ordinal) ||
            prevText.EndsWith(" p.m.", StringComparison.Ordinal));
}

// True when text starts with a lowercase 'l' that stands for an uppercase 'I' (e.g. "l am", "lt").
// The "l am " test is deliberately kept outside the English-only group, matching the previous behavior.
private static bool StartsWithLowercaseLInsteadOfI(string text, string language)
{
    return (language == "en" &&
            (text.StartsWith("l ", StringComparison.Ordinal) ||
             text.StartsWith("l-I", StringComparison.Ordinal) ||
             text.StartsWith("ls ", StringComparison.Ordinal) ||
             text.StartsWith("lnterested", StringComparison.Ordinal) ||
             text.StartsWith("lsn't ", StringComparison.Ordinal) ||
             text.StartsWith("ldiot", StringComparison.Ordinal) ||
             text.StartsWith("ln", StringComparison.Ordinal) ||
             text.StartsWith("lm", StringComparison.Ordinal) ||
             text.StartsWith("ls", StringComparison.Ordinal) ||
             text.StartsWith("lt", StringComparison.Ordinal) ||
             text.StartsWith("lf ", StringComparison.Ordinal) ||
             text.StartsWith("lc", StringComparison.Ordinal) ||
             text.StartsWith("l'm ", StringComparison.Ordinal))) ||
           text.StartsWith("l am ", StringComparison.Ordinal);
}

// Returns text with its first character replaced by the Turkish uppercase letter, by "I"
// (lowercase 'l' standing for 'I'), or by its regular uppercase form - checked in that order.
private static string CapitalizeFirstLetter(string text, Encoding encoding, string language)
{
    char firstLetter = text[0];
    if (IsTurkishLittleI(firstLetter, encoding, language))
    {
        return GetTurkishUppercaseLetter(firstLetter, encoding) + text.Substring(1);
    }

    if (StartsWithLowercaseLInsteadOfI(text, language))
    {
        return "I" + text.Substring(1);
    }

    return char.ToUpper(firstLetter) + text.Substring(1);
}
/// <summary>
/// Replaces an uppercase 'I' that appears inside a lowercase word (typically an OCR error for a
/// lowercase 'l', sometimes for 'i') in every paragraph of <c>Subtitle</c>.
/// </summary>
/// <remarks>
/// Pass 1 uses <c>ReAfterLowercaseLetter</c> on the raw text: an 'I' directly after the matched
/// character becomes 'l', except in "Mc..." names.
/// Pass 2 uses <c>ReBeforeLowercaseLetter</c> on the stripped text: the matched 'I' becomes 'l' or 'i'
/// depending on its neighbours, unless the whole word is a known name or an excluded word.
/// Every change is gated by AllowFix and reported via AddFixToListView; the total is passed to UpdateFixStatus.
/// </remarks>
public void FixUppercaseIInsideWords()
{
    string fixAction = _language.FixUppercaseIInsideLowercaseWord;
    int uppercaseIsInsideLowercaseWords = 0;
    for (int i = 0; i < Subtitle.Paragraphs.Count; i++)
    {
        Paragraph p = Subtitle.Paragraphs[i];
        string oldText = p.Text;

        // Pass 1: 'I' right after the matched character.
        Match match = ReAfterLowercaseLetter.Match(p.Text);
        while (match.Success)
        {
            // Irish names (McIntyre etc.) keep their 'I'. The "Mc" starts one character before the match,
            // so match.Index == 1 (name at the very start of the text) must be covered too; the former
            // "match.Index > 1" test missed that case.
            bool isMcName = match.Index > 0 && p.Text.Substring(match.Index - 1, 2) == "Mc";
            if (!isMcName && p.Text[match.Index + 1] == 'I' && AllowFix(p, fixAction))
            {
                // Replacements keep the length, so the untouched tail can be taken from the original text.
                p.Text = p.Text.Substring(0, match.Index + 1) + "l";
                if (match.Index + 2 < oldText.Length)
                {
                    p.Text += oldText.Substring(match.Index + 2);
                }

                uppercaseIsInsideLowercaseWords++;
                AddFixToListView(p, fixAction, oldText, p.Text);
            }

            match = match.NextMatch();
        }

        // Pass 2: 'I' in front of the matched lowercase text, evaluated on the stripped text.
        var st = new StripableText(p.Text);
        match = ReBeforeLowercaseLetter.Match(st.StrippedText);
        while (match.Success)
        {
            string word = GetWholeWord(st.StrippedText, match.Index);
            if (!IsName(word) && AllowFix(p, fixAction))
            {
                string replacement = GetReplacementForUppercaseI(st.StrippedText, match, word);
                if (replacement != null)
                {
                    st.StrippedText = st.StrippedText.Remove(match.Index, 1).Insert(match.Index, replacement);
                    p.Text = st.MergedString;
                    uppercaseIsInsideLowercaseWords++;
                    AddFixToListView(p, fixAction, oldText, p.Text);
                }
            }

            match = match.NextMatch();
        }
    }

    UpdateFixStatus(uppercaseIsInsideLowercaseWords, _language.FixUppercaseIInsindeLowercaseWords, _language.XUppercaseIsFoundInsideLowercaseWords);
}

// Decides what the uppercase 'I' at match.Index in text should become: "l", "i", or null for "leave it".
private static string GetReplacementForUppercaseI(string text, Match match, string word)
{
    if (word.Equals("internal", StringComparison.OrdinalIgnoreCase) ||
        word.Equals("island", StringComparison.OrdinalIgnoreCase) ||
        word.Equals("islands", StringComparison.OrdinalIgnoreCase))
    {
        return null;
    }

    int index = match.Index;
    if (index == 0)
    {
        // First letter of the paragraph: considered too risky to change without knowing
        // whether the previous line continues into this one.
        return null;
    }

    string lettersNumbersAndComma = Utilities.AllLettersAndNumbers + @",";

    // 'I' starts a word after a space: only change it when the text continues from a letter, number
    // or comma and a vowel follows.
    if (index > 2 && text[index - 1] == ' ')
    {
        bool continues = lettersNumbersAndComma.Contains(text[index - 2]) &&
                         match.Length >= 2 &&
                         Utilities.LowercaseVowels.Contains(char.ToLower(match.Value[1]));
        return continues ? "l" : null;
    }

    // 'I' starts a new line: same rule, looking at the character before the line break.
    // The former index "match.Index - Environment.NewLine.Length + 1" lacked parentheses and therefore
    // inspected a character of the line break (or the 'I' itself) instead of the preceding text.
    if (index > Environment.NewLine.Length + 1 && Environment.NewLine.Contains(text[index - 1]))
    {
        bool continues = lettersNumbersAndComma.Contains(text[index - (Environment.NewLine.Length + 1)]) &&
                         match.Length >= 2 &&
                         Utilities.LowercaseVowels.Contains(match.Value[1]);
        return continues ? "l" : null;
    }

    // Directly after a quote, tag end or dash the 'I' is left alone.
    if (index > 1 && (text[index - 1] == '\"' || text[index - 1] == '\'' || text[index - 1] == '>' || text[index - 1] == '-'))
    {
        return null;
    }

    char before = text[index - 1];
    char after = '\0';
    if (index < text.Length - 2)
    {
        after = text[index + 1];
    }

    // Uppercase consonant + 'I' + lowercase consonant: the 'I' is treated as a lowercase 'i'.
    if (before != '\0' && char.IsUpper(before) && after != '\0' && char.IsLower(after) &&
        !Utilities.LowercaseVowels.Contains(char.ToLower(before)) && !Utilities.LowercaseVowels.Contains(after))
    {
        return "i";
    }

    // After an opening quote/bracket/punctuation and not followed by a vowel: leave it.
    if (@"‘’¡¿„“()[]♪'. @".Contains(before) && !Utilities.LowercaseVowels.Contains(char.ToLower(after)))
    {
        return null;
    }

    return "l";
}
/// <summary>
/// Adds a missing sentence-ending sign at the end of paragraphs, and at the end of the first line of
/// dialog paragraphs, for every paragraph in <c>Subtitle</c>.
/// </summary>
/// <remarks>
/// Each change is gated by AllowFix and reported through AddFixToListView; the number of changes is
/// handed to UpdateFixStatus at the end.
/// </remarks>
public void FixMissingPeriodsAtEndOfLine()
{
    string fixAction = _language.FixMissingPeriodAtEndOfLine;
    int fixCount = 0;
    char[] wordSeparators = { ' ', '.', ',', '-', '?', '!', ':', ';', '"', '(', ')', '[', ']', '{', '}', '|', '<', '>', '/', '+', '\r', '\n' };
    string[] dialogLineStarts = { " -", "-", "<i>-", "<i> -" };

    for (int index = 0; index < Subtitle.Paragraphs.Count; index++)
    {
        Paragraph p = Subtitle.Paragraphs[index];
        Paragraph next = Subtitle.GetParagraphOrDefault(index + 1);
        string nextText = next == null
            ? string.Empty
            : HtmlUtil.RemoveHtmlTags(next.Text).TrimStart('-', '"', '„').TrimStart();
        string tempNoHtml = HtmlUtil.RemoveHtmlTags(p.Text).TrimEnd();

        if (IsOneLineUrl(p.Text) || p.Text.Contains(new[] { '♪', '♫' }) || p.Text.EndsWith('\''))
        {
            // URLs, lyrics and texts ending in an apostrophe are left untouched.
        }
        else if (!string.IsNullOrEmpty(nextText) && next != null && next.Text.Length > 0 &&
                 Utilities.UppercaseLetters.Contains(nextText[0]) &&
                 tempNoHtml.Length > 0 &&
                 !@",.!?:;>-])♪♫…".Contains(tempNoHtml[tempNoHtml.Length - 1]))
        {
            // Next paragraph starts uppercase and this one has no closing punctuation.
            string tempTrimmed = tempNoHtml.TrimEnd().TrimEnd('\'', '"', '“', '”').TrimEnd();
            bool needsPeriod =
                tempTrimmed.Length > 0 &&
                !@")]*#¶.!?".Contains(tempTrimmed[tempTrimmed.Length - 1]) &&
                p.Text != p.Text.ToUpper() &&
                // "I" is always capitalized, so it says nothing about a new sentence.
                !next.Text.StartsWith("I ", StringComparison.Ordinal) &&
                !next.Text.StartsWith("I'", StringComparison.Ordinal) &&
                // Neither does a name as first word of the next paragraph.
                !IsName(next.Text.Split(wordSeparators)[0]) &&
                AllowFix(p, fixAction);

            if (needsPeriod)
            {
                string oldText = p.Text;
                if (p.Text.EndsWith('>'))
                {
                    // Put the period in front of the last tag.
                    int lastLessThan = p.Text.LastIndexOf('<');
                    if (lastLessThan > 0)
                    {
                        p.Text = p.Text.Insert(lastLessThan, ".");
                    }
                }
                else if (p.Text.EndsWith('“') && tempNoHtml.StartsWith('„'))
                {
                    p.Text = p.Text.TrimEnd('“') + ".“";
                }
                else if (p.Text.EndsWith('"') && tempNoHtml.StartsWith('"'))
                {
                    p.Text = p.Text.TrimEnd('"') + ".\"";
                }
                else
                {
                    p.Text += ".";
                }

                if (p.Text != oldText)
                {
                    fixCount++;
                    AddFixToListView(p, fixAction, oldText, p.Text);
                }
            }
        }
        else if (next != null && !string.IsNullOrEmpty(p.Text) &&
                 Utilities.AllLettersAndNumbers.Contains(p.Text[p.Text.Length - 1]) &&
                 p.Text != p.Text.ToUpper())
        {
            var nextStripped = new StripableText(next.Text);
            if (nextStripped.StrippedText.Length > 0 &&
                nextStripped.StrippedText != nextStripped.StrippedText.ToUpper() &&
                Utilities.UppercaseLetters.Contains(nextStripped.StrippedText[0]) &&
                AllowFix(p, fixAction))
            {
                // The last sentence sign in the text decides the closing sign: an open Spanish
                // question/exclamation gets its matching closer, anything else a period.
                int lastSign = p.Text.LastIndexOfAny(new[] { '.', '!', '?', '¿', '¡' });
                string endSign = ".";
                if (lastSign >= 0)
                {
                    if (p.Text[lastSign] == '¿')
                    {
                        endSign = "?";
                    }
                    else if (p.Text[lastSign] == '¡')
                    {
                        endSign = "!";
                    }
                }

                string oldText = p.Text;
                fixCount++;
                p.Text = oldText + endSign;
                AddFixToListView(p, fixAction, oldText, p.Text);
            }
        }

        // Dialog: first line ends in a letter and the next line starts with a dash.
        if (p.Text.Length > 4)
        {
            int indexOfNewLine = -1;
            foreach (string lineStart in dialogLineStarts)
            {
                indexOfNewLine = p.Text.IndexOf(Environment.NewLine + lineStart, 3, StringComparison.Ordinal);
                if (indexOfNewLine >= 0)
                {
                    break;
                }
            }

            if (indexOfNewLine > 0 &&
                Configuration.Settings.General.UppercaseLetters.Contains(char.ToUpper(p.Text[indexOfNewLine - 1])) &&
                AllowFix(p, fixAction))
            {
                string oldText = p.Text;
                var firstLine = new StripableText(oldText.Substring(0, indexOfNewLine));
                string preTrimmed = firstLine.Pre.TrimEnd();

                string endSign;
                if (preTrimmed.EndsWith('¿'))
                {
                    endSign = "?"; // Spanish ¿
                }
                else if (preTrimmed.EndsWith('¡'))
                {
                    endSign = "!"; // Spanish ¡
                }
                else
                {
                    endSign = ".";
                }

                p.Text = p.Text.Insert(indexOfNewLine, endSign);
                fixCount++;
                AddFixToListView(p, fixAction, oldText, p.Text);
            }
        }
    }

    UpdateFixStatus(fixCount, _language.AddPeriods, _language.XPeriodsAdded);
}
/// <summary>
/// Rebuilds the preview list: for every paragraph, applies the casing of each checked name from
/// <c>listViewNames</c> and lists the paragraphs whose text would change.
/// </summary>
/// <remarks>
/// The wait cursor and the list view's update lock are released in <c>finally</c> blocks so that an
/// exception while building the preview cannot leave the form with a busy cursor or a frozen list.
/// </remarks>
private void GeneratePreview()
{
    Cursor = Cursors.WaitCursor;
    try
    {
        listViewFixes.BeginUpdate();
        try
        {
            listViewFixes.Items.Clear();
            foreach (Paragraph p in _subtitle.Paragraphs)
            {
                string text = p.Text;
                foreach (ListViewItem item in listViewNames.Items)
                {
                    string name = item.SubItems[1].Text;
                    string textNoTags = HtmlUtil.RemoveHtmlTags(text);

                    // All-uppercase lines are skipped; names that are a single character or all
                    // lowercase carry no casing information and are skipped as well.
                    if (textNoTags != textNoTags.ToUpper() &&
                        item.Checked &&
                        text != null &&
                        text.Contains(name, StringComparison.OrdinalIgnoreCase) &&
                        name.Length > 1 &&
                        name != name.ToLower())
                    {
                        var st = new StripableText(text);
                        st.FixCasing(new List<string> { name }, true, false, false, string.Empty);
                        text = st.MergedString;
                    }
                }

                if (text != p.Text)
                {
                    AddToPreviewListView(p, text);
                }
            }
        }
        finally
        {
            listViewFixes.EndUpdate();
        }

        groupBoxLinesFound.Text = string.Format(Configuration.Settings.Language.ChangeCasingNames.LinesFoundX, listViewFixes.Items.Count);
    }
    finally
    {
        Cursor = Cursors.Default;
    }
}
/// <summary>
/// Applies the casing mode selected by the radio buttons to <paramref name="text"/> and counts the
/// line in <c>_noOfLinesChanged</c> when the result differs from the input.
/// </summary>
/// <param name="text">Text to convert.</param>
/// <param name="lastLine">Previous line, forwarded to <c>StripableText.FixCasing</c> in normal mode.</param>
/// <param name="namesEtc">Name list, forwarded to <c>StripableText.FixCasing</c> in normal mode.</param>
/// <returns>The converted text, or the unchanged input when no conversion applies.</returns>
private string FixCasing(string text, string lastLine, List<string> namesEtc)
{
    string result = text;

    if (radioButtonNormal.Checked)
    {
        // "Only all-uppercase lines" option: mixed-case input is returned as is (and not counted).
        bool isNotAllUpper = checkBoxOnlyAllUpper.Checked && text != text.ToUpper();
        if (isNotAllUpper)
        {
            return text;
        }

        if (text.Length > 1)
        {
            // Lowercase everything first, then let StripableText fix all casing
            // except names (names are a separate option).
            var stripable = new StripableText(text.ToLower().Trim().FixExtraSpaces());
            stripable.FixCasing(namesEtc, false, true, true, lastLine);
            result = stripable.MergedString;
        }
    }
    else if (radioButtonUppercase.Checked)
    {
        var stripable = new StripableText(text);
        string upper = stripable.Pre + stripable.StrippedText.ToUpper() + stripable.Post;

        // Tags inside the text were uppercased too; FixUpperTags handles those.
        result = HtmlUtil.FixUpperTags(upper);
    }
    else if (radioButtonLowercase.Checked)
    {
        result = text.ToLower();
    }

    if (text != result)
    {
        _noOfLinesChanged++;
    }

    return result;
}
/// <summary>
/// Turns a leading lowercase 'i' into 'I' on every line of <paramref name="input"/> whose preceding
/// line is empty or ends with '.', '!' or '?'.
/// </summary>
/// <param name="input">Text to process; may contain several lines.</param>
/// <param name="lastLine">Line preceding the first line of <paramref name="input"/>.</param>
/// <returns>The processed lines joined with <c>Environment.NewLine</c>, without trailing line breaks.</returns>
/// <remarks>
/// A line is left unchanged when its stripped prefix ends with '[', '(' or "...", or when the
/// preceding line ends with "..." or with an abbreviation from <c>_abbreviationList</c>.
/// </remarks>
private string FixLowercaseIToUppercaseI(string input, string lastLine)
{
    var sourceLines = input.SplitToLines();
    var fixedLines = new List<string>();

    for (int lineNo = 0; lineNo < sourceLines.Length; lineNo++)
    {
        string current = sourceLines[lineNo];

        // The first line is judged against the caller-supplied previous line, later lines against
        // their (unmodified) predecessor in the input.
        string previous = HtmlUtil.RemoveHtmlTags(lineNo > 0 ? sourceLines[lineNo - 1] : lastLine);

        bool previousEndsSentence = string.IsNullOrEmpty(previous) ||
                                    previous.EndsWith('.') ||
                                    previous.EndsWith('!') ||
                                    previous.EndsWith('?');
        if (previousEndsSentence)
        {
            var stripable = new StripableText(current);
            bool isCandidate = stripable.StrippedText.StartsWith('i') &&
                               !(stripable.Pre.EndsWith('[') ||
                                 stripable.Pre.EndsWith('(') ||
                                 stripable.Pre.EndsWith("...", StringComparison.Ordinal));
            if (isCandidate)
            {
                bool previousAllowsFix = string.IsNullOrEmpty(previous) ||
                                         !(previous.EndsWith("...", StringComparison.Ordinal) ||
                                           EndsWithAbbreviation(previous, _abbreviationList));
                if (previousAllowsFix)
                {
                    current = stripable.Pre + "I" + stripable.StrippedText.Remove(0, 1) + stripable.Post;
                }
            }
        }

        fixedLines.Add(current);
    }

    return string.Join(Environment.NewLine, fixedLines.ToArray()).TrimEnd('\r', '\n');
}
// Nested tags with leading dots: everything up to and including "<b>" goes to Pre, and the "!"
// plus both closing tags go to Post.
public void StripableTextItalicAndMore()
{
    var st = new StripableText("<i>...<b>Hi!</b></i>");

    // Assert.AreEqual takes (expected, actual); keeping that order makes failure messages read correctly.
    Assert.AreEqual("<i>...<b>", st.Pre);
    Assert.AreEqual("!</b></i>", st.Post);
    Assert.AreEqual("Hi", st.StrippedText);
}
/// <summary>
/// Capitalizes the first letter that follows a ':' or ';' - either inside a paragraph or at the start
/// of a paragraph whose predecessor ends with one of those characters.
/// </summary>
/// <remarks>
/// Changed paragraphs are reported through AddFixToListView; when at least one paragraph changed,
/// <c>_totalFixes</c> is increased and the result is logged via LogStatus.
/// </remarks>
private void FixStartWithUppercaseLetterAfterColon()
{
    string fixAction = _language.StartWithUppercaseLetterAfterColon;
    int fixedCount = 0;
    listViewFixes.BeginUpdate();

    for (int index = 0; index < Subtitle.Paragraphs.Count; index++)
    {
        Paragraph p = Subtitle.Paragraphs[index];
        Paragraph previous = Subtitle.GetParagraphOrDefault(index - 1);
        string before = p.Text;
        int charsToSkip = 0;

        // Previous paragraph ends with a colon/semicolon: capitalize the start of this one.
        if (previous != null)
        {
            string previousPlain = HtmlUtil.RemoveHtmlTags(previous.Text);
            if (previousPlain.EndsWith(':') || previousPlain.EndsWith(';'))
            {
                var stripable = new StripableText(p.Text);
                string core = stripable.StrippedText;
                if (core.Length > 0 && core[0] != char.ToUpper(core[0]))
                {
                    p.Text = stripable.Pre + char.ToUpper(core[0]) + core.Substring(1) + stripable.Post;
                }
            }
        }

        // Colon/semicolon inside the paragraph: capitalize the next letter, stepping over tags and white space.
        if (before.Contains(new[] { ':', ';' }))
        {
            bool afterColon = false;
            for (int pos = 0; pos < p.Text.Length; pos++)
            {
                char current = p.Text[pos];
                if (current == ':' || current == ';')
                {
                    afterColon = true;
                    continue;
                }

                if (!afterColon)
                {
                    continue;
                }

                string rest = p.Text.Substring(pos);
                if (charsToSkip > 0)
                {
                    // Still inside a tag that started earlier.
                    charsToSkip--;
                }
                else if (rest.StartsWith("<i>", StringComparison.OrdinalIgnoreCase) ||
                         rest.StartsWith("<b>", StringComparison.OrdinalIgnoreCase) ||
                         rest.StartsWith("<u>", StringComparison.OrdinalIgnoreCase))
                {
                    charsToSkip = 2;
                }
                else if (rest.StartsWith("<font ", StringComparison.OrdinalIgnoreCase) && rest.Contains('>'))
                {
                    // The tag starts at offset 0 of rest, so the distance to '>' is simply its index.
                    charsToSkip = rest.IndexOf('>');
                }
                else if (IsTurkishLittleI(current, _encoding, Language))
                {
                    p.Text = p.Text.Remove(pos, 1).Insert(pos, GetTurkishUppercaseLetter(current, _encoding).ToString(CultureInfo.InvariantCulture));
                    afterColon = false;
                }
                else if (char.IsLower(current))
                {
                    // Words like "iPhone": an 'i' whose next character equals its own uppercase form stays lowercase.
                    bool keepLowercase = current == 'i' &&
                                         p.Text.Length > pos + 1 &&
                                         p.Text[pos + 1] == char.ToUpper(p.Text[pos + 1]);
                    if (!keepLowercase)
                    {
                        p.Text = p.Text.Remove(pos, 1).Insert(pos, char.ToUpper(current).ToString(CultureInfo.InvariantCulture));
                    }

                    afterColon = false;
                }
                else if (!(" " + Environment.NewLine).Contains(current))
                {
                    // Anything other than a space or line break ends the search.
                    afterColon = false;
                }
            }
        }

        if (before != p.Text)
        {
            fixedCount++;
            AddFixToListView(p, fixAction, before, p.Text);
        }
    }

    listViewFixes.EndUpdate();

    if (fixedCount > 0)
    {
        _totalFixes += fixedCount;
        LogStatus(_language.StartWithUppercaseLetterAfterColon, fixedCount.ToString(CultureInfo.InvariantCulture));
    }
}
/// <summary>
/// Removes interjections from <paramref name="text"/> and tidies up the punctuation, dashes and
/// casing that the removal leaves behind.
/// </summary>
/// <param name="text">Subtitle text (one or two lines, may contain tags).</param>
/// <returns>
/// The cleaned text; <c>string.Empty</c> when nothing is left after stripping (StripableText's
/// StrippedText is empty) or when both lines are reduced to a lone dash.
/// </returns>
/// <remarks>
/// Outline of what the code does:
/// 1) On first use <c>_interjectionList</c> is built from the semicolon-separated setting
///    <c>Configuration.Settings.Tools.Interjections</c>. Each entry is added as written, lowercased,
///    uppercased and with only its first letter uppercased, and the list is sorted with CompareLength.
/// 2) While anything keeps changing (<c>doRepeat</c>), each interjection that occurs as a whole word
///    (regex with \b on both sides) has its first occurrence removed. A long chain of special cases
///    then removes the now-orphaned ", ", dashes and punctuation around the gap, re-capitalizes the
///    following text when the removed interjection started with an uppercase letter, and drops a
///    leading dialog dash when the text collapsed from two lines to one.
/// 3) After the loop, two-line results in which one line has become just a dash or punctuation are
///    reduced to the remaining line, and a leading dialog dash is removed when the original text was
///    a two-line dialog whose first line ended a sentence.
///
/// NOTE(review): the interjection is concatenated into the regex pattern without escaping, so
/// entries containing regex metacharacters would be interpreted as pattern syntax — confirm the
/// setting never contains such characters.
/// NOTE(review): several consecutive <c>else if</c> branches test exactly the same condition (e.g.
/// the two <c>index == 2 &amp;&amp; temp.StartsWith("- —")</c> branches and the two <c>", —"</c> branches), so the
/// second of each pair can never run as written; possibly the string literals once differed in
/// white space — confirm against the project's history.
/// NOTE(review): <c>temp.Remove(temp.Length - 4, 4)</c> assumes <c>Environment.NewLine + "- "</c> is four
/// characters long, i.e. a two-character line break — confirm for other platforms.
/// NOTE(review): in <c>lines[0].Length > 1 &amp;&amp; (lines[1] == "-") || lines[1] == "." || ...</c> the
/// length check only guards the first alternative because of operator precedence — confirm intent.
/// </remarks>
public string RemoveInterjections(string text) { string oldText = text; var arr = Configuration.Settings.Tools.Interjections.Split(new[] { ';' }, StringSplitOptions.RemoveEmptyEntries); if (_interjectionList == null) { _interjectionList = new List<string>(); foreach (string s in arr) { if (s.Length > 0) { if (!_interjectionList.Contains(s)) _interjectionList.Add(s); string lower = s.ToLower(); if (!_interjectionList.Contains(lower)) _interjectionList.Add(lower); string upper = s.ToUpper(); if (!_interjectionList.Contains(upper)) _interjectionList.Add(upper); string pascalCasing = char.ToUpper(s[0]) + s.Substring(1); if (!_interjectionList.Contains(pascalCasing)) _interjectionList.Add(pascalCasing); } } _interjectionList.Sort(CompareLength); } bool doRepeat = true; while (doRepeat) { doRepeat = false; foreach (string s in _interjectionList) { if (text.Contains(s)) { var regex = new Regex("\\b" + s + "\\b"); var match = regex.Match(text); if (match.Success) { int index = match.Index; string temp = text.Remove(index, s.Length); if (temp.Remove(0, index) == " —" && temp.EndsWith("— —")) { temp = temp.TrimEnd('—').TrimEnd(); if (temp.TrimEnd().EndsWith(Environment.NewLine + "—")) temp = temp.TrimEnd().TrimEnd('—').TrimEnd(); } else if (temp.Remove(0, index) == " —" && temp.EndsWith("- —")) { temp = temp.TrimEnd('—').TrimEnd(); if (temp.TrimEnd().EndsWith(Environment.NewLine + "-")) temp = temp.TrimEnd().TrimEnd('-').TrimEnd(); } else if (index == 2 && temp.StartsWith("- —", StringComparison.Ordinal)) { temp = temp.Remove(2, 2); } else if (index == 2 && temp.StartsWith("- —", StringComparison.Ordinal)) { temp = temp.Remove(2, 1); } else if (index == 0 && temp.StartsWith(" —", StringComparison.Ordinal)) { temp = temp.Remove(0, 2); } else if (index == 0 && temp.StartsWith('—')) { temp = temp.Remove(0, 1); } string pre = string.Empty; if (index > 0) doRepeat = true; bool removeAfter = true; if (temp.Length > index - s.Length + 3 && index > s.Length) { if 
(temp.Substring(index - s.Length + 1, 3) == ", !") { temp = temp.Remove(index - s.Length + 1, 2); removeAfter = false; } else if (temp.Substring(index - s.Length + 1, 3) == ", ?") { temp = temp.Remove(index - s.Length + 1, 2); removeAfter = false; } else if (temp.Substring(index - s.Length + 1, 3) == ", .") { temp = temp.Remove(index - s.Length + 1, 2); removeAfter = false; } } if (removeAfter && temp.Length > index - s.Length + 2 && index > s.Length) { if (temp.Substring(index - s.Length, 3) == ", !") { temp = temp.Remove(index - s.Length, 2); removeAfter = false; } else if (temp.Substring(index - s.Length, 3) == ", ?") { temp = temp.Remove(index - s.Length, 2); removeAfter = false; } else if (temp.Substring(index - s.Length, 3) == ", .") { temp = temp.Remove(index - s.Length, 2); removeAfter = false; } else if (index > 0 && temp.Substring(index - s.Length).StartsWith(", -—")) { temp = temp.Remove(index - s.Length, 3); removeAfter = false; } else if (index > 0 && temp.Substring(index - s.Length).StartsWith(", --")) { temp = temp.Remove(index - s.Length, 2); removeAfter = false; } } if (removeAfter && temp.Length > index - s.Length + 2 && index > s.Length) { if (temp.Substring(index - s.Length + 1, 2) == "-!") { temp = temp.Remove(index - s.Length + 1, 1); removeAfter = false; } else if (temp.Substring(index - s.Length + 1, 2) == "-?") { temp = temp.Remove(index - s.Length + 1, 1); removeAfter = false; } else if (temp.Substring(index - s.Length + 1, 2) == "-.") { temp = temp.Remove(index - s.Length + 1, 1); removeAfter = false; } } if (index > 3 && index - 2 < temp.Length && temp.Substring(index - 2).StartsWith(", —", StringComparison.Ordinal)) { temp = temp.Remove(index - 2, 1); index--; } else if (index > 3 && index - 2 < temp.Length && temp.Substring(index - 2).StartsWith(", —", StringComparison.Ordinal)) { temp = temp.Remove(index - 2, 1); index--; } if (removeAfter) { if (index == 0) { if (!string.IsNullOrEmpty(temp) && temp.StartsWith('-')) temp = 
temp.Remove(0, 1).Trim(); } else if (index == 3 && !string.IsNullOrEmpty(temp) && temp.StartsWith("<i>-", StringComparison.Ordinal)) { temp = temp.Remove(3, 1); } else if (index > 0 && temp.Length > index) { pre = text.Substring(0, index); temp = temp.Remove(0, index); if (pre.EndsWith('-') && temp.StartsWith('-')) temp = temp.Remove(0, 1); if (pre.EndsWith("- ") && temp.StartsWith('-')) temp = temp.Remove(0, 1); } while (temp.Length > 0 && (temp.StartsWith(' ') || temp.StartsWith(',') || temp.StartsWith('.') || temp.StartsWith('!') || temp.StartsWith('?'))) { temp = temp.Remove(0, 1); doRepeat = true; } if (temp.Length > 0 && s[0].ToString(CultureInfo.InvariantCulture) != s[0].ToString(CultureInfo.InvariantCulture).ToLower()) { temp = char.ToUpper(temp[0]) + temp.Substring(1); doRepeat = true; } if (pre.EndsWith(' ') && temp.StartsWith('-')) temp = temp.Remove(0, 1); if (pre.EndsWith(',') && temp.StartsWith('—')) pre = pre.TrimEnd(',') + " "; temp = pre + temp; } if (temp.EndsWith(Environment.NewLine + "- ")) temp = temp.Remove(temp.Length - 4, 4); var st = new StripableText(temp); if (st.StrippedText.Length == 0) return string.Empty; if (!temp.Contains(Environment.NewLine) && text.Contains(Environment.NewLine) && temp.StartsWith('-')) temp = temp.Remove(0, 1).Trim(); text = temp; } } } } var lines = text.SplitToLines(); if (text != oldText && lines.Length == 2) { if (lines[0] == "-" && lines[1] == "-") return string.Empty; if (lines[0].StartsWith('-') && lines[0].Length > 1 && lines[1].Trim() == "-") return lines[0].Remove(0, 1).Trim(); if (lines[1].StartsWith('-') && lines[1].Length > 1 && lines[0].Trim() == "-") return lines[1].Remove(0, 1).Trim(); if (lines[1].StartsWith("<i>-", StringComparison.Ordinal) && lines[1].Length > 4 && lines[0].Trim() == "-") return "<i>" + lines[1].Remove(0, 4).Trim(); if (lines[0].Length > 1 && (lines[1] == "-") || lines[1] == "." || lines[1] == "!" 
|| lines[1] == "?") { if (oldText.Contains(Environment.NewLine + "-") && lines[0].StartsWith('-')) lines[0] = lines[0].Remove(0, 1); return lines[0].Trim(); } if (HtmlUtil.RemoveHtmlTags(lines[0], false).Trim() == "-") { if (HtmlUtil.RemoveHtmlTags(lines[1], false).Trim() == "-") return string.Empty; if (lines[1].StartsWith('-') && lines[1].Length > 1) return lines[1].Remove(0, 1).Trim(); if (lines[1].StartsWith("<i>-", StringComparison.Ordinal) && lines[1].Length > 4) return "<i>" + lines[1].Remove(0, 4).Trim(); return lines[1]; } if (HtmlUtil.RemoveHtmlTags(lines[1], false).Trim() == "-") { if (HtmlUtil.RemoveHtmlTags(lines[0], false).Trim() == "-") return string.Empty; if (lines[0].StartsWith('-') && lines[0].Length > 1) return lines[0].Remove(0, 1).Trim(); if (lines[0].StartsWith("<i>-", StringComparison.Ordinal) && lines[0].Length > 4) return "<i>" + lines[0].Remove(0, 4).Trim(); return lines[0]; } } if (lines.Length == 2 && string.IsNullOrWhiteSpace(lines[1].Replace(".", string.Empty).Replace("?", string.Empty).Replace("!", string.Empty).Replace("-", string.Empty).Replace("—", string.Empty))) { text = lines[0]; lines = text.SplitToLines(); } else if (lines.Length == 2 && string.IsNullOrWhiteSpace(lines[0].Replace(".", string.Empty).Replace("?", string.Empty).Replace("!", string.Empty).Replace("-", string.Empty).Replace("—", string.Empty))) { text = lines[1]; lines = text.SplitToLines(); } if (text != oldText && lines.Length == 1 && Utilities.GetNumberOfLines(oldText) == 2) { if ((oldText.StartsWith('-') || oldText.StartsWith("<i>-", StringComparison.Ordinal)) && (oldText.Contains("." + Environment.NewLine) || oldText.Contains(".</i>" + Environment.NewLine) || oldText.Contains("!" + Environment.NewLine) || oldText.Contains("!</i>" + Environment.NewLine) || oldText.Contains("?" 
+ Environment.NewLine) || oldText.Contains("?</i>" + Environment.NewLine))) { if (text.StartsWith("<i>-")) text = "<i>" + text.Remove(0, 4).TrimStart(); else text = text.TrimStart('-').TrimStart(); } else if ((oldText.Contains(Environment.NewLine + "-") || oldText.Contains(Environment.NewLine + "<i>-")) && (oldText.Contains("." + Environment.NewLine) || oldText.Contains(".</i>" + Environment.NewLine) || oldText.Contains("!" + Environment.NewLine) || oldText.Contains("!</i>" + Environment.NewLine) || oldText.Contains("?" + Environment.NewLine) || oldText.Contains("?</i>" + Environment.NewLine))) { if (text.StartsWith("<i>-")) text = "<i>" + text.Remove(0, 4).TrimStart(); else text = text.TrimStart('-').TrimStart(); } } return text; }
/// <summary>
/// Walks every paragraph of <c>Subtitle</c> and upper-cases the first letter of a
/// sentence that starts inside the paragraph, i.e. the letter following ". ", "! "
/// or "? ". Changed paragraphs are reported through <c>AddFixToListView</c> and the
/// number of changed paragraphs is added to <c>_totalFixes</c> and logged.
/// </summary>
private void FixStartWithUppercaseLetterAfterPeriodInsideParagraph()
{
    string fixAction = _language.StartWithUppercaseLetterAfterPeriodInsideParagraph;
    var sentenceEnders = new[] { '.', '!', '?' };
    int changedParagraphs = 0;

    for (int i = 0; i < Subtitle.Paragraphs.Count; i++)
    {
        Paragraph p = Subtitle.Paragraphs[i];
        string before = p.Text;
        var st = new StripableText(p.Text);

        if (p.Text.Length > 3)
        {
            // NOTE(review): as written both arguments are a single space, so this call
            // changes nothing - presumably it was meant to collapse double spaces; confirm.
            string text = st.StrippedText.Replace(" ", " ");

            int start = text.IndexOfAny(sentenceEnders);
            while (start != -1 && start < text.Length)
            {
                // A period directly after a digit is ignored (e.g. a number such as "1.").
                bool followsDigit = start > 0 && char.IsDigit(text[start - 1]);
                bool spaceAndEnoughTextFollow = start + 4 < text.Length && text[start + 1] == ' ';

                if (!followsDigit && spaceAndEnoughTextFollow && !IsAbbreviation(text, start))
                {
                    var rest = new StripableText(text.Substring(start + 2));
                    string word = rest.StrippedText;
                    if (word.Length > 0)
                    {
                        char first = word[0];
                        bool turkishLittleI = IsTurkishLittleI(first, _encoding, Language);
                        bool canBeUppercased = turkishLittleI ||
                                               Configuration.Settings.General.UppercaseLetters.Contains(char.ToUpper(first));

                        // Skip a lone letter and an "s" that follows an apostrophe in the stripped prefix.
                        if (canBeUppercased && word.Length > 1 && !(rest.Pre.Contains('\'') && word.StartsWith('s')))
                        {
                            string head = text.Substring(0, start + 2) + rest.Pre;
                            head = turkishLittleI
                                ? head + GetTurkishUppercaseLetter(first, _encoding)
                                : head + char.ToUpper(first);
                            text = head + word.Substring(1) + rest.Post;

                            if (AllowFix(p, fixAction))
                            {
                                p.Text = st.Pre + text + st.Post;
                            }
                        }
                    }
                }

                // Jump ahead four characters before looking for the next sentence end.
                start += 4;
                if (start < text.Length)
                {
                    start = text.IndexOfAny(sentenceEnders, start);
                }
            }
        }

        if (before != p.Text)
        {
            changedParagraphs++;
            AddFixToListView(p, fixAction, before, p.Text);
        }
    }

    if (changedParagraphs > 0)
    {
        _totalFixes += changedParagraphs;
        LogStatus(_language.StartWithUppercaseLetterAfterPeriodInsideParagraph, changedParagraphs.ToString(CultureInfo.InvariantCulture));
    }
}
/// <summary>
/// Removes the text in front of a colon (typically a speaker name) from each line of
/// <paramref name="text"/>, honouring the <c>Settings</c> flags
/// <c>RemoveTextBeforeColon</c>, <c>RemoveTextBeforeColonOnlyUppercase</c> and
/// <c>ColonSeparateLine</c>. Afterwards dialog dashes are inserted or removed so that
/// the remaining lines still read as a dialog or as a single line.
/// </summary>
/// <param name="text">Subtitle text; may start with an ASS override tag ("{\...}") and contain HTML tags.</param>
/// <returns>
/// The input unchanged when nothing is to be removed, an empty string when nothing but
/// white space is left, otherwise the cleaned text with a leading ASS tag put back in front.
/// </returns>
public string RemoveColon(string text)
{
    if (!Settings.RemoveTextBeforeColon || text.IndexOf(':') < 0)
        return text;

    string originalText = text;

    // Set a leading ASS override tag aside; it is put back in front of the result.
    string preAssTag = string.Empty;
    if (text.StartsWith("{\\", StringComparison.Ordinal) && text.IndexOf('}') > 0)
    {
        int indexOfEndBracket = text.IndexOf('}') + 1;
        preAssTag = text.Substring(0, indexOfEndBracket);
        text = text.Remove(0, indexOfEndBracket).TrimStart();
    }

    // House 7x01 line 52: and she would like you to do three things:
    // Okay or remove???
    var noTagText = HtmlUtil.RemoveHtmlTags(text);
    if (noTagText.IndexOf(':') > 0 && noTagText.IndexOf(':') == noTagText.Length - 1 && noTagText != noTagText.ToUpper() && noTagText.Length > 10)
    {
        // Nothing is removed here, so hand back the untouched input. Returning the
        // local "text" would silently drop the ASS tag that was stripped above.
        return originalText;
    }

    var sentenceEnders = new[] { '.', '!', '?' };
    string newText = string.Empty;
    var lines = text.Trim().SplitToLines();
    int noOfNames = 0;
    int count = 0; // zero-based index of the line being processed
    bool removedInFirstLine = false;
    bool removedInSecondLine = false;
    foreach (string line in lines)
    {
        int indexOfColon = line.IndexOf(':');

        // NOTE(review): indexOfColon is relative to "line" while the whole "text" is
        // passed to IsNotInsideBrackets - confirm this is intended for lines after the first.
        if (indexOfColon > 0 && IsNotInsideBrackets(text, indexOfColon))
        {
            var pre = line.Substring(0, indexOfColon);
            var noTagPre = HtmlUtil.RemoveHtmlTags(pre, true);
            if (Settings.RemoveTextBeforeColonOnlyUppercase && noTagPre != noTagPre.ToUpper())
            {
                // Prefix is not all uppercase: keep the line as it is.
                newText = (newText + Environment.NewLine + line).Trim();
            }
            else
            {
                var st = new StripableText(pre);
                int colonCount = Utilities.CountTagInText(line, ':');

                // Second line with a single colon that is kept whole: either something was
                // already removed in line one, or the prefix is long (> 15) and contains a space.
                bool keepWholeSecondLine =
                    count == 1 && newText.Length > 1 &&
                    (removedInFirstLine || (indexOfColon > 15 && pre.Contains(' '))) &&
                    colonCount == 1 &&
                    ".?!".IndexOf(newText[newText.Length - 1]) < 0 &&
                    newText.LineEndsWithHtmlTag(true) &&
                    line != line.ToUpper();

                if (keepWholeSecondLine)
                {
                    newText = newText + Environment.NewLine + WithReopenedMarker(pre, line, true);
                }
                else if (colonCount == 1)
                {
                    bool remove = true;
                    if (indexOfColon > 0 && indexOfColon < line.Length - 1)
                    {
                        // Keep colons between digits (e.g. "10:30").
                        remove = !Utilities.IsBetweenNumbers(line, indexOfColon);
                    }

                    if (!DoRemove(pre))
                        remove = false;

                    if (remove && Settings.ColonSeparateLine)
                    {
                        // Only remove when the colon ends the line.
                        remove = indexOfColon == line.Length - 1 ||
                                 line.Substring(indexOfColon + 1).StartsWith(Environment.NewLine, StringComparison.Ordinal);
                    }

                    if (remove)
                    {
                        var content = line.Substring(indexOfColon + 1).Trim();
                        if (content.Length > 0)
                        {
                            newText = newText + Environment.NewLine + WithReopenedMarker(pre, content, false);

                            if (count == 0)
                                removedInFirstLine = true;
                            else if (count == 1)
                                removedInSecondLine = true;
                        }
                        newText = newText.Trim();

                        if (text.StartsWith('(') && newText.EndsWith(')') && !newText.Contains('('))
                            newText = newText.TrimEnd(')');
                        else if (text.StartsWith('[') && newText.EndsWith(']') && !newText.Contains('['))
                            newText = newText.TrimEnd(']');
                        else
                            newText = WithRestoredLeadingTag(text, newText, true);

                        if (!IsHIDescription(st.StrippedText))
                            noOfNames++;
                    }
                    else
                    {
                        newText = (newText + Environment.NewLine + line).Trim();
                        newText = WithRestoredLeadingTag(text, newText, true);
                    }
                }
                else
                {
                    // Several colons in the line: handle at most the first two.
                    string s2 = line;
                    for (int k = 0; k < 2; k++)
                    {
                        if (s2.Contains(':'))
                        {
                            int colonIndex = s2.IndexOf(':');
                            string start = s2.Substring(0, colonIndex);

                            if (!Settings.RemoveTextBeforeColonOnlyUppercase || start == start.ToUpper())
                            {
                                int endIndex = start.LastIndexOfAny(sentenceEnders);
                                if (colonIndex > 0 && colonIndex < s2.Length - 1)
                                {
                                    // A colon between two digits is left alone (endIndex 0 removes nothing).
                                    if (char.IsDigit(s2[colonIndex - 1]) && char.IsDigit(s2[colonIndex + 1]))
                                        endIndex = 0;
                                }
                                if (endIndex < 0)
                                    s2 = s2.Remove(0, colonIndex - endIndex); // from line start through the colon
                                else if (endIndex > 0)
                                    s2 = s2.Remove(endIndex + 1, colonIndex - endIndex); // from after the last sentence end through the colon
                            }

                            if (count == 0)
                                removedInFirstLine = true;
                            else if (count == 1)
                                removedInSecondLine = true;
                        }
                    }
                    newText = newText + Environment.NewLine + s2;
                    newText = newText.Trim();
                }
            }
        }
        else
        {
            newText = (newText + Environment.NewLine + line).Trim();
            newText = WithRestoredLeadingTag(text, newText, false);
        }
        count++;
    }

    newText = newText.Trim();
    if (noOfNames > 0 && Utilities.GetNumberOfLines(newText) == 2)
    {
        int indexOfDialogChar = newText.IndexOf('-');
        bool insertDash = true;
        var arr = newText.SplitToLines();
        if (arr.Length == 2 && arr[0].Length > 1 && arr[1].Length > 1)
        {
            string arr0 = new StripableText(arr[0]).StrippedText;
            string arr1 = new StripableText(arr[1]).StrippedText;

            //line continuation?
            if (arr0.Length > 0 && arr1.Length > 1 && (Utilities.LowercaseLetters + ",").Contains(arr0.Substring(arr0.Length - 1), StringComparison.Ordinal) &&
                Utilities.LowercaseLetters.Contains(arr1[0]))
            {
                if (new StripableText(arr[1]).Pre.Contains("...") == false)
                    insertDash = false;
            }

            string tempArr0QuoteTrimmed = arr[0].TrimEnd('"');
            if (arr0.Length > 0 && arr1.Length > 1 &&
                !(tempArr0QuoteTrimmed.EndsWith('.') || tempArr0QuoteTrimmed.EndsWith('!') || tempArr0QuoteTrimmed.EndsWith('?') || tempArr0QuoteTrimmed.EndsWith("</i>", StringComparison.Ordinal)) &&
                !(new StripableText(arr[1]).Pre.Contains('-')))
            {
                insertDash = false;
            }

            if (removedInFirstLine && !removedInSecondLine && !text.StartsWith('-') && !text.StartsWith("<i>-", StringComparison.Ordinal))
            {
                if (!insertDash || (!arr[1].StartsWith('-') && !arr[1].StartsWith("<i>-", StringComparison.Ordinal)))
                    insertDash = false;
            }
        }

        if (insertDash)
        {
            if (indexOfDialogChar < 0 || indexOfDialogChar > 4)
            {
                var st = new StripableText(newText, string.Empty, string.Empty);
                newText = st.Pre + "- " + st.StrippedText + st.Post;
            }

            int indexOfNewLine = newText.IndexOf(Environment.NewLine, StringComparison.Ordinal);
            string second = newText.Substring(indexOfNewLine).Trim();
            indexOfDialogChar = second.IndexOf('-');
            if (indexOfDialogChar < 0 || indexOfDialogChar > 6)
            {
                var st = new StripableText(second, String.Empty, String.Empty);
                second = st.Pre + "- " + st.StrippedText + st.Post;
                newText = newText.Remove(indexOfNewLine) + Environment.NewLine + second;
            }
        }
    }
    else if (!newText.Contains(Environment.NewLine) && newText.Contains('-'))
    {
        // Single line left: drop dashes from the stripped prefix.
        var st = new StripableText(newText);
        if (st.Pre.Contains('-'))
            newText = st.Pre.Replace("-", string.Empty) + st.StrippedText + st.Post;
    }
    else if (Utilities.GetNumberOfLines(newText) == 2 && removedInFirstLine == false && removedInSecondLine)
    {
        string noTags = HtmlUtil.RemoveHtmlTags(newText, true).Trim();
        bool insertDash = noTags.StartsWith('-') && Utilities.CountTagInText(noTags, '-') == 1;
        if (insertDash)
        {
            if (newText.Contains(Environment.NewLine + "<i>"))
                newText = newText.Replace(Environment.NewLine + "<i>", Environment.NewLine + "<i>- ");
            else
                newText = newText.Replace(Environment.NewLine, Environment.NewLine + "- ");
        }
    }

    if (text.Contains("<i>", StringComparison.Ordinal) && !newText.Contains("<i>", StringComparison.Ordinal) && newText.EndsWith("</i>", StringComparison.Ordinal))
        newText = "<i>" + newText;

    if (string.IsNullOrWhiteSpace(newText))
        return string.Empty;

    return preAssTag + newText;
}

/// <summary>
/// Returns <paramref name="body"/> with the opening marker ("&lt;i&gt;", "&lt;b&gt;", optionally
/// "&lt;u&gt;", "[" or "(") put in front when that marker is found in <paramref name="pre"/>
/// and <paramref name="body"/> holds the matching closing marker; otherwise returns it unchanged.
/// </summary>
private static string WithReopenedMarker(string pre, string body, bool includeUnderline)
{
    if (pre.Contains("<i>") && body.Contains("</i>"))
        return "<i>" + body;
    if (pre.Contains("<b>") && body.Contains("</b>"))
        return "<b>" + body;
    if (includeUnderline && pre.Contains("<u>") && body.Contains("</u>"))
        return "<u>" + body;
    if (pre.Contains('[') && body.Contains(']'))
        return "[" + body;
    if (pre.Contains('(') && body.EndsWith(')'))
        return "(" + body;
    return body;
}

/// <summary>
/// Puts the opening tag that <paramref name="text"/> starts with ("&lt;i&gt;", "&lt;b&gt;",
/// optionally "&lt;u&gt;") back in front of <paramref name="newText"/> when
/// <paramref name="newText"/> ends with the matching closing tag but no longer starts with the opening one.
/// </summary>
private static string WithRestoredLeadingTag(string text, string newText, bool includeUnderline)
{
    if (newText.EndsWith("</i>", StringComparison.Ordinal) && text.StartsWith("<i>", StringComparison.Ordinal) && !newText.StartsWith("<i>", StringComparison.Ordinal))
        return "<i>" + newText;
    if (newText.EndsWith("</b>", StringComparison.Ordinal) && text.StartsWith("<b>", StringComparison.Ordinal) && !newText.StartsWith("<b>", StringComparison.Ordinal))
        return "<b>" + newText;
    if (includeUnderline && newText.EndsWith("</u>", StringComparison.Ordinal) && text.StartsWith("<u>", StringComparison.Ordinal) && !newText.StartsWith("<u>", StringComparison.Ordinal))
        return "<u>" + newText;
    return newText;
}
/// <summary>
/// Applies a fixed set of OCR clean-up rules to one subtitle text: normalises broken
/// ellipses (".. .", ". . .", "..?" and similar), inserts a space after a dialog dash that is
/// directly followed by an uppercase letter, upper-cases the first letter when the
/// previous line ended a sentence, turns a trailing <c>".</c> into <c>...</c> when the
/// line holds a single quote character, and repairs common 1/l/I confusions via the
/// <c>RegExNumber1</c>, <c>RegExUppercaseI</c> and <c>RegExLowercaseL</c> patterns.
/// </summary>
/// <param name="input">Text to fix.</param>
/// <param name="lastLine">Text of the previous line; may be null or empty.</param>
/// <param name="abbreviationList">Abbreviations handed to <c>EndsWithAbbreviation</c> when checking <paramref name="lastLine"/>.</param>
/// <returns>The fixed text, or <paramref name="input"/> unchanged when <c>OcrFixUseHardcodedRules</c> is off.</returns>
public string FixOcrErrorsViaHardcodedRules(string input, string lastLine, HashSet<string> abbreviationList)
{
    if (!Configuration.Settings.Tools.OcrFixUseHardcodedRules)
        return input;

    input = input.Replace(",...", "...");

    if (input.StartsWith("..") && !input.StartsWith("...", StringComparison.Ordinal))
        input = "." + input;

    // Set a leading dialog dash aside while the start of the line is normalised.
    string pre = string.Empty;
    if (input.StartsWith("- ", StringComparison.Ordinal))
    {
        pre = "- ";
        input = input.Remove(0, 2);
    }
    else if (input.StartsWith('-'))
    {
        pre = "-";
        input = input.Remove(0, 1);
    }

    bool hasDotDot = input.Contains("..") || input.Contains(". .");
    if (hasDotDot)
    {
        if (input.Length > 5 && input.StartsWith("..") && Utilities.AllLettersAndNumbers.Contains(input[2]))
            input = "..." + input.Remove(0, 2);
        if (input.Length > 7 && input.StartsWith("<i>..") && Utilities.AllLettersAndNumbers.Contains(input[5]))
            input = "<i>..." + input.Remove(0, 5);

        if (input.Length > 5 && input.StartsWith(".. ") && Utilities.AllLettersAndNumbers.Contains(input[3]))
            input = "..." + input.Remove(0, 3);
        if (input.Length > 7 && input.StartsWith("<i>.. ") && Utilities.AllLettersAndNumbers.Contains(input[6]))
            input = "<i>..." + input.Remove(0, 6);
        if (input.Contains(Environment.NewLine + ".. "))
            input = input.Replace(Environment.NewLine + ".. ", Environment.NewLine + "...");
        if (input.Contains(Environment.NewLine + "<i>.. "))
            input = input.Replace(Environment.NewLine + "<i>.. ", Environment.NewLine + "<i>...");

        if (input.StartsWith(". ..", StringComparison.Ordinal))
            input = "..." + input.Remove(0, 4);
        if (input.StartsWith(".. .", StringComparison.Ordinal))
            input = "..." + input.Remove(0, 4);
        if (input.StartsWith(". . ."))
            input = "..." + input.Remove(0, 5);
        if (input.StartsWith("... ", StringComparison.Ordinal))
            input = input.Remove(3, 1);
    }

    input = pre + input;

    if (hasDotDot)
    {
        if (input.StartsWith("<i>. ..", StringComparison.Ordinal))
            input = "<i>..." + input.Remove(0, 7);
        if (input.StartsWith("<i>.. .", StringComparison.Ordinal))
            input = "<i>..." + input.Remove(0, 7);
        if (input.StartsWith("<i>. . .", StringComparison.Ordinal))
            input = "<i>..." + input.Remove(0, 8);
        if (input.StartsWith("<i>... ", StringComparison.Ordinal))
            input = input.Remove(6, 1);
        if (input.StartsWith(". . <i>.", StringComparison.Ordinal))
            input = "<i>..." + input.Remove(0, 8);

        if (input.StartsWith("...<i>", StringComparison.Ordinal) && (input.IndexOf("</i>", StringComparison.Ordinal) > input.IndexOf(' ')))
            input = "<i>..." + input.Remove(0, 6);

        if (input.EndsWith(". ..", StringComparison.Ordinal))
            input = input.Remove(input.Length - 4, 4) + "...";
        if (input.EndsWith(".. .", StringComparison.Ordinal))
            input = input.Remove(input.Length - 4, 4) + "...";
        if (input.EndsWith(". . .", StringComparison.Ordinal))
            input = input.Remove(input.Length - 5, 5) + "...";
        if (input.EndsWith(". ..."))
            input = input.Remove(input.Length - 5, 5) + "...";

        if (input.EndsWith(". ..</i>", StringComparison.Ordinal))
            input = input.Remove(input.Length - 8, 8) + "...</i>";
        if (input.EndsWith(".. .</i>", StringComparison.Ordinal))
            input = input.Remove(input.Length - 8, 8) + "...</i>";
        if (input.EndsWith(". . .</i>", StringComparison.Ordinal))
            input = input.Remove(input.Length - 9, 9) + "...</i>";
        if (input.EndsWith(". ...</i>", StringComparison.Ordinal))
            input = input.Remove(input.Length - 9, 9) + "...</i>";

        if (input.EndsWith(".</i> . .", StringComparison.Ordinal))
            input = input.Remove(input.Length - 9, 9) + "...</i>";
        if (input.EndsWith(".</i>..", StringComparison.Ordinal))
            input = input.Remove(input.Length - 7, 7) + "...</i>";
        input = input.Replace(".</i> . ." + Environment.NewLine, "...</i>" + Environment.NewLine);

        input = input.Replace(".. ?", "..?");
        input = input.Replace("..?", "...?");
        input = input.Replace("....?", "...?");

        input = input.Replace(".. !", "..!");
        input = input.Replace("..!", "...!");
        input = input.Replace("....!", "...!");

        input = input.Replace("... ?", "...?");
        input = input.Replace("... !", "...!");

        input = input.Replace("....", "...");
        input = input.Replace("....", "...");

        // A line continuing the previous line's "..." needs no dialog dash,
        // unless a later line of this text starts with a dash.
        if (input.StartsWith("- ...") && lastLine != null && lastLine.EndsWith("...") && !(input.Contains(Environment.NewLine + "-")))
            input = input.Remove(0, 2);
        if (input.StartsWith("-...") && lastLine != null && lastLine.EndsWith("...") && !(input.Contains(Environment.NewLine + "-")))
            input = input.Remove(0, 1);
    }

    // "-Hello" -> "- Hello" (also after "<i>" and at the start of a following line)
    if (input.Length > 2 && input[0] == '-' && Utilities.UppercaseLetters.Contains(input[1]))
    {
        input = input.Insert(1, " ");
    }

    if (input.Length > 5 && input.StartsWith("<i>-", StringComparison.Ordinal) && Utilities.UppercaseLetters.Contains(input[4]))
    {
        input = input.Insert(4, " ");
    }

    int idx = input.IndexOf(Environment.NewLine + "-", StringComparison.Ordinal);
    if (idx > 0 && idx + Environment.NewLine.Length + 1 < input.Length && Utilities.UppercaseLetters.Contains(input[idx + Environment.NewLine.Length + 1]))
    {
        input = input.Insert(idx + Environment.NewLine.Length + 1, " ");
    }

    idx = input.IndexOf(Environment.NewLine + "<i>-", StringComparison.Ordinal);
    // Bounds check added: without it a text ending in NewLine + "<i>-" read past the end of the string.
    if (idx > 0 && idx + Environment.NewLine.Length + 4 < input.Length && Utilities.UppercaseLetters.Contains(input[idx + Environment.NewLine.Length + 4]))
    {
        input = input.Insert(idx + Environment.NewLine.Length + 4, " ");
    }

    // Upper-case the first letter when there is no previous line or it ended with . ! ? ] or a music note.
    if (string.IsNullOrEmpty(lastLine) ||
        lastLine.EndsWith('.') ||
        lastLine.EndsWith('!') ||
        lastLine.EndsWith('?') ||
        lastLine.EndsWith(']') ||
        lastLine.EndsWith('♪'))
    {
        lastLine = HtmlUtil.RemoveHtmlTags(lastLine);
        var st = new StripableText(input);
        if (lastLine == null || (!lastLine.EndsWith("...") && !EndsWithAbbreviation(lastLine, abbreviationList)))
        {
            if (st.StrippedText.Length > 0 && !char.IsUpper(st.StrippedText[0]) && !st.Pre.EndsWith('[') && !st.Pre.EndsWith('(') && !st.Pre.EndsWith("..."))
            {
                if (!HtmlUtil.StartsWithUrl(st.StrippedText))
                {
                    var uppercaseLetter = char.ToUpper(st.StrippedText[0]);

                    // A leading "l" followed by one of these consonants is taken to be a misread "I".
                    if (st.StrippedText.Length > 1 && uppercaseLetter == 'L' && @"abcdfghjklmnpqrstvwxz".Contains(st.StrippedText[1]))
                        uppercaseLetter = 'I';
                    if ((st.StrippedText.StartsWith("lo ") || st.StrippedText == "lo.") && _threeLetterIsoLanguageName == "ita")
                        uppercaseLetter = 'I';

                    // Dutch: a single letter after an apostrophe ('k, 'm, 'n, 'r, 's, 't) keeps its case.
                    if ((st.StrippedText.StartsWith("k ", StringComparison.Ordinal) || st.StrippedText.StartsWith("m ", StringComparison.Ordinal) || st.StrippedText.StartsWith("n ") || st.StrippedText.StartsWith("r ") || st.StrippedText.StartsWith("s ") || st.StrippedText.StartsWith("t ")) &&
                        st.Pre.EndsWith('\'') && _threeLetterIsoLanguageName == "nld")
                        uppercaseLetter = st.StrippedText[0];

                    if ((st.StrippedText.StartsWith("l-I'll ", StringComparison.Ordinal) || st.StrippedText.StartsWith("l-l'll ", StringComparison.Ordinal)) && _threeLetterIsoLanguageName == "eng")
                    {
                        uppercaseLetter = 'I';
                        st.StrippedText = "I-I" + st.StrippedText.Remove(0, 3);
                    }

                    st.StrippedText = uppercaseLetter + st.StrippedText.Substring(1);
                    input = st.Pre + st.StrippedText + st.Post;
                }
            }
        }
    }

    // lines ending with ". should often end at ... (of no other quotes exists near by)
    if ((lastLine == null || !lastLine.Contains('"')) &&
        input.EndsWith("\".") && input.IndexOf('"') == input.LastIndexOf('"') && input.Length > 3)
    {
        var lastChar = input[input.Length - 3];
        if (!char.IsDigit(lastChar))
        {
            int position = input.Length - 2;
            input = input.Remove(position).Insert(position, "...");
        }
    }

    // change '<number><space>1' to '<number>1'
    if (input.Contains('1'))
    {
        Match match = RegExNumber1.Match(input);
        while (match.Success)
        {
            // Leave the match alone when a '/' and a digit follow it.
            bool doFix = true;
            if (match.Index + 4 < input.Length && input[match.Index + 3] == '/' && char.IsDigit(input[match.Index + 4]))
                doFix = false;

            if (doFix)
            {
                input = input.Substring(0, match.Index + 1) + input.Substring(match.Index + 2);
                match = RegExNumber1.Match(input);
            }
            else
            {
                match = RegExNumber1.Match(input, match.Index + 1);
            }
        }
    }

    // change '' to "
    input = input.Replace("''", "\"");

    // change 'sequeI of' to 'sequel of'
    if (input.Contains('I'))
    {
        var match = RegExUppercaseI.Match(input);
        while (match.Success)
        {
            // Names such as "McI..." / "MacI..." are left untouched.
            bool doFix = true;
            if (match.Index >= 1 && input.Substring(match.Index - 1).StartsWith("Mc"))
                doFix = false;
            if (match.Index >= 2 && input.Substring(match.Index - 2).StartsWith("Mac"))
                doFix = false;

            if (doFix)
                input = input.Substring(0, match.Index + 1) + "l" + input.Substring(match.Index + 2);

            if (match.Index + 1 < input.Length)
                match = RegExUppercaseI.Match(input, match.Index + 1);
            else
                break; // end while
        }
    }

    // change 'NlCE' to 'NICE'
    if (input.Contains('l'))
    {
        var match = RegExLowercaseL.Match(input);
        while (match.Success)
        {
            input = input.Substring(0, match.Index + 1) + "I" + input.Substring(match.Index + 2);
            match = RegExLowercaseL.Match(input);
        }
    }

    return input;
}
/// <summary>
/// Removes hearing-impaired annotations from <paramref name="text"/>. The text is run
/// through <c>RemoveColon</c>, each line is stripped with <c>RemoveHearImpairedTags</c>
/// (lines that start and end with hearing-impaired tags are dropped), empty formatting
/// tags are cleaned up, and dialog dashes are then added or removed to match the lines
/// that are left.
/// </summary>
/// <param name="text">Subtitle text, possibly with several lines and HTML tags.</param>
/// <returns>
/// The cleaned, trimmed text; an empty string when <c>Settings.RemoveWhereContains</c> is on
/// and the text contains one of <c>Settings.RemoveIfTextContains</c>.
/// </returns>
public string RemoveTextFromHearImpaired(string text)
{
    if (Settings.RemoveWhereContains)
    {
        foreach (var removeIfTextContain in Settings.RemoveIfTextContains)
        {
            if (text.Contains(removeIfTextContain))
                return string.Empty;
        }
    }

    string oldText = text;
    text = RemoveColon(text);

    // Characters StripableText may peel off the start / end of a line.
    string pre = " >-\"'‘`´♪¿¡.…—";
    string post = " -\"'`´♪.!?:…—";
    if (Settings.RemoveTextBetweenCustomTags)
    {
        // string.Replace throws on a null or empty search value, so skip unset custom tags.
        if (!string.IsNullOrEmpty(Settings.CustomStart))
            pre = pre.Replace(Settings.CustomStart, string.Empty);
        if (!string.IsNullOrEmpty(Settings.CustomEnd))
            post = post.Replace(Settings.CustomEnd, string.Empty);
    }

    var st = new StripableText(text, pre, post);
    var sb = new StringBuilder();
    var parts = st.StrippedText.Trim().SplitToLines();
    int lineNumber = 0;
    bool removedDialogInFirstLine = false;
    int noOfNamesRemoved = 0;
    int noOfNamesRemovedNotInLineOne = 0;
    foreach (string s in parts)
    {
        var stSub = new StripableText(s, pre, post);

        // Put a stripped question mark back before testing for surrounding tags.
        string tempStrippedtext = stSub.StrippedText;
        if (lineNumber == parts.Length - 1 && st.Post.Contains('?'))
            tempStrippedtext += "?";
        else if (stSub.Post.Contains('?'))
            tempStrippedtext += "?";

        if (!StartAndEndsWithHearImpariedTags(tempStrippedtext))
        {
            if (removedDialogInFirstLine && stSub.Pre.Contains("- ", StringComparison.Ordinal))
                stSub.Pre = stSub.Pre.Replace("- ", string.Empty);

            string newText = stSub.StrippedText;
            newText = RemoveHearImpairedTags(newText);

            if (stSub.StrippedText.Length - newText.Length > 2)
            {
                string removedText = GetRemovedString(stSub.StrippedText, newText);
                if (!IsHIDescription(removedText))
                {
                    noOfNamesRemoved++;
                    if (lineNumber > 0)
                        noOfNamesRemovedNotInLineOne++;
                }
            }
            sb.AppendLine(stSub.Pre + newText + stSub.Post);
        }
        else
        {
            // The whole line is an annotation: it is dropped (nothing is appended).
            if (!IsHIDescription(stSub.StrippedText))
            {
                noOfNamesRemoved++;
                if (lineNumber > 0)
                    noOfNamesRemovedNotInLineOne++;
            }

            if (st.Pre.Contains("- ") && lineNumber == 0)
            {
                st.Pre = st.Pre.Replace("- ", string.Empty);
                removedDialogInFirstLine = true;
            }
            else if (st.Pre == "-" && lineNumber == 0)
            {
                st.Pre = string.Empty;
                removedDialogInFirstLine = true;
            }

            if (st.Pre.Contains("<i>") && stSub.Post.Contains("</i>"))
                st.Pre = st.Pre.Replace("<i>", string.Empty);

            if (s.Contains("<i>") && !s.Contains("</i>") && st.Post.Contains("</i>"))
                st.Post = st.Post.Replace("</i>", string.Empty);
        }
        lineNumber++;
    }

    text = st.Pre + sb.ToString().Trim() + st.Post;
    // NOTE(review): as written this replaces a space with a space (no-op) - presumably
    // meant to collapse double spaces; confirm. Same for the second call below.
    text = text.Replace(" ", " ").Trim();
    text = text.Replace("<i></i>", string.Empty);
    text = text.Replace("<i> </i>", " ");
    text = text.Replace("<b></b>", string.Empty);
    text = text.Replace("<b> </b>", " ");
    text = text.Replace("<u></u>", string.Empty);
    text = text.Replace("<u> </u>", " ");
    text = RemoveEmptyFontTag(text);
    text = text.Replace(" ", " ").Trim();
    text = RemoveColon(text);
    text = RemoveLineIfAllUppercase(text);
    text = RemoveHearImpairedtagsInsideLine(text);
    if (Settings.RemoveInterjections)
        text = RemoveInterjections(text);

    st = new StripableText(text, " >-\"'‘`´♪¿¡.…—", " -\"'`´♪.!?:…—");
    text = st.StrippedText;
    if (StartAndEndsWithHearImpariedTags(text))
    {
        text = RemoveStartEndTags(text);
    }

    text = RemoveHearImpairedTags(text);

    // fix 3 lines to two liners - if only two lines
    if (noOfNamesRemoved >= 1 && Utilities.GetNumberOfLines(text) == 3)
    {
        char[] chars = { '!', '?', '.' };
        string[] a = HtmlUtil.RemoveHtmlTags(text).Replace(" ", string.Empty).Split(chars, StringSplitOptions.RemoveEmptyEntries);
        if (a.Length == 2)
        {
            var temp = new StripableText(text);
            temp.StrippedText = temp.StrippedText.Replace(Environment.NewLine, " ");
            int splitIndex = temp.StrippedText.LastIndexOfAny(chars);
            if (splitIndex > 0)
            {
                text = temp.Pre + temp.StrippedText.Insert(splitIndex + 1, Environment.NewLine) + temp.Post;
            }
        }
    }

    // Two sentences left after a name was removed from a later line: mark them as dialog.
    if (!text.StartsWith('-') && noOfNamesRemoved >= 1 && Utilities.GetNumberOfLines(text) == 2)
    {
        string[] arr = text.SplitToLines();
        string part0 = arr[0].Trim().Replace("</i>", string.Empty).Trim();
        if (!part0.EndsWith(',') && (!part0.EndsWith('-') || noOfNamesRemovedNotInLineOne > 0))
        {
            if (part0.Length > 0 && @".!?".Contains(part0[part0.Length - 1]))
            {
                if (noOfNamesRemovedNotInLineOne > 0)
                {
                    if (!st.Pre.Contains('-'))
                        text = "- " + text.Replace(Environment.NewLine, Environment.NewLine + "- ");
                    if (!text.Contains(Environment.NewLine + "-") && !text.Contains(Environment.NewLine + "<i>-"))
                        text = text.Replace(Environment.NewLine, Environment.NewLine + "- ");
                }
            }
        }
    }

    if (!string.IsNullOrEmpty(text) || (st.Pre.Contains('♪') || st.Post.Contains('♪')))
        text = st.Pre + text + st.Post;

    // The input was a dialog but only one line is left: drop the leading dialog dash.
    if (oldText.TrimStart().StartsWith("- ", StringComparison.Ordinal) && text != null && !text.Contains(Environment.NewLine) &&
        (oldText.Contains(Environment.NewLine + "- ", StringComparison.Ordinal) ||
         oldText.Contains(Environment.NewLine + " - ", StringComparison.Ordinal) ||
         oldText.Contains(Environment.NewLine + "<i>- ", StringComparison.Ordinal) ||
         oldText.Contains(Environment.NewLine + "<i> - ", StringComparison.Ordinal)))
    {
        text = text.TrimStart().TrimStart('-').TrimStart();
    }

    if (oldText.TrimStart().StartsWith("-", StringComparison.Ordinal) && !oldText.TrimStart().StartsWith("--", StringComparison.Ordinal) && text != null && !text.Contains(Environment.NewLine) &&
        (oldText.Contains(Environment.NewLine + "-", StringComparison.Ordinal) && !oldText.Contains(Environment.NewLine + "--", StringComparison.Ordinal) ||
         oldText.Contains(Environment.NewLine + " - ", StringComparison.Ordinal) ||
         oldText.Contains(Environment.NewLine + "<i>- ", StringComparison.Ordinal) ||
         oldText.Contains(Environment.NewLine + "<i> - ", StringComparison.Ordinal)))
    {
        text = text.TrimStart().TrimStart('-').TrimStart();
    }

    if (oldText.TrimStart().StartsWith("<i>- ", StringComparison.Ordinal) && text != null && text.StartsWith("<i>- ", StringComparison.Ordinal) && !text.Contains(Environment.NewLine, StringComparison.Ordinal) &&
        (oldText.Contains(Environment.NewLine + "- ", StringComparison.Ordinal) ||
         oldText.Contains(Environment.NewLine + " - ", StringComparison.Ordinal) ||
         oldText.Contains(Environment.NewLine + "<i>- ", StringComparison.Ordinal) ||
         oldText.Contains(Environment.NewLine + "<i> - ", StringComparison.Ordinal)))
    {
        text = text.Remove(3, 2);
    }

    if (text != null && !text.Contains(Environment.NewLine, StringComparison.Ordinal) &&
        (oldText.Contains(':') && !text.Contains(':') ||
         oldText.Contains('[') && !text.Contains('[') ||
         oldText.Contains('(') && !text.Contains('(') ||
         oldText.Contains('{') && !text.Contains('{')) &&
        (oldText.Contains(Environment.NewLine + "- ", StringComparison.Ordinal) ||
         oldText.Contains(Environment.NewLine + " - ", StringComparison.Ordinal) ||
         oldText.Contains(Environment.NewLine + "<i>- ", StringComparison.Ordinal) ||
         oldText.Contains(Environment.NewLine + "<i> - ", StringComparison.Ordinal)))
    {
        text = text.TrimStart().TrimStart('-').TrimStart();
    }

    if (oldText != text)
    {
        // insert spaces before "-"
        text = text.Replace(Environment.NewLine + "- <i>", Environment.NewLine + "<i>- ");
        text = text.Replace(Environment.NewLine + "-<i>", Environment.NewLine + "<i>- ");

        if (text.StartsWith('-') && text.Length > 2 && text[1] != ' ' && text[1] != '-')
            text = text.Insert(1, " ");

        if (text.StartsWith("<i>-", StringComparison.Ordinal) && text.Length > 5 && text[4] != ' ' && text[4] != '-')
            text = text.Insert(4, " ");

        if (text.Contains(Environment.NewLine + "-", StringComparison.Ordinal))
        {
            int index = text.IndexOf(Environment.NewLine + "-", StringComparison.Ordinal);
            if (index + 4 < text.Length && text[index + Environment.NewLine.Length + 1] != ' ' && text[index + Environment.NewLine.Length + 1] != '-')
                text = text.Insert(index + Environment.NewLine.Length + 1, " ");
        }

        if (text.Contains(Environment.NewLine + "<i>-", StringComparison.Ordinal))
        {
            int index = text.IndexOf(Environment.NewLine + "<i>-", StringComparison.Ordinal);
            int afterDash = index + Environment.NewLine.Length + 4;
            // Guard on the index that is actually read; the former "index + 5" check was one
            // short for a two-character line break and could read past the end of the text.
            if (afterDash < text.Length && text[afterDash] != ' ' && text[afterDash] != '-')
                text = text.Insert(afterDash, " ");
        }
    }
    return text.Trim();
}
/// <summary>
/// Returns the text of paragraph <paramref name="i"/> with its dialog hyphen removed when
/// exactly one line of the paragraph (HTML tags stripped) starts with a hyphen.
/// </summary>
/// <remarks>
/// The text is returned unchanged when the previous paragraph ends with a single hyphen
/// (an ending of "--" does not block the fix), or when the paragraph has two lines of which
/// one starts with a hyphen and the other contains ": ".
/// Text starting with "&lt;font " is handled separately: a trailing "-" or "- " at the end of
/// <c>StripableText.Pre</c> is trimmed away.
/// The paragraph itself is only read; the corrected text is returned to the caller.
/// </remarks>
/// <param name="subtitle">Subtitle holding the paragraph and the paragraph before it.</param>
/// <param name="i">Index into <c>subtitle.Paragraphs</c> of the paragraph to fix.</param>
/// <returns>The paragraph text, with the hyphen removed where the rules above allow it.</returns>
public static string FixHyphensRemove(Subtitle subtitle, int i)
{
    Paragraph p = subtitle.Paragraphs[i];
    string text = p.Text;

    if (text.TrimStart().StartsWith('-') ||
        text.TrimStart().StartsWith("<i>-", StringComparison.OrdinalIgnoreCase) ||
        text.TrimStart().StartsWith("<i> -", StringComparison.OrdinalIgnoreCase) ||
        text.Contains(Environment.NewLine + '-') ||
        text.Contains(Environment.NewLine + " -") ||
        text.Contains(Environment.NewLine + "<i>-") ||
        text.Contains(Environment.NewLine + "<i> -") ||
        text.Contains(Environment.NewLine + "<I>-") ||
        text.Contains(Environment.NewLine + "<I> -"))
    {
        Paragraph prev = subtitle.GetParagraphOrDefault(i - 1);
        if (prev == null || !HtmlUtil.RemoveHtmlTags(prev.Text).TrimEnd().EndsWith('-') || HtmlUtil.RemoveHtmlTags(prev.Text).TrimEnd().EndsWith("--", StringComparison.Ordinal))
        {
            // text still equals p.Text here, so one tag-stripped split serves both checks below.
            string[] noTagLines = HtmlUtil.RemoveHtmlTags(text).SplitToLines();
            int startHyphenCount = noTagLines.Count(line => line.TrimStart().StartsWith('-'));
            if (startHyphenCount == 1)
            {
                bool remove = true;
                if (noTagLines.Length == 2)
                {
                    // A hyphen line paired with a line containing ": " is left alone.
                    if (noTagLines[0].TrimStart().StartsWith('-') && noTagLines[1].Contains(": "))
                    {
                        remove = false;
                    }
                    if (noTagLines[1].TrimStart().StartsWith('-') && noTagLines[0].Contains(": "))
                    {
                        remove = false;
                    }
                }

                if (remove)
                {
                    // Every entry condition above requires a '-' in the text, so idx is never negative here.
                    int idx = text.IndexOf('-');
                    StripableText st = new StripableText(text);
                    if (idx < 5 && st.Pre.Length >= idx)
                    {
                        // Hyphen sits at the very start of the text: strip it, then up to two more
                        // hyphens that are still within the first characters and inside st.Pre.
                        text = text.Remove(idx, 1).TrimStart();
                        idx = text.IndexOf('-');
                        st = new StripableText(text);
                        if (idx < 5 && idx >= 0 && st.Pre.Length >= idx)
                        {
                            text = text.Remove(idx, 1).TrimStart();
                            st = new StripableText(text);
                        }
                        idx = text.IndexOf('-');
                        if (idx < 5 && idx >= 0 && st.Pre.Length >= idx)
                        {
                            text = text.Remove(idx, 1).TrimStart();
                        }
                        text = RemoveSpacesBeginLine(text);
                    }
                    else
                    {
                        text = RemoveLeadingHyphenAfterFirstLine(text);
                    }
                }
            }
        }
    }
    else if (text.StartsWith("<font ", StringComparison.Ordinal))
    {
        Paragraph prev = subtitle.GetParagraphOrDefault(i - 1);
        if (prev == null || !HtmlUtil.RemoveHtmlTags(prev.Text).TrimEnd().EndsWith('-') || HtmlUtil.RemoveHtmlTags(prev.Text).TrimEnd().EndsWith("--", StringComparison.Ordinal))
        {
            StripableText st = new StripableText(text);
            if (st.Pre.EndsWith('-') || st.Pre.EndsWith("- ", StringComparison.Ordinal))
            {
                text = st.Pre.TrimEnd('-', ' ') + st.StrippedText + st.Post;
            }
        }
    }
    return text;
}

/// <summary>
/// Removes the hyphen that starts a line located after the first line break, plus one
/// directly following line-leading hyphen (e.g. "- - text" or "-- text").
/// </summary>
/// <remarks>
/// The previous guard compared <c>indexOfNewLine + 5</c> with <c>indexOfNewLine</c>, which is
/// always true, so any hyphen after the first line break - including one inside a word such
/// as "well-known" - could be deleted. Only hyphens preceded on their own line by nothing
/// but tags and white space are removed now.
/// </remarks>
private static string RemoveLeadingHyphenAfterFirstLine(string text)
{
    int indexOfNewLine = text.IndexOf(Environment.NewLine, StringComparison.Ordinal);
    if (indexOfNewLine <= 0)
    {
        return text;
    }

    int idx = IndexOfLineLeadingHyphen(text, indexOfNewLine);
    if (idx < 0)
    {
        return text;
    }

    text = text.Remove(idx, 1).TrimStart().Replace(Environment.NewLine + " ", Environment.NewLine);

    // TrimStart may have shortened the text; searching from the old offset could start past
    // the end of the string and throw, so the line break is located again.
    indexOfNewLine = text.IndexOf(Environment.NewLine, StringComparison.Ordinal);
    if (indexOfNewLine < 0)
    {
        return text;
    }

    idx = IndexOfLineLeadingHyphen(text, indexOfNewLine);
    if (idx >= 0)
    {
        text = text.Remove(idx, 1).TrimStart();
        text = RemoveSpacesBeginLine(text);
    }
    return text;
}

/// <summary>
/// Finds the first hyphen at or after <paramref name="startIndex"/> that is preceded on its
/// own line only by HTML tags and/or white space. Returns -1 when there is none.
/// </summary>
private static int IndexOfLineLeadingHyphen(string text, int startIndex)
{
    for (int idx = text.IndexOf('-', startIndex); idx >= 0; idx = text.IndexOf('-', idx + 1))
    {
        // Both "\r\n" and "\n" end in '\n', so the character after the previous '\n' starts the line.
        int lineStart = text.LastIndexOf('\n', idx) + 1;
        string prefix = text.Substring(lineStart, idx - lineStart);
        if (HtmlUtil.RemoveHtmlTags(prefix).Trim().Length == 0)
        {
            return idx;
        }
    }
    return -1;
}
/// <summary>
/// Rebuilds <c>_subtitle</c>, joining a single-line paragraph with the paragraph after it
/// (separated by a line break) when the first one looks like it continues.
/// </summary>
/// <remarks>
/// A pair is not joined when the first paragraph already contains a line break, when there
/// is no following paragraph, when the first paragraph ends with '!' or '.' and its stripped
/// text starts with an uppercase letter, or when either text is within five characters of
/// the configured maximum line length. A joined paragraph is created with only its
/// <c>Text</c> set; all other paragraphs are copied.
/// </remarks>
private void MergeLinesWithContinuation()
{
    var merged = new Subtitle();
    int index = 0;

    while (index < _subtitle.Paragraphs.Count)
    {
        Paragraph current = _subtitle.Paragraphs[index];
        Paragraph following = _subtitle.GetParagraphOrDefault(index + 1);

        // Only single-line paragraphs that have a successor are candidates.
        bool canMerge = !current.Text.Contains(Environment.NewLine) && following != null;

        if (canMerge)
        {
            string trimmedEnd = current.Text.TrimEnd();
            if (trimmedEnd.EndsWith('!') || trimmedEnd.EndsWith('.'))
            {
                // Ends like a sentence and starts with a capital: treat it as complete.
                var stripable = new StripableText(current.Text);
                if (stripable.StrippedText.Length > 0)
                {
                    string firstLetter = stripable.StrippedText[0].ToString(CultureInfo.InvariantCulture);
                    if (Utilities.UppercaseLetters.Contains(firstLetter))
                    {
                        canMerge = false;
                    }
                }
            }
        }

        if (canMerge)
        {
            // Lines already close to the maximum length are kept apart.
            int lengthLimit = Configuration.Settings.General.SubtitleLineMaximumLength - 5;
            if (current.Text.Length >= lengthLimit || following.Text.Length >= lengthLimit)
            {
                canMerge = false;
            }
        }

        if (canMerge)
        {
            merged.Paragraphs.Add(new Paragraph { Text = current.Text + Environment.NewLine + following.Text });
            index += 2; // the following paragraph has been consumed
        }
        else
        {
            merged.Paragraphs.Add(new Paragraph(current));
            index++;
        }
    }

    _subtitle = merged;
}