/// <summary>
/// Returns <paramref name="text"/> with its first character upper-cased.
/// The input is returned untouched when it is null/empty, when it does not start
/// with a letter, when it already starts with an uppercase letter, or when it looks
/// like an "iPhone"-style word (lowercase 'i' directly followed by an uppercase letter).
/// </summary>
/// <param name="text">Text whose first letter should be capitalized.</param>
/// <param name="callbacks">Supplies the encoding and language used for the Turkish 'i' check.</param>
/// <returns>The (possibly) capitalized text.</returns>
private static string ToUpperFirstLetter(string text, IFixCallbacks callbacks)
{
    if (string.IsNullOrEmpty(text))
    {
        return text;
    }

    char first = text[0];
    bool isLowercaseCandidate = char.IsLetter(first) && !char.IsUpper(first);
    if (!isLowercaseCandidate)
    {
        return text;
    }

    // Skip words like iPhone, iPad...
    bool looksLikeProductName = first == 'i' && text.Length > 1 && char.IsUpper(text[1]);
    if (looksLikeProductName)
    {
        return text;
    }

    // The Turkish dotless/dotted 'i' has its own uppercase mapping, provided by Helper.
    if (Helper.IsTurkishLittleI(first, callbacks.Encoding, callbacks.Language))
    {
        return Helper.GetTurkishUppercaseLetter(first, callbacks.Encoding) + text.Substring(1);
    }

    return char.ToUpper(first) + text.Substring(1);
}
/// <summary>
/// Returns <paramref name="text"/> with its first character upper-cased, taking the
/// text that precedes it into account.
/// </summary>
/// <remarks>
/// The text is returned unchanged when:
/// <list type="bullet">
/// <item>it is null/empty, does not start with a letter, or already starts uppercase;</item>
/// <item><paramref name="textBefore"/> ends with "..." (too ambiguous), unless the language is
/// "en" and the text starts with the pronoun "i ";</item>
/// <item><paramref name="textBefore"/> ends with " - " but not with ". - ";</item>
/// <item>it looks like an "iPhone"-style word (lowercase 'i' followed by an uppercase letter).</item>
/// </list>
/// </remarks>
/// <param name="textBefore">Text preceding <paramref name="text"/>; may be null.</param>
/// <param name="text">Text whose first letter should be capitalized.</param>
/// <param name="callbacks">Supplies the language and encoding used by the checks.</param>
/// <returns>The (possibly) capitalized text.</returns>
private static string ToUpperFirstLetter(string textBefore, string text, IFixCallbacks callbacks)
{
    if (string.IsNullOrEmpty(text) || !char.IsLetter(text[0]) || char.IsUpper(text[0]))
    {
        return text;
    }

    if (textBefore != null)
    {
        if (textBefore.EndsWith("...", System.StringComparison.Ordinal))
        {
            // Ordinal comparison: this is a literal prefix test, not a linguistic one,
            // and must not vary with the current thread culture.
            bool isEnglishPronounI = callbacks.Language == "en" && text.StartsWith("i ", System.StringComparison.Ordinal);
            if (!isEnglishPronounI)
            {
                return text; // too hard to say if uppercase after "..."
            }
        }

        if (textBefore.EndsWith(" - ", System.StringComparison.Ordinal) && !textBefore.EndsWith(". - ", System.StringComparison.Ordinal))
        {
            return text;
        }
    }

    // Skip words like iPhone, iPad...
    if (text[0] == 'i' && text.Length > 1 && char.IsUpper(text[1]))
    {
        return text;
    }

    if (Helper.IsTurkishLittleI(text[0], callbacks.Encoding, callbacks.Language))
    {
        return Helper.GetTurkishUppercaseLetter(text[0], callbacks.Encoding) + text.Substring(1);
    }

    return char.ToUpper(text[0]) + text.Substring(1);
}
// Literal prefixes (English only) where a lowercase 'l' at the start is treated as a
// mis-recognized uppercase 'I'. Some entries are subsumed by shorter ones ("ls " by "ls");
// they are kept so the list mirrors the original conditions one-to-one.
private static readonly string[] LowercaseLForIPrefixes =
{
    "l ", "l-I", "ls ", "lnterested", "lsn't ", "ldiot", "ln", "lm", "ls", "lt", "lf ", "lc", "l'm ",
};

// Second-line dash markers searched (after the initial " -" marker) in this order.
private static readonly string[] SecondLineDashMarkers =
{
    "- <i> ♪", "-", "<i>-", "<i> -", "♪ -", "♪ <i> -", "♪ <i>-",
};

/// <summary>
/// Capitalizes the first letter of a paragraph (and of its second line / dialog lines)
/// when the preceding text ends a sentence, and returns the resulting <c>p.Text</c>.
/// </summary>
/// <remarks>
/// Three passes are applied in order, each re-reading <c>p.Text</c>:
/// 1) first letter of the paragraph, judged against <paramref name="prev"/>;
/// 2) first letter of the second line of a two-line paragraph, judged against the first line,
///    plus capitalization after "- " / "&lt;i&gt;- " when both lines are dialog lines;
/// 3) first letter after a dash marker that directly follows a line break.
/// <paramref name="p"/> is modified in place.
/// </remarks>
/// <param name="p">Paragraph to fix; its Text is updated.</param>
/// <param name="prev">Previous paragraph, or null (treated as if the previous text ended a sentence).</param>
/// <param name="encoding">Encoding passed to the Turkish 'i' helpers.</param>
/// <param name="language">Language code; "en" enables the English-only checks.</param>
/// <returns>The updated <c>p.Text</c>.</returns>
private static string DoFix(Paragraph p, Paragraph prev, Encoding encoding, string language)
{
    // Pass 1: first letter of the paragraph.
    if (p.Text != null && p.Text.Length > 1)
    {
        string text = p.Text;
        string pre = SplitOffLeadingMarkup(ref text);
        char firstLetter = text[0];

        // " ." is a sentinel meaning "no previous paragraph" and always counts as end of sentence.
        string prevText = " .";
        if (prev != null)
        {
            prevText = HtmlUtil.RemoveHtmlTags(prev.Text);
        }

        bool isPrevEndOfLine = Helper.IsPreviousTextEndOfParagraph(prevText);
        if (prevText == " .")
        {
            isPrevEndOfLine = true;
        }

        if (!StartsWithUrlPrefix(text) &&
            (char.IsLower(firstLetter) || Helper.IsTurkishLittleI(firstLetter, encoding, language)) &&
            !char.IsDigit(firstLetter) &&
            isPrevEndOfLine &&
            !EndsWithKnownEnglishAbbreviation(prevText, language))
        {
            p.Text = pre + CapitalizeStart(text, encoding, language);
        }
    }

    // Pass 2: second line of a two-line paragraph.
    if (p.Text != null && p.Text.Contains(Environment.NewLine))
    {
        var arr = p.Text.SplitToLines();
        if (arr.Length == 2 && arr[1].Length > 1)
        {
            string text = arr[1];
            string pre = SplitOffLeadingMarkup(ref text);
            char firstLetter = text[0];
            string prevText = HtmlUtil.RemoveHtmlTags(arr[0]);
            bool isPrevEndOfLine = Helper.IsPreviousTextEndOfParagraph(prevText);

            if (!StartsWithUrlPrefix(text) &&
                (char.IsLower(firstLetter) || Helper.IsTurkishLittleI(firstLetter, encoding, language)) &&
                !prevText.EndsWith("...", StringComparison.Ordinal) &&
                isPrevEndOfLine &&
                !EndsWithKnownEnglishAbbreviation(prevText, language))
            {
                p.Text = arr[0] + Environment.NewLine + pre + CapitalizeStart(text, encoding, language);
            }

            // Dialog: both lines start with exactly one dash (optionally right after "<i>").
            arr = p.Text.SplitToLines();
            bool firstIsDialog = arr[0].StartsWith('-') || arr[0].StartsWith("<i>-", StringComparison.Ordinal);
            bool secondIsDialog = arr[1].StartsWith('-') || arr[1].StartsWith("<i>-", StringComparison.Ordinal);
            bool anyDoubleDash =
                arr[0].StartsWith("--", StringComparison.Ordinal) ||
                arr[0].StartsWith("<i>--", StringComparison.Ordinal) ||
                arr[1].StartsWith("--", StringComparison.Ordinal) ||
                arr[1].StartsWith("<i>--", StringComparison.Ordinal);

            if (firstIsDialog && secondIsDialog && !anyDoubleDash)
            {
                // Second dialog line: capitalize when the first line ended a sentence.
                if (isPrevEndOfLine && arr[1].StartsWith("<i>- ", StringComparison.Ordinal) && arr[1].Length > 6)
                {
                    p.Text = arr[0] + Environment.NewLine + "<i>- " + char.ToUpper(arr[1][5]) + arr[1].Remove(0, 6);
                }
                else if (isPrevEndOfLine && arr[1].StartsWith("- ", StringComparison.Ordinal) && arr[1].Length > 3)
                {
                    p.Text = arr[0] + Environment.NewLine + "- " + char.ToUpper(arr[1][2]) + arr[1].Remove(0, 3);
                }

                // First dialog line: the previous paragraph is only considered when it ended
                // less than 10000 ms before this paragraph starts; otherwise the sentinel is used.
                arr = p.Text.SplitToLines();
                prevText = " .";
                if (prev != null && p.StartTime.TotalMilliseconds - 10000 < prev.EndTime.TotalMilliseconds)
                {
                    prevText = HtmlUtil.RemoveHtmlTags(prev.Text);
                }

                bool isPrevLineEndOfLine = Helper.IsPreviousTextEndOfParagraph(prevText);
                if (isPrevLineEndOfLine && arr[0].StartsWith("<i>- ", StringComparison.Ordinal) && arr[0].Length > 6)
                {
                    p.Text = "<i>- " + char.ToUpper(arr[0][5]) + arr[0].Remove(0, 6) + Environment.NewLine + arr[1];
                }
                else if (isPrevLineEndOfLine && arr[0].StartsWith("- ", StringComparison.Ordinal) && arr[0].Length > 3)
                {
                    p.Text = "- " + char.ToUpper(arr[0][2]) + arr[0].Remove(0, 3) + Environment.NewLine + arr[1];
                }
            }
        }
    }

    // Pass 3: first letter after a dash marker that follows a line break.
    if (p.Text != null && p.Text.Length > 4)
    {
        // For the initial " -" marker len stays 0, so the substring handed to StripableText
        // starts at the line break itself; for every other marker it starts right after the marker.
        int len = 0;
        int indexOfNewLine = p.Text.IndexOf(Environment.NewLine + " -", 1, StringComparison.Ordinal);
        foreach (string marker in SecondLineDashMarkers)
        {
            if (indexOfNewLine >= 0)
            {
                break;
            }

            indexOfNewLine = p.Text.IndexOf(Environment.NewLine + marker, 1, StringComparison.Ordinal);
            len = marker.Length;
        }

        if (indexOfNewLine > 0)
        {
            int tailStart = indexOfNewLine + len;
            string text = p.Text.Substring(tailStart);
            var st = new StripableText(text);

            // Nothing is changed when the stripped prefix ends with '[' or contains "...".
            if (st.StrippedText.Length > 0 && !st.Pre.EndsWith('[') && !st.Pre.Contains("..."))
            {
                char first = st.StrippedText[0];
                if (Helper.IsTurkishLittleI(first, encoding, language))
                {
                    text = st.Pre + Helper.GetTurkishUppercaseLetter(first, encoding) + st.StrippedText.Substring(1) + st.Post;
                    p.Text = p.Text.Substring(0, tailStart) + text;
                }
                else if (first != char.ToUpper(first))
                {
                    text = st.Pre + char.ToUpper(first) + st.StrippedText.Substring(1) + st.Post;
                    p.Text = p.Text.Substring(0, tailStart) + text;
                }
            }
        }
    }

    return p.Text;
}

/// <summary>
/// Removes leading italic tags, music notes and single spaces from <paramref name="text"/>
/// (in a fixed order) and returns everything that was removed so it can be re-attached.
/// Stripped pieces are accumulated, so nested tags such as "&lt;i&gt;&lt;I&gt;" are restored in full.
/// </summary>
private static string SplitOffLeadingMarkup(ref string text)
{
    string pre = string.Empty;

    foreach (string tag in new[] { "<i> ", "<i>", "<I> ", "<I>" })
    {
        // Only strip when something remains after the tag.
        if (text.Length > tag.Length && text.StartsWith(tag, StringComparison.Ordinal))
        {
            pre += tag;
            text = text.Substring(tag.Length);
        }
    }

    foreach (char symbol in new[] { '♪', ' ', '♫', ' ' })
    {
        if (text.Length > 2 && text.StartsWith(symbol))
        {
            pre += symbol;
            text = text.Substring(1);
        }
    }

    return pre;
}

/// <summary>Returns true when <paramref name="text"/> starts with "www.", "http:" or "https:".</summary>
private static bool StartsWithUrlPrefix(string text)
{
    return text.StartsWith("www.", StringComparison.Ordinal) ||
           text.StartsWith("http:", StringComparison.Ordinal) ||
           text.StartsWith("https:", StringComparison.Ordinal);
}

/// <summary>Returns true for English text ending with " o.r.", " a.m." or " p.m.".</summary>
private static bool EndsWithKnownEnglishAbbreviation(string prevText, string language)
{
    return language == "en" &&
           (prevText.EndsWith(" o.r.", StringComparison.Ordinal) ||
            prevText.EndsWith(" a.m.", StringComparison.Ordinal) ||
            prevText.EndsWith(" p.m.", StringComparison.Ordinal));
}

/// <summary>Returns true when a leading lowercase 'l' should be replaced by an uppercase 'I'.</summary>
private static bool StartsWithLowercaseLInPlaceOfI(string text, string language)
{
    // NOTE(review): "l am " is accepted for every language, exactly as the original
    // expression evaluated (the check sat outside the language == "en" group).
    // Confirm whether it was meant to be English-only.
    if (text.StartsWith("l am ", StringComparison.Ordinal))
    {
        return true;
    }

    if (language != "en")
    {
        return false;
    }

    foreach (string prefix in LowercaseLForIPrefixes)
    {
        if (text.StartsWith(prefix, StringComparison.Ordinal))
        {
            return true;
        }
    }

    return false;
}

/// <summary>
/// Returns <paramref name="text"/> with its first character replaced by the Turkish
/// uppercase letter, by "I" (lowercase-l-for-I case), or by its regular uppercase form.
/// </summary>
private static string CapitalizeStart(string text, Encoding encoding, string language)
{
    char firstLetter = text[0];
    if (Helper.IsTurkishLittleI(firstLetter, encoding, language))
    {
        return Helper.GetTurkishUppercaseLetter(firstLetter, encoding) + text.Substring(1);
    }

    if (StartsWithLowercaseLInPlaceOfI(text, language)) // l > I
    {
        return "I" + text.Substring(1);
    }

    return char.ToUpper(firstLetter) + text.Substring(1);
}
/// <summary>
/// Capitalizes the first letter that follows one of <c>ExpectedChars</c> plus a space
/// inside each paragraph, skipping positions preceded by a digit and positions that
/// <c>IsAbbreviation</c> reports as abbreviations. Every changed paragraph is reported
/// through <paramref name="callbacks"/>.
/// </summary>
/// <param name="subtitle">Subtitle whose paragraphs are scanned and updated in place.</param>
/// <param name="callbacks">Provides encoding/language, fix approval and result reporting.</param>
public void Fix(Subtitle subtitle, IFixCallbacks callbacks)
{
    var language = Configuration.Settings.Language.FixCommonErrors;
    string fixAction = language.StartWithUppercaseLetterAfterPeriodInsideParagraph;
    int noOfFixes = 0;

    for (int index = 0; index < subtitle.Paragraphs.Count; index++)
    {
        Paragraph paragraph = subtitle.Paragraphs[index];
        string before = paragraph.Text;
        var outer = new StripableText(paragraph.Text);

        if (paragraph.Text.Length > 3)
        {
            // NOTE(review): both Replace arguments look identical here; if one of them is meant
            // to be a double (or non-breaking) space, verify the literal survived formatting.
            string body = outer.StrippedText.Replace(" ", " ");
            int pos = body.IndexOfAny(ExpectedChars);

            while (pos >= 0 && pos < body.Length)
            {
                // Ignore periods after a number; otherwise require a space right after the
                // punctuation and more than three characters beyond it.
                bool followsDigit = pos > 0 && char.IsDigit(body[pos - 1]);
                if (!followsDigit &&
                    pos + 4 < body.Length &&
                    body[pos + 1] == ' ' &&
                    !IsAbbreviation(body, pos, callbacks))
                {
                    var tail = new StripableText(body.Substring(pos + 2));
                    string word = tail.StrippedText;

                    if (word.Length > 0)
                    {
                        char first = word[0];
                        bool turkishI = Helper.IsTurkishLittleI(first, callbacks.Encoding, callbacks.Language);
                        bool canCapitalize = turkishI ||
                                             Configuration.Settings.General.UppercaseLetters.Contains(char.ToUpper(first));

                        // Leave a leading 's' alone when the stripped prefix contains an apostrophe.
                        if (canCapitalize &&
                            word.Length > 1 &&
                            !(tail.Pre.Contains('\'') && word.StartsWith('s')))
                        {
                            string capitalized;
                            if (turkishI)
                            {
                                capitalized = Helper.GetTurkishUppercaseLetter(first, callbacks.Encoding) + word.Substring(1);
                            }
                            else
                            {
                                capitalized = char.ToUpper(first) + word.Substring(1);
                            }

                            body = body.Substring(0, pos + 2) + tail.Pre + capitalized + tail.Post;
                            if (callbacks.AllowFix(paragraph, fixAction))
                            {
                                paragraph.Text = outer.Pre + body + outer.Post;
                            }
                        }
                    }
                }

                // Jump ahead four characters before searching for the next candidate.
                pos += 4;
                if (pos < body.Length)
                {
                    pos = body.IndexOfAny(ExpectedChars, pos);
                }
            }
        }

        if (before != paragraph.Text)
        {
            noOfFixes++;
            callbacks.AddFixToListView(paragraph, fixAction, before, paragraph.Text);
        }
    }

    callbacks.UpdateFixStatus(noOfFixes, language.StartWithUppercaseLetterAfterPeriodInsideParagraph, noOfFixes.ToString(CultureInfo.InvariantCulture));
}
/// <summary>
/// Capitalizes the first letter following a ':' or ';' — both across paragraphs (when the
/// previous paragraph ends with one) and inside a paragraph — and reports every approved
/// change through <paramref name="callbacks"/>.
/// </summary>
/// <remarks>
/// Each paragraph is edited on a copy; the original is only updated when the text changed
/// and <c>callbacks.AllowFix</c> approves. A lowercase 'i' directly followed by an uppercase
/// letter ("iPhone") is left untouched.
/// </remarks>
/// <param name="subtitle">Subtitle whose paragraphs are scanned and updated.</param>
/// <param name="callbacks">Provides encoding/language, fix approval and result reporting.</param>
public void Fix(Subtitle subtitle, IFixCallbacks callbacks)
{
    string fixAction = Language.StartWithUppercaseLetterAfterColon;
    int noOfFixes = 0;
    for (int i = 0; i < subtitle.Paragraphs.Count; i++)
    {
        var p = new Paragraph(subtitle.Paragraphs[i]);
        Paragraph last = subtitle.GetParagraphOrDefault(i - 1);
        string oldText = p.Text;
        int skipCount = 0;

        // Previous paragraph ended with ':' or ';' -> capitalize the start of this one.
        if (last != null)
        {
            string lastText = HtmlUtil.RemoveHtmlTags(last.Text);
            if (lastText.EndsWith(':') || lastText.EndsWith(';'))
            {
                var st = new StrippableText(p.Text);
                if (st.StrippedText.Length > 0 && st.StrippedText[0] != char.ToUpper(st.StrippedText[0]))
                {
                    p.Text = st.Pre + char.ToUpper(st.StrippedText[0]) + st.StrippedText.Substring(1) + st.Post;
                }
            }
        }

        if (oldText.Contains(ExpectedChars))
        {
            bool lastWasColon = false;
            for (int j = 0; j < p.Text.Length; j++)
            {
                var s = p.Text[j];
                if (s == ':' || s == ';')
                {
                    lastWasColon = true;
                }
                else if (lastWasColon)
                {
                    // skip whitespace index
                    if (j + 2 < p.Text.Length && p.Text[j] == ' ')
                    {
                        s = p.Text[++j];
                    }

                    var startFromJ = p.Text.Substring(j);
                    if (startFromJ.Length > 3 && startFromJ[0] == '<' && startFromJ[2] == '>' &&
                        (startFromJ[1] == 'i' || startFromJ[1] == 'b' || startFromJ[1] == 'u'))
                    {
                        // Simple <i>/<b>/<u> tag: jump to its closing '>'.
                        skipCount = 2;
                    }
                    else if (startFromJ.StartsWith("<font ", StringComparison.OrdinalIgnoreCase) && startFromJ.Contains('>'))
                    {
                        // <font ...> tag: jump to its closing '>' (offset relative to j).
                        skipCount = startFromJ.IndexOf('>', 6);
                    }
                    else if (Helper.IsTurkishLittleI(s, callbacks.Encoding, callbacks.Language))
                    {
                        p.Text = p.Text.Remove(j, 1).Insert(j, Helper.GetTurkishUppercaseLetter(s, callbacks.Encoding).ToString(CultureInfo.InvariantCulture));
                        lastWasColon = false;
                    }
                    else if (char.IsLower(s))
                    {
                        // Skip words like iPhone: only a real uppercase letter after the 'i'
                        // counts. (Comparing with char.ToUpper would also match spaces, digits
                        // and punctuation and leave e.g. ": i think" uncapitalized.)
                        bool change = !(s == 'i' && p.Text.Length > j + 1 && char.IsUpper(p.Text[j + 1]));
                        if (change)
                        {
                            p.Text = p.Text.Remove(j, 1).Insert(j, char.ToUpper(s).ToString(CultureInfo.InvariantCulture));
                        }

                        lastWasColon = false;
                    }
                    else if (!(" " + Environment.NewLine).Contains(s))
                    {
                        // Any other non-whitespace character ends the "after colon" state.
                        lastWasColon = false;
                    }

                    // move the: 'j' pointer and reset skipCount to 0
                    if (skipCount > 0)
                    {
                        j += skipCount;
                        skipCount = 0;
                    }
                }
            }
        }

        if (oldText != p.Text && callbacks.AllowFix(p, fixAction))
        {
            noOfFixes++;
            subtitle.Paragraphs[i].Text = p.Text;
            callbacks.AddFixToListView(subtitle.Paragraphs[i], fixAction, oldText, p.Text);
        }
    }

    callbacks.UpdateFixStatus(noOfFixes, Language.StartWithUppercaseLetterAfterColon);
}