/// <summary>
/// Checks how an italic-wrapped "Hi!" is split: the opening tag ends up in Pre,
/// the exclamation mark plus closing tag in Post, and the bare word in StrippedText.
/// </summary>
public void StrippableTextItalic()
{
    var st = new StrippableText("<i>Hi!</i>");

    // Assert.AreEqual takes (expected, actual); keeping that order makes failure messages read correctly.
    Assert.AreEqual("<i>", st.Pre);
    Assert.AreEqual("!</i>", st.Post);
    Assert.AreEqual("Hi", st.StrippedText);
}
/// <summary>
/// Checks that a leading "{MAN}" group is left inside StrippedText (Pre stays empty)
/// while the trailing exclamation mark is still moved to Post.
/// </summary>
public void StrippableTextFontDontTouch()
{
    var st = new StrippableText("{MAN} Hi, how are you today!");

    // Assert.AreEqual takes (expected, actual); keeping that order makes failure messages read correctly.
    Assert.AreEqual("", st.Pre);
    Assert.AreEqual("!", st.Post);
    Assert.AreEqual("{MAN} Hi, how are you today", st.StrippedText);
}
/// <summary>
/// Checks how a font-tag-wrapped "Hi!" is split: the opening font tag (with its attribute)
/// ends up in Pre, the exclamation mark plus closing tag in Post, and the bare word in StrippedText.
/// </summary>
public void StrippableTextFont()
{
    var st = new StrippableText("<font color=\"red\">Hi!</font>");

    // Assert.AreEqual takes (expected, actual); keeping that order makes failure messages read correctly.
    Assert.AreEqual("<font color=\"red\">", st.Pre);
    Assert.AreEqual("!</font>", st.Post);
    Assert.AreEqual("Hi", st.StrippedText);
}
/// <summary>
/// Checks that a leading "{\an9}" override block is moved to Pre, the exclamation mark
/// to Post, and that only the bare word remains in StrippedText.
/// </summary>
public void StrippableTextAss()
{
    var st = new StrippableText("{\\an9}Hi!");

    // Assert.AreEqual takes (expected, actual); keeping that order makes failure messages read correctly.
    Assert.AreEqual("{\\an9}", st.Pre);
    Assert.AreEqual("!", st.Post);
    Assert.AreEqual("Hi", st.StrippedText);
}
/// <summary>
/// Checks the single-character case: with nothing to strip, Pre and Post are empty
/// and StrippedText equals the input.
/// </summary>
public void StrippableOnlyText()
{
    var st = new StrippableText("H");

    // Assert.AreEqual takes (expected, actual); keeping that order makes failure messages read correctly.
    Assert.AreEqual("", st.Pre);
    Assert.AreEqual("", st.Post);
    Assert.AreEqual("H", st.StrippedText);
}
/// <summary>
/// Runs the noun-casing fix (using <c>_germanNouns</c>) and then every regex replacement
/// registered in <c>_regularExpressionList</c> over the stripped part of <paramref name="text"/>.
/// </summary>
/// <param name="text">Subtitle text, possibly containing tags. May be null or empty.</param>
/// <returns>
/// The re-cased text, or <paramref name="text"/> unchanged when it is null/empty or when
/// its tag-free form is already entirely upper-case.
/// </returns>
public string UppercaseNouns(string text)
{
    // Guard first: the tag-stripped copy is dereferenced below, so a null input must
    // never reach that point.
    if (string.IsNullOrEmpty(text))
    {
        return text;
    }

    var textNoTags = HtmlUtil.RemoveHtmlTags(text, true);
    if (textNoTags == textNoTags.ToUpperInvariant())
    {
        // Already all upper-case (ignoring tags): nothing to fix.
        return text;
    }

    var st = new StrippableText(text);
    st.FixCasing(_germanNouns, true, false, false, string.Empty);

    // Apply each pattern with its paired replacement to the stripped text only,
    // so the Pre/Post parts are carried through untouched by MergedString.
    foreach (var regex in _regularExpressionList.Keys)
    {
        st.StrippedText = regex.Replace(st.StrippedText, _regularExpressionList[regex]);
    }

    return st.MergedString;
}
/// <summary>
/// Applies the currently selected casing mode (normal / upper-case / lower-case) to one text
/// and bumps <c>NoOfLinesChanged</c> when the result differs from the input.
/// </summary>
/// <param name="text">Text to re-case.</param>
/// <param name="lastLine">Previous line, forwarded to the casing fixer in normal mode.</param>
/// <param name="nameList">Name list forwarded to the casing fixer in normal mode.</param>
/// <param name="subtitleCulture">Culture used for every upper/lower conversion.</param>
/// <param name="millisecondsFromLast">Forwarded to the casing fixer in normal mode.</param>
/// <returns>The re-cased text, or the input itself when nothing applies.</returns>
private string Fix(string text, string lastLine, List<string> nameList, CultureInfo subtitleCulture, double millisecondsFromLast)
{
    var result = text;

    if (FixNormal)
    {
        // "Only all upper-case" mode: mixed-case input is returned as-is and is not counted.
        if (FixNormalOnlyAllUppercase && result != result.ToUpper(subtitleCulture))
        {
            return result;
        }

        if (result.Length > 1)
        {
            // Lower-case everything first, then let the casing fixer rebuild the casing.
            var lowered = result.ToLower(subtitleCulture).Trim().FixExtraSpaces();
            var stripped = new StrippableText(lowered);

            // Fixes all casing but names (that is a separate option).
            stripped.FixCasing(nameList, false, true, true, lastLine, millisecondsFromLast);
            result = stripped.MergedString;
        }
    }
    else if (FixMakeUppercase)
    {
        var stripped = new StrippableText(result);
        var upper = stripped.Pre + stripped.StrippedText.ToUpper(subtitleCulture) + stripped.Post;

        // Tags inside the text get post-processed after the upper-casing.
        result = HtmlUtil.FixUpperTags(upper);
    }
    else if (FixMakeLowercase)
    {
        result = result.ToLower(subtitleCulture);
    }

    if (result != text)
    {
        NoOfLinesChanged++;
    }

    return result;
}
/// <summary>
/// Replaces <c>_subtitle</c> with a new subtitle in which a single-line paragraph is joined
/// with the paragraph that follows it, unless one of the checks below vetoes the merge.
/// A merged paragraph is created with only its <c>Text</c> set; unmerged ones are copied.
/// </summary>
private void MergeLinesWithContinuation()
{
    var merged = new Subtitle();

    for (int index = 0; index < _subtitle.Paragraphs.Count; index++)
    {
        Paragraph current = _subtitle.Paragraphs[index];
        Paragraph following = _subtitle.GetParagraphOrDefault(index + 1);

        // Candidate only if the current text is a single line, a next paragraph exists,
        // and the configured "more than X lines" value is above one.
        bool merge = !current.Text.Contains(Environment.NewLine)
                     && following != null
                     && Configuration.Settings.Tools.ListViewSyntaxMoreThanXLinesX > 1;

        if (merge)
        {
            // Ends with '!' or '.' and starts with an upper-case letter: keep it on its own.
            string trimmed = current.Text.TrimEnd();
            if (trimmed.EndsWith('!') || trimmed.EndsWith('.'))
            {
                var stripped = new StrippableText(current.Text);
                if (stripped.StrippedText.Length > 0 && char.IsUpper(stripped.StrippedText[0]))
                    merge = false;
            }
        }

        if (merge)
        {
            // Either text within five characters of the configured maximum length: do not merge.
            int limit = Configuration.Settings.General.SubtitleLineMaximumLength - 5;
            merge = current.Text.Length < limit && following.Text.Length < limit;
        }

        if (!merge)
        {
            merged.Paragraphs.Add(new Paragraph(current));
            continue;
        }

        merged.Paragraphs.Add(new Paragraph { Text = current.Text + Environment.NewLine + following.Text });

        // The following paragraph has been consumed by the merge; step over it.
        index++;
    }

    _subtitle = merged;
}
/// <summary>
/// Rebuilds the preview list: for every paragraph, applies the casing of each checked name
/// from <c>listViewNames</c> and lists the paragraph when its text would change.
/// Finally updates the "lines found" caption with the number of preview rows.
/// </summary>
private void GeneratePreview()
{
    Cursor = Cursors.WaitCursor;
    listViewFixes.BeginUpdate();
    try
    {
        listViewFixes.Items.Clear();
        foreach (Paragraph p in _subtitle.Paragraphs)
        {
            string text = p.Text;

            // Null must be ruled out before the text is stripped and upper-cased below.
            if (text != null)
            {
                foreach (ListViewItem item in listViewNames.Items)
                {
                    string name = item.SubItems[1].Text;

                    // Recomputed per name because an earlier name may already have changed the text.
                    string textNoTags = HtmlUtil.RemoveHtmlTags(text);
                    if (textNoTags == textNoTags.ToUpper())
                        continue; // all upper-case text is left alone

                    // Only checked names longer than one character that are not entirely lower-case.
                    if (item.Checked && text.Contains(name, StringComparison.OrdinalIgnoreCase) && name.Length > 1 && name != name.ToLower())
                    {
                        var st = new StrippableText(text);
                        st.FixCasing(new List<string> { name }, true, false, false, string.Empty);
                        text = st.MergedString;
                    }
                }
            }

            if (text != p.Text)
                AddToPreviewListView(p, text);
        }
    }
    finally
    {
        // Always leave update mode and restore the cursor, even if a fix throws.
        listViewFixes.EndUpdate();
        Cursor = Cursors.Default;
    }

    groupBoxLinesFound.Text = string.Format(Configuration.Settings.Language.ChangeCasingNames.LinesFoundX, listViewFixes.Items.Count);
}
/// <summary>
/// Re-cases one text according to the selected radio button (normal / upper-case / lower-case)
/// and increments <c>_noOfLinesChanged</c> when the result differs from the input.
/// </summary>
/// <param name="text">Text to re-case.</param>
/// <param name="lastLine">Previous line, forwarded to the casing fixer in normal mode.</param>
/// <param name="namesEtc">Name list forwarded to the casing fixer in normal mode.</param>
/// <returns>The re-cased text, or the input itself when nothing applies.</returns>
private string FixCasing(string text, string lastLine, List<string> namesEtc)
{
    string updated = text;

    if (radioButtonNormal.Checked)
    {
        // With "only all upper" ticked, mixed-case input is returned untouched and not counted.
        if (checkBoxOnlyAllUpper.Checked && text != text.ToUpper())
            return text;

        if (text.Length > 1)
        {
            // Start from an all-lower-case, trimmed, single-spaced copy.
            var parts = new StrippableText(text.ToLower().Trim().FixExtraSpaces());

            // Fixes all casing but names (that is a separate option).
            parts.FixCasing(namesEtc, false, true, true, lastLine);
            updated = parts.MergedString;
        }
    }
    else if (radioButtonUppercase.Checked)
    {
        var parts = new StrippableText(text);

        // Upper-case the stripped part only, then post-process tags inside the text.
        updated = HtmlUtil.FixUpperTags(parts.Pre + parts.StrippedText.ToUpper() + parts.Post);
    }
    else if (radioButtonLowercase.Checked)
    {
        updated = text.ToLower();
    }

    if (updated != text)
        _noOfLinesChanged++;

    return updated;
}
/// <summary>
/// Applies a fixed sequence of textual OCR clean-ups to <paramref name="input"/>:
/// ellipsis normalisation, a space after a leading dialog dash, upper-casing of the first
/// letter after a finished previous line, and a few character-confusion fixes (1, I, l, quotes).
/// The rules are order dependent and run in the order written.
/// </summary>
/// <param name="input">Text to clean up.</param>
/// <param name="lastLine">Previous line; may be null. Used to decide whether a new sentence starts.</param>
/// <param name="abbreviationList">Passed to <c>EndsWithAbbreviation</c> together with the previous line.</param>
/// <returns>
/// The cleaned text, or <paramref name="input"/> unchanged when the hard-coded rules are disabled in settings.
/// </returns>
public string FixOcrErrorsViaHardcodedRules(string input, string lastLine, HashSet<string> abbreviationList)
{
    if (!Configuration.Settings.Tools.OcrFixUseHardcodedRules)
        return input;

    input = input.Replace(",...", "...");

    // Exactly two leading dots become three.
    // All prefix/suffix tests in this method are ordinal, because the fixed-offset
    // Remove/index operations that follow them are only valid for an exact character match.
    if (input.StartsWith("..", StringComparison.Ordinal) && !input.StartsWith("...", StringComparison.Ordinal))
        input = "." + input;

    // Temporarily detach a leading dialog dash so the start-of-line ellipsis rules can match.
    string pre = string.Empty;
    if (input.StartsWith("- ", StringComparison.Ordinal))
    {
        pre = "- ";
        input = input.Remove(0, 2);
    }
    else if (input.StartsWith('-'))
    {
        pre = "-";
        input = input.Remove(0, 1);
    }

    bool hasDotDot = input.Contains("..") || input.Contains(". .");
    if (hasDotDot)
    {
        // Two dots directly followed by a letter/number (optionally inside <i>) -> three dots.
        if (input.Length > 5 && input.StartsWith("..", StringComparison.Ordinal) && Utilities.AllLettersAndNumbers.Contains(input[2]))
            input = "..." + input.Remove(0, 2);
        if (input.Length > 7 && input.StartsWith("<i>..", StringComparison.Ordinal) && Utilities.AllLettersAndNumbers.Contains(input[5]))
            input = "<i>..." + input.Remove(0, 5);

        // Two dots, a space, then a letter/number -> three dots without the space.
        if (input.Length > 5 && input.StartsWith(".. ", StringComparison.Ordinal) && Utilities.AllLettersAndNumbers.Contains(input[3]))
            input = "..." + input.Remove(0, 3);
        if (input.Length > 7 && input.StartsWith("<i>.. ", StringComparison.Ordinal) && Utilities.AllLettersAndNumbers.Contains(input[6]))
            input = "<i>..." + input.Remove(0, 6);

        // Same pattern at the start of a later line.
        if (input.Contains(Environment.NewLine + ".. "))
            input = input.Replace(Environment.NewLine + ".. ", Environment.NewLine + "...");
        if (input.Contains(Environment.NewLine + "<i>.. "))
            input = input.Replace(Environment.NewLine + "<i>.. ", Environment.NewLine + "<i>...");

        // Leading ellipsis broken up by spaces.
        if (input.StartsWith(". ..", StringComparison.Ordinal))
            input = "..." + input.Remove(0, 4);
        if (input.StartsWith(".. .", StringComparison.Ordinal))
            input = "..." + input.Remove(0, 4);
        if (input.StartsWith(". . .", StringComparison.Ordinal))
            input = "..." + input.Remove(0, 5);
        if (input.StartsWith("... ", StringComparison.Ordinal))
            input = input.Remove(3, 1);
    }

    // Re-attach the dialog dash.
    input = pre + input;

    if (hasDotDot)
    {
        // Leading ellipsis broken up by spaces, italic variants.
        if (input.StartsWith("<i>. ..", StringComparison.Ordinal))
            input = "<i>..." + input.Remove(0, 7);
        if (input.StartsWith("<i>.. .", StringComparison.Ordinal))
            input = "<i>..." + input.Remove(0, 7);
        if (input.StartsWith("<i>. . .", StringComparison.Ordinal))
            input = "<i>..." + input.Remove(0, 8);
        if (input.StartsWith("<i>... ", StringComparison.Ordinal))
            input = input.Remove(6, 1);
        if (input.StartsWith(". . <i>.", StringComparison.Ordinal))
            input = "<i>..." + input.Remove(0, 8);

        // Move the ellipsis inside the italic tag when the closing tag comes after the first space.
        if (input.StartsWith("...<i>", StringComparison.Ordinal) && (input.IndexOf("</i>", StringComparison.Ordinal) > input.IndexOf(' ')))
            input = "<i>..." + input.Remove(0, 6);

        // Trailing ellipsis broken up by spaces, plain and italic variants.
        if (input.EndsWith(". ..", StringComparison.Ordinal))
            input = input.Remove(input.Length - 4, 4) + "...";
        if (input.EndsWith(".. .", StringComparison.Ordinal))
            input = input.Remove(input.Length - 4, 4) + "...";
        if (input.EndsWith(". . .", StringComparison.Ordinal))
            input = input.Remove(input.Length - 5, 5) + "...";
        if (input.EndsWith(". ...", StringComparison.Ordinal))
            input = input.Remove(input.Length - 5, 5) + "...";
        if (input.EndsWith(". ..</i>", StringComparison.Ordinal))
            input = input.Remove(input.Length - 8, 8) + "...</i>";
        if (input.EndsWith(".. .</i>", StringComparison.Ordinal))
            input = input.Remove(input.Length - 8, 8) + "...</i>";
        if (input.EndsWith(". . .</i>", StringComparison.Ordinal))
            input = input.Remove(input.Length - 9, 9) + "...</i>";
        if (input.EndsWith(". ...</i>", StringComparison.Ordinal))
            input = input.Remove(input.Length - 9, 9) + "...</i>";
        if (input.EndsWith(".</i> . .", StringComparison.Ordinal))
            input = input.Remove(input.Length - 9, 9) + "...</i>";
        if (input.EndsWith(".</i>..", StringComparison.Ordinal))
            input = input.Remove(input.Length - 7, 7) + "...</i>";
        input = input.Replace(".</i> . ." + Environment.NewLine, "...</i>" + Environment.NewLine);

        // Ellipsis followed by ? or !, then collapse runs of four dots (twice, as before).
        input = input.Replace(".. ?", "..?");
        input = input.Replace("..?", "...?");
        input = input.Replace("....?", "...?");
        input = input.Replace(".. !", "..!");
        input = input.Replace("..!", "...!");
        input = input.Replace("....!", "...!");
        input = input.Replace("... ?", "...?");
        input = input.Replace("... !", "...!");
        input = input.Replace("....", "...");
        input = input.Replace("....", "...");

        // A line that continues the previous "..." line loses its leading dash,
        // unless a later line of this text also starts with a dash.
        if (input.StartsWith("- ...", StringComparison.Ordinal) && lastLine != null && lastLine.EndsWith("...", StringComparison.Ordinal) && !(input.Contains(Environment.NewLine + "-")))
            input = input.Remove(0, 2);
        if (input.StartsWith("-...", StringComparison.Ordinal) && lastLine != null && lastLine.EndsWith("...", StringComparison.Ordinal) && !(input.Contains(Environment.NewLine + "-")))
            input = input.Remove(0, 1);
    }

    // Insert a space between a leading dash and an upper-case letter ("-Hi" -> "- Hi").
    if (input.Length > 2 && input[0] == '-' && char.IsUpper(input[1]))
    {
        input = input.Insert(1, " ");
    }

    if (input.Length > 5 && input.StartsWith("<i>-", StringComparison.Ordinal) && char.IsUpper(input[4]))
    {
        input = input.Insert(4, " ");
    }

    // Same dash fix for the first later line that starts with "-" or "<i>-".
    int nlLen = Environment.NewLine.Length;
    int idx = input.IndexOf(Environment.NewLine + "-", StringComparison.Ordinal);
    if (idx > 0 && idx + nlLen + 1 < input.Length && char.IsUpper(input[idx + nlLen + 1]))
    {
        input = input.Insert(idx + nlLen + 1, " ");
    }

    idx = input.IndexOf(Environment.NewLine + "<i>-", StringComparison.Ordinal);
    if (idx > 0 && idx + nlLen + 4 < input.Length && char.IsUpper(input[idx + nlLen + 4]))
    {
        input = input.Insert(idx + nlLen + 4, " ");
    }

    // Upper-case the first letter when the previous line is missing or ends a sentence.
    if (string.IsNullOrEmpty(lastLine) || lastLine.EndsWith('.') || lastLine.EndsWith('!') || lastLine.EndsWith('?') || lastLine.EndsWith(']') || lastLine.EndsWith('♪'))
    {
        lastLine = HtmlUtil.RemoveHtmlTags(lastLine);
        var st = new StrippableText(input);

        // A previous line ending in "..." or in an abbreviation does not end a sentence.
        if (lastLine == null || (!lastLine.EndsWith("...", StringComparison.Ordinal) && !EndsWithAbbreviation(lastLine, abbreviationList)))
        {
            if (st.StrippedText.Length > 0 && !char.IsUpper(st.StrippedText[0]) && !st.Pre.EndsWith('[') && !st.Pre.EndsWith('(') && !st.Pre.EndsWith("...", StringComparison.Ordinal))
            {
                if (!HtmlUtil.StartsWithUrl(st.StrippedText))
                {
                    var uppercaseLetter = char.ToUpper(st.StrippedText[0]);

                    // A leading 'l' followed by one of these consonants is treated as a misread 'I'.
                    if (st.StrippedText.Length > 1 && uppercaseLetter == 'L' && @"abcdfghjklmnpqrstvwxz".Contains(st.StrippedText[1]))
                        uppercaseLetter = 'I';

                    // Language "ita": a leading "lo" becomes "Io".
                    if ((st.StrippedText.StartsWith("lo ", StringComparison.Ordinal) || st.StrippedText.Equals("lo.", StringComparison.Ordinal)) && _threeLetterIsoLanguageName.Equals("ita", StringComparison.Ordinal))
                        uppercaseLetter = 'I';

                    // Language "nld": a single letter after an apostrophe keeps its original case.
                    if ((st.StrippedText.StartsWith("k ", StringComparison.Ordinal) || st.StrippedText.StartsWith("m ", StringComparison.Ordinal) || st.StrippedText.StartsWith("n ", StringComparison.Ordinal) || st.StrippedText.StartsWith("r ", StringComparison.Ordinal) || st.StrippedText.StartsWith("s ", StringComparison.Ordinal) || st.StrippedText.StartsWith("t ", StringComparison.Ordinal)) && st.Pre.EndsWith('\'') && _threeLetterIsoLanguageName.Equals("nld", StringComparison.Ordinal))
                        uppercaseLetter = st.StrippedText[0];

                    // Language "eng": stuttered "l-I'll" / "l-l'll" becomes "I-I'll".
                    if ((st.StrippedText.StartsWith("l-I'll ", StringComparison.Ordinal) || st.StrippedText.StartsWith("l-l'll ", StringComparison.Ordinal)) && _threeLetterIsoLanguageName.Equals("eng", StringComparison.Ordinal))
                    {
                        uppercaseLetter = 'I';
                        st.StrippedText = "I-I" + st.StrippedText.Remove(0, 3);
                    }

                    st.StrippedText = uppercaseLetter + st.StrippedText.Substring(1);
                    input = st.Pre + st.StrippedText + st.Post;
                }
            }
        }
    }

    // Lines ending with ". should often end with ... (if no other quotes exist nearby).
    // Skipped when the character before the quote is a digit.
    if ((lastLine == null || !lastLine.Contains('"')) && input.EndsWith("\".", StringComparison.Ordinal) && input.IndexOf('"') == input.LastIndexOf('"') && input.Length > 3)
    {
        var lastChar = input[input.Length - 3];
        if (!char.IsDigit(lastChar))
        {
            int position = input.Length - 2;
            input = input.Remove(position).Insert(position, "...");
        }
    }

    // Change '<number><space>1' to '<number>1': removes the single character at each match position.
    if (input.Contains('1'))
    {
        var match = RegexNumber1.Match(input);
        while (match.Success)
        {
            input = input.Remove(match.Index, 1);
            match = RegexNumber1.Match(input, match.Index);
        }
    }

    // Change '' to "
    input = input.Replace("''", "\"");

    // Change 'sequeI of' to 'sequel of': the character right after the match start becomes 'l',
    // except where the text just before it reads "Mc" or "Mac".
    if (input.Contains('I'))
    {
        var match = RegexUppercaseI.Match(input);
        while (match.Success)
        {
            bool doFix = true;
            if (match.Index >= 1 && input.Substring(match.Index - 1).StartsWith("Mc", StringComparison.Ordinal))
                doFix = false;
            if (match.Index >= 2 && input.Substring(match.Index - 2).StartsWith("Mac", StringComparison.Ordinal))
                doFix = false;

            if (doFix)
                input = input.Substring(0, match.Index + 1) + "l" + input.Substring(match.Index + 2);

            if (match.Index + 1 < input.Length)
                match = RegexUppercaseI.Match(input, match.Index + 1);
            else
                break; // end while
        }
    }

    // Change 'NlCE' to 'NICE': the character right after the match start becomes 'I'.
    // The search restarts from the beginning after every replacement.
    if (input.Contains('l'))
    {
        var match = RegexLowercaseL.Match(input);
        while (match.Success)
        {
            input = input.Substring(0, match.Index + 1) + "I" + input.Substring(match.Index + 2);
            match = RegexLowercaseL.Match(input);
        }
    }

    return input;
}
/// <summary>
/// Goes through <paramref name="input"/> line by line and turns a leading lower-case 'i'
/// into 'I' when the preceding line is empty or ends with '.', '!' or '?'.
/// </summary>
/// <param name="input">Text to process; split with <c>SplitToLines</c>.</param>
/// <param name="lastLine">Line preceding the first line of <paramref name="input"/>.</param>
/// <returns>The processed lines joined with line breaks, with trailing CR/LF characters trimmed.</returns>
private string FixLowercaseIToUppercaseI(string input, string lastLine)
{
    var output = new StringBuilder();
    var lines = input.SplitToLines();
    string previous = lastLine;

    for (int index = 0; index < lines.Length; index++)
    {
        string current = lines[index];

        // From the second line on, the "previous line" is the line just above, not the caller's.
        if (index > 0)
        {
            previous = lines[index - 1];
        }

        previous = HtmlUtil.RemoveHtmlTags(previous);
        bool noPrevious = string.IsNullOrEmpty(previous);

        if (noPrevious || previous.EndsWith('.') || previous.EndsWith('!') || previous.EndsWith('?'))
        {
            var parts = new StrippableText(current);

            // Skip lines whose stripped prefix ends with '[', '(' or "...".
            bool startsWithFixableI = parts.StrippedText.StartsWith('i')
                                      && !(parts.Pre.EndsWith('[') || parts.Pre.EndsWith('(') || parts.Pre.EndsWith("...", StringComparison.Ordinal));

            if (startsWithFixableI)
            {
                // A previous line ending in "..." or in an abbreviation does not end a sentence.
                bool sentenceStart = noPrevious
                                     || !(previous.EndsWith("...", StringComparison.Ordinal) || EndsWithAbbreviation(previous, _abbreviationList));
                if (sentenceStart)
                {
                    current = parts.Pre + "I" + parts.StrippedText.Remove(0, 1) + parts.Post;
                }
            }
        }

        output.AppendLine(current);
    }

    return output.ToString().TrimEnd('\r', '\n');
}
/// <summary>
/// Checks the single-letter italic case: the opening tag ends up in Pre, the closing tag
/// in Post, and the lone letter in StrippedText.
/// </summary>
public void StrippableTextItalic2()
{
    var st = new StrippableText("<i>O</i>");

    // Assert.AreEqual takes (expected, actual); keeping that order makes failure messages read correctly.
    Assert.AreEqual("<i>", st.Pre);
    Assert.AreEqual("</i>", st.Post);
    Assert.AreEqual("O", st.StrippedText);
}