/// <summary>
/// Scans the tag-stripped subtitle text for entries of the names list that occur with a
/// casing different from the list entry, and adds each such name once to the names list view.
/// Finally updates the group box caption with the number of items in the list view.
/// </summary>
private void FindAllNames()
{
    string language = LanguageAutoDetect.AutoDetectGoogleLanguage(_subtitle);
    if (string.IsNullOrEmpty(language))
    {
        language = "en_US";
    }

    var namesList = new NamesList(Configuration.DictionariesDirectory, language, Configuration.Settings.WordLists.UseOnlineNames, Configuration.Settings.WordLists.NamesUrl);

    // Contains both one-word names and multi-word names
    var namesEtcList = namesList.GetAllNames();

    string text = HtmlUtil.RemoveHtmlTags(_subtitle.GetAllTexts());
    string textToLower = text.ToLower();

    listViewNames.BeginUpdate();
    try
    {
        foreach (string name in namesEtcList)
        {
            // Single-character entries are never considered.
            if (name.Length <= 1)
            {
                continue;
            }

            // Entries that are entirely lower case carry no casing information to restore.
            string nameLower = name.ToLower();
            if (name == nameLower)
            {
                continue;
            }

            int startIndex = textToLower.IndexOf(nameLower, StringComparison.Ordinal);
            while (startIndex >= 0 && startIndex < text.Length)
            {
                // The occurrence must start at the beginning of the text or right after a separator.
                bool startOk = startIndex == 0;
                if (!startOk)
                {
                    char before = text[startIndex - 1];
                    startOk = before == ' ' || before == '-' || before == '"' || before == '\'' || before == '>' ||
                              // Ordinal on purpose: a culture-sensitive EndsWith treats ignorable
                              // characters (e.g. a soft hyphen) as a match.
                              Environment.NewLine.EndsWith(before.ToString(CultureInfo.InvariantCulture), StringComparison.Ordinal);
                }

                if (startOk)
                {
                    int end = startIndex + name.Length;
                    bool endOk = end <= text.Length && (end == text.Length || ExpectedEndChars.Contains(text[end]));

                    // Do not add names where the casing already is correct
                    if (endOk && string.CompareOrdinal(text, startIndex, name, 0, name.Length) != 0)
                    {
                        if (!_usedNames.Contains(name))
                        {
                            _usedNames.Add(name);
                            AddToListViewNames(name);
                            break; // one hit is enough for this name
                        }
                    }
                }

                // name.Length > 1, so startIndex + 2 never exceeds the text length here.
                startIndex = textToLower.IndexOf(nameLower, startIndex + 2, StringComparison.Ordinal);
            }
        }
    }
    finally
    {
        // Always re-enable list view painting, even if the scan throws.
        listViewNames.EndUpdate();
    }

    groupBoxNames.Text = string.Format(Configuration.Settings.Language.ChangeCasingNames.NamesFoundInSubtitleX, listViewNames.Items.Count);
}
/// <summary>
/// Loads the names list into <c>_namesEtcList</c> the first time it is needed.
/// Does nothing when the field has already been set.
/// </summary>
private void MakeSureNamesListIsLoaded()
{
    if (_namesEtcList != null)
    {
        return;
    }

    // Assign an empty list first so the field is non-null before the names are loaded.
    _namesEtcList = new List<string>();

    string languageTwoLetterCode = LanguageAutoDetect.AutoDetectGoogleLanguage(_subtitle);

    // The loaded list holds both one-word names and multi-word names
    var loader = new NamesList(
        Configuration.DictionariesFolder,
        languageTwoLetterCode,
        Configuration.Settings.WordLists.UseOnlineNamesEtc,
        Configuration.Settings.WordLists.NamesEtcUrl);
    _namesEtcList = loader.GetAllNames();
}
/// <summary>
/// Fixes the casing of every paragraph in <paramref name="subtitle"/> using the names list for
/// <paramref name="language"/>. When the "normal" option is checked and the language code starts
/// with "en", lower-case 'i' characters matched by <c>AloneI</c> are additionally upper-cased.
/// </summary>
/// <param name="subtitle">Subtitle whose paragraph texts are rewritten in place.</param>
/// <param name="language">Language code used to pick the names list and to enable the English 'i' fix.</param>
internal void FixCasing(Subtitle subtitle, string language)
{
    var namesList = new NamesList(Configuration.DictionariesFolder, language, Configuration.Settings.WordLists.UseOnlineNamesEtc, Configuration.Settings.WordLists.NamesEtcUrl);
    var namesEtc = namesList.GetAllNames();

    // Longer names must be first
    namesEtc.Sort((s1, s2) => s2.Length.CompareTo(s1.Length));

    string lastLine = string.Empty;
    foreach (Paragraph p in subtitle.Paragraphs)
    {
        p.Text = FixCasing(p.Text, lastLine, namesEtc);

        // Fix casing of English alone i to I.
        // Ordinal comparison: the language code is an identifier, not linguistic text.
        if (radioButtonNormal.Checked && language.StartsWith("en", StringComparison.Ordinal) && p.Text.Contains('i'))
        {
            string text = p.Text;
            char[] chars = text.ToCharArray();
            bool changed = false;

            // Replacing 'i' by 'I' keeps the length, so indices from matching the
            // unmodified text stay valid while the buffer is edited.
            for (Match match = AloneI.Match(text); match.Success; match = match.NextMatch())
            {
                int index = match.Index;
                if (chars[index] != 'i')
                {
                    continue;
                }

                // Skip an 'i' directly next to '>' (on either side).
                bool afterTagEnd = index > 0 && chars[index - 1] == '>';
                bool beforeTagEnd = index + 1 < chars.Length && chars[index + 1] == '>';
                if (!afterTagEnd && !beforeTagEnd)
                {
                    chars[index] = 'I';
                    changed = true;
                }
            }

            if (changed)
            {
                p.Text = new string(chars);
            }
        }

        lastLine = p.Text;
    }
}
/// <summary>
/// Stores the subtitle, loads the names list for its detected language (falling back to
/// "en_US" when detection yields nothing), then finds the names and generates the preview.
/// </summary>
/// <param name="subtitle">Subtitle to work on.</param>
public void Initialize(Subtitle subtitle)
{
    _subtitle = subtitle;

    string detected = LanguageAutoDetect.AutoDetectGoogleLanguage(_subtitle);
    string language = string.IsNullOrEmpty(detected) ? "en_US" : detected;

    _namesList = new NamesList(
        Configuration.DictionariesDirectory,
        language,
        Configuration.Settings.WordLists.UseOnlineNames,
        Configuration.Settings.WordLists.NamesUrl);

    // Holds both one-word names and multi-word names
    _namesListInclMulti = _namesList.GetAllNames();

    FindAllNames();
    GeneratePreview();
}
/// <summary>
/// Rewrites the text of every paragraph in <paramref name="subtitle"/> with fixed casing,
/// using the names list loaded for <paramref name="language"/>. For language codes starting
/// with "en", and only when the "normal" option is checked, the lone lower-case 'i' fix is
/// applied as well.
/// </summary>
/// <param name="subtitle">Subtitle whose paragraphs are updated in place.</param>
/// <param name="language">Language code used to select the names list.</param>
internal void FixCasing(Subtitle subtitle, string language)
{
    var names = new NamesList(
        Configuration.DictionariesFolder,
        language,
        Configuration.Settings.WordLists.UseOnlineNamesEtc,
        Configuration.Settings.WordLists.NamesEtcUrl).GetAllNames();

    // Sort by descending length so longer names come first
    names.Sort((left, right) => right.Length.CompareTo(left.Length));

    string previousText = string.Empty;
    foreach (Paragraph paragraph in subtitle.Paragraphs)
    {
        paragraph.Text = FixCasing(paragraph.Text, previousText, names);

        // English only: turn a lone lower-case i into I
        bool fixAloneI = radioButtonNormal.Checked && language.StartsWith("en", StringComparison.Ordinal);
        if (fixAloneI)
        {
            paragraph.Text = FixEnglishAloneILowerToUpper(paragraph.Text);
        }

        previousText = paragraph.Text;
    }
}
/// <summary>
/// Scans the tag-stripped text of all paragraphs for entries of the names list that occur with
/// a casing different from the list entry, and adds each such name once to the names list view.
/// For language codes starting with "en", a few entries that are also common English words are
/// removed from the list first. Finally updates the group box caption with the number of items
/// in the list view.
/// </summary>
private void FindAllNames()
{
    string language = Utilities.AutoDetectLanguageName("en_US", _subtitle);
    if (string.IsNullOrEmpty(language))
    {
        language = "en_US";
    }

    var namesList = new NamesList(Configuration.DictionariesFolder, language, Configuration.Settings.WordLists.UseOnlineNamesEtc, Configuration.Settings.WordLists.NamesEtcUrl);

    // Contains both one-word names and multi-word names
    var namesEtcList = namesList.GetAllNames();

    // Ordinal comparison: the language code is an identifier, not linguistic text.
    if (language.StartsWith("en", StringComparison.Ordinal))
    {
        // Remove is a no-op when the entry is absent, so no Contains pre-check is needed.
        namesEtcList.Remove("Black");
        namesEtcList.Remove("Bill");
        namesEtcList.Remove("Long");
        namesEtcList.Remove("Don");
    }

    var sb = new StringBuilder();
    foreach (Paragraph p in _subtitle.Paragraphs)
    {
        sb.AppendLine(p.Text);
    }

    string text = HtmlUtil.RemoveHtmlTags(sb.ToString());
    string textToLower = text.ToLower();

    // Characters allowed directly after a name; built once instead of once per candidate.
    string expectedEndChars = @" ,.!?:;')-<""" + Environment.NewLine;

    foreach (string name in namesEtcList)
    {
        // Single-character entries are never considered.
        if (name.Length <= 1)
        {
            continue;
        }

        // Entries that are entirely lower case carry no casing information to restore.
        string nameLower = name.ToLower();
        if (name == nameLower)
        {
            continue;
        }

        int startIndex = textToLower.IndexOf(nameLower, StringComparison.Ordinal);
        while (startIndex >= 0 && startIndex < text.Length)
        {
            // The occurrence must start at the beginning of the text or right after a separator.
            bool startOk = startIndex == 0;
            if (!startOk)
            {
                char before = text[startIndex - 1];
                startOk = before == ' ' || before == '-' || before == '"' || before == '\'' || before == '>' ||
                          // Ordinal on purpose: a culture-sensitive EndsWith treats ignorable
                          // characters (e.g. a soft hyphen) as a match.
                          Environment.NewLine.EndsWith(before.ToString(CultureInfo.InvariantCulture), StringComparison.Ordinal);
            }

            if (startOk)
            {
                int end = startIndex + name.Length;
                bool endOk = end <= text.Length && (end == text.Length || expectedEndChars.Contains(text[end]));

                // Do not add names where the casing already is correct
                if (endOk && string.CompareOrdinal(text, startIndex, name, 0, name.Length) != 0)
                {
                    if (!_usedNames.Contains(name))
                    {
                        _usedNames.Add(name);
                        AddToListViewNames(name);
                        break; // one hit is enough for this name
                    }
                }
            }

            // name.Length > 1, so startIndex + 2 never exceeds the text length here.
            startIndex = textToLower.IndexOf(nameLower, startIndex + 2, StringComparison.Ordinal);
        }
    }

    groupBoxNames.Text = string.Format(Configuration.Settings.Language.ChangeCasingNames.NamesFoundInSubtitleX, listViewNames.Items.Count);
}