/// <summary>
/// Prepares the dialog for the given subtitle: shows the watermark read from the
/// subtitle text, enables/disables the matching controls and labels the
/// "current line" option with the text of the first selected paragraph.
/// </summary>
/// <param name="subtitle">Subtitle whose text is scanned for a watermark.</param>
/// <param name="firstSelectedIndex">Index of the first selected paragraph; stored in <c>_firstSelectedIndex</c>.</param>
internal void Initialize(Subtitle subtitle, int firstSelectedIndex)
{
    var trimmedText = subtitle.GetAllTexts().Trim();
    var watermark = ReadWaterMark(trimmedText);
    labelWatermark.Text = string.Format(_language.WatermarkX, watermark);

    bool hasWatermark = watermark.Length != 0;
    if (hasWatermark)
    {
        // A watermark is already present: generating a new one is disabled.
        groupBoxGenerate.Enabled = false;
        buttonOK.Focus();
    }
    else
    {
        // Nothing to remove: go straight to the watermark input box.
        buttonRemove.Enabled = false;
        textBoxWatermark.Focus();
    }

    _firstSelectedIndex = firstSelectedIndex;
    var selected = subtitle.GetParagraphOrDefault(_firstSelectedIndex);
    if (selected == null)
    {
        // No paragraph at that index: show an empty caption and disable the option.
        radioButtonCurrentLine.Text = string.Format(_language.CurrentLineOnlyX, string.Empty);
        radioButtonCurrentLine.Enabled = false;
        return;
    }

    // Line breaks are swapped for the configured list view separator before display.
    var singleLineText = selected.Text.Replace(Environment.NewLine, Configuration.Settings.General.ListViewLineSeparatorString);
    radioButtonCurrentLine.Text = string.Format(_language.CurrentLineOnlyX, singleLineText);
}
/// <summary>
/// Detects the language of a subtitle via <c>AutoDetectGoogleLanguage</c>,
/// returning <c>null</c> instead of an empty result.
/// </summary>
/// <param name="subtitle">Subtitle to inspect. Detection runs on a new <c>Subtitle</c> built from it (presumably a copy - confirm against the constructor) after its empty lines are removed.</param>
/// <returns>The detected language id, or <c>null</c> when detection yields null or an empty string.</returns>
public static string AutoDetectGoogleLanguageOrNull(Subtitle subtitle)
{
    var withoutEmptyLines = new Subtitle(subtitle);
    withoutEmptyLines.RemoveEmptyLines();

    string allText = withoutEmptyLines.GetAllTexts();
    int threshold = withoutEmptyLines.Paragraphs.Count / 14;
    string detected = AutoDetectGoogleLanguage(allText, threshold);

    return string.IsNullOrEmpty(detected) ? null : detected;
}
/// <summary>
/// Chooses the dictionary language that best matches the subtitle text.
/// Only languages returned by <c>Utilities.GetDictionaryLanguages()</c> are considered.
/// </summary>
/// <param name="languageName">Fallback language name; replaced by "en_US" when null or empty.</param>
/// <param name="subtitle">Subtitle whose text is analysed.</param>
/// <returns>
/// The short name (the text between '[' and ']' of a dictionary name) of the language with the
/// highest word-hit count above the threshold, or the fallback when no language qualifies.
/// </returns>
public static string AutoDetectLanguageName(string languageName, Subtitle subtitle)
{
    if (string.IsNullOrEmpty(languageName))
    {
        languageName = "en_US";
    }

    // Starts as the minimum hit count and is raised every time a language is accepted,
    // so a later dictionary must beat the current winner and not merely the threshold.
    int bestCount = subtitle.Paragraphs.Count / 14;
    string text = subtitle.GetAllTexts();
    List<string> dictionaryNames = Utilities.GetDictionaryLanguages();

    bool containsEnGb = false;
    bool containsEnUs = false;
    bool containsHrHr = false;
    bool containsSrLatn = false;
    foreach (string name in dictionaryNames)
    {
        if (name.Contains("[en_GB]"))
        {
            containsEnGb = true;
        }
        if (name.Contains("[en_US]"))
        {
            containsEnUs = true;
        }
        if (name.Contains("[hr_HR]"))
        {
            containsHrHr = true;
        }
        if (name.Contains("[sr-Latn]"))
        {
            containsSrLatn = true;
        }
    }

    foreach (string name in dictionaryNames)
    {
        // Extract the tag between '[' and ']'; a '[' at index 0 is not accepted.
        string shortName = string.Empty;
        int start = name.IndexOf('[');
        int end = name.IndexOf(']');
        if (start > 0 && end > start)
        {
            start++;
            shortName = name.Substring(start, end - start);
        }

        int count;
        switch (shortName)
        {
            case "da_DK":
                count = GetCount(text, AutoDetectWordsDanish);
                if (count > bestCount)
                {
                    int norwegianCount = GetCount(text, "ut", "deg", "meg", "merkelig", "mye", "spørre");
                    int dutchCount = GetCount(text, AutoDetectWordsDutch);
                    if (norwegianCount < 2 && dutchCount < count)
                    {
                        languageName = shortName;
                        bestCount = count;
                    }
                }
                break;
            case "nb_NO":
                count = GetCount(text, AutoDetectWordsNorwegian);
                if (count > bestCount)
                {
                    int danishCount = GetCount(text, "siger", "dig", "mig", "mærkelig", "tilbage", "spørge");
                    int dutchCount = GetCount(text, AutoDetectWordsDutch);
                    if (danishCount < 2 && dutchCount < count)
                    {
                        languageName = shortName;
                        bestCount = count;
                    }
                }
                break;
            case "sv_SE":
                count = GetCount(text, AutoDetectWordsSwedish);
                if (count > bestCount)
                {
                    languageName = shortName;
                    bestCount = count;
                }
                break;
            case "en_US":
                count = GetCount(text, AutoDetectWordsEnglish);
                if (count > bestCount)
                {
                    int dutchCount = GetCount(text, AutoDetectWordsDutch);
                    if (dutchCount < count)
                    {
                        languageName = shortName;
                        bestCount = count;
                        if (containsEnGb)
                        {
                            // Prefer British English when British spellings outnumber American ones.
                            int usCount = GetCount(text, "color", "flavor", "honor", "humor", "neighbor", "honor");
                            int gbCount = GetCount(text, "colour", "flavour", "honour", "humour", "neighbour", "honour");
                            if (gbCount > usCount)
                            {
                                languageName = "en_GB";
                            }
                        }
                    }
                }
                break;
            case "en_GB":
                count = GetCount(text, AutoDetectWordsEnglish);
                if (count > bestCount)
                {
                    int dutchCount = GetCount(text, AutoDetectWordsDutch);
                    if (dutchCount < count)
                    {
                        languageName = shortName;
                        bestCount = count;
                        if (containsEnUs)
                        {
                            // Prefer American English when American spellings outnumber British ones.
                            int usCount = GetCount(text, "color", "flavor", "honor", "humor", "neighbor", "honor");
                            int gbCount = GetCount(text, "colour", "flavour", "honour", "humour", "neighbour", "honour");
                            if (gbCount < usCount)
                            {
                                languageName = "en_US";
                            }
                        }
                    }
                }
                break;
            case "es_ES":
                count = GetCount(text, AutoDetectWordsSpanish);
                if (count > bestCount)
                {
                    int frenchCount = GetCount(text, "[Cc]'est", "pas", "vous", "pour", "suis", "Pourquoi", "maison", "souviens", "quelque"); // not spanish words
                    int portugueseCount = GetCount(text, "[NnCc]ão", "Então", "h?ouve", "pessoal", "rapariga", "tivesse", "fizeste", "jantar", "conheço", "atenção", "foste", "milhões", "devias", "ganhar", "raios"); // not spanish words
                    if (frenchCount < 2 && portugueseCount < 2)
                    {
                        languageName = shortName;
                        bestCount = count;
                    }
                }
                break;
            case "it_IT":
                count = GetCount(text, AutoDetectWordsItalian);
                if (count > bestCount)
                {
                    int frenchCount = GetCount(text, "[Cc]'est", "pas", "vous", "pour", "suis", "Pourquoi", "maison", "souviens", "quelque"); // not italian words
                    int spanishCount = GetCount(text, "Hola", "nada", "Vamos", "pasa", "los", "como"); // not italian words
                    if (frenchCount < 2 && spanishCount < 2)
                    {
                        languageName = shortName;
                        bestCount = count;
                    }
                }
                break;
            case "fr_FR":
                count = GetCount(text, AutoDetectWordsFrench);
                if (count > bestCount)
                {
                    int romanianCount = GetCount(text, "[Vv]reau", "[Ss]înt", "[Aa]cum", "pentru", "domnule", "aici");
                    int spanishCount = GetCount(text, "Hola", "nada", "Vamos", "pasa", "los", "como"); // not french words
                    int italianCount = GetCount(text, AutoDetectWordsItalian);
                    if (romanianCount < 5 && spanishCount < 2 && italianCount < 2)
                    {
                        languageName = shortName;
                        bestCount = count;
                    }
                }
                break;
            case "de_DE":
                count = GetCount(text, AutoDetectWordsGerman);
                if (count > bestCount)
                {
                    languageName = shortName;
                    bestCount = count;
                }
                break;
            case "nl_NL":
                count = GetCount(text, AutoDetectWordsDutch);
                if (count > bestCount)
                {
                    languageName = shortName;
                    bestCount = count;
                }
                break;
            case "pl_PL":
                count = GetCount(text, AutoDetectWordsPolish);
                if (count > bestCount)
                {
                    languageName = shortName;
                    bestCount = count;
                }
                break;
            case "el_GR":
                count = GetCount(text, AutoDetectWordsGreek);
                if (count > bestCount)
                {
                    languageName = shortName;
                    bestCount = count;
                }
                break;
            case "ru_RU":
                count = GetCount(text, AutoDetectWordsRussian);
                if (count > bestCount)
                {
                    languageName = shortName;
                    bestCount = count;
                }
                break;
            case "uk_UA":
                count = GetCount(text, AutoDetectWordsUkrainian);
                if (count > bestCount)
                {
                    languageName = shortName;
                    bestCount = count;
                }
                break;
            case "ro_RO":
                count = GetCount(text, AutoDetectWordsRomanian1);
                if (count <= bestCount)
                {
                    // First word list did not qualify; try the second one.
                    count = GetCount(text, AutoDetectWordsRomanian2);
                }
                if (count > bestCount)
                {
                    languageName = shortName;
                    bestCount = count;
                }
                break;
            case "hr_HR": // Croatian
                count = GetCount(text, AutoDetectWordsCroatianAndSerbian);
                if (count > bestCount)
                {
                    languageName = shortName;
                    bestCount = count;
                    if (containsSrLatn)
                    {
                        int croatianCount = GetCount(text, AutoDetectWordsCroatian);
                        int serbianCount = GetCount(text, AutoDetectWordsSerbian);
                        if (serbianCount > croatianCount)
                        {
                            languageName = "sr-Latn";
                        }
                    }
                }
                break;
            case "sr-Latn": // Serbian (Latin)
                count = GetCount(text, AutoDetectWordsCroatianAndSerbian);
                if (count > bestCount)
                {
                    languageName = shortName;
                    bestCount = count;
                    if (containsHrHr)
                    {
                        int croatianCount = GetCount(text, AutoDetectWordsCroatian);
                        int serbianCount = GetCount(text, AutoDetectWordsSerbian);
                        if (serbianCount < croatianCount)
                        {
                            languageName = "hr_HR";
                        }
                    }
                }
                break;
            case "sr": // Serbian (Cyrillic)
                count = GetCount(text, AutoDetectWordsSerbianCyrillic);
                if (count > bestCount)
                {
                    languageName = shortName;
                    bestCount = count;
                }
                break;
            case "pt_PT": // Portuguese
                count = GetCount(text, AutoDetectWordsPortuguese);
                if (count > bestCount)
                {
                    languageName = shortName;
                    bestCount = count;
                }
                break;
            case "pt_BR": // Portuguese (Brasil)
                count = GetCount(text, AutoDetectWordsPortuguese);
                if (count > bestCount)
                {
                    languageName = shortName;
                    bestCount = count;
                }
                break;
            case "hu_HU": // Hungarian
                count = GetCount(text, AutoDetectWordsHungarian);
                if (count > bestCount)
                {
                    languageName = shortName;
                    bestCount = count;
                }
                break;
            case "cs_CZ": // Czech
                count = GetCount(text, AutoDetectWordsCzech);
                if (count > bestCount)
                {
                    languageName = shortName;
                    bestCount = count;
                }
                break;
            case "sk_SK": // Slovak
                count = GetCount(text, AutoDetectWordsSlovak);
                if (count > bestCount)
                {
                    languageName = shortName;
                    bestCount = count;
                }
                break;
        }
    }

    return languageName;
}
/// <summary>
/// Detects the language of a subtitle via <c>AutoDetectGoogleLanguage</c>,
/// returning <c>null</c> instead of an empty result.
/// </summary>
/// <param name="subtitle">
/// Subtitle to inspect. Detection runs on a new <c>Subtitle</c> built from it
/// (presumably a copy - confirm against the constructor) with empty lines removed,
/// so empty paragraphs do not raise the <c>Paragraphs.Count / 14</c> value passed to the detector.
/// </param>
/// <returns>The detected language id, or <c>null</c> when detection yields null or an empty string.</returns>
public static string AutoDetectGoogleLanguageOrNull(Subtitle subtitle)
{
    var s = new Subtitle(subtitle);
    s.RemoveEmptyLines();

    string languageId = AutoDetectGoogleLanguage(s.GetAllTexts(), s.Paragraphs.Count / 14);
    if (string.IsNullOrEmpty(languageId))
    {
        return null;
    }

    return languageId;
}
/// <summary>
/// Chooses the dictionary language that best matches the subtitle text.
/// Only languages returned by <c>Utilities.GetDictionaryLanguages()</c> are considered.
/// </summary>
/// <param name="languageName">Fallback language name; replaced by "en_US" when null or empty.</param>
/// <param name="subtitle">Subtitle whose text is analysed.</param>
/// <returns>
/// The short name (the text between '[' and ']' of a dictionary name) of the language with the
/// highest word-hit count above the threshold, or the fallback when no language qualifies.
/// </returns>
public static string AutoDetectLanguageName(string languageName, Subtitle subtitle)
{
    if (string.IsNullOrEmpty(languageName))
    {
        languageName = "en_US";
    }

    // Starts as the minimum hit count and is raised every time a language is accepted,
    // so a later dictionary must beat the current winner and not merely the threshold.
    int bestCount = subtitle.Paragraphs.Count / 14;
    string text = subtitle.GetAllTexts();
    List<string> dictionaryNames = Utilities.GetDictionaryLanguages();

    bool containsEnGb = false;
    bool containsEnUs = false;
    bool containsHrHr = false;
    bool containsSrLatn = false;
    foreach (string name in dictionaryNames)
    {
        if (name.Contains("[en_GB]"))
        {
            containsEnGb = true;
        }
        if (name.Contains("[en_US]"))
        {
            containsEnUs = true;
        }
        if (name.Contains("[hr_HR]"))
        {
            containsHrHr = true;
        }
        if (name.Contains("[sr_Latn]"))
        {
            containsSrLatn = true;
        }
    }

    foreach (string name in dictionaryNames)
    {
        // Extract the tag between '[' and ']'; a '[' at index 0 is not accepted.
        string shortName = string.Empty;
        int start = name.IndexOf('[');
        int end = name.IndexOf(']');
        if (start > 0 && end > start)
        {
            start++;
            shortName = name.Substring(start, end - start);
        }

        int count;
        switch (shortName)
        {
            case "da_DK":
                count = GetCount(text, AutoDetectWordsDanish);
                if (count > bestCount)
                {
                    int norwegianCount = GetCount(text, "ut", "deg", "meg", "merkelig", "mye", "spørre");
                    int dutchCount = GetCount(text, AutoDetectWordsDutch);
                    if (norwegianCount < 2 && dutchCount < count)
                    {
                        languageName = shortName;
                        bestCount = count;
                    }
                }
                break;
            case "nb_NO":
                count = GetCount(text, AutoDetectWordsNorwegian);
                if (count > bestCount)
                {
                    int danishCount = GetCount(text, "siger", "dig", "mig", "mærkelig", "tilbage", "spørge");
                    int dutchCount = GetCount(text, AutoDetectWordsDutch);
                    if (danishCount < 2 && dutchCount < count)
                    {
                        languageName = shortName;
                        bestCount = count;
                    }
                }
                break;
            case "sv_SE":
                count = GetCount(text, AutoDetectWordsSwedish);
                if (count > bestCount)
                {
                    languageName = shortName;
                    bestCount = count;
                }
                break;
            case "en_US":
                count = GetCount(text, AutoDetectWordsEnglish);
                if (count > bestCount)
                {
                    int dutchCount = GetCount(text, AutoDetectWordsDutch);
                    if (dutchCount < count)
                    {
                        languageName = shortName;
                        bestCount = count;
                        if (containsEnGb)
                        {
                            // Prefer British English when British spellings outnumber American ones.
                            int usCount = GetCount(text, "color", "flavor", "honor", "humor", "neighbor", "honor");
                            int gbCount = GetCount(text, "colour", "flavour", "honour", "humour", "neighbour", "honour");
                            if (gbCount > usCount)
                            {
                                languageName = "en_GB";
                            }
                        }
                    }
                }
                break;
            case "en_GB":
                count = GetCount(text, AutoDetectWordsEnglish);
                if (count > bestCount)
                {
                    int dutchCount = GetCount(text, AutoDetectWordsDutch);
                    if (dutchCount < count)
                    {
                        languageName = shortName;
                        bestCount = count;
                        if (containsEnUs)
                        {
                            // Prefer American English when American spellings outnumber British ones.
                            int usCount = GetCount(text, "color", "flavor", "honor", "humor", "neighbor", "honor");
                            int gbCount = GetCount(text, "colour", "flavour", "honour", "humour", "neighbour", "honour");
                            if (gbCount < usCount)
                            {
                                languageName = "en_US";
                            }
                        }
                    }
                }
                break;
            case "es_ES":
                count = GetCount(text, AutoDetectWordsSpanish);
                if (count > bestCount)
                {
                    int frenchCount = GetCount(text, "[Cc]'est", "pas", "vous", "pour", "suis", "Pourquoi", "maison", "souviens", "quelque"); // not spanish words
                    int portugueseCount = GetCount(text, "[NnCc]ão", "Então", "h?ouve", "pessoal", "rapariga", "tivesse", "fizeste", "jantar", "conheço", "atenção", "foste", "milhões", "devias", "ganhar", "raios"); // not spanish words
                    if (frenchCount < 2 && portugueseCount < 2)
                    {
                        languageName = shortName;
                        bestCount = count;
                    }
                }
                break;
            case "it_IT":
                count = GetCount(text, AutoDetectWordsItalian);
                if (count > bestCount)
                {
                    int frenchCount = GetCount(text, "[Cc]'est", "pas", "vous", "pour", "suis", "Pourquoi", "maison", "souviens", "quelque"); // not italian words
                    int spanishCount = GetCount(text, "Hola", "nada", "Vamos", "pasa", "los", "como"); // not italian words
                    if (frenchCount < 2 && spanishCount < 2)
                    {
                        languageName = shortName;
                        bestCount = count;
                    }
                }
                break;
            case "fr_FR":
                count = GetCount(text, AutoDetectWordsFrench);
                if (count > bestCount)
                {
                    int romanianCount = GetCount(text, "[Vv]reau", "[Ss]înt", "[Aa]cum", "pentru", "domnule", "aici");
                    int spanishCount = GetCount(text, "Hola", "nada", "Vamos", "pasa", "los", "como"); // not french words
                    int italianCount = GetCount(text, AutoDetectWordsItalian);
                    if (romanianCount < 5 && spanishCount < 2 && italianCount < 2)
                    {
                        languageName = shortName;
                        bestCount = count;
                    }
                }
                break;
            case "de_DE":
                count = GetCount(text, AutoDetectWordsGerman);
                if (count > bestCount)
                {
                    languageName = shortName;
                    bestCount = count;
                }
                break;
            case "nl_NL":
                count = GetCount(text, AutoDetectWordsDutch);
                if (count > bestCount)
                {
                    languageName = shortName;
                    bestCount = count;
                }
                break;
            case "pl_PL":
                count = GetCount(text, AutoDetectWordsPolish);
                if (count > bestCount)
                {
                    languageName = shortName;
                    bestCount = count;
                }
                break;
            case "el_GR":
                count = GetCount(text, AutoDetectWordsGreek);
                if (count > bestCount)
                {
                    languageName = shortName;
                    bestCount = count;
                }
                break;
            case "ru_RU":
                count = GetCount(text, AutoDetectWordsRussian);
                if (count > bestCount)
                {
                    languageName = shortName;
                    bestCount = count;
                }
                break;
            case "uk_UA":
                count = GetCount(text, AutoDetectWordsUkrainian);
                if (count > bestCount)
                {
                    languageName = shortName;
                    bestCount = count;
                }
                break;
            case "ro_RO":
                count = GetCount(text, AutoDetectWordsRomanian1);
                if (count <= bestCount)
                {
                    // First word list did not qualify; try the second one.
                    count = GetCount(text, AutoDetectWordsRomanian2);
                }
                if (count > bestCount)
                {
                    languageName = shortName;
                    bestCount = count;
                }
                break;
            case "hr_HR": // Croatian
                count = GetCount(text, AutoDetectWordsCroatianAndSerbian);
                if (count > bestCount)
                {
                    bestCount = count;
                    languageName = shortName;
                    if (containsSrLatn)
                    {
                        int croatianCount = GetCount(text, AutoDetectWordsCroatian);
                        int serbianCount = GetCount(text, AutoDetectWordsSerbian);
                        if (serbianCount > croatianCount)
                        {
                            // Use the same id as the "[sr_Latn]" dictionary tag checked above
                            // and as the "sr_Latn" case below returns.
                            languageName = "sr_Latn";
                        }
                    }
                }
                break;
            case "sr_Latn": // Serbian (Latin)
                count = GetCount(text, AutoDetectWordsCroatianAndSerbian);
                if (count > bestCount)
                {
                    languageName = shortName;
                    bestCount = count;
                    if (containsHrHr)
                    {
                        int croatianCount = GetCount(text, AutoDetectWordsCroatian);
                        int serbianCount = GetCount(text, AutoDetectWordsSerbian);
                        if (serbianCount < croatianCount)
                        {
                            languageName = "hr_HR";
                        }
                    }
                }
                break;
            case "sr": // Serbian (Cyrillic)
                count = GetCount(text, AutoDetectWordsSerbianCyrillic);
                if (count > bestCount)
                {
                    languageName = shortName;
                    bestCount = count;
                }
                break;
            case "pt_PT": // Portuguese Portugal
            case "pt_BR": // Portuguese Brazil
                count = GetCount(text, AutoDetectWordsPortuguese);
                if (count > bestCount)
                {
                    languageName = shortName;
                    bestCount = count;
                }
                break;
            case "hu_HU": // Hungarian
                count = GetCount(text, AutoDetectWordsHungarian);
                if (count > bestCount)
                {
                    languageName = shortName;
                    bestCount = count;
                }
                break;
            case "cs_CZ": // Czech
                count = GetCount(text, AutoDetectWordsCzech);
                if (count > bestCount)
                {
                    // Czech is only accepted when it also out-scores the Lithuanian word list.
                    var lithuanianCount = GetCount(text, AutoDetectWordsLithuanian);
                    if (count > lithuanianCount)
                    {
                        languageName = shortName;
                        bestCount = count;
                    }
                }
                break;
            case "sk_SK": // Slovak
                count = GetCount(text, AutoDetectWordsSlovak);
                if (count > bestCount)
                {
                    languageName = shortName;
                    bestCount = count;
                }
                break;
            case "lv_LV": // Latvian
                count = GetCount(text, AutoDetectWordsLatvian);
                if (count > bestCount)
                {
                    languageName = shortName;
                    bestCount = count;
                }
                break;
            case "lt_LT": // Lithuanian
            case "lt": // Lithuanian (Neutral)
                count = GetCount(text, AutoDetectWordsLithuanian);
                if (count > bestCount)
                {
                    languageName = shortName;
                    bestCount = count;
                }
                break;
            case "hi_IN": // Hindi
            case "hi":
                count = GetCount(text, AutoDetectWordsHindi);
                if (count > bestCount)
                {
                    languageName = shortName;
                    bestCount = count;
                }
                break;
        }
    }

    return languageName;
}