Esempio n. 1
0
        /// <summary>
        /// Prepares the dialog for the given subtitle: shows the watermark read from the subtitle text,
        /// enables either the "remove" or the "generate" part of the UI, and labels the
        /// "current line only" option with the text of the selected paragraph.
        /// </summary>
        /// <param name="subtitle">Subtitle whose combined text is scanned for a watermark.</param>
        /// <param name="firstSelectedIndex">Index of the paragraph offered as "current line".</param>
        internal void Initialize(Subtitle subtitle, int firstSelectedIndex)
        {
            var existingWatermark = ReadWaterMark(subtitle.GetAllTexts().Trim());
            labelWatermark.Text = string.Format(_language.WatermarkX, existingWatermark);

            bool hasWatermark = existingWatermark.Length != 0;
            if (hasWatermark)
            {
                // A watermark is already present: generating is disabled.
                groupBoxGenerate.Enabled = false;
                buttonOK.Focus();
            }
            else
            {
                // Nothing to remove: disable the remove button and focus the input box.
                buttonRemove.Enabled = false;
                textBoxWatermark.Focus();
            }

            _firstSelectedIndex = firstSelectedIndex;
            var selectedParagraph = subtitle.GetParagraphOrDefault(_firstSelectedIndex);
            if (selectedParagraph == null)
            {
                // No paragraph at the selected index: show an empty label and disable the option.
                radioButtonCurrentLine.Text = string.Format(_language.CurrentLineOnlyX, string.Empty);
                radioButtonCurrentLine.Enabled = false;
                return;
            }

            // Show the paragraph on one line by swapping line breaks for the configured separator.
            var singleLineText = selectedParagraph.Text.Replace(Environment.NewLine, Configuration.Settings.General.ListViewLineSeparatorString);
            radioButtonCurrentLine.Text = string.Format(_language.CurrentLineOnlyX, singleLineText);
        }
Esempio n. 2
0
        /// <summary>
        /// Detects the language id of a subtitle via <c>AutoDetectGoogleLanguage</c>, working on a
        /// copy of the subtitle from which empty lines have been removed.
        /// </summary>
        /// <param name="subtitle">Subtitle to analyse; it is copied, not modified through this reference.</param>
        /// <returns>The detected language id, or <c>null</c> when the detector returns null or an empty string.</returns>
        public static string AutoDetectGoogleLanguageOrNull(Subtitle subtitle)
        {
            var cleaned = new Subtitle(subtitle);
            cleaned.RemoveEmptyLines();

            var allText = cleaned.GetAllTexts();
            // One fourteenth of the paragraph count is passed as the second detector argument.
            var minimumHits = cleaned.Paragraphs.Count / 14;

            string detected = AutoDetectGoogleLanguage(allText, minimumHits);
            return string.IsNullOrEmpty(detected) ? null : detected;
        }
        /// <summary>
        /// Guesses the spell-check dictionary language of a subtitle by counting
        /// language-specific words in its combined text.
        /// </summary>
        /// <param name="languageName">
        /// Fallback returned when no installed dictionary language qualifies;
        /// replaced by "en_US" when null or empty.
        /// </param>
        /// <param name="subtitle">Subtitle whose text (<c>GetAllTexts()</c>) is analysed.</param>
        /// <returns>
        /// The short name (the text between '[' and ']' in the dictionary name, e.g. "da_DK")
        /// of the best matching dictionary, or the fallback name.
        /// </returns>
        public static string AutoDetectLanguageName(string languageName, Subtitle subtitle)
        {
            if (string.IsNullOrEmpty(languageName))
                languageName = "en_US";

            // Threshold a language must exceed. It is raised to the winning count each time a
            // language is accepted, so a later dictionary only replaces the current choice when
            // it scores strictly higher (previously the last dictionary over the initial
            // threshold won regardless of its score).
            int bestCount = subtitle.Paragraphs.Count / 14;

            string text = subtitle.GetAllTexts();
            List<string> dictionaryNames = Utilities.GetDictionaryLanguages();

            // Record which closely related dictionaries are installed, so the English and
            // Croatian/Serbian branches only switch to a variant that is actually available.
            bool containsEnGb = false;
            bool containsEnUs = false;
            bool containsHrHr = false;
            bool containsSrLatn = false;
            foreach (string name in dictionaryNames)
            {
                if (name.Contains("[en_GB]"))
                    containsEnGb = true;
                if (name.Contains("[en_US]"))
                    containsEnUs = true;
                if (name.Contains("[hr_HR]"))
                    containsHrHr = true;
                if (name.Contains("[sr-Latn]"))
                    containsSrLatn = true;
            }

            foreach (string name in dictionaryNames)
            {
                // Extract the text between the first '[' and the first ']'.
                // A '[' at index 0 is not accepted (start > 0), leaving shortName empty.
                string shortName = string.Empty;
                int start = name.IndexOf('[');
                int end = name.IndexOf(']');
                if (start > 0 && end > start)
                {
                    start++;
                    shortName = name.Substring(start, end - start);
                }

                int count;
                switch (shortName)
                {
                    case "da_DK":
                        count = GetCount(text, AutoDetectWordsDanish);
                        if (count > bestCount)
                        {
                            int norwegianCount = GetCount(text, "ut", "deg", "meg", "merkelig", "mye", "spørre");
                            int dutchCount = GetCount(text, AutoDetectWordsDutch);
                            if (norwegianCount < 2 && dutchCount < count)
                            {
                                languageName = shortName;
                                bestCount = count;
                            }
                        }
                        break;
                    case "nb_NO":
                        count = GetCount(text, AutoDetectWordsNorwegian);
                        if (count > bestCount)
                        {
                            int danishCount = GetCount(text, "siger", "dig", "mig", "mærkelig", "tilbage", "spørge");
                            int dutchCount = GetCount(text, AutoDetectWordsDutch);
                            if (danishCount < 2 && dutchCount < count)
                            {
                                languageName = shortName;
                                bestCount = count;
                            }
                        }
                        break;
                    case "sv_SE":
                        count = GetCount(text, AutoDetectWordsSwedish);
                        if (count > bestCount)
                        {
                            languageName = shortName;
                            bestCount = count;
                        }
                        break;
                    case "en_US":
                        count = GetCount(text, AutoDetectWordsEnglish);
                        if (count > bestCount)
                        {
                            int dutchCount = GetCount(text, AutoDetectWordsDutch);
                            if (dutchCount < count)
                            {
                                languageName = shortName;
                                bestCount = count;
                                if (containsEnGb)
                                {
                                    // Prefer British English when British spellings outnumber American ones.
                                    int usCount = GetCount(text, "color", "flavor", "honor", "humor", "neighbor", "honor");
                                    int gbCount = GetCount(text, "colour", "flavour", "honour", "humour", "neighbour", "honour");
                                    if (gbCount > usCount)
                                        languageName = "en_GB";
                                }
                            }
                        }
                        break;
                    case "en_GB":
                        count = GetCount(text, AutoDetectWordsEnglish);
                        if (count > bestCount)
                        {
                            int dutchCount = GetCount(text, AutoDetectWordsDutch);
                            if (dutchCount < count)
                            {
                                languageName = shortName;
                                bestCount = count;
                                if (containsEnUs)
                                {
                                    // Prefer American English when American spellings outnumber British ones.
                                    int usCount = GetCount(text, "color", "flavor", "honor", "humor", "neighbor", "honor");
                                    int gbCount = GetCount(text, "colour", "flavour", "honour", "humour", "neighbour", "honour");
                                    if (gbCount < usCount)
                                        languageName = "en_US";
                                }
                            }
                        }
                        break;
                    case "es_ES":
                        count = GetCount(text, AutoDetectWordsSpanish);
                        if (count > bestCount)
                        {
                            int frenchCount = GetCount(text, "[Cc]'est", "pas", "vous", "pour", "suis", "Pourquoi", "maison", "souviens", "quelque"); // not spanish words
                            int portugueseCount = GetCount(text, "[NnCc]ão", "Então", "h?ouve", "pessoal", "rapariga", "tivesse", "fizeste",
                                                                 "jantar", "conheço", "atenção", "foste", "milhões", "devias", "ganhar", "raios"); // not spanish words
                            if (frenchCount < 2 && portugueseCount < 2)
                            {
                                languageName = shortName;
                                bestCount = count;
                            }
                        }
                        break;
                    case "it_IT":
                        count = GetCount(text, AutoDetectWordsItalian);
                        if (count > bestCount)
                        {
                            int frenchCount = GetCount(text, "[Cc]'est", "pas", "vous", "pour", "suis", "Pourquoi", "maison", "souviens", "quelque"); // not italian words
                            int spanishCount = GetCount(text, "Hola", "nada", "Vamos", "pasa", "los", "como"); // not italian words
                            if (frenchCount < 2 && spanishCount < 2)
                            {
                                languageName = shortName;
                                bestCount = count;
                            }
                        }
                        break;
                    case "fr_FR":
                        count = GetCount(text, AutoDetectWordsFrench);
                        if (count > bestCount)
                        {
                            int romanianCount = GetCount(text, "[Vv]reau", "[Ss]înt", "[Aa]cum", "pentru", "domnule", "aici");
                            int spanishCount = GetCount(text, "Hola", "nada", "Vamos", "pasa", "los", "como"); // not french words
                            int italianCount = GetCount(text, AutoDetectWordsItalian);
                            if (romanianCount < 5 && spanishCount < 2 && italianCount < 2)
                            {
                                languageName = shortName;
                                bestCount = count;
                            }
                        }
                        break;
                    case "de_DE":
                        count = GetCount(text, AutoDetectWordsGerman);
                        if (count > bestCount)
                        {
                            languageName = shortName;
                            bestCount = count;
                        }
                        break;
                    case "nl_NL":
                        count = GetCount(text, AutoDetectWordsDutch);
                        if (count > bestCount)
                        {
                            languageName = shortName;
                            bestCount = count;
                        }
                        break;
                    case "pl_PL":
                        count = GetCount(text, AutoDetectWordsPolish);
                        if (count > bestCount)
                        {
                            languageName = shortName;
                            bestCount = count;
                        }
                        break;
                    case "el_GR":
                        count = GetCount(text, AutoDetectWordsGreek);
                        if (count > bestCount)
                        {
                            languageName = shortName;
                            bestCount = count;
                        }
                        break;
                    case "ru_RU":
                        count = GetCount(text, AutoDetectWordsRussian);
                        if (count > bestCount)
                        {
                            languageName = shortName;
                            bestCount = count;
                        }
                        break;
                    case "uk_UA":
                        count = GetCount(text, AutoDetectWordsUkrainian);
                        if (count > bestCount)
                        {
                            languageName = shortName;
                            bestCount = count;
                        }
                        break;
                    case "ro_RO":
                        // Try the second Romanian word list only when the first does not beat the threshold.
                        count = GetCount(text, AutoDetectWordsRomanian1);
                        if (count <= bestCount)
                            count = GetCount(text, AutoDetectWordsRomanian2);
                        if (count > bestCount)
                        {
                            languageName = shortName;
                            bestCount = count;
                        }
                        break;
                    case "hr_HR": // Croatian
                        count = GetCount(text, AutoDetectWordsCroatianAndSerbian);
                        if (count > bestCount)
                        {
                            languageName = shortName;
                            bestCount = count;
                            if (containsSrLatn)
                            {
                                int croatianCount = GetCount(text, AutoDetectWordsCroatian);
                                int serbianCount = GetCount(text, AutoDetectWordsSerbian);
                                if (serbianCount > croatianCount)
                                    languageName = "sr-Latn";
                            }
                        }
                        break;
                    case "sr-Latn": // Serbian (Latin)
                        count = GetCount(text, AutoDetectWordsCroatianAndSerbian);
                        if (count > bestCount)
                        {
                            languageName = shortName;
                            bestCount = count;
                            if (containsHrHr)
                            {
                                int croatianCount = GetCount(text, AutoDetectWordsCroatian);
                                int serbianCount = GetCount(text, AutoDetectWordsSerbian);
                                if (serbianCount < croatianCount)
                                    languageName = "hr_HR";
                            }
                        }
                        break;
                    case "sr": // Serbian (Cyrillic)
                        count = GetCount(text, AutoDetectWordsSerbianCyrillic);
                        if (count > bestCount)
                        {
                            languageName = shortName;
                            bestCount = count;
                        }
                        break;
                    case "pt_PT": // Portuguese
                    case "pt_BR": // Portuguese (Brasil)
                        count = GetCount(text, AutoDetectWordsPortuguese);
                        if (count > bestCount)
                        {
                            languageName = shortName;
                            bestCount = count;
                        }
                        break;
                    case "hu_HU": // Hungarian
                        count = GetCount(text, AutoDetectWordsHungarian);
                        if (count > bestCount)
                        {
                            languageName = shortName;
                            bestCount = count;
                        }
                        break;
                    case "cs_CZ": // Czech
                        count = GetCount(text, AutoDetectWordsCzech);
                        if (count > bestCount)
                        {
                            languageName = shortName;
                            bestCount = count;
                        }
                        break;
                    case "sk_SK": // Slovak
                        count = GetCount(text, AutoDetectWordsSlovak);
                        if (count > bestCount)
                        {
                            languageName = shortName;
                            bestCount = count;
                        }
                        break;
                }
            }
            return languageName;
        }
        /// <summary>
        /// Detects the language id of a subtitle via <c>AutoDetectGoogleLanguage</c>.
        /// </summary>
        /// <param name="subtitle">Subtitle whose combined text is analysed.</param>
        /// <returns>The detected language id, or <c>null</c> when the detector returns null or an empty string.</returns>
        public static string AutoDetectGoogleLanguageOrNull(Subtitle subtitle)
        {
            var allText = subtitle.GetAllTexts();
            // One fourteenth of the paragraph count is passed as the second detector argument.
            var minimumHits = subtitle.Paragraphs.Count / 14;

            string detected = AutoDetectGoogleLanguage(allText, minimumHits);
            return string.IsNullOrEmpty(detected) ? null : detected;
        }
Esempio n. 5
0
        /// <summary>
        /// Guesses the spell-check dictionary language of a subtitle by counting
        /// language-specific words in its combined text and keeping the highest-scoring
        /// installed dictionary.
        /// </summary>
        /// <param name="languageName">
        /// Fallback returned when no installed dictionary language qualifies;
        /// replaced by "en_US" when null or empty.
        /// </param>
        /// <param name="subtitle">Subtitle whose text (<c>GetAllTexts()</c>) is analysed.</param>
        /// <returns>
        /// The short name (the text between '[' and ']' in the dictionary name, e.g. "da_DK")
        /// of the best matching dictionary, or the fallback name.
        /// </returns>
        public static string AutoDetectLanguageName(string languageName, Subtitle subtitle)
        {
            if (string.IsNullOrEmpty(languageName))
            {
                languageName = "en_US";
            }

            // Threshold a language must exceed; raised to the winning count whenever a language is accepted.
            int bestCount = subtitle.Paragraphs.Count / 14;

            string       text            = subtitle.GetAllTexts();
            List<string> dictionaryNames = Utilities.GetDictionaryLanguages();

            // Record which closely related dictionaries are installed, so the English and
            // Croatian/Serbian branches only switch to a variant that is actually available.
            bool containsEnGb   = false;
            bool containsEnUs   = false;
            bool containsHrHr   = false;
            bool containsSrLatn = false;

            foreach (string name in dictionaryNames)
            {
                if (name.Contains("[en_GB]"))
                {
                    containsEnGb = true;
                }
                if (name.Contains("[en_US]"))
                {
                    containsEnUs = true;
                }
                if (name.Contains("[hr_HR]"))
                {
                    containsHrHr = true;
                }
                if (name.Contains("[sr_Latn]"))
                {
                    containsSrLatn = true;
                }
            }

            foreach (string name in dictionaryNames)
            {
                // Extract the text between the first '[' and the first ']'.
                // A '[' at index 0 is not accepted (start > 0), leaving shortName empty.
                string shortName = string.Empty;
                int    start     = name.IndexOf('[');
                int    end       = name.IndexOf(']');
                if (start > 0 && end > start)
                {
                    start++;
                    shortName = name.Substring(start, end - start);
                }

                int count;
                switch (shortName)
                {
                case "da_DK":
                    count = GetCount(text, AutoDetectWordsDanish);
                    if (count > bestCount)
                    {
                        int norwegianCount = GetCount(text, "ut", "deg", "meg", "merkelig", "mye", "spørre");
                        int dutchCount     = GetCount(text, AutoDetectWordsDutch);
                        if (norwegianCount < 2 && dutchCount < count)
                        {
                            languageName = shortName;
                            bestCount    = count;
                        }
                    }
                    break;

                case "nb_NO":
                    count = GetCount(text, AutoDetectWordsNorwegian);
                    if (count > bestCount)
                    {
                        int danishCount = GetCount(text, "siger", "dig", "mig", "mærkelig", "tilbage", "spørge");
                        int dutchCount  = GetCount(text, AutoDetectWordsDutch);
                        if (danishCount < 2 && dutchCount < count)
                        {
                            languageName = shortName;
                            bestCount    = count;
                        }
                    }
                    break;

                case "sv_SE":
                    count = GetCount(text, AutoDetectWordsSwedish);
                    if (count > bestCount)
                    {
                        languageName = shortName;
                        bestCount    = count;
                    }
                    break;

                case "en_US":
                    count = GetCount(text, AutoDetectWordsEnglish);
                    if (count > bestCount)
                    {
                        int dutchCount = GetCount(text, AutoDetectWordsDutch);
                        if (dutchCount < count)
                        {
                            languageName = shortName;
                            bestCount    = count;
                            if (containsEnGb)
                            {
                                // Prefer British English when British spellings outnumber American ones.
                                int usCount = GetCount(text, "color", "flavor", "honor", "humor", "neighbor", "honor");
                                int gbCount = GetCount(text, "colour", "flavour", "honour", "humour", "neighbour", "honour");
                                if (gbCount > usCount)
                                {
                                    languageName = "en_GB";
                                }
                            }
                        }
                    }
                    break;

                case "en_GB":
                    count = GetCount(text, AutoDetectWordsEnglish);
                    if (count > bestCount)
                    {
                        int dutchCount = GetCount(text, AutoDetectWordsDutch);
                        if (dutchCount < count)
                        {
                            languageName = shortName;
                            bestCount    = count;
                            if (containsEnUs)
                            {
                                // Prefer American English when American spellings outnumber British ones.
                                int usCount = GetCount(text, "color", "flavor", "honor", "humor", "neighbor", "honor");
                                int gbCount = GetCount(text, "colour", "flavour", "honour", "humour", "neighbour", "honour");
                                if (gbCount < usCount)
                                {
                                    languageName = "en_US";
                                }
                            }
                        }
                    }
                    break;

                case "es_ES":
                    count = GetCount(text, AutoDetectWordsSpanish);
                    if (count > bestCount)
                    {
                        int frenchCount     = GetCount(text, "[Cc]'est", "pas", "vous", "pour", "suis", "Pourquoi", "maison", "souviens", "quelque"); // not spanish words
                        int portugueseCount = GetCount(text, "[NnCc]ão", "Então", "h?ouve", "pessoal", "rapariga", "tivesse", "fizeste",
                                                       "jantar", "conheço", "atenção", "foste", "milhões", "devias", "ganhar", "raios");              // not spanish words
                        if (frenchCount < 2 && portugueseCount < 2)
                        {
                            languageName = shortName;
                            bestCount    = count;
                        }
                    }
                    break;

                case "it_IT":
                    count = GetCount(text, AutoDetectWordsItalian);
                    if (count > bestCount)
                    {
                        int frenchCount  = GetCount(text, "[Cc]'est", "pas", "vous", "pour", "suis", "Pourquoi", "maison", "souviens", "quelque"); // not italian words
                        int spanishCount = GetCount(text, "Hola", "nada", "Vamos", "pasa", "los", "como");                                         // not italian words
                        if (frenchCount < 2 && spanishCount < 2)
                        {
                            languageName = shortName;
                            bestCount    = count;
                        }
                    }
                    break;

                case "fr_FR":
                    count = GetCount(text, AutoDetectWordsFrench);
                    if (count > bestCount)
                    {
                        int romanianCount = GetCount(text, "[Vv]reau", "[Ss]înt", "[Aa]cum", "pentru", "domnule", "aici");
                        int spanishCount  = GetCount(text, "Hola", "nada", "Vamos", "pasa", "los", "como");    // not french words
                        int italianCount  = GetCount(text, AutoDetectWordsItalian);
                        if (romanianCount < 5 && spanishCount < 2 && italianCount < 2)
                        {
                            languageName = shortName;
                            bestCount    = count;
                        }
                    }
                    break;

                case "de_DE":
                    count = GetCount(text, AutoDetectWordsGerman);
                    if (count > bestCount)
                    {
                        languageName = shortName;
                        bestCount    = count;
                    }
                    break;

                case "nl_NL":
                    count = GetCount(text, AutoDetectWordsDutch);
                    if (count > bestCount)
                    {
                        languageName = shortName;
                        bestCount    = count;
                    }
                    break;

                case "pl_PL":
                    count = GetCount(text, AutoDetectWordsPolish);
                    if (count > bestCount)
                    {
                        languageName = shortName;
                        bestCount    = count;
                    }
                    break;

                case "el_GR":
                    count = GetCount(text, AutoDetectWordsGreek);
                    if (count > bestCount)
                    {
                        languageName = shortName;
                        bestCount    = count;
                    }
                    break;

                case "ru_RU":
                    count = GetCount(text, AutoDetectWordsRussian);
                    if (count > bestCount)
                    {
                        languageName = shortName;
                        bestCount    = count;
                    }
                    break;

                case "uk_UA":
                    count = GetCount(text, AutoDetectWordsUkrainian);
                    if (count > bestCount)
                    {
                        languageName = shortName;
                        bestCount    = count;
                    }
                    break;

                case "ro_RO":
                    // Try the second Romanian word list only when the first does not beat the threshold.
                    count = GetCount(text, AutoDetectWordsRomanian1);
                    if (count <= bestCount)
                    {
                        count = GetCount(text, AutoDetectWordsRomanian2);
                    }
                    if (count > bestCount)
                    {
                        languageName = shortName;
                        bestCount    = count;
                    }
                    break;

                case "hr_HR":     // Croatian
                    count = GetCount(text, AutoDetectWordsCroatianAndSerbian);
                    if (count > bestCount)
                    {
                        bestCount    = count;
                        languageName = shortName;
                        if (containsSrLatn)
                        {
                            int croatianCount = GetCount(text, AutoDetectWordsCroatian);
                            int serbianCount  = GetCount(text, AutoDetectWordsSerbian);
                            if (serbianCount > croatianCount)
                            {
                                // Use the same short name as the "[sr_Latn]" dictionary detected
                                // above and handled by the "sr_Latn" case below.
                                languageName = "sr_Latn";
                            }
                        }
                    }
                    break;

                case "sr_Latn":     // Serbian (Latin)
                    count = GetCount(text, AutoDetectWordsCroatianAndSerbian);
                    if (count > bestCount)
                    {
                        languageName = shortName;
                        bestCount    = count;
                        if (containsHrHr)
                        {
                            int croatianCount = GetCount(text, AutoDetectWordsCroatian);
                            int serbianCount  = GetCount(text, AutoDetectWordsSerbian);
                            if (serbianCount < croatianCount)
                            {
                                languageName = "hr_HR";
                            }
                        }
                    }
                    break;

                case "sr":     // Serbian (Cyrillic)
                    count = GetCount(text, AutoDetectWordsSerbianCyrillic);
                    if (count > bestCount)
                    {
                        languageName = shortName;
                        bestCount    = count;
                    }
                    break;

                case "pt_PT":     // Portuguese Portugal
                case "pt_BR":     // Portuguese Brazil
                    count = GetCount(text, AutoDetectWordsPortuguese);
                    if (count > bestCount)
                    {
                        languageName = shortName;
                        bestCount    = count;
                    }
                    break;

                case "hu_HU":     // Hungarian
                    count = GetCount(text, AutoDetectWordsHungarian);
                    if (count > bestCount)
                    {
                        languageName = shortName;
                        bestCount    = count;
                    }
                    break;

                case "cs_CZ":     // Czech
                    count = GetCount(text, AutoDetectWordsCzech);
                    if (count > bestCount)
                    {
                        // Czech is accepted only when it also outscores the Lithuanian word list.
                        var lithuanianCount = GetCount(text, AutoDetectWordsLithuanian);
                        if (count > lithuanianCount)
                        {
                            languageName = shortName;
                            bestCount    = count;
                        }
                    }
                    break;

                case "sk_SK":     // Slovak
                    count = GetCount(text, AutoDetectWordsSlovak);
                    if (count > bestCount)
                    {
                        languageName = shortName;
                        bestCount    = count;
                    }
                    break;

                case "lv_LV":     // Latvian
                    count = GetCount(text, AutoDetectWordsLatvian);
                    if (count > bestCount)
                    {
                        languageName = shortName;
                        bestCount    = count;
                    }
                    break;

                case "lt_LT":     // Lithuanian
                case "lt":        // Lithuanian (Neutral)
                    count = GetCount(text, AutoDetectWordsLithuanian);
                    if (count > bestCount)
                    {
                        languageName = shortName;
                        bestCount    = count;
                    }
                    break;

                case "hi_IN":     // Hindi
                case "hi":
                    count = GetCount(text, AutoDetectWordsHindi);
                    if (count > bestCount)
                    {
                        languageName = shortName;
                        bestCount    = count;
                    }
                    break;
                }
            }
            return languageName;
        }