예제 #1
0
        /// <summary>
        /// Searches the tag-stripped subtitle text for names from the names list that occur
        /// with a casing different from the listed one. Each such name is added once to
        /// <c>_usedNames</c> and to the names list view, and the group box caption is then
        /// updated with the number of list view items.
        /// </summary>
        private void FindAllNames()
        {
            string language = LanguageAutoDetect.AutoDetectGoogleLanguage(_subtitle);

            if (string.IsNullOrEmpty(language))
            {
                language = "en_US";
            }

            var namesList = new NamesList(Configuration.DictionariesDirectory, language, Configuration.Settings.WordLists.UseOnlineNames, Configuration.Settings.WordLists.NamesUrl);

            // Contains both one word names and multi word names
            var namesEtcList = namesList.GetAllNames();

            string text        = HtmlUtil.RemoveHtmlTags(_subtitle.GetAllTexts());
            string textToLower = text.ToLower();

            listViewNames.BeginUpdate();
            try
            {
                foreach (string name in namesEtcList)
                {
                    string nameToLower = name.ToLower();

                    // Names of at most one character, and names that equal their own
                    // lower-cased form, are never reported.
                    if (name.Length <= 1 || name == nameToLower)
                    {
                        continue;
                    }

                    int startIndex = textToLower.IndexOf(nameToLower, StringComparison.Ordinal);
                    while (startIndex >= 0 && startIndex < text.Length)
                    {
                        // The match must begin at the start of the text or directly after one of
                        // the accepted separator characters.
                        bool startOk = startIndex == 0;
                        if (!startOk)
                        {
                            char previous = text[startIndex - 1];
                            startOk = previous == ' ' || previous == '-' || previous == '"' || previous == '\'' || previous == '>' ||
                                      // Ordinal: a culture-sensitive EndsWith treats ignorable characters as a
                                      // match and behaves differently for "\r\n" under ICU.
                                      Environment.NewLine.EndsWith(previous.ToString(CultureInfo.InvariantCulture), StringComparison.Ordinal);
                        }

                        if (startOk)
                        {
                            // The match must end at the end of the text or before an expected end character.
                            int  end   = startIndex + name.Length;
                            bool endOk = end == text.Length || (end < text.Length && ExpectedEndChars.Contains(text[end]));

                            // do not add names where casing already is correct
                            if (endOk && string.CompareOrdinal(text, startIndex, name, 0, name.Length) != 0)
                            {
                                if (!_usedNames.Contains(name))
                                {
                                    _usedNames.Add(name);
                                    AddToListViewNames(name);
                                }

                                // Either the name was just added or it was already known; further
                                // occurrences cannot change the result.
                                break;
                            }
                        }

                        // name.Length is at least 2 here, so startIndex + 2 never exceeds the text length.
                        startIndex = textToLower.IndexOf(nameToLower, startIndex + 2, StringComparison.Ordinal);
                    }
                }
            }
            finally
            {
                // Always resume list view painting, even if the scan throws.
                listViewNames.EndUpdate();
            }

            groupBoxNames.Text = string.Format(Configuration.Settings.Language.ChangeCasingNames.NamesFoundInSubtitleX, listViewNames.Items.Count);
        }
예제 #2
0
        /// <summary>
        /// Loads the names list into <c>_namesEtcList</c> the first time it is needed.
        /// Does nothing when the list has already been assigned.
        /// </summary>
        private void MakeSureNamesListIsLoaded()
        {
            if (_namesEtcList != null)
            {
                return;
            }

            // Assign a placeholder first so the field is non-null even if loading below throws.
            _namesEtcList = new List<string>();

            string detectedLanguage = LanguageAutoDetect.AutoDetectGoogleLanguage(_subtitle);

            // The loaded list contains both one word names and multi word names
            _namesEtcList = new NamesList(
                Configuration.DictionariesFolder,
                detectedLanguage,
                Configuration.Settings.WordLists.UseOnlineNamesEtc,
                Configuration.Settings.WordLists.NamesEtcUrl).GetAllNames();
        }
예제 #3
0
        /// <summary>
        /// Fixes the casing of every paragraph in <paramref name="subtitle"/>. Each paragraph text is
        /// passed through the text-level <c>FixCasing</c> overload together with the previous
        /// paragraph's text and the names list. When the "normal" radio button is checked and the
        /// language starts with "en", a lower-case 'i' matched by the <c>AloneI</c> pattern is
        /// upper-cased, unless it is directly next to a '&gt;' character.
        /// </summary>
        /// <param name="subtitle">Subtitle whose paragraph texts are updated in place.</param>
        /// <param name="language">Language code used to load the names list and to detect English.</param>
        internal void FixCasing(Subtitle subtitle, string language)
        {
            var namesList = new NamesList(Configuration.DictionariesFolder, language, Configuration.Settings.WordLists.UseOnlineNamesEtc, Configuration.Settings.WordLists.NamesEtcUrl);
            var namesEtc  = namesList.GetAllNames();

            // Longer names must be first
            namesEtc.Sort((s1, s2) => s2.Length.CompareTo(s1.Length));

            string lastLine = string.Empty;

            foreach (Paragraph p in subtitle.Paragraphs)
            {
                p.Text = FixCasing(p.Text, lastLine, namesEtc);

                // fix casing of English alone i to I
                // (ordinal comparison: a language code is an identifier, not linguistic text)
                if (radioButtonNormal.Checked && language.StartsWith("en", StringComparison.Ordinal) && p.Text.Contains('i'))
                {
                    Match match = AloneI.Match(p.Text);
                    while (match.Success)
                    {
                        int index = match.Index;
                        if (p.Text[index] == 'i')
                        {
                            bool afterTagEnd  = index > 0 && p.Text[index - 1] == '>';
                            bool beforeTagEnd = index + 1 < p.Text.Length && p.Text[index + 1] == '>';

                            if (!afterTagEnd && !beforeTagEnd)
                            {
                                // One character replaces one character, so the indexes of the
                                // remaining matches stay valid.
                                p.Text = p.Text.Substring(0, index) + "I" + p.Text.Substring(index + 1);
                            }
                        }
                        match = match.NextMatch();
                    }
                }

                lastLine = p.Text;
            }
        }
예제 #4
0
        /// <summary>
        /// Stores <paramref name="subtitle"/>, loads the names list for its detected language
        /// (falling back to "en_US" when detection yields nothing), then finds the names in
        /// the subtitle and generates the preview.
        /// </summary>
        /// <param name="subtitle">Subtitle to work on.</param>
        public void Initialize(Subtitle subtitle)
        {
            _subtitle = subtitle;

            string detected = LanguageAutoDetect.AutoDetectGoogleLanguage(_subtitle);
            string language = string.IsNullOrEmpty(detected) ? "en_US" : detected;

            _namesList = new NamesList(
                Configuration.DictionariesDirectory,
                language,
                Configuration.Settings.WordLists.UseOnlineNames,
                Configuration.Settings.WordLists.NamesUrl);

            // Holds both one word names and multi word names
            _namesListInclMulti = _namesList.GetAllNames();

            FindAllNames();
            GeneratePreview();
        }
예제 #5
0
        /// <summary>
        /// Fixes the casing of every paragraph in <paramref name="subtitle"/>. Each paragraph text is
        /// passed through the text-level <c>FixCasing</c> overload together with the previous
        /// paragraph's text and the names list. When the "normal" radio button is checked and the
        /// language starts with "en", the text is additionally passed through
        /// <c>FixEnglishAloneILowerToUpper</c>.
        /// </summary>
        /// <param name="subtitle">Subtitle whose paragraph texts are updated in place.</param>
        /// <param name="language">Language code used to load the names list and to detect English.</param>
        internal void FixCasing(Subtitle subtitle, string language)
        {
            var allNames = new NamesList(
                Configuration.DictionariesFolder,
                language,
                Configuration.Settings.WordLists.UseOnlineNamesEtc,
                Configuration.Settings.WordLists.NamesEtcUrl).GetAllNames();

            // Order by descending length so that longer names come first
            allNames.Sort((left, right) => right.Length.CompareTo(left.Length));

            string previousText = string.Empty;

            foreach (Paragraph paragraph in subtitle.Paragraphs)
            {
                paragraph.Text = FixCasing(paragraph.Text, previousText, allNames);

                // English only: turn a stand-alone lower-case i into I
                bool fixAloneI = radioButtonNormal.Checked && language.StartsWith("en", StringComparison.Ordinal);
                if (fixAloneI)
                {
                    paragraph.Text = FixEnglishAloneILowerToUpper(paragraph.Text);
                }

                previousText = paragraph.Text;
            }
        }
예제 #6
0
        /// <summary>
        /// Searches the tag-stripped text of all paragraphs for names from the names list that
        /// occur with a casing different from the listed one. Each such name is added once to
        /// <c>_usedNames</c> and to the names list view, and the group box caption is then
        /// updated with the number of list view items.
        /// </summary>
        private void FindAllNames()
        {
            string language = Utilities.AutoDetectLanguageName("en_US", _subtitle);

            if (string.IsNullOrEmpty(language))
            {
                language = "en_US";
            }

            var namesList = new NamesList(Configuration.DictionariesFolder, language, Configuration.Settings.WordLists.UseOnlineNamesEtc, Configuration.Settings.WordLists.NamesEtcUrl);

            // Contains both one word names and multi word names
            var namesEtcList = namesList.GetAllNames();

            // Ordinal comparison: a language code is an identifier, not linguistic text.
            if (language.StartsWith("en", StringComparison.Ordinal))
            {
                // These entries are excluded for English (presumably because they are also
                // ordinary words - TODO confirm). Remove does nothing when the entry is absent,
                // so no Contains check is needed first.
                foreach (string excluded in new[] { "Black", "Bill", "Long", "Don" })
                {
                    namesEtcList.Remove(excluded);
                }
            }

            var sb = new StringBuilder();

            foreach (Paragraph p in _subtitle.Paragraphs)
            {
                sb.AppendLine(p.Text);
            }
            string text        = HtmlUtil.RemoveHtmlTags(sb.ToString());
            string textToLower = text.ToLower();

            // Characters that may directly follow a name; built once instead of once per candidate.
            string expectedEndChars = @" ,.!?:;')-<""" + Environment.NewLine;

            foreach (string name in namesEtcList)
            {
                string nameToLower = name.ToLower();

                // Names of at most one character, and names that equal their own
                // lower-cased form, are never reported.
                if (name.Length <= 1 || name == nameToLower)
                {
                    continue;
                }

                int startIndex = textToLower.IndexOf(nameToLower, StringComparison.Ordinal);
                while (startIndex >= 0 && startIndex < text.Length)
                {
                    // The match must begin at the start of the text or directly after one of
                    // the accepted separator characters.
                    bool startOk = startIndex == 0;
                    if (!startOk)
                    {
                        char previous = text[startIndex - 1];
                        startOk = previous == ' ' || previous == '-' || previous == '"' || previous == '\'' || previous == '>' ||
                                  // Ordinal: a culture-sensitive EndsWith treats ignorable characters as a
                                  // match and behaves differently for "\r\n" under ICU.
                                  Environment.NewLine.EndsWith(previous.ToString(CultureInfo.InvariantCulture), StringComparison.Ordinal);
                    }

                    if (startOk)
                    {
                        // The match must end at the end of the text or before an expected end character.
                        int  end   = startIndex + name.Length;
                        bool endOk = end == text.Length || (end < text.Length && expectedEndChars.IndexOf(text[end]) >= 0);

                        // do not add names where casing already is correct
                        if (endOk && string.CompareOrdinal(text, startIndex, name, 0, name.Length) != 0)
                        {
                            if (!_usedNames.Contains(name))
                            {
                                _usedNames.Add(name);
                                AddToListViewNames(name);
                            }

                            // Either the name was just added or it was already known; further
                            // occurrences cannot change the result.
                            break;
                        }
                    }

                    // name.Length is at least 2 here, so startIndex + 2 never exceeds the text length.
                    startIndex = textToLower.IndexOf(nameToLower, startIndex + 2, StringComparison.Ordinal);
                }
            }
            groupBoxNames.Text = string.Format(Configuration.Settings.Language.ChangeCasingNames.NamesFoundInSubtitleX, listViewNames.Items.Count);
        }