Exemplo n.º 1
0
        private void ReplaceNames1Remove(List <string> namesEtc, List <string> replaceIds, List <string> replaceNames, List <string> originalNames)
        {
            if (Post.StartsWith('.'))
            {
                StrippedText += ".";
                Post          = Post.Remove(0, 1);
            }

            string lower = StrippedText.ToLower();

            foreach (string name in namesEtc)
            {
                int start = lower.IndexOf(name, StringComparison.OrdinalIgnoreCase);
                while (start >= 0 && start < lower.Length)
                {
                    bool startOk = (start == 0) || (lower[start - 1] == ' ') || (lower[start - 1] == '-') ||
                                   (lower[start - 1] == '"') || (lower[start - 1] == '\'') || (lower[start - 1] == '>') ||
                                   Environment.NewLine.EndsWith(lower[start - 1]);

                    if (startOk && string.CompareOrdinal(name, "Don") == 0 && lower.Substring(start).StartsWith("don't"))
                    {
                        startOk = false;
                    }

                    if (startOk)
                    {
                        int  end   = start + name.Length;
                        bool endOk = end <= lower.Length;
                        if (endOk)
                        {
                            endOk = end == lower.Length || (@" ,.!?:;')- <""" + Environment.NewLine).Contains(lower[end]);
                        }

                        if (endOk && StrippedText.Length >= start + name.Length)
                        {
                            string originalName = StrippedText.Substring(start, name.Length);
                            originalNames.Add(originalName);
                            StrippedText = StrippedText.Remove(start, name.Length);
                            StrippedText = StrippedText.Insert(start, GetAndInsertNextId(replaceIds, replaceNames, name));
                            lower        = StrippedText.ToLower();
                        }
                    }
                    if (start + 3 > lower.Length)
                    {
                        start = lower.Length + 1;
                    }
                    else
                    {
                        start = lower.IndexOf(name.ToLower(), start + 3, StringComparison.Ordinal);
                    }
                }
            }

            if (StrippedText.EndsWith('.'))
            {
                Post         = "." + Post;
                StrippedText = StrippedText.TrimEnd('.');
            }
        }
Exemplo n.º 2
0
        private bool ShouldStartWithUpperCase(string lastLine, double millisecondsgaps)
        {
            // do not capitalize url
            if (StrippedText.StartsWith("www.", StringComparison.OrdinalIgnoreCase) || StrippedText.StartsWith("http", StringComparison.OrdinalIgnoreCase))
            {
                return(false);
            }

            // do not capitalize word like iPhone
            if (StrippedText.Length > 1 && StrippedText[0] == 'i' && char.IsUpper(StrippedText[1]))
            {
                return(false);
            }

            // shouldn't capitalize current line not closed
            if (Pre.Contains("...") || Pre.Contains("…"))
            {
                return(false);
            }

            // too much gaps between lines, so should be considered as closed
            if (millisecondsgaps > 5000)
            {
                return(true);
            }

            var preLine = HtmlUtil.RemoveHtmlTags(lastLine).TrimEnd().TrimEnd('\"', '”').TrimEnd();

            // check if previous line was fully closed
            if (string.IsNullOrEmpty(preLine))
            {
                return(true);
            }

            char lastChar = preLine[preLine.Length - 1];

            if (lastChar == '♪')
            {
                string tempPreLine = preLine.Substring(0, preLine.Length - 1).TrimEnd();
                // update last char
                if (tempPreLine.Length > 0)
                {
                    lastChar = tempPreLine[tempPreLine.Length - 1];
                }
            }
            if (lastChar == '.' || lastChar == '!' || lastChar == '?' || lastChar == ']' || lastChar == ')' || lastChar == ':' || lastChar == '_')
            {
                return(true);
            }

            // previous line ends with music symbol but current line doesn't contains any music symbol
            if ((preLine.EndsWith('♪') || preLine.EndsWith('♫')) && !Pre.Contains(new[] { '♪', '♫' }))
            {
                return(true);
            }

            // do not capitalize
            return(false);
        }
Exemplo n.º 3
0
        public void FixCasing(List <string> namesEtc, bool changeNameCases, bool makeUppercaseAfterBreak, bool checkLastLine, string lastLine)
        {
            var replaceIds    = new List <string>();
            var replaceNames  = new List <string>();
            var originalNames = new List <string>();

            ReplaceNames1Remove(namesEtc, replaceIds, replaceNames, originalNames);

            if (checkLastLine)
            {
                string s = HtmlUtil.RemoveHtmlTags(lastLine).TrimEnd().TrimEnd('\"').TrimEnd();

                bool startWithUppercase = string.IsNullOrEmpty(s) ||
                                          s.EndsWith('.') ||
                                          s.EndsWith('!') ||
                                          s.EndsWith('?') ||
                                          s.EndsWith(". ♪", StringComparison.Ordinal) ||
                                          s.EndsWith("! ♪", StringComparison.Ordinal) ||
                                          s.EndsWith("? ♪", StringComparison.Ordinal) ||
                                          s.EndsWith(']') ||
                                          s.EndsWith(')') ||
                                          s.EndsWith(':');

                // start with uppercase after music symbol - but only if next line does not start with music symbol
                if (!startWithUppercase && (s.EndsWith('♪') || s.EndsWith('♫')))
                {
                    if (!Pre.Contains(new[] { '♪', '♫' }))
                    {
                        startWithUppercase = true;
                    }
                }

                if (startWithUppercase && StrippedText.Length > 0 && !Pre.Contains("..."))
                {
                    StrippedText = char.ToUpper(StrippedText[0]) + StrippedText.Substring(1);
                }
            }

            if (makeUppercaseAfterBreak && StrippedText.Contains(ExpectedCharsArray))
            {
                const string breakAfterChars = @".!?:;)]}([{";
                const string ExpectedChars   = "\"`´'()<>!?.- \r\n";
                var          sb           = new StringBuilder();
                bool         lastWasBreak = false;
                for (int i = 0; i < StrippedText.Length; i++)
                {
                    var s = StrippedText[i];
                    if (lastWasBreak)
                    {
                        if (ExpectedChars.Contains(s))
                        {
                            sb.Append(s);
                        }
                        else if ((sb.EndsWith('<') || sb.ToString().EndsWith("</", StringComparison.Ordinal)) && i + 1 < StrippedText.Length && StrippedText[i + 1] == '>')
                        { // tags
                            sb.Append(s);
                        }
                        else if (sb.EndsWith('<') && s == '/' && i + 2 < StrippedText.Length && StrippedText[i + 2] == '>')
                        { // tags
                            sb.Append(s);
                        }
                        else if (sb.ToString().EndsWith("... ", StringComparison.Ordinal))
                        {
                            sb.Append(s);
                            lastWasBreak = false;
                        }
                        else
                        {
                            if (breakAfterChars.Contains(s))
                            {
                                sb.Append(s);
                            }
                            else
                            {
                                lastWasBreak = false;
                                sb.Append(char.ToUpper(s));
                            }
                        }
                    }
                    else
                    {
                        sb.Append(s);
                        if (breakAfterChars.Contains(s))
                        {
                            var idx = sb.ToString().IndexOf('[');
                            if (s == ']' && idx > 1)
                            { // I [Motor roaring] love you!
                                string temp = sb.ToString(0, idx - 1).Trim();
                                if (temp.Length > 0 && !Utilities.LowercaseLetters.Contains(temp[temp.Length - 1]))
                                {
                                    lastWasBreak = true;
                                }
                            }
                            else
                            {
                                lastWasBreak = true;
                            }
                        }
                    }
                }
                StrippedText = sb.ToString();
            }

            if (changeNameCases)
            {
                ReplaceNames2Fix(replaceIds, replaceNames);
            }
            else
            {
                ReplaceNames2Fix(replaceIds, originalNames);
            }
        }
Exemplo n.º 4
0
        public void FixCasing(List <string> nameList, bool changeNameCases, bool makeUppercaseAfterBreak, bool checkLastLine, string lastLine, double millisecondsFromLast = 0)
        {
            var replaceIds    = new List <string>();
            var replaceNames  = new List <string>();
            var originalNames = new List <string>();

            ReplaceNames1Remove(nameList, replaceIds, replaceNames, originalNames);

            if (checkLastLine && ShouldStartWithUpperCase(lastLine, millisecondsFromLast))
            {
                if (StrippedText.StartsWith("_@", StringComparison.Ordinal))
                {
                    for (int i = 0; i < replaceIds.Count; i++)
                    {
                        string id = $"_@{i}_";
                        if (StrippedText.StartsWith(id, StringComparison.Ordinal))
                        {
                            if (!string.IsNullOrEmpty(originalNames[i]))
                            {
                                originalNames[i] = originalNames[i].CapitalizeFirstLetter();
                            }

                            break;
                        }
                    }
                }
                else
                {
                    StrippedText = StrippedText.CapitalizeFirstLetter();
                }
            }

            if (makeUppercaseAfterBreak && StrippedText.Contains(ExpectedCharsArray))
            {
                const string breakAfterChars = @".!?:;)]}([{";
                const string expectedChars   = "\"“`´'()<>!?.- \r\n";
                var          sb           = new StringBuilder(StrippedText.Length);
                bool         lastWasBreak = false;
                for (int i = 0; i < StrippedText.Length; i++)
                {
                    var s = StrippedText[i];
                    if (lastWasBreak)
                    {
                        if (expectedChars.Contains(s))
                        {
                            sb.Append(s);
                        }
                        else if ((sb.EndsWith('<') || sb.ToString().EndsWith("</", StringComparison.Ordinal)) && i + 1 < StrippedText.Length && StrippedText[i + 1] == '>')
                        { // tags
                            sb.Append(s);
                        }
                        else if (sb.EndsWith('<') && s == '/' && i + 2 < StrippedText.Length && StrippedText[i + 2] == '>')
                        { // tags
                            sb.Append(s);
                        }
                        else if (sb.ToString().EndsWith("... ", StringComparison.Ordinal))
                        {
                            sb.Append(s);
                            lastWasBreak = false;
                        }
                        else
                        {
                            if (breakAfterChars.Contains(s))
                            {
                                sb.Append(s);
                            }
                            else
                            {
                                lastWasBreak = false;
                                sb.Append(char.ToUpper(s));

                                if (StrippedText.Substring(i).StartsWith("_@", StringComparison.Ordinal))
                                {
                                    var ks = StrippedText.Substring(i);
                                    for (int k = 0; k < replaceIds.Count; k++)
                                    {
                                        string id = $"_@{k}_";
                                        if (ks.StartsWith(id, StringComparison.Ordinal))
                                        {
                                            if (!string.IsNullOrEmpty(originalNames[k]))
                                            {
                                                originalNames[k] = char.ToUpper(originalNames[k][0]) + originalNames[k].Remove(0, 1);
                                            }

                                            break;
                                        }
                                    }
                                }
                            }
                        }
                    }
                    else
                    {
                        sb.Append(s);
                        if (breakAfterChars.Contains(s))
                        {
                            var idx = sb.ToString().IndexOf('[');
                            if (s == ']' && idx > 1)
                            { // I [Motor roaring] love you!
                                string temp = sb.ToString(0, idx - 1).Trim();
                                if (temp.Length > 0 && !char.IsLetterOrDigit(temp[temp.Length - 1]))
                                {
                                    lastWasBreak = true;
                                }
                            }
                            else if (s == ']' && idx == -1 && Pre.Contains('['))
                            { // [ Motor roaring ] Hallo!
                                lastWasBreak = true;
                            }
                            else if (s == ':') // seems to be the rule (in subtitles) to nearly always capitalize first letter efter semicolon
                            {
                                lastWasBreak = true;
                            }
                            else
                            {
                                idx = sb.ToString().LastIndexOf(' ');
                                if (idx >= 0 && idx < sb.Length - 2 && !IsInMiddleOfUrl(i - idx, StrippedText.Substring(idx + 1)))
                                {
                                    lastWasBreak = true;
                                }
                                else if (StrippedText.Length > i + 1 && " \r\n".Contains(StrippedText[i + 1]))
                                {
                                    lastWasBreak = true;
                                }
                            }
                        }
                        else if (s == '-' && Pre.Contains('-'))
                        {
                            if (sb.ToString().EndsWith(Environment.NewLine + "-"))
                            {
                                var prevLine = HtmlUtil.RemoveHtmlTags(sb.ToString().Substring(0, sb.Length - 2).TrimEnd());
                                if (prevLine.EndsWith('.') ||
                                    prevLine.EndsWith('!') ||
                                    prevLine.EndsWith('?') ||
                                    prevLine.EndsWith(". ♪", StringComparison.Ordinal) ||
                                    prevLine.EndsWith("! ♪", StringComparison.Ordinal) ||
                                    prevLine.EndsWith("? ♪", StringComparison.Ordinal) ||
                                    prevLine.EndsWith(']') ||
                                    prevLine.EndsWith(')') ||
                                    prevLine.EndsWith(':'))
                                {
                                    lastWasBreak = true;
                                }
                            }
                        }
                    }
                }
                StrippedText = sb.ToString();
            }

            ReplaceNames2Fix(replaceIds, changeNameCases ? replaceNames : originalNames);
        }
Exemplo n.º 5
0
        public void FixCasing(List <string> namesEtc, bool changeNameCases, bool makeUppercaseAfterBreak, bool checkLastLine, string lastLine)
        {
            var replaceIds    = new List <string>();
            var replaceNames  = new List <string>();
            var originalNames = new List <string>();

            ReplaceNames1Remove(namesEtc, replaceIds, replaceNames, originalNames);

            if (checkLastLine)
            {
                string s = Utilities.RemoveHtmlTags(lastLine).TrimEnd().TrimEnd('\"').TrimEnd();

                bool startWithUppercase = string.IsNullOrEmpty(s) ||
                                          s.EndsWith('.') ||
                                          s.EndsWith('!') ||
                                          s.EndsWith('?') ||
                                          s.EndsWith(". ♪", StringComparison.Ordinal) ||
                                          s.EndsWith("! ♪", StringComparison.Ordinal) ||
                                          s.EndsWith("? ♪", StringComparison.Ordinal) ||
                                          s.EndsWith(']') ||
                                          s.EndsWith(')') ||
                                          s.EndsWith(':');

                // start with uppercase after music symbol - but only if next line does not start with music symbol
                if (!startWithUppercase && (s.EndsWith('♪') || s.EndsWith('♫')))
                {
                    if (!Pre.Contains("♪") && !Pre.Contains("♫"))
                    {
                        startWithUppercase = true;
                    }
                }

                if (startWithUppercase && StrippedText.Length > 0 && !Pre.Contains("..."))
                {
                    StrippedText = StrippedText.Remove(0, 1).Insert(0, StrippedText[0].ToString().ToUpper());
                }
            }

            if (makeUppercaseAfterBreak &&
                (StrippedText.Contains(".") ||
                 StrippedText.Contains("!") ||
                 StrippedText.Contains("?") ||
                 StrippedText.Contains(":") ||
                 StrippedText.Contains(";") ||
                 StrippedText.Contains(")") ||
                 StrippedText.Contains("]") ||
                 StrippedText.Contains("}") ||
                 StrippedText.Contains("(") ||
                 StrippedText.Contains("[") ||
                 StrippedText.Contains("{")))
            {
                var  sb           = new StringBuilder();
                bool lastWasBreak = false;
                for (int i = 0; i < StrippedText.Length; i++)
                {
                    string s = StrippedText[i].ToString();
                    if (lastWasBreak)
                    {
                        if (("\"`´'()<>!?.- " + Environment.NewLine).Contains(s))
                        {
                            sb.Append(s);
                        }
                        else if ((sb.ToString().EndsWith('<') || sb.ToString().EndsWith("</", StringComparison.Ordinal)) && i + 1 < StrippedText.Length && StrippedText[i + 1] == '>')
                        { // tags
                            sb.Append(s);
                        }
                        else if (sb.ToString().EndsWith('<') && s == "/" && i + 2 < StrippedText.Length && StrippedText[i + 2] == '>')
                        { // tags
                            sb.Append(s);
                        }
                        else if (sb.ToString().EndsWith("... ", StringComparison.Ordinal))
                        {
                            sb.Append(s);
                            lastWasBreak = false;
                        }
                        else
                        {
                            if (".!?:;)]}([{".Contains(s))
                            {
                                sb.Append(s);
                            }
                            else
                            {
                                lastWasBreak = false;
                                sb.Append(s.ToUpper());
                            }
                        }
                    }
                    else
                    {
                        sb.Append(s);
                        if (".!?:;)]}([{".Contains(s))
                        {
                            if (s == "]" && sb.ToString().IndexOf('[') > 1)
                            { // I [Motor roaring] love you!
                                string temp = sb.ToString().Substring(0, sb.ToString().IndexOf('[') - 1).Trim();
                                if (temp.Length > 0 && !Utilities.LowercaseLetters.Contains(temp[temp.Length - 1].ToString()))
                                {
                                    lastWasBreak = true;
                                }
                            }
                            else
                            {
                                lastWasBreak = true;
                            }
                        }
                    }
                }
                StrippedText = sb.ToString();
            }

            if (changeNameCases)
            {
                ReplaceNames2Fix(replaceIds, replaceNames);
            }
            else
            {
                ReplaceNames2Fix(replaceIds, originalNames);
            }
        }