/// <summary>
/// Adjusts the casing of <c>StrippedText</c>: optionally upper-cases its first character
/// (depending on how the previous line ended) and optionally upper-cases the first letter
/// that follows a sentence break inside the text.
/// </summary>
/// <param name="nameList">Passed straight to <c>ReplaceNames1Remove</c>; presumably the names to protect behind <c>_@n_</c> placeholders - confirm against that helper.</param>
/// <param name="changeNameCases">When true <c>ReplaceNames2Fix</c> receives the replacement names, otherwise the (possibly capitalized) original names.</param>
/// <param name="makeUppercaseAfterBreak">When true the text is scanned and the first letter after a break character is upper-cased.</param>
/// <param name="checkLastLine">When true <paramref name="lastLine"/> decides whether the first character is upper-cased.</param>
/// <param name="lastLine">Text of the previous line; only read when <paramref name="checkLastLine"/> is true.</param>
public void FixCasing(List<string> nameList, bool changeNameCases, bool makeUppercaseAfterBreak, bool checkLastLine, string lastLine)
{
    var replaceIds = new List<string>();
    var replaceNames = new List<string>();
    var originalNames = new List<string>();
    ReplaceNames1Remove(nameList, replaceIds, replaceNames, originalNames);

    if (checkLastLine)
    {
        string s = HtmlUtil.RemoveHtmlTags(lastLine).TrimEnd().TrimEnd('\"').TrimEnd();

        bool startWithUppercase = string.IsNullOrEmpty(s) || EndsWithSentenceBreak(s);

        // start with uppercase after music symbol - but only if next line does not start with music symbol
        if (!startWithUppercase && (s.EndsWith('♪') || s.EndsWith('♫')))
        {
            if (!Pre.Contains(new[] { '♪', '♫' }))
            {
                startWithUppercase = true;
            }
        }

        if (startWithUppercase && StrippedText.Length > 0 && !Pre.Contains("..."))
        {
            // leave URLs alone
            if (!StrippedText.StartsWith("www.", StringComparison.OrdinalIgnoreCase) &&
                !StrippedText.StartsWith("http", StringComparison.OrdinalIgnoreCase))
            {
                StrippedText = char.ToUpper(StrippedText[0]) + StrippedText.Substring(1);

                // the text may start with a name placeholder: capitalize the name behind it instead
                CapitalizePlaceholderName(StrippedText, 0, replaceIds, originalNames);
            }
        }
    }

    if (makeUppercaseAfterBreak && StrippedText.Contains(ExpectedCharsArray))
    {
        const string breakAfterChars = @".!?:;)]}([{";
        const string expectedChars = "\"`´'()<>!?.- \r\n";
        var sb = new StringBuilder(StrippedText.Length);
        bool lastWasBreak = false;
        for (int i = 0; i < StrippedText.Length; i++)
        {
            var s = StrippedText[i];
            if (lastWasBreak)
            {
                if (expectedChars.Contains(s))
                {
                    // quotes, brackets, punctuation and whitespace do not consume the pending capital
                    sb.Append(s);
                }
                else if ((sb.EndsWith('<') || sb.ToString().EndsWith("</", StringComparison.Ordinal)) && i + 1 < StrippedText.Length && StrippedText[i + 1] == '>')
                { // tags
                    sb.Append(s);
                }
                else if (sb.EndsWith('<') && s == '/' && i + 2 < StrippedText.Length && StrippedText[i + 2] == '>')
                { // tags
                    sb.Append(s);
                }
                else if (sb.ToString().EndsWith("... ", StringComparison.Ordinal))
                {
                    // an ellipsis followed by a space cancels the pending capital
                    sb.Append(s);
                    lastWasBreak = false;
                }
                else
                {
                    if (breakAfterChars.Contains(s))
                    {
                        sb.Append(s);
                    }
                    else
                    {
                        lastWasBreak = false;
                        sb.Append(char.ToUpper(s));

                        // a name placeholder starts here: capitalize the name it stands for
                        CapitalizePlaceholderName(StrippedText, i, replaceIds, originalNames);
                    }
                }
            }
            else
            {
                sb.Append(s);
                if (breakAfterChars.Contains(s))
                {
                    var idx = sb.ToString().IndexOf('[');
                    if (s == ']' && idx > 1)
                    { // I [Motor roaring] love you!
                        string temp = sb.ToString(0, idx - 1).Trim();
                        if (temp.Length > 0 && !char.IsLetterOrDigit(temp[temp.Length - 1]))
                        {
                            lastWasBreak = true;
                        }
                    }
                    else if (s == ']' && idx == -1 && Pre.Contains('['))
                    { // [ Motor roaring ] Hallo!
                        lastWasBreak = true;
                    }
                    else if (s == ':') // seems to be the rule (in subtitles) to nearly always capitalize first letter after a colon
                    {
                        lastWasBreak = true;
                    }
                    else
                    {
                        idx = sb.ToString().LastIndexOf(' ');
                        if (idx >= 0 && idx < sb.Length - 2 && !IsInMiddleOfUrl(i - idx, StrippedText.Substring(idx + 1)))
                        {
                            lastWasBreak = true;
                        }
                    }
                }
                else if (s == '-' && Pre.Contains("-"))
                {
                    // a dash opening a new line: capitalize if the line before it ended a sentence.
                    // Ordinal comparison, like every other comparison in this method - a
                    // culture-sensitive match on a string containing line-break characters is unreliable.
                    if (sb.ToString().EndsWith(Environment.NewLine + "-", StringComparison.Ordinal))
                    {
                        var prevLine = HtmlUtil.RemoveHtmlTags(sb.ToString().Substring(0, sb.Length - 2).TrimEnd());
                        if (EndsWithSentenceBreak(prevLine))
                        {
                            lastWasBreak = true;
                        }
                    }
                }
            }
        }

        StrippedText = sb.ToString();
    }

    ReplaceNames2Fix(replaceIds, changeNameCases ? replaceNames : originalNames);
}

/// <summary>
/// Returns true when <paramref name="text"/> ends with '.', '!', '?', one of those followed by " ♪",
/// or with ']', ')' or ':'.
/// </summary>
private static bool EndsWithSentenceBreak(string text)
{
    return text.EndsWith('.') ||
           text.EndsWith('!') ||
           text.EndsWith('?') ||
           text.EndsWith(". ♪", StringComparison.Ordinal) ||
           text.EndsWith("! ♪", StringComparison.Ordinal) ||
           text.EndsWith("? ♪", StringComparison.Ordinal) ||
           text.EndsWith(']') ||
           text.EndsWith(')') ||
           text.EndsWith(':');
}

/// <summary>
/// If <paramref name="text"/> has a <c>_@k_</c> placeholder at <paramref name="index"/>
/// (k below <c>replaceIds.Count</c>), upper-cases the first character of <c>originalNames[k]</c>.
/// </summary>
private static void CapitalizePlaceholderName(string text, int index, List<string> replaceIds, List<string> originalNames)
{
    var rest = text.Substring(index);
    if (!rest.StartsWith("_@", StringComparison.Ordinal))
    {
        return;
    }

    for (int k = 0; k < replaceIds.Count; k++)
    {
        string id = $"_@{k}_";
        if (!rest.StartsWith(id, StringComparison.Ordinal))
        {
            continue;
        }

        var name = originalNames[k];
        if (!string.IsNullOrEmpty(name))
        {
            originalNames[k] = char.ToUpper(name[0]) + name.Remove(0, 1);
        }

        break; // only the first matching placeholder id is considered
    }
}
/// <summary>
/// Adjusts the casing of <c>StrippedText</c>: optionally upper-cases its first character
/// based on how the previous line ended, and optionally upper-cases the first letter
/// following a break character inside the text.
/// </summary>
/// <param name="namesEtc">Handed to <c>ReplaceNames1Remove</c> together with the three working lists.</param>
/// <param name="changeNameCases">Selects which list <c>ReplaceNames2Fix</c> receives: the replacement names (true) or the original names (false).</param>
/// <param name="makeUppercaseAfterBreak">Enables the in-text scan for break characters.</param>
/// <param name="checkLastLine">Enables the first-character check against <paramref name="lastLine"/>.</param>
/// <param name="lastLine">Previous line's text; only read when <paramref name="checkLastLine"/> is true.</param>
public void FixCasing(List<string> namesEtc, bool changeNameCases, bool makeUppercaseAfterBreak, bool checkLastLine, string lastLine)
{
    var replaceIds = new List<string>();
    var replaceNames = new List<string>();
    var originalNames = new List<string>();
    ReplaceNames1Remove(namesEtc, replaceIds, replaceNames, originalNames);

    if (checkLastLine)
    {
        var previous = HtmlUtil.RemoveHtmlTags(lastLine).TrimEnd().TrimEnd('\"').TrimEnd();

        var capitalizeFirst = string.IsNullOrEmpty(previous) ||
                              previous.EndsWith('.') ||
                              previous.EndsWith('!') ||
                              previous.EndsWith('?') ||
                              previous.EndsWith(". ♪", StringComparison.Ordinal) ||
                              previous.EndsWith("! ♪", StringComparison.Ordinal) ||
                              previous.EndsWith("? ♪", StringComparison.Ordinal) ||
                              previous.EndsWith(']') ||
                              previous.EndsWith(')') ||
                              previous.EndsWith(':');

        // A trailing music symbol on the previous line also counts as a break,
        // unless this line's prefix contains a music symbol itself.
        if (!capitalizeFirst &&
            (previous.EndsWith('♪') || previous.EndsWith('♫')) &&
            !Pre.Contains(new[] { '♪', '♫' }))
        {
            capitalizeFirst = true;
        }

        if (capitalizeFirst && StrippedText.Length > 0 && !Pre.Contains("..."))
        {
            StrippedText = char.ToUpper(StrippedText[0]) + StrippedText.Substring(1);
        }
    }

    if (makeUppercaseAfterBreak && StrippedText.Contains(new[] { '.', '!', '?', ':', ';', ')', ']', '}', '(', '[', '{' }))
    {
        const string breakChars = @".!?:;)]}([{";

        // Characters that are copied through without consuming a pending capital.
        var neutralChars = "\"`´'()<>!?.- " + Environment.NewLine;

        var output = new StringBuilder();
        var capitalizeNext = false;
        for (var pos = 0; pos < StrippedText.Length; pos++)
        {
            var c = StrippedText[pos];

            if (!capitalizeNext)
            {
                output.Append(c);
                if (!breakChars.Contains(c))
                {
                    continue;
                }

                var bracketAt = output.ToString().IndexOf('[');
                if (c == ']' && bracketAt > 1)
                {
                    // e.g. "I [Motor roaring] love you!" - look at what precedes the bracket
                    var beforeBracket = output.ToString(0, bracketAt - 1).Trim();
                    capitalizeNext = beforeBracket.Length > 0 &&
                                     !Utilities.LowercaseLetters.Contains(beforeBracket[beforeBracket.Length - 1]);
                }
                else
                {
                    capitalizeNext = true;
                }

                continue;
            }

            // A capital is pending from here on.
            if (neutralChars.Contains(c) ||
                (output.EndsWith('<') || output.ToString().EndsWith("</", StringComparison.Ordinal)) && pos + 1 < StrippedText.Length && StrippedText[pos + 1] == '>' || // tags
                output.EndsWith('<') && c == '/' && pos + 2 < StrippedText.Length && StrippedText[pos + 2] == '>') // tags
            {
                output.Append(c);
            }
            else if (output.ToString().EndsWith("... ", StringComparison.Ordinal))
            {
                // ellipsis + space cancels the pending capital
                output.Append(c);
                capitalizeNext = false;
            }
            else if (breakChars.Contains(c))
            {
                output.Append(c);
            }
            else
            {
                output.Append(char.ToUpper(c));
                capitalizeNext = false;
            }
        }

        StrippedText = output.ToString();
    }

    ReplaceNames2Fix(replaceIds, changeNameCases ? replaceNames : originalNames);
}
/// <summary>
/// Splits paragraph <paramref name="p"/> into positioned paragraphs, one per text line plus one per
/// furigana/bouten annotation, each prefixed with <c>{\an1}{\pos(x,y)}</c> and sharing the start and
/// end time of <paramref name="p"/>.
/// </summary>
/// <param name="p">Source paragraph; its text may contain i/u/b tags, <c>{\...}</c> override blocks, bouten tags and ruby containers.</param>
/// <param name="width">Width used to centre the text horizontally.</param>
/// <param name="height">Height used to compute the initial vertical position.</param>
/// <returns>The generated paragraphs, in output order.</returns>
private static List<Paragraph> MakeHorizontalParagraphs(Paragraph p, int width, int height)
{
    const int adjustment = 34; // vertical step between rows

    var lines = p.Text.SplitToLines();
    var startY = height - lines.Count * 2 * adjustment + 30;
    if (p.Text.StartsWith("{\\an8", StringComparison.Ordinal))
    {
        startY = 40;
    }

    var list = new List<Paragraph>();
    var furiganaList = new List<Paragraph>();
    var rubyOn = false;
    var italicOn = false;
    int startX;
    using (var g = Graphics.FromHwnd(IntPtr.Zero))
    using (var font = new Font(SystemFonts.DefaultFont.FontFamily, 20)) // dispose the measuring font instead of leaking it
    {
        var plainText = NetflixImsc11Japanese.RemoveBoutens(HtmlUtil.RemoveHtmlTags(p.Text, true));
        var plainTextSize = g.MeasureString(plainText, font);
        startX = (int)(width / 2.0 - plainTextSize.Width / 2.0);
        if (p.Text.StartsWith("{\\an5", StringComparison.Ordinal))
        {
            startY = (int)(height / 2.0 - plainTextSize.Height / 2.0);
        }
    }

    for (var index = 0; index < lines.Count; index++)
    {
        var line = lines[index];
        if (italicOn)
        {
            // formatting left open on the previous line carries over
            line = "<i>" + line;
        }

        var actual = new StringBuilder();
        int i = 0;
        while (i < line.Length)
        {
            if (StartsWithAt(line, i, "{\\"))
            {
                // skip an existing override block entirely
                var end = line.IndexOf('}', i);
                if (end < 0)
                {
                    break;
                }

                i = end + 1;
            }
            else if (StartsWithAt(line, i, "<i>") || StartsWithAt(line, i, "<u>") || StartsWithAt(line, i, "<b>"))
            {
                // NOTE(review): underline and bold are emitted as italic too - confirm this is intended
                actual.Append("{\\i1}");
                i += 3;
                italicOn = true;
            }
            else if (StartsWithAt(line, i, "</i>") || StartsWithAt(line, i, "</u>") || StartsWithAt(line, i, "</b>"))
            {
                actual.Append("{\\i0}");
                i += 4;
                italicOn = false;
            }
            else if (StartsWithAt(line, i, "<bouten-"))
            {
                var end = line.IndexOf('>', i);
                if (end < 0)
                {
                    break;
                }

                if (end + 1 >= line.Length)
                {
                    break;
                }

                var endTagStart = line.IndexOf("</", end, StringComparison.Ordinal);
                if (endTagStart < 0)
                {
                    break;
                }

                var tag = line.Substring(i + 1, end - i - 1);
                var text = line.Substring(end + 1, endTagStart - end - 1);
                foreach (var ch in text)
                {
                    // one annotation per character, preceded by the text so far made invisible
                    var furiganaChar = BoutenTagToUnicode(tag);
                    if (!string.IsNullOrWhiteSpace(furiganaChar))
                    {
                        furiganaList.Add(new Paragraph($"{{\\alpha&FF&}}{actual}{{\\alpha&0&}}{furiganaChar}", p.StartTime.TotalMilliseconds, p.EndTime.TotalMilliseconds));
                    }

                    actual.Append(ch);
                }

                var endTagEnd = line.IndexOf('>', endTagStart);
                if (endTagEnd < 0)
                {
                    break;
                }

                i = endTagEnd + 1;
            }
            else if (StartsWithAt(line, i, "<ruby-container>"))
            {
                // remember which opening tag matched so exactly that tag's length is skipped
                var baseOpenTag = "<ruby-base>";
                var baseTextStart = line.IndexOf(baseOpenTag, i, StringComparison.Ordinal);
                var baseTextEnd = line.IndexOf("</ruby-base>", i, StringComparison.Ordinal);
                if (baseTextStart < 0 || baseTextEnd < 0)
                {
                    baseOpenTag = "<ruby-base-italic>";
                    baseTextStart = line.IndexOf(baseOpenTag, i, StringComparison.Ordinal);
                    baseTextEnd = line.IndexOf("</ruby-base-italic>", i, StringComparison.Ordinal);
                    if (baseTextStart < 0 || baseTextEnd < 0)
                    {
                        break;
                    }
                }

                baseTextStart += baseOpenTag.Length;
                if (baseTextEnd < baseTextStart)
                {
                    break; // closing tag precedes the opening tag: malformed, stop parsing this line
                }

                var baseText = line.Substring(baseTextStart, baseTextEnd - baseTextStart);

                var extraText = GetTextBetweenTags(line, i, "<ruby-text>", "</ruby-text>");
                if (string.IsNullOrEmpty(extraText))
                {
                    extraText = GetTextBetweenTags(line, i, "<ruby-text-italic>", "</ruby-text-italic>");
                }

                var extraTextAfter = GetTextBetweenTags(line, i, "<ruby-text-after>", "</ruby-text-after>");

                var preFurigana = string.Empty;
                if (actual.Length > 0)
                {
                    preFurigana = $"{{\\alpha&FF&}}{actual.ToString().TrimEnd()}{{\\alpha&0&}}";
                }

                if (!string.IsNullOrWhiteSpace(extraText))
                {
                    furiganaList.Add(new Paragraph($"{preFurigana}{{\\fs20}}{extraText}", p.StartTime.TotalMilliseconds, p.EndTime.TotalMilliseconds));
                }

                if (!string.IsNullOrWhiteSpace(extraTextAfter))
                {
                    furiganaList.Add(new Paragraph($"{preFurigana}{{\\fs20}} {extraTextAfter}", p.StartTime.TotalMilliseconds, p.EndTime.TotalMilliseconds));
                }

                actual.Append(baseText);

                var endTagEnd = line.IndexOf("</ruby-container>", i, StringComparison.Ordinal);
                if (endTagEnd < 0)
                {
                    break;
                }

                i = endTagEnd + "</ruby-container>".Length;
                rubyOn = true;
            }
            else
            {
                actual.Append(line[i]);
                i++;
            }
        }

        var actualText = actual.ToString().TrimEnd();

        // annotations go before the text for a single line or the first of two lines, otherwise after it
        bool displayBefore = (lines.Count == 2 && index == 0) || lines.Count == 1;
        if (displayBefore && furiganaList.Count > 0)
        {
            foreach (var fp in furiganaList)
            {
                var beforeText = "{\\an1}{\\pos(" + startX + "," + startY + ")}" + fp.Text;
                list.Add(new Paragraph(beforeText, p.StartTime.TotalMilliseconds, p.EndTime.TotalMilliseconds));
            }

            startY += adjustment;
            if (rubyOn && index == 0 && lines.Count == 2)
            {
                startY += 3;
            }
        }

        actualText = "{\\an1}{\\pos(" + startX + "," + startY + ")}" + actualText;
        list.Add(new Paragraph(actualText, p.StartTime.TotalMilliseconds, p.EndTime.TotalMilliseconds));
        startY += adjustment;

        if (!displayBefore && furiganaList.Count > 0)
        {
            if (rubyOn && index == 1 && lines.Count == 2)
            {
                startY = (int)(startY - adjustment * 0.4);
            }

            foreach (var fp in furiganaList)
            {
                var afterText = "{\\an1}{\\pos(" + startX + "," + startY + ")}" + fp.Text;
                list.Add(new Paragraph(afterText, p.StartTime.TotalMilliseconds, p.EndTime.TotalMilliseconds));
            }

            startY += adjustment;
        }

        furiganaList.Clear();
    }

    return list;
}

/// <summary>
/// Ordinal test for <paramref name="value"/> occurring in <paramref name="text"/> exactly at
/// <paramref name="index"/>, without allocating a substring.
/// </summary>
private static bool StartsWithAt(string text, int index, string value)
{
    return string.CompareOrdinal(text, index, value, 0, value.Length) == 0;
}

/// <summary>
/// Returns the text between the first <paramref name="openTag"/> found at or after
/// <paramref name="startIndex"/> and the first <paramref name="closeTag"/> after it,
/// or an empty string when either tag is missing.
/// </summary>
private static string GetTextBetweenTags(string line, int startIndex, string openTag, string closeTag)
{
    var contentStart = line.IndexOf(openTag, startIndex, StringComparison.Ordinal);
    if (contentStart < 0)
    {
        return string.Empty;
    }

    contentStart += openTag.Length;
    var contentEnd = line.IndexOf(closeTag, contentStart, StringComparison.Ordinal);
    return contentEnd < 0 ? string.Empty : line.Substring(contentStart, contentEnd - contentStart);
}
/// <summary>
/// Adjusts the casing of <c>StrippedText</c>. The first character is optionally upper-cased
/// depending on how the previous line ended (text starting with "www." or "http" is left alone),
/// and the first letter after a break character is optionally upper-cased.
/// </summary>
/// <param name="nameList">Handed to <c>ReplaceNames1Remove</c> together with the three working lists.</param>
/// <param name="changeNameCases">Selects which list <c>ReplaceNames2Fix</c> receives: the replacement names (true) or the original names (false).</param>
/// <param name="makeUppercaseAfterBreak">Enables the in-text scan for break characters.</param>
/// <param name="checkLastLine">Enables the first-character check against <paramref name="lastLine"/>.</param>
/// <param name="lastLine">Previous line's text; only read when <paramref name="checkLastLine"/> is true.</param>
public void FixCasing(List<string> nameList, bool changeNameCases, bool makeUppercaseAfterBreak, bool checkLastLine, string lastLine)
{
    var replaceIds = new List<string>();
    var replaceNames = new List<string>();
    var originalNames = new List<string>();
    ReplaceNames1Remove(nameList, replaceIds, replaceNames, originalNames);

    if (checkLastLine)
    {
        var previous = HtmlUtil.RemoveHtmlTags(lastLine).TrimEnd().TrimEnd('\"').TrimEnd();

        var capitalizeFirst = string.IsNullOrEmpty(previous) ||
                              previous.EndsWith('.') ||
                              previous.EndsWith('!') ||
                              previous.EndsWith('?') ||
                              previous.EndsWith(". ♪", StringComparison.Ordinal) ||
                              previous.EndsWith("! ♪", StringComparison.Ordinal) ||
                              previous.EndsWith("? ♪", StringComparison.Ordinal) ||
                              previous.EndsWith(']') ||
                              previous.EndsWith(')') ||
                              previous.EndsWith(':');

        // A trailing music symbol on the previous line also counts as a break,
        // unless this line's prefix contains a music symbol itself.
        if (!capitalizeFirst &&
            (previous.EndsWith('♪') || previous.EndsWith('♫')) &&
            !Pre.Contains(new[] { '♪', '♫' }))
        {
            capitalizeFirst = true;
        }

        if (capitalizeFirst &&
            StrippedText.Length > 0 &&
            !Pre.Contains("...") &&
            !StrippedText.StartsWith("www.", StringComparison.OrdinalIgnoreCase) &&
            !StrippedText.StartsWith("http", StringComparison.OrdinalIgnoreCase))
        {
            StrippedText = char.ToUpper(StrippedText[0]) + StrippedText.Substring(1);
        }
    }

    if (makeUppercaseAfterBreak && StrippedText.Contains(ExpectedCharsArray))
    {
        const string breakChars = @".!?:;)]}([{";
        const string neutralChars = "\"`´'()<>!?.- \r\n";
        var output = new StringBuilder();
        var capitalizeNext = false;
        for (var pos = 0; pos < StrippedText.Length; pos++)
        {
            var c = StrippedText[pos];

            if (!capitalizeNext)
            {
                output.Append(c);
                if (!breakChars.Contains(c))
                {
                    continue;
                }

                var soFar = output.ToString();
                var bracketAt = soFar.IndexOf('[');
                if (c == ']' && bracketAt > 1)
                {
                    // e.g. "I [Motor roaring] love you!" - look at what precedes the bracket
                    var beforeBracket = soFar.Substring(0, bracketAt - 1).Trim();
                    capitalizeNext = beforeBracket.Length > 0 &&
                                     !char.IsLower(beforeBracket[beforeBracket.Length - 1]);
                }
                else
                {
                    // only treat it as a break when it is not part of a URL
                    var spaceAt = soFar.LastIndexOf(' ');
                    capitalizeNext = spaceAt >= 0 &&
                                     spaceAt < output.Length - 2 &&
                                     !IsInMiddleOfUrl(pos - spaceAt, StrippedText.Substring(spaceAt + 1));
                }

                continue;
            }

            // A capital is pending from here on.
            if (neutralChars.Contains(c))
            {
                output.Append(c);
            }
            else if ((output.EndsWith('<') || output.ToString().EndsWith("</", StringComparison.Ordinal)) &&
                     pos + 1 < StrippedText.Length && StrippedText[pos + 1] == '>')
            {
                // single-letter tag name
                output.Append(c);
            }
            else if (output.EndsWith('<') && c == '/' &&
                     pos + 2 < StrippedText.Length && StrippedText[pos + 2] == '>')
            {
                // slash of a closing tag
                output.Append(c);
            }
            else if (output.ToString().EndsWith("... ", StringComparison.Ordinal))
            {
                // ellipsis + space cancels the pending capital
                output.Append(c);
                capitalizeNext = false;
            }
            else if (breakChars.Contains(c))
            {
                output.Append(c);
            }
            else
            {
                output.Append(char.ToUpper(c));
                capitalizeNext = false;
            }
        }

        StrippedText = output.ToString();
    }

    ReplaceNames2Fix(replaceIds, changeNameCases ? replaceNames : originalNames);
}
/// <summary>
/// Counts the non-empty tokens of <paramref name="source"/> separated by space, line feed or
/// carriage return, after passing it through <c>HtmlUtil.RemoveHtmlTags(source, true)</c>.
/// </summary>
/// <param name="source">Text to count words in.</param>
/// <returns>The number of non-empty tokens.</returns>
public static int CountWords(this string source)
{
    var plainText = HtmlUtil.RemoveHtmlTags(source, true);
    var separators = new[] { ' ', '\n', '\r' };
    var words = plainText.Split(separators, StringSplitOptions.RemoveEmptyEntries);
    return words.Length;
}