/// <summary>
/// Loads subtitles from an MP4 file by parsing each string returned by
/// <c>MP4Parser.GetMdatsAsStrings</c> as timed-text XML and appending the
/// resulting paragraphs to <paramref name="subtitle"/>.
/// </summary>
/// <param name="subtitle">Target subtitle; its paragraphs are cleared before loading.</param>
/// <param name="lines">Not read by this method; the content comes from <paramref name="fileName"/>.</param>
/// <param name="fileName">File name passed to the <c>MP4Parser</c> constructor.</param>
public override void LoadSubtitle(Subtitle subtitle, List<string> lines, string fileName)
{
    _errorCount = 0;
    subtitle.Paragraphs.Clear();

    var parser = new MP4Parser(fileName);
    var xmlChunks = parser.GetMdatsAsStrings();

    // The format returned by each reload is carried over to the next chunk.
    SubtitleFormat currentFormat = new TimedText10();

    foreach (var chunk in xmlChunks)
    {
        try
        {
            // Chunks shorter than 80 characters are skipped without counting an error.
            if (chunk.Length < 80)
            {
                continue;
            }

            // A NUL character means the chunk is not plain text; count it as an error.
            if (chunk.IndexOf('\0') >= 0)
            {
                _errorCount++;
                continue;
            }

            var parsed = new Subtitle();
            currentFormat = parsed.ReloadLoadSubtitle(chunk.SplitToLines(25_000), null, currentFormat);
            if (parsed.Paragraphs.Count == 0)
            {
                continue;
            }

            // Merge lines that share the same time codes.
            parsed = Forms.MergeLinesWithSameTimeCodes.Merge(parsed, new List<int>(), out _, true, false, 1000, "en", new List<int>(), new Dictionary<int, bool>(), new Subtitle());

            // If this chunk starts before the last paragraph already collected,
            // shift the whole chunk by that paragraph's end time.
            var previous = subtitle.GetParagraphOrDefault(subtitle.Paragraphs.Count - 1);
            var startsBeforePrevious = previous != null
                                       && parsed.Paragraphs.Count > 0
                                       && previous.StartTime.TotalMilliseconds > parsed.Paragraphs[0].StartTime.TotalMilliseconds;
            if (startsBeforePrevious)
            {
                parsed.AddTimeToAllParagraphs(previous.EndTime.TimeSpan);
            }

            subtitle.Paragraphs.AddRange(parsed.Paragraphs);
        }
        catch
        {
            // Best effort: a chunk that fails to load only bumps the error count.
            _errorCount++;
        }
    }

    // Use the same-text merge result only when it actually removed paragraphs.
    var collapsed = MergeLinesSameTextUtils.MergeLinesWithSameTextInSubtitle(subtitle, false, 250);
    if (collapsed.Paragraphs.Count < subtitle.Paragraphs.Count)
    {
        subtitle.Paragraphs.Clear();
        subtitle.Paragraphs.AddRange(collapsed.Paragraphs);
    }

    subtitle.Renumber();
}
/// <summary>
/// Builds a new subtitle in which paragraphs that qualify for a "same text" merge
/// (per <c>MergeLinesSameTextUtils</c>) are folded into the preceding paragraph, and
/// optionally lists each merge in <c>listViewFixes</c>.
/// </summary>
/// <param name="subtitle">Source subtitle; its paragraphs are copied, not modified.</param>
/// <param name="mergedIndexes">Caller-owned list that receives the zero-based indexes touched by a merge (no duplicates are added).</param>
/// <param name="numberOfMerges">Receives the number of merges performed.</param>
/// <param name="clearFixes">When true, the fix list view is cleared first and one entry is added per merged group.</param>
/// <param name="fixIncrementing">When true, <c>QualifiesForMergeIncrement</c> is also accepted as a merge criterion.</param>
/// <param name="lineAfterNext">When true, the paragraph after the next one is also considered as a merge partner.</param>
/// <param name="maxMsBetween">Maximum gap in milliseconds handed to the qualification helpers.</param>
/// <returns>A renumbered subtitle containing the merged paragraphs.</returns>
public Subtitle MergeLinesWithSameTextInSubtitle(Subtitle subtitle, List<int> mergedIndexes, out int numberOfMerges, bool clearFixes, bool fixIncrementing, bool lineAfterNext, int maxMsBetween)
{
    numberOfMerges = 0;

    // Only membership tests are performed on this collection, so a set is sufficient.
    var removed = new HashSet<int>();
    var mergedSubtitle = new Subtitle();

    // Detach the ItemChecked handler while the list view is being rebuilt.
    if (!_loading)
    {
        listViewFixes.ItemChecked -= listViewFixes_ItemChecked;
    }

    try
    {
        if (clearFixes)
        {
            listViewFixes.Items.Clear();
        }

        bool lastMerged = false;
        Paragraph p = null;
        var lineNumbers = new StringBuilder();
        for (int i = 1; i < subtitle.Paragraphs.Count; i++)
        {
            // Start a new output paragraph unless the previous iteration merged into the current one.
            if (!lastMerged)
            {
                p = new Paragraph(subtitle.GetParagraphOrDefault(i - 1));
                mergedSubtitle.Paragraphs.Add(p);
            }

            Paragraph next = subtitle.GetParagraphOrDefault(i);
            Paragraph afterNext = subtitle.GetParagraphOrDefault(i + 1);
            if (next != null)
            {
                if ((MergeLinesSameTextUtils.QualifiesForMerge(p, next, maxMsBetween) || (fixIncrementing && MergeLinesSameTextUtils.QualifiesForMergeIncrement(p, next, maxMsBetween))) && IsFixAllowed(p))
                {
                    // Fold "next" into the current paragraph: take its text and extend the end time.
                    p.Text = next.Text;
                    p.EndTime = next.EndTime;
                    if (lastMerged)
                    {
                        lineNumbers.Append(next.Number);
                        lineNumbers.Append(',');
                    }
                    else
                    {
                        lineNumbers.Append(p.Number);
                        lineNumbers.Append(',');
                        lineNumbers.Append(next.Number);
                        lineNumbers.Append(',');
                    }

                    lastMerged = true;
                    removed.Add(i);
                    numberOfMerges++;
                    if (!mergedIndexes.Contains(i))
                    {
                        mergedIndexes.Add(i);
                    }

                    if (!mergedIndexes.Contains(i - 1))
                    {
                        mergedIndexes.Add(i - 1);
                    }
                }
                // afterNext is checked for null here because it is dereferenced below and
                // does not exist on the last iteration.
                else if (lineAfterNext && afterNext != null && MergeLinesSameTextUtils.QualifiesForMerge(p, afterNext, maxMsBetween) && p.Duration.TotalMilliseconds > afterNext.Duration.TotalMilliseconds && IsFixAllowed(p))
                {
                    // NOTE(review): i + 2 is kept exactly as in the original; confirm whether
                    // i + 1 (the index of afterNext) was intended.
                    removed.Add(i + 2);
                    numberOfMerges++;
                    if (lastMerged)
                    {
                        lineNumbers.Append(afterNext.Number);
                        lineNumbers.Append(',');
                    }
                    else
                    {
                        lineNumbers.Append(p.Number);
                        lineNumbers.Append(',');
                        lineNumbers.Append(afterNext.Number);
                        lineNumbers.Append(',');
                    }

                    lastMerged = true;
                    if (!mergedIndexes.Contains(i))
                    {
                        mergedIndexes.Add(i);
                    }

                    if (!mergedIndexes.Contains(i - 1))
                    {
                        mergedIndexes.Add(i - 1);
                    }
                }
                else
                {
                    lastMerged = false;
                }
            }
            else
            {
                lastMerged = false;
            }

            // A merge group has ended: emit its list view entry.
            if (!removed.Contains(i) && lineNumbers.Length > 0 && clearFixes)
            {
                AddToListView(p, lineNumbers.ToString(), p.Text);
                lineNumbers.Clear();
            }
        }

        if (lineNumbers.Length > 0 && clearFixes && p != null)
        {
            AddToListView(p, lineNumbers.ToString(), p.Text);
        }

        // Copy the final paragraph unless it was merged away. The index check keeps an
        // empty subtitle from asking for paragraph -1.
        var lastIndex = subtitle.Paragraphs.Count - 1;
        if (lastIndex >= 0 && !lastMerged)
        {
            mergedSubtitle.Paragraphs.Add(new Paragraph(subtitle.GetParagraphOrDefault(lastIndex)));
        }
    }
    finally
    {
        // Re-attach the handler even if an exception was thrown while merging.
        if (!_loading)
        {
            listViewFixes.ItemChecked += listViewFixes_ItemChecked;
        }
    }

    mergedSubtitle.Renumber();
    return mergedSubtitle;
}
/// <summary>
/// Parses WebVTT text lines into <paramref name="subtitle"/>. Header, NOTE, STYLE and
/// REGION lines seen before the first cue are collected into the subtitle header;
/// time-code lines start a new paragraph and the following lines become its text.
/// </summary>
/// <param name="subtitle">Subtitle that receives the parsed paragraphs and header.</param>
/// <param name="lines">The file content, one entry per line.</param>
/// <param name="fileName">Not read by this method.</param>
public override void LoadSubtitle(Subtitle subtitle, List<string> lines, string fileName)
{
    _errorCount = 0;

    Paragraph current = null;
    var pendingPosition = string.Empty;
    var sawEmptyLine = false;
    var identifierCount = 0;
    double offsetSeconds = 0;
    var insideNote = false;
    var insideStyle = false;
    var insideRegion = false;

    var header = new StringBuilder();
    header.AppendLine("WEBVTT");
    header.AppendLine();

    for (var index = 0; index < lines.Count; index++)
    {
        var line = lines[index];

        // Look one line ahead; cue identifiers are recognised by the time code that follows them.
        var hasFollowing = index < lines.Count - 1;
        var following = hasFollowing ? lines[index + 1] : string.Empty;
        var followingIsTimeCode = hasFollowing && following.Contains("-->");

        // A first line starting with "WEBVTT" replaces the default header.
        if (index == 0 && line.StartsWith("WEBVTT", StringComparison.Ordinal))
        {
            header.Clear();
            header.AppendLine(line);
            header.AppendLine();
            continue;
        }

        // No paragraph is added between here and the block checks below, so this stays valid.
        var noCuesYet = subtitle.Paragraphs.Count == 0;

        if (index > 0 && string.IsNullOrEmpty(lines[index - 1]) && (line == "NOTE" || line.StartsWith("NOTE ", StringComparison.Ordinal)))
        {
            insideNote = true;
            if (noCuesYet)
            {
                header.AppendLine();
                header.AppendLine();
            }
        }
        else if ((line == "STYLE" || line.StartsWith("STYLE ", StringComparison.Ordinal)) && noCuesYet)
        {
            insideStyle = true;
            header.AppendLine();
            header.AppendLine();
        }
        else if ((line == "REGION" || line.StartsWith("REGION ", StringComparison.Ordinal)) && noCuesYet)
        {
            insideRegion = true;
            header.AppendLine();
            header.AppendLine();
        }

        var lineHasContent = !string.IsNullOrEmpty(line);

        // STYLE and REGION blocks go to the header until an empty line ends them.
        if ((insideStyle || insideRegion) && lineHasContent)
        {
            header.AppendLine(line);
            continue;
        }

        // NOTE blocks are kept in the header only before the first cue; later ones are dropped.
        if (insideNote && lineHasContent)
        {
            if (noCuesYet)
            {
                header.AppendLine(line);
            }

            continue;
        }

        if (index > 1 && (line.StartsWith("X-TIMESTAMP-MAP=", StringComparison.OrdinalIgnoreCase) || line == "WEBVTT"))
        {
            // Badly formatted WebVTT file.
            continue;
        }

        insideNote = false;
        insideStyle = false;
        insideRegion = false;

        var candidate = line;
        var isTimeCode = line.Contains("-->");
        if (isTimeCode)
        {
            if (RegexTimeCodesMiddle.IsMatch(candidate))
            {
                // Start is without hours, end is with hours.
                candidate = "00:" + candidate;
            }

            if (RegexTimeCodesShort.IsMatch(candidate))
            {
                candidate = "00:" + candidate.Replace("--> ", "--> 00:");
            }
        }

        if (followingIsTimeCode && Utilities.IsNumber(candidate) && current?.Text.Length > 0)
        {
            identifierCount++;
        }
        else if (index == 1 && candidate.StartsWith("X-TIMESTAMP-MAP=", StringComparison.OrdinalIgnoreCase) && candidate.IndexOf("MPEGTS:", StringComparison.OrdinalIgnoreCase) > 0)
        {
            offsetSeconds = GetXTimeStampSeconds(candidate);
        }
        else if (isTimeCode && RegexTimeCodes.IsMatch(candidate.TrimStart()))
        {
            // A new time code closes the paragraph being built.
            if (current != null)
            {
                current.Text = current.Text.TrimEnd();
                subtitle.Paragraphs.Add(current);
            }

            try
            {
                var parts = candidate.TrimStart().Replace("-->", "@").Split(new[] { '@' }, StringSplitOptions.RemoveEmptyEntries);
                current = new Paragraph
                {
                    StartTime = GetTimeCodeFromString(parts[0]),
                    EndTime = GetTimeCodeFromString(parts[1])
                };
                pendingPosition = GetPositionInfo(candidate);
                current.Region = GetRegion(candidate);
            }
            catch (Exception exception)
            {
                System.Diagnostics.Debug.WriteLine(exception.Message);
                _errorCount++;
                current = null;
            }

            sawEmptyLine = false;
        }
        else if (current != null && sawEmptyLine && (RegexTimeCodesMiddle.IsMatch(following) || RegexTimeCodesShort.IsMatch(following) || RegexTimeCodes.IsMatch(following)))
        {
            // Can both be number or an "identifier" which can be text.
            identifierCount++;
        }
        else if (current != null)
        {
            var text = pendingPosition + line.Trim();
            if (string.IsNullOrEmpty(text))
            {
                sawEmptyLine = true;
            }

            current.Text = string.IsNullOrEmpty(current.Text)
                ? text + Environment.NewLine
                : current.Text + text + Environment.NewLine;

            // Position info is only prefixed to the first text line of a cue.
            pendingPosition = string.Empty;
        }
    }

    // Flush the paragraph still being built when the input ends.
    if (current != null)
    {
        current.Text = current.Text.TrimEnd();
        subtitle.Paragraphs.Add(current);
    }

    if (subtitle.Paragraphs.Count > 3 && identifierCount >= subtitle.Paragraphs.Count - 1 && lines[0] == "WEBVTT FILE")
    {
        // Let format WebVTTFileWithLineNumber take the subtitle.
        _errorCount = subtitle.Paragraphs.Count + 1;
        return;
    }

    foreach (var paragraph in subtitle.Paragraphs)
    {
        paragraph.Text = ColorWebVttToHtml(paragraph.Text);
        paragraph.Text = EscapeDecodeText(paragraph.Text);
        paragraph.Text = RemoveWeirdReatingHeader(paragraph.Text);

        // Apply the offset read from the X-TIMESTAMP-MAP line (zero when absent).
        paragraph.StartTime.TotalMilliseconds += offsetSeconds * 1000;
        paragraph.EndTime.TotalMilliseconds += offsetSeconds * 1000;
    }

    var collapsed = MergeLinesSameTextUtils.MergeLinesWithSameTextInSubtitle(subtitle, false, 1);
    subtitle.Paragraphs.Clear();
    subtitle.Paragraphs.AddRange(collapsed.Paragraphs);

    if (header.Length > 0)
    {
        var doubleNewLine = Environment.NewLine + Environment.NewLine;
        subtitle.Header = header.ToString().Replace(doubleNewLine, Environment.NewLine).Trim();
    }
}
/// <summary>
/// Walks the top-level boxes of an MP4 stream. The first "moov" box is stored in
/// <c>Moov</c>; each "mdat" box that follows a "moof" box with samples is turned into
/// paragraphs of <c>VttcSubtitle</c>, timed by accumulating the sample durations.
/// </summary>
/// <param name="fs">
/// Stream to read. It is closed by this method on every exit path, including the
/// early return for an invalid box size and any exception.
/// </param>
private void ParseMp4(Stream fs)
{
    try
    {
        int count = 0;
        Position = 0;
        fs.Seek(0, SeekOrigin.Begin);
        bool moreBytes = true;
        var timeTotalMs = 0d;
        while (moreBytes)
        {
            moreBytes = InitializeSizeAndName(fs);

            // A size below 8 stops parsing immediately; the merge at the end is skipped.
            if (Size < 8)
            {
                return;
            }

            if (Name == "moov" && Moov == null)
            {
                Moov = new Moov(fs, Position); // only scan first "moov" element
            }
            else if (Name == "moof")
            {
                Moof = new Moof(fs, Position);
            }
            else if (Name == "mdat" && Moof?.Traf?.Trun?.Samples?.Count > 0)
            {
                var mdat = new Mdat(fs, Position);
                if (mdat.Payloads.Count > 0)
                {
                    // The condition above guarantees the whole chain is non-null and non-empty.
                    var samples = Moof.Traf.Trun.Samples;

                    // Payloads are only used when there is a sample for each of them.
                    if (samples.Count >= mdat.Payloads.Count)
                    {
                        if (VttcSubtitle == null)
                        {
                            VttcSubtitle = new Subtitle();
                        }

                        var timeScale = (double)(Moov?.Mvhd?.TimeScale ?? 1000.0);
                        var sampleIdx = 0;
                        foreach (var payload in mdat.Payloads)
                        {
                            var presentation = samples[sampleIdx];

                            // NOTE(review): sampleIdx only advances when the sample has a duration,
                            // so a sample without one is reused for all remaining payloads.
                            // Kept as in the original; confirm this is intended.
                            if (presentation.Duration.HasValue)
                            {
                                var before = timeTotalMs;
                                timeTotalMs += presentation.Duration.Value / timeScale * 1000.0;
                                sampleIdx++;
                                if (payload != null)
                                {
                                    VttcSubtitle.Paragraphs.Add(new Paragraph(payload, before, timeTotalMs));
                                }
                            }
                        }
                    }

                    // The "moof" box is consumed by the first non-empty "mdat" that follows it.
                    Moof = null;
                }
            }

            // Hard limit on the number of boxes scanned.
            count++;
            if (count > 1000)
            {
                break;
            }

            if (Position > (ulong)fs.Length)
            {
                break;
            }

            fs.Seek((long)Position, SeekOrigin.Begin);
        }
    }
    finally
    {
        // Previously the early return above (and any exception) left the stream open.
        fs.Close();
    }

    if (VttcSubtitle != null)
    {
        var merged = MergeLinesSameTextUtils.MergeLinesWithSameTextInSubtitle(VttcSubtitle, false, 250);
        VttcSubtitle = merged;
    }
}
/// <summary>
/// Builds a new subtitle in which every later paragraph that qualifies for a
/// "same text" merge (per <c>MergeLinesSameTextUtils</c>) is folded into the earlier
/// paragraph, and optionally lists each merged group in <c>listViewFixes</c>.
/// </summary>
/// <param name="subtitle">Source subtitle; its paragraphs are copied, not modified.</param>
/// <param name="numberOfMerges">Receives the number of merges performed.</param>
/// <param name="clearFixes">When true, the fix list and <c>_fixItems</c> are reset first and one entry is added per merged group.</param>
/// <param name="fixIncrementing">When true, <c>QualifiesForMergeIncrement</c> is also accepted as a merge criterion.</param>
/// <param name="lineAfterNext">Not read by this overload.</param>
/// <param name="maxMsBetween">Maximum gap in milliseconds handed to the qualification helpers.</param>
/// <returns>A renumbered subtitle containing the merged paragraphs.</returns>
public Subtitle MergeLinesWithSameTextInSubtitle(Subtitle subtitle, out int numberOfMerges, bool clearFixes, bool fixIncrementing, bool lineAfterNext, int maxMsBetween)
{
    numberOfMerges = 0;

    // Both collections are only used for membership tests, so sets avoid repeated list scans.
    var mergedIndexes = new HashSet<int>();
    var removed = new HashSet<int>();
    var mergedSubtitle = new Subtitle();

    // Detach the ItemChecked handler while the list view is being rebuilt.
    if (!_loading)
    {
        listViewFixes.ItemChecked -= listViewFixes_ItemChecked;
    }

    try
    {
        if (clearFixes)
        {
            listViewFixes.Items.Clear();
            _fixItems = new List<FixListItem>();
        }

        Paragraph p = null;
        var lineNumbers = new List<int>();
        var listViewItems = new List<ListViewItem>();
        for (int i = 1; i < subtitle.Paragraphs.Count; i++)
        {
            // Paragraphs already merged into an earlier one do not start a new output paragraph.
            if (removed.Contains(i - 1))
            {
                continue;
            }

            p = new Paragraph(subtitle.GetParagraphOrDefault(i - 1));
            mergedSubtitle.Paragraphs.Add(p);

            // Try every remaining paragraph, not just the adjacent one.
            for (int j = i; j < subtitle.Paragraphs.Count; j++)
            {
                if (removed.Contains(j))
                {
                    continue;
                }

                var next = subtitle.GetParagraphOrDefault(j);
                var incrementText = string.Empty;
                if ((MergeLinesSameTextUtils.QualifiesForMerge(p, next, maxMsBetween) || fixIncrementing && MergeLinesSameTextUtils.QualifiesForMergeIncrement(p, next, maxMsBetween, out incrementText)) && IsFixAllowed(p))
                {
                    // Text supplied by the increment check, when present, wins over the plain text.
                    p.Text = next.Text;
                    if (!string.IsNullOrEmpty(incrementText))
                    {
                        p.Text = incrementText;
                    }

                    p.EndTime.TotalMilliseconds = next.EndTime.TotalMilliseconds;
                    if (lineNumbers.Count > 0)
                    {
                        lineNumbers.Add(next.Number);
                    }
                    else
                    {
                        lineNumbers.Add(p.Number);
                        lineNumbers.Add(next.Number);
                    }

                    removed.Add(j);
                    numberOfMerges++;
                    mergedIndexes.Add(j);
                    mergedIndexes.Add(i - 1);
                }
            }

            if (!removed.Contains(i - 1) && lineNumbers.Count > 0 && clearFixes)
            {
                listViewItems.Add(MakeListViewItem(p, lineNumbers, p.Text));
                lineNumbers.Clear();
            }
        }

        if (lineNumbers.Count > 0 && clearFixes && p != null)
        {
            listViewItems.Add(MakeListViewItem(p, lineNumbers, p.Text));
        }

        listViewFixes.Items.AddRange(listViewItems.ToArray());

        // Copy the final paragraph unless it took part in a merge. The index check keeps
        // an empty subtitle from asking for paragraph -1.
        var lastIndex = subtitle.Paragraphs.Count - 1;
        if (lastIndex >= 0 && !mergedIndexes.Contains(lastIndex))
        {
            mergedSubtitle.Paragraphs.Add(new Paragraph(subtitle.GetParagraphOrDefault(lastIndex)));
        }
    }
    finally
    {
        // Re-attach the handler even if an exception was thrown while merging.
        if (!_loading)
        {
            listViewFixes.ItemChecked += listViewFixes_ItemChecked;
        }
    }

    mergedSubtitle.Renumber();
    return mergedSubtitle;
}
/// <summary>
/// Loads a file that contains several XML documents one after another. The text is
/// split at every occurrence of <c>findString</c>, each fragment is loaded with the
/// first timed-text format that recognises it, and all paragraphs are appended to
/// <paramref name="subtitle"/>.
/// </summary>
/// <param name="subtitle">Target subtitle; its paragraphs are cleared before loading.</param>
/// <param name="lines">The file content, one entry per line.</param>
/// <param name="fileName">File name forwarded to the formats' <c>IsMine</c> and <c>LoadSubtitle</c>.</param>
public override void LoadSubtitle(Subtitle subtitle, List<string> lines, string fileName)
{
    _errorCount = 0;
    subtitle.Paragraphs.Clear();

    var sb = new StringBuilder();
    lines.ForEach(line => sb.AppendLine(line));
    var searchText = sb.ToString();

    // Split the text into fragments that each begin with findString.
    // Anything before the first occurrence is discarded.
    var idx = searchText.IndexOf(findString, StringComparison.Ordinal);
    var start = idx;
    var parts = new List<string>();
    while (idx >= 0)
    {
        if (idx > start)
        {
            var part = searchText.Substring(start, idx - start).Trim();
            parts.Add(part);
            start = idx;
        }

        if (idx + findString.Length >= searchText.Length)
        {
            break;
        }

        idx = searchText.IndexOf(findString, idx + findString.Length, StringComparison.Ordinal);
        if (idx < 0)
        {
            // No further delimiter: the remainder is the last fragment.
            var part = searchText.Substring(start).Trim();
            parts.Add(part);
        }
    }

    // Formats are tried in this order for every fragment.
    var formats = new List<SubtitleFormat>
    {
        new TimedText10(),
        new NetflixTimedText(),
        new ItunesTimedText(),
    };

    foreach (var xml in parts)
    {
        var xmlLines = xml.SplitToLines();
        foreach (var format in formats)
        {
            if (!format.IsMine(xmlLines, fileName))
            {
                continue;
            }

            var sub = new Subtitle();
            format.LoadSubtitle(sub, xmlLines, fileName);
            subtitle.Paragraphs.AddRange(sub.Paragraphs);

            // Stop at the first match: without this, a fragment accepted by more than
            // one format was loaded once per format and its paragraphs were duplicated.
            break;
        }
    }

    // Use the same-text merge result only when it actually removed paragraphs.
    var merged = MergeLinesSameTextUtils.MergeLinesWithSameTextInSubtitle(subtitle, false, 250);
    if (merged.Paragraphs.Count < subtitle.Paragraphs.Count)
    {
        subtitle.Paragraphs.Clear();
        subtitle.Paragraphs.AddRange(merged.Paragraphs);
    }

    subtitle.Renumber();
}