/// <summary>
/// Parses the raw lines of an .ass file into <see cref="LineInfo"/> entries.
/// The general idea is to create a regex based on the "Format: "-line in the .ass file, which
/// can then be used to easily filter the required information (namely timestamps, text and
/// actor).
/// </summary>
/// <param name="settings">Settings; only <c>IgnoreStyledSubLines</c> is read here.</param>
/// <param name="rawLines">Raw lines of the subtitle file.</param>
/// <returns>
/// One entry per line that matches the format regex and still contains text after the
/// style tags were removed, or <c>null</c> when <c>GetFormatRegex</c> returns <c>null</c>.
/// </returns>
public List<LineInfo> parse(Settings settings, LinkedList<String> rawLines) {
	List<LineInfo> lines = new List<LineInfo> ();

	string formatRegex = GetFormatRegex (rawLines);
	if (formatRegex == null)
		return null;

	// parse every line with format regex and save lines in LineInfo
	foreach (string rawLine in rawLines) {
		Match lineMatch = Regex.Match (rawLine, formatRegex, RegexOptions.IgnoreCase | RegexOptions.Compiled);
		if (!lineMatch.Success)
			continue;

		string startTimeString = lineMatch.Groups ["StartTime"].Value.Trim ();
		string endTimeString = lineMatch.Groups ["EndTime"].Value.Trim ();
		string nameString = lineMatch.Groups ["Name"].Value.Trim ();
		string textString = lineMatch.Groups ["Text"].Value.Trim ();

		// Style tags are plain ASCII markers, so they are compared ordinally; a
		// culture-sensitive StartsWith may treat ignorable characters as matching.
		// NOTE: a leading "{" is a really big hint for styled subtitles but might create
		// false-negatives -- research common patterns in subtitle files
		if (settings.IgnoreStyledSubLines
			&& textString.StartsWith ("{", StringComparison.Ordinal)
			&& !textString.StartsWith ("{\\b1}", StringComparison.Ordinal) // bold
			&& !textString.StartsWith ("{\\u1}", StringComparison.Ordinal) // underline
			&& !textString.StartsWith ("{\\i1}", StringComparison.Ordinal) // italics
			&& !textString.StartsWith ("{\\an8}", StringComparison.Ordinal) // text align: up
		) {
			continue;
		}

		// remove styling in subtitles
		textString = Regex.Replace (textString, "{.*?}", "");
		if (textString == "")
			continue; // ignore lines without text

		// generate line info
		LineInfo li = new LineInfo (parseTime (startTimeString), parseTime (endTimeString), textString, new List<String> (new String[]{ nameString }));
		lines.Add (li);
	}

	return lines;
}
/// <summary>
/// Extracts one snapshot image for every entry whose card has an image.
/// </summary>
/// <param name="settings">Supplies RescaleWidth, RescaleHeight and RescaleMode for the scaling computation.</param>
/// <param name="path">Directory that the output file names are resolved against.</param>
/// <param name="allEntries">Pairs of a card and the file name of its snapshot.</param>
/// <param name="progress">
/// Receives one processed step per visited entry; the loop stops as soon as
/// <c>Cancelled</c> is set.
/// </param>
public static void ExtractSnaphots(Settings settings, String path, List<Tuple<CardInfo, String>> allEntries, InfoProgress progress) {
	foreach(var entry in allEntries) {
		if(progress.Cancelled)
			break;
		progress.ProcessedSteps(1);

		var cardInfo = entry.Item1;
		if(!cardInfo.HasImage())
			continue;

		// Path.Combine only inserts a separator when "path" does not already end with
		// one, so a directory passed with a trailing separator no longer yields "dir//file".
		String outputSnapshotFilepath = Path.Combine(path, entry.Item2);

		// get file with snapshot information -> video
		UtilsInputFiles.FileDesc videoFileDesc = cardInfo.episodeInfo.VideoFileDesc;

		// extract image
		double scaling = UtilsVideo.GetMaxScalingByStreamInfo(cardInfo.episodeInfo.VideoStreamInfo, settings.RescaleWidth, settings.RescaleHeight, settings.RescaleMode);
		double timeStamp = UtilsCommon.GetMiddleTime (cardInfo);
		UtilsImage.GetImage (videoFileDesc.filename, timeStamp, outputSnapshotFilepath, scaling);
	}
}
/// <summary>
/// Cuts the audio of every entry whose card has audio into its own file by running the
/// configured format-convert command once per card.
/// </summary>
/// <param name="settings">Not read by this method.</param>
/// <param name="path">Directory that the output file names are resolved against.</param>
/// <param name="allEntries">Pairs of a card and the file name of its audio clip.</param>
/// <param name="progress">
/// Receives one processed step per visited entry; the method returns as soon as
/// <c>Cancelled</c> is set.
/// </param>
public static void ExtractAudio(Settings settings, String path, List<Tuple<CardInfo, String>> allEntries, InfoProgress progress) {
	foreach(var entry in allEntries) {
		progress.ProcessedSteps(1);
		if(progress.Cancelled)
			return;

		CardInfo cardInfo = entry.Item1;
		if(!cardInfo.HasAudio())
			continue;

		// Path.Combine only inserts a separator when "path" does not already end with
		// one, so a directory passed with a trailing separator no longer yields "dir//file".
		String outputAudioFilepath = Path.Combine(path, entry.Item2);

		UtilsInputFiles.FileDesc audioFileDesc = cardInfo.episodeInfo.AudioFileDesc;
		var audioStreamInfo = cardInfo.episodeInfo.AudioStreamInfo;

		// NOTE(review): the option names look like ffmpeg/avconv arguments -- confirm
		// against the configured formatConvertCommand.
		String arguments = String.Format ("-v quiet -y -i \"{0}\" -map 0:{1} -ss \"{2}\" -to \"{3}\" -vn -c:a libvorbis \"{4}\"",
			audioFileDesc.filename, // input file
			audioStreamInfo.StreamIndex, // audio stream index
			UtilsCommon.ToTimeArg(cardInfo.audioStartTimestamp), // start time
			UtilsCommon.ToTimeArg(cardInfo.audioEndTimestamp), // end time
			outputAudioFilepath // output file
		);
		UtilsCommon.StartProcessAndGetOutput(InstanceSettings.systemSettings.formatConvertCommand, arguments);
	}
}
/// <summary>
/// Reads a subtitle stream laid out as repeated groups of: a line holding only a number,
/// a "start --> end" time line, one or more text lines, and a blank line
/// (NOTE(review): this looks like the SubRip .srt layout -- confirm).
/// </summary>
/// <param name="settings">Not read by this method.</param>
/// <param name="stream">Stream containing the subtitle data.</param>
/// <param name="encoding">Encoding used to decode <paramref name="stream"/>.</param>
/// <returns>All parsed entries, sorted with the default ordering of <c>LineInfo</c>.</returns>
public List<LineInfo> parse (Settings settings, Stream stream, Encoding encoding)
{
	const RegexOptions regexFlags = RegexOptions.IgnoreCase | RegexOptions.Compiled;

	var parsedLines = new List<LineInfo> (2000);
	var reader = new StreamReader (stream, encoding);

	ParseStep state = ParseStep.LineNum;
	string startStamp = "";
	string endStamp = "";
	string pendingText = "";

	for (string current = reader.ReadLine (); current != null; current = reader.ReadLine ()) {
		if (state == ParseStep.LineNum) {
			// Everything up to and including the line number is skipped.
			if (Regex.Match (current, @"^\d+$", regexFlags).Success) {
				state = ParseStep.Time;
			}
		} else if (state == ParseStep.Time) {
			Match timeMatch = Regex.Match (current, @"^(?<StartTime>.*?)\s-->\s(?<EndTime>.*)$", regexFlags);
			// A non-matching line between number and time info is ignored; the
			// parser keeps waiting for the time line.
			if (timeMatch.Success) {
				startStamp = timeMatch.Groups ["StartTime"].ToString ().Trim ();
				endStamp = timeMatch.Groups ["EndTime"].ToString ().Trim ();
				state = ParseStep.Text;
			}
		} else if (state == ParseStep.Text) {
			Match textMatch = Regex.Match (current, @"^(?<Text>.*)$", regexFlags);
			string piece = textMatch.Groups ["Text"].ToString ().Trim ();

			if (piece == "") {
				// A blank line terminates the current subtitle.
				state = ParseStep.LineNum;
				parsedLines.Add (this.createLineInfo (pendingText, startStamp, endStamp));
				pendingText = "";
			} else {
				// The parts of a multiline subtitle are joined with spaces.
				pendingText += piece + " ";
			}
		}
	}

	// The last subtitle is not necessarily followed by a blank line.
	if (pendingText.Trim ().Length > 0) {
		parsedLines.Add (this.createLineInfo (pendingText, startStamp, endStamp));
	}

	parsedLines.Sort ();
	return parsedLines;
}
/// <summary>
/// Runs the whole export for the active cards: writes the text file, extracts the
/// snapshots into "&lt;DeckName&gt;_snapshots", extracts the audio clips into
/// "&lt;DeckName&gt;_audio" and, if <c>settings.NormalizeAudio</c> is set, normalizes
/// every extracted audio file. Returns early whenever the progress object reports
/// a cancellation between two stages.
/// </summary>
/// <param name="settings">Export settings (output directory, deck name, normalization flag).</param>
/// <param name="progressInfo">Progress sink; also used to observe cancellation.</param>
public void ExportData(Settings settings, InfoProgress progressInfo) {
	var cards = GetActiveCards();

	// announce all stages up front
	progressInfo.AddSection("Exporting text file", 1);
	progressInfo.AddSection("Exporting snapshots", cards.Count);
	progressInfo.AddSection("Exporting audio files", cards.Count);
	if(settings.NormalizeAudio) {
		progressInfo.AddSection("Normalize audio files", cards.Count);
	}
	progressInfo.Update();

	// stage 1: text file
	ExportTextFile(cards, settings, progressInfo);
	progressInfo.ProcessedSteps(1);

	// pair every card with the names of its snapshot and audio file
	var snapshotJobs = new List<Tuple<CardInfo, String>>(cards.Count);
	var audioJobs = new List<Tuple<CardInfo, String>>(cards.Count);
	foreach(var card in cards) {
		snapshotJobs.Add(new Tuple<CardInfo, String>(card, GetSnapshotFileName(settings, card)));
		audioJobs.Add(new Tuple<CardInfo, String>(card, GetAudioFileName(settings, card)));
	}

	if(progressInfo.Cancelled)
		return;

	// stage 2: snapshots
	String snapshotsPath = settings.OutputDirectoryPath
		+ Path.DirectorySeparatorChar
		+ settings.DeckName + "_snapshots"
		+ Path.DirectorySeparatorChar;
	UtilsCommon.ClearDirectory(snapshotsPath);
	WorkerSnapshot.ExtractSnaphots(settings, snapshotsPath, snapshotJobs, progressInfo);

	if(progressInfo.Cancelled)
		return;

	// stage 3: audio
	String audioPath = settings.OutputDirectoryPath
		+ Path.DirectorySeparatorChar
		+ settings.DeckName + "_audio"
		+ Path.DirectorySeparatorChar;
	UtilsCommon.ClearDirectory(audioPath);
	WorkerAudio.ExtractAudio(settings, audioPath, audioJobs, progressInfo);

	if(progressInfo.Cancelled)
		return;

	// stage 4 (optional): normalization
	if(!settings.NormalizeAudio)
		return;

	foreach(var job in audioJobs) {
		if(progressInfo.Cancelled)
			return;
		progressInfo.ProcessedSteps(1);

		if(!job.Item1.HasAudio())
			continue;

		var filepath = audioPath + job.Item2;

		// only the audio streams of the extracted file are of interest
		var audioStreamInfos = StreamInfo.ReadAllStreams(filepath);
		audioStreamInfos.RemoveAll(s => s.StreamTypeValue != StreamInfo.StreamType.ST_AUDIO);

		if(audioStreamInfos.Count == 1) {
			try {
				UtilsAudio.NormalizeAudio(filepath, audioStreamInfos[0]);
			} catch(Exception e) {
				// a failing file is logged and does not abort the remaining ones
				Console.WriteLine(e.ToString());
			}
		} else {
			Console.WriteLine("Skipped normalizing file \"{0}\" because it contains {1} audio streams", filepath, audioStreamInfos.Count);
		}
	}
}
/// <summary>
/// Generates a .tsv file named "&lt;DeckName&gt;.tsv" in the output directory. Every card
/// becomes one line with ten tab-separated columns: key, image, audio, target text,
/// native text, previous cards (target, native), next cards (target, native) and tags.
/// </summary>
/// <param name="cardInfoList">Cards to export, one output line each.</param>
/// <param name="settings">Supplies output directory, deck names and target language index.</param>
/// <param name="progressInfo">Not read by this method.</param>
public void ExportTextFile(List<CardInfo> cardInfoList, Settings settings, InfoProgress progressInfo) {
	String tsvFilename = settings.OutputDirectoryPath + Path.DirectorySeparatorChar + settings.DeckName + ".tsv";
	Console.WriteLine(tsvFilename);

	// value that will be imported into Anki/SRS-Programs-Field => [sound:???.ogg] and <img src="???.jpg"/>
	var imageMarkup = new List<String>(cardInfoList.Count);
	var soundMarkup = new List<String>(cardInfoList.Count);
	foreach(var card in cardInfoList) {
		// TODO: make this flexible
		imageMarkup.Add(card.HasImage() ? "<img src=\"" + GetSnapshotFileName(settings, card) + "\"/>" : "");
		// TODO: make this flexible
		soundMarkup.Add(card.HasAudio() ? "[sound:" + GetAudioFileName(settings, card) + "]" : "");
	}

	using (var writer = new StreamWriter(tsvFilename)) {
		for (int row = 0; row < cardInfoList.Count; row++) {
			CardInfo card = cardInfoList[row];

			// XXX: performance analysis then --- generate a episode-filtered list for context card search (because it has O(n^2) steps)
			var context = UtilsSubtitle.GetContextCards(card.episodeInfo.Index, card, m_cardInfos);
			var before = context.Item1;
			var after = context.Item2;

			var beforeNative = UtilsSubtitle.CardListToMultilineString(before, UtilsCommon.LanguageType.NATIVE);
			var beforeTarget = UtilsSubtitle.CardListToMultilineString(before, UtilsCommon.LanguageType.TARGET);
			var afterNative = UtilsSubtitle.CardListToMultilineString(after, UtilsCommon.LanguageType.NATIVE);
			var afterTarget = UtilsSubtitle.CardListToMultilineString(after, UtilsCommon.LanguageType.TARGET);

			String key = card.GetKey();
			String sound = soundMarkup[row];
			String image = imageMarkup[row];
			String tags = String.Format("SubtitleMemorize {0} ep{1:000} {2}",
				settings.DeckNameModified,
				card.episodeInfo.Number,
				InfoLanguages.languages[settings.TargetLanguageIndex].tag);

			// columns in output order
			String[] columns = new String[] {
				UtilsCommon.HTMLify(key),
				UtilsCommon.HTMLify(image),
				UtilsCommon.HTMLify(sound),
				UtilsCommon.HTMLify(card.ToSingleLine(UtilsCommon.LanguageType.TARGET)),
				UtilsCommon.HTMLify(card.ToSingleLine(UtilsCommon.LanguageType.NATIVE)),
				UtilsCommon.HTMLify(beforeTarget),
				UtilsCommon.HTMLify(beforeNative),
				UtilsCommon.HTMLify(afterTarget),
				UtilsCommon.HTMLify(afterNative),
				UtilsCommon.HTMLify(tags)
			};
			writer.WriteLine(String.Join("\t", columns));
		}
	}
}
/// <summary>
/// Builds the audio file name of a card: "&lt;DeckNameModified&gt;__&lt;card key&gt;.ogg".
/// </summary>
/// <param name="settings">Supplies <c>DeckNameModified</c> as the name prefix.</param>
/// <param name="cardInfo">Card whose key is embedded in the name.</param>
/// <returns>The file name without any directory part.</returns>
public String GetAudioFileName(Settings settings, CardInfo cardInfo) {
	var deckPrefix = settings.DeckNameModified;
	var cardKey = cardInfo.GetKey();
	return String.Concat(deckPrefix, "__", cardKey, ".ogg");
}
/// <summary>
/// Builds the snapshot file name of a card: "&lt;DeckNameModified&gt;__&lt;card key&gt;.jpg".
/// </summary>
/// <param name="settings">Supplies <c>DeckNameModified</c> as the name prefix.</param>
/// <param name="cardInfo">Card whose key is embedded in the name.</param>
/// <returns>The file name without any directory part.</returns>
public String GetSnapshotFileName(Settings settings, CardInfo cardInfo) {
	var deckPrefix = settings.DeckNameModified;
	var cardKey = cardInfo.GetKey();
	return String.Concat(deckPrefix, "__", cardKey, ".jpg");
}
/// <summary>
/// Creates a deep copy of the given settings by serializing them into an in-memory
/// stream with a <see cref="BinaryFormatter"/> and deserializing the result again.
/// </summary>
/// <param name="original">Settings object to copy.</param>
/// <returns>The object produced by deserializing the serialized <paramref name="original"/>.</returns>
public Settings DeepCopy(Settings original) {
	// NOTE(review): BinaryFormatter is obsolete in current .NET versions; consider a
	// different cloning strategy when the project is retargeted.
	using (MemoryStream buffer = new MemoryStream()) {
		BinaryFormatter serializer = new BinaryFormatter();
		serializer.Serialize(buffer, original);

		// rewind so that deserialization starts at the first written byte
		buffer.Seek(0, SeekOrigin.Begin);

		object clone = serializer.Deserialize(buffer);
		return (Settings) clone;
	}
}
/// <summary>
/// Parse ASS files: reads the whole stream line by line, trims every line and hands
/// the collected lines to the line-based <c>parse</c> overload.
/// </summary>
/// <param name="settings">Settings, passed through to the line-based overload.</param>
/// <param name="stream">Stream containing the subtitle data; it is closed together with the reader.</param>
/// <param name="encoding">Encoding used to decode <paramref name="stream"/>.</param>
/// <returns>Whatever the line-based <c>parse</c> overload returns for the collected lines.</returns>
public List<LineInfo> parse(Settings settings, Stream stream, Encoding encoding) {
	var trimmedLines = new LinkedList<String> ();

	using (var input = new StreamReader (stream, encoding)) {
		for (String current = input.ReadLine (); current != null; current = input.ReadLine ()) {
			trimmedLines.AddLast (current.Trim ());
		}
	}

	return parse (settings, trimmedLines);
}