/// <summary>
/// Handle Tag.CandidateDump: every "candidate" header line opens a cost node
/// cluster, and the "unit" lines that follow it are parsed into that cluster.
/// </summary>
/// <param name="utterance">Utterance to fill in.</param>
/// <param name="line">Section starting line.</param>
/// <param name="tr">Text data.</param>
/// <returns>The next tag line met while reading, otherwise the result of
/// MoveToNextTag once the reader has no more lines.</returns>
private static string HandleTagCandidateDump(TtsUtterance utterance, string line, TextReader tr)
{
    Debug.Assert(ParseTag(line) == Tag.CandidateDump);

    // Set when the nested loop stopped on a line (or on end of input) that
    // the outer loop still has to examine before reading any further.
    bool lineIsPending = false;

    while (lineIsPending || (line = tr.ReadLine()) != null)
    {
        lineIsPending = false;

        if (string.IsNullOrEmpty(line))
        {
            continue;
        }

        if (IsTag(line))
        {
            return line;
        }

        if (!line.StartsWith("candidate", StringComparison.Ordinal))
        {
            // Anything else inside this section is ignored
            continue;
        }

        CostNodeCluster cluster = new CostNodeCluster();
        string[] fields = line.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

        // The second space-separated field of the header is the cluster index
        cluster.Index = int.Parse(fields[1], CultureInfo.InvariantCulture);
        cluster.TtsUnit = utterance.Script.Units[cluster.Index];

        // Consume the "unit" lines that belong to this candidate
        while ((line = tr.ReadLine()) != null)
        {
            if (string.IsNullOrEmpty(line))
            {
                continue;
            }

            if (!line.StartsWith("unit", StringComparison.Ordinal))
            {
                break;
            }

            CostNode node = ParseCostNode(line);
            node.ClusterIndex = cluster.Index;
            cluster.AddNode(node);
        }

        utterance.Viterbi.CostNodeClusters.Add(cluster);

        // The line that ended the unit list has not been handled yet
        lineIsPending = true;
    }

    return MoveToNextTag(tr);
}
/// <summary>
/// Handle Tag.Pronun: the text following the first '>' on the tag line is
/// stored as the pronunciation of the utterance script.
/// </summary>
/// <param name="utterance">Utterance to fill in.</param>
/// <param name="line">Section starting line.</param>
/// <param name="tr">Text data.</param>
/// <returns>Next tag, or null for end.</returns>
private static string HandleTagPronun(TtsUtterance utterance, string line, TextReader tr)
{
    System.Diagnostics.Debug.Assert(ParseTag(line) == Tag.Pronun);

    Match tagMatch = Regex.Match(line, @"\>(.*)");
    System.Diagnostics.Debug.Assert(tagMatch.Success);

    utterance.Script.Pronunciation = tagMatch.Groups[1].Value;

    return MoveToNextTag(tr);
}
/// <summary>
/// Handle Tag.UnitVector: replaces the units of the utterance script with the
/// units parsed from this section, leaving out silence units.
/// </summary>
/// <param name="utterance">Utterance to fill in.</param>
/// <param name="line">Section starting line.</param>
/// <param name="tr">Text data.</param>
/// <returns>The next tag line met while reading, otherwise the result of
/// MoveToNextTag once the reader has no more lines.</returns>
private static string HandleTagUnitVector(TtsUtterance utterance, string line, TextReader tr)
{
    System.Diagnostics.Debug.Assert(ParseTag(line) == Tag.UnitVector);

    utterance.Script.Units.Clear();

    for (line = tr.ReadLine(); line != null; line = tr.ReadLine())
    {
        if (string.IsNullOrEmpty(line))
        {
            continue;
        }

        if (IsTag(line))
        {
            return line;
        }

        TtsUnit unit = ParseTtsUnit(line, utterance.Script.Language);
        string unitName = unit.MetaUnit.Name;

        // Units named "_sil_" or "-sil-" (any casing) are not kept
        bool isSilence =
            string.Equals(unitName, "_sil_", StringComparison.OrdinalIgnoreCase) ||
            string.Equals(unitName, "-sil-", StringComparison.OrdinalIgnoreCase);

        if (!isSilence)
        {
            utterance.Script.Units.Add(unit);
        }
    }

    return MoveToNextTag(tr);
}
/// <summary>
/// Handle Tag.NormText: the trimmed text following the first '>' on the tag
/// line becomes both the raw text and the normalized text of the utterance.
/// </summary>
/// <param name="utterance">Utterance to fill in.</param>
/// <param name="line">Section starting line.</param>
/// <param name="tr">Text data.</param>
/// <returns>Next tag, or null for end.</returns>
/// <exception cref="InvalidDataException">The resulting text is empty.</exception>
private static string HandleTagNormText(TtsUtterance utterance, string line, TextReader tr)
{
    System.Diagnostics.Debug.Assert(ParseTag(line) == Tag.NormText);

    Match tagMatch = Regex.Match(line, @"\>(.*)");
    System.Diagnostics.Debug.Assert(tagMatch.Success);

    string payload = tagMatch.Groups[1].Value;
    utterance.RawText = payload.Trim();
    utterance.TNedText = payload.Trim();

    if (!string.IsNullOrEmpty(utterance.RawText))
    {
        return MoveToNextTag(tr);
    }

    throw new InvalidDataException("nomalized text of utterence should not be empty.");
}
/// <summary>
/// Handle Tag.BreakAndEmph: the text following the first '>' on the tag line
/// is stored, untrimmed, as the sentence of the utterance script.
/// </summary>
/// <param name="utterance">Utterance to fill in.</param>
/// <param name="line">Section starting line.</param>
/// <param name="tr">Text data.</param>
/// <returns>Next tag, or null for end.</returns>
private static string HandleTagBreakAndEmph(TtsUtterance utterance, string line, TextReader tr)
{
    System.Diagnostics.Debug.Assert(ParseTag(line) == Tag.BreakAndEmph);

    Match tagMatch = Regex.Match(line, @"\>(.*)");
    System.Diagnostics.Debug.Assert(tagMatch.Success);

    string sentence = tagMatch.Groups[1].Value;
    utterance.Script.Sentence = sentence;

    return MoveToNextTag(tr);
}
/// <summary>
/// Load TtsUtterance from XML data file.
/// </summary>
/// <param name="filePath">File to load from.</param>
/// <returns>Utterance built from the "utterance" element of the file (the
/// last one processed, if the reader meets several); null when the file does
/// not exist or no such element is found.</returns>
public static TtsUtterance ReadFromXml(string filePath)
{
    TtsUtterance result = null;

    if (File.Exists(filePath))
    {
        using (XmlTextReader reader = new XmlTextReader(filePath))
        {
            while (reader.Read())
            {
                // Everything except an <utterance> element is passed over:
                // XmlDeclaration, ProcessingInstruction, Comment,
                // DocumentType, Entity, Notation, other elements.
                bool isUtteranceElement =
                    reader.NodeType == XmlNodeType.Element &&
                    reader.LocalName == "utterance";
                if (!isUtteranceElement)
                {
                    continue;
                }

                string languageName = reader.GetAttribute("lang");
                Language language = Localor.StringToLanguage(languageName);

                result = new TtsUtterance(language, EngineType.Tts30);
                ProcessUtterance(reader, result);
            }
        }
    }

    return result;
}
/// <summary>
/// Post read processing: when the utterance carries Viterbi search data, sort
/// its node routes and select the route found for the utterance wave units.
/// </summary>
/// <param name="utterance">Utterance to handle.</param>
private static void PostRead(TtsUtterance utterance)
{
    if (utterance.Viterbi == null)
    {
        // Nothing to post-process without search data
        return;
    }

    utterance.Viterbi.SortNodeRoutes();
    utterance.Viterbi.SelectedRoute =
        utterance.Viterbi.FindRoute(utterance.WaveUnits);
}
/// <summary>
/// Read utterance instance from text reader.
/// </summary>
/// <remarks>
/// Lines are skipped until a tag line is found; each tag is dispatched to its
/// section handler, which returns the next tag line (or null/empty when it
/// has none). Reading stops after the WaveUnitSel section or at end of input.
/// </remarks>
/// <param name="tr">Text reader to read from.</param>
/// <param name="language">Language of the utterance.</param>
/// <param name="engine">Engine of the utterance.</param>
/// <returns>Utterance instance.</returns>
/// <exception cref="ArgumentNullException">tr is null.</exception>
/// <exception cref="InvalidDataException">A section handler reported invalid
/// data; the message is traced and the exception is rethrown.</exception>
public static TtsUtterance ReadAllData(TextReader tr, Language language, EngineType engine)
{
    if (tr == null)
    {
        throw new ArgumentNullException("tr");
    }

    TtsUtterance utterance = new TtsUtterance(language, engine);
    string line = null;
    bool done = false;

    // True when a section handler has already handed back the next tag line
    // in `line`, so it must be dispatched before reading any further.
    bool tagPending = false;

    while (tagPending || (line = tr.ReadLine()) != null)
    {
        tagPending = false;

        if (!IsTag(line))
        {
            continue;
        }

        try
        {
            Tag tag = ParseTag(line);

            // handle each kind tags
            switch (tag)
            {
                case Tag.NormText:
                    line = HandleTagNormText(utterance, line, tr);
                    break;
                case Tag.BreakAndEmph:
                    line = HandleTagBreakAndEmph(utterance, line, tr);
                    break;
                case Tag.Pronun:
                    line = HandleTagPronun(utterance, line, tr);
                    break;
                case Tag.UnitVector:
                    line = HandleTagUnitVector(utterance, line, tr);
                    break;
                case Tag.CandidateDump:
                    if (utterance.Viterbi == null)
                    {
                        utterance.Viterbi = new ViterbiSearch();
                    }

                    line = HandleTagCandidateDump(utterance, line, tr);
                    break;
                case Tag.RouteDump:
                    line = HandleTagRouteDump(utterance, line, tr);
                    break;
                case Tag.WaveUnitSel:
                    line = HandleTagWaveUnitSel(utterance, line, tr);
                    done = true;
                    break;

                // Sections whose content is not loaded: skip to the next tag
                case Tag.Pitch:
                case Tag.UnitControl:
                case Tag.AverageConcateCost:
                case Tag.Index:
                case Tag.TargetCost:
                case Tag.ConcateCost:
                    line = MoveToNextTag(tr);
                    break;

                case Tag.Unknown:
                default:
                    // Drop the current line so the loop reads on. Leaving
                    // `line` untouched for an unhandled tag would dispatch
                    // the very same line again and never terminate.
                    line = null;
                    break;
            }
        }
        catch (InvalidDataException ide)
        {
            // Record why parsing failed, then let the caller see the failure
            System.Diagnostics.Trace.WriteLine(ide.Message);
            throw;
        }

        if (done)
        {
            break;
        }

        tagPending = !string.IsNullOrEmpty(line);
    }

    PostRead(utterance);

    return utterance;
}
/// <summary>
/// Save every unit of the utterance in Mulan TTS log formated text file.
/// </summary>
/// <param name="filePath">Target file path to save utterance.</param>
/// <param name="utterance">Utterance instance to save.</param>
/// <exception cref="ArgumentNullException">utterance is null.</exception>
/// <exception cref="ArgumentException">The script of the utterance, or the
/// unit collection of that script, is null.</exception>
public static void Save(string filePath, TtsUtterance utterance)
{
    if (utterance == null)
    {
        throw new ArgumentNullException("utterance");
    }

    if (utterance.Script == null || utterance.Script.Units == null)
    {
        string problem = utterance.Script == null
            ? "utterance.Script is null"
            : "utterance.Script.Units is null";
        throw new ArgumentException(problem);
    }

    // Delegate to the ranged overload, covering the full unit list
    int unitCount = utterance.Script.Units.Count;
    Save(filePath, utterance, 0, unitCount);
}
/// <summary>
/// Read and parse units data from the XML text reader to utterance. The
/// existing units of the utterance script are cleared first.
/// </summary>
/// <param name="reader">XML text reader to read data from.</param>
/// <param name="utterance">Target utterance to save result units.</param>
/// <exception cref="ArgumentNullException">reader or utterance is null.</exception>
/// <exception cref="ArgumentException">A child element other than "u" is found.</exception>
private static void ProcessUnits(XmlTextReader reader, TtsUtterance utterance)
{
    if (reader == null)
    {
        throw new ArgumentNullException("reader");
    }

    if (utterance == null)
    {
        throw new ArgumentNullException("utterance");
    }

    utterance.Script.Units.Clear();

    // Step back from any attribute onto the element that owns it
    reader.MoveToElement();

    if (!reader.IsEmptyElement)
    {
        // Enter the element, then walk its children up to the end tag
        reader.Read();

        while (reader.NodeType != XmlNodeType.EndElement)
        {
            if (reader.NodeType != XmlNodeType.Element)
            {
                // Text and every other non-element node is passed over
                reader.Skip();
                continue;
            }

            if (reader.LocalName != "u")
            {
                throw new ArgumentException("Invalid element '" + reader.Name + "'");
            }

            ParseUnit(reader, utterance);
            reader.Skip();
        }
    }

    // Move to next sibling
    reader.Read();
}
/// <summary>
/// Build one TTS unit from the attributes of the current XML element and
/// append it to the units of the utterance script.
/// </summary>
/// <remarks>
/// "val" gives the unit name; "lPh" and "rPh" are converted to phone ids.
/// "iSyll", "iWord", "iSent", "em" and "st" are optional and are parsed as
/// enum member names when present.
/// </remarks>
/// <param name="reader">XML text reader to read data from.</param>
/// <param name="utterance">Target utterance to save result units.</param>
private static void ParseUnit(XmlTextReader reader, TtsUnit​Utterance_PLACEHOLDER utterance)
{
}
/// <summary>
/// Build one script word from the attributes of the current XML element,
/// append it to the words of the utterance script, and skip the element.
/// </summary>
/// <remarks>
/// "val" gives the grapheme. "p", "pos", "emphasis", "break" and "type" are
/// optional; all but "p" are parsed as enum member names when present.
/// </remarks>
/// <param name="reader">XML text reader to read data from.</param>
/// <param name="utterance">Target utterance to save result words.</param>
private static void ProcessWord(XmlTextReader reader, TtsUtterance utterance)
{
    ScriptWord word = new ScriptWord(utterance.Script.Language);
    word.Grapheme = reader.GetAttribute("val");

    string pronunciationText = reader.GetAttribute("p");
    if (pronunciationText != null)
    {
        word.Pronunciation = pronunciationText;
    }

    string posText = reader.GetAttribute("pos");
    if (posText != null)
    {
        word.Pos = (PartOfSpeech)Enum.Parse(typeof(PartOfSpeech), posText);
    }

    string emphasisText = reader.GetAttribute("emphasis");
    if (emphasisText != null)
    {
        word.Emphasis = (TtsEmphasis)Enum.Parse(typeof(TtsEmphasis), emphasisText);
    }

    string breakText = reader.GetAttribute("break");
    if (breakText != null)
    {
        word.Break = (TtsBreak)Enum.Parse(typeof(TtsBreak), breakText);
    }

    string typeText = reader.GetAttribute("type");
    if (typeText != null)
    {
        word.WordType = (WordType)Enum.Parse(typeof(WordType), typeText);
    }

    utterance.Script.Words.Add(word);

    // Step past this element and anything it contains
    reader.Skip();
}
/// <summary>
/// Read and parse utterance data from the XML text reader.
/// </summary>
/// <remarks>
/// Recognized child elements: "s" (sentence, from its "val" attribute),
/// "p" (pronunciation, from its "val" attribute), "words" and "units".
/// </remarks>
/// <param name="reader">XML text reader to read data from.</param>
/// <param name="utterance">Target utterance to save.</param>
/// <exception cref="ArgumentException">Any other child element is found; the
/// message is the element name.</exception>
private static void ProcessUtterance(XmlTextReader reader, TtsUtterance utterance)
{
    // Step back from any attribute onto the element that owns it
    reader.MoveToElement();

    if (!reader.IsEmptyElement)
    {
        // Enter the element, then walk its children up to the end tag
        reader.Read();

        while (reader.NodeType != XmlNodeType.EndElement)
        {
            if (reader.NodeType != XmlNodeType.Element)
            {
                // Text and every other non-element node is passed over
                reader.Skip();
                continue;
            }

            string elementName = reader.LocalName;
            if (elementName == "s")
            {
                utterance.Script.Sentence = reader.GetAttribute("val");
                reader.Skip();
            }
            else if (elementName == "p")
            {
                utterance.Script.Pronunciation = reader.GetAttribute("val");
                reader.Skip();
            }
            else if (elementName == "words")
            {
                ProcessWords(reader, utterance);
            }
            else if (elementName == "units")
            {
                ProcessUnits(reader, utterance);
            }
            else
            {
                throw new ArgumentException(reader.Name);
            }
        }
    }

    // Move to next sibling
    reader.Read();
}
/// <summary>
/// Write the units of the utterance script as a "units" element holding one
/// "u" child element per unit.
/// </summary>
/// <param name="utterance">Units of utterance to save.</param>
/// <param name="writer">XML text writer to write units information.</param>
private static void SaveUnitsAsXml(TtsUtterance utterance, XmlTextWriter writer)
{
    // Used to turn the context phone ids back into phone names
    Phoneme phoneme = Localor.GetPhoneme(utterance.Script.Language,
        utterance.Script.Engine);

    writer.WriteStartElement("units");

    foreach (TtsUnit current in utterance.Script.Units)
    {
        writer.WriteStartElement("u");

        // Unit name
        writer.WriteAttributeString("val", current.MetaUnit.Name);

        // Positions, written as enum member names
        writer.WriteAttributeString("iSyll",
            Enum.GetName(typeof(PosInSyllable), current.Feature.PosInSyllable));
        writer.WriteAttributeString("iWord",
            Enum.GetName(typeof(PosInWord), current.Feature.PosInWord));
        writer.WriteAttributeString("iSent",
            Enum.GetName(typeof(PosInSentence), current.Feature.PosInSentence));

        // Left and right context phones
        writer.WriteAttributeString("lPh",
            phoneme.TtsId2Phone(current.Feature.LeftContextPhone));
        writer.WriteAttributeString("rPh",
            phoneme.TtsId2Phone(current.Feature.RightContextPhone));

        // Stress and emphasis, written as enum member names
        writer.WriteAttributeString("st",
            Enum.GetName(typeof(TtsStress), current.Feature.TtsStress));
        writer.WriteAttributeString("em",
            Enum.GetName(typeof(TtsEmphasis), current.Feature.TtsEmphasis));

        writer.WriteEndElement();
    }

    writer.WriteEndElement();
}
/// <summary>
/// Handle Tag.RouteDump: every "route" header line opens a node route, and
/// the "unit" lines that follow it are resolved to cost nodes of the clusters
/// already held by the Viterbi data of the utterance.
/// </summary>
/// <param name="utterance">Utterance to fill in.</param>
/// <param name="line">Section starting line.</param>
/// <param name="tr">Text data.</param>
/// <returns>The next tag line met while reading, otherwise the result of
/// MoveToNextTag once the reader has no more lines.</returns>
private static string HandleTagRouteDump(TtsUtterance utterance, string line, TextReader tr)
{
    System.Diagnostics.Debug.Assert(ParseTag(line) == Tag.RouteDump);

    // Set when the nested loop stopped on a line (or on end of input) that
    // the outer loop still has to examine before reading any further.
    bool lineIsPending = false;

    while (lineIsPending || (line = tr.ReadLine()) != null)
    {
        lineIsPending = false;

        if (string.IsNullOrEmpty(line))
        {
            continue;
        }

        if (IsTag(line))
        {
            return line;
        }

        if (!line.StartsWith("route", StringComparison.Ordinal))
        {
            // Anything else inside this section is ignored
            continue;
        }

        NodeRoute route = new NodeRoute();
        string[] fields = line.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

        // The second space-separated field of the header is the route index
        route.Index = int.Parse(fields[1], CultureInfo.InvariantCulture);

        // Consume the "unit" lines that belong to this route
        while ((line = tr.ReadLine()) != null)
        {
            if (string.IsNullOrEmpty(line))
            {
                continue;
            }

            if (!line.StartsWith("unit", StringComparison.Ordinal))
            {
                break;
            }

            // The parsed node only serves as a lookup key: the route stores
            // the node instance registered in the matching cluster.
            CostNode lookup = ParseCostNode(line);
            CostNodeCluster owner = utterance.Viterbi.CostNodeClusters[lookup.Index];
            CostNode registered = owner.IndexedNodes[lookup.Key];
            route.CostNodes.Add(registered);
        }

        route.ReverseCostNodes();
        utterance.Viterbi.NodeRoutes.Add(route);

        // The line that ended the unit list has not been handled yet
        lineIsPending = true;
    }

    return MoveToNextTag(tr);
}
/// <summary>
/// Save a range of units of the utterance in Mulan TTS log formated text
/// file, written as Unicode text that replaces any existing file.
/// </summary>
/// <remarks>
/// Output is a NormText line, a UnitVector section (only when the script and
/// its units are present) and a WaveUnitSel section. The WaveUnitSel section
/// has entries only when a selected Viterbi route is available.
/// </remarks>
/// <param name="filePath">Target file path to save utterance.</param>
/// <param name="utterance">Utterance instance to save.</param>
/// <param name="minUnitIndex">Index of the first unit to save.</param>
/// <param name="maxUnitIndex">Index at which saving stops; the unit at this
/// index is not saved.</param>
/// <exception cref="ArgumentNullException">utterance is null.</exception>
public static void Save(string filePath, TtsUtterance utterance, int minUnitIndex, int maxUnitIndex)
{
    if (utterance == null)
    {
        throw new ArgumentNullException("utterance");
    }

    using (StreamWriter sw = new StreamWriter(filePath, false, Encoding.Unicode))
    {
        sw.WriteLine("<NormText> " + utterance.TNedText);

        bool hasUnits = utterance.Script != null && utterance.Script.Units != null;
        if (hasUnits)
        {
            sw.WriteLine("<UnitVector>");

            for (int unitIndex = minUnitIndex; unitIndex < maxUnitIndex; unitIndex++)
            {
                TtsUnit unit = utterance.Script.Units[unitIndex];

                // Values for placeholders {0}..{12}. The constants embedded
                // in the format string stand for fields that are not stored:
                // "0 0" after {9} is break level and punctuation;
                // "0 0 0 100 0 0 0 0" after {11} is left context feature,
                // right context feature, control flag, volume, rate, pitch,
                // voice and user break.
                object[] fields = new object[]
                {
                    (int)unit.Feature.PosInSentence,
                    (int)unit.Feature.PosInWord,
                    (int)unit.Feature.PosInSyllable,
                    (int)unit.Feature.LeftContextPhone,
                    (int)unit.Feature.RightContextPhone,
                    (int)unit.Feature.LeftContextTone,
                    (int)unit.Feature.RightContextTone,
                    (int)unit.Feature.TtsStress,
                    (int)unit.Feature.TtsEmphasis,
                    (int)unit.Feature.TtsWordTone,
                    Localor.MapLanguageId(unit.MetaUnit.Language),
                    unit.MetaUnit.Id,
                    unit.MetaUnit.Name
                };

                sw.WriteLine(string.Format(CultureInfo.InvariantCulture,
                    "{0} {1} {2} {3} {4} {5} {6} {7} {8} {9} 0 0 {10} {11} 0 0 0 100 0 0 0 0 {12}",
                    fields));

                // A sentence or intonation phrase break is followed by a
                // -SIL- line; the two variants differ in one number only.
                switch (unit.TtsBreak)
                {
                    case TtsBreak.Sentence:
                        sw.WriteLine("7 0 0 14 2 0 0 0 1 {0} 0 {1} 0 0 0 0 0 0 0 0 750 -SIL-",
                            (int)unit.TtsBreak, (int)unit.Language);
                        break;
                    case TtsBreak.IntonationPhrase:
                        sw.WriteLine("7 0 0 14 2 0 0 0 1 {0} 0 {1} 0 0 0 0 0 0 0 0 400 -SIL-",
                            (int)unit.TtsBreak, (int)unit.Language);
                        break;
                    default:
                        break;
                }
            }

            sw.Write("\r\n");
        }

        sw.WriteLine("<WaveUnitSel>");

        bool hasSelectedRoute =
            utterance.Viterbi != null &&
            utterance.Viterbi.SelectedRoute != null &&
            utterance.Viterbi.CostNodeClusters != null &&
            utterance.Viterbi.SelectedRoute.CostNodes != null;

        // Running value threaded through SaveUnit from one unit to the next
        int index = 0;

        if (hasSelectedRoute)
        {
            for (int unitIndex = minUnitIndex; unitIndex < maxUnitIndex; unitIndex++)
            {
                CostNode node = utterance.Viterbi.SelectedRoute.CostNodes[unitIndex];
                CostNodeCluster cluster = utterance.Viterbi.CostNodeClusters[unitIndex];

                System.Diagnostics.Debug.Assert(cluster != null);
                if (cluster != null)
                {
                    TtsUnit clusterUnit = cluster.TtsUnit;
                    System.Diagnostics.Debug.Assert(clusterUnit != null, "unit should not be null here");

                    index = SaveUnit(sw, index, clusterUnit, node);
                }
            }
        }
    }
}
/// <summary>
/// Handle Tag.WaveUnitSel: every non-empty line of the section is parsed as a
/// wave unit and appended to the wave units of the utterance.
/// </summary>
/// <param name="utterance">Utterance to fill in.</param>
/// <param name="line">Section starting line.</param>
/// <param name="tr">Text data.</param>
/// <returns>The next tag line met while reading, otherwise the result of
/// MoveToNextTag once the reader has no more lines.</returns>
private static string HandleTagWaveUnitSel(TtsUtterance utterance, string line, TextReader tr)
{
    Debug.Assert(ParseTag(line) == Tag.WaveUnitSel);

    while ((line = tr.ReadLine()) != null)
    {
        if (string.IsNullOrEmpty(line))
        {
            continue;
        }

        if (IsTag(line))
        {
            // This is the next tag. Returning from here means the null line
            // seen at end of input is never handed to IsTag.
            return line;
        }

        WaveUnit unit = ParseWaveUnitForWaveUnitSel(line);
        utterance.WaveUnits.Add(unit);
    }

    // End of input reached without meeting another tag
    return MoveToNextTag(tr);
}
/// <summary>
/// Save TTS utterance to file in XML format.
/// </summary>
/// <remarks>
/// Writes an "utterance" element carrying the script language, an "s" element
/// (sentence) and a "p" element (pronunciation). A "words" element follows
/// when the sentence is not empty and the script has words; a "units" element
/// follows when the pronunciation is not empty and the script has units.
/// </remarks>
/// <param name="utterance">Utterance to save.</param>
/// <param name="filePath">File to save in.</param>
/// <exception cref="ArgumentNullException">utterance is null.</exception>
/// <exception cref="ArgumentException">The script of the utterance is null.</exception>
public static void SaveAsXml(TtsUtterance utterance, string filePath)
{
    if (utterance == null)
    {
        throw new ArgumentNullException("utterance");
    }

    if (utterance.Script == null)
    {
        throw new ArgumentException("utterance.Script is null");
    }

    using (XmlTextWriter tw = new XmlTextWriter(filePath, Encoding.Unicode))
    {
        tw.Formatting = Formatting.Indented;
        tw.Indentation = 4;

        tw.WriteStartElement("utterance");
        tw.WriteAttributeString("lang",
            Localor.LanguageToString(utterance.Script.Language));

        tw.WriteStartElement("s");
        tw.WriteAttributeString("val", utterance.Script.Sentence);
        tw.WriteEndElement();

        tw.WriteStartElement("p");
        tw.WriteAttributeString("val", utterance.Script.Pronunciation);
        tw.WriteEndElement();

        // Save words
        if (!string.IsNullOrEmpty(utterance.Script.Sentence) &&
            utterance.Script.Words != null &&
            utterance.Script.Words.Count > 0)
        {
            tw.WriteStartElement("words");

            foreach (ScriptWord word in utterance.Script.Words)
            {
                tw.WriteStartElement("w");
                tw.WriteAttributeString("val", word.Grapheme);

                // Normal words carry their pronunciation; any other word
                // carries its word type instead.
                if (word.WordType == WordType.Normal)
                {
                    tw.WriteAttributeString("p", word.Pronunciation);
                }
                else
                {
                    tw.WriteAttributeString("type",
                        Enum.GetName(typeof(WordType), word.WordType));
                }

                // Attributes holding their default value are left out.
                // Each attribute is named from the property it describes, so
                // the value can be parsed back into the same enum on reading.
                if (word.Pos != PartOfSpeech.Unknown)
                {
                    tw.WriteAttributeString("pos",
                        Enum.GetName(typeof(PartOfSpeech), word.Pos));
                }

                if (word.Emphasis != TtsEmphasis.None)
                {
                    tw.WriteAttributeString("emphasis",
                        Enum.GetName(typeof(TtsEmphasis), word.Emphasis));
                }

                if (word.Break != TtsBreak.Phone)
                {
                    tw.WriteAttributeString("break",
                        Enum.GetName(typeof(TtsBreak), word.Break));
                }

                tw.WriteEndElement();
            }

            tw.WriteEndElement();
        }

        // Save units
        if (!string.IsNullOrEmpty(utterance.Script.Pronunciation) &&
            utterance.Script.Units != null &&
            utterance.Script.Units.Count > 0)
        {
            SaveUnitsAsXml(utterance, tw);
        }

        tw.WriteEndElement();
    }
}