/// <summary>
/// Dumps a single utterance into a new XML script file.
/// </summary>
/// <param name="utt">The utterance to dump.</param>
/// <param name="ttsEngine">The TTS engine passed on to the utterance-to-item conversion.</param>
/// <returns>A Unicode-encoded XmlScriptFile containing the one item built from the utterance.</returns>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="utt"/> or <paramref name="ttsEngine"/> is null.
/// </exception>
/// <remarks>
/// The original documentation states that an InvalidDataException is raised for empty word text;
/// that would come from the conversion helper, which is not visible here.
/// </remarks>
public static XmlScriptFile ToXml(this SP.TtsUtterance utt, SP.TtsEngine ttsEngine)
{
    // An extension method can be invoked on a null receiver, so guard it explicitly
    // instead of failing later inside a helper.
    if (utt == null)
    {
        throw new ArgumentNullException("utt");
    }

    if (ttsEngine == null)
    {
        throw new ArgumentNullException("ttsEngine");
    }

    XmlScriptFile script = new XmlScriptFile();
    script.Encoding = Encoding.Unicode;
    script.Language = GetLanguage(utt);

    ScriptItem item = utt.ToScriptItem(ttsEngine);
    script.Items.Add(item);

    return script;
}
/// <summary>
/// Writes this script item to the XML writer as an "si" element: its attributes,
/// optional comments, a "text" child and then every sentence.
/// </summary>
/// <param name="writer">XmlWriter to write to.</param>
/// <param name="scriptContentController">Controller deciding whether comments are saved.</param>
/// <param name="scriptLanguage">The language of the script, forwarded to each sentence.</param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="writer"/> or <paramref name="scriptContentController"/> is null.
/// </exception>
public void WriteToXml(XmlWriter writer, XmlScriptFile.ContentControler scriptContentController, Language scriptLanguage)
{
    if (writer == null)
    {
        throw new ArgumentNullException("writer");
    }

    if (scriptContentController == null)
    {
        throw new ArgumentNullException("scriptContentController");
    }

    // write <si> node and its attributes
    writer.WriteStartElement("si");
    writer.WriteAttributeString("id", Id);

    string domainName = DomainTypeToString(Domain);
    if (!string.IsNullOrEmpty(domainName))
    {
        writer.WriteAttributeString("domain", domainName);
    }

    if (Frequency != DefaultFrequency)
    {
        // The script is machine-readable, so format with the invariant culture
        // (same as the difficulty attribute below) rather than the thread culture.
        writer.WriteAttributeString("frequency",
            string.Format(CultureInfo.InvariantCulture, "{0}", Frequency));
    }

    if (ReadingDifficulty > DefaultReadingDifficulty)
    {
        // Per the original note, the difficulty is a reading score in the range 0 to 100.
        writer.WriteAttributeString("difficulty",
            string.Format(CultureInfo.InvariantCulture, "{0:F4}", ReadingDifficulty));
    }

    if (scriptContentController.SaveComments)
    {
        _ttsXmlComments.WriteToXml(writer);
    }

    // write <text> node and its content; SSML text goes into a CDATA section
    writer.WriteStartElement("text");
    if (IsSsml)
    {
        writer.WriteCData(Text);
    }
    else
    {
        writer.WriteString(Text);
    }

    writer.WriteEndElement();

    // write sentences
    foreach (ScriptSentence sentence in Sentences)
    {
        sentence.WriteToXml(writer, scriptContentController, scriptLanguage);
    }

    writer.WriteEndElement();
}
/// <summary>
/// Serializes this sentence as a "sent" element containing its text, its words,
/// any alternative accepted word sequences and, when present, its named entities.
/// </summary>
/// <param name="writer">Destination XmlWriter.</param>
/// <param name="scriptContentController">Controller deciding whether comments are saved.</param>
/// <param name="scriptLanguage">The language of the script, forwarded to each word.</param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="writer"/> or <paramref name="scriptContentController"/> is null.
/// </exception>
public void WriteToXml(XmlWriter writer, XmlScriptFile.ContentControler scriptContentController, Language scriptLanguage)
{
    if (writer == null)
    {
        throw new ArgumentNullException("writer");
    }

    if (scriptContentController == null)
    {
        throw new ArgumentNullException("scriptContentController");
    }

    // Deleted words are pushed into the comments before anything is written.
    if (scriptContentController.SaveComments)
    {
        WriteDeletedWordsToComments(scriptLanguage);
    }

    // <sent> start tag plus optional attributes
    writer.WriteStartElement("sent");

    string typeText = SentenceTypeToString(SentenceType);
    if (!string.IsNullOrEmpty(typeText))
    {
        writer.WriteAttributeString("type", typeText);
    }

    string emotionText = EmotionTypeToString(Emotion);
    if (!string.IsNullOrEmpty(emotionText))
    {
        writer.WriteAttributeString("emotion", emotionText);
    }

    if (scriptContentController.SaveComments)
    {
        _ttsXmlComments.WriteToXml(writer);
    }

    // <text>
    writer.WriteStartElement("text");
    writer.WriteString(Text);
    writer.WriteEndElement();

    // <words>
    writer.WriteStartElement("words");
    foreach (ScriptWord current in Words)
    {
        current.WriteToXml(writer, scriptContentController, scriptLanguage);
    }

    writer.WriteEndElement();

    // one <accept> element per alternative word sequence
    foreach (List<ScriptWord> alternative in AcceptSentences)
    {
        writer.WriteStartElement("accept");
        foreach (ScriptWord acceptedWord in alternative)
        {
            acceptedWord.WriteToXml(writer, scriptContentController, scriptLanguage);
        }

        writer.WriteEndElement();
    }

    // <nes> is emitted only when there is at least one named entity
    if (NamedEntities.Count > 0)
    {
        writer.WriteStartElement("nes");
        foreach (ScriptNamedEntity namedEntity in NamedEntities)
        {
            namedEntity.WriteToXml(writer, scriptContentController);
        }

        writer.WriteEndElement();
    }

    // </sent>
    writer.WriteEndElement();
}
/// <summary>
/// Serializes this word as a "w" element. Optional attributes are written only when
/// they carry a non-default or non-empty value; syllables and acoustics follow as children.
/// </summary>
/// <param name="writer">Destination XmlWriter.</param>
/// <param name="scriptContentController">Controller deciding whether comments and pronunciation source are saved.</param>
/// <param name="scriptLanguage">The language of the script; a word language equal to it is not written.</param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="writer"/> or <paramref name="scriptContentController"/> is null.
/// </exception>
public void WriteToXml(XmlWriter writer, XmlScriptFile.ContentControler scriptContentController, Language scriptLanguage)
{
    if (writer == null)
    {
        throw new ArgumentNullException("writer");
    }

    if (scriptContentController == null)
    {
        throw new ArgumentNullException("scriptContentController");
    }

    // <w> start tag; the attribute order below is part of the output format.
    writer.WriteStartElement("w");

    if (Language != Language.Neutral && Language != scriptLanguage)
    {
        writer.WriteAttributeString("language", Localor.LanguageToString(Language));
    }

    writer.WriteAttributeString("v", Grapheme);

    if (!string.IsNullOrEmpty(Pronunciation))
    {
        writer.WriteAttributeString("p", Pronunciation);
    }

    if (!string.IsNullOrEmpty(AcceptGrapheme))
    {
        writer.WriteAttributeString("av", AcceptGrapheme);
    }

    if (!string.IsNullOrEmpty(AcceptPronunciation))
    {
        writer.WriteAttributeString("ap", AcceptPronunciation);
    }

    writer.WriteAttributeString("type", WordTypeToString(WordType));

    if (!string.IsNullOrEmpty(PosString))
    {
        writer.WriteAttributeString("pos", PosString);
    }

    if (!string.IsNullOrEmpty(Expansion))
    {
        writer.WriteAttributeString("exp", Expansion);
    }

    string emphasisText = EmphasisToString(Emphasis);
    if (!string.IsNullOrEmpty(emphasisText))
    {
        writer.WriteAttributeString("em", emphasisText);
    }

    // break, asked break and break probability
    if (Break != DefaultBreak)
    {
        writer.WriteAttributeString("br", BreakToString(Break));
    }

    if (BreakAsk != UndefinedBreakAsk)
    {
        writer.WriteAttributeString("bra", BreakToString(BreakAsk, true));
    }

    if (BreakProb != DefaultProbability)
    {
        writer.WriteAttributeString("brp", BreakProb.ToString("0.000", CultureInfo.InvariantCulture));
    }

    if (TobiFinalBoundaryTone != null)
    {
        writer.WriteAttributeString("tobifbt", TobiFinalBoundaryTone.ToString());
    }

    if (!string.IsNullOrEmpty(AcousticDomainTag))
    {
        writer.WriteAttributeString("domain", AcousticDomainTag);
    }

    if (!string.IsNullOrEmpty(NusTag))
    {
        writer.WriteAttributeString("nus", NusTag);
    }

    if (TobiInitialBoundaryTone != null)
    {
        writer.WriteAttributeString("tobiibt", TobiInitialBoundaryTone.ToString());
    }

    if (!string.IsNullOrEmpty(ShallowParseTag))
    {
        writer.WriteAttributeString("sp", ShallowParseTag);
    }

    string toneText = WordToneToString(WordTone);
    if (!string.IsNullOrEmpty(toneText))
    {
        writer.WriteAttributeString("wt", toneText);
    }

    if (!string.IsNullOrEmpty(_tcgppScores))
    {
        writer.WriteAttributeString("tcgppScore", _tcgppScores);
    }

    if (!string.IsNullOrEmpty(NETypeText))
    {
        writer.WriteAttributeString("netype", NETypeText);
    }

    if (!string.IsNullOrEmpty(RegularText))
    {
        writer.WriteAttributeString("regularText", RegularText.ToString());
    }

    // pronunciation source is written only when it is non-default and the controller asks for it
    if (PronSource != DefaultPronSource && scriptContentController.SavePronSource)
    {
        string sourceText = PronSourceToString(PronSource);
        if (!string.IsNullOrEmpty(sourceText))
        {
            writer.WriteAttributeString("pronSource", sourceText);
        }
    }

    // offset and length are never written for silence words
    if (WordType != WordType.Silence)
    {
        if (OffsetInString > 0)
        {
            writer.WriteAttributeString("offset", OffsetInString.ToString(CultureInfo.InvariantCulture));
        }

        if (LengthInString > 0)
        {
            writer.WriteAttributeString("length", LengthInString.ToString(CultureInfo.InvariantCulture));
        }
    }

    // Only the three known process types produce an attribute; anything else is skipped.
    string processTypeText = null;
    switch (ProcessType)
    {
        case PType.Word:
            processTypeText = "word";
            break;
        case PType.Spell:
            processTypeText = "spell";
            break;
        case PType.Expand:
            processTypeText = "expand";
            break;
    }

    if (processTypeText != null)
    {
        writer.WriteAttributeString("processType", processTypeText);
    }

    if (scriptContentController.SaveComments)
    {
        _ttsXmlComments.WriteToXml(writer);
    }

    // <syls> child, only when the word has syllables
    if (Syllables.Count != 0)
    {
        writer.WriteStartElement("syls");
        foreach (ScriptSyllable syl in Syllables)
        {
            syl.WriteToXml(writer);
        }

        writer.WriteEndElement();
    }

    if (HasAcousticsValue)
    {
        Acoustics.WriteToXml(writer);
    }

    // </w>
    writer.WriteEndElement();
}
/// <summary>
/// Loads an XML script file, attaches the phone set and POS set from the
/// validation setting, and validates the script against that setting.
/// </summary>
/// <param name="scriptFile">Path of the script file to load.</param>
/// <param name="validateSetting">Validation setting; it is verified before the load.</param>
/// <returns>The loaded and validated script.</returns>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="scriptFile"/> is null or empty, or
/// <paramref name="validateSetting"/> is null.
/// </exception>
public static XmlScriptFile LoadWithValidation(string scriptFile, XmlScriptValidateSetting validateSetting)
{
    if (string.IsNullOrEmpty(scriptFile))
    {
        throw new ArgumentNullException("scriptFile");
    }

    if (validateSetting == null)
    {
        throw new ArgumentNullException("validateSetting");
    }

    // Verify the setting first so that an invalid setting fails before any file I/O.
    validateSetting.VerifySetting();

    XmlScriptFile loaded = new XmlScriptFile();
    loaded.Load(scriptFile);

    // The sets are attached after loading and before validating.
    loaded.PhoneSet = validateSetting.PhoneSet;
    loaded.PosSet = validateSetting.PosSet;
    loaded.Validate(validateSetting);

    return loaded;
}
/// <summary>
/// Merges every script file found under a folder (searched recursively) into one script.
/// When the validation scope is not None, each script is validated and the items
/// selected by GetNeedDeleteItemIds are removed before merging.
/// </summary>
/// <param name="scriptDir">Directory containing the script files.</param>
/// <param name="errorSet">Receives the errors of each script and of each failed add.</param>
/// <param name="resetId">True to renumber the items with a ten-digit running id.</param>
/// <param name="validateSetting">Validation setting.</param>
/// <param name="contentController">Content controller handed to each script load.</param>
/// <returns>The merged script file.</returns>
/// <exception cref="ArgumentNullException">
/// Thrown when scriptDir is null or empty, or errorSet or validateSetting is null.
/// </exception>
/// <exception cref="DirectoryNotFoundException">Thrown when scriptDir does not exist.</exception>
/// <exception cref="InvalidDataException">Thrown when the scripts do not share one language.</exception>
public static XmlScriptFile MergeScripts(string scriptDir, ErrorSet errorSet, bool resetId, XmlScriptValidateSetting validateSetting, object contentController)
{
    if (string.IsNullOrEmpty(scriptDir))
    {
        throw new ArgumentNullException("scriptDir");
    }

    if (errorSet == null)
    {
        throw new ArgumentNullException("errorSet");
    }

    if (validateSetting == null)
    {
        throw new ArgumentNullException("validateSetting");
    }

    if (!Directory.Exists(scriptDir))
    {
        throw new DirectoryNotFoundException(scriptDir);
    }

    validateSetting.VerifySetting();
    XmlScriptValidationScope scope = validateSetting.ValidationScope;

    string searchPattern = "*" + XmlScriptFile.Extension;
    string[] scriptPaths = Directory.GetFiles(scriptDir, searchPattern, SearchOption.AllDirectories);

    XmlScriptFile merged = new XmlScriptFile();
    long runningId = 0;

    foreach (string scriptPath in scriptPaths)
    {
        XmlScriptFile current = new XmlScriptFile();
        current.Load(scriptPath, contentController);

        // The first script decides the language; every later one must agree.
        if (merged.Language == Language.Neutral)
        {
            merged.Language = current.Language;
        }
        else if (merged.Language != current.Language)
        {
            throw new InvalidDataException(Helper.NeutralFormat("Inconsistent langage in {0}", scriptPath));
        }

        if (scope != XmlScriptValidationScope.None)
        {
            current.PosSet = validateSetting.PosSet;
            current.PhoneSet = validateSetting.PhoneSet;
            current.Validate(validateSetting);
            current.Remove(GetNeedDeleteItemIds(current.ErrorSet));
        }

        errorSet.Merge(current.ErrorSet);

        foreach (ScriptItem item in current.Items)
        {
            string assignedId = resetId ? Helper.NeutralFormat("{0:D10}", ++runningId) : item.Id;
            item.Id = assignedId;

            ErrorSet addErrors = new ErrorSet();
            bool added = merged.Add(item, addErrors, false);
            if (!added)
            {
                // The item was rejected; surface the reason to the caller.
                errorSet.Merge(addErrors);
            }
        }
    }

    return merged;
}
/// <summary>
/// Extracts features for every sentence listed in the file list map.
/// Sentences whose data raises an InvalidDataException are logged, removed from the
/// script and from the file list map, and skipped.
/// </summary>
/// <param name="script">The xml script file.</param>
/// <param name="fileListMap">The file list map; its keys are the sentence ids to process.</param>
/// <param name="alignmentDir">The alignment directory.</param>
/// <param name="waveDir">The wave directory.</param>
/// <returns>The extracted features in a training sentence set.</returns>
/// <exception cref="ArgumentNullException">Thrown when any argument is null.</exception>
public TrainingSentenceSet Extract(XmlScriptFile script, FileListMap fileListMap, string alignmentDir, string waveDir)
{
    if (script == null)
    {
        throw new ArgumentNullException("script");
    }

    if (fileListMap == null)
    {
        throw new ArgumentNullException("fileListMap");
    }

    if (alignmentDir == null)
    {
        throw new ArgumentNullException("alignmentDir");
    }

    if (waveDir == null)
    {
        throw new ArgumentNullException("waveDir");
    }

    TrainingSentenceSet sentenceSet = new TrainingSentenceSet { FileListMap = fileListMap };

    // Ids are collected here and removed from the map after the loop,
    // because the map's keys are being enumerated.
    List<string> errList = new List<string>();

    foreach (string sid in fileListMap.Map.Keys)
    {
        ScriptItem item = script.ItemDic[sid];

        try
        {
            // Loads the segmentation file.
            SegmentFile segmentFile = new SegmentFile();
            segmentFile.Load(fileListMap.BuildPath(alignmentDir, sid, "txt"));

            // An empty segmentation would make the "last segment" index below -1 and
            // abort the whole extraction; treat it as bad data for this sentence instead.
            if (segmentFile.WaveSegments.Count == 0)
            {
                throw new InvalidDataException(
                    Helper.NeutralFormat("There is no alignment segment for sentence [{0}].", sid));
            }

            // Loads the waveform file to set the end time of the last segmentation.
            WaveFile waveFile = new WaveFile();
            waveFile.Load(fileListMap.BuildPath(waveDir, sid, FileExtensions.Waveform));
            segmentFile.WaveSegments[segmentFile.WaveSegments.Count - 1].EndTime = waveFile.Duration;

            // Extracts the single script item.
            Sentence sentence = Extract(item, segmentFile);
            sentence.TrainingSet = sentenceSet;
            sentenceSet.Sentences.Add(sid, sentence);
        }
        catch (InvalidDataException e)
        {
            // Only data errors are recoverable; every other exception propagates untouched.
            Logger.Log(Helper.BuildExceptionMessage(e));
            script.Remove(sid);
            errList.Add(sid);
        }
    }

    fileListMap.RemoveItems(errList);

    return sentenceSet;
}
/// <summary>
/// Loads a script (with its comments), strips the unit boundaries from every
/// non-empty word pronunciation, and saves the result as Unicode to the target file.
/// </summary>
/// <param name="sourceScriptFilePath">Source script file.</param>
/// <param name="targetScriptFilePath">Target script file.</param>
/// <returns>The error set of the loaded script.</returns>
/// <exception cref="ArgumentNullException">Thrown when either path is null or empty.</exception>
public static ErrorSet RemoveSliceBoundary(string sourceScriptFilePath, string targetScriptFilePath)
{
    if (string.IsNullOrEmpty(sourceScriptFilePath))
    {
        throw new ArgumentNullException("sourceScriptFilePath");
    }

    if (string.IsNullOrEmpty(targetScriptFilePath))
    {
        throw new ArgumentNullException("targetScriptFilePath");
    }

    // Ask the loader to keep the comments found in the source file.
    XmlScriptFile.ContentControler loadOptions = new XmlScriptFile.ContentControler { LoadComments = true };

    XmlScriptFile source = new XmlScriptFile();
    source.Load(sourceScriptFilePath, loadOptions);

    foreach (ScriptItem scriptItem in source.Items)
    {
        foreach (ScriptWord scriptWord in scriptItem.AllWords)
        {
            if (string.IsNullOrEmpty(scriptWord.Pronunciation))
            {
                // Nothing to strip for words without a pronunciation.
                continue;
            }

            scriptWord.Pronunciation = Pronunciation.RemoveUnitBoundary(scriptWord.Pronunciation);
        }
    }

    source.Save(targetScriptFilePath, Encoding.Unicode);

    return source.ErrorSet;
}
/// <summary>
/// Extracts acoustic features for one sentence and writes one formatted line per
/// non-silence segment to the writer.
/// </summary>
/// <param name="writer">Stream writer to write acoustic features.</param>
/// <param name="script">Script file instance.</param>
/// <param name="sid">Script item id.</param>
/// <param name="phoneme">Phoneme used to get units.</param>
/// <param name="sliceData">Slice data used to get units.</param>
/// <param name="fileMap">File list map.</param>
/// <param name="segmentDir">Segmentation file directory.</param>
/// <param name="wave16kDir">16k Hz waveform file directory.</param>
/// <param name="epochDir">Epoch file directory.</param>
/// <exception cref="InvalidDataException">
/// Thrown when the unit count differs from the non-silence segment count, when the adjusted
/// epoch offset exceeds the epoch end, or when a segment ends beyond the waveform.
/// </exception>
private static void ExtractAcoustic(StreamWriter writer, XmlScriptFile script, string sid, Phoneme phoneme, SliceData sliceData, FileListMap fileMap, string segmentDir, string wave16kDir, string epochDir)
{
    ScriptItem scriptItem = script.ItemDic[sid];

    // find the absolute file paths for each kind data file
    string wave16kFilePath = Path.Combine(wave16kDir, fileMap.Map[scriptItem.Id] + ".wav");
    string epochFilePath = Path.Combine(epochDir, fileMap.Map[scriptItem.Id] + ".epoch");
    string segmentFilePath = Path.Combine(segmentDir, fileMap.Map[scriptItem.Id] + ".txt");

    // load data files
    SegmentFile segFile = new SegmentFile();
    segFile.Load(segmentFilePath);

    EggAcousticFeature eggFile = new EggAcousticFeature();
    eggFile.LoadEpoch(epochFilePath);

    WaveAcousticFeature waveFile = new WaveAcousticFeature();
    waveFile.Load(wave16kFilePath);

    // the script units and the non-silence segments must pair up one to one
    int totalCount = segFile.NonSilenceWaveSegments.Count;
    Collection<TtsUnit> units = scriptItem.GetUnits(phoneme, sliceData);
    if (units.Count != totalCount)
    {
        string str1 = "Unit number mis-matched between sentence [{0}] in ";
        string str2 = "script file [{1}] and in the alignment file [{2}]. ";
        string str3 = "There are {3} units in script but {4} units in alignment.";
        string message = string.Format(CultureInfo.InvariantCulture,
            str1 + str2 + str3,
            sid, script.FilePath, segmentFilePath, units.Count, totalCount);
        throw new InvalidDataException(message);
    }

    for (int i = 0; i < totalCount; i++)
    {
        // for each wave segment
        WaveSegment ws = segFile.NonSilenceWaveSegments[i];

        // get unit sample scope
        int sampleOffset = (int)(ws.StartTime * waveFile.SamplesPerSecond);
        int sampleLength = (int)(ws.Duration * waveFile.SamplesPerSecond);
        int sampleEnd = sampleOffset + sampleLength;

        // calculate average pitch and pitch range (uses the unadjusted sample scope)
        float averagePitch, pitchRange;
        eggFile.GetPitchAndRange(sampleOffset, sampleLength, out averagePitch, out pitchRange);
        ws.AveragePitch = averagePitch;
        ws.PitchRange = pitchRange;

        // Adjust the segment alignment with the epoch data before calculating the
        // root mean square; both sample positions are passed by ref to the adjustment.
        int epochOffset = eggFile.AdjustAlignment(ref sampleOffset);
        int epochEnd = eggFile.AdjustAlignment(ref sampleEnd);

        if (epochOffset > epochEnd)
        {
            string info = string.Format(CultureInfo.InvariantCulture,
                "epochOffset[{0}] should not be bigger than epochEnd[{1}]",
                epochOffset, epochEnd);
            throw new InvalidDataException(info);
        }

        if (sampleEnd > waveFile.SampleNumber)
        {
            string str1 = "Mis-match found between alignment file [{0}] and waveform file [{1}], ";
            string str2 = "for the end sample of alignment is [{2}] but";
            string str3 = " the total sample number of waveform file is [{3}].";

            // Report the sample position that failed the check above, not the epoch index.
            string info = string.Format(CultureInfo.InvariantCulture,
                str1 + str2 + str3,
                segmentFilePath, wave16kFilePath, sampleEnd, waveFile.SampleNumber);
            throw new InvalidDataException(info);
        }

        ws.RootMeanSquare = waveFile.CalculateRms(sampleOffset, sampleEnd - sampleOffset);

        // calculate compressed epoch lengths for the 16k and 8k epoch data
        int epoch16KCompressLength = EpochFile.CompressEpoch(eggFile.Epoch,
            epochOffset, epochEnd - epochOffset, null);
        int epoch8KCompressLength = EpochFile.CompressEpoch(eggFile.Epoch8k,
            epochOffset, epochEnd - epochOffset, null);

        // one fixed-width line per segment
        string line = string.Format(CultureInfo.InvariantCulture,
            "{0,12} {1,3} {2,9:0.000000} {3,9:0.000000} {4,7} {5,5} {6,4} {7,3} {8,3} {9,3} {10,7:0.0} {11,5:0.0} {12,4:0.0} {13}",
            scriptItem.Id, i,
            ws.StartTime, ws.Duration,
            sampleOffset, sampleEnd - sampleOffset,
            epochOffset, epochEnd - epochOffset,
            epoch16KCompressLength, epoch8KCompressLength,
            ws.RootMeanSquare, ws.AveragePitch, ws.PitchRange,
            units[i].FullName);

        writer.WriteLine(line);
    }
}
/// <summary>
/// Parses the configuration element: reads the language, engine, voice name, token id and
/// font path, loads the script, file map and weight table it references, and collects the
/// CART tree, unit feature, wave16k and segment locations, checking that each path exists.
/// </summary>
/// <param name="config">Configuration element to parse.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="config"/> is null.</exception>
/// <exception cref="InvalidDataException">Thrown when a required path node is missing.</exception>
/// <exception cref="DirectoryNotFoundException">
/// Thrown when a referenced directory or file does not exist. Missing files also use this
/// type, as in the original code, so existing callers keep working.
/// </exception>
public void ParseConfig(XmlElement config)
{
    if (config == null)
    {
        throw new ArgumentNullException("config");
    }

    Debug.Assert(!string.IsNullOrEmpty(config.GetAttribute("language")));
    Debug.Assert(!string.IsNullOrEmpty(config.GetAttribute("engine")));

    _primaryLanguage = Localor.StringToLanguage(config.GetAttribute("language"));
    _engineType = (EngineType)Enum.Parse(typeof(EngineType), config.GetAttribute("engine"));

    // Language data comes from a languageData element when present,
    // otherwise only the CART question file is taken from question/@path.
    XmlElement eleLangData = config.SelectSingleNode("languageData") as XmlElement;
    VoiceCreationLanguageData languageData = new VoiceCreationLanguageData();
    if (eleLangData != null)
    {
        languageData.ParseLanguageDataFromXmlElement(true, eleLangData);
        languageData.SetLanguageData(_primaryLanguage);
    }
    else
    {
        languageData.CartQuestions = GetRequiredConfigText(config, "question/@path");
    }

    _voiceName = config.GetAttribute("voiceName");
    _tokenId = config.GetAttribute("tokenId");
    _fontPath = GetRequiredConfigText(config, "font/@path");

    // Load the script with validation and drop the items flagged for deletion.
    TtsPhoneSet phoneSet = Localor.GetPhoneSet(_primaryLanguage);
    XmlScriptValidateSetting validateSetting = new XmlScriptValidateSetting(phoneSet, null);
    _scriptFile = XmlScriptFile.LoadWithValidation(
        GetRequiredConfigText(config, "script/@path"), validateSetting);
    _scriptFile.Remove(ScriptHelper.GetNeedDeleteItemIds(_scriptFile.ErrorSet));

    FileMap = new FileListMap();
    FileMap.Load(GetRequiredConfigText(config, "filemap/@path"));

    _weightTable = new WeightTable(_primaryLanguage, _engineType);
    _weightTable.Load(GetRequiredConfigText(config, "weighttable/@path"));

    _cartTreeManager = new CartTreeManager();
    _cartTreeManager.CartTreeDir = GetRequiredConfigText(config, "treedir/@path");
    if (!Directory.Exists(_cartTreeManager.CartTreeDir))
    {
        string message = string.Format(CultureInfo.InvariantCulture,
            "The treeDir path does not exist at [{0}]",
            _cartTreeManager.CartTreeDir);
        throw new DirectoryNotFoundException(message);
    }

    _cartTreeManager.CartQuestionFile = languageData.CartQuestions;
    if (!File.Exists(_cartTreeManager.CartQuestionFile))
    {
        string message = string.Format(CultureInfo.InvariantCulture,
            "The tree question file path does not exist at [{0}]",
            _cartTreeManager.CartQuestionFile);
        throw new DirectoryNotFoundException(message);
    }

    _cartTreeManager.UnitDescriptFile = GetRequiredConfigText(config, "unitdescript/@path");
    if (!File.Exists(_cartTreeManager.UnitDescriptFile))
    {
        string message = string.Format(CultureInfo.InvariantCulture,
            "The unit description file path does not exist at [{0}]",
            _cartTreeManager.UnitDescriptFile);
        throw new DirectoryNotFoundException(message);
    }

    _unitFeatureFilePath = GetRequiredConfigText(config, "wavesequence/@path");
    if (!File.Exists(_unitFeatureFilePath))
    {
        string message = string.Format(CultureInfo.InvariantCulture,
            "The wave sequence file path does not exist at [{0}]",
            _unitFeatureFilePath);
        throw new DirectoryNotFoundException(message);
    }

    // Zero or more wave16k directories; each must exist.
    _wave16kDirectories.Clear();
    foreach (XmlNode dirNode in config.SelectNodes("wave16k/@path"))
    {
        string waveDir = dirNode.InnerText.Trim();
        if (!Directory.Exists(waveDir))
        {
            string message = string.Format(CultureInfo.InvariantCulture,
                "The wave16k path does not exist at [{0}]",
                waveDir);
            throw new DirectoryNotFoundException(message);
        }

        _wave16kDirectories.Add(waveDir);
    }

    // Zero or more segment (alignment) directories; each must exist.
    _segmentDirectories.Clear();
    foreach (XmlNode dirNode in config.SelectNodes("segment/@path"))
    {
        string alignmentDir = dirNode.InnerText.Trim();
        if (!Directory.Exists(alignmentDir))
        {
            string message = string.Format(CultureInfo.InvariantCulture,
                "The alignment path does not exist at [{0}]",
                alignmentDir);
            throw new DirectoryNotFoundException(message);
        }

        _segmentDirectories.Add(alignmentDir);
    }
}

/// <summary>
/// Returns the inner text of the node selected by an XPath, or throws when no node matches.
/// </summary>
/// <param name="config">Element to query.</param>
/// <param name="xpath">XPath of the required node, relative to <paramref name="config"/>.</param>
/// <returns>The inner text of the selected node.</returns>
/// <exception cref="InvalidDataException">Thrown when the XPath selects nothing.</exception>
private static string GetRequiredConfigText(XmlElement config, string xpath)
{
    XmlNode node = config.SelectSingleNode(xpath);
    if (node == null)
    {
        // Without this check the caller would fail with a NullReferenceException
        // that does not say which part of the configuration is missing.
        string message = string.Format(CultureInfo.InvariantCulture,
            "The configuration does not contain the required node [{0}]",
            xpath);
        throw new InvalidDataException(message);
    }

    return node.InnerText;
}
/// <summary>
/// Checks whether the alignment of an item contains a silence segment inside a word,
/// and whether each phone of each word matches the label of its segment.
/// Findings are added to the error set; a truncated alignment file is reported as an
/// error instead of raising an index exception.
/// </summary>
/// <param name="script">Script file instance; its phone set is used to get phone names.</param>
/// <param name="item">Script item.</param>
/// <param name="fileMap">File list map.</param>
/// <param name="segmentDir">Segment file directory.</param>
/// <param name="errorSet">Data error set found.</param>
public static void ValidateSilenceInWord(XmlScriptFile script, ScriptItem item, FileListMap fileMap, string segmentDir, ErrorSet errorSet)
{
    string segmentFilePath = Path.Combine(segmentDir, fileMap.Map[item.Id] + ".txt");

    StringBuilder errorMessage = new StringBuilder();
    SegmentFile segmentFile = ValidateAlignmentFile(segmentFilePath, errorMessage);
    if (errorMessage.Length != 0)
    {
        errorSet.Add(ScriptError.OtherErrors, item.Id, errorMessage.ToString());
        return;
    }

    int segmentCount = segmentFile.WaveSegments.Count;
    int indexOfSegment = 0;

    foreach (ScriptWord word in item.AllPronouncedNormalWords)
    {
        ErrorSet errors = new ErrorSet();
        Collection<string> phones = word.GetNormalPhoneNames(script.PhoneSet, errors);
        if (errors.Count > 0)
        {
            errorSet.Merge(errors);
            break;
        }

        // A single silence segment in front of a word is allowed; step over it.
        if (indexOfSegment < segmentCount &&
            segmentFile.WaveSegments[indexOfSegment].IsSilenceFeature)
        {
            ++indexOfSegment;
        }

        for (int i = 0; i < phones.Count; ++i, ++indexOfSegment)
        {
            // A silence segment between the phones of one word is an error;
            // report it and continue with the next segment.
            if (indexOfSegment < segmentCount &&
                segmentFile.WaveSegments[indexOfSegment].IsSilenceFeature)
            {
                errorSet.Add(ScriptError.OtherErrors, item.Id,
                    Helper.NeutralFormat("Alignment file {0} is invalid, for silence in word {1}.",
                        segmentFilePath, word.Grapheme));
                ++indexOfSegment;
            }

            // The alignment has fewer segments than the script has phones:
            // nothing is left to compare, so report it once and stop.
            if (indexOfSegment >= segmentCount)
            {
                errorSet.Add(ScriptError.OtherErrors, item.Id,
                    Helper.NeutralFormat(
                        "Alignment file {0} is invalid, for it runs out of segments at word {1}.",
                        segmentFilePath, word.Grapheme));
                return;
            }

            if (segmentFile.WaveSegments[indexOfSegment].Label != phones[i])
            {
                string message = string.Format(CultureInfo.InvariantCulture,
                    "phone [{0}/{1}] at {2} does not match between script and segment.",
                    WaveSegment.FormatLabel(phones[i]),
                    segmentFile.WaveSegments[indexOfSegment].Label,
                    i);
                errorSet.Add(ScriptError.OtherErrors, item.Id, message);
            }
        }
    }
}
/// <summary>
/// Compares the phones of a script item with the segments of its alignment file and
/// records every inconsistency in the error set.
/// </summary>
/// <param name="script">Script file instance; its phone set is used to get phone names.</param>
/// <param name="item">Script item.</param>
/// <param name="fileMap">File list map.</param>
/// <param name="segmentDir">Segment file directory.</param>
/// <param name="errorSet">Data error set found.</param>
public static void ValidateDataAlignment(XmlScriptFile script, ScriptItem item, FileListMap fileMap, string segmentDir, ErrorSet errorSet)
{
    string segmentFilePath = Path.Combine(segmentDir, fileMap.Map[item.Id] + ".txt");

    StringBuilder loadProblems = new StringBuilder();
    SegmentFile segmentFile = ValidateAlignmentFile(segmentFilePath, loadProblems);
    if (loadProblems.Length != 0)
    {
        // The alignment file itself is unusable; nothing further can be compared.
        errorSet.Add(ScriptError.OtherErrors, item.Id, loadProblems.ToString());
        return;
    }

    // Phone extraction errors are reported, but the checks below still run.
    ErrorSet phoneErrors = new ErrorSet();
    Collection<string> phones = item.GetNormalPhoneNames(script.PhoneSet, phoneErrors);
    errorSet.Merge(phoneErrors);

    if (segmentFile.WaveSegments.Count == 0)
    {
        string message = Helper.NeutralFormat(
            "There is no valid alignment data in alignment file {0}.", segmentFilePath);
        errorSet.Add(ScriptError.OtherErrors, item.Id, message);
        return;
    }

    if (!segmentFile.WaveSegments[segmentFile.WaveSegments.Count - 1].IsSilencePhone)
    {
        string message = Helper.NeutralFormat(
            "Alignment file {0} is invalid, for without silence segment at the end.",
            segmentFilePath);
        errorSet.Add(ScriptError.OtherErrors, item.Id, message);
        return;
    }

    if (phones.Count != segmentFile.NonSilenceWaveSegments.Count)
    {
        string message = Helper.NeutralFormat(
            "units number {0} in script file does not equal to non-silence " +
                "segments number {1} in segmentation file.",
            phones.Count,
            segmentFile.NonSilenceWaveSegments.Count);
        errorSet.Add(ScriptError.OtherErrors, item.Id, message);
        return;
    }

    // Counts agree, so phones and non-silence segments can be compared pairwise.
    for (int index = 0; index < segmentFile.NonSilenceWaveSegments.Count; index++)
    {
        WaveSegment current = segmentFile.NonSilenceWaveSegments[index];
        if (current.Label == phones[index])
        {
            continue;
        }

        string mismatch = string.Format(CultureInfo.InvariantCulture,
            "phone [{0}/{1}] at {2} does not match between script and segment.",
            WaveSegment.FormatLabel(phones[index]),
            current.Label,
            index);
        errorSet.Add(ScriptError.OtherErrors, item.Id, mismatch);
    }
}
/// <summary>
/// Loads all the script items from a folder (searched recursively).
/// The content is not validated, but a duplicate item id is reported as an error and
/// only the first item with a given id is kept.
/// </summary>
/// <param name="sourceDir">Script dir.</param>
/// <param name="errors">Receives the load errors of each script and the duplicate-id errors.</param>
/// <returns>Loaded items collection.</returns>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="sourceDir"/> is null or empty, or <paramref name="errors"/> is null.
/// </exception>
/// <exception cref="InvalidDataException">Thrown when the scripts do not share one language.</exception>
public static Collection<ScriptItem> LoadScriptsWithoutValidation(string sourceDir, ErrorSet errors)
{
    if (string.IsNullOrEmpty(sourceDir))
    {
        throw new ArgumentNullException("sourceDir");
    }

    if (errors == null)
    {
        throw new ArgumentNullException("errors");
    }

    Collection<ScriptItem> items = new Collection<ScriptItem>();

    // Maps each item id already collected to the file it came from.
    Dictionary<string, string> ids = new Dictionary<string, string>();

    string pattern = @"*" + XmlScriptFile.Extension;
    Language language = Language.Neutral;

    foreach (string file in Directory.GetFiles(sourceDir, pattern, SearchOption.AllDirectories))
    {
        XmlScriptFile script = new XmlScriptFile();
        XmlScriptFile.ContentControler controler = new XmlScriptFile.ContentControler();
        controler.LoadComments = true;
        script.Load(file, controler);

        // The first script decides the language; every later one must agree.
        if (language == Language.Neutral)
        {
            language = script.Language;
        }
        else if (language != script.Language)
        {
            throw new InvalidDataException(Helper.NeutralFormat(
                "The language name in File [{0}] is different from other files.", file));
        }

        errors.Merge(script.ErrorSet);

        foreach (ScriptItem item in script.Items)
        {
            if (ids.ContainsKey(item.Id))
            {
                errors.Add(ScriptError.DuplicateItemId, item.Id);
            }
            else
            {
                // Remember the id; without this the duplicate check above can never fire.
                ids.Add(item.Id, file);

                item.ScriptFile = null;
                items.Add(item);
            }
        }
    }

    return items;
}
/// <summary>
/// Converts an XML script to a two-line script. Items that fail to convert are
/// left out of the output and their errors are returned.
/// </summary>
/// <param name="xmlScript">Input XML script.</param>
/// <param name="targetFile">Output script; a bare file name is resolved against the current directory.</param>
/// <param name="phoneSet">
/// Phone set used to convert pronunciation.
/// It can be null when you can directly get the word's pronunciation in the word's attribute.
/// </param>
/// <returns>Errors happened.</returns>
/// <exception cref="ArgumentNullException">Thrown when either path is null or empty.</exception>
/// <exception cref="DirectoryNotFoundException">Thrown when the target directory does not exist.</exception>
public static ErrorSet ConvertXmlScriptToTwoLineScript(string xmlScript, string targetFile, TtsPhoneSet phoneSet)
{
    if (string.IsNullOrEmpty(xmlScript))
    {
        throw new ArgumentNullException("xmlScript");
    }

    if (string.IsNullOrEmpty(targetFile))
    {
        throw new ArgumentNullException("targetFile");
    }

    // Resolve to a full path first: GetDirectoryName returns an empty string for a bare
    // file name, which Directory.Exists rejects even though the current directory is valid.
    string targetDir = Path.GetDirectoryName(Path.GetFullPath(targetFile));
    if (!Directory.Exists(targetDir))
    {
        throw new DirectoryNotFoundException(targetFile);
    }

    ErrorSet errorSet = new ErrorSet();

    XmlScriptFile script = new XmlScriptFile();
    script.Load(xmlScript);

    ScriptFile oldScript = new ScriptFile(script.Language);
    foreach (ScriptItem item in script.Items)
    {
        ErrorSet itemErrors = new ErrorSet();
        ScriptItem oldItem = ConvertScriptItemToTwoLineFormat(item, phoneSet, itemErrors);
        if (itemErrors.Count != 0)
        {
            // A failed item is reported and skipped.
            errorSet.Merge(itemErrors);
        }
        else
        {
            oldScript.Items.Add(oldItem.Id, oldItem);
        }
    }

    oldScript.Save(targetFile, true, true);

    return errorSet;
}
/// <summary>
/// Converts a two-line script to an XML script saved as Unicode. Items that fail to
/// convert are left out of the output and their errors are returned.
/// </summary>
/// <param name="twoLineScript">Input two-line script.</param>
/// <param name="targetFile">Output script; a bare file name is resolved against the current directory.</param>
/// <param name="language">Language of the created XML script.</param>
/// <param name="inScriptWithoutPron">Whether input script without pronunciation.</param>
/// <returns>Errors.</returns>
/// <exception cref="ArgumentNullException">Thrown when either path is null or empty.</exception>
/// <exception cref="DirectoryNotFoundException">Thrown when the target directory does not exist.</exception>
public static ErrorSet ConvertTwoLineScriptToXmlScript(string twoLineScript, string targetFile, Language language, bool inScriptWithoutPron)
{
    if (string.IsNullOrEmpty(twoLineScript))
    {
        throw new ArgumentNullException("twoLineScript");
    }

    if (string.IsNullOrEmpty(targetFile))
    {
        throw new ArgumentNullException("targetFile");
    }

    // Resolve to a full path first: GetDirectoryName returns an empty string for a bare
    // file name, which Directory.Exists rejects even though the current directory is valid.
    string targetDir = Path.GetDirectoryName(Path.GetFullPath(targetFile));
    if (!Directory.Exists(targetDir))
    {
        throw new DirectoryNotFoundException(targetFile);
    }

    ErrorSet errorSet = new ErrorSet();

    Collection<ScriptItem> items = new Collection<ScriptItem>();
    DataErrorSet errors = ScriptFile.ReadAllData(twoLineScript, items, !inScriptWithoutPron, true);

    // Only read errors that carry a sentence id are carried over.
    foreach (DataError error in errors.Errors)
    {
        if (!string.IsNullOrEmpty(error.SentenceId))
        {
            errorSet.Add(ScriptError.OtherErrors, error.SentenceId, error.ToString());
        }
    }

    XmlScriptFile script = new XmlScriptFile(language);
    foreach (ScriptItem item in items)
    {
        ErrorSet itemErrors = new ErrorSet();
        ScriptItem newItem = ConvertScriptItemToXmlFormat(item, inScriptWithoutPron, itemErrors);
        if (itemErrors.Count != 0)
        {
            // A failed item is reported and skipped.
            errorSet.Merge(itemErrors);
        }
        else
        {
            script.Items.Add(newItem);
        }
    }

    script.Save(targetFile, Encoding.Unicode);

    return errorSet;
}
/// <summary>
/// Slices every item of the script. An item that raises an InvalidDataException is
/// recorded in the returned error set and the remaining items are still processed.
/// </summary>
/// <param name="script">Script file to slice.</param>
/// <returns>Data errors found during the slicing.</returns>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="script"/> is null.</exception>
public ErrorSet Slice(XmlScriptFile script)
{
    if (script == null)
    {
        throw new ArgumentNullException("script");
    }

    ErrorSet sliceErrors = new ErrorSet();

    foreach (ScriptItem current in script.Items)
    {
        try
        {
            Slice(current);
        }
        catch (InvalidDataException dataError)
        {
            // Keep going: one bad item must not stop the rest of the script.
            string description = Helper.NeutralFormat(
                "Error in item {0} of file {1}: {2}",
                current.Id,
                script.FilePath,
                Helper.BuildExceptionMessage(dataError));
            sliceErrors.Add(ScriptError.OtherErrors, current.Id, description);
        }
    }

    return sliceErrors;
}
/// <summary>
/// Loads a script file (with its comments), slices the pronunciation of every item,
/// and saves the result to the target file.
/// </summary>
/// <param name="scriptFilePath">Source file.</param>
/// <param name="targetFilePath">Target file.</param>
/// <returns>The slicing errors merged with the error set of the loaded script.</returns>
/// <exception cref="ArgumentNullException">Thrown when either path is null or empty.</exception>
public ErrorSet Slice(string scriptFilePath, string targetFilePath)
{
    // Validate up front, like the other path-based entry points in this file,
    // so a bad target path fails before the script is loaded and sliced.
    if (string.IsNullOrEmpty(scriptFilePath))
    {
        throw new ArgumentNullException("scriptFilePath");
    }

    if (string.IsNullOrEmpty(targetFilePath))
    {
        throw new ArgumentNullException("targetFilePath");
    }

    XmlScriptFile script = new XmlScriptFile();

    // Ask the loader to keep the comments found in the source file.
    XmlScriptFile.ContentControler controler = new XmlScriptFile.ContentControler();
    controler.LoadComments = true;
    script.Load(scriptFilePath, controler);

    ErrorSet errorSet = Slice(script);
    script.Save(targetFilePath);

    errorSet.Merge(script.ErrorSet);

    return errorSet;
}
/// <summary>
/// Cross-checks the sentence ids of a filemap against those of a script file,
/// in both directions.
/// </summary>
/// <param name="fileMap">File list map.</param>
/// <param name="script">Script file instance.</param>
/// <returns>One error for every sentence id that is present on one side only.</returns>
/// <exception cref="ArgumentNullException">fileMap or script is null.</exception>
/// <exception cref="ArgumentException">
/// The map, its keys or one of the two file paths is missing.
/// </exception>
public static ErrorSet ValidateDataAlignment(FileListMap fileMap, XmlScriptFile script)
{
    // Argument checks
    if (fileMap == null)
    {
        throw new ArgumentNullException("fileMap");
    }

    if (fileMap.Map == null)
    {
        throw new ArgumentException("fileMap.Map is null");
    }

    if (fileMap.Map.Keys == null)
    {
        throw new ArgumentException("fileMap.Map.Keys is null");
    }

    if (string.IsNullOrEmpty(fileMap.FilePath))
    {
        throw new ArgumentException("fileMap.FilePath is null");
    }

    if (script == null)
    {
        throw new ArgumentNullException("script");
    }

    if (string.IsNullOrEmpty(script.FilePath))
    {
        throw new ArgumentException("script.FilePath is null");
    }

    ErrorSet mismatches = new ErrorSet();

    // Pass 1: ids the filemap lists but the script lacks.
    foreach (string mappedId in fileMap.Map.Keys)
    {
        if (script.ItemDic.ContainsKey(mappedId))
        {
            continue;
        }

        string missingInScript = string.Format(
            CultureInfo.InvariantCulture,
            "Sentence [{0}] is found in the filemap [{1}], but not listed in script file [{2}].",
            mappedId,
            fileMap.FilePath,
            script.FilePath);
        mismatches.Add(ScriptError.OtherErrors, mappedId, missingInScript);
    }

    // Pass 2: ids the script lists but the filemap lacks.
    foreach (string scriptId in script.ItemDic.Keys)
    {
        if (fileMap.Map.ContainsKey(scriptId))
        {
            continue;
        }

        string missingInMap = string.Format(
            CultureInfo.InvariantCulture,
            "Sentence [{0}] is found in the script [{1}], but not listed in filemap [{2}].",
            scriptId,
            script.FilePath,
            fileMap.FilePath);
        mismatches.Add(ScriptError.OtherErrors, scriptId, missingInMap);
    }

    return mismatches;
}
/// <summary>
/// Writes named entity to xml as a single <c>ne</c> element.
/// </summary>
/// <remarks>
/// All validation runs before anything is written, so a rejected entity leaves no
/// partially written <c>ne</c> element in the writer. As a side effect, <c>Text</c> is
/// recomputed from the graphemes of the words between <c>StartIndex</c> and
/// <c>EndIndex</c> (inclusive), joined by a single space.
/// </remarks>
/// <param name="writer">XmlWriter instance to performance writing.</param>
/// <param name="scriptContentController">XmlScriptFile.ContentControler.</param>
/// <exception cref="ArgumentNullException">writer or scriptContentController is null.</exception>
/// <exception cref="InvalidDataException">
/// The entity crosses a sentence boundary, has an empty text or has an empty type.
/// </exception>
public void WriteToXml(XmlWriter writer, XmlScriptFile.ContentControler scriptContentController)
{
    if (writer == null)
    {
        throw new ArgumentNullException("writer");
    }

    if (scriptContentController == null)
    {
        throw new ArgumentNullException("scriptContentController");
    }

    if (Start.Sentence != End.Sentence)
    {
        throw new InvalidDataException(Helper.NeutralFormat("Named entity should not be cross sentences boundary."));
    }

    Validate();

    // Rebuild the surface text from the words the entity covers.
    Text = Start.Sentence.TextWords.Skip(StartIndex).Take(EndIndex - StartIndex + 1)
        .Select(w => w.Grapheme).Concatenate(" ");

    if (string.IsNullOrEmpty(Text))
    {
        throw new InvalidDataException(Helper.NeutralFormat("Does not support empty-grapheme named entity."));
    }

    if (string.IsNullOrEmpty(Type))
    {
        throw new InvalidDataException(Helper.NeutralFormat("Does not support null typed named entity."));
    }

    // Only start emitting once the entity is known to be valid.
    writer.WriteStartElement("ne");
    writer.WriteAttributeString("s", StartIndex.ToString(CultureInfo.InvariantCulture));
    writer.WriteAttributeString("e", EndIndex.ToString(CultureInfo.InvariantCulture));
    writer.WriteAttributeString("type", Type);
    writer.WriteAttributeString("v", Text);

    // The POS attribute is omitted when it is empty or equals the default empty marker.
    if (!string.IsNullOrEmpty(PosString) &&
        !PosString.Equals(DefaultEmptyPosString, StringComparison.OrdinalIgnoreCase))
    {
        writer.WriteAttributeString("pos", PosString);
    }

    writer.WriteEndElement();
}
/// <summary>
/// Checks every script item for -sil- inside a word and reports items that are
/// missing from either the filemap or the script.
/// </summary>
/// <param name="fileMap">File list map.</param>
/// <param name="script">Script file instance.</param>
/// <param name="segmentDir">Segment file directory.</param>
/// <returns>Data error set found.</returns>
/// <exception cref="ArgumentNullException">segmentDir is null or empty, or fileMap or script is null.</exception>
/// <exception cref="ArgumentException">The map of fileMap or its keys is null.</exception>
public static ErrorSet ValidateSilenceInWord(FileListMap fileMap, XmlScriptFile script, string segmentDir)
{
    // Argument checks
    if (string.IsNullOrEmpty(segmentDir))
    {
        throw new ArgumentNullException("segmentDir");
    }

    if (fileMap == null)
    {
        throw new ArgumentNullException("fileMap");
    }

    if (fileMap.Map == null)
    {
        throw new ArgumentException("fileMap.Map is null");
    }

    if (fileMap.Map.Keys == null)
    {
        throw new ArgumentException("fileMap.Map.Keys is null");
    }

    if (script == null)
    {
        throw new ArgumentNullException("script");
    }

    ErrorSet findings = new ErrorSet();

    foreach (ScriptItem scriptItem in script.Items)
    {
        try
        {
            if (fileMap.Map.ContainsKey(scriptItem.Id))
            {
                // The per-item check appends its findings to the shared set.
                ValidateSilenceInWord(script, scriptItem, fileMap, segmentDir, findings);
            }
            else
            {
                findings.Add(ScriptError.OtherErrors, scriptItem.Id, "File list map does not contain item");
            }
        }
        catch (InvalidDataException invalidData)
        {
            findings.Add(ScriptError.OtherErrors, scriptItem.Id, Helper.BuildExceptionMessage(invalidData));
        }
    }

    // Reverse direction: filemap entries with no matching script item.
    foreach (string mappedId in fileMap.Map.Keys)
    {
        if (script.ItemDic.ContainsKey(mappedId))
        {
            continue;
        }

        findings.Add(ScriptError.OtherErrors, mappedId, "script file does not contain item");
    }

    return findings;
}
/// <summary>
/// Extract acoustic features for a given script file.
/// </summary>
/// <remarks>
/// The sentences are processed in the order of the filemap keys, and the per-sentence
/// extraction writes to a single text writer opened on <paramref name="targetFilePath"/>.
/// </remarks>
/// <param name="script">Script file instance.</param>
/// <param name="phoneme">Phoneme used to get units.</param>
/// <param name="sliceData">Slice data used to get units.</param>
/// <param name="fileMap">File list map.</param>
/// <param name="segmentDir">Segmentation file directory.</param>
/// <param name="wave16kDir">16k Hz waveform file directory.</param>
/// <param name="epochDir">Epoch file directory.</param>
/// <param name="targetFilePath">Target acoustic file path.</param>
/// <exception cref="ArgumentNullException">
/// script or fileMap is null, or one of the directory/file path arguments is null or empty.
/// </exception>
/// <exception cref="ArgumentException">
/// The script has no file path, or the map of fileMap or its keys is null.
/// </exception>
/// <exception cref="InvalidDataException">
/// A sentence id listed in the filemap does not exist in the script file.
/// </exception>
public static void ExtractAcoustic(XmlScriptFile script, Phoneme phoneme,
    SliceData sliceData, FileListMap fileMap, string segmentDir,
    string wave16kDir, string epochDir, string targetFilePath)
{
    // Parameters validation
    if (script == null)
    {
        throw new ArgumentNullException("script");
    }

    if (string.IsNullOrEmpty(script.FilePath))
    {
        throw new ArgumentException("script.FilePath is null");
    }

    if (fileMap == null)
    {
        throw new ArgumentNullException("fileMap");
    }

    if (fileMap.Map == null)
    {
        // Message aligned with the other filemap validators in this file.
        throw new ArgumentException("fileMap.Map is null");
    }

    if (fileMap.Map.Keys == null)
    {
        throw new ArgumentException("fileMap.Map.Keys is null");
    }

    if (string.IsNullOrEmpty(segmentDir))
    {
        throw new ArgumentNullException("segmentDir");
    }

    if (string.IsNullOrEmpty(wave16kDir))
    {
        throw new ArgumentNullException("wave16kDir");
    }

    if (string.IsNullOrEmpty(epochDir))
    {
        throw new ArgumentNullException("epochDir");
    }

    if (!Directory.Exists(segmentDir))
    {
        throw Helper.CreateException(typeof(DirectoryNotFoundException), segmentDir);
    }

    if (!Directory.Exists(wave16kDir))
    {
        throw Helper.CreateException(typeof(DirectoryNotFoundException), wave16kDir);
    }

    if (!Directory.Exists(epochDir))
    {
        throw Helper.CreateException(typeof(DirectoryNotFoundException), epochDir);
    }

    if (string.IsNullOrEmpty(targetFilePath))
    {
        throw new ArgumentNullException("targetFilePath");
    }

    Helper.EnsureFolderExistForFile(targetFilePath);

    using (StreamWriter sw = new StreamWriter(targetFilePath))
    {
        // iterate each script item or sentence
        foreach (string sid in fileMap.Map.Keys)
        {
            if (!script.ItemDic.ContainsKey(sid))
            {
                string message = string.Format(CultureInfo.InvariantCulture,
                    "Sentence [{0}] does not exist in script file [{1}].",
                    sid, script.FilePath);
                throw new InvalidDataException(message);
            }

            ExtractAcoustic(sw, script, sid, phoneme, sliceData, fileMap,
                segmentDir, wave16kDir, epochDir);
        }
    }
}
/// <summary>
/// Create domain index file.
/// </summary>
/// <remarks>
/// Takes the language from <paramref name="scriptFile"/> and the tag from
/// <paramref name="domainList"/>, then indexes the pronounced normal words of every
/// script item listed in the domain list. Each word is keyed by a domain-specific text:
/// the number/acronym helper result, the phone ids for the Letter domain, or the
/// upper-cased grapheme otherwise. A key that is already indexed is skipped, except in
/// the Letter domain where further unit offsets are appended to the existing entry.
/// A word is dropped when any of its units has no candidate offset.
/// </remarks>
/// <param name="scriptFile">Script file.</param>
/// <param name="domainList">Domain list.</param>
/// <param name="uif">Name indexed unit features.</param>
/// <exception cref="ArgumentNullException">Any argument is null.</exception>
/// <exception cref="InvalidCastException">
/// The domain list is not of the list type required by its Number or Acronym domain.
/// </exception>
public void Create(XmlScriptFile scriptFile, DomainConfigList domainList, UnitIndexingFile uif)
{
    // Parameters Validation
    if (scriptFile == null)
    {
        throw new ArgumentNullException("scriptFile");
    }

    if (domainList == null)
    {
        throw new ArgumentNullException("domainList");
    }

    if (uif == null)
    {
        throw new ArgumentNullException("uif");
    }

    Dictionary<string, DomainIndexItem> items =
        new Dictionary<string, DomainIndexItem>(StringComparer.Ordinal);

    _language = scriptFile.Language;
    _tag = domainList.FontTag;
    Phoneme phoneme = Localor.GetPhoneme(_language);
    SliceData sliceData = Localor.GetSliceData(_language);

    foreach (ScriptItem scriptItem in scriptFile.Items)
    {
        if (!domainList.Contains(scriptItem.Id))
        {
            continue;
        }

        Collection<TtsUnit> itemUnits = scriptItem.GetUnits(phoneme, sliceData);
        Collection<ScriptWord> allPronouncedNormalWords = scriptItem.AllPronouncedNormalWords;

        for (int i = 0; i < allPronouncedNormalWords.Count; i++)
        {
            ScriptWord word = allPronouncedNormalWords[i];

            string text;
            if (domainList.Domain == ScriptDomain.Number)
            {
                // Direct cast: a mismatched list type fails with a descriptive
                // InvalidCastException rather than a NullReferenceException.
                text = GetNumberDomainWordText(word, scriptItem.Id, i,
                    ((NumberDomainConfigList)domainList).Digitals);
            }
            else if (domainList.Domain == ScriptDomain.Acronym)
            {
                text = GetAcronymDomainWordText(word, scriptItem.Id, i,
                    ((AcronymDomainConfigList)domainList).Acronyms);
            }
            else if (domainList.Domain == ScriptDomain.Letter)
            {
                // Use pronunciation phone ids as key
                text = GetPhoneIds(word);
            }
            else
            {
                text = word.Grapheme.ToUpperInvariant();
            }

            DomainIndexItem item;
            bool alreadyIndexed = items.TryGetValue(text, out item);

            if (alreadyIndexed && domainList.Domain != ScriptDomain.Letter)
            {
                // Skip duplicate word, except Letter domain
                continue;
            }

            if (!alreadyIndexed)
            {
                item = new DomainIndexItem();
                item.Word = text;
            }

            // NOTE(review): when an already indexed Letter-domain word is skipped part-way,
            // offsets appended to its entry by earlier units are not rolled back - confirm
            // this is intended.
            bool skipped = false;
            Collection<TtsUnit> wordUnits = word.GetUnits(phoneme, sliceData);
            for (int wordUnitIndex = 0; wordUnitIndex < wordUnits.Count; wordUnitIndex++)
            {
                TtsUnit unit = wordUnits[wordUnitIndex];

                int indexOfNonSilence = itemUnits.IndexOf(unit);
                Debug.Assert(indexOfNonSilence >= 0 && indexOfNonSilence < itemUnits.Count);

                int unitOffset = uif.SearchCandidateOffset(unit.MetaUnit.Name,
                    scriptItem.Id, (uint)indexOfNonSilence);
                if (unitOffset == -1)
                {
                    // Skip this word
                    skipped = true;
                    break;
                }

                if (item.FeatureItems.Count == wordUnitIndex)
                {
                    // First occurrence of this unit position: open a new feature slot.
                    FeatureDataItem featureItem = new FeatureDataItem();
                    featureItem.UnitIndexes.Add(unitOffset);
                    item.FeatureItems.Add(featureItem);
                }
                else
                {
                    // Slot already exists: record one more candidate offset.
                    item.FeatureItems[wordUnitIndex].UnitIndexes.Add(unitOffset);
                }
            }

            if (!skipped && !items.ContainsKey(item.Word))
            {
                items.Add(item.Word, item);
            }
        }
    }

    _items = BuildHashTable(items.Values);
}