/// <summary>
/// Checks whether the alignment data of a script item contains a silence (-sil-)
/// segment inside a word, and whether the segment labels match the script phones.
/// Problems are reported through <paramref name="errorSet"/> rather than thrown.
/// </summary>
/// <param name="script">Script file instance; its phone set is used to get the phones of each word.</param>
/// <param name="item">Script item.</param>
/// <param name="fileMap">File list map, used to find the segment file name of the item.</param>
/// <param name="segmentDir">Segment file directory.</param>
/// <param name="errorSet">Data error set found.</param>
public static void ValidateSilenceInWord(XmlScriptFile script, ScriptItem item,
    FileListMap fileMap, string segmentDir, ErrorSet errorSet)
{
    string segmentFilePath = Path.Combine(segmentDir, fileMap.Map[item.Id] + ".txt");

    StringBuilder errorMessage = new StringBuilder();
    SegmentFile segmentFile = ValidateAlignmentFile(segmentFilePath, errorMessage);
    if (errorMessage.Length != 0)
    {
        errorSet.Add(ScriptError.OtherErrors, item.Id, errorMessage.ToString());
        return;
    }

    int segmentCount = segmentFile.WaveSegments.Count;
    int indexOfSegment = 0;
    foreach (ScriptWord word in item.AllPronouncedNormalWords)
    {
        ErrorSet errors = new ErrorSet();
        Collection<string> phones = word.GetNormalPhoneNames(script.PhoneSet, errors);
        if (errors.Count > 0)
        {
            errorSet.Merge(errors);
            break;
        }

        // A silence segment in front of a word is accepted; step over it.
        if (indexOfSegment < segmentCount &&
            segmentFile.WaveSegments[indexOfSegment].IsSilenceFeature)
        {
            ++indexOfSegment;
        }

        for (int i = 0; i < phones.Count; ++i, ++indexOfSegment)
        {
            // A silence segment between two phones of the same word is the error
            // this method looks for; report it and step over it.
            if (indexOfSegment < segmentCount &&
                segmentFile.WaveSegments[indexOfSegment].IsSilenceFeature)
            {
                errorSet.Add(ScriptError.OtherErrors, item.Id,
                    Helper.NeutralFormat("Alignment file {0} is invalid, for silence in word {1}.",
                    segmentFilePath, word.Grapheme));
                ++indexOfSegment;
            }

            // The segment file ran out of segments before the script ran out of
            // phones: report it as a data error instead of indexing out of range.
            if (indexOfSegment >= segmentCount)
            {
                errorSet.Add(ScriptError.OtherErrors, item.Id,
                    Helper.NeutralFormat("Alignment file {0} is invalid, for it has fewer segments than the phones in script.",
                    segmentFilePath));
                return;
            }

            if (segmentFile.WaveSegments[indexOfSegment].Label != phones[i])
            {
                string message = string.Format(CultureInfo.InvariantCulture,
                    "phone [{0}/{1}] at {2} does not match between script and segment.",
                    WaveSegment.FormatLabel(phones[i]),
                    segmentFile.WaveSegments[indexOfSegment].Label, i);
                errorSet.Add(ScriptError.OtherErrors, item.Id, message);
            }
        }
    }
}
/// <summary>
/// Adds one item to the script file after checking that it is acceptable.
/// The schema is checked first (the original comment states that CheckSchema throws on
/// an invalid item); duplicate ids and, optionally, content problems are reported
/// through <paramref name="errors"/>, which is cleared before use.
/// </summary>
/// <param name="item">The item to be added.</param>
/// <param name="errors">Receives the errors when the item is not added.</param>
/// <param name="validate">Whether to validate the item content as well.</param>
/// <param name="sort">Whether to insert the item at its sorted position (by id, ordinal, ignoring case) instead of appending it.</param>
/// <returns>True if the item was added; false if any error was recorded.</returns>
public bool Add(ScriptItem item, ErrorSet errors, bool validate, bool sort)
{
    if (item == null)
    {
        throw new ArgumentNullException("item");
    }

    if (errors == null)
    {
        throw new ArgumentNullException("errors");
    }

    // Schema check comes before anything else touches the error set.
    CheckSchema(item);

    errors.Clear();

    // Content problems are collected, not thrown.
    if (_itemDic.ContainsKey(item.Id))
    {
        errors.Add(ScriptError.DuplicateItemId, item.Id);
    }

    if (validate)
    {
        ErrorSet itemErrors = new ErrorSet();
        XmlScriptValidateSetting setting = new XmlScriptValidateSetting(PhoneSet, PosSet);
        ScriptItem.IsValidItem(item, itemErrors, setting);
        errors.Merge(itemErrors);
    }

    if (errors.Count > 0)
    {
        return false;
    }

    _itemDic.Add(item.Id, item);

    // Locate the first existing item whose id sorts after the new one.
    int position = -1;
    if (sort)
    {
        for (int index = 0; index < _items.Count; index++)
        {
            if (string.Compare(item.Id, _items[index].Id, StringComparison.OrdinalIgnoreCase) < 0)
            {
                position = index;
                break;
            }
        }
    }

    if (position < 0)
    {
        _items.Add(item);
    }
    else
    {
        _items.Insert(position, item);
    }

    return true;
}
/// <summary>
/// Finds the sentences that do not match between the file list map file and the script file.
/// <para />
/// This function should be merged with that in forcedalignment into common library.
/// </summary>
/// <param name="scriptFilePath">The location of script file.</param>
/// <param name="language">Language of the script file; selects the phone set used for validation.</param>
/// <param name="mapFilePath">The location of the file list map file.</param>
/// <returns>
/// The script validation errors plus one error for every sentence id that is present
/// in only one of the two files.
/// </returns>
public static ErrorSet FindUnmatchedSentences(string scriptFilePath, Language language,
    string mapFilePath)
{
    ErrorSet result = new ErrorSet();

    TtsPhoneSet phoneSet = Localor.GetPhoneSet(language);
    XmlScriptValidateSetting setting = new XmlScriptValidateSetting(phoneSet, null);

    // Load with validation, then drop the items flagged for deletion.
    XmlScriptFile script = XmlScriptFile.LoadWithValidation(scriptFilePath, setting);
    script.Remove(ScriptHelper.GetNeedDeleteItemIds(script.ErrorSet));

    Dictionary<string, string> map =
        Microsoft.Tts.Offline.FileListMap.ReadAllData(mapFilePath);

    result.Merge(script.ErrorSet);

    // Ids known to the script but missing from the map.
    foreach (string sid in script.ItemDic.Keys)
    {
        if (map.ContainsKey(sid))
        {
            continue;
        }

        string message = Helper.NeutralFormat(
            "Script item {0} in script file but not in file list map file", sid);
        result.Add(ScriptError.OtherErrors, sid, message);
    }

    // Ids known to the map but missing from the script.
    foreach (string sid in map.Keys)
    {
        if (script.ItemDic.ContainsKey(sid))
        {
            continue;
        }

        string message = Helper.NeutralFormat(
            "Script item {0} in file list map file but not in script file", sid);
        result.Add(ScriptError.OtherErrors, sid, message);
    }

    return result;
}
/// <summary>
/// Builds the MLF entry of one script item.
/// Nothing is written unless every pronounced normal word of the item has a pronunciation.
/// </summary>
/// <param name="item">Script item.</param>
/// <param name="sw">Text writer; must not be null when <paramref name="writeToFile"/> is true.</param>
/// <param name="writeToFile">Whether writing to file.</param>
/// <param name="phoneme">Phoneme; its mapping type selects phone-based or syllable-based output.</param>
/// <param name="sliceData">Slice data, passed on when getting the units of each word.</param>
/// <returns>Errors found while building the entry.</returns>
private static ErrorSet BuildMonoMlf(ScriptItem item, StreamWriter sw, bool writeToFile,
    Phoneme phoneme, SliceData sliceData)
{
    Debug.Assert(item != null);
    Debug.Assert(phoneme != null);

    if (writeToFile && sw == null)
    {
        throw new ArgumentNullException("sw");
    }

    Collection<ScriptWord> words = item.AllPronouncedNormalWords;
    ErrorSet errors = new ErrorSet();

    if (words.Count == 0)
    {
        errors.Add(ScriptError.OtherErrors, item.Id,
            Helper.NeutralFormat("No pronounced normal word."));
        return errors;
    }

    // First pass: every word needs a pronunciation before anything is written.
    foreach (ScriptWord candidate in words)
    {
        Debug.Assert(candidate != null);
        if (string.IsNullOrEmpty(candidate.Pronunciation))
        {
            errors.Add(ScriptError.OtherErrors, item.Id,
                Helper.NeutralFormat("No pronunciation normal word '{1}' in script item {0}.",
                item.Id, candidate.Grapheme));
        }
    }

    if (errors.Count != 0)
    {
        return errors;
    }

    // Second pass: label header, leading silence, words separated by short
    // pauses, trailing silence and the end-of-sentence marker.
    if (writeToFile)
    {
        sw.WriteLine("\"*/{0}.lab\"", item.Id);
        sw.WriteLine(Phoneme.SilencePhone);
    }

    int lastIndex = words.Count - 1;
    for (int index = 0; index <= lastIndex; index++)
    {
        ScriptWord word = words[index];
        Collection<TtsUnit> units = word.GetUnits(phoneme, sliceData);

        if (phoneme.Tts2srMapType == Phoneme.TtsToSrMappingType.PhoneBased)
        {
            foreach (TtsUnit unit in units)
            {
                errors.Merge(BuildMonoMlf(unit, item, sw, writeToFile, phoneme));
            }
        }
        else if (phoneme.Tts2srMapType == Phoneme.TtsToSrMappingType.SyllableBased)
        {
            foreach (ScriptSyllable syllable in word.UnitSyllables)
            {
                errors.Merge(BuildMonoMlf(syllable, item, sw, writeToFile, phoneme));
            }
        }

        // Short pause between words, but not after the last one.
        if (writeToFile && index < lastIndex)
        {
            sw.WriteLine(Phoneme.ShortPausePhone);
        }
    }

    if (writeToFile)
    {
        sw.WriteLine(Phoneme.SilencePhone);
        sw.WriteLine("."); // end of sentence
    }

    return errors;
}
/// <summary>
/// Loads and validates the lexical attribute schema found at this handler's path.
/// </summary>
/// <param name="errorSet">Receives the errors of the loaded schema.</param>
/// <returns>
/// The lexical attribute schema, or null when the schema has an error of
/// severity MustFix.
/// </returns>
internal override object LoadDataObject(ErrorSet errorSet)
{
    if (errorSet == null)
    {
        throw new ArgumentNullException("errorSet");
    }

    LexicalAttributeSchema loaded = new LexicalAttributeSchema();
    loaded.Load(this.Path);
    loaded.Validate();
    errorSet.Merge(loaded.ErrorSet);

    // A schema with must-fix errors is not handed out.
    if (loaded.ErrorSet.Contains(ErrorSeverity.MustFix))
    {
        return null;
    }

    return loaded;
}
/// <summary>
/// Loads the char table found at this handler's path.
/// Unlike a validated load, the table is returned even when it carries errors.
/// </summary>
/// <param name="errorSet">Receives the errors of the loaded char table.</param>
/// <returns>The char table data object.</returns>
internal override object LoadDataObject(ErrorSet errorSet)
{
    if (errorSet == null)
    {
        throw new ArgumentNullException("errorSet");
    }

    CharTable table = new CharTable();
    table.Load(Path);

    ErrorSet loadErrors = table.ErrorSet;
    errorSet.Merge(loadErrors);

    return table;
}
/// <summary>
/// Compiles the foreign LTS collection.
/// The output starts with the entry count and the per-entry language ids, followed by
/// a table of phone set offsets, a table of LTS offsets, the compiled phone sets and
/// finally the LTS data. Offsets are stored as 32-bit unsigned values.
/// </summary>
/// <param name="configuration">
/// Foreign LTS configuration, written as
/// "originLanguageA : phonesetA ; RuleA ; originLanguageB: phonesetB ; RuleB".
/// Relative paths are resolved against the data root of the data handler list.
/// </param>
/// <param name="outputStream">Output stream.</param>
/// <returns>Error set.</returns>
/// <exception cref="ArgumentNullException">If <paramref name="configuration"/> is null.</exception>
/// <exception cref="InvalidDataException">If an offset does not fit into 32 bits.</exception>
private ErrorSet CompileForeignLtsCollection(string configuration, Stream outputStream)
{
    if (configuration == null)
    {
        throw new ArgumentNullException("configuration");
    }

    ErrorSet errorSet = new ErrorSet();

    // The configuration is written in
    // "originLanguageA : phonesetA ; RuleA ; originLanguageB: phonesetB ; RuleB"
    string[] phonesetLtsList = configuration.Split(new char[] { ';' },
        StringSplitOptions.RemoveEmptyEntries);

    // Entries come in (phone set, LTS rule) pairs; a trailing unpaired entry is ignored.
    ushort count = Convert.ToUInt16(phonesetLtsList.Length / 2);
    Offline.Language[] languages = new Offline.Language[count];
    TtsPhoneSet[] phoneSets = new TtsPhoneSet[count];
    string[] ltsPaths = new string[count];

    // Load the phone sets
    for (int i = 0; i < count; i++)
    {
        languages[i] = Offline.Language.Neutral;
        string phoneSetPath = phonesetLtsList[i * 2].Trim();

        // An optional "language :" prefix overrides the language of the phone set.
        int languageSeparatorIndex = phoneSetPath.IndexOf(':');
        if (languageSeparatorIndex != -1)
        {
            string language = phoneSetPath.Substring(0, languageSeparatorIndex).Trim();
            languages[i] = Localor.StringToLanguage(language);
            phoneSetPath = phoneSetPath.Substring(languageSeparatorIndex + 1,
                phoneSetPath.Length - languageSeparatorIndex - 1).Trim();
        }

        if (!Path.IsPathRooted(phoneSetPath))
        {
            phoneSetPath = Path.Combine(_dataHandlerList.DataRoot, phoneSetPath);
        }

        phoneSets[i] = new TtsPhoneSet();
        phoneSets[i].Load(phoneSetPath);
        phoneSets[i].Validate();
        if (languages[i] == Offline.Language.Neutral)
        {
            languages[i] = phoneSets[i].Language;
        }

        errorSet.Merge(phoneSets[i].ErrorSet);
        if (phoneSets[i].ErrorSet.Contains(ErrorSeverity.MustFix))
        {
            phoneSets[i] = null;
        }
        else
        {
            ltsPaths[i] = phonesetLtsList[(i * 2) + 1].Trim();
            if (!Path.IsPathRooted(ltsPaths[i]))
            {
                ltsPaths[i] = Path.Combine(_dataHandlerList.DataRoot, ltsPaths[i]);
            }
        }
    }

    if (!errorSet.Contains(ErrorSeverity.MustFix))
    {
        // NOTE(review): the writer is not disposed, presumably so that outputStream
        // stays open for the caller - confirm before adding a using block.
        BinaryWriter bw = new BinaryWriter(outputStream);

        bw.Write((ushort)count);
        for (int i = 0; i < count; i++)
        {
            bw.Write((ushort)languages[i]);
            bw.Write((ushort)phoneSets[i].Language);
        }

        // Reserve the phone set offset table; it is patched below.
        // The loop counters are int: a byte counter wraps at 255 and never
        // reaches a larger count.
        long phoneSetOffset = bw.BaseStream.Position;
        for (int i = 0; i < count; i++)
        {
            bw.Write((uint)0);
        }

        // Reserve the LTS offset table; it is patched below.
        long ltsOffset = bw.BaseStream.Position;
        for (int i = 0; i < count; i++)
        {
            bw.Write((uint)0);
        }

        // Write phone set
        for (int i = 0; i < count; i++)
        {
            long offset = bw.BaseStream.Position;
            bw.BaseStream.Seek(phoneSetOffset, SeekOrigin.Begin);
            if (offset > uint.MaxValue)
            {
                throw new InvalidDataException(Helper.NeutralFormat(
                    "Foreign LTS collection size exceeds the maximal size {0}", uint.MaxValue));
            }

            // Patch the table entry, then return to the data position.
            bw.Write((uint)offset);
            phoneSetOffset += sizeof(uint);
            bw.BaseStream.Seek(offset, SeekOrigin.Begin);
            errorSet.Merge(PhoneSetCompiler.Compile(phoneSets[i], bw.BaseStream));
        }

        // Write LTS
        for (int i = 0; i < count; i++)
        {
            long offset = bw.BaseStream.Position;
            bw.BaseStream.Seek(ltsOffset, SeekOrigin.Begin);
            if (offset > uint.MaxValue)
            {
                throw new InvalidDataException(Helper.NeutralFormat(
                    "Foreign LTS collection size exceeds the maximal size {0}", uint.MaxValue));
            }

            // Patch the table entry, then return to the data position.
            bw.Write((uint)offset);
            ltsOffset += sizeof(uint);
            bw.BaseStream.Seek(offset, SeekOrigin.Begin);
            LoadStream(ltsPaths[i], bw.BaseStream);
        }
    }

    return errorSet;
}
/// <summary>
/// Collects the phones of every sentence of this item, in sentence order.
/// </summary>
/// <param name="phoneSet">Phone set.</param>
/// <param name="errors">
/// Cleared on entry, then filled with the errors reported by the sentences
/// (for example for invalid phones).
/// </param>
/// <returns>The phones.</returns>
public Collection<Phone> GetPhones(TtsPhoneSet phoneSet, ErrorSet errors)
{
    if (phoneSet == null)
    {
        throw new ArgumentNullException("phoneSet");
    }

    if (errors == null)
    {
        throw new ArgumentNullException("errors");
    }

    errors.Clear();

    Collection<Phone> allPhones = new Collection<Phone>();
    foreach (ScriptSentence sentence in Sentences)
    {
        // Each sentence reports into its own set, which is merged afterwards.
        ErrorSet perSentence = new ErrorSet();
        Collection<Phone> sentencePhones = sentence.GetPhones(phoneSet, perSentence);
        foreach (Phone sentencePhone in sentencePhones)
        {
            allPhones.Add(sentencePhone);
        }

        errors.Merge(perSentence);
    }

    return allPhones;
}
/// <summary>
/// Builds the MLF for all items of a script file.
/// The script is loaded with validation and the items flagged for deletion are removed
/// before the remaining items are processed.
/// </summary>
/// <param name="scriptFilePath">The location of the script file.</param>
/// <param name="outFilePath">The location of the MLF file; only used when <paramref name="writeToFile"/> is true.</param>
/// <param name="writeToFile">Whether writing to file.</param>
/// <param name="phoneme">Phoneme.</param>
/// <param name="validateSetting">Validation setting used to load the script.</param>
/// <param name="sliceData">Slice data.</param>
/// <returns>The script validation errors plus the errors found while building.</returns>
/// <exception cref="ArgumentNullException">
/// If <paramref name="phoneme"/>, <paramref name="sliceData"/> or
/// <paramref name="validateSetting"/> is null.
/// </exception>
/// <exception cref="InvalidDataException">If no valid item is left in the script.</exception>
public static ErrorSet BuildMonoMlf(string scriptFilePath, string outFilePath,
    bool writeToFile, Phoneme phoneme, XmlScriptValidateSetting validateSetting,
    SliceData sliceData)
{
    if (phoneme == null)
    {
        throw new ArgumentNullException("phoneme");
    }

    if (sliceData == null)
    {
        // Report the parameter that is actually null.
        throw new ArgumentNullException("sliceData");
    }

    if (validateSetting == null)
    {
        throw new ArgumentNullException("validateSetting");
    }

    validateSetting.VerifySetting();

    ErrorSet errors = new ErrorSet();
    StreamWriter sw = null;

    try
    {
        // The writer is created inside the try block so that it is closed
        // even when writing the header fails.
        if (writeToFile)
        {
            sw = new StreamWriter(outFilePath, false, Encoding.ASCII);
            sw.WriteLine("#!MLF!#");
        }

        XmlScriptFile script = XmlScriptFile.LoadWithValidation(scriptFilePath, validateSetting);
        script.Remove(GetNeedDeleteItemIds(script.ErrorSet));
        if (script.Items.Count == 0)
        {
            throw new InvalidDataException(
                Helper.NeutralFormat("No valid items in {0}.", scriptFilePath));
        }

        errors.Merge(script.ErrorSet);
        foreach (ScriptItem item in script.Items)
        {
            errors.Merge(BuildMonoMlf(item, sw, writeToFile, phoneme, sliceData));
        }
    }
    finally
    {
        if (sw != null)
        {
            sw.Close();
        }
    }

    if (writeToFile)
    {
        // Debug.Assert is compiled out of release builds, so the format check
        // only runs in debug builds.
        Debug.Assert(HtkTool.VerifyMlfFormat(outFilePath));
    }

    return errors;
}
/// <summary>
/// Loads all the script items from the script files in a folder and its sub-folders.
/// Note: the content is not validated here, but a duplicate item id is not allowed:
/// only the first item with a given id is kept, later ones are reported as errors.
/// </summary>
/// <param name="sourceDir">Script dir.</param>
/// <param name="errors">Receives the load errors of each file and the duplicate id errors.</param>
/// <returns>Loaded items collection.</returns>
/// <exception cref="ArgumentNullException">If <paramref name="sourceDir"/> is null or empty, or <paramref name="errors"/> is null.</exception>
/// <exception cref="InvalidDataException">If the files do not all have the same language.</exception>
public static Collection<ScriptItem> LoadScriptsWithoutValidation(string sourceDir,
    ErrorSet errors)
{
    if (string.IsNullOrEmpty(sourceDir))
    {
        throw new ArgumentNullException("sourceDir");
    }

    if (errors == null)
    {
        throw new ArgumentNullException("errors");
    }

    Collection<ScriptItem> items = new Collection<ScriptItem>();

    // Maps each accepted item id to the file it was loaded from.
    Dictionary<string, string> ids = new Dictionary<string, string>();

    string pattern = @"*" + XmlScriptFile.Extension;
    Language language = Language.Neutral;
    foreach (string file in Directory.GetFiles(sourceDir, pattern, SearchOption.AllDirectories))
    {
        XmlScriptFile script = new XmlScriptFile();
        XmlScriptFile.ContentControler controler = new XmlScriptFile.ContentControler();
        controler.LoadComments = true;
        script.Load(file, controler);

        // The first file fixes the language; all other files must agree with it.
        if (language == Language.Neutral)
        {
            language = script.Language;
        }
        else if (language != script.Language)
        {
            throw new InvalidDataException(Helper.NeutralFormat(
                "The language name in File [{0}] is different from other files.", file));
        }

        errors.Merge(script.ErrorSet);
        foreach (ScriptItem item in script.Items)
        {
            if (ids.ContainsKey(item.Id))
            {
                errors.Add(ScriptError.DuplicateItemId, item.Id);
            }
            else
            {
                // Remember the id; without this the duplicate check above can never fire.
                ids.Add(item.Id, file);
                item.ScriptFile = null;
                items.Add(item);
            }
        }
    }

    return items;
}
/// <summary>
/// Converts an XML script into a two-line script.
/// Items that fail to convert are left out of the output and their errors are returned.
/// </summary>
/// <param name="xmlScript">Input XML script.</param>
/// <param name="targetFile">Output script; its directory must already exist.</param>
/// <param name="phoneSet">
/// Phone set used to convert pronunciation.
/// It can be null when you can directly get the word's pronunciation in the word's attribute.
/// </param>
/// <returns>Errors happened.</returns>
public static ErrorSet ConvertXmlScriptToTwoLineScript(string xmlScript,
    string targetFile, TtsPhoneSet phoneSet)
{
    if (string.IsNullOrEmpty(xmlScript))
    {
        throw new ArgumentNullException("xmlScript");
    }

    if (string.IsNullOrEmpty(targetFile))
    {
        throw new ArgumentNullException("targetFile");
    }

    string targetDir = Path.GetDirectoryName(targetFile);
    if (!Directory.Exists(targetDir))
    {
        throw new DirectoryNotFoundException(targetFile);
    }

    ErrorSet conversionErrors = new ErrorSet();

    XmlScriptFile source = new XmlScriptFile();
    source.Load(xmlScript);

    ScriptFile twoLineScript = new ScriptFile(source.Language);
    foreach (ScriptItem item in source.Items)
    {
        ErrorSet itemErrors = new ErrorSet();
        ScriptItem converted = ConvertScriptItemToTwoLineFormat(item, phoneSet, itemErrors);

        // Only cleanly converted items make it into the output.
        if (itemErrors.Count == 0)
        {
            twoLineScript.Items.Add(converted.Id, converted);
        }
        else
        {
            conversionErrors.Merge(itemErrors);
        }
    }

    twoLineScript.Save(targetFile, true, true);

    return conversionErrors;
}
/// <summary>
/// Converts a two-line script into an XML script, saved in Unicode encoding.
/// Items that fail to convert are left out of the output and their errors are returned.
/// </summary>
/// <param name="twoLineScript">Input two-line script.</param>
/// <param name="targetFile">Output script; its directory must already exist.</param>
/// <param name="language">Language of the created XML script.</param>
/// <param name="inScriptWithoutPron">Whether input script without pronunciation.</param>
/// <returns>
/// Errors. Read errors of the input script are only included when they carry a sentence id.
/// </returns>
public static ErrorSet ConvertTwoLineScriptToXmlScript(string twoLineScript,
    string targetFile, Language language, bool inScriptWithoutPron)
{
    if (string.IsNullOrEmpty(twoLineScript))
    {
        throw new ArgumentNullException("twoLineScript");
    }

    if (string.IsNullOrEmpty(targetFile))
    {
        throw new ArgumentNullException("targetFile");
    }

    string targetDir = Path.GetDirectoryName(targetFile);
    if (!Directory.Exists(targetDir))
    {
        throw new DirectoryNotFoundException(targetFile);
    }

    ErrorSet conversionErrors = new ErrorSet();

    Collection<ScriptItem> sourceItems = new Collection<ScriptItem>();
    DataErrorSet readErrors = ScriptFile.ReadAllData(twoLineScript, sourceItems,
        !inScriptWithoutPron, true);

    // Carry over the read errors that can be attributed to a sentence.
    foreach (DataError readError in readErrors.Errors)
    {
        if (string.IsNullOrEmpty(readError.SentenceId))
        {
            continue;
        }

        conversionErrors.Add(ScriptError.OtherErrors, readError.SentenceId,
            readError.ToString());
    }

    XmlScriptFile xmlScript = new XmlScriptFile(language);
    foreach (ScriptItem sourceItem in sourceItems)
    {
        ErrorSet itemErrors = new ErrorSet();
        ScriptItem converted = ConvertScriptItemToXmlFormat(sourceItem,
            inScriptWithoutPron, itemErrors);

        // Only cleanly converted items make it into the output.
        if (itemErrors.Count == 0)
        {
            xmlScript.Items.Add(converted);
        }
        else
        {
            conversionErrors.Merge(itemErrors);
        }
    }

    xmlScript.Save(targetFile, Encoding.Unicode);

    return conversionErrors;
}
/// <summary>
/// Merges the script files found in a folder (and its sub-folders) into one script file.
/// When validation is enabled, error items are removed from the output file.
/// </summary>
/// <param name="scriptDir">Dir containing the script files.</param>
/// <param name="errorSet">Receives the errors of each script and of each failed add.</param>
/// <param name="resetId">True means resetting id: items get consecutive ten-digit ids.</param>
/// <param name="validateSetting">Validation setting; a scope of None skips validation.</param>
/// <param name="contentController">Content controller handed to the script loader.</param>
/// <returns>Xml script file.</returns>
/// <exception cref="InvalidDataException">If the scripts do not all have the same language.</exception>
public static XmlScriptFile MergeScripts(string scriptDir, ErrorSet errorSet,
    bool resetId, XmlScriptValidateSetting validateSetting, object contentController)
{
    if (string.IsNullOrEmpty(scriptDir))
    {
        throw new ArgumentNullException("scriptDir");
    }

    if (errorSet == null)
    {
        throw new ArgumentNullException("errorSet");
    }

    if (validateSetting == null)
    {
        throw new ArgumentNullException("validateSetting");
    }

    if (!Directory.Exists(scriptDir))
    {
        throw new DirectoryNotFoundException(scriptDir);
    }

    validateSetting.VerifySetting();
    XmlScriptValidationScope validationScope = validateSetting.ValidationScope;
    bool needsValidation = validationScope != XmlScriptValidationScope.None;

    string searchPattern = "*" + XmlScriptFile.Extension;
    string[] scriptFiles = Directory.GetFiles(scriptDir, searchPattern,
        SearchOption.AllDirectories);

    XmlScriptFile merged = new XmlScriptFile();
    long nextId = 0;

    foreach (string scriptFile in scriptFiles)
    {
        XmlScriptFile current = new XmlScriptFile();
        current.Load(scriptFile, contentController);

        // The first script fixes the language of the merged script.
        if (merged.Language == Language.Neutral)
        {
            merged.Language = current.Language;
        }
        else if (merged.Language != current.Language)
        {
            throw new InvalidDataException(
                Helper.NeutralFormat("Inconsistent langage in {0}", scriptFile));
        }

        if (needsValidation)
        {
            current.PosSet = validateSetting.PosSet;
            current.PhoneSet = validateSetting.PhoneSet;
            current.Validate(validateSetting);
            current.Remove(GetNeedDeleteItemIds(current.ErrorSet));
        }

        errorSet.Merge(current.ErrorSet);

        foreach (ScriptItem item in current.Items)
        {
            // The id is always assigned, either to a fresh number or to itself.
            string mergedId = resetId
                ? Helper.NeutralFormat("{0:D10}", ++nextId)
                : item.Id;
            item.Id = mergedId;

            ErrorSet addErrors = new ErrorSet();
            bool added = merged.Add(item, addErrors, false);
            if (!added)
            {
                // Added failed
                errorSet.Merge(addErrors);
            }
        }
    }

    return merged;
}
/// <summary>
/// Converts a grammatical case string into the list of the lower-cased names of the
/// <see cref="GrammaticalCase"/> values whose bits are set in its id.
/// </summary>
/// <param name="grammaticalCase">Grammatical case string, converted to an id with StringToId.</param>
/// <param name="errorSet">Receives the errors reported by the string-to-id conversion.</param>
/// <returns>Lower-cased (invariant culture) names of the matching enum values.</returns>
/// <exception cref="ArgumentNullException">If <paramref name="errorSet"/> is null.</exception>
public static ArrayList ConvertIntoArray(string grammaticalCase, ErrorSet errorSet)
{
    if (errorSet == null)
    {
        throw new ArgumentNullException("errorSet");
    }

    ArrayList caseNames = new ArrayList();

    int caseId = 0;
    ErrorSet conversionErrors = StringToId(grammaticalCase, out caseId);
    errorSet.Merge(conversionErrors);

    Type caseType = typeof(GrammaticalCase);
    foreach (int flag in Enum.GetValues(caseType))
    {
        // Skip the values that share no bit with the id.
        if ((caseId & flag) == 0)
        {
            continue;
        }

        string flagName = Enum.GetName(caseType, flag);
        caseNames.Add(flagName.ToLowerInvariant());
    }

    return caseNames;
}
/// <summary>
/// Compiles the lexicon into its binary form.
/// The lexicon is re-saved through a content controller into memory, reloaded,
/// validated against the schema and phone set, saved as a vendor lexicon and then
/// compiled by the external vendor build tool.
/// </summary>
/// <param name="outputStream">Output stream receiving the compiled lexicon.</param>
/// <returns>Error set.</returns>
/// <exception cref="ArgumentNullException">If <paramref name="outputStream"/> is null.</exception>
private ErrorSet CompileLexicon(Stream outputStream)
{
    if (outputStream == null)
    {
        throw new ArgumentNullException("outputStream");
    }

    ErrorSet errorSet = new ErrorSet();
    ErrorSet subErrorSet = new ErrorSet();

    LexicalAttributeSchema schema = (LexicalAttributeSchema)GetObject(
        RawDataName.LexicalAttributeSchema, subErrorSet);
    MergeDependencyError(errorSet, subErrorSet, _schemaFullName);

    subErrorSet.Clear();
    TtsPhoneSet phoneSet = (TtsPhoneSet)GetObject(RawDataName.PhoneSet, subErrorSet);
    MergeDependencyError(errorSet, subErrorSet, RawDataName.PhoneSet);

    if (errorSet.Contains(ErrorSeverity.MustFix))
    {
        return errorSet;
    }

    Microsoft.Tts.Offline.Core.Lexicon lexicon =
        (Microsoft.Tts.Offline.Core.Lexicon)GetObject(RawDataName.Lexicon, errorSet);

    // The lexicon must be checked before it is dereferenced; otherwise a missing
    // lexicon surfaces as a NullReferenceException instead of a raw data error.
    if (lexicon == null)
    {
        errorSet.Add(DataCompilerError.RawDataError, "Lexicon");
        return errorSet;
    }

    errorSet.Merge(lexicon.ErrorSet);

    // Change to case insensitive lexicon
    // NOTE(review): the controller is created with IsCaseSensitive = true although the
    // result is named case-insensitive - confirm this flag is intended.
    MemoryStream lexiconStream = new MemoryStream();
    using (XmlWriter xmlWriter = XmlWriter.Create(lexiconStream))
    {
        Microsoft.Tts.Offline.Core.Lexicon.ContentControler lexiconControler =
            new Microsoft.Tts.Offline.Core.Lexicon.ContentControler();
        lexiconControler.IsCaseSensitive = true;
        lexicon.Save(xmlWriter, lexiconControler);
    }

    lexiconStream.Seek(0, SeekOrigin.Begin);
    Microsoft.Tts.Offline.Core.Lexicon caseInsensitiveLexicon =
        new Microsoft.Tts.Offline.Core.Lexicon();

    // Disposing the reader also disposes the memory stream.
    using (StreamReader sr = new StreamReader(lexiconStream))
    {
        caseInsensitiveLexicon.Load(sr);
    }

    if (!errorSet.Contains(ErrorSeverity.MustFix))
    {
        caseInsensitiveLexicon.LexicalAttributeSchema = schema;
        caseInsensitiveLexicon.PhoneSet = phoneSet;
        caseInsensitiveLexicon.Validate();

        // Set severity of errors only in case-insensitive lexicon to NoError for they're not treated as real error
        caseInsensitiveLexicon.ErrorSet.SetSeverity(ErrorSeverity.NoError);

        string vendorLexiconPath = Helper.GetTempFileName();
        try
        {
            caseInsensitiveLexicon.SaveToVendorLexicon(vendorLexiconPath);

            string toolFileName = ToolName.BldVendor2;
            string binaryLexiconPath = Helper.GetTempFileName();
            string compilingArguments = Helper.NeutralFormat(
                "-v {0} V2 \"{1}\" \"{2}\" \"{3}\" TTS",
                (int)_language,
                _dataHandlerList.Datas[RawDataName.LexicalAttributeSchema].Path,
                vendorLexiconPath,
                binaryLexiconPath);
            string toolPath = Path.Combine(ToolDir, toolFileName);

            CheckToolExists(toolPath, errorSet);
            if (!errorSet.Contains(ErrorSeverity.MustFix))
            {
                // NOTE(review): binaryLexiconPath is not deleted here; presumably
                // HandleCommandLine consumes and removes it - confirm.
                HandleCommandLine(ModuleDataName.Lexicon, toolPath, compilingArguments,
                    binaryLexiconPath, outputStream, errorSet);
            }
        }
        finally
        {
            // Remove the intermediate vendor lexicon even when compiling throws.
            File.Delete(vendorLexiconPath);
        }

        errorSet.Merge(caseInsensitiveLexicon.ErrorSet);
    }
    else
    {
        errorSet.Merge(caseInsensitiveLexicon.ErrorSet);
    }

    return errorSet;
}
/// <summary>
/// Collects the phones of every word of this sentence, in word order.
/// </summary>
/// <param name="phoneSet">Phone set.</param>
/// <param name="errors">
/// Cleared on entry, then filled with the errors reported by the words
/// (for example for invalid phones).
/// </param>
/// <returns>The phones.</returns>
public Collection<Phone> GetPhones(TtsPhoneSet phoneSet, ErrorSet errors)
{
    if (phoneSet == null)
    {
        throw new ArgumentNullException("phoneSet");
    }

    if (errors == null)
    {
        throw new ArgumentNullException("errors");
    }

    errors.Clear();

    Collection<Phone> sentencePhones = new Collection<Phone>();
    foreach (ScriptWord currentWord in Words)
    {
        // Each word reports into its own set, which is merged afterwards.
        ErrorSet perWord = new ErrorSet();
        foreach (Phone wordPhone in currentWord.GetPhones(phoneSet, perWord))
        {
            sentencePhones.Add(wordPhone);
        }

        errors.Merge(perWord);
    }

    return sentencePhones;
}
/// <summary>
/// Char table compiler: validates the char table against the lexicon and phone set,
/// logs the validation findings and compiles the table into the output stream.
/// </summary>
/// <param name="outputStream">Output Stream.</param>
/// <returns>
/// ErrorSet. An <see cref="XmlException"/> raised on the way is reported as a raw data
/// error instead of being thrown.
/// </returns>
/// <exception cref="InvalidDataException">If the char table language differs from the validator language.</exception>
private ErrorSet CompileCharTable(Stream outputStream)
{
    ErrorSet result = new ErrorSet();

    try
    {
        CharTable table = (CharTable)GetObject(RawDataName.CharTable, result);
        ChartableValidator validator = new ChartableValidator();
        Microsoft.Tts.Offline.Core.Lexicon lexicon =
            (Microsoft.Tts.Offline.Core.Lexicon)GetObject(RawDataName.Lexicon, result);
        TtsPhoneSet phoneSet = (TtsPhoneSet)GetObject(RawDataName.PhoneSet, result);

        // Nothing can be compiled when a dependency failed to load.
        if (result.Contains(ErrorSeverity.MustFix))
        {
            return result;
        }

        validator.Lexicon = lexicon;
        validator.PhoneSet = phoneSet;
        validator.EnsureInitialized();

        if (table.Language != validator.Language)
        {
            throw new InvalidDataException("chartable language should match with lexicon or phoneset");
        }

        // Validation findings are logged as errors or warnings by severity.
        ErrorSet findings = validator.Validate(table, false, null);
        foreach (Error finding in findings.Errors)
        {
            if (finding.Severity != ErrorSeverity.MustFix)
            {
                result.Add(DataCompilerError.CompilingLogWithWarning,
                    RawDataName.CharTable, finding.ToString());
            }
            else
            {
                result.Add(DataCompilerError.CompilingLogWithError,
                    RawDataName.CharTable, finding.ToString());
            }
        }

        result.Merge(CharTableCompiler.Compile(table, phoneSet, outputStream));
    }
    catch (XmlException e)
    {
        result.Add(DataCompilerError.RawDataError, e.Message);
    }

    return result;
}
/// <summary>
/// Collects the normal phone names of every sentence of this item, in sentence order.
/// </summary>
/// <param name="phoneSet">Phone set.</param>
/// <param name="errors">Cleared on entry, then filled with the errors reported by the sentences.</param>
/// <returns>The phone names.</returns>
public Collection<string> GetNormalPhoneNames(TtsPhoneSet phoneSet, ErrorSet errors)
{
    if (phoneSet == null)
    {
        throw new ArgumentNullException("phoneSet");
    }

    if (errors == null)
    {
        throw new ArgumentNullException("errors");
    }

    errors.Clear();

    Collection<string> phoneNames = new Collection<string>();
    foreach (ScriptSentence currentSentence in Sentences)
    {
        // Each sentence reports into its own set, which is merged afterwards.
        ErrorSet perSentence = new ErrorSet();
        foreach (string phoneName in currentSentence.GetNormalPhoneNames(phoneSet, perSentence))
        {
            phoneNames.Add(phoneName);
        }

        errors.Merge(perSentence);
    }

    return phoneNames;
}
public ErrorSet Build(string moduleDataName, Stream outputStream, bool isEnableValidate, string formatGuid) { ////#region Check arguments if (string.IsNullOrEmpty(moduleDataName)) { throw new ArgumentNullException("dataName"); } if (outputStream == null) { throw new ArgumentNullException("outputStream"); } ////#endregion ErrorSet errorSet = new ErrorSet(); ErrorSet subErrorSet = new ErrorSet(); try { switch (moduleDataName) { case ModuleDataName.PhoneSet: TtsPhoneSet phoneSet = (TtsPhoneSet)GetObject(RawDataName.PhoneSet, errorSet); if (!errorSet.Contains(ErrorSeverity.MustFix)) { errorSet.Merge(PhoneSetCompiler.Compile(phoneSet, outputStream)); } break; case ModuleDataName.BackendPhoneSet: phoneSet = (TtsPhoneSet)GetObject(RawDataName.BackendPhoneSet, errorSet); if (!errorSet.Contains(ErrorSeverity.MustFix)) { errorSet.Merge(PhoneSetCompiler.Compile(phoneSet, outputStream)); } break; case ModuleDataName.PosSet: TtsPosSet posSet = (TtsPosSet)GetObject(RawDataName.PosSet, errorSet); if (!errorSet.Contains(ErrorSeverity.MustFix)) { errorSet.Merge(PosSetCompiler.Compile(posSet, outputStream)); } break; case ModuleDataName.PosTaggerPos: LexicalAttributeSchema schema = (LexicalAttributeSchema)GetObject( RawDataName.LexicalAttributeSchema, subErrorSet); MergeDependencyError(errorSet, subErrorSet, _schemaFullName); if (!subErrorSet.Contains(ErrorSeverity.MustFix)) { TtsPosSet postaggingPosSet = TtsPosSet.LoadPosTaggingPosFromSchema(schema); errorSet.Merge(PosSetCompiler.CompilePosTaggerPos(postaggingPosSet, outputStream)); } break; case ModuleDataName.Lexicon: errorSet = CompileLexicon(outputStream); break; case ModuleDataName.CharTable: ErrorSet charTableErrorSet = CompileCharTable(outputStream); if (!isEnableValidate) { foreach (Error error in charTableErrorSet.Errors) { error.Severity = ErrorSeverity.Warning; } } errorSet.Merge(charTableErrorSet); break; case ModuleDataName.SentenceSeparator: string sentSepDataDir = 
_dataHandlerList.Datas[RawDataName.SentenceSeparatorDataPath].Path; Collection<string> compiledSentenceSeparatorFiles = new Collection<string>(); errorSet = SentenceSeparatorCompiler.Compile(sentSepDataDir, outputStream, compiledSentenceSeparatorFiles); if (errorSet.GetSeverityCount(ErrorSeverity.MustFix) == 0 && compiledSentenceSeparatorFiles.Count > 0) { errorSet.Add(ReportCompiledFiles("sentence separator", compiledSentenceSeparatorFiles)); } break; case ModuleDataName.WordBreaker: { System.IO.MemoryStream memStream = new MemoryStream(); string wordBreakerDataDir = _dataHandlerList.Datas[RawDataName.WordBreakerDataPath].Path; Collection<string> compiledWordBreakerFiles = new Collection<string>(); errorSet = WordBreakerCompiler.Compile(wordBreakerDataDir, outputStream, compiledWordBreakerFiles, formatGuid); if (errorSet.GetSeverityCount(ErrorSeverity.MustFix) == 0 && compiledWordBreakerFiles.Count > 0) { errorSet.Add(ReportCompiledFiles("word breaker", compiledWordBreakerFiles)); } } break; case ModuleDataName.PostWordBreaker: string postWordBreakerFilePath = _dataHandlerList.Datas[RawDataName.PostWordBreaker].Path; errorSet = PostWordBreakerCompiler.Compile(postWordBreakerFilePath, outputStream); break; case ModuleDataName.ChineseTone: string chineseToneFilePath = _dataHandlerList.Datas[RawDataName.ChineseTone].Path; errorSet = ChineseToneCompiler.Compile(chineseToneFilePath, outputStream); break; case ModuleDataName.AcronymDisambiguation: { string acronymDisambiguationDataDir = _dataHandlerList.Datas[RawDataName.AcronymDisambiguation].Path; Collection<string> compiledFiles = new Collection<string>(); errorSet = CrfModelCompiler.Compile(acronymDisambiguationDataDir, outputStream, compiledFiles, _language); if (errorSet.GetSeverityCount(ErrorSeverity.MustFix) == 0 && compiledFiles.Count > 0) { errorSet.Add(ReportCompiledFiles("AcronymDisambiguation", compiledFiles)); } } break; case ModuleDataName.NEDisambiguation: { string strNeDisambiguationDataDir = 
_dataHandlerList.Datas[RawDataName.NEDisambiguation].Path; Collection<string> compiledFiles = new Collection<string>(); errorSet = CrfModelCompiler.Compile(strNeDisambiguationDataDir, outputStream, compiledFiles, _language); if (errorSet.GetSeverityCount(ErrorSeverity.MustFix) == 0 && compiledFiles.Count > 0) { errorSet.Add(ReportCompiledFiles("NEDisambiguation", compiledFiles)); } } break; case ModuleDataName.SyllabifyRule: string syllabifyRuleFilePath = _dataHandlerList.Datas[RawDataName.SyllabifyRule].Path; phoneSet = (TtsPhoneSet)GetObject(RawDataName.PhoneSet, subErrorSet); MergeDependencyError(errorSet, subErrorSet, RawDataName.PhoneSet); if (!subErrorSet.Contains(ErrorSeverity.MustFix)) { errorSet.Merge(SyllabifyRuleCompiler.Compile(syllabifyRuleFilePath, phoneSet, outputStream)); } break; case ModuleDataName.UnitGenerator: string truncRuleFilePath = _dataHandlerList.Datas[RawDataName.TruncateRule].Path; phoneSet = (TtsPhoneSet)GetObject(RawDataName.PhoneSet, subErrorSet); MergeDependencyError(errorSet, subErrorSet, RawDataName.PhoneSet); if (!subErrorSet.Contains(ErrorSeverity.MustFix)) { errorSet.Merge(UnitGeneratorDataCompiler.Compile(truncRuleFilePath, phoneSet, outputStream)); } break; case ModuleDataName.PolyphoneRule: string generalRuleFilePath = _dataHandlerList.Datas[RawDataName.PolyphoneRule].Path; phoneSet = (TtsPhoneSet)GetObject(RawDataName.PhoneSet, errorSet); PolyphonyRuleFile polyRuleFile = new PolyphonyRuleFile(); ErrorSet polyErrorSet = polyRuleFile.Load(generalRuleFilePath, phoneSet); if (!isEnableValidate) { foreach (Error error in polyErrorSet.Errors) { error.Severity = ErrorSeverity.Warning; } } errorSet.Merge(polyErrorSet); errorSet.Merge(CompileGeneralRule(generalRuleFilePath, outputStream)); break; case ModuleDataName.BoundaryPronChangeRule: generalRuleFilePath = _dataHandlerList.Datas[RawDataName.BoundaryPronChangeRule].Path; errorSet = CompileGeneralRule(generalRuleFilePath, outputStream); break; case 
ModuleDataName.SentenceDetector: generalRuleFilePath = _dataHandlerList.Datas[RawDataName.SentenceDetectRule].Path; RuleFile ruleFile = new RuleFile(); List<string> dupKeys = ruleFile.GetDupKeys(generalRuleFilePath); if (dupKeys.Count > 0) { foreach (string key in dupKeys) { errorSet.Add(new Error(DataCompilerError.DuplicateItemKey, key)); } } else { errorSet = CompileGeneralRule(generalRuleFilePath, outputStream); } break; case ModuleDataName.QuotationMarkTable: QuotationMarkTable quoteTable = QuotationMarkTable.Read(_dataHandlerList.Datas[RawDataName.QuotationMarkTable].Path); errorSet = QuotationMarkCompiler.Compile(quoteTable, outputStream); break; case ModuleDataName.ParallelStructTable: schema = (LexicalAttributeSchema)GetObject( RawDataName.LexicalAttributeSchema, subErrorSet); MergeDependencyError(errorSet, subErrorSet, _schemaFullName); if (!subErrorSet.Contains(ErrorSeverity.MustFix)) { TtsPosSet postaggingPosSet = TtsPosSet.LoadPosTaggingPosFromSchema(schema); ParallelStructTable parallelStructTable = ParallelStructTable.Read(_dataHandlerList.Datas[RawDataName.ParallelStructTable].Path); if (postaggingPosSet != null) { errorSet = ParallelStructCompiler.Compile(parallelStructTable, postaggingPosSet, outputStream); } } break; case ModuleDataName.WordFeatureSuffixTable: schema = (LexicalAttributeSchema)GetObject( RawDataName.LexicalAttributeSchema, subErrorSet); MergeDependencyError(errorSet, subErrorSet, _schemaFullName); if (!subErrorSet.Contains(ErrorSeverity.MustFix)) { WordFeatureSuffixTable suffixTable = WordFeatureSuffixTable.Read(_dataHandlerList.Datas[RawDataName.WordFeatureSuffixTable].Path); errorSet = WordFeatureSuffixCompiler.Compile(suffixTable, outputStream); } break; case ModuleDataName.LtsRule: string ltsRuleDataPath = _dataHandlerList.Datas[RawDataName.LtsRuleDataPath].Path; errorSet = CompileLtsRule(ltsRuleDataPath, outputStream); break; case ModuleDataName.PhoneEventData: PhoneConverterWrapper pcw = null; // Check if the language has 
phone mapping data. if (_moduleDataSet.ContainsKey(ModuleDataName.PhoneMappingRule)) { // Check phone mapping binary data dependency. if (_moduleDataSet[ModuleDataName.PhoneMappingRule].Data != null) { pcw = new PhoneConverterWrapper(_language, _moduleDataSet[ModuleDataName.PhoneMappingRule].Data); } else { errorSet.Add(DataCompilerError.DependenciesNotValid, "Please make sure that PhoneMappingRule has been compiled before PhoneEvent"); } } if (errorSet.GetSeverityCount(ErrorSeverity.MustFix) == 0) { phoneSet = (TtsPhoneSet)GetObject(RawDataName.PhoneSet, errorSet); errorSet = PhoneEventCompiler.Compile(phoneSet, pcw, outputStream); } break; case ModuleDataName.PosRule: string lexicalRuleFilePath = _dataHandlerList.Datas[RawDataName.PosLexicalRule].Path; string contextualRuleFilePath = _dataHandlerList.Datas[RawDataName.PosContextualRule].Path; schema = (LexicalAttributeSchema)GetObject( RawDataName.LexicalAttributeSchema, subErrorSet); MergeDependencyError(errorSet, subErrorSet, _schemaFullName); if (!subErrorSet.Contains(ErrorSeverity.MustFix)) { posSet = TtsPosSet.LoadPosTaggingPosFromSchema(schema); string posSetFilePath = Helper.GetTempFileName(); posSet.Save(posSetFilePath, Encoding.Unicode); errorSet.Merge(CompilePosRule(lexicalRuleFilePath, contextualRuleFilePath, posSetFilePath, outputStream)); File.Delete(posSetFilePath); } break; case ModuleDataName.TnRule: { string tnmlRuleFilePath = _dataHandlerList.Datas[moduleDataName].Path; string schemaFilePath = _dataHandlerList.Datas[RawDataName.LexicalAttributeSchema].Path; errorSet = CompileTnml(tnmlRuleFilePath, schemaFilePath, outputStream, true); } break; case ModuleDataName.FstNERule: { string fstNERuleFilePath = _dataHandlerList.Datas[moduleDataName].Path; errorSet = CompileFstNE(fstNERuleFilePath, outputStream); } break; case ModuleDataName.CompoundRule: { string tnmlRuleFilePath = _dataHandlerList.Datas[moduleDataName].Path; string schemaFilePath = 
_dataHandlerList.Datas[RawDataName.LexicalAttributeSchema].Path; phoneSet = (TtsPhoneSet)GetObject(RawDataName.PhoneSet, errorSet); ErrorSet compundRuleError = DataFileValidator.ValidateCompoundRule( _dataHandlerList.Datas[moduleDataName].Path, phoneSet); if (!isEnableValidate) { foreach (Error error in compundRuleError.Errors) { error.Severity = ErrorSeverity.Warning; } } errorSet.Merge(compundRuleError); errorSet.Merge(CompileTnml(tnmlRuleFilePath, schemaFilePath, outputStream, false)); } break; case ModuleDataName.PhoneMappingRule: case ModuleDataName.BackendPhoneMappingRule: case ModuleDataName.FrontendBackendPhoneMappingRule: case ModuleDataName.MixLingualPOSConverterData: { string tnmlRuleFilePath = _dataHandlerList.Datas[moduleDataName].Path; string schemaFilePath = _dataHandlerList.Datas[RawDataName.LexicalAttributeSchema].Path; errorSet = CompileTnml(tnmlRuleFilePath, schemaFilePath, outputStream, false); } break; case ModuleDataName.ForeignLtsCollection: errorSet = CompileForeignLtsCollection(_dataHandlerList.Datas[moduleDataName].Path, outputStream); break; case ModuleDataName.PolyphonyModel: { string polyphonyModelDataDir = _dataHandlerList.Datas[RawDataName.PolyphonyModel].Path; Collection<string> compiledFiles = new Collection<string>(); errorSet = CrfModelCompiler.Compile(polyphonyModelDataDir, outputStream, compiledFiles, _language); if (errorSet.GetSeverityCount(ErrorSeverity.MustFix) == 0 && compiledFiles.Count > 0) { errorSet.Add(ReportCompiledFiles("PolyphonyModel", compiledFiles)); } } break; case ModuleDataName.RNNPolyphonyModel: { string polyphonyModelDataPath = _dataHandlerList.Datas[RawDataName.RNNPolyphonyModel].Path; Collection<string> compiledFiles = new Collection<string>(); errorSet = RNNModelCompiler.Compile(polyphonyModelDataPath, outputStream, compiledFiles); if (errorSet.GetSeverityCount(ErrorSeverity.MustFix) == 0 && compiledFiles.Count > 0) { errorSet.Add(ReportCompiledFiles("PolyphonyModel", compiledFiles)); } } break; default: 
errorSet.Add(DataCompilerError.InvalidModuleData, moduleDataName); break; } } catch (Exception ex) { Type exceptionType = ex.GetType(); if (exceptionType.Equals(typeof(FileNotFoundException)) || exceptionType.Equals(typeof(ArgumentNullException)) || exceptionType.Equals(typeof(XmlException)) || exceptionType.Equals(typeof(InvalidDataException))) { errorSet.Add(DataCompilerError.RawDataNotFound, moduleDataName, Helper.BuildExceptionMessage(ex)); } else { throw; } } return errorSet; }
/// <summary>
/// Loads the phone set stored at <c>this.Path</c>, validates it and copies
/// every error it reports into <paramref name="errorSet"/>.
/// </summary>
/// <param name="errorSet">Error set that receives the phone set's errors.</param>
/// <returns>
/// The loaded phone set, or null when the phone set's own error set
/// contains an error of severity MustFix.
/// </returns>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="errorSet"/> is null.
/// </exception>
internal override object LoadDataObject(ErrorSet errorSet)
{
    if (errorSet == null)
    {
        throw new ArgumentNullException("errorSet");
    }

    TtsPhoneSet loaded = new TtsPhoneSet();
    loaded.Load(this.Path);
    loaded.Validate();

    // Report everything found, including non-fatal entries.
    errorSet.Merge(loaded.ErrorSet);

    // A phone set with a MustFix error is not handed back to the caller.
    bool hasMustFix = loaded.ErrorSet.Contains(ErrorSeverity.MustFix);
    return hasMustFix ? null : loaded;
}
/// <summary>
/// Verifies that each mandatory module data (phone set, POS tagger POS,
/// lexicon and char table) is present in the module data set. For every
/// missing entry a NecessaryDataMissing error is recorded and the module
/// data is compiled, with the compile errors collected as well.
/// </summary>
/// <param name="dataObjects">Data objects. Not read by this method.</param>
/// <returns>The missing-data errors together with the errors of the triggered compilations.</returns>
private ErrorSet EnsureNecessaryData(Dictionary<string, LangDataObject> dataObjects)
{
    ErrorSet collected = new ErrorSet();

    string[] mandatoryNames =
    {
        ModuleDataName.PhoneSet,
        ModuleDataName.PosTaggerPos,
        ModuleDataName.Lexicon,
        ModuleDataName.CharTable
    };

    foreach (string mandatoryName in mandatoryNames)
    {
        if (_moduleDataSet.ContainsKey(mandatoryName))
        {
            // Already available, nothing to do for this one.
            continue;
        }

        collected.Add(new Error(DataCompilerError.NecessaryDataMissing, mandatoryName));
        collected.Merge(Compile(mandatoryName));
    }

    return collected;
}
/// <summary>
/// Compiles the truncate rule file into <paramref name="outputStream"/>.
/// The phone set is validated first; when it contains a MustFix error
/// nothing is written and an InvalidPhoneSet error is returned instead.
/// </summary>
/// <param name="truncRuleFileName">File path of trunc rule.</param>
/// <param name="phoneSet">Phone set.</param>
/// <param name="outputStream">Output stream.</param>
/// <returns>Errors found while validating the phone set or compiling the rule data.</returns>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="truncRuleFileName"/> is null or empty, or when
/// <paramref name="phoneSet"/> or <paramref name="outputStream"/> is null.
/// </exception>
public static ErrorSet Compile(string truncRuleFileName, TtsPhoneSet phoneSet, Stream outputStream)
{
    if (string.IsNullOrEmpty(truncRuleFileName))
    {
        throw new ArgumentNullException("truncRuleFileName");
    }

    if (phoneSet == null)
    {
        throw new ArgumentNullException("phoneSet");
    }

    if (outputStream == null)
    {
        throw new ArgumentNullException("outputStream");
    }

    ErrorSet result = new ErrorSet();

    phoneSet.Validate();
    if (phoneSet.ErrorSet.Contains(ErrorSeverity.MustFix))
    {
        result.Add(UnitGeneratorDataCompilerError.InvalidPhoneSet);
        return result;
    }

    // The writer is not disposed here: disposing a BinaryWriter also closes
    // the stream it wraps, and the stream was supplied by the caller.
    BinaryWriter writer = new BinaryWriter(outputStream);
    result.Merge(CompTruncRuleData(truncRuleFileName, phoneSet, writer));

    return result;
}
/// <summary>
/// Checks that the phones of a script item agree with its segmentation
/// (alignment) file. Problems are appended to <paramref name="errorSet"/>;
/// the checks run in this order and stop at the first structural failure:
/// the alignment file cannot be validated, it has no segments, its last
/// segment is not a silence phone, the phone count differs from the
/// non-silence segment count, and finally each label is compared in turn.
/// </summary>
/// <param name="script">Script file instance; its phone set is used to get the item's phones.</param>
/// <param name="item">Script item.</param>
/// <param name="fileMap">File list map, used to find the segment file name for the item id.</param>
/// <param name="segmentDir">Segment file directory.</param>
/// <param name="errorSet">Data error set found.</param>
public static void ValidateDataAlignment(XmlScriptFile script, ScriptItem item,
    FileListMap fileMap, string segmentDir, ErrorSet errorSet)
{
    string alignmentPath = Path.Combine(segmentDir, fileMap.Map[item.Id] + ".txt");

    StringBuilder loadProblems = new StringBuilder();
    SegmentFile segmentFile = ValidateAlignmentFile(alignmentPath, loadProblems);
    if (loadProblems.Length != 0)
    {
        errorSet.Add(ScriptError.OtherErrors, item.Id, loadProblems.ToString());
        return;
    }

    // Phone extraction errors are always reported, whatever the later checks find.
    ErrorSet phoneErrors = new ErrorSet();
    Collection<string> phones = item.GetNormalPhoneNames(script.PhoneSet, phoneErrors);
    errorSet.Merge(phoneErrors);

    if (segmentFile.WaveSegments.Count == 0)
    {
        string noDataMessage = Helper.NeutralFormat(
            "There is no valid alignment data in alignment file {0}.",
            segmentFilePathArgument(alignmentPath));
        errorSet.Add(ScriptError.OtherErrors, item.Id, noDataMessage);
        return;
    }

    if (!segmentFile.WaveSegments[segmentFile.WaveSegments.Count - 1].IsSilencePhone)
    {
        string noTrailingSilenceMessage = Helper.NeutralFormat(
            "Alignment file {0} is invalid, for without silence segment at the end.",
            segmentFilePathArgument(alignmentPath));
        errorSet.Add(ScriptError.OtherErrors, item.Id, noTrailingSilenceMessage);
        return;
    }

    if (phones.Count != segmentFile.NonSilenceWaveSegments.Count)
    {
        string countMismatchMessage = Helper.NeutralFormat(
            "units number {0} in script file does not equal to non-silence " +
            "segments number {1} in segmentation file.",
            phones.Count, segmentFile.NonSilenceWaveSegments.Count);
        errorSet.Add(ScriptError.OtherErrors, item.Id, countMismatchMessage);
        return;
    }

    // Counts agree, so compare the labels position by position.
    for (int index = 0; index < segmentFile.NonSilenceWaveSegments.Count; index++)
    {
        WaveSegment current = segmentFile.NonSilenceWaveSegments[index];
        if (current.Label == phones[index])
        {
            continue;
        }

        string labelMismatchMessage = string.Format(CultureInfo.InvariantCulture,
            "phone [{0}/{1}] at {2} does not match between script and segment.",
            WaveSegment.FormatLabel(phones[index]), current.Label, index);
        errorSet.Add(ScriptError.OtherErrors, item.Id, labelMismatchMessage);
    }
}

/// <summary>
/// Returns the path unchanged; keeps the message-building calls above uniform.
/// </summary>
/// <param name="path">Alignment file path.</param>
/// <returns>The same path.</returns>
private static string segmentFilePathArgument(string path)
{
    return path;
}
/// <summary>
/// Validates the data: first makes sure the directory, the script file,
/// the file map file and the unit feature file exist, then collects the
/// errors reported by the unmatched-sentence search and by the feature
/// data validation.
/// </summary>
/// <param name="language">Language.</param>
/// <returns>Data error set found.</returns>
public ErrorSet Validate(Language language)
{
    // Existence checks: a missing directory or file is raised as an
    // exception, it is not reported through the returned error set.
    if (!Directory.Exists(Dir))
    {
        throw Helper.CreateException(typeof(DirectoryNotFoundException), Dir);
    }

    if (!File.Exists(ScriptFilePath))
    {
        throw Helper.CreateException(typeof(FileNotFoundException), ScriptFilePath);
    }

    if (!File.Exists(FileMapFilePath))
    {
        throw Helper.CreateException(typeof(FileNotFoundException), FileMapFilePath);
    }

    if (!File.Exists(UnitFeatureFilePath))
    {
        throw Helper.CreateException(typeof(FileNotFoundException), UnitFeatureFilePath);
    }

    ErrorSet found = new ErrorSet();
    found.Merge(FindUnmatchedSentences(ScriptFilePath, language, FileMapFilePath));
    found.Merge(ValidateFeatureData(UnitFeatureFilePath, ScriptFilePath, language));
    return found;
}
/// <summary>
/// Reads the words of a file into a list, then stores them in the string
/// pool and records their offsets.
/// </summary>
/// <param name="filePath">File path containing words.</param>
/// <param name="stringPool">String pool that receives the words.</param>
/// <param name="offsets">Offset list filled alongside the string pool.</param>
/// <param name="sort">
/// Forwarded to LoadWordsIntoWordList; presumably asks for the words to be
/// sorted before they are put into the pool (confirm against that method).
/// </param>
/// <param name="errorSet">Error set that receives the errors found while loading the words.</param>
/// <returns>Number of words in the loaded list.</returns>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="filePath"/> is null or empty, or when
/// <paramref name="stringPool"/>, <paramref name="offsets"/> or
/// <paramref name="errorSet"/> is null.
/// </exception>
public static int LoadWordsIntoStringPool(string filePath, StringPool stringPool,
    ICollection<int> offsets, bool sort, ErrorSet errorSet)
{
    if (string.IsNullOrEmpty(filePath))
    {
        throw new ArgumentNullException("filePath");
    }

    if (stringPool == null)
    {
        throw new ArgumentNullException("stringPool");
    }

    if (offsets == null)
    {
        throw new ArgumentNullException("offsets");
    }

    if (errorSet == null)
    {
        throw new ArgumentNullException("errorSet");
    }

    List<string> loadedWords = new List<string>();
    errorSet.Merge(LoadWordsIntoWordList(filePath, loadedWords, sort));

    // The pool is filled from whatever was loaded, even if errors were reported.
    StringPool.WordsToStringPool(loadedWords, stringPool, offsets);

    return loadedWords.Count;
}