/// <summary>
/// Create domain index file.
/// </summary>
/// <remarks>
/// Walks every script item listed in <paramref name="domainList"/>, derives a key for each
/// pronounced normal word (the key depends on the domain), resolves the candidate offset of
/// each unit of the word through <paramref name="uif"/>, and finally builds the hash table
/// from the collected entries. A word is only recorded when every one of its units resolves
/// to a candidate offset.
/// </remarks>
/// <param name="scriptFile">Script file.</param>
/// <param name="domainList">Domain list.</param>
/// <param name="uif">Name indexed unit features.</param>
/// <exception cref="ArgumentNullException">Any of the arguments is null.</exception>
public void Create(XmlScriptFile scriptFile, DomainConfigList domainList, UnitIndexingFile uif)
{
    // Parameters Validation
    if (scriptFile == null)
    {
        throw new ArgumentNullException("scriptFile");
    }

    if (domainList == null)
    {
        throw new ArgumentNullException("domainList");
    }

    if (uif == null)
    {
        throw new ArgumentNullException("uif");
    }

    Dictionary<string, DomainIndexItem> items =
        new Dictionary<string, DomainIndexItem>(StringComparer.Ordinal);

    _language = scriptFile.Language;
    _tag = domainList.FontTag;

    Phoneme phoneme = Localor.GetPhoneme(_language);
    SliceData sliceData = Localor.GetSliceData(_language);

    foreach (ScriptItem scriptItem in scriptFile.Items)
    {
        if (!domainList.Contains(scriptItem.Id))
        {
            continue;
        }

        Collection<TtsUnit> itemUnits = scriptItem.GetUnits(phoneme, sliceData);
        Collection<ScriptWord> allPronouncedNormalWords = scriptItem.AllPronouncedNormalWords;
        for (int i = 0; i < allPronouncedNormalWords.Count; i++)
        {
            ScriptWord word = allPronouncedNormalWords[i];

            // The dictionary key depends on the domain being indexed.
            string text;
            if (domainList.Domain == ScriptDomain.Number)
            {
                // Direct cast: a mismatched list type fails with a descriptive
                // InvalidCastException rather than a NullReferenceException.
                text = GetNumberDomainWordText(word, scriptItem.Id, i,
                    ((NumberDomainConfigList)domainList).Digitals);
            }
            else if (domainList.Domain == ScriptDomain.Acronym)
            {
                text = GetAcronymDomainWordText(word, scriptItem.Id, i,
                    ((AcronymDomainConfigList)domainList).Acronyms);
            }
            else if (domainList.Domain == ScriptDomain.Letter)
            {
                // Use pronunciation phone ids as key
                text = GetPhoneIds(word);
            }
            else
            {
                text = word.Grapheme.ToUpperInvariant();
            }

            DomainIndexItem item;
            bool alreadyIndexed = items.TryGetValue(text, out item);
            if (alreadyIndexed && domainList.Domain != ScriptDomain.Letter)
            {
                // Skip duplicate word, except Letter domain
                continue;
            }

            if (!alreadyIndexed)
            {
                item = new DomainIndexItem();
                item.Word = text;
            }

            // Resolve the offsets of all units first. Nothing is written to the entry
            // until the whole word is known to be usable; otherwise a word skipped half
            // way would leave stray offsets in an entry shared with earlier words
            // (Letter domain).
            Collection<TtsUnit> wordUnits = word.GetUnits(phoneme, sliceData);
            List<int> unitOffsets = new List<int>(wordUnits.Count);
            bool skipped = false;
            foreach (TtsUnit unit in wordUnits)
            {
                int indexOfNonSilence = itemUnits.IndexOf(unit);
                Debug.Assert(indexOfNonSilence >= 0 && indexOfNonSilence < itemUnits.Count);

                int unitOffset = uif.SearchCandidateOffset(unit.MetaUnit.Name,
                    scriptItem.Id, (uint)indexOfNonSilence);
                if (unitOffset == -1)
                {
                    // Skip this word
                    skipped = true;
                    break;
                }

                unitOffsets.Add(unitOffset);
            }

            if (skipped)
            {
                continue;
            }

            // Commit: one feature item per unit position; an existing entry receives an
            // additional candidate offset at each position.
            for (int wordUnitIndex = 0; wordUnitIndex < unitOffsets.Count; wordUnitIndex++)
            {
                if (item.FeatureItems.Count == wordUnitIndex)
                {
                    FeatureDataItem featureItem = new FeatureDataItem();
                    featureItem.UnitIndexes.Add(unitOffsets[wordUnitIndex]);
                    item.FeatureItems.Add(featureItem);
                }
                else
                {
                    item.FeatureItems[wordUnitIndex].UnitIndexes.Add(unitOffsets[wordUnitIndex]);
                }
            }

            if (!alreadyIndexed)
            {
                items.Add(item.Word, item);
            }
        }
    }

    _items = BuildHashTable(items.Values);
}
/// <summary>
/// Build hash table.
/// </summary>
/// <remarks>
/// The returned array has exactly one slot per item. The first item of every hash bucket
/// is stored at the slot equal to its hash value; the remaining items of the bucket are
/// stored in the free slots and linked from their predecessor through NextItemIndex
/// (-1 terminates a chain). Feature data and string pool offsets are then assigned
/// cumulatively in slot order.
/// </remarks>
/// <param name="items">Domain index items.</param>
/// <returns>Hash table contains domain index items.</returns>
private static DomainIndexItem[] BuildHashTable(ICollection<DomainIndexItem> items)
{
    int slotCount = items.Count;

    // Pass 1: group the items by hash value, keeping buckets ordered by hash value.
    SortedDictionary<int, Collection<DomainIndexItem>> buckets =
        new SortedDictionary<int, Collection<DomainIndexItem>>();
    foreach (DomainIndexItem entry in items)
    {
        int slot = (int)GetHashValue(entry.Word, slotCount);
        Debug.Assert(slot >= 0 && slot < slotCount);

        Collection<DomainIndexItem> bucket;
        if (!buckets.TryGetValue(slot, out bucket))
        {
            bucket = new Collection<DomainIndexItem>();
            buckets.Add(slot, bucket);
        }

        bucket.Add(entry);
    }

    // Pass 2: the head of each bucket occupies the slot of its own hash value.
    DomainIndexItem[] table = new DomainIndexItem[slotCount];
    foreach (KeyValuePair<int, Collection<DomainIndexItem>> pair in buckets)
    {
        DomainIndexItem head = pair.Value[0];
        Debug.Assert(table[pair.Key] == null);
        table[pair.Key] = head;
        head.NextItemIndex = -1;
    }

    // Pass 3: colliding items go to the free slots, scanned from left to right,
    // and are chained behind the previous item of the same bucket.
    int freeSlot = 0;
    foreach (Collection<DomainIndexItem> bucket in buckets.Values)
    {
        for (int i = 1; i < bucket.Count; i++)
        {
            while (freeSlot < table.Length && table[freeSlot] != null)
            {
                freeSlot++;
            }

            Debug.Assert(freeSlot < table.Length);
            table[freeSlot] = bucket[i];
            bucket[i].NextItemIndex = -1;
            bucket[i - 1].NextItemIndex = freeSlot;
        }
    }

    // Pass 4: running offsets into the feature data and string pool sections.
    int nextFeatureDataOffset = 0;
    int nextStringPoolOffset = 0;
    foreach (DomainIndexItem entry in table)
    {
        entry.FeatureDataOffset = nextFeatureDataOffset;
        entry.StringPoolOffset = nextStringPoolOffset;
        nextFeatureDataOffset += entry.FeatureItemBinarySize;
        nextStringPoolOffset += entry.StringBinarySize;
    }

    return table;
}
/// <summary>
/// Load domain index file.
/// </summary>
/// <remarks>
/// Reads the header, checks the file length against the sizes declared in the header,
/// then reads the hash table, feature data and string pool sections and rebuilds the
/// in-memory items from them.
/// </remarks>
/// <param name="filePath">File path.</param>
/// <exception cref="InvalidDataException">
/// The file declares no hash table items, or its length does not match the sizes
/// declared in its header.
/// </exception>
public void Load(string filePath)
{
    FileStream fs = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.Read);
    try
    {
        // Keep the full 64-bit length: narrowing to int would wrap for files larger
        // than 2 GB and make the size checks below compare against a bogus value.
        long fileLength = fs.Length;
        using (BinaryReader br = new BinaryReader(fs))
        {
            // The reader owns the stream from here on and closes it when disposed;
            // clearing the local keeps the finally block from disposing it twice.
            fs = null;

            // Load header
            DomainIndexFileHeaderSerial header = DomainIndexFileHeaderSerial.Read(br);
            if (header.HashTableItemCount <= 0)
            {
                string message = Helper.NeutralFormat("Zero hash table item in file [{0}]",
                    filePath);
                throw new InvalidDataException(message);
            }

            // Verify file size
            // NOTE(review): the extra 8 bytes are presumably a leading part of the file
            // that header.Size does not count - confirm against the file format.
            if (fileLength != header.Size + 8)
            {
                string message = Helper.NeutralFormat("Found malformed data: Expected data size = {0}," +
                    "Real file size = {1}", header.Size + 8, fileLength);
                throw new InvalidDataException(message);
            }

            int headerSize = Marshal.SizeOf(typeof(DomainIndexFileHeaderSerial));
            long expectedFileSize = headerSize + header.HashTableSize +
                header.FeatureDataSize + header.StringPoolSize;
            if (fileLength != expectedFileSize)
            {
                // Report the feature data *size*, matching both the label in the message
                // and the value used to compute expectedFileSize.
                string message = Helper.NeutralFormat("Found malformed data: header size = {0}," +
                    "hash table size = {1}, feature data size = {2}, string pool size = {3}," +
                    "real file size = {4}",
                    headerSize, header.HashTableSize, header.FeatureDataSize,
                    header.StringPoolSize, fileLength);
                throw new InvalidDataException(message);
            }

            _language = (Language)header.LanguageId;
            _tag = (FontSectionTag)header.Tag;

            // Load hash table data
            byte[] hashTableChunck = br.ReadBytes(header.HashTableSize);

            // Load feature data
            byte[] featureDataChunck = br.ReadBytes(header.FeatureDataSize);

            // Load string pool
            byte[] stringPool = br.ReadBytes(header.StringPoolSize);

            _items = new DomainIndexItem[header.HashTableItemCount];
            int offset = 0;
            for (int i = 0; i < _items.Length; i++)
            {
                DomainIndexItem item = new DomainIndexItem();

                // Load hash table; the return value advances the cursor to the next entry.
                offset += item.LoadHashtable(hashTableChunck, offset);

                // Load feature data, located through the offset just read into the item.
                item.LoadFeatureData(featureDataChunck, item.FeatureDataOffset);

                // Load word text
                item.LoadWordText(stringPool, item.StringPoolOffset);

                _items[i] = item;
            }
        }
    }
    finally
    {
        // Only reached with a live stream when creating the reader failed.
        if (null != fs)
        {
            fs.Dispose();
        }
    }
}