/// <summary>
/// Encrypts the content of a string pool and writes the encrypted bytes out.
/// </summary>
/// <param name="stringPool">String pool whose content is encrypted.</param>
/// <param name="writer">Binary data writer that receives the encrypted bytes.</param>
/// <returns>Size of bytes written out, as reported by the writer.</returns>
public static uint Write(StringPool stringPool, DataWriter writer)
{
    Helper.ThrowIfNull(stringPool);
    Helper.ThrowIfNull(writer);

    // The destination buffer is sized from the pool's reported length;
    // the encryption routine fills it from the pool's raw bytes.
    byte[] cipherBytes = new byte[stringPool.Length];
    byte[] plainBytes = stringPool.ToArray();

    Microsoft.Tts.ServiceProvider.HTSVoiceDataEncrypt.EncryptStringPool(plainBytes, cipherBytes);

    return writer.Write(cipherBytes);
}
/// <summary>
/// Builds the binary representation of this module.
/// </summary>
/// <returns>
/// A <see cref="BinaryModule"/> carrying the module name index, array copies of the
/// string/class/method/field pools, and the converted methods.
/// </returns>
public BinaryModule ToBinary()
{
    // Converting the methods may still modify the constant pools, so the methods
    // are converted first and the pools are copied to arrays only afterwards.
    BinaryMethod[] convertedMethods = Methods
        .Select(method => method?.ToBinary())
        .ToArray();

    BinaryModule binaryModule = new BinaryModule
    {
        ModuleNameIndex = ModuleNameIndex,
        StringPool = StringPool.ToArray(),
        ClassPool = ClassPool.ToArray(),
        MethodPool = MethodPool.ToArray(),
        FieldPool = FieldPool.ToArray(),
        Methods = convertedMethods
    };

    return binaryModule;
}
/// <summary>
/// Compiles an RNN model file, together with its polyphonic character list, into the output stream.
/// </summary>
/// <remarks>
/// Data is written in this order: model offset (uint, written as 0 first and patched at the end),
/// polyphonic character count (uint), one float threshold per entry, one uint string offset per
/// polyphonic character, the string pool bytes, and finally the raw bytes of the model file.
/// The model offset is patched by seeking to absolute position 0 of the stream, and the stream
/// position is left right after that patched value.
/// The polyphonic list file "RNNPolyphoneList.txt" is looked up in the parent of the directory
/// that contains the model file.
/// </remarks>
/// <param name="rnnModelPath">Path of the RNN model file.</param>
/// <param name="outputStream">Stream that receives the compiled binary data.</param>
/// <param name="addedFileNames">Collection that receives <paramref name="rnnModelPath"/> once the model has been written.</param>
/// <returns>Error set holding the problems found; a missing model file is reported here and nothing is written.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="rnnModelPath"/> is null or empty, or <paramref name="outputStream"/> or
/// <paramref name="addedFileNames"/> is null.
/// </exception>
public static ErrorSet Compile(string rnnModelPath, Stream outputStream, Collection<string> addedFileNames)
{
    if (string.IsNullOrEmpty(rnnModelPath))
    {
        throw new ArgumentNullException("rnnModelPath");
    }

    if (outputStream == null)
    {
        throw new ArgumentNullException("outputStream");
    }

    if (addedFileNames == null)
    {
        throw new ArgumentNullException("addedFileNames");
    }

    ErrorSet errorSet = new ErrorSet();

    if (!File.Exists(rnnModelPath))
    {
        errorSet.Add(RNNModelCompilerError.ModelDataNotFound, rnnModelPath);
        return errorSet;
    }

    const string PolyphoneListFileName = "RNNPolyphoneList.txt";

    // The writer is left undisposed: disposing a BinaryWriter closes its
    // underlying stream, and that stream belongs to the caller.
    BinaryWriter bw = new BinaryWriter(outputStream, Encoding.Unicode);

    // Resolve the full path first. For a bare file name Path.GetDirectoryName
    // returns an empty string, and DirectoryInfo rejects an empty path.
    string modelDirectory = Path.GetDirectoryName(Path.GetFullPath(rnnModelPath));

    // Parent is null when the model sits directly in a root directory; in that
    // case there is no place to look for the list, so report it as not found.
    DirectoryInfo listDirectory = new DirectoryInfo(modelDirectory).Parent;
    string polyphonicCharFile = listDirectory == null
        ? PolyphoneListFileName
        : Path.Combine(listDirectory.FullName, PolyphoneListFileName);

    // Load polyphonic characters that should be enabled in product.
    Dictionary<string, float> polyCharactersInfo = null;
    if (listDirectory != null && File.Exists(polyphonicCharFile))
    {
        polyCharactersInfo = LoadPolyphonicInfo(polyphonicCharFile, errorSet);
    }
    else
    {
        errorSet.Add(RNNModelCompilerError.PolyphonicCharFileNotFound, polyphonicCharFile);
    }

    // NOTE(review): polyCharactersInfo is still null when the list file is missing;
    // the two helpers below are assumed to cope with that - confirm.
    List<string> polyphones = GetPolyphonicChars(polyCharactersInfo);
    List<float> thresholds = GetPolyphonicThreshold(polyCharactersInfo);

    uint polyCharCount = 0;
    uint modelOffset = 0;

    // Write the count of polyphonic characters and the polyphonic characters.
    using (StringPool plycharSp = new StringPool())
    {
        Collection<int> polycharOffsets = new Collection<int>();
        StringPool.WordsToStringPool(polyphones, plycharSp, polycharOffsets);
        polyCharCount = (uint)polycharOffsets.Count;

        // Placeholder for the model offset; patched once the real value is known.
        bw.Write(modelOffset);
        bw.Write(polyCharCount);

        foreach (float threshold in thresholds)
        {
            bw.Write(threshold);
        }

        byte[] plycharPool = plycharSp.ToArray();
        foreach (int offset in polycharOffsets)
        {
            bw.Write((uint)offset);
        }

        bw.Write(plycharPool, 0, plycharPool.Length);
    }

    // The model data starts right after the polyphonic character section.
    modelOffset = (uint)bw.BaseStream.Position;

    // Write the RNN model bytes unchanged.
    using (FileStream fs = new FileStream(rnnModelPath, FileMode.Open, FileAccess.Read))
    using (BinaryReader br = new BinaryReader(fs))
    {
        bw.Write(br.ReadBytes((int)fs.Length));
    }

    bw.Flush();

    // Go back to the start of the stream and patch the model offset placeholder.
    bw.Seek(0, SeekOrigin.Begin);
    bw.Write(modelOffset);
    bw.Flush();

    addedFileNames.Add(rnnModelPath);

    return errorSet;
}
/// <summary>
/// Compiles all "*.crf" model files of a directory, together with their tags, into the output stream.
/// </summary>
/// <remarks>
/// Data is written in this order: tag pool offset (uint) and model pool offset (uint), both written
/// as 0 first and patched at the end; tag count (uint); one uint offset per model; one uint offset
/// per tag; the model pool bytes; the tag pool bytes.
/// The two header values are patched by seeking to absolute position 0 of the stream, and the stream
/// position is left right after them.
/// For zh-CN and ja-JP the tag of a model is taken from "CRFLocalizedMapping.txt", which is looked up
/// in the parent of <paramref name="crfModelDir"/>. A model without a mapping entry keeps the tag
/// derived from its file name, so that every model always has exactly one tag.
/// </remarks>
/// <param name="crfModelDir">Directory that contains the "*.crf" model files.</param>
/// <param name="outputStream">Stream that receives the compiled binary data.</param>
/// <param name="addedFileNames">Collection that receives the path of every model file that was read.</param>
/// <param name="lang">Language of the models; selects whether localized tag names are used.</param>
/// <returns>Error set holding the problems found; a missing directory is reported here and nothing is written.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="crfModelDir"/> is null or empty, or <paramref name="outputStream"/> or
/// <paramref name="addedFileNames"/> is null.
/// </exception>
public static ErrorSet Compile(string crfModelDir, Stream outputStream, Collection<string> addedFileNames, Language lang)
{
    if (string.IsNullOrEmpty(crfModelDir))
    {
        throw new ArgumentNullException("crfModelDir");
    }

    if (outputStream == null)
    {
        throw new ArgumentNullException("outputStream");
    }

    if (addedFileNames == null)
    {
        throw new ArgumentNullException("addedFileNames");
    }

    ErrorSet errorSet = new ErrorSet();

    if (!Directory.Exists(crfModelDir))
    {
        errorSet.Add(CrfModelCompilerError.DataFolderNotFound, crfModelDir);
        return errorSet;
    }

    const string MappingFileName = "CRFLocalizedMapping.txt";

    // The writer is left undisposed: disposing a BinaryWriter closes its
    // underlying stream, and that stream belongs to the caller.
    BinaryWriter bw = new BinaryWriter(outputStream, Encoding.Unicode);
    List<byte[]> crfModels = new List<byte[]>();
    List<string> crfTags = new List<string>();
    Dictionary<string, string> localizedNameMapping = null;

    // If language = zh-cn or ja-jp, we should use their localized name as crf model tag.
    if (lang == Language.ZhCN || lang == Language.JaJP)
    {
        // Parent is null when crfModelDir is a root directory; in that case there
        // is no place to look for the mapping, so report it as not found.
        DirectoryInfo mappingDirectory = new DirectoryInfo(crfModelDir).Parent;
        string crfModelNameMappingFile = mappingDirectory == null
            ? MappingFileName
            : Path.Combine(mappingDirectory.FullName, MappingFileName);

        if (mappingDirectory != null && File.Exists(crfModelNameMappingFile))
        {
            localizedNameMapping = LocalizeCRFModelName(crfModelNameMappingFile, errorSet);
        }
        else
        {
            errorSet.Add(CrfModelCompilerError.MappingFileNotFound, crfModelNameMappingFile);
        }
    }

    string[] crfModelFileNames = Directory.GetFiles(crfModelDir, "*.crf", SearchOption.TopDirectoryOnly);
    foreach (string crfModelFileName in crfModelFileNames)
    {
        // Default tag is the file name without extension; a mapping entry, when
        // present, replaces it. Falling back to the default keeps the tag list
        // and the model list the same length, which the header (one shared
        // count) and the offset tables written below rely on.
        string crfTag = Path.GetFileNameWithoutExtension(crfModelFileName);
        string localizedName;
        if (localizedNameMapping != null &&
            localizedNameMapping.TryGetValue(Path.GetFileName(crfModelFileName), out localizedName))
        {
            crfTag = localizedName;
        }

        crfTags.Add(crfTag.ToUpper()); // case insensitive

        using (FileStream fs = new FileStream(crfModelFileName, FileMode.Open, FileAccess.Read))
        using (BinaryReader br = new BinaryReader(fs))
        {
            crfModels.Add(br.ReadBytes((int)fs.Length));
        }

        addedFileNames.Add(crfModelFileName);
    }

    using (StringPool crfModelSp = new StringPool())
    {
        using (StringPool crfTagSp = new StringPool())
        {
            Collection<int> crfModelsOffsets = new Collection<int>();
            Collection<int> crfTagsOffsets = new Collection<int>();

            // Add models to StringPool.
            foreach (byte[] model in crfModels)
            {
                crfModelsOffsets.Add(crfModelSp.PutBuffer(model));
            }

            // Add tags to StringPool.
            StringPool.WordsToStringPool(crfTags, crfTagSp, crfTagsOffsets);

            // Placeholders for the two pool offsets; patched once the real values are known.
            uint tagOffset = 0;
            uint modelOffset = 0;
            bw.Write(tagOffset);
            bw.Write(modelOffset);

            bw.Write((uint)crfTagsOffsets.Count);

            // crfModelsOffsets[i] belongs to crfModelFileNames[i]: one model is added per file, in order.
            for (int i = 0; i < crfModelsOffsets.Count; i++)
            {
                uint offset = (uint)crfModelsOffsets[i];
                if ((offset % 4) != 0)
                {
                    // A model offset that is not a multiple of 4 is reported against its file.
                    errorSet.Add(CrfModelCompilerError.InvalidCrfModel, crfModelFileNames[i]);
                }

                bw.Write(offset);
            }

            foreach (int offset in crfTagsOffsets)
            {
                bw.Write((uint)offset);
            }

            modelOffset = (uint)bw.BaseStream.Position;
            Debug.Assert((modelOffset % 4) == 0);
            byte[] crfModelPool = crfModelSp.ToArray();
            bw.Write(crfModelPool, 0, crfModelPool.Length);

            tagOffset = (uint)bw.BaseStream.Position;
            Debug.Assert((tagOffset % 4) == 0);
            byte[] crfTagPool = crfTagSp.ToArray();
            bw.Write(crfTagPool, 0, crfTagPool.Length);

            bw.Flush();

            // Update offset values at the start of the stream.
            bw.Seek(0, SeekOrigin.Begin);
            bw.Write(tagOffset);
            bw.Write(modelOffset);

            bw.Flush();
        }
    }

    return errorSet;
}
/// <summary>
/// Compile Chinese tone data table into binary file.
/// </summary>
/// <remarks>
/// The data file is read as Unicode text. A line containing "[ABAB]" starts the first table and a
/// line containing "[AAB]" starts the second one. Inside a table, blank lines and lines starting
/// with "//" are skipped, and a trailing "//" comment is removed. Every remaining line must split
/// into exactly two parts on the pair separator; otherwise an error is recorded.
/// Nothing is written when any error was recorded. Otherwise the output is: table count (uint, 2),
/// entry count of each table (uint each), then each table in turn.
/// </remarks>
/// <param name="chineseToneDataFile">Path of Chinese tone data file.</param>
/// <param name="outputStream">Output Stream.</param>
/// <returns>ErrorSet.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="chineseToneDataFile"/> is null or empty, or <paramref name="outputStream"/> is null.
/// </exception>
public static ErrorSet Compile(string chineseToneDataFile, Stream outputStream)
{
    if (string.IsNullOrEmpty(chineseToneDataFile))
    {
        throw new ArgumentNullException("chineseToneDataFile");
    }

    if (outputStream == null)
    {
        throw new ArgumentNullException("outputStream");
    }

    ErrorSet errorSet = new ErrorSet();

    if (!File.Exists(chineseToneDataFile))
    {
        errorSet.Add(ChineseToneCompilerError.DataFileNotFound, chineseToneDataFile);
        return errorSet;
    }

    // The writer is left undisposed: disposing a BinaryWriter closes its
    // underlying stream, and that stream belongs to the caller.
    BinaryWriter outputBinaryWriter = new BinaryWriter(outputStream, Encoding.Unicode);
    List<string> fileLines = new List<string>(Helper.FileLines(chineseToneDataFile, Encoding.Unicode));

    // NOTE: the first list is filled from the section whose header is "[ABAB]".
    List<string> wordListAABB = new List<string>();
    List<string> wordListAAB = new List<string>();

    // -1: no section header seen yet, 0: "[ABAB]" section, 1: "[AAB]" section.
    int nTableIdx = -1;

    // Load words from raw data file to the word lists.
    foreach (string rawLine in fileLines)
    {
        if (rawLine.Contains("[ABAB]"))
        {
            nTableIdx = 0;
            continue;
        }

        if (rawLine.Contains("[AAB]"))
        {
            nTableIdx = 1;
            continue;
        }

        bool insideTable = nTableIdx == 0 || nTableIdx == 1;
        if (!insideTable || string.IsNullOrWhiteSpace(rawLine) || rawLine.StartsWith("//"))
        {
            continue;
        }

        string fileLine = rawLine;

        // Remove comment at the end of line.
        if (fileLine.Contains("//"))
        {
            fileLine = fileLine.Remove(fileLine.IndexOf("//"));
        }

        // Clean white spaces around the line.
        fileLine = fileLine.Trim();

        string[] segments = fileLine.Split(
            new string[] { PairSeparator }, StringSplitOptions.RemoveEmptyEntries);

        if (segments.Length != 2)
        {
            errorSet.Add(ChineseToneCompilerError.InvalidPatternFormData);
            continue;
        }

        segments[0] = segments[0].Trim();
        segments[1] = segments[1].Trim();

        // Each pair contributes three entries: both words followed by an empty string.
        List<string> targetList = nTableIdx == 0 ? wordListAABB : wordListAAB;
        targetList.AddRange(segments);
        targetList.Add(string.Empty);
    }

    if (errorSet.Count == 0)
    {
        // Write table count.
        outputBinaryWriter.Write((uint)2);

        // Write entry count of each table.
        outputBinaryWriter.Write((uint)wordListAABB.Count);
        outputBinaryWriter.Write((uint)wordListAAB.Count);

        WriteAlignedWordTable(outputBinaryWriter, wordListAABB);
        WriteAlignedWordTable(outputBinaryWriter, wordListAAB);
    }

    return errorSet;
}

/// <summary>
/// Writes one word table: its byte size, the offset of each word, and the string buffer padded with zeros to a multiple of 4 bytes.
/// </summary>
/// <param name="writer">Binary writer that receives the table.</param>
/// <param name="words">Entries of the table, in the order they are put into the string pool.</param>
private static void WriteAlignedWordTable(BinaryWriter writer, List<string> words)
{
    // Used for 4 bytes align.
    const int AlignCount = 4;

    List<int> offsetList = new List<int>();
    using (StringPool stringPool = new StringPool())
    {
        // Put the words from word list to string pool.
        StringPool.WordsToStringPool(words, stringPool, offsetList);
        byte[] stringBuffer = stringPool.ToArray();

        // Number of zero bytes needed to bring the string buffer to a multiple of AlignCount.
        int paddingSize = (AlignCount - (stringBuffer.Length % AlignCount)) % AlignCount;

        // Size of the table body: one 4-byte offset per word plus the padded string buffer.
        int tableByteSize = (sizeof(int) * offsetList.Count) + stringBuffer.Length + paddingSize;
        writer.Write((uint)tableByteSize);

        // Write offset of each word.
        foreach (int offset in offsetList)
        {
            writer.Write((uint)offset);
        }

        // Write the strings from string pool.
        writer.Write(stringBuffer, 0, stringBuffer.Length);

        // Fill 0 to align the memory by 4 bytes.
        if (paddingSize != 0)
        {
            writer.Write(new byte[paddingSize], 0, paddingSize);
        }
    }
}