/// <summary>
/// Write the tree index table followed by every tree of the decision forest.
/// </summary>
/// <remarks>
/// The index table is written twice: first as a place holder so that its space is
/// reserved, then again once every tree has been serialized and the offset and size
/// of each tree are known. Offsets stored in the index are relative to the start of
/// this section. <paramref name="treeIndexes"/> is accessed by tree position, so it
/// must contain at least as many entries as <c>forest.TreeList</c>.
/// </remarks>
/// <param name="forest">The decision forest.</param>
/// <param name="treeIndexes">Tree indexes; Offset and Size of each entry are updated.</param>
/// <param name="questionIndexes">Question indexes.</param>
/// <param name="questionSet">The question set (validated, not otherwise used here).</param>
/// <param name="namedOffsets">The named offsets, forwarded to the forest serializer.</param>
/// <param name="forestSerializer">The forest serializer.</param>
/// <param name="writer">The writer to write to.</param>
/// <returns>
/// The size of the section: the sum of the value returned by the place-holder index
/// write and the sizes reported by the serializer for each tree.
/// </returns>
internal int Write(DecisionForest forest, List<TreeIndex> treeIndexes,
    Dictionary<string, uint> questionIndexes, HtsQuestionSet questionSet,
    IDictionary<string, uint[]> namedOffsets, DecisionForestSerializer forestSerializer,
    DataWriter writer)
{
    Helper.ThrowIfNull(forest);
    Helper.ThrowIfNull(treeIndexes);
    Helper.ThrowIfNull(writer);
    Helper.ThrowIfNull(questionIndexes);
    Helper.ThrowIfNull(questionSet);

    // The serializer is dereferenced for every tree. Reject null up front, before the
    // place-holder index has been written, instead of failing half way through.
    Helper.ThrowIfNull(forestSerializer);

    int decisionTreeSectionStart = (int)writer.BaseStream.Position;
    int position = decisionTreeSectionStart;

    // Write tree index (place holder)
    position += (int)WriteTreeIndexes(writer, treeIndexes.ToArray());

    // Write trees
    for (int treeIndex = 0; treeIndex < forest.TreeList.Count; treeIndex++)
    {
        DecisionTree tree = forest.TreeList[treeIndex];

        // NOTE(review): the updates below only reach the list if TreeIndex is a
        // reference type - confirm against its declaration.
        TreeIndex index = treeIndexes[treeIndex];
        index.Offset = position - decisionTreeSectionStart;
        index.Size = (int)forestSerializer.Write(tree, writer, questionIndexes, namedOffsets);
        position += index.Size;
    }

    // Write tree index again, now carrying the real offsets and sizes.
    // NOTE(review): PositionRecover presumably seeks to the section start and restores
    // the previous stream position when disposed - confirm.
    using (PositionRecover recover = new PositionRecover(writer, decisionTreeSectionStart, SeekOrigin.Begin))
    {
        WriteTreeIndexes(writer, treeIndexes.ToArray());
    }

    Debug.Assert(position % sizeof(uint) == 0, "Data should be 4-byte aligned.");
    return position - decisionTreeSectionStart;
}
/// <summary>
/// Save pre-selection forest.
/// </summary>
/// <remarks>
/// Sections are written in this order: header (place holder), question set, decision
/// forest, string pool, candidate set. The header is rewritten at offset 0 at the end,
/// once the offset and size of every section are known. Every question in
/// <c>decisionForest.QuestionList</c> is modified (language and code value set) before
/// the file is created.
/// </remarks>
/// <param name="decisionForest">The forest with each tree corresponding to a unit.</param>
/// <param name="candidateGroups">The candidate group collection.</param>
/// <param name="unitCandidateNameIds">Given candidate idx.</param>
/// <param name="customFeatures">Customized linguistic feature list.</param>
/// <param name="outputPath">The output path.</param>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="decisionForest"/> or <paramref name="outputPath"/> is null.
/// </exception>
public void Write(DecisionForest decisionForest, ICollection<CandidateGroup> candidateGroups,
    IDictionary<string, int> unitCandidateNameIds, HashSet<string> customFeatures,
    string outputPath)
{
    // Fail fast, before any question is mutated or any file is created.
    if (null == decisionForest)
    {
        throw new System.ArgumentNullException("decisionForest");
    }

    if (null == outputPath)
    {
        throw new System.ArgumentNullException("outputPath");
    }

    foreach (Question question in decisionForest.QuestionList)
    {
        question.Language = _phoneSet.Language;
        question.ValueSetToCodeValueSet(_posSet, _phoneSet, customFeatures);
    }

    FileStream file = new FileStream(outputPath, FileMode.Create);
    try
    {
        using (DataWriter writer = new DataWriter(file))
        {
            // From here on the writer's using block is responsible for the stream
            // (assumes DataWriter disposes its base stream - TODO confirm); clearing
            // the local keeps the finally block from disposing it a second time.
            file = null;
            uint position = 0;

            // Write header section place holder
            PreselectionFileHeader header = new PreselectionFileHeader();
            position += (uint)header.Write(writer);

            HtsFontSerializer serializer = new HtsFontSerializer();

            // Write feature, question and prepare string pool
            HtsQuestionSet questionSet = new HtsQuestionSet
            {
                Items = decisionForest.QuestionList,
                Header = new HtsQuestionSetHeader { HasQuestionName = false },
                CustomFeatures = customFeatures,
            };

            using (StringPool stringPool = new StringPool())
            {
                Dictionary<string, uint> questionIndexes = new Dictionary<string, uint>();

                header.QuestionOffset = position;
                header.QuestionSize = serializer.Write(
                    questionSet, writer, stringPool, questionIndexes, customFeatures);
                position += header.QuestionSize;

                // Write leaf referenced data to buffer
                IEnumerable<INodeData> dataNodes = GetCandidateNodes(candidateGroups);
                using (MemoryStream candidateSetBuffer = new MemoryStream())
                {
                    Dictionary<string, int> namedSetOffset = new Dictionary<string, int>();

                    // Dispose the temporary writer so that it is not leaked and anything
                    // it may still hold is pushed into the buffer. This closes the
                    // MemoryStream as well, which is fine: MemoryStream.ToArray() is
                    // documented to work on a closed stream.
                    using (DataWriter candidateSetWriter = new DataWriter(candidateSetBuffer))
                    {
                        HtsFontSerializer.Write(dataNodes, candidateSetWriter, namedSetOffset);
                    }

                    // Write decision forest
                    Dictionary<string, uint[]> namedOffsets =
                        namedSetOffset.ToDictionary(p => p.Key, p => new[] { (uint)p.Value });

                    header.DecisionTreeSectionOffset = position;
                    header.DecisionTreeSectionSize = (uint)Write(decisionForest, unitCandidateNameIds,
                        questionIndexes, questionSet, namedOffsets, new DecisionForestSerializer(), writer);
                    position += header.DecisionTreeSectionSize;

                    // Write string pool
                    header.StringPoolOffset = position;
                    header.StringPoolSize = HtsFontSerializer.Write(stringPool, writer);
                    position += header.StringPoolSize;

                    // Write leaf referenced data
                    header.CandidateSetSectionOffset = position;
                    header.CandidateSetSectionSize = writer.Write(candidateSetBuffer.ToArray());
                    position += header.CandidateSetSectionSize;
                }

                // Overwrite the place holder at offset 0 with the final header, which
                // now carries the offset and size of every section.
                using (PositionRecover recover = new PositionRecover(writer, 0))
                {
                    header.Write(writer);
                }
            }
        }
    }
    finally
    {
        if (null != file)
        {
            file.Dispose();
        }
    }
}