Ejemplo n.º 1
0
        /// <summary>
        /// Writes the tree index table followed by every tree of the decision forest,
        /// then rewrites the index table in place once each tree's offset and size are known.
        /// </summary>
        /// <param name="forest">The decision forest whose trees are written.</param>
        /// <param name="treeIndexes">Tree indexes; entry i is updated with the offset and size of tree i.</param>
        /// <param name="questionIndexes">Question indexes, forwarded to the forest serializer.</param>
        /// <param name="questionSet">The question set (only checked for null here).</param>
        /// <param name="namedOffsets">The named offsets, forwarded to the forest serializer.</param>
        /// <param name="forestSerializer">The forest serializer used to write each tree.</param>
        /// <param name="writer">The writer to write to.</param>
        /// <returns>
        /// The size of the decision tree section: the value returned by the index table write
        /// plus the sizes reported by the serializer for every tree.
        /// </returns>
        internal int Write(DecisionForest forest, List<TreeIndex> treeIndexes,
            Dictionary<string, uint> questionIndexes, HtsQuestionSet questionSet,
            IDictionary<string, uint[]> namedOffsets, DecisionForestSerializer forestSerializer, DataWriter writer)
        {
            Helper.ThrowIfNull(forest);
            Helper.ThrowIfNull(treeIndexes);
            Helper.ThrowIfNull(writer);
            Helper.ThrowIfNull(questionIndexes);
            Helper.ThrowIfNull(questionSet);

            // The serializer is dereferenced for every tree below; reject null up front so the
            // failure happens before the index placeholder has been written to the stream.
            Helper.ThrowIfNull(forestSerializer);

            int decisionTreeSectionStart = (int)writer.BaseStream.Position;
            int position = decisionTreeSectionStart;

            // Write tree index (place holder): offsets and sizes are not known yet, this
            // only reserves the space that is overwritten after the trees are written.
            position += (int)WriteTreeIndexes(writer, treeIndexes.ToArray());

            // Write trees, recording for each one its offset relative to the section start
            // and the size reported by the serializer.
            // NOTE(review): assumes TreeIndex is a reference type, so the updates made through
            // "index" are visible when the list is written again below - confirm.
            for (int treeIndex = 0; treeIndex < forest.TreeList.Count; treeIndex++)
            {
                DecisionTree tree = forest.TreeList[treeIndex];
                TreeIndex index = treeIndexes[treeIndex];

                index.Offset = position - decisionTreeSectionStart;
                index.Size = (int)forestSerializer.Write(tree, writer, questionIndexes, namedOffsets);
                position += index.Size;
            }

            // Write tree index again at the start of the section, now with the final values.
            // NOTE(review): PositionRecover presumably seeks to the given position and restores
            // the previous one on dispose - confirm against its implementation.
            using (PositionRecover recover =
                new PositionRecover(writer, decisionTreeSectionStart, SeekOrigin.Begin))
            {
                WriteTreeIndexes(writer, treeIndexes.ToArray());
            }

            Debug.Assert(position % sizeof(uint) == 0, "Data should be 4-byte aligned.");

            return position - decisionTreeSectionStart;
        }
        /// <summary>
        /// Saves the pre-selection forest to a file. The file is written as a header followed
        /// by the question set, the decision tree section, the string pool and the candidate
        /// set section; the header is rewritten at the end with the final offsets and sizes.
        /// </summary>
        /// <param name="decisionForest">The forest with each tree corresponding to a unit.</param>
        /// <param name="candidateGroups">The candidate group collection.</param>
        /// <param name="unitCandidateNameIds">Given candidate idx.</param>
        /// <param name="customFeatures">Customized linguistic feature list.</param>
        /// <param name="outputPath">The output path; the file is created or overwritten.</param>
        public void Write(DecisionForest decisionForest,
            ICollection<CandidateGroup> candidateGroups,
            IDictionary<string, int> unitCandidateNameIds,
            HashSet<string> customFeatures,
            string outputPath)
        {
            // Convert the value set of every question to its code form before serializing.
            foreach (Question question in decisionForest.QuestionList)
            {
                question.Language = _phoneSet.Language;
                question.ValueSetToCodeValueSet(_posSet, _phoneSet, customFeatures);
            }

            FileStream file = new FileStream(outputPath, FileMode.Create);
            try
            {
                using (DataWriter writer = new DataWriter(file))
                {
                    // The writer is now responsible for the stream; clear the local so the
                    // finally block does not dispose it a second time.
                    file = null;
                    uint position = 0;

                    // Write header section place holder; the section offsets and sizes are
                    // filled in below and the header is rewritten at the end.
                    PreselectionFileHeader header = new PreselectionFileHeader();
                    position += (uint)header.Write(writer);

                    HtsFontSerializer serializer = new HtsFontSerializer();

                    // Write feature, question and prepare string pool
                    HtsQuestionSet questionSet = new HtsQuestionSet
                    {
                        Items = decisionForest.QuestionList,
                        Header = new HtsQuestionSetHeader { HasQuestionName = false },
                        CustomFeatures = customFeatures,
                    };

                    using (StringPool stringPool = new StringPool())
                    {
                        Dictionary<string, uint> questionIndexes = new Dictionary<string, uint>();

                        header.QuestionOffset = position;
                        header.QuestionSize = serializer.Write(
                            questionSet, writer, stringPool, questionIndexes, customFeatures);
                        position += header.QuestionSize;

                        // Write leaf referenced data to an in-memory buffer first: the named
                        // offsets it produces are needed by the decision forest, which is
                        // placed before the candidate set in the file.
                        IEnumerable<INodeData> dataNodes = GetCandidateNodes(candidateGroups);
                        using (MemoryStream candidateSetBuffer = new MemoryStream())
                        {
                            Dictionary<string, int> namedSetOffset = new Dictionary<string, int>();

                            // Dispose the temporary writer so that everything it wrote is in
                            // the buffer before ToArray() is called below. ToArray() remains
                            // valid on a closed MemoryStream.
                            using (DataWriter candidateSetWriter = new DataWriter(candidateSetBuffer))
                            {
                                HtsFontSerializer.Write(dataNodes, candidateSetWriter, namedSetOffset);
                            }

                            // Write decision forest
                            Dictionary<string, uint[]> namedOffsets =
                                namedSetOffset.ToDictionary(p => p.Key, p => new[] { (uint)p.Value });

                            header.DecisionTreeSectionOffset = position;

                            header.DecisionTreeSectionSize = (uint)Write(decisionForest, unitCandidateNameIds,
                                questionIndexes, questionSet, namedOffsets, new DecisionForestSerializer(), writer);
                            position += header.DecisionTreeSectionSize;

                            // Write string pool
                            header.StringPoolOffset = position;
                            header.StringPoolSize = HtsFontSerializer.Write(stringPool, writer);
                            position += header.StringPoolSize;

                            // Write leaf referenced data
                            header.CandidateSetSectionOffset = position;
                            header.CandidateSetSectionSize = writer.Write(candidateSetBuffer.ToArray());
                            position += header.CandidateSetSectionSize;
                        }

                        // Rewrite the header at the start of the file, now that every section
                        // offset and size has been filled in.
                        // NOTE(review): PositionRecover presumably restores the previous stream
                        // position on dispose - confirm against its implementation.
                        using (PositionRecover recover = new PositionRecover(writer, 0))
                        {
                            header.Write(writer);
                        }
                    }
                }
            }
            finally
            {
                // Only non-null when constructing the DataWriter failed.
                if (null != file)
                {
                    file.Dispose();
                }
            }
        }