/// <summary>
/// Save the gram data into stream.
/// </summary>
/// <remarks>
/// Layout written, in order:
/// header (language id, max n-gram order, probability amplifier, number of
/// lower-level states, number of final-level states, dictionary offset,
/// lower-level state offset, final-level state offset), the grapheme trie
/// dictionary padded to an even length, the lower-level grammar states
/// (GraphId, Prob, Backoff, ReferenceIndex) and finally the highest-order
/// grammar states (GraphId, Prob only).
/// The stream is flushed but not closed; the caller keeps ownership of it.
/// </remarks>
/// <param name="stream">Binary stream.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="stream"/> is null.
/// </exception>
/// <exception cref="InvalidDataException">There is no n-gram data to save.</exception>
public void SaveToBinary(Stream stream)
{
    if (stream == null)
    {
        throw new ArgumentNullException("stream");
    }

    if (_grammarCount == 0)
    {
        throw new InvalidDataException("There is no nGram data");
    }

    using (TrieTree graphemeTrieTree = new TrieTree(_graphemeDictionary))
    {
        byte[] graphemeDictData = graphemeTrieTree.GetTrieData();
        int graphemeDictLength = graphemeDictData.Length;

        // Keep data alignment as 2 bytes
        if (graphemeDictLength % 2 != 0)
        {
            graphemeDictLength++;
        }

        // Build the state table: slot 0 is the root state, and one extra slot is
        // reserved in front of the highest-order grams (hence the "+ 2").
        GrammarState[] grammarStates = new GrammarState[_grammarCount + 2];
        grammarStates[0] = new GrammarState();
        grammarStates[0].ReferenceIndex = 1;

        // Maps a grammar string to the index of its state, so that higher-order
        // grams can locate the state of their lower-order prefix.
        Dictionary<string, int> grammarIndex = new Dictionary<string, int>();
        int stateIndex = 1;
        int finalGramStateIndex = 0;

        for (int gram = 1; gram <= _maxNgram; gram++)
        {
            string lastReferredGrammar = string.Empty;

            if (gram == _maxNgram)
            {
                // The final-level table starts here with one empty leading state.
                finalGramStateIndex = stateIndex;
                grammarStates[stateIndex++] = new GrammarState();
            }

            foreach (string grammar in _nGramData[gram].Keys)
            {
                string[] graphemes = grammar.Split(GrammarSeparator, StringSplitOptions.RemoveEmptyEntries);

                // The state is keyed by the last grapheme of the gram.
                string lastGrapheme = graphemes[graphemes.Length - 1];
                int len = 0;
                int graphemeId = graphemeTrieTree.FindLongest(lastGrapheme, out len);
                Debug.Assert(graphemeId != -1);
                Debug.Assert(!grammarIndex.ContainsKey(grammar));

                // Save the state index for easily query
                grammarIndex.Add(grammar, stateIndex);

                grammarStates[stateIndex] = new GrammarState();
                grammarStates[stateIndex].GraphId = (GrapId)graphemeId;

                // Convert the probability and backoff into ProbabilityInt with the
                // amplifier, saturating at the limits of ProbabilityInt.
                grammarStates[stateIndex].Prob =
                    ClampToProbabilityInt(_nGramData[gram][grammar].Probability * _probabilityAmplifier);
                grammarStates[stateIndex].Backoff =
                    ClampToProbabilityInt(_nGramData[gram][grammar].Backoff * _probabilityAmplifier);

                // Set the reference index for lower level gram data
                if (gram != 1)
                {
                    // The referred grammar is this gram without its last grapheme.
                    // At least one token is always kept, which matches the result
                    // for a gram that splits into a single token.
                    string referredGrammar =
                        string.Join(" ", graphemes, 0, Math.Max(1, graphemes.Length - 1));

                    // Only the first gram sharing a given prefix updates the prefix
                    // state, so the prefix ends up pointing at its first child.
                    if (!referredGrammar.Equals(lastReferredGrammar, StringComparison.Ordinal))
                    {
                        lastReferredGrammar = referredGrammar;
                        Debug.Assert(grammarIndex.ContainsKey(lastReferredGrammar));
                        int referredIndex = grammarIndex[lastReferredGrammar];
                        Debug.Assert(grammarStates[referredIndex] != null);

                        if (gram != _maxNgram)
                        {
                            grammarStates[referredIndex].ReferenceIndex = (ReferenceIndex)stateIndex;
                        }
                        else
                        {
                            // References into the final-level table are relative to its start.
                            grammarStates[referredIndex].ReferenceIndex =
                                (ReferenceIndex)(stateIndex - finalGramStateIndex);
                        }
                    }
                }

                stateIndex++;
            }
        }

        // Save the model into binary stream.
        // The writer is deliberately not disposed: disposing it would close the
        // caller's stream. It is flushed explicitly at the end instead.
        BinaryWriter bw = new BinaryWriter(stream);

        // Write the language ID
        bw.Write((ushort)_language);

        // Write the Gram Count
        bw.Write((ushort)this._maxNgram);

        // Write the Probability Amplifier
        bw.Write((int)_probabilityAmplifier);

        // Write the grammar state number
        bw.Write((uint)finalGramStateIndex);

        // Write the Final grammar state number
        bw.Write((uint)(_grammarCount + 2 - finalGramStateIndex));

        // Five leading header fields followed by the three offsets written below.
        int headerSize = sizeof(ushort) + sizeof(ushort) + sizeof(int) + sizeof(uint) + sizeof(uint) +
            sizeof(uint) + sizeof(uint) + sizeof(uint);

        // Write the offset of Dictionary
        bw.Write((uint)headerSize);

        // Write the offset of Grammar State
        bw.Write((uint)(headerSize + graphemeDictLength));

        // Write the offset of Final Grammar State
        bw.Write((uint)(headerSize + graphemeDictLength +
            (finalGramStateIndex * (sizeof(GrapId) + sizeof(ProbabilityInt) +
            sizeof(ProbabilityInt) + sizeof(ReferenceIndex)))));

        // Write the grapheme Trie Dictionary
        bw.Write(graphemeDictData, 0, graphemeDictData.Length);

        // Add the data alignment for grapheme Trie Dictionary
        for (int i = graphemeDictData.Length; i < graphemeDictLength; i++)
        {
            bw.Write((byte)0);
        }

        // Write the grammar states for low level gram
        for (int i = 0; i < finalGramStateIndex; i++)
        {
            bw.Write(grammarStates[i].GraphId);
            bw.Write(grammarStates[i].Prob);
            bw.Write(grammarStates[i].Backoff);
            bw.Write(grammarStates[i].ReferenceIndex);
        }

        // Write the grammar state for final level gram (no backoff or reference index)
        for (int i = finalGramStateIndex; i < _grammarCount + 2; i++)
        {
            bw.Write(grammarStates[i].GraphId);
            bw.Write(grammarStates[i].Prob);
        }

        bw.Flush();
    }
}

/// <summary>
/// Converts an amplified probability value to ProbabilityInt, saturating at
/// ProbabilityInt.MinValue and ProbabilityInt.MaxValue instead of overflowing.
/// </summary>
/// <param name="amplified">Probability or backoff already multiplied by the amplifier.</param>
/// <returns>The value truncated to ProbabilityInt, clamped to its range.</returns>
private static ProbabilityInt ClampToProbabilityInt(double amplified)
{
    if (amplified < ProbabilityInt.MinValue)
    {
        return ProbabilityInt.MinValue;
    }

    if (amplified > ProbabilityInt.MaxValue)
    {
        return ProbabilityInt.MaxValue;
    }

    return (ProbabilityInt)amplified;
}