/// <summary>
/// Reads the recalled utterances from the environment, one per line, converts each
/// into a token sequence framed by zeros, and adds it to the persistent predictor.
/// Afterwards the vocabulary list and the suggestions view are refreshed.
/// </summary>
/// <returns>A task that completes once every line has been processed and the views have been refreshed.</returns>
internal async Task LoadUtterancesAsync()
{
    using (var reader = await Environment.RecallUtterancesAsync())
    {
        // Running line counter; nothing in this method reads it.
        var lineNumber = 0;

        while (true)
        {
            var line = await reader.ReadLineAsync();
            if (line == null)
            {
                break;
            }

            lineNumber++;

            var tiles = UtteranceData.FromLine(line).Sequence;
            Debug.Assert(tiles.Count != 0);

            // Every stored sequence starts and ends with a zero entry.
            var tokenSequence = new List<int> { 0 };
            var awaitingFirstWord = true;
            var environment = Model.Environment;

            foreach (var tile in tiles)
            {
                var text = tile.ToTokenString();
                Debug.Assert(!string.IsNullOrWhiteSpace(text));

                if (awaitingFirstWord && environment.TryCapitalizeFirstWord(text) != null)
                {
                    awaitingFirstWord = false;

                    var lowered = text.ToLowerInvariant();
                    var recapitalized = environment.TryCapitalizeFirstWord(lowered);

                    // When the tile is exactly the capitalized form of its lower-case
                    // spelling, and that lower-case spelling is already a known token,
                    // record the lower-case token instead.
                    var isCapitalizedForm = recapitalized == text;
                    var capitalizationChangedIt = lowered != recapitalized;
                    if (isCapitalizedForm && capitalizationChangedIt && _tokens.TryGetToken(lowered, out _))
                    {
                        text = lowered;
                    }
                }

                var token = _tokens.GetToken(text);
                Debug.Assert(token != 0);
                tokenSequence.Add(token);
            }

            tokenSequence.Add(0);
            PersistantPredictor.AddSequence(tokenSequence, PersistedSequenceWeight);
        }
    }

    PopulateVocabularyList();
    SetSuggestionsView(0, Count, false);
}
/// <summary>
/// Creates the spelling vocabulary source and seeds its persistent predictor from
/// the words of <paramref name="wordVocabularySource"/> and from the additional
/// symbols reported by the model's environment.
/// </summary>
/// <param name="model">Application model handed to the base class; its environment supplies the additional symbols.</param>
/// <param name="wordVocabularySource">Word source whose <c>Words</c> are used as seed sequences.</param>
internal SpellingVocabularySource(ApplicationModel model, WordVocabularySource wordVocabularySource)
    : base(model: model, predictorWidth: 4)
{
    _wordVocabularySource = wordVocabularySource;
    _characterSet = new SortedSet<string>(Environment);

    foreach (var word in _wordVocabularySource.Words)
    {
        // Only the leading run of non-zero entries is used; a zero entry ends the word early.
        var usedLength = 0;
        while (usedLength < word.Length && word[usedLength] != 0)
        {
            usedLength++;
        }

        if (usedLength == 0)
        {
            continue;
        }

        // The first and last slots keep their default value of zero and frame the word.
        var framed = new int[usedLength + 2];
        for (var position = 0; position < usedLength; position++)
        {
            framed[position + 1] = word[position];
        }

        Debug.Assert(framed[0] == 0);
        Debug.Assert(framed[framed.Length - 1] == 0);
        PersistantPredictor.AddSequence(framed, WordVocabularySource.SeedSequenceWeight);
    }

    // Each additional symbol is seeded as a single-element sequence without zero framing.
    foreach (var symbol in model.Environment.GetAdditionalSymbols())
    {
        var symbolSequence = new int[] { symbol };
        PersistantPredictor.AddSequence(symbolSequence, WordVocabularySource.SeedSequenceWeight);
    }

    _unicodeVocabularySource = new UnicodeVocabularySource(model, this);

    PopulateVocabularyList();
}
/// <summary>
/// Adds <paramref name="word"/> to the persistent predictor as a sequence of Unicode
/// code points framed by a leading and a trailing zero. Any code point not yet in
/// the character set is added to it, and the vocabulary list is repopulated when
/// that happens.
/// </summary>
/// <param name="word">The word to add, as UTF-16 text.</param>
/// <exception cref="System.ArgumentException">
/// Raised by <c>char.ConvertToUtf32</c> when <paramref name="word"/> contains an
/// unpaired surrogate.
/// </exception>
internal void AddNewWord(string word)
{
    var needToRepopulate = false;
    var sequence = new List<int>(word.Length + 2) { 0 };

    for (var index = 0; index < word.Length;)
    {
        int utf32 = char.ConvertToUtf32(word, index);

        // Step over both UTF-16 units when this code point is encoded as a surrogate
        // pair. The pair is tested for directly rather than via the Unicode category,
        // because a category lookup on a string position can report the category of
        // the combined code point instead of Surrogate, which would leave the index
        // on the low surrogate and make the next conversion throw.
        index += char.IsSurrogatePair(word, index) ? 2 : 1;

        sequence.Add(utf32);

        if (_characterSet.Add(char.ConvertFromUtf32(utf32)))
        {
            needToRepopulate = true;
        }
    }

    sequence.Add(0);
    PersistantPredictor.AddSequence(sequence, WordVocabularySource.SeedSequenceWeight);

    if (needToRepopulate)
    {
        PopulateVocabularyList();
    }
}
/// <summary>
/// Subtracts the delta predictor from the persistent predictor and then adds
/// <paramref name="sequence"/> to the persistent predictor.
/// </summary>
/// <param name="sequence">The sequence to add after the subtraction.</param>
/// <param name="increment">The amount passed to <c>AddSequence</c> for this sequence.</param>
internal void RollbackAndAddSequence(IReadOnlyList<int> sequence, int increment)
{
    // Order matters: the delta is removed first, then the sequence is added.
    PersistantPredictor.Subtract(DeltaPredictor);

    PersistantPredictor.AddSequence(sequence, increment);
}