/// <summary>
/// Reads every stored utterance line from the reader returned by
/// <c>Environment.RecallUtterancesAsync</c>, converts each line into a token
/// sequence bracketed by the 0 token at both ends, and adds it to
/// <c>PersistantPredictor</c> with weight <c>PersistedSequenceWeight</c>.
/// Afterwards the vocabulary list is repopulated and the suggestions view is reset.
/// </summary>
/// <returns>A task that completes once all lines have been processed.</returns>
internal async Task LoadUtterancesAsync()
{
    using (var reader = await Environment.RecallUtterancesAsync())
    {
        // Running count of lines read so far; not consumed by the code below.
        var lineNo = 0;

        var line = await reader.ReadLineAsync();
        while (line != null)
        {
            lineNo++;

            var tiles = UtteranceData.FromLine(line).Sequence;
            Debug.Assert(tiles.Count != 0);

            // Every sequence starts with the 0 token and is closed with it again below.
            var sequence = new List<int> { 0 };
            var isFirstWordToCome = true;
            var environment = Model.Environment;

            foreach (var tile in tiles)
            {
                var tokenString = tile.ToTokenString();
                Debug.Assert(!string.IsNullOrWhiteSpace(tokenString));

                // Only the first tile for which the environment returns a non-null
                // capitalization is treated as the utterance's first word.
                if (isFirstWordToCome && environment.TryCapitalizeFirstWord(tokenString) != null)
                {
                    isFirstWordToCome = false;

                    var lowered = tokenString.ToLowerInvariant();
                    var recapitalized = environment.TryCapitalizeFirstWord(lowered);

                    // True when the stored text is exactly what capitalizing the
                    // lower-case form yields, and that form really differs from lower case.
                    var matchesCapitalizedForm = recapitalized == tokenString && lowered != recapitalized;

                    // Fall back to the lower-case spelling only if it is already a known token.
                    if (matchesCapitalizedForm && _tokens.TryGetToken(lowered, out var _))
                    {
                        tokenString = lowered;
                    }
                }

                var token = _tokens.GetToken(tokenString);
                Debug.Assert(token != 0);
                sequence.Add(token);
            }

            sequence.Add(0);
            PersistantPredictor.AddSequence(sequence, PersistedSequenceWeight);

            line = await reader.ReadLineAsync();
        }
    }

    PopulateVocabularyList();
    SetSuggestionsView(0, Count, false);
}
/// <summary>
/// Debugging aid: triggers <see cref="Debugger.Break"/> when the tokens of
/// <paramref name="words"/> match the tail of <paramref name="context"/>.
/// </summary>
/// <param name="tokens">Lookup used to resolve each word to its token.</param>
/// <param name="context">Token context whose trailing entries are compared.</param>
/// <param name="words">Words to match against the end of the context, in order.</param>
static void BreakOnProblemCase(StringTokens tokens, int[] context, params string[] words)
{
    // More words than context entries can never match.
    if (context.Length < words.Length)
    {
        return;
    }

    // Distance between a word's position and its counterpart at the tail of the context.
    var shift = context.Length - words.Length;

    // Compare from the last word backwards; the first unknown or mismatching word ends the check.
    for (var index = words.Length - 1; 0 <= index; index--)
    {
        if (!(tokens.TryGetToken(words[index], out var token) && context[shift + index] == token))
        {
            return;
        }
    }

    // Every word matched (this is also reached when no words were supplied).
    Debugger.Break();
}
/// <summary>
/// For each nascent compound prediction, looks for proper prefixes of its member
/// texts that are themselves known tokens, and inserts a prediction for each such
/// prefix that is not yet predicted, lies within the current index bounds and is
/// visible. After a compound has been processed it is moved towards the front of
/// <c>_nascents</c> while its first member's Index is smaller than that of the
/// entries preceding it.
/// </summary>
private void CreatePrefixPredictions()
{
    for (var nascentIndex = 0; nascentIndex < _nascents.Count; nascentIndex++)
    {
        var members = _nascents[nascentIndex]._list;

        // Prefix length carries over from one member to the next; it is not reset per member.
        var prefixLength = 0;

        for (var memberIndex = 0; memberIndex < members.Count; memberIndex++)
        {
            var member = members[memberIndex];

            while (prefixLength < member.Text.Length)
            {
                // Step over a surrogate as two UTF-16 units, otherwise one.
                if (char.IsSurrogate(member.Text[prefixLength]))
                {
                    prefixLength += 2;
                }
                else
                {
                    prefixLength++;
                }

                // Only proper prefixes are considered, never the whole text.
                if (!(prefixLength < member.Text.Length))
                {
                    continue;
                }

                var prefix = member.Text.Substring(0, prefixLength);
                if (!_tokens.TryGetToken(prefix, out var prefixToken))
                {
                    continue;
                }

                if (_predictedTokens.Contains(prefixToken))
                {
                    Debug.WriteLine($"Already included found {prefix}");
                    continue;
                }

                var sourceIndex = _source.GetTokenIndex(prefixToken);
                var withinBounds = _lowerBound <= sourceIndex && sourceIndex < _upperBound;
                if (!(withinBounds && _isTokenVisible(prefixToken)))
                {
                    continue;
                }

                var score = _maker.GetScore(prefixToken);

                // The looked-up text is not used below; the lookup itself is retained.
                var prefixText = _tokens[prefixToken];

                var addition = CreatePrediction(score, _startOnFirstWord);

                // Find the first slot whose Index is not below the new prediction's Index.
                var slot = 0;
                while (slot < members.Count && members[slot].Index < addition.Index)
                {
                    slot++;
                }

                members.Insert(slot, addition);
                _predictedTokens.Add(prefixToken);
                Debug.WriteLine($"TODO: Should add {prefix} ({score})");
            }
        }

        // Walk towards the front while this compound's first member has a smaller
        // Index than the first member of the preceding compound.
        var destination = nascentIndex;
        while (0 < destination &&
               _nascents[nascentIndex]._list[0].Index < _nascents[destination - 1]._list[0].Index)
        {
            destination--;
        }

        if (destination != nascentIndex)
        {
            var relocated = _nascents[nascentIndex];
            _nascents.RemoveAt(nascentIndex);
            _nascents.Insert(destination, relocated);
        }
    }
}