Beispiel #1
0
        /// <summary>
        /// Reads the recalled utterances line by line, turns each one into a token
        /// sequence bracketed by token 0 at both ends, and adds that sequence to the
        /// persistent predictor. Once all lines are consumed, the vocabulary list is
        /// repopulated and the suggestions view is reset.
        /// </summary>
        /// <returns>A task that completes when all lines have been processed and the views refreshed.</returns>
        internal async Task LoadUtterancesAsync()
        {
            using (var reader = await Environment.RecallUtterancesAsync())
            {
                // Running count of lines read; it is only incremented, never read, in this method.
                var lineNo = 0;

                var line = await reader.ReadLineAsync();
                while (line != null)
                {
                    lineNo++;

                    var tiles = UtteranceData.FromLine(line).Sequence;
                    Debug.Assert(tiles.Count != 0);

                    // Every stored sequence starts and ends with token 0.
                    var tokenSequence     = new List<int> { 0 };
                    var awaitingFirstWord = true;
                    var environment       = Model.Environment;

                    foreach (var tile in tiles)
                    {
                        var tokenString = tile.ToTokenString();
                        Debug.Assert(!string.IsNullOrWhiteSpace(tokenString));

                        // Tiles for which TryCapitalizeFirstWord yields null leave the flag set,
                        // so the next tile is examined as the potential first word instead.
                        if (awaitingFirstWord && environment.TryCapitalizeFirstWord(tokenString) != null)
                        {
                            awaitingFirstWord = false;

                            var lowered       = tokenString.ToLowerInvariant();
                            var recapitalized = environment.TryCapitalizeFirstWord(lowered);

                            // The word is stored in lower case only when its current spelling is exactly
                            // what first-word capitalization of the lower-case form produces, the two
                            // forms actually differ, and the lower-case form is already a known token.
                            var capitalizedByPositionOnly = recapitalized == tokenString && lowered != recapitalized;
                            if (capitalizedByPositionOnly && _tokens.TryGetToken(lowered, out var _))
                            {
                                tokenString = lowered;
                            }
                        }

                        var token = _tokens.GetToken(tokenString);
                        Debug.Assert(token != 0);
                        tokenSequence.Add(token);
                    }

                    tokenSequence.Add(0);

                    PersistantPredictor.AddSequence(tokenSequence, PersistedSequenceWeight);

                    line = await reader.ReadLineAsync();
                }
            }

            PopulateVocabularyList();
            SetSuggestionsView(0, Count, false);
        }
        /// <summary>
        /// Debugging aid: breaks into the debugger when the tail of <paramref name="context"/>
        /// equals the tokens of <paramref name="words"/>, compared from the last word backwards.
        /// </summary>
        /// <param name="tokens">Token table used to look up the token of each word.</param>
        /// <param name="context">Token sequence whose trailing entries are inspected.</param>
        /// <param name="words">Words expected at the end of the context, in order.</param>
        static void BreakOnProblemCase(StringTokens tokens, int[] context, params string[] words)
        {
            // More words than context entries can never match.
            if (words.Length > context.Length)
            {
                return;
            }

            // Count how many trailing words line up with the trailing context tokens.
            var matched = 0;
            while (matched < words.Length)
            {
                var word = words[words.Length - 1 - matched];

                // A word without a token, or a token mismatch, ends the comparison.
                if (!tokens.TryGetToken(word, out var token) || context[context.Length - 1 - matched] != token)
                {
                    break;
                }

                matched++;
            }

            if (matched == words.Length)
            {
                Debugger.Break();
            }
        }
        /// <summary>
        /// Walks every nascent compound prediction and looks for proper prefixes of its
        /// members' text that are themselves known tokens. A prefix token that has not been
        /// predicted yet, lies within the current bounds and is visible gets its own
        /// prediction inserted into the compound list, ordered by Index. Afterwards the
        /// compound prediction is moved ahead of the immediately preceding entries whose
        /// first prediction has a larger Index.
        /// </summary>
        private void CreatePrefixPredictions()
        {
            for (var nascentIndex = 0; nascentIndex < _nascents.Count; nascentIndex++)
            {
                var members = _nascents[nascentIndex]._list;

                // Deliberately not reset per member: the prefix length carries over from one
                // member of the compound prediction to the next.
                var prefixLength = 0;

                // Index-based iteration because members may be inserted while walking the list.
                for (var memberIndex = 0; memberIndex < members.Count; memberIndex++)
                {
                    var text = members[memberIndex].Text;

                    while (prefixLength < text.Length)
                    {
                        // Advance by two UTF-16 units when standing on a surrogate, otherwise by one.
                        prefixLength += char.IsSurrogate(text[prefixLength]) ? 2 : 1;

                        // Only proper prefixes are candidates; the full text ends the scan.
                        if (prefixLength >= text.Length)
                        {
                            break;
                        }

                        var prefix = text.Substring(0, prefixLength);
                        if (!_tokens.TryGetToken(prefix, out var prefixToken))
                        {
                            continue;
                        }

                        if (_predictedTokens.Contains(prefixToken))
                        {
                            Debug.WriteLine($"Already included found {prefix}");
                            continue;
                        }

                        var sourceIndex = _source.GetTokenIndex(prefixToken);
                        var withinBounds = _lowerBound <= sourceIndex && sourceIndex < _upperBound;
                        if (!withinBounds || !_isTokenVisible(prefixToken))
                        {
                            continue;
                        }

                        var prefixScore = _maker.GetScore(prefixToken);
                        // The looked-up text is not used further in this method.
                        var prefixText  = _tokens[prefixToken];

                        var prefixPrediction = CreatePrediction(prefixScore, _startOnFirstWord);

                        // Insert before the first member whose Index is not smaller than the new one.
                        var insertAt = 0;
                        for (; insertAt < members.Count; insertAt++)
                        {
                            if (!(members[insertAt].Index < prefixPrediction.Index))
                            {
                                break;
                            }
                        }
                        members.Insert(insertAt, prefixPrediction);

                        _predictedTokens.Add(prefixToken);

                        Debug.WriteLine($"TODO: Should add {prefix} ({prefixScore})");
                    }
                }

                // Find how far towards the front this compound prediction has to move.
                var destination = nascentIndex;
                while (0 < destination &&
                       members[0].Index < _nascents[destination - 1]._list[0].Index)
                {
                    destination--;
                }

                if (destination != nascentIndex)
                {
                    var moved = _nascents[nascentIndex];
                    _nascents.RemoveAt(nascentIndex);
                    _nascents.Insert(destination, moved);
                }
            }
        }