示例#1
0
        /// <summary>
        /// Load utterances from environment and add them to the persistent predictor.
        /// </summary>
        /// <remarks>
        /// Every line produced by <c>Environment.RecallUtterancesAsync()</c> is parsed with
        /// <c>UtteranceData.FromLine</c>, each tile is mapped to a token through <c>_tokens</c>,
        /// and the resulting token list, wrapped in a leading and a trailing 0, is added to
        /// <c>PersistantPredictor</c> with <c>PersistedSequenceWeight</c>. Once all lines have
        /// been consumed, the vocabulary list and the suggestions view are refreshed.
        /// </remarks>
        /// <returns>Task</returns>
        internal async Task LoadUtterancesAsync()
        {
            using (var reader = await Environment.RecallUtterancesAsync())
            {
                for (var line = await reader.ReadLineAsync();
                     line != null;
                     line = await reader.ReadLineAsync())
                {
                    var utteranceData = UtteranceData.FromLine(line);
                    var utterance     = utteranceData.Sequence;

                    Debug.Assert(utterance.Count != 0);

                    // The sequence handed to the predictor is delimited by 0 at both ends.
                    var sequence          = new List<int> { 0 };
                    var isFirstWordToCome = true;
                    var environment       = Model.Environment;
                    foreach (var tile in utterance)
                    {
                        var tokenString = tile.ToTokenString();
                        Debug.Assert(!string.IsNullOrWhiteSpace(tokenString));

                        // Only the first tile for which TryCapitalizeFirstWord yields a result is
                        // considered for case folding; later tiles are taken verbatim.
                        if (isFirstWordToCome && environment.TryCapitalizeFirstWord(tokenString) != null)
                        {
                            isFirstWordToCome = false;

                            var lowerString       = tokenString.ToLowerInvariant();
                            var capitalizedString = environment.TryCapitalizeFirstWord(lowerString);

                            // If the stored text is exactly the capitalized form of its lower-case
                            // spelling, and that lower-case spelling is already a known token,
                            // record the lower-case token instead.
                            // NOTE(review): presumably this undoes sentence-start capitalization - confirm.
                            if (capitalizedString == tokenString &&
                                lowerString != capitalizedString &&
                                _tokens.TryGetToken(lowerString, out var _))
                            {
                                tokenString = lowerString;
                            }
                        }

                        var token = _tokens.GetToken(tokenString);
                        Debug.Assert(token != 0);
                        sequence.Add(token);
                    }
                    sequence.Add(0);

                    PersistantPredictor.AddSequence(sequence, PersistedSequenceWeight);
                }
            }

            PopulateVocabularyList();
            SetSuggestionsView(0, Count, false);
        }
        /// <summary>
        /// Creates the spelling vocabulary source and seeds its persistent predictor.
        /// </summary>
        /// <remarks>
        /// Each entry of <c>wordVocabularySource.Words</c> is truncated at its first zero element,
        /// wrapped in a leading and trailing 0, and added with
        /// <c>WordVocabularySource.SeedSequenceWeight</c>. Each additional symbol reported by the
        /// model's environment is then added as a single-element sequence with the same weight.
        /// </remarks>
        /// <param name="model">Application model, forwarded to the base constructor.</param>
        /// <param name="wordVocabularySource">Word source whose words seed the predictor.</param>
        internal SpellingVocabularySource(ApplicationModel model, WordVocabularySource wordVocabularySource)
            : base(model: model, predictorWidth: 4)
        {
            _wordVocabularySource = wordVocabularySource;

            // NOTE(review): Environment is handed straight to the SortedSet constructor;
            // whether it acts as a comparer or as initial content is not visible here.
            _characterSet = new SortedSet<string>(Environment);

            foreach (var word in _wordVocabularySource.Words)
            {
                // Count the leading elements up to (not including) the first zero.
                var limit  = word.Length;
                var length = 0;
                for (; length < limit; length++)
                {
                    if (word[length] == 0)
                    {
                        break;
                    }
                }

                if (length == 0)
                {
                    continue;
                }

                // Slots 0 and length + 1 keep their default value 0 and delimit the word.
                var seed = new int[length + 2];
                for (var position = 0; position < length; position++)
                {
                    seed[position + 1] = word[position];
                }
                Debug.Assert(seed[0] == 0);
                Debug.Assert(seed[length + 2 - 1] == 0);

                PersistantPredictor.AddSequence(seed, WordVocabularySource.SeedSequenceWeight);
            }

            foreach (var symbol in model.Environment.GetAdditionalSymbols())
            {
                PersistantPredictor.AddSequence(new int[] { symbol }, WordVocabularySource.SeedSequenceWeight);
            }

            _unicodeVocabularySource = new UnicodeVocabularySource(model, this);

            PopulateVocabularyList();
        }
        /// <summary>
        /// Adds a word to the persistent predictor as a sequence of UTF-32 code points.
        /// </summary>
        /// <remarks>
        /// The word is converted code point by code point, wrapped in a leading and trailing 0,
        /// and added with <c>WordVocabularySource.SeedSequenceWeight</c>. Any code point not yet
        /// present in <c>_characterSet</c> is inserted there, and the vocabulary list is
        /// repopulated once at the end if at least one new character was inserted.
        /// </remarks>
        /// <param name="word">The word to add.</param>
        /// <exception cref="System.ArgumentException">
        /// Thrown by <c>char.ConvertToUtf32</c> when <paramref name="word"/> contains an
        /// unpaired surrogate.
        /// </exception>
        internal void AddNewWord(string word)
        {
            var needToRepopulate = false;

            var sequence = new List<int>(word.Length + 2)
            {
                0
            };

            for (var index = 0; index < word.Length;)
            {
                var utf32 = char.ConvertToUtf32(word, index);

                // Advance past every UTF-16 code unit consumed by this code point.
                // char.GetUnicodeCategory(string, int) reports the category of the whole
                // supplementary character for a valid pair (never Surrogate), so it cannot
                // be used to detect pairs; stepping by one would land on the low surrogate
                // and make the next ConvertToUtf32 call throw.
                index += char.IsSurrogatePair(word, index) ? 2 : 1;

                sequence.Add(utf32);
                if (_characterSet.Add(char.ConvertFromUtf32(utf32)))
                {
                    needToRepopulate = true;
                }
            }
            sequence.Add(0);

            PersistantPredictor.AddSequence(sequence, WordVocabularySource.SeedSequenceWeight);

            if (needToRepopulate)
            {
                PopulateVocabularyList();
            }
        }
示例#4
0
 /// <summary>
 /// Subtracts <c>DeltaPredictor</c> from <c>PersistantPredictor</c> and then adds
 /// <paramref name="sequence"/> to <c>PersistantPredictor</c>.
 /// </summary>
 /// <param name="sequence">Sequence forwarded to <c>PersistantPredictor.AddSequence</c>.</param>
 /// <param name="increment">Increment forwarded to <c>PersistantPredictor.AddSequence</c>.</param>
 internal void RollbackAndAddSequence(IReadOnlyList <int> sequence, int increment)
 {
     // NOTE(review): presumably DeltaPredictor holds provisional counts that were previously
     // merged into the persistent predictor and are withdrawn here - confirm against callers.
     PersistantPredictor.Subtract(DeltaPredictor);
     PersistantPredictor.AddSequence(sequence, increment);
 }