Exemple #1
0
        /// <summary>
        /// Learns a single phrase: records it in <c>SourcePhrases</c> (if not already present),
        /// passes its tokens to <c>LearnTokens</c>, and finally adds the value returned by
        /// <c>GetTerminatorUnigram()</c> to <c>Chain</c> under the key built from the last
        /// <c>Level</c> tokens of the phrase.
        /// </summary>
        /// <param name="phrase">
        /// The phrase to learn. A null/default phrase, or one that splits into fewer than
        /// <c>Level</c> tokens, is ignored.
        /// </param>
        public void Learn(TPhrase phrase)
        {
            Logger.Info($"Learning phrase: '{phrase}'");
            if (phrase == null || phrase.Equals(default(TPhrase)))
            {
                return;
            }

            // Split the phrase exactly once and reuse the result for both the
            // length check and the learning step (avoids a second enumeration).
            var tokens = SplitTokens(phrase).ToArray();

            // Ignore particularly short phrases
            if (tokens.Length < Level)
            {
                Logger.Info($"Phrase {phrase} too short - skipped");
                return;
            }

            // Add it to the source lines so we can ignore it
            // when learning in future
            if (!SourcePhrases.Contains(phrase))
            {
                Logger.Debug($"Adding phrase {phrase} to source lines");
                SourcePhrases.Add(phrase);
            }

            LearnTokens(tokens);

            // Collect the last [Level] tokens, oldest first, to form the final key.
            var lastCol = new List<TUnigram>();

            for (var j = Level; j > 0; j--)
            {
                var index = tokens.Length - j;

                if (index >= 0)
                {
                    TUnigram previous = tokens[index];
                    Logger.Debug($"Adding TGram ({typeof(TUnigram)}) {previous} to lastCol");
                    lastCol.Add(previous);
                }
                else
                {
                    // The length guard above means this should not happen; pad
                    // explicitly rather than relying on a thrown exception.
                    lastCol.Add(GetPrepadUnigram());
                }
            }

            Logger.Debug($"Reached final key for phrase {phrase}");
            var finalKey = new NgramContainer<TUnigram>(lastCol.ToArray());

            // Mark the end of the phrase in the chain.
            Chain.AddOrCreate(finalKey, GetTerminatorUnigram());
        }
        /// <summary>
        /// Learns a single phrase: records it in <c>SourcePhrases</c> (if not already present),
        /// passes its tokens to <c>LearnTokens</c>, and finally adds the value returned by
        /// <c>GetTerminatorUnigram()</c> to <c>Chain</c> under the key built from the last
        /// <c>Level</c> tokens of the phrase.
        /// </summary>
        /// <param name="phrase">
        /// The phrase to learn. A null/default phrase, or one that splits into fewer than
        /// <c>Level</c> tokens, is ignored.
        /// </param>
        public void Learn(TPhrase phrase)
        {
            if (phrase == null || phrase.Equals(default(TPhrase)))
            {
                return;
            }

            // Split the phrase exactly once and reuse the result for both the
            // length check and the learning step (avoids a second enumeration).
            var tokens = SplitTokens(phrase).ToArray();

            // Ignore particularly short phrases
            if (tokens.Length < Level)
            {
                return;
            }

            // Add it to the source lines so we can ignore it
            // when learning in future
            if (!SourcePhrases.Contains(phrase))
            {
                SourcePhrases.Add(phrase);
            }

            LearnTokens(tokens);

            // Collect the last [Level] tokens, oldest first, to form the final key.
            var lastCol = new List<TUnigram>();

            for (var j = Level; j > 0; j--)
            {
                var index = tokens.Length - j;

                if (index >= 0)
                {
                    lastCol.Add(tokens[index]);
                }
                else
                {
                    // The length guard above means this should not happen; pad
                    // explicitly rather than relying on a thrown exception.
                    lastCol.Add(GetPrepadUnigram());
                }
            }

            var finalKey = new NgramContainer<TUnigram>(lastCol.ToArray());

            // Mark the end of the phrase in the chain.
            Chain.AddOrCreate(finalKey, GetTerminatorUnigram());
        }
Exemple #3
0
        /// <summary>
        /// Iterates over a list of tokens and stores each of them in the model at a composite key
        /// generated from the [Level] tokens that precede it.
        /// </summary>
        /// <param name="tokens">
        /// The tokens to learn, in order. When a token has fewer than [Level] predecessors, the
        /// missing key positions are filled with the value returned by <c>GetPrepadUnigram()</c>.
        /// </param>
        private void LearnTokens(IReadOnlyList<TUnigram> tokens)
        {
            for (var i = 0; i < tokens.Count; i++)
            {
                var current = tokens[i];
                var previousCol = new List<TUnigram>();

                // From the current token's index, gather the [Level] tokens that came before it,
                // oldest first.
                for (var j = Level; j > 0; j--)
                {
                    var index = i - j;

                    if (index < 0)
                    {
                        // We would be looking at a position before the beginning of the
                        // phrase, so use the pre-pad unigram instead.
                        previousCol.Add(GetPrepadUnigram());
                    }
                    else
                    {
                        // index is in [0, i), so this access is always within bounds.
                        previousCol.Add(tokens[index]);
                    }
                }

                // Create the composite key based on the previous tokens
                var key = new NgramContainer<TUnigram>(previousCol.ToArray());

                // Add the current token to the markov model at the composite key
                Chain.AddOrCreate(key, current);
            }
        }