Example #1
0
        // Returns any viable options for the next word based on
        // what was provided as input, based on the trained model.
        // Returns any viable options for the next word based on
        // what was provided as input, based on the trained model.
        // Returns an empty list when the input n-gram was never seen.
        public List <TUnigram> GetMatches(TPhrase input)
        {
            var inputArray = SplitTokens(input).ToArray();

            // Normalise the token window to exactly [Level] entries:
            // keep only the trailing [Level] tokens, or left-pad short input.
            // Use Length rather than the LINQ Count() extension on an array.
            if (inputArray.Length > Level)
            {
                inputArray = inputArray.Skip(inputArray.Length - Level).ToArray();
            }
            else if (inputArray.Length < Level)
            {
                inputArray = PadArrayLow(inputArray);
            }

            var key    = new NgramContainer <TUnigram>(inputArray);
            var chosen = new List <TUnigram>();

            try
            {
                chosen = Chain.GetValuesForKey(key);
            }
            catch (KeyNotFoundException)
            {
                // Unseen n-gram: deliberately fall through and return
                // the empty list allocated above.
            }

            return(chosen);
        }
Example #2
0
        // Learn a single phrase into the model: record it as a source line,
        // feed its tokens to LearnTokens, then register the final n-gram key
        // so the model knows where the phrase may terminate.
        public void Learn(TPhrase phrase)
        {
            Logger.Info($"Learning phrase: '{phrase}'");
            if (phrase == null || phrase.Equals(default(TPhrase)))
            {
                return;
            }

            // Split the sentence to an array of words.
            // Tokenise once up front instead of re-splitting for the
            // length check and again for the array.
            var tokens = SplitTokens(phrase).ToArray();

            // Ignore particularly short phrases
            if (tokens.Length < Level)
            {
                Logger.Info($"Phrase {phrase} too short - skipped");
                return;
            }

            // Add it to the source lines so we can ignore it
            // when learning in future
            if (!SourcePhrases.Contains(phrase))
            {
                Logger.Debug($"Adding phrase {phrase} to source lines");
                SourcePhrases.Add(phrase);
            }

            LearnTokens(tokens);

            var lastCol = new List <TUnigram>();

            // Collect the trailing [Level] tokens that form the terminator key.
            // A bounds check replaces the old IndexOutOfRangeException handler:
            // exceptions are not control flow, and this mirrors LearnTokens.
            for (var j = Level; j > 0; j--)
            {
                var index = tokens.Length - j;
                TUnigram previous;
                if (index >= 0)
                {
                    previous = tokens[index];
                    Logger.Debug($"Adding TGram ({typeof(TUnigram)}) {previous} to lastCol");
                }
                else
                {
                    // Defensive only: the length guard above means this
                    // cannot normally be reached.
                    previous = GetPrepadUnigram();
                }
                lastCol.Add(previous);
            }

            Logger.Debug($"Reached final key for phrase {phrase}");
            var finalKey = new NgramContainer <TUnigram>(lastCol.ToArray());

            Chain.AddOrCreate(finalKey, GetTerminatorUnigram());
        }
        // Learn a single phrase into the model: record it as a source line,
        // feed its tokens to LearnTokens, then register the final n-gram key
        // so the model knows where the phrase may terminate.
        public void Learn(TPhrase phrase)
        {
            if (phrase == null || phrase.Equals(default(TPhrase)))
            {
                return;
            }

            // Split the sentence to an array of words.
            // Tokenise once up front instead of re-splitting for the
            // length check and again for the array.
            var tokens = SplitTokens(phrase).ToArray();

            // Ignore particularly short phrases
            if (tokens.Length < Level)
            {
                return;
            }

            // Add it to the source lines so we can ignore it
            // when learning in future
            if (!SourcePhrases.Contains(phrase))
            {
                SourcePhrases.Add(phrase);
            }

            LearnTokens(tokens);

            var lastCol = new List <TUnigram>();

            // Collect the trailing [Level] tokens that form the terminator key.
            // A bounds check replaces the old IndexOutOfRangeException handler
            // (which also declared an unused exception variable): exceptions
            // are not control flow, and this mirrors LearnTokens.
            for (var j = Level; j > 0; j--)
            {
                var index = tokens.Length - j;

                // Defensive fallback only: the length guard above means the
                // pre-pad branch cannot normally be reached.
                lastCol.Add(index >= 0 ? tokens[index] : GetPrepadUnigram());
            }

            var finalKey = new NgramContainer <TUnigram>(lastCol.ToArray());

            Chain.AddOrCreate(finalKey, GetTerminatorUnigram());
        }
Example #4
0
        /// <summary>
        /// Generate a single phrase of output data based on the current model
        /// </summary>
        /// <param name="seed">Optionally provide the start of the phrase to generate from</param>
        /// <returns></returns>
        /// <summary>
        /// Generate a single phrase of output data based on the current model
        /// </summary>
        /// <param name="seed">Optionally provide the start of the phrase to generate from</param>
        /// <returns>The rebuilt phrase, including any seed tokens that were provided</returns>
        private TPhrase WalkLine(TPhrase seed)
        {
            IEnumerable <TUnigram> tokensSeed = SplitTokens(seed);
            var paddedSeed = PadArrayLow(tokensSeed != null ? tokensSeed.ToArray() : null);
            var built      = new List <TUnigram> ();

            // Allocate a queue to act as the memory, which is n
            // levels deep of previous words that were used.
            // A generic queue avoids the boxing and per-iteration
            // Cast<TUnigram>() that the non-generic Queue required.
            var q = new Queue <TUnigram> (paddedSeed);

            // If the start of the generated text has been seeded,
            // append that before generating the rest
            if (!seed.Equals(GetPrepadUnigram()))
            {
                built.AddRange(SplitTokens(seed));
            }

            // Hard cap on output length guards against models that
            // never reach a terminator unigram
            while (built.Count < 1500)
            {
                // Stop as soon as the current n-gram window is unknown to the model
                var key = new NgramContainer <TUnigram> (q.ToArray());
                if (!Chain.Contains(key))
                {
                    break;
                }

                // The very first token is picked uniformly at random so
                // generated phrases do not always open the same way;
                // subsequent tokens use the configured selector
                TUnigram chosen;
                if (built.Count == 0)
                {
                    chosen = new UnweightedRandomUnigramSelector <TUnigram> ().SelectUnigram(Chain.GetValuesForKey(key));
                }
                else
                {
                    chosen = UnigramSelector.SelectUnigram(Chain.GetValuesForKey(key));
                }

                // Slide the n-gram window forward by one token
                q.Dequeue();
                q.Enqueue(chosen);
                built.Add(chosen);
            }

            return(RebuildPhrase(built));
        }
Example #5
0
        /// <summary>
        /// Iterate over a list of TGrams and store each of them in the model at a composite key genreated from its prior [Level] number of TGrams
        /// </summary>
        /// <param name="tokens"></param>
        /// <summary>
        /// Iterate over a list of TGrams and store each of them in the model at a composite key generated from its prior [Level] number of TGrams
        /// </summary>
        /// <param name="tokens">The full token sequence of one learned phrase</param>
        private void LearnTokens(IReadOnlyList <TUnigram> tokens)
        {
            for (var i = 0; i < tokens.Count; i++)
            {
                var current     = tokens[i];
                var previousCol = new List <TUnigram>();

                // From the current token's index, get hold of the previous [Level] number of tokens that came before it
                for (var j = Level; j > 0; j--)
                {
                    // When the lookback index falls before the start of the
                    // phrase, substitute the pre-pad unigram. This bounds
                    // check made the old IndexOutOfRangeException handler
                    // unreachable, so the dead try/catch has been removed.
                    previousCol.Add(i - j < 0 ? GetPrepadUnigram() : tokens[i - j]);
                }

                // create the composite key based on previous tokens
                var key = new NgramContainer <TUnigram>(previousCol.ToArray());

                // add the current token to the markov model at the composite key
                Chain.AddOrCreate(key, current);
            }
        }