Ejemplo n.º 1
0
        /// <summary>
        /// Stores every token of a phrase in the model, keyed by the [Level] tokens that precede it.
        /// Positions before the start of the phrase are filled with the pre-pad gram.
        /// </summary>
        /// <param name="tokens">The tokens of a single phrase, in order.</param>
        private void LearnTokens(IReadOnlyList<TGram> tokens)
        {
            for (var i = 0; i < tokens.Count; i++)
            {
                var previousCol = new List<TGram>();

                // Collect the window tokens[i - Level] .. tokens[i - 1], oldest first.
                for (var j = Level; j > 0; j--)
                {
                    var index = i - j;

                    // index is always < i, so only the lower bound needs guarding;
                    // no exception handling is required for the lookup.
                    if (index < 0)
                    {
                        previousCol.Add(GetPrepadGram());
                    }
                    else
                    {
                        previousCol.Add(tokens[index]);
                    }
                }

                var key = new SourceGrams<TGram>(previousCol.ToArray());
                AddOrCreate(key, tokens[i]);
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Learns a single phrase: records it in <c>SourceLines</c>, stores each of its tokens
        /// in the model, and finally stores the terminator gram under the key formed by the
        /// last [Level] tokens of the phrase.
        /// </summary>
        /// <param name="phrase">
        /// The phrase to learn. Null/default phrases and phrases with fewer than [Level]
        /// tokens are ignored.
        /// </param>
        public void Learn(TPhrase phrase)
        {
            if (phrase == null || phrase.Equals(default(TPhrase)))
            {
                return;
            }

            // Tokenise once and reuse the result for both the length check and the learning.
            var tokens = SplitTokens(phrase).ToArray();

            // Ignore particularly short sentences
            if (tokens.Length < Level)
            {
                return;
            }

            // Record the phrase in the source lines (only once per distinct phrase)
            if (!SourceLines.Contains(phrase))
            {
                SourceLines.Add(phrase);
            }

            LearnTokens(tokens);

            // tokens.Length >= Level is guaranteed by the check above, so the trailing
            // window never reaches before the start of the array and needs no padding.
            var lastCol = tokens.Skip(tokens.Length - Level).ToArray();
            var finalKey = new SourceGrams<TGram>(lastCol);

            AddOrCreate(finalKey, GetTerminatorGram());
        }
Ejemplo n.º 3
0
 /// <summary>
 /// Add a TGram to the markov models store with a composite key of the previous [Level] number of TGrams.
 /// If the key is already present the value is appended to its list; otherwise a new
 /// single-element list is stored under the key.
 /// </summary>
 /// <param name="key">The composite key under which to add the TGram value</param>
 /// <param name="value">The value to add to the store</param>
 private void AddOrCreate(SourceGrams<TGram> key, TGram value)
 {
     lock (lockObj)
     {
         // Single lookup instead of ContainsKey followed by the indexer.
         List<TGram> existing;
         if (Model.TryGetValue(key, out existing))
         {
             existing.Add(value);
         }
         else
         {
             Model.TryAdd(key, new List<TGram> { value });
         }
     }
 }
Ejemplo n.º 4
0
        /// <summary>
        /// Learns a single phrase: records it in <c>SourceLines</c>, stores each of its tokens
        /// in the model, and finally stores the terminator gram under the key formed by the
        /// last [Level] tokens of the phrase.
        /// </summary>
        /// <param name="phrase">
        /// The phrase to learn. Null/default phrases and phrases with fewer than [Level]
        /// tokens are ignored.
        /// </param>
        public void Learn(TPhrase phrase)
        {
            if (phrase == null || phrase.Equals(default(TPhrase)))
            {
                return;
            }

            // Tokenise once and reuse the result for both the length check and the learning.
            var tokens = SplitTokens(phrase).ToArray();

            // Ignore particularly short sentences
            if (tokens.Length < Level)
            {
                return;
            }

            // Record the phrase in the source lines (only once per distinct phrase)
            if (!SourceLines.Contains(phrase))
            {
                SourceLines.Add(phrase);
            }

            LearnTokens(tokens);

            // tokens.Length >= Level is guaranteed by the check above, so the trailing
            // window never reaches before the start of the array and needs no padding.
            var lastCol = tokens.Skip(tokens.Length - Level).ToArray();
            var finalKey = new SourceGrams<TGram>(lastCol);

            AddOrCreate(finalKey, GetTerminatorGram());
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Returns any viable options for the next word based on what was provided
        /// as input, based on the trained model.
        /// </summary>
        /// <param name="input">The phrase whose trailing [Level] tokens form the lookup key.</param>
        /// <returns>
        /// The list stored in the model for that key, or an empty list when the model
        /// has no entry for it.
        /// </returns>
        public List<TGram> GetMatches(TPhrase input)
        {
            var inputArray = SplitTokens(input).ToArray();

            if (inputArray.Length > Level)
            {
                // Only the last [Level] tokens take part in the key.
                inputArray = inputArray.Skip(inputArray.Length - Level).ToArray();
            }
            else if (inputArray.Length < Level)
            {
                inputArray = PadArrayLow(inputArray);
            }

            var key = new SourceGrams<TGram>(inputArray);

            // An unseen context simply has no options; do not throw KeyNotFoundException.
            List<TGram> chosen;
            if (!Model.TryGetValue(key, out chosen))
            {
                return new List<TGram>();
            }

            return chosen;
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Iterate over a list of TGrams and store each of them in the model at a composite key generated from its prior [Level] number of TGrams
        /// </summary>
        /// <param name="tokens">The tokens of a single phrase, in order.</param>
        private void LearnTokens(IReadOnlyList<TGram> tokens)
        {
            for (var i = 0; i < tokens.Count; i++)
            {
                var previousCol = new List<TGram>();

                // From the current token's index, get hold of the previous [Level] number of tokens that came before it
                for (var j = Level; j > 0; j--)
                {
                    var index = i - j;

                    // A negative index means we would be looking at tokens before the beginning
                    // of the phrase, so pad instead. index is always < i, so the upper bound
                    // cannot be exceeded and no exception handling is needed.
                    if (index < 0)
                    {
                        previousCol.Add(GetPrepadGram());
                    }
                    else
                    {
                        previousCol.Add(tokens[index]);
                    }
                }

                // create the composite key based on previous tokens
                var key = new SourceGrams<TGram>(previousCol.ToArray());

                // add the current token to the markov model at the composite key
                AddOrCreate(key, tokens[i]);
            }
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Generates a phrase by repeatedly looking up the current window of tokens in the
        /// model and appending a randomly chosen follower, until the model has no entry for
        /// the window or the length cap is reached.
        /// </summary>
        /// <param name="seed">
        /// The phrase to start from. Unless it equals the pre-pad gram, its tokens are
        /// placed at the start of the generated output.
        /// </param>
        /// <returns>The phrase rebuilt from the generated tokens.</returns>
        private TPhrase WalkLine(TPhrase seed)
        {
            // Upper bound on the number of generated tokens.
            const int maxTokens = 1500;

            var arraySeed = PadArrayLow(SplitTokens(seed)?.ToArray());
            var built     = new List<TGram>();

            // Allocate a queue to act as the memory, which is n
            // levels deep of previous words that were used
            var q = new Queue<TGram>(arraySeed);

            // If the start of the generated text has been seeded,
            // append that before generating the rest
            if (!seed.Equals(GetPrepadGram()))
            {
                built.AddRange(SplitTokens(seed));
            }

            while (built.Count < maxTokens)
            {
                // Choose a new word to add from the model
                var key = new SourceGrams<TGram>(q.ToArray());

                List<TGram> list;
                if (!Model.TryGetValue(key, out list))
                {
                    // No known follower for this window: the walk ends here.
                    break;
                }

                var chosen = list[RandomGenerator.Next(list.Count)];

                // Slide the window forward by one token.
                q.Dequeue();
                q.Enqueue(chosen);
                built.Add(chosen);
            }

            return(RebuildPhrase(built));
        }