/// <summary>
/// Add a TGram to the Markov model's store with a composite key of the previous [Level] number of TGrams
/// </summary>
/// <param name="key">The composite key under which to add the TGram value</param>
/// <param name="value">The value to add to the store</param>
private void AddOrCreate(SourceGrams<TGram> key, TGram value)
{
    lock (lockObj)
    {
        if (!Model.ContainsKey(key))
        {
            Model.TryAdd(key, new List<TGram> { value });
        }
        else
        {
            Model[key].Add(value);
        }
    }
}
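Two notes on AddOrCreate. First, the TryAdd call suggests Model is a ConcurrentDictionary, but the List<TGram> values are not thread-safe, so the lock is what makes the check-then-add and the subsequent List.Add atomic. Second, the composite key only works if SourceGrams<TGram> compares by value rather than by reference; otherwise two keys built from the same [Level] tokens would never collide in the dictionary. Below is a minimal sketch of what such a key type needs to provide (an illustration only, not the library's actual implementation):

using System.Collections.Generic;
using System.Linq;

// Illustrative sketch: the library ships its own SourceGrams<TGram>, which may
// differ from this. The essential property is value equality over the contained
// grams, so two keys built from the same [Level] tokens are equal and hash alike.
public class SourceGrams<T>
{
    public T[] Before { get; }

    public SourceGrams(params T[] args)
    {
        Before = args;
    }

    public override bool Equals(object obj)
    {
        var other = obj as SourceGrams<T>;
        return other != null && Before.SequenceEqual(other.Before);
    }

    public override int GetHashCode()
    {
        unchecked
        {
            var hash = 17;
            foreach (var gram in Before)
            {
                hash = hash * 31 + EqualityComparer<T>.Default.GetHashCode(gram);
            }
            return hash;
        }
    }
}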
public void Learn(TPhrase phrase)
{
    if (phrase == null || phrase.Equals(default(TPhrase)))
    {
        return;
    }

    // Ignore particularly short sentences
    if (SplitTokens(phrase).Count() < Level)
    {
        return;
    }

    // Add it to the source lines so we can ignore it
    // when learning in future
    if (!SourceLines.Contains(phrase))
    {
        SourceLines.Add(phrase);
    }

    // Split the sentence into an array of words
    var tokens = SplitTokens(phrase).ToArray();
    LearnTokens(tokens);

    // Record the grams that close the phrase, so generation
    // knows where a line may legitimately end
    var lastCol = new List<TGram>();
    for (var j = Level; j > 0; j--)
    {
        TGram previous;
        try
        {
            previous = tokens[tokens.Length - j];
            lastCol.Add(previous);
        }
        catch (IndexOutOfRangeException)
        {
            previous = GetPrepadGram();
            lastCol.Add(previous);
        }
    }

    var finalKey = new SourceGrams<TGram>(lastCol.ToArray());
    AddOrCreate(finalKey, GetTerminatorGram());
}
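As a usage sketch, assume a concrete string implementation of this generic class, here called StringMarkov with TPhrase = string and TGram = string; the class name and constructor are assumptions for illustration:

// Hypothetical usage; StringMarkov and its constructor are assumed here.
var model = new StringMarkov(2);            // Level 2: keys are pairs of words
model.Learn("the cat sat on the mat");
model.Learn("the cat ate the mouse");
// Each word is now stored under the two grams that preceded it, and a
// terminator gram is stored under the final two words of each phrase.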
// Returns the viable options for the next gram, given the trailing
// [Level] tokens of the provided input, according to the trained model.
public List<TGram> GetMatches(TPhrase input)
{
    var inputArray = SplitTokens(input).ToArray();
    if (inputArray.Length > Level)
    {
        // Only the last [Level] tokens form the lookup key
        inputArray = inputArray.Skip(inputArray.Length - Level).ToArray();
    }
    else if (inputArray.Length < Level)
    {
        // Pad short inputs up to [Level] with prepad grams
        inputArray = PadArrayLow(inputArray);
    }

    var key = new SourceGrams<TGram>(inputArray);

    // Note: the indexer throws KeyNotFoundException if this exact key was never learned
    var chosen = Model[key];
    return chosen;
}
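Continuing the hypothetical StringMarkov example, GetMatches looks up the trailing two words of the input:

// Continuing the assumed StringMarkov example above:
var nextWords = model.GetMatches("the cat");
// nextWords holds every gram observed after the key ("the", "cat"):
// here { "sat", "ate" }, one entry per observed occurrence.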
/// <summary>
/// Iterate over a list of TGrams and store each of them in the model at a composite key generated from the prior [Level] number of TGrams
/// </summary>
/// <param name="tokens"></param>
private void LearnTokens(IReadOnlyList<TGram> tokens)
{
    for (var i = 0; i < tokens.Count; i++)
    {
        var current = tokens[i];
        var previousCol = new List<TGram>();

        // From the current token's index, get hold of the [Level] tokens that came before it
        for (var j = Level; j > 0; j--)
        {
            TGram previous;
            try
            {
                // This case addresses when we are at a token index less than the value of [Level],
                // and would otherwise be looking at tokens before the beginning of the phrase
                if (i - j < 0)
                {
                    previousCol.Add(GetPrepadGram());
                }
                else
                {
                    previous = tokens[i - j];
                    previousCol.Add(previous);
                }
            }
            catch (IndexOutOfRangeException)
            {
                previous = GetPrepadGram();
                previousCol.Add(previous);
            }
        }

        // Create the composite key from the previous tokens
        var key = new SourceGrams<TGram>(previousCol.ToArray());

        // Add the current token to the Markov model at the composite key
        AddOrCreate(key, current);
    }
}
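To make the key construction concrete, here is a hand trace of LearnTokens over the tokens of "the cat sat" at Level 2, writing the prepad gram as _:

// Tokens: ["the", "cat", "sat"], Level = 2, prepad gram written as _
// i = 0: key = (_, _)          -> value "the"
// i = 1: key = (_, "the")      -> value "cat"
// i = 2: key = ("the", "cat")  -> value "sat"
// Learn() then adds key ("cat", "sat") -> terminator gram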
private TPhrase WalkLine(TPhrase seed)
{
    var arraySeed = PadArrayLow(SplitTokens(seed)?.ToArray());
    var built = new List<TGram>();

    // Allocate a queue to act as the memory, which is n
    // levels deep of previous words that were used
    var q = new Queue<TGram>(arraySeed);

    // If the start of the generated text has been seeded,
    // append that before generating the rest
    if (!seed.Equals(GetPrepadGram()))
    {
        built.AddRange(SplitTokens(seed));
    }

    // Cap generation so an unlucky walk cannot loop forever
    while (built.Count < 1500)
    {
        // Choose a new gram from the model, keyed by the queue's current contents
        var key = new SourceGrams<TGram>(q.ToArray());
        if (Model.ContainsKey(key))
        {
            var list = Model[key];
            var chosen = list[RandomGenerator.Next(list.Count)];

            // Slide the memory window forward by one gram
            q.Dequeue();
            q.Enqueue(chosen);
            built.Add(chosen);
        }
        else
        {
            // No continuation was ever observed for this key; stop here
            break;
        }
    }

    return RebuildPhrase(built);
}
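WalkLine is private, so generation presumably goes through a public wrapper. Conceptually, a walk over the two phrases learned in the earlier example proceeds like this (a hand trace, not library output):

// Hand trace, Level 2, starting from seed "the cat":
// queue = ("the", "cat") -> Model[key] = { "sat", "ate" }, pick one at random, say "sat"
// queue = ("cat", "sat") -> Model[key] = { "on" }, pick "on"
// queue = ("sat", "on")  -> ... and so on, until an unseen key or the 1500-gram cap
// RebuildPhrase(built) then joins the chosen grams back into a TPhrase.

Note the design choice in the random pick: because AddOrCreate appends one list entry per observed occurrence, list[RandomGenerator.Next(list.Count)] samples continuations in proportion to how often they were seen in the training data, with no explicit weighting needed.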