/// <summary>
/// Learns a single phrase: records it in <c>SourcePhrases</c>, passes its tokens to
/// <c>LearnTokens</c>, and registers the terminator unigram after the phrase's final n-gram.
/// </summary>
/// <param name="phrase">
/// The phrase to learn. A null/default phrase, or one with fewer than <c>Level</c> tokens, is ignored.
/// </param>
public void Learn(TPhrase phrase)
{
    Logger.Info($"Learning phrase: '{phrase}'");
    if (phrase == null || phrase.Equals(default(TPhrase)))
    {
        return;
    }

    // Tokenise once; the same array serves the length check, training and the final key.
    var tokens = SplitTokens(phrase).ToArray();

    // Ignore particularly short phrases
    if (tokens.Length < Level)
    {
        Logger.Info($"Phrase {phrase} too short - skipped");
        return;
    }

    // Add it to the source lines so we can ignore it when learning in future
    if (!SourcePhrases.Contains(phrase))
    {
        Logger.Debug($"Adding phrase {phrase} to source lines");
        SourcePhrases.Add(phrase);
    }

    LearnTokens(tokens);

    // Collect the last Level tokens, oldest first, to form the key that precedes the terminator.
    var lastCol = new List<TUnigram>();
    for (var j = Level; j > 0; j--)
    {
        var index = tokens.Length - j;
        if (index >= 0)
        {
            var previous = tokens[index];
            Logger.Debug($"Adding TGram ({typeof(TUnigram)}) {previous} to lastCol");
            lastCol.Add(previous);
        }
        else
        {
            // Not expected while the length guard above holds; pad rather than
            // relying on an IndexOutOfRangeException for control flow.
            Logger.Warn($"No token at offset {index} for phrase {phrase} - padding final key");
            lastCol.Add(GetPrepadUnigram());
        }
    }

    Logger.Debug($"Reached final key for phrase {phrase}");
    var finalKey = new NgramContainer<TUnigram>(lastCol.ToArray());
    Chain.AddOrCreate(finalKey, GetTerminatorUnigram());
}
/// <summary>
/// Learns a batch of phrases in parallel by invoking the single-phrase <c>Learn</c> on each.
/// </summary>
/// <param name="phrases">The phrases to learn.</param>
/// <param name="ignoreAlreadyLearnt">
/// When true (the default), phrases already present in <c>SourcePhrases</c> are skipped;
/// when false, every phrase in <paramref name="phrases"/> is learnt.
/// </param>
public void Learn(IEnumerable<TPhrase> phrases, bool ignoreAlreadyLearnt = true)
{
    if (ignoreAlreadyLearnt)
    {
        // Materialise the filter before the parallel loop starts: it reads SourcePhrases,
        // which the single-phrase Learn adds to while the loop is running.
        var newTerms = phrases.Where(s => !SourcePhrases.Contains(s)).ToList();

        // For every sentence which hasn't already been learnt, learn it
        Parallel.ForEach(newTerms, Learn);
    }
    else
    {
        // For every sentence, learn it
        Parallel.ForEach(phrases, Learn);
    }
}
/// <summary>
/// Learns a single phrase: records it in <c>SourcePhrases</c>, passes its tokens to
/// <c>LearnTokens</c>, and registers the terminator unigram after the phrase's final n-gram.
/// </summary>
/// <param name="phrase">
/// The phrase to learn. A null/default phrase, or one with fewer than <c>Level</c> tokens, is ignored.
/// </param>
public void Learn(TPhrase phrase)
{
    if (phrase == null || phrase.Equals(default(TPhrase)))
    {
        return;
    }

    // Tokenise once; the same array serves the length check, training and the final key.
    var tokens = SplitTokens(phrase).ToArray();

    // Ignore particularly short phrases
    if (tokens.Length < Level)
    {
        return;
    }

    // Add it to the source lines so we can ignore it when learning in future
    if (!SourcePhrases.Contains(phrase))
    {
        SourcePhrases.Add(phrase);
    }

    LearnTokens(tokens);

    // Collect the last Level tokens, oldest first, to form the key that precedes the terminator.
    var lastCol = new List<TUnigram>();
    for (var j = Level; j > 0; j--)
    {
        var index = tokens.Length - j;

        // A negative index is not expected while the length guard above holds; pad with the
        // pre-pad unigram rather than relying on an IndexOutOfRangeException for control flow.
        lastCol.Add(index >= 0 ? tokens[index] : GetPrepadUnigram());
    }

    var finalKey = new NgramContainer<TUnigram>(lastCol.ToArray());
    Chain.AddOrCreate(finalKey, GetTerminatorUnigram());
}
/// <summary>
/// Learns a batch of phrases in parallel by invoking the single-phrase <c>Learn</c> on each,
/// logging how many phrases are about to be learnt.
/// </summary>
/// <param name="phrases">The phrases to learn.</param>
/// <param name="ignoreAlreadyLearnt">
/// When true (the default), phrases already present in <c>SourcePhrases</c> are skipped;
/// when false, every phrase in <paramref name="phrases"/> is learnt.
/// </param>
public void Learn(IEnumerable<TPhrase> phrases, bool ignoreAlreadyLearnt = true)
{
    if (ignoreAlreadyLearnt)
    {
        // Materialise the filter once: it is both counted and iterated, and it reads
        // SourcePhrases, which the single-phrase Learn adds to while the loop is running.
        var newTerms = phrases.Where(s => !SourcePhrases.Contains(s)).ToList();
        Logger.Info(string.Format("Learning {0} lines", newTerms.Count));

        // For every sentence which hasn't already been learnt, learn it
        Parallel.ForEach(newTerms, Learn);
    }
    else
    {
        // Materialise so the input is enumerated once for both the count and the loop.
        var allTerms = phrases.ToList();
        Logger.Info(string.Format("Learning {0} lines", allTerms.Count));

        // For every sentence, learn it
        Parallel.ForEach(allTerms, Learn);
    }
}
/// <summary>
/// Generate a collection of phrase output data based on the current model
/// </summary>
/// <param name="lines">The number of phrases to emit</param>
/// <param name="seed">Optionally provide the start of the phrase to generate from</param>
/// <returns>
/// A lazily evaluated sequence of at most <paramref name="lines"/> generated phrases. Fewer are
/// returned when ten generation attempts per requested line do not yield enough accepted phrases.
/// </returns>
/// <exception cref="ArgumentException">Thrown when <paramref name="lines"/> is less than 1</exception>
public IEnumerable<TPhrase> Walk(int lines = 1, TPhrase seed = default(TPhrase))
{
    // Validate here rather than inside the iterator so an invalid argument fails at the
    // call site instead of on the first MoveNext of the returned sequence.
    if (lines < 1)
    {
        throw new ArgumentException("Invalid argument - line count for walk must be a positive integer", nameof(lines));
    }

    return WalkIterator(lines, seed);

    IEnumerable<TPhrase> WalkIterator(int lineCount, TPhrase start)
    {
        if (start == null)
        {
            start = RebuildPhrase(new List<TUnigram>() { GetPrepadUnigram() });
        }

        Logger.Info($"Walking to return {lineCount} phrases from {Chain.Count} states");

        var sentences = new List<TPhrase>();

        // Computed as long so a very large line count cannot overflow the attempt budget.
        long maxGenerations = (long)lineCount * 10;
        long genCount = 0;
        int created = 0;

        while (created < lineCount)
        {
            if (genCount == maxGenerations)
            {
                Logger.Info($"Breaking out of walk early - {genCount} generations did not produce {lineCount} distinct lines ({sentences.Count} were created)");
                break;
            }

            var result = WalkLine(start);

            // With EnsureUniqueWalk set, reject phrases that were learnt or already emitted.
            if (!EnsureUniqueWalk || (!SourcePhrases.Contains(result) && !sentences.Contains(result)))
            {
                sentences.Add(result);
                created++;
                yield return result;
            }

            genCount++;
        }
    }
}
/// <summary>
/// Generate a collection of phrase output data based on the current model
/// </summary>
/// <param name="lines">The number of phrases to emit</param>
/// <param name="seed">Optionally provide the start of the phrase to generate from</param>
/// <returns>
/// A lazily evaluated sequence of at most <paramref name="lines"/> generated phrases. Fewer are
/// returned when ten generation attempts per requested line do not yield enough accepted phrases.
/// </returns>
/// <exception cref="ArgumentException">Thrown when <paramref name="lines"/> is less than 1</exception>
public IEnumerable<TPhrase> Walk(int lines = 1, TPhrase seed = default(TPhrase))
{
    // Validate here rather than inside the iterator so an invalid argument fails at the
    // call site instead of on the first MoveNext of the returned sequence.
    if (lines < 1)
    {
        throw new ArgumentException("Invalid argument - line count for walk must be a positive integer", nameof(lines));
    }

    return WalkIterator(lines, seed);

    IEnumerable<TPhrase> WalkIterator(int lineCount, TPhrase start)
    {
        if (start == null)
        {
            start = RebuildPhrase(new List<TUnigram>() { GetPrepadUnigram() });
        }

        var sentences = new List<TPhrase>();

        // Computed as long so a very large line count cannot overflow the attempt budget.
        long maxGenerations = (long)lineCount * 10;
        long genCount = 0;
        int created = 0;

        while (created < lineCount)
        {
            if (genCount == maxGenerations)
            {
                break;
            }

            var result = WalkLine(start);

            // With EnsureUniqueWalk set, reject phrases that were learnt or already emitted.
            if (!EnsureUniqueWalk || (!SourcePhrases.Contains(result) && !sentences.Contains(result)))
            {
                sentences.Add(result);
                created++;
                yield return result;
            }

            genCount++;
        }
    }
}
public void Learn(TPhrase phrase) { // Logger.Info($"Learning phrase: '{phrase}'"); if (phrase == null || phrase.Equals(default(TPhrase))) { return; } // Ignore particularly short phrases if (SplitTokens(phrase).Count() < Level) { // Logger.Info($"Phrase {phrase} too short - skipped"); return; } // Add it to the source lines so we can ignore it // when learning in future if (!SourcePhrases.Contains(phrase)) { // Logger.Debug($"Adding phrase {phrase} to source lines"); SourcePhrases.Add(phrase); } // Split the sentence to an array of words var tokens = SplitTokens(phrase).ToArray(); LearnTokens(tokens); var lastCol = new List <TUnigram>(); for (var j = Level; j > 0; j--) { TUnigram previous; try { previous = tokens[^ j];
/// <summary>
/// Generate a collection of phrase output data based on the current model
/// </summary>
/// <param name="lines">The number of phrases to emit</param>
/// <param name="seed">Optionally provide the start of the phrase to generate from</param>
/// <param name="useLast">
/// When true, each accepted phrase re-seeds the next walk with its last token. After a rejected
/// walk, earlier tokens of the last accepted phrase are tried in turn (moving backwards) before
/// reverting to the original seed.
/// </param>
/// <returns>
/// A lazily evaluated sequence of at most <paramref name="lines"/> generated phrases. Fewer are
/// returned when 100 generation attempts per requested line do not yield enough accepted phrases.
/// Phrases that split into one token or fewer are never returned.
/// </returns>
/// <exception cref="ArgumentException">
/// Thrown when <paramref name="lines"/> is less than 1. Because this method is an iterator,
/// the exception surfaces when the result is first enumerated.
/// </exception>
public IEnumerable<TPhrase> Walk(int lines = 1, TPhrase seed = default(TPhrase), bool useLast = false)
{
    if (seed == null)
    {
        seed = RebuildPhrase(new List<TUnigram>() { GetPrepadUnigram() });
    }

    TPhrase originalSeed = seed;
    Logger.Info(string.Format("Walking to return {0} phrases from {1} states", lines, Chain.Count));

    if (lines < 1)
    {
        // paramName must be the bare parameter name, not "int lines".
        throw new ArgumentException("Invalid argument - line count for walk must be a positive integer", "lines");
    }

    var sentences = new List<TPhrase>();
    int genCount = 0;
    int created = 0;

    // Index into lastTokens of the token most recently used as a seed; -1 until a phrase is accepted.
    int lastTries = -1;
    IEnumerable<TUnigram> lastTokens = default(IEnumerable<TUnigram>);

    while (created < lines)
    {
        if (genCount == lines * 100)
        {
            UnityEngine.Debug.LogWarning(string.Format("Breaking out of walk early - {0} generations did not produce {1} distinct lines ({2} were created)", genCount, lines, sentences.Count));
            break;
        }

        TPhrase result = WalkLine(seed);
        TUnigram[] tokens = SplitTokens(result).ToArray();
        int numTokens = tokens.Length;

        bool isUnique = !EnsureUniqueWalk || (!SourcePhrases.Contains(result) && !sentences.Contains(result));
        if (numTokens > 1 && isUnique)
        {
            sentences.Add(result);
            created++;
            yield return result;

            if (useLast)
            {
                // Continue from the final token of the phrase just emitted.
                lastTries = numTokens;
                lastTokens = tokens;
                seed = RebuildPhrase(new TUnigram[] { lastTokens.ElementAt(--lastTries) });
                //UnityEngine.Debug.LogFormat ( "Result: {0}, new seed = |{1}|", result, seed );
            }
        }
        else if (useLast && !seed.Equals(originalSeed))
        {
            if (lastTries > 0)
            {
                // Step back one token in the last accepted phrase and try that as the seed.
                seed = RebuildPhrase(new TUnigram[] { lastTokens.ElementAt(--lastTries) });
                UnityEngine.Debug.LogFormat("'{0}' tokens = {1}", seed, lastTokens.ArrayToString());
            }
            else
            {
                seed = originalSeed;
                UnityEngine.Debug.LogFormat("Reverting to original seed: '{0}'", seed);
            }
        }

        genCount++;
    }
}