/// <summary>
/// Gets the source lines located after the current increment position.
/// </summary>
/// <returns>
/// <c>SourceLines</c> itself when <c>currentIncrement</c> is negative; otherwise a new
/// array containing every element that follows index <c>currentIncrement</c>.
/// </returns>
public string[] GetUnUsedLines()
{
    if (currentIncrement >= 0)
    {
        // Everything up to and including the current increment is skipped.
        var firstUnusedIndex = currentIncrement + 1;
        return SourceLines.Skip(firstUnusedIndex).ToArray();
    }

    // A negative increment returns the backing collection as-is (no copy is made).
    return SourceLines;
}
/// <summary>
/// Learns a single phrase: records it in <c>SourceLines</c>, feeds its tokens to
/// <c>LearnTokens</c>, and registers the terminator gram against a key built from the
/// phrase's last <c>Level</c> tokens.
/// </summary>
/// <param name="phrase">
/// The phrase to learn. Null/default phrases and phrases that split into fewer than
/// <c>Level</c> tokens are silently ignored.
/// </param>
public void Learn(TPhrase phrase)
{
    if (phrase == null || phrase.Equals(default(TPhrase)))
    {
        return;
    }

    // Tokenise once and reuse the array for both the length check and the learning
    // step, instead of enumerating SplitTokens twice.
    var tokens = SplitTokens(phrase).ToArray();

    // Ignore particularly short sentences
    if (tokens.Length < Level)
    {
        return;
    }

    // Add it to the source lines so we can ignore it when learning in future.
    // NOTE(review): SourceLines is mutated here without synchronisation while the
    // collection overload of Learn invokes this method via Parallel.ForEach - confirm
    // that SourceLines is safe for concurrent use.
    if (!SourceLines.Contains(phrase))
    {
        SourceLines.Add(phrase);
    }

    LearnTokens(tokens);

    // Collect the last Level tokens, oldest first. An explicit bounds check replaces
    // the previous catch of IndexOutOfRangeException; an index before the start of
    // the phrase is filled with the pre-pad gram.
    var lastCol = new List<TGram>();
    for (var j = Level; j > 0; j--)
    {
        var index = tokens.Length - j;
        TGram previous;
        if (index >= 0)
        {
            previous = tokens[index];
        }
        else
        {
            previous = GetPrepadGram();
        }

        lastCol.Add(previous);
    }

    var finalKey = new SourceGrams<TGram>(lastCol.ToArray());
    AddOrCreate(finalKey, GetTerminatorGram());
}
/// <summary>
/// Learns every phrase in a collection, invoking the single-phrase <c>Learn</c>
/// overload for each one via <c>Parallel.ForEach</c>.
/// </summary>
/// <param name="phrases">The phrases to learn.</param>
/// <param name="ignoreAlreadyLearnt">
/// When true (the default), phrases already present in <c>SourceLines</c> are skipped;
/// when false, every phrase is passed to <c>Learn</c>.
/// </param>
public void Learn(IEnumerable<TPhrase> phrases, bool ignoreAlreadyLearnt = true)
{
    if (ignoreAlreadyLearnt)
    {
        // Materialise the filter before the parallel loop starts: the Where query is
        // lazy and reads SourceLines, which the single-phrase Learn appends to while
        // the loop is running.
        IEnumerable<TPhrase> newTerms = phrases.Where(s => !SourceLines.Contains(s)).ToList();

        // For every sentence which hasn't already been learnt, learn it.
        // (Previously the unfiltered collection was iterated, so the flag had no effect.)
        Parallel.ForEach(newTerms, Learn);
    }
    else
    {
        // For every sentence, learn it
        Parallel.ForEach(phrases, Learn);
    }
}
/// <summary>
/// Learns one phrase. The phrase is added to <c>SourceLines</c> (if not already
/// present), its tokens are passed to <c>LearnTokens</c>, and the terminator gram is
/// added under a key made of the phrase's final <c>Level</c> tokens.
/// </summary>
/// <param name="phrase">
/// The phrase to learn. Nothing happens when it is null, equal to the default value of
/// <typeparamref name="TPhrase"/>, or splits into fewer than <c>Level</c> tokens.
/// </param>
public void Learn(TPhrase phrase)
{
    if (phrase == null || phrase.Equals(default(TPhrase)))
    {
        return;
    }

    // Split the sentence to an array of words - done once, up front, so the token
    // sequence is not enumerated a second time after the length check.
    var tokens = SplitTokens(phrase).ToArray();

    // Ignore particularly short sentences
    if (tokens.Length < Level)
    {
        return;
    }

    // Add it to the source lines so we can ignore it when learning in future.
    // NOTE(review): this check-then-add on SourceLines is not synchronised, yet the
    // collection overload calls this method through Parallel.ForEach - verify.
    if (!SourceLines.Contains(phrase))
    {
        SourceLines.Add(phrase);
    }

    LearnTokens(tokens);

    // Build the closing key from the trailing Level tokens. Positions that would fall
    // before the first token get the pre-pad gram; this is decided with a bounds check
    // rather than by catching IndexOutOfRangeException.
    var lastCol = new List<TGram>();
    var firstIndex = tokens.Length - Level;
    for (var offset = 0; offset < Level; offset++)
    {
        var index = firstIndex + offset;
        TGram previous;
        if (index < 0)
        {
            previous = GetPrepadGram();
        }
        else
        {
            previous = tokens[index];
        }

        lastCol.Add(previous);
    }

    var finalKey = new SourceGrams<TGram>(lastCol.ToArray());
    AddOrCreate(finalKey, GetTerminatorGram());
}
/// <summary>
/// Generate a collection of phrase output data based on the current model
/// </summary>
/// <remarks>
/// This is an iterator method: argument validation and phrase generation only run once
/// the returned sequence is enumerated. At most <c>lines * 10</c> generation attempts
/// are made, so fewer than <paramref name="lines"/> phrases may be produced when
/// <c>EnsureUniqueWalk</c> rejects too many results.
/// </remarks>
/// <param name="lines">The number of phrases to emit</param>
/// <param name="seed">Optionally provide the start of the phrase to generate from</param>
/// <returns>A lazily evaluated sequence of up to <paramref name="lines"/> generated phrases</returns>
/// <exception cref="ArgumentException">
/// Thrown on enumeration when <paramref name="lines"/> is less than 1.
/// </exception>
public IEnumerable<TPhrase> Walk(int lines = 1, TPhrase seed = default(TPhrase))
{
    if (seed == null)
    {
        // No seed supplied: start from a phrase rebuilt from a single pre-pad gram.
        seed = RebuildPhrase(new List<TGram>() { GetPrepadGram() });
    }

    if (lines < 1)
    {
        throw new ArgumentException("Invalid argument - line count for walk must be a positive integer", nameof(lines));
    }

    var sentences = new List<TPhrase>();

    // Computed in 64-bit arithmetic: lines * 10 silently wraps in int for large line
    // counts, which previously meant the attempt cap could never be reached.
    long maxAttempts = (long)lines * 10;
    long attempts = 0;

    while (sentences.Count < lines && attempts < maxAttempts)
    {
        var result = WalkLine(seed);
        attempts++;

        // With EnsureUniqueWalk set, reject anything already present in the source
        // lines or already emitted by this walk.
        if (!EnsureUniqueWalk || (!SourceLines.Contains(result) && !sentences.Contains(result)))
        {
            sentences.Add(result);
            yield return result;
        }
    }
}
/// <summary>
/// Lazily generates phrases by repeatedly calling <c>WalkLine</c> with the given seed.
/// </summary>
/// <remarks>
/// Because this method uses <c>yield return</c>, nothing (including the argument check)
/// executes until the result is enumerated. Generation stops after
/// <paramref name="lines"/> phrases have been emitted or after <c>lines * 10</c>
/// attempts, whichever comes first.
/// </remarks>
/// <param name="lines">The number of phrases to emit; must be at least 1.</param>
/// <param name="seed">
/// Optional start of the phrase to generate from. When null, a phrase rebuilt from a
/// single pre-pad gram is used instead.
/// </param>
/// <returns>A sequence of up to <paramref name="lines"/> generated phrases.</returns>
/// <exception cref="ArgumentException">
/// Thrown on enumeration when <paramref name="lines"/> is less than 1.
/// </exception>
public IEnumerable<TPhrase> Walk(int lines = 1, TPhrase seed = default(TPhrase))
{
    if (seed == null)
    {
        seed = RebuildPhrase(new List<TGram>() { GetPrepadGram() });
    }

    if (lines < 1)
    {
        throw new ArgumentException("Invalid argument - line count for walk must be a positive integer", nameof(lines));
    }

    var sentences = new List<TPhrase>();

    // The attempt budget is held as a long so that lines * 10 cannot overflow int and
    // disable the early-exit safeguard for very large line counts.
    long attemptBudget = (long)lines * 10;

    for (long attempt = 0; attempt < attemptBudget && sentences.Count < lines; attempt++)
    {
        var result = WalkLine(seed);

        // When unique walks are required, skip results that duplicate a source line
        // or a phrase already emitted during this walk.
        var isDuplicate = EnsureUniqueWalk
                          && (SourceLines.Contains(result) || sentences.Contains(result));
        if (isDuplicate)
        {
            continue;
        }

        sentences.Add(result);
        yield return result;
    }
}