/// <summary>
/// Returns the entries of SourceLines located after the index held in currentIncrement.
/// </summary>
/// <returns>
/// SourceLines itself when currentIncrement is negative; otherwise a new array
/// containing every entry that follows index currentIncrement.
/// </returns>
public string[] GetUnUsedLines()
        {
            if (currentIncrement >= 0)
            {
                // Everything up to and including currentIncrement is skipped
                var firstUnusedIndex = currentIncrement + 1;
                return SourceLines.Skip(firstUnusedIndex).ToArray();
            }

            // Negative increment: hand back the full set of lines unchanged
            return SourceLines;
        }
        /// <summary>
        /// Learns a single phrase: records it in SourceLines, hands its tokens to LearnTokens,
        /// and passes a key built from the last Level tokens, together with the terminator gram,
        /// to AddOrCreate.
        /// </summary>
        /// <param name="phrase">
        /// The phrase to learn. Null or default phrases, and phrases that split into fewer
        /// than Level tokens, are ignored.
        /// </param>
        public void Learn(TPhrase phrase)
        {
            if (phrase == null || phrase.Equals(default(TPhrase)))
            {
                return;
            }

            // Split the sentence to an array of words once; the result is reused
            // for both the length check and the learning step
            var tokens = SplitTokens(phrase).ToArray();

            // Ignore particularly short sentences
            if (tokens.Length < Level)
            {
                return;
            }

            // Add it to the source lines so we can ignore it
            // when learning in future
            if (!SourceLines.Contains(phrase))
            {
                SourceLines.Add(phrase);
            }

            LearnTokens(tokens);

            // Collect the last Level tokens, oldest first. A position that falls before
            // the start of the phrase is filled with the prepad gram (checked explicitly
            // rather than by catching IndexOutOfRangeException).
            var lastCol = new List <TGram>();

            for (var j = Level; j > 0; j--)
            {
                var index = tokens.Length - j;

                TGram previous;
                if (index >= 0)
                {
                    previous = tokens[index];
                }
                else
                {
                    previous = GetPrepadGram();
                }

                lastCol.Add(previous);
            }

            var finalKey = new SourceGrams <TGram>(lastCol.ToArray());

            AddOrCreate(finalKey, GetTerminatorGram());
        }
Exemple #3
0
 /// <summary>
 /// Learns every phrase in <paramref name="phrases"/> by passing each one to the
 /// single-phrase Learn overload via Parallel.ForEach.
 /// </summary>
 /// <param name="phrases">The phrases to learn.</param>
 /// <param name="ignoreAlreadyLearnt">
 /// When true, phrases already contained in SourceLines are skipped; when false,
 /// every phrase is passed to Learn.
 /// </param>
 public void Learn(IEnumerable <TPhrase> phrases, bool ignoreAlreadyLearnt = true)
 {
     if (ignoreAlreadyLearnt)
     {
         // Snapshot the not-yet-learnt phrases up front: the Where filter is lazy and
         // would otherwise read SourceLines while the learning step is adding to it.
         var newTerms = phrases.Where(s => !SourceLines.Contains(s)).ToList();

         // For every sentence which hasn't already been learnt, learn it
         // NOTE(review): the parallel workers all reach SourceLines through Learn -
         // confirm that collection is safe for concurrent use.
         Parallel.ForEach(newTerms, Learn);
     }
     else
     {
         // For every sentence, learn it
         Parallel.ForEach(phrases, Learn);
     }
 }
Exemple #4
0
        /// <summary>
        /// Learns one phrase. The phrase is added to SourceLines if absent, its tokens are
        /// given to LearnTokens, and AddOrCreate is called with a key made from the final
        /// Level tokens and the terminator gram.
        /// </summary>
        /// <param name="phrase">
        /// The phrase to learn. Nothing happens for a null or default phrase, or for one
        /// that yields fewer than Level tokens.
        /// </param>
        public void Learn(TPhrase phrase)
        {
            if (phrase == null || phrase.Equals(default(TPhrase)))
            {
                return;
            }

            // Tokenise a single time and reuse the array for the length check below
            var tokens = SplitTokens(phrase).ToArray();

            // Ignore particularly short sentences
            if (tokens.Length < Level)
            {
                return;
            }

            // Add it to the source lines so we can ignore it
            // when learning in future
            if (!SourceLines.Contains(phrase))
            {
                SourceLines.Add(phrase);
            }

            LearnTokens(tokens);

            // Gather the trailing Level tokens in their original order. An index before
            // the start of the array gets the prepad gram; this is tested directly
            // instead of relying on a caught IndexOutOfRangeException.
            var lastCol = new List <TGram>();

            for (var j = Level; j > 0; j--)
            {
                var position = tokens.Length - j;

                TGram previous;
                if (position < 0)
                {
                    previous = GetPrepadGram();
                }
                else
                {
                    previous = tokens[position];
                }

                lastCol.Add(previous);
            }

            var finalKey = new SourceGrams <TGram>(lastCol.ToArray());

            AddOrCreate(finalKey, GetTerminatorGram());
        }
        /// <summary>
        /// Generate a collection of phrase output data based on the current model
        /// </summary>
        /// <remarks>
        /// This is an iterator method, so nothing in it runs (including the argument check)
        /// until the returned sequence is enumerated. Generation stops after ten attempts per
        /// requested line, so fewer than <paramref name="lines"/> phrases may be produced when
        /// EnsureUniqueWalk causes results to be rejected.
        /// </remarks>
        /// <param name="lines">The number of phrases to emit</param>
        /// <param name="seed">Optionally provide the start of the phrase to generate from</param>
        /// <returns>A lazily evaluated sequence of at most <paramref name="lines"/> phrases</returns>
        /// <exception cref="ArgumentException">Thrown during enumeration when <paramref name="lines"/> is less than 1</exception>
        public IEnumerable <TPhrase> Walk(int lines = 1, TPhrase seed = default(TPhrase))
        {
            // No seed supplied: start from a phrase rebuilt from a single prepad gram
            if (seed == null)
            {
                seed = RebuildPhrase(new List <TGram>()
                {
                    GetPrepadGram()
                });
            }

            if (lines < 1)
            {
                throw new ArgumentException("Invalid argument - line count for walk must be a positive integer", nameof(lines));
            }

            var sentences = new List <TPhrase>();

            // Ten attempts per requested line. Held as long so a very large line count
            // cannot overflow int and corrupt the cap.
            long maxAttempts = (long)lines * 10;
            long attempts    = 0;
            int created      = 0;

            while (created < lines && attempts < maxAttempts)
            {
                var result = WalkLine(seed);
                attempts++;

                // With EnsureUniqueWalk set, reject anything already in the source lines
                // or already emitted by this walk
                if (!EnsureUniqueWalk || (!SourceLines.Contains(result) && !sentences.Contains(result)))
                {
                    sentences.Add(result);
                    created++;
                    yield return(result);
                }
            }
        }
Exemple #6
0
        /// <summary>
        /// Produces phrases by repeatedly calling WalkLine with the given seed.
        /// </summary>
        /// <remarks>
        /// Being an iterator method, its body (argument check included) only executes once
        /// the returned sequence is enumerated. At most ten attempts per requested line are
        /// made, so the sequence can contain fewer than <paramref name="lines"/> phrases when
        /// EnsureUniqueWalk rejects results.
        /// </remarks>
        /// <param name="lines">The number of phrases to emit; must be at least 1.</param>
        /// <param name="seed">
        /// Optional start of the phrase. When null, a phrase rebuilt from a single prepad gram is used.
        /// </param>
        /// <returns>A lazily evaluated sequence of at most <paramref name="lines"/> phrases.</returns>
        /// <exception cref="ArgumentException">Thrown during enumeration when <paramref name="lines"/> is less than 1.</exception>
        public IEnumerable <TPhrase> Walk(int lines = 1, TPhrase seed = default(TPhrase))
        {
            if (seed == null)
            {
                seed = RebuildPhrase(new List <TGram>()
                {
                    GetPrepadGram()
                });
            }

            if (lines < 1)
            {
                throw new ArgumentException("Invalid argument - line count for walk must be a positive integer",
                                            nameof(lines));
            }

            var sentences = new List <TPhrase>();

            // Attempt budget of ten per requested line, computed in long so that a very
            // large line count cannot overflow int and distort the cap.
            long attemptLimit = (long)lines * 10;
            long attemptCount = 0;
            int created       = 0;

            while (created < lines && attemptCount < attemptLimit)
            {
                var result = WalkLine(seed);
                attemptCount++;

                // When uniqueness is enforced, skip results that match a source line
                // or a phrase this walk has already produced
                var accepted = !EnsureUniqueWalk ||
                               (!SourceLines.Contains(result) && !sentences.Contains(result));

                if (accepted)
                {
                    sentences.Add(result);
                    created++;
                    yield return(result);
                }
            }
        }