Exemple #1
0
        /// <summary>
        /// Learns a single phrase: records it in <c>SourcePhrases</c> (if not already present),
        /// passes its tokens to <c>LearnTokens</c>, and finally adds a chain entry mapping the
        /// last <c>Level</c> tokens of the phrase to the terminator unigram.
        /// </summary>
        /// <param name="phrase">
        /// The phrase to learn. A null/default phrase, or one that splits into fewer than
        /// <c>Level</c> tokens, is ignored.
        /// </param>
        public void Learn(TPhrase phrase)
        {
            Logger.Info($"Learning phrase: '{phrase}'");
            if (phrase == null || phrase.Equals(default(TPhrase)))
            {
                return;
            }

            // Tokenise once and reuse the result for both the length check and the learning step
            // (assumes SplitTokens yields the same tokens on every call for a given phrase)
            var tokens = SplitTokens(phrase).ToArray();

            // Ignore particularly short phrases
            if (tokens.Length < Level)
            {
                Logger.Info($"Phrase {phrase} too short - skipped");
                return;
            }

            // Add it to the source lines so we can ignore it
            // when learning in future
            // NOTE(review): Contains + Add is not synchronised here - confirm SourcePhrases is
            // safe for concurrent use if this method is invoked from multiple threads
            if (!SourcePhrases.Contains(phrase))
            {
                Logger.Debug($"Adding phrase {phrase} to source lines");
                SourcePhrases.Add(phrase);
            }

            LearnTokens(tokens);

            // Collect the trailing Level tokens; they form the key that leads to the terminator
            var lastCol = new List<TUnigram>();

            for (var j = Level; j > 0; j--)
            {
                var index = tokens.Length - j;
                if (index >= 0)
                {
                    var previous = tokens[index];
                    Logger.Debug($"Adding TGram ({typeof(TUnigram)}) {previous} to lastCol");
                    lastCol.Add(previous);
                }
                else
                {
                    // Defensive fallback: pad with the prepad unigram rather than relying on an
                    // IndexOutOfRangeException to detect a phrase shorter than Level
                    lastCol.Add(GetPrepadUnigram());
                }
            }

            Logger.Debug($"Reached final key for phrase {phrase}");
            var finalKey = new NgramContainer<TUnigram>(lastCol.ToArray());

            Chain.AddOrCreate(finalKey, GetTerminatorUnigram());
        }
Exemple #2
0
 /// <summary>
 /// Learns every phrase in <paramref name="phrases"/> in parallel.
 /// </summary>
 /// <param name="phrases">The phrases to learn.</param>
 /// <param name="ignoreAlreadyLearnt">
 /// When true, phrases already contained in <c>SourcePhrases</c> are skipped;
 /// when false, every phrase is learnt.
 /// </param>
 public void Learn(IEnumerable <TPhrase> phrases, bool ignoreAlreadyLearnt = true)
 {
     if (ignoreAlreadyLearnt)
     {
         // Snapshot the not-yet-learnt phrases BEFORE the parallel loop starts: Learn adds to
         // SourcePhrases, so a lazy filter would be evaluated against a collection that is
         // being modified while it is read.
         var newTerms = phrases.Where(s => !SourcePhrases.Contains(s)).ToList();

         // For every sentence which hasnt already been learnt, learn it
         Parallel.ForEach(newTerms, Learn);
     }
     else
     {
         // For every sentence, learn it
         Parallel.ForEach(phrases, Learn);
     }
 }
        /// <summary>
        /// Learns a single phrase: records it in <c>SourcePhrases</c> (if not already present),
        /// passes its tokens to <c>LearnTokens</c>, and finally adds a chain entry mapping the
        /// last <c>Level</c> tokens of the phrase to the terminator unigram.
        /// </summary>
        /// <param name="phrase">
        /// The phrase to learn. A null/default phrase, or one that splits into fewer than
        /// <c>Level</c> tokens, is ignored.
        /// </param>
        public void Learn(TPhrase phrase)
        {
            if (phrase == null || phrase.Equals(default(TPhrase)))
            {
                return;
            }

            // Split the sentence to an array of words - done once and reused below
            // (assumes SplitTokens yields the same tokens on every call for a given phrase)
            var tokens = SplitTokens(phrase).ToArray();

            // Ignore particularly short phrases
            if (tokens.Length < Level)
            {
                return;
            }

            // Add it to the source lines so we can ignore it
            // when learning in future
            if (!SourcePhrases.Contains(phrase))
            {
                SourcePhrases.Add(phrase);
            }

            LearnTokens(tokens);

            // Collect the trailing Level tokens; they form the key that leads to the terminator
            var lastCol = new List<TUnigram>();

            for (var j = Level; j > 0; j--)
            {
                var index = tokens.Length - j;

                // Bounds check instead of catching IndexOutOfRangeException: positions before
                // the start of the phrase are filled with the prepad unigram
                lastCol.Add(index >= 0 ? tokens[index] : GetPrepadUnigram());
            }

            var finalKey = new NgramContainer<TUnigram>(lastCol.ToArray());

            Chain.AddOrCreate(finalKey, GetTerminatorUnigram());
        }
Exemple #4
0
        /// <summary>
        /// Learns every phrase in <paramref name="phrases"/> in parallel, logging how many
        /// phrases are about to be learnt.
        /// </summary>
        /// <param name="phrases">The phrases to learn.</param>
        /// <param name="ignoreAlreadyLearnt">
        /// When true, phrases already contained in <c>SourcePhrases</c> are skipped;
        /// when false, every phrase is learnt.
        /// </param>
        public void Learn(IEnumerable <TPhrase> phrases, bool ignoreAlreadyLearnt = true)
        {
            // Materialise once so that (a) the logged count matches what is actually learnt,
            // (b) the source sequence is enumerated a single time, and (c) the "already learnt"
            // filter is evaluated before Learn starts adding to SourcePhrases in parallel.
            var toLearn = ignoreAlreadyLearnt
                ? phrases.Where(s => !SourcePhrases.Contains(s)).ToList()
                : phrases.ToList();

            Logger.Info(string.Format("Learning {0} lines", toLearn.Count));

            // Learn each selected sentence
            Parallel.ForEach(toLearn, Learn);
        }
Exemple #5
0
        /// <summary>
        /// Generate a collection of phrase output data based on the current model
        /// </summary>
        /// <param name="lines">The number of phrases to emit; must be at least 1</param>
        /// <param name="seed">Optionally provide the start of the phrase to generate from</param>
        /// <returns>
        /// A lazily evaluated sequence of generated phrases. At most <paramref name="lines"/>
        /// phrases are produced; fewer are produced when <c>lines * 10</c> generation attempts
        /// are used up first.
        /// </returns>
        /// <exception cref="ArgumentException">
        /// Thrown at call time (not on first enumeration) when <paramref name="lines"/> is less than 1
        /// </exception>
        public IEnumerable <TPhrase> Walk(int lines = 1, TPhrase seed = default(TPhrase))
        {
            // Validate here rather than inside the iterator: an iterator body does not run
            // until the result is enumerated, which would delay the exception
            if (lines < 1)
            {
                throw new ArgumentException("Invalid argument - line count for walk must be a positive integer", nameof(lines));
            }

            return WalkIterator(lines, seed);
        }

        /// <summary>
        /// Iterator behind <c>Walk</c>: repeatedly calls <c>WalkLine</c> and yields accepted results.
        /// </summary>
        private IEnumerable<TPhrase> WalkIterator(int lines, TPhrase seed)
        {
            if (seed == null)
            {
                // No seed supplied - start from a phrase made of a single prepad unigram
                seed = RebuildPhrase(new List<TUnigram>()
                {
                    GetPrepadUnigram()
                });
            }

            Logger.Info($"Walking to return {lines} phrases from {Chain.Count} states");

            // Phrases yielded so far; its Count doubles as the "created" counter
            var sentences = new List<TPhrase>();

            int genCount = 0;

            while (sentences.Count < lines)
            {
                // Give up after ten attempts per requested line
                if (genCount == lines * 10)
                {
                    Logger.Info($"Breaking out of walk early - {genCount} generations did not produce {lines} distinct lines ({sentences.Count} were created)");
                    break;
                }

                var result = WalkLine(seed);

                // With EnsureUniqueWalk set, reject anything that is a learnt source phrase
                // or that has already been yielded during this walk
                if (!EnsureUniqueWalk || (!SourcePhrases.Contains(result) && !sentences.Contains(result)))
                {
                    sentences.Add(result);
                    yield return result;
                }

                genCount++;
            }
        }
        /// <summary>
        /// Generate a collection of phrase output data based on the current model
        /// </summary>
        /// <param name="lines">The number of phrases to emit; must be at least 1</param>
        /// <param name="seed">Optionally provide the start of the phrase to generate from</param>
        /// <returns>
        /// A lazily evaluated sequence of generated phrases. At most <paramref name="lines"/>
        /// phrases are produced; fewer are produced when <c>lines * 10</c> generation attempts
        /// are used up first.
        /// </returns>
        /// <exception cref="ArgumentException">
        /// Thrown at call time (not on first enumeration) when <paramref name="lines"/> is less than 1
        /// </exception>
        public IEnumerable <TPhrase> Walk(int lines = 1, TPhrase seed = default(TPhrase))
        {
            // Iterator bodies are deferred until enumeration, so the argument check lives in
            // this non-iterator wrapper to fail fast
            if (lines < 1)
            {
                throw new ArgumentException("Invalid argument - line count for walk must be a positive integer", nameof(lines));
            }

            return WalkIterator(lines, seed);
        }

        /// <summary>
        /// Iterator behind <c>Walk</c>: repeatedly calls <c>WalkLine</c> and yields accepted results.
        /// </summary>
        private IEnumerable<TPhrase> WalkIterator(int lines, TPhrase seed)
        {
            if (seed == null)
            {
                // No seed supplied - start from a phrase made of a single prepad unigram
                seed = RebuildPhrase(new List<TUnigram>()
                {
                    GetPrepadUnigram()
                });
            }

            // Phrases yielded so far; its Count doubles as the "created" counter
            var sentences = new List<TPhrase>();

            // Upper bound on generation attempts: ten per requested line
            for (int genCount = 0; sentences.Count < lines && genCount != lines * 10; genCount++)
            {
                var result = WalkLine(seed);

                // With EnsureUniqueWalk set, reject anything that is a learnt source phrase
                // or that has already been yielded during this walk
                if (!EnsureUniqueWalk || (!SourcePhrases.Contains(result) && !sentences.Contains(result)))
                {
                    sentences.Add(result);
                    yield return result;
                }
            }
        }
Exemple #7
0
        public void Learn(TPhrase phrase)
        {
            // Logger.Info($"Learning phrase: '{phrase}'");
            if (phrase == null || phrase.Equals(default(TPhrase)))
            {
                return;
            }

            // Ignore particularly short phrases
            if (SplitTokens(phrase).Count() < Level)
            {
                // Logger.Info($"Phrase {phrase} too short - skipped");
                return;
            }

            // Add it to the source lines so we can ignore it
            // when learning in future
            if (!SourcePhrases.Contains(phrase))
            {
                // Logger.Debug($"Adding phrase {phrase} to source lines");
                SourcePhrases.Add(phrase);
            }

            // Split the sentence to an array of words
            var tokens = SplitTokens(phrase).ToArray();

            LearnTokens(tokens);

            var lastCol = new List <TUnigram>();

            for (var j = Level; j > 0; j--)
            {
                TUnigram previous;
                try
                {
                    previous = tokens[^ j];
Exemple #8
0
        /// <summary>
        /// Generate a collection of phrase output data based on the current model
        /// </summary>
        /// <param name="lines">The number of phrases to emit; must be at least 1</param>
        /// <param name="seed">Optionally provide the start of the phrase to generate from</param>
        /// <param name="useLast">
        /// When true, after each accepted phrase the next walk is seeded with the last token of
        /// that phrase; on a rejected walk the seed steps back one token at a time through the
        /// last accepted phrase, and reverts to the original seed once those tokens are used up
        /// </param>
        /// <returns>
        /// A lazily evaluated sequence of generated phrases. At most <paramref name="lines"/>
        /// phrases are produced; fewer are produced when <c>lines * 100</c> generation attempts
        /// are used up first.
        /// </returns>
        /// <exception cref="ArgumentException">
        /// Thrown at call time (not on first enumeration) when <paramref name="lines"/> is less than 1
        /// </exception>
        public IEnumerable <TPhrase> Walk(int lines = 1, TPhrase seed = default(TPhrase), bool useLast = false)
        {
            // Validate outside the iterator so the exception is raised when Walk is called;
            // paramName must be the parameter's name ("lines"), not "int lines"
            if (lines < 1)
            {
                throw new ArgumentException("Invalid argument - line count for walk must be a positive integer", "lines");
            }

            return WalkIterator(lines, seed, useLast);
        }

        /// <summary>
        /// Iterator behind <c>Walk</c>: repeatedly calls <c>WalkLine</c>, yields accepted results
        /// and, when <paramref name="useLast"/> is set, re-seeds from the last accepted phrase.
        /// </summary>
        private IEnumerable<TPhrase> WalkIterator(int lines, TPhrase seed, bool useLast)
        {
            if (seed == null)
            {
                // No seed supplied - start from a phrase made of a single prepad unigram
                seed = RebuildPhrase(new List<TUnigram>()
                {
                    GetPrepadUnigram()
                });
            }

            TPhrase originalSeed = seed;

            Logger.Info(string.Format("Walking to return {0} phrases from {1} states", lines, Chain.Count));

            // Phrases yielded so far; its Count doubles as the "created" counter
            var sentences = new List<TPhrase>();

            int genCount = 0;

            // Index of the token (within lastTokens) currently used as the seed; -1 until a
            // phrase has been accepted with useLast set
            int lastTries = -1;
            IEnumerable<TUnigram> lastTokens = default(IEnumerable<TUnigram>);

            while (sentences.Count < lines)
            {
                // Give up after one hundred attempts per requested line
                if (genCount == lines * 100)
                {
                    UnityEngine.Debug.LogWarning(string.Format("Breaking out of walk early - {0} generations did not produce {1} distinct lines ({2} were created)", genCount, lines, sentences.Count));
                    break;
                }

                TPhrase    result    = WalkLine(seed);
                TUnigram[] tokens    = SplitTokens(result).ToArray();
                int        numTokens = tokens.Length;

                // Accept only multi-token results; with EnsureUniqueWalk set, also reject learnt
                // source phrases and phrases already yielded during this walk
                bool accepted = numTokens > 1
                                && (!EnsureUniqueWalk || (!SourcePhrases.Contains(result) && !sentences.Contains(result)));

                if (accepted)
                {
                    sentences.Add(result);
                    yield return result;

                    if (useLast)
                    {
                        // Seed the next walk with the final token of the phrase just produced
                        lastTries  = numTokens;
                        lastTokens = tokens;
                        seed       = RebuildPhrase(new TUnigram[] { lastTokens.ElementAt(--lastTries) });
                    }
                }
                else if (useLast && !seed.Equals(originalSeed))
                {
                    if (lastTries > 0)
                    {
                        // Step back one token in the last accepted phrase and retry from there
                        seed = RebuildPhrase(new TUnigram[] { lastTokens.ElementAt(--lastTries) });
                        UnityEngine.Debug.LogFormat("'{0}' tokens = {1}", seed, lastTokens.ArrayToString());
                    }
                    else
                    {
                        // Ran out of tokens to retry with - fall back to the caller's seed
                        seed = originalSeed;
                        UnityEngine.Debug.LogFormat("Reverting to original seed: '{0}'", seed);
                    }
                }

                genCount++;
            }
        }