Example #1
0
        /// <summary>
        /// Builds a <see cref="StringMarkov"/> (constructed with argument <c>1</c>) from a text file.
        /// Every line is split with <c>GetWords</c>; the first word of a line is obtained through
        /// <c>markov.FindOrNew</c>, each following word is added to its predecessor, each occurrence
        /// increments the word's <c>frequency</c>, and the last word of a non-empty line is linked
        /// to <c>markov.Terminator</c>.
        /// </summary>
        /// <param name="fileName">Name passed to <c>TestHelper.GetResourceContentPath</c> to locate the training file.</param>
        /// <returns>The populated <see cref="StringMarkov"/> instance.</returns>
        public static StringMarkov LearnFrom(string fileName)
        {
            StringMarkov markov = new StringMarkov(1);

            string[] markovTraining = System.IO.File.ReadAllLines(TestHelper.GetResourceContentPath(fileName));

            // Word -> unigram lookup shared across all lines, so a repeated word reuses its instance.
            Dictionary<string, StringMarkov.Unigram> unigrams = new Dictionary<string, StringMarkov.Unigram>();

            foreach (var line in markovTraining)
            {
                // Previous word on this line; null until the first word has been processed.
                StringMarkov.Unigram last = null;

                foreach (var word in GetWords(line))
                {
                    StringMarkov.Unigram now = null;

                    if (last == null)
                    {
                        // First word of the line: let the model find or create its unigram.
                        bool didNew;
                        now = markov.FindOrNew(word, out didNew);
                        if (didNew)
                        {
                            // Indexer rather than Add: a word first seen mid-line is already in the
                            // lookup (created below without going through the model). If it later
                            // opens a line and FindOrNew reports it as new, Dictionary.Add would
                            // throw ArgumentException on the duplicate key.
                            // NOTE(review): links recorded earlier still point at the previous
                            // instance - confirm whether StringMarkov can register an existing
                            // unigram instead of creating a second one.
                            unigrams[word] = now;
                        }
                    }

                    // Any other word (or a null result above): reuse the known unigram or create one.
                    if (now == null && !unigrams.TryGetValue(word, out now))
                    {
                        now = new StringMarkov.Unigram(word);
                        unigrams.Add(word, now);
                    }

                    // Record the transition from the previous word to this one.
                    if (last != null)
                    {
                        last.Add(now);
                    }

                    now.frequency++;
                    last = now;
                }

                // Close the line by linking its final word to the terminator.
                if (last != null)
                {
                    last.Add(markov.Terminator);
                }
            }

            return markov;
        }
Example #2
0
        /// <summary>
        /// Stores the model's <c>Terminator</c> and collects into <c>unigrams</c> every unigram in
        /// <c>markov.starts</c> together with the keys of each of those unigrams' <c>weights</c>.
        /// </summary>
        /// <param name="markov">Model whose terminator, starts and their weights are read.</param>
        public StringMarkovFingerprint(StringMarkov markov)
        {
            terminator = markov.Terminator;

            // Initial capacity is four times the number of starts; it is only a sizing hint.
            unigrams = new HashSet<StringMarkov.Unigram>(markov.starts.Count * 4);

            foreach (var unigram in markov.starts)
            {
                // HashSet.Add leaves the set unchanged (returning false) when the element is
                // already present, so no Contains pre-check is needed.
                unigrams.Add(unigram);

                // Only the direct entries of this start's weights are visited; nothing deeper
                // is traversed here.
                foreach (var next in unigram.weights)
                {
                    unigrams.Add(next.Key);
                }
            }
        }