Code example #1
0
        /// <summary>
        /// Trains a third-order (trigram) Markov model on the test corpus and
        /// verifies that sampled text generation terminates via an
        /// end-of-sentence token rather than the iteration cap.
        /// </summary>
        public void TrainModel3()
        {
            var trainer = BrightWireProvider.CreateMarkovTrainer3 <string>();

            _Train(trainer);
            var model = trainer.Build().AsDictionary;

            // generate some text by repeatedly sampling the next token from the
            // model's transition distribution for the current 3-token window
            // (NB: the unused `rand` local from the original has been removed —
            // sampling is done entirely by Categorical.Sample())
            string prevPrev = default(string), prev = default(string), curr = default(string);
            var    output = new List <string>();

            for (var i = 0; i < 1024; i++)
            {
                var transitions  = model.GetTransitions(prevPrev, prev, curr);
                var distribution = new Categorical(transitions.Select(d => Convert.ToDouble(d.Probability)).ToArray());
                var next         = transitions[distribution.Sample()].NextState;
                output.Add(next);
                if (SimpleTokeniser.IsEndOfSentence(next))
                {
                    break;
                }
                // slide the trigram window forward
                prevPrev = prev;
                prev     = curr;
                curr     = next;
            }
            // the loop must have exited via the end-of-sentence break, not the cap
            Assert.IsTrue(output.Count < 1024);
        }
Code example #2
0
        /// <summary>
        /// Builds a n-gram based language model and generates new text from the model
        /// </summary>
        public static void MarkovChains()
        {
            // tokenise the novel "The Beautiful and the Damned" by F. Scott Fitzgerald
            List <IReadOnlyList <string> > sentences;

            using (var client = new WebClient())
            {
                // skip the Project Gutenberg front matter by starting at the first chapter heading
                var data = client.DownloadString("http://www.gutenberg.org/cache/epub/9830/pg9830.txt");
                var pos  = data.IndexOf("CHAPTER I");
                sentences = SimpleTokeniser.FindSentences(SimpleTokeniser.Tokenise(data.Substring(pos))).
                            ToList();
            }

            // create a markov trainer that uses a window of size 3
            var trainer = BrightWireProvider.CreateMarkovTrainer3 <string>();

            foreach (var sentence in sentences)
            {
                trainer.Add(sentence);
            }
            var model = trainer.Build().AsDictionary;

            // generate some text
            var rand = new Random();

            for (var i = 0; i < 50; i++)
            {
                var    sb = new StringBuilder();
                string prevPrev = default, prev = default, curr = default;
                // NOTE(review): this excerpt is truncated here — the sampling loop body
                // and the method's closing braces are missing from the scraped page
Code example #3
0
File: MarkovChains.cs  Project: lulzzz/brightwire
        /// <summary>
        /// Builds a n-gram based language model from "The Beautiful and the Damned"
        /// (downloaded from Project Gutenberg) and generates new sentences from it,
        /// writing each generated sentence to the console.
        /// </summary>
        public static void MarkovChains()
        {
            // tokenise the novel "The Beautiful and the Damned" by F. Scott Fitzgerald
            List <IReadOnlyList <string> > sentences;

            // NOTE(review): WebClient is obsolete in modern .NET — consider HttpClient
            // if this sample is ever moved to a newer target framework
            using (var client = new WebClient()) {
                var data = client.DownloadString("http://www.gutenberg.org/cache/epub/9830/pg9830.txt");
                // skip the Gutenberg front matter; ordinal search since this is a
                // fixed machine-readable marker, not linguistic text
                var pos  = data.IndexOf("CHAPTER I", StringComparison.Ordinal);
                sentences = SimpleTokeniser.FindSentences(SimpleTokeniser.Tokenise(data.Substring(pos))).ToList();
            }

            // create a markov trainer that uses a window of size 3
            var trainer = BrightWireProvider.CreateMarkovTrainer3 <string>();

            foreach (var sentence in sentences)
            {
                trainer.Add(sentence);
            }
            var model = trainer.Build().AsDictionary;

            // generate 50 sentences; the unused `rand` local from the original has
            // been removed — sampling is done entirely by Categorical.Sample()
            for (var i = 0; i < 50; i++)
            {
                var sb = new StringBuilder();
                string prevPrev = default(string), prev = default(string), curr = default(string);
                for (var j = 0; j < 256; j++)
                {
                    var transitions  = model.GetTransitions(prevPrev, prev, curr);
                    var distribution = new Categorical(transitions.Select(d => Convert.ToDouble(d.Probability)).ToArray());
                    var next         = transitions[distribution.Sample()].NextState;
                    // insert a space before word-like tokens, unless the previous
                    // character glues words together (apostrophe or hyphen)
                    if (char.IsLetterOrDigit(next[0]) && sb.Length > 0)
                    {
                        var lastChar = sb[sb.Length - 1];
                        if (lastChar != '\'' && lastChar != '-')
                        {
                            sb.Append(' ');
                        }
                    }
                    sb.Append(next);

                    if (SimpleTokeniser.IsEndOfSentence(next))
                    {
                        break;
                    }
                    // slide the trigram window forward
                    prevPrev = prev;
                    prev     = curr;
                    curr     = next;
                }
                Console.WriteLine(sb.ToString());
            }
        }
Code example #4
0
        /// <summary>
        /// Builds a trigram Markov language model from the embedded `res.trump`
        /// corpus (a CSV-like resource, second field per line is the message text),
        /// then generates up to 5,000,000 lines of text and appends them to "sts.txt".
        /// </summary>
        public static void MarkovChains()
        {
            var lines = res.trump.Split('\n');

            var sb = new StringBuilder();

            // concatenate the message column into one corpus, appending ". " where
            // a message does not already end with sentence-final punctuation
            foreach (var l in lines)
            {
                if (string.IsNullOrWhiteSpace(l))
                {
                    continue;
                }

                var spl = l.Split(',');
                // guard spl[1].Length > 0: the original called Last() on the field,
                // which throws InvalidOperationException when the field is empty
                if (spl.Length > 1 && spl[1].Length > 0)
                {
                    sb.Append(spl[1]);
                    // evaluate the last character once instead of three Last() calls
                    var endChar = spl[1][spl[1].Length - 1];
                    if (endChar == '!' || endChar == '?' || endChar == '.')
                    {
                        continue;
                    }
                    sb.Append(". ");
                }
            }

            var sentences = SimpleTokeniser.FindSentences(SimpleTokeniser.Tokenise(sb.ToString()))
                            .ToList();

            // filter out degenerate sentences: single tokens, short URL fragments,
            // and sentences starting with the "co" token (t.co link residue)
            var sentencesRW = sentences
                              .Select(m => m.ToList())
                              .Where(m => m.Count > 1)
                              .Where(m => !((m.Contains("https") || m.Contains("http")) && m.Count < 10))
                              .Where(m => !(m[0] == "co"))
                              .ToList();

            var trainer = BrightWireProvider.CreateMarkovTrainer3 <string>();

            foreach (var sentence in sentencesRW)
            {
                trainer.Add(sentence);
            }
            var model = trainer.Build().AsDictionary;

            // generate some text; one shared writer avoids reopening the output
            // file on every iteration (the original File.AppendAllText opened and
            // closed "sts.txt" up to 5,000,000 times)
            using (var writer = new StreamWriter("sts.txt", append: true))
            {
                for (var i = 0; i < 5000000; i++)
                {
                    sb = new StringBuilder();
                    string prevPrev = default(string), prev = default(string), curr = default(string);
                    for (var j = 0; j < 256; j++)
                    {
                        var transitions  = model.GetTransitions(prevPrev, prev, curr);
                        var distribution = new Categorical(transitions.Select(d => Convert.ToDouble(d.Probability)).ToArray());
                        var next         = transitions[distribution.Sample()].NextState;
                        // insert a space before word-like tokens, unless the previous
                        // character glues words together (apostrophe or hyphen)
                        if (char.IsLetterOrDigit(next[0]) && sb.Length > 0)
                        {
                            var lastChar = sb[sb.Length - 1];
                            if (lastChar != '\'' && lastChar != '-')
                            {
                                sb.Append(' ');
                            }
                        }
                        sb.Append(next);

                        if (SimpleTokeniser.IsEndOfSentence(next))
                        {
                            break;
                        }
                        // slide the trigram window forward
                        prevPrev = prev;
                        prev     = curr;
                        curr     = next;
                    }

                    // discard trivially short generations
                    if (sb.Length < 10)
                    {
                        continue;
                    }

                    if (i % 10000 == 0)
                    {
                        Console.WriteLine($"Writing line {i}");
                    }

                    writer.WriteLine(sb.ToString());
                }
            }
        }