// Dependencies assumed by these samples: Categorical is the categorical distribution
// from MathNet.Numerics.Distributions; BrightWireProvider and SimpleTokeniser come
// from the BrightWire library.
public void TrainModel3() {
    // train a Markov model with a window of size 3 over string tokens
    var trainer = BrightWireProvider.CreateMarkovTrainer3<string>();
    _Train(trainer);
    var model = trainer.Build().AsDictionary;

    // generate some text
    string prevPrev = default(string), prev = default(string), curr = default(string);
    var output = new List<string>();
    for (var i = 0; i < 1024; i++) {
        // sample the next token from the distribution over observed transitions
        var transitions = model.GetTransitions(prevPrev, prev, curr);
        var distribution = new Categorical(transitions.Select(d => Convert.ToDouble(d.Probability)).ToArray());
        var next = transitions[distribution.Sample()].NextState;
        output.Add(next);
        if (SimpleTokeniser.IsEndOfSentence(next)) {
            break;
        }

        // slide the size 3 window forward
        prevPrev = prev;
        prev = curr;
        curr = next;
    }

    // the loop should have terminated early at an end of sentence token
    Assert.IsTrue(output.Count < 1024);
}
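// _Train is a helper defined elsewhere in the test fixture and is not shown here.
// A minimal sketch of what it plausibly does, assuming BrightWire's
// IMarkovModelTrainer3<T> interface and a hard coded stand-in corpus (both the
// interface name and the corpus are assumptions, not the original helper):
void _Train(IMarkovModelTrainer3<string> trainer) {
    var corpus = new[] {
        new[] { "the", "cat", "sat", "on", "the", "mat", "." },
        new[] { "the", "dog", "sat", "on", "the", "rug", "." }
    };
    foreach (var sentence in corpus) {
        trainer.Add(sentence);
    }
}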
/// <summary>
/// Builds an n-gram based language model and generates new text from the model
/// </summary>
public static void MarkovChains() {
    // tokenise the novel "The Beautiful and the Damned" by F. Scott Fitzgerald
    List<IReadOnlyList<string>> sentences;
    using (var client = new WebClient()) {
        var data = client.DownloadString("http://www.gutenberg.org/cache/epub/9830/pg9830.txt");
        // skip the Project Gutenberg front matter and start at the first chapter
        var pos = data.IndexOf("CHAPTER I");
        sentences = SimpleTokeniser.FindSentences(SimpleTokeniser.Tokenise(data.Substring(pos))).ToList();
    }

    // create a markov trainer that uses a window of size 3
    var trainer = BrightWireProvider.CreateMarkovTrainer3<string>();
    foreach (var sentence in sentences) {
        trainer.Add(sentence);
    }
    var model = trainer.Build().AsDictionary;

    // generate 50 sentences of new text
    for (var i = 0; i < 50; i++) {
        var sb = new StringBuilder();
        string prevPrev = default(string), prev = default(string), curr = default(string);
        for (var j = 0; j < 256; j++) {
            // sample the next token from the distribution over observed transitions
            var transitions = model.GetTransitions(prevPrev, prev, curr);
            var distribution = new Categorical(transitions.Select(d => Convert.ToDouble(d.Probability)).ToArray());
            var next = transitions[distribution.Sample()].NextState;

            // insert a space before the token unless it follows an apostrophe or hyphen
            if (Char.IsLetterOrDigit(next[0]) && sb.Length > 0) {
                var lastChar = sb[sb.Length - 1];
                if (lastChar != '\'' && lastChar != '-') {
                    sb.Append(' ');
                }
            }
            sb.Append(next);
            if (SimpleTokeniser.IsEndOfSentence(next)) {
                break;
            }

            // slide the size 3 window forward
            prevPrev = prev;
            prev = curr;
            curr = next;
        }
        Console.WriteLine(sb.ToString());
    }
}
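// WebClient is deprecated in newer versions of .NET. If needed, the download step
// above could be swapped for HttpClient; a minimal sketch (only this step changes,
// the rest of the sample is identical):
string DownloadNovel() {
    using (var http = new System.Net.Http.HttpClient()) {
        return http.GetStringAsync("http://www.gutenberg.org/cache/epub/9830/pg9830.txt")
            .GetAwaiter().GetResult();
    }
}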
public static void MarkovChains() {
    // res.trump is an embedded resource: a CSV export of tweets, one per line,
    // with the tweet text in the second column
    var lines = res.trump.Split('\n');
    var sb = new StringBuilder();
    foreach (var l in lines) {
        if (string.IsNullOrWhiteSpace(l)) {
            continue;
        }
        var spl = l.Split(',');
        if (spl.Length > 1) {
            sb.Append(spl[1]);
            // append a terminating full stop unless the tweet already ends with punctuation
            if (spl[1].Last() == '!' || spl[1].Last() == '?' || spl[1].Last() == '.') {
                continue;
            }
            sb.Append(". ");
        }
    }

    // tokenise into sentences and filter out fragments and truncated links
    var sentences = SimpleTokeniser.FindSentences(SimpleTokeniser.Tokenise(sb.ToString())).ToList();
    var sentencesRW = sentences
        .Select(m => m.ToList())
        .Where(m => m.Count > 1)
        .Where(m => !((m.Contains("https") || m.Contains("http")) && m.Count < 10))
        .Where(m => !(m[0] == "co"))
        .ToList();

    // create a markov trainer that uses a window of size 3
    var trainer = BrightWireProvider.CreateMarkovTrainer3<string>();
    foreach (var sentence in sentencesRW) {
        trainer.Add(sentence);
    }
    var model = trainer.Build().AsDictionary;

    // generate some text and append each sentence to a file
    for (var i = 0; i < 5000000; i++) {
        sb = new StringBuilder();
        string prevPrev = default(string), prev = default(string), curr = default(string);
        for (var j = 0; j < 256; j++) {
            // sample the next token from the distribution over observed transitions
            var transitions = model.GetTransitions(prevPrev, prev, curr);
            var distribution = new Categorical(transitions.Select(d => Convert.ToDouble(d.Probability)).ToArray());
            var next = transitions[distribution.Sample()].NextState;

            // insert a space before the token unless it follows an apostrophe or hyphen
            if (Char.IsLetterOrDigit(next[0]) && sb.Length > 0) {
                var lastChar = sb[sb.Length - 1];
                if (lastChar != '\'' && lastChar != '-') {
                    sb.Append(' ');
                }
            }
            sb.Append(next);
            if (SimpleTokeniser.IsEndOfSentence(next)) {
                break;
            }

            // slide the size 3 window forward
            prevPrev = prev;
            prev = curr;
            curr = next;
        }

        // skip degenerate output and log progress periodically
        if (sb.Length < 10) {
            continue;
        }
        if (i % 10000 == 0) {
            Console.WriteLine($"Writing line {i}");
        }
        File.AppendAllText("sts.txt", sb.ToString() + Environment.NewLine);
    }
}
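// res.trump above is a project specific embedded resource and is left as-is. To run
// the sample standalone, a hypothetical stand-in would load the same CSV from disk
// ("tweets.csv" is an assumed file name, with the tweet text in the second column
// as the parsing code expects):
string LoadTweets() {
    return System.IO.File.ReadAllText("tweets.csv");
}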