// Round-trips a model through the text serialization format and verifies
// that the reloaded copy matches the original on word list and size.
public void TestReLoadingText()
{
    var original = Model.Load("model.txt");
    Model reloaded;

    using (var buffer = new MemoryStream())
    {
        // Serialize into the in-memory buffer, leaving it open for re-reading.
        using (var writer = new TextModelWriter(buffer, true))
        {
            writer.Write(original);
        }

        // Rewind and deserialize from the same buffer.
        buffer.Seek(0, SeekOrigin.Begin);
        using (var reader = new TextModelReader(buffer))
        {
            reloaded = Model.Load(reader);
        }
    }

    Assert.AreEqual(original.Words, reloaded.Words);
    Assert.AreEqual(original.Size, reloaded.Size);
}
// Round-trips a model through the text serialization format and verifies
// that the reloaded copy matches the original on word list and size.
// Fix: the TextModelReader was constructed without a using statement and
// therefore never disposed; it is now wrapped in a using block, matching
// the sibling variant of this test.
public void TestReLoadingText()
{
    var model = Model.Load("model.txt");
    Model m2;
    using (var s = new MemoryStream())
    {
        using (var writer = new TextModelWriter(s, true))
        {
            writer.Write(model);
        }
        s.Seek(0, SeekOrigin.Begin);
        using (var tmr = new TextModelReader(s))
        {
            m2 = Model.Load(tmr);
        }
    }
    Assert.AreEqual(model.Words, m2.Words);
    Assert.AreEqual(model.Size, m2.Size);
}
// Entry point: resolves the input file list (command-line args, or
// config.json when none are given), parses each file into a text model
// and a concordance, serializes both to JSON files, then demonstrates
// several text queries on the console.
//
// Fixes relative to the original:
//  - Each input file was opened and fully read TWICE (one loop per
//    parser); the loops are merged so every file is read once and the
//    same content is fed to both parsers.
//  - JsonConvert.DeserializeObject can return null, which previously
//    surfaced later as a NullReferenceException; it now fails fast with
//    a descriptive message.
// NOTE(review): the loop overwrites `text`/`concordance` on every
// iteration, so only the LAST file's parse result is used afterwards.
// This mirrors the original behavior — confirm it is intended.
static void Main(string[] args)
{
    try
    {
        IText text = new Text();
        IConcordance concordance = new Concordance();
        var concordanceParser = new ConcordanceParser();
        var configuration = new GlobalConfiguration();

        if (args.Length != 0)
        {
            configuration.FileNames = args;
        }
        else
        {
            using var reader = new StreamReader("../../../config.json");
            var json = reader.ReadToEnd();
            configuration = JsonConvert.DeserializeObject<GlobalConfiguration>(json)
                ?? throw new InvalidOperationException("Unable to deserialize config.json.");
        }

        foreach (var fileName in configuration.FileNames)
        {
            using var stream = new StreamReader(new FileStream(fileName, FileMode.Open));
            var textReader = new TextModelReader(stream);
            var content = textReader.ReadAllText();   // read once, feed both parsers
            var textParser = new TextParser();
            text = textParser.ParseText(content);
            concordance = concordanceParser.ParseText(content);
        }

        var jsonText = JsonTextSerializer.Serialize(text);
        var jsonConc = JsonTextSerializer.Serialize(concordance);

        using (var writer = new StreamWriter("../../../text.json"))
        {
            var textModelWriter = new TextModelWriter(writer);
            textModelWriter.Write(jsonText);
        }

        using (var writer = new StreamWriter("../../../concordance.json"))
        {
            var textModelWriter = new TextModelWriter(writer);
            textModelWriter.Write(jsonConc);
        }

        Console.WriteLine();
        Console.WriteLine("----Select words from question sentences with length 10------------------------");
        Console.WriteLine();
        foreach (var word in text.GetWordsFromQuestionSentences(10))
        {
            Console.WriteLine(word);
        }

        Console.WriteLine();
        Console.WriteLine("----Order sentences by words count-------------------------");
        Console.WriteLine();
        foreach (var sentence in text.OrderSentencesByWordsCount())
        {
            Console.Write(sentence);
            Console.Write(" --- ");
            Console.Write($"{sentence.WordsCount} words");
            Console.WriteLine();
        }

        Console.WriteLine();
        Console.WriteLine("-----Deleting words with length 10--------------");
        Console.WriteLine();
        text.DeleteWords(10);
        foreach (var sentence in text.Sentences)
        {
            Console.WriteLine(sentence);
        }

        Console.WriteLine();
        Console.WriteLine("-----Replacing words: \"In\" replace by \"In word replaced\"----------------");
        Console.WriteLine();
        text.ReplaceWord("In", "In word replaced");
        foreach (var sentence in text.Sentences)
        {
            Console.WriteLine(sentence);
        }
        Console.WriteLine("------------------------------------");
    }
    catch (Exception e)
    {
        Console.WriteLine(e.Message);
    }
}
// Serializes this tag set to the writer. The section is delimited by the
// type's full name on both ends; everything after VER is written one nest
// level deeper.
public void DumpToStream(TextModelWriter sw)
{
    var sectionName = typeof(TagSet).FullName;

    sw.Write(sectionName);
    sw.WriteOption("VER", VER);

    sw.NestLevel += 1;
    sw.WriteOption("ROOT", ROOT);
    PTs.DumpToStream(sw);
    NTs.DumpToStream(sw);
    sw.NestLevel -= 1;

    sw.Write(sectionName);
}
// Serializes the vocabulary (known words and word signatures) to the
// writer. Header options are written first, then both sub-vocabularies
// one nest level deeper; the section is delimited by the type's full
// name on both ends.
public void DumpToStream(TextModelWriter sw)
{
    var sectionName = typeof(Vocabulary).FullName;

    sw.Write(sectionName);
    sw.WriteOption("SIG", SIG);
    sw.WriteOption("VER", VER);
    sw.WriteOption("knownWordCount", vocab.Count);
    sw.WriteOption("sigCount", signitureVocab.Count);

    sw.NestLevel += 1;
    vocab.DumpToStream(sw);
    signitureVocab.DumpToStream(sw);
    sw.NestLevel -= 1;

    sw.Write(sectionName);
}
// Serializes this grammar to the writer in the text model format:
// header options, then the TerminalRule / UnaryRule / BinaryRule
// sections (one nest level deeper), then the subtag split traces.
// Entries whose score is NaN or infinite are skipped. The section is
// delimited by the type's full name on both ends.
//
// Refactor: the original was a single ~100-statement method with the
// finite-score check duplicated three times; it is now decomposed into
// one private helper per section with identical output order.
public void DumpToStream(TextModelWriter sw, TagSet tagSet, Vocabulary vocab)
{
    var name = typeof(LAPCFGrammar).FullName;
    sw.Write(name);
    sw.WriteOption("VER", VER);
    sw.WriteOption("NTCount", NTCount);
    sw.WriteOption("PTCount", PTCount);
    sw.WriteOption("ROOTID", ROOTID);

    sw.Write("TerminalRule");
    sw.NestLevel += 1;
    DumpTerminalRules(sw, tagSet, vocab);
    sw.NestLevel -= 1;

    sw.Write("UnaryRule");
    sw.NestLevel += 1;
    DumpUnaryRules(sw, tagSet);
    sw.NestLevel -= 1;

    sw.Write("BinaryRule");
    sw.NestLevel += 1;
    DumpBinaryRules(sw, tagSet);
    sw.NestLevel -= 1;

    sw.WriteOption("TraceCount", subtagTraces.Count);
    DumpSubtagTraces(sw);

    sw.Write(name);
}

// True when the score is a real number worth serializing (not NaN/Inf).
private static bool IsWritableScore(double score)
{
    return !double.IsInfinity(score) && !double.IsNaN(score);
}

// Writes one "tag_subtag \t word \t score" line per finite terminal score.
private void DumpTerminalRules(TextModelWriter sw, TagSet tagSet, Vocabulary vocab)
{
    foreach (var x in trules)
    {
        if (x == null)
        {
            continue;
        }
        foreach (var y in x)
        {
            if (y == null)
            {
                continue;
            }
            var word = vocab.GetWordString(y.word);
            var tag = tagSet.GetTagString(y.tag);
            for (int p = 0; p < y.scores.Length; ++p)
            {
                if (IsWritableScore(y.scores[p]))
                {
                    sw.Write(string.Format("{0}_{1}\t{2}\t{3}", tag, p, word, y.scores[p]));
                }
            }
        }
    }
}

// Writes one "ptag_p \t ctag_c \t score" line per finite unary score.
private void DumpUnaryRules(TextModelWriter sw, TagSet tagSet)
{
    foreach (var x in urules)
    {
        if (x == null)
        {
            continue;
        }
        foreach (var y in x)
        {
            if (y == null)
            {
                continue;
            }
            var ptag = tagSet.GetTagString(y.ptag);
            var ctag = tagSet.GetTagString(y.ctag);
            for (int c = 0; c < y.scores.Length; ++c)
            {
                for (int p = 0; p < y.scores[c].Length; ++p)
                {
                    if (IsWritableScore(y.scores[c][p]))
                    {
                        sw.Write(string.Format("{0}_{1}\t{2}_{3}\t{4}", ptag, p, ctag, c, y.scores[c][p]));
                    }
                }
            }
        }
    }
}

// Writes one "ptag_p \t ltag_l \t rtag_r \t score" line per finite binary score.
private void DumpBinaryRules(TextModelWriter sw, TagSet tagSet)
{
    foreach (var x in brules)
    {
        if (x == null)
        {
            continue;
        }
        foreach (var y in x)
        {
            if (y == null)
            {
                continue;
            }
            foreach (var z in y)
            {
                if (z == null)
                {
                    continue;
                }
                var ptag = tagSet.GetTagString(z.ptag);
                var ltag = tagSet.GetTagString(z.ltag);
                var rtag = tagSet.GetTagString(z.rtag);
                for (int l = 0; l < z.scores.Length; ++l)
                {
                    for (int r = 0; r < z.scores[l].Length; ++r)
                    {
                        for (int p = 0; p < z.scores[l][r].Length; ++p)
                        {
                            if (IsWritableScore(z.scores[l][r][p]))
                            {
                                sw.Write(
                                    string.Format("{0}_{1}\t{2}_{3}\t{4}_{5}\t{6}",
                                        ptag, p, ltag, l, rtag, r, z.scores[l][r][p])
                                );
                            }
                        }
                    }
                }
            }
        }
    }
}

// Writes each subtag trace as a TRACE option followed by its rows
// (space-joined), one nest level deeper.
private void DumpSubtagTraces(TextModelWriter sw)
{
    foreach (var trace in subtagTraces)
    {
        sw.WriteOption("TRACE", trace.Length);
        sw.NestLevel += 1;
        foreach (var t in trace)
        {
            sw.Write(string.Join(" ", t));
        }
        sw.NestLevel -= 1;
    }
}