/// <summary>
/// Reads a <see cref="TagSet"/> from a text model reader.
/// </summary>
/// <remarks>
/// The reader is consumed in this order: a line equal to the full type name of
/// <see cref="TagSet"/>, a "VER" option that must equal <c>VER</c>, a "ROOT"
/// option string, two <c>CodeBook32</c> sections (stored in <c>PTs</c> and then
/// <c>NTs</c>), and finally a closing line that must again equal the type name
/// while the reader's nest level equals the level recorded after the opening line.
/// </remarks>
/// <param name="sr">Reader positioned at the start of the serialized tag set.</param>
/// <returns>The tag set populated from the reader.</returns>
/// <exception cref="Exception">
/// The opening or closing line does not match the type name, the nest level at
/// the closing line differs from the opening one, or the version does not match.
/// </exception>
public static TagSet LoadFromStream(TextModelReader sr)
{
    string expectedName = typeof(TagSet).FullName;

    // Opening marker: must be the type's full name.
    string header = sr.Read();
    if (header != expectedName)
    {
        throw new Exception("model name does not match");
    }

    // Remember the nesting depth so the closing marker can be checked against it.
    int openLevel = sr.NestLevel;

    var version = sr.ReadOptionUInt64("VER");
    if (version != VER)
    {
        throw new Exception("version number does not match");
    }

    var tagSet = new TagSet();
    tagSet.ROOT = sr.ReadOptionString("ROOT");
    tagSet.PTs = CodeBook32.LoadFromStream(sr);
    tagSet.NTs = CodeBook32.LoadFromStream(sr);

    // Closing marker: same name, same nesting depth as the opening one.
    string footer = sr.Read();
    bool closedProperly = footer == expectedName && sr.NestLevel == openLevel;
    if (!closedProperly)
    {
        throw new Exception("model name does not match");
    }

    return tagSet;
}
/// <summary>
/// Reads a <see cref="Vocabulary"/> from a text model reader.
/// </summary>
/// <remarks>
/// The reader is consumed in this order: a line equal to the full type name of
/// <see cref="Vocabulary"/>, the "SIG" and "VER" options (which must equal
/// <c>SIG</c> and <c>VER</c>), the "knownWordCount" and "sigCount" options, two
/// <c>CodeBook32</c> sections (stored in <c>vocab</c> and then
/// <c>signitureVocab</c>), and a closing line equal to the type name at the
/// nest level recorded right after the opening line was read.
/// </remarks>
/// <param name="sr">Reader positioned at the start of the serialized vocabulary.</param>
/// <returns>The vocabulary populated from the reader.</returns>
/// <exception cref="Exception">
/// The opening line, signature/version, code book sizes, or closing line do not
/// match what is expected.
/// </exception>
public static Vocabulary LoadFromStream(TextModelReader sr)
{
    var result = new Vocabulary();
    string typeName = typeof(Vocabulary).FullName;

    // Opening marker; the nest level is sampled before the name is validated.
    string opening = sr.Read();
    int openLevel = sr.NestLevel;
    if (opening != typeName)
    {
        throw new Exception("error in model file!");
    }

    // Both header options are read before either is checked.
    var signature = sr.ReadOptionUInt64("SIG");
    var version = sr.ReadOptionUInt64("VER");
    bool headerMismatch = signature != SIG || version != VER;
    if (headerMismatch)
    {
        throw new Exception("Signiture or version does not match!");
    }

    int expectedWordCount = sr.ReadOptionInt("knownWordCount");
    int expectedSignatureCount = sr.ReadOptionInt("sigCount");

    result.vocab = CodeBook32.LoadFromStream(sr);
    result.signitureVocab = CodeBook32.LoadFromStream(sr);

    // The declared counts must agree with what the code books actually contain.
    bool sizesAgree = result.vocab.Count == expectedWordCount
        && result.signitureVocab.Count == expectedSignatureCount;
    if (!sizesAgree)
    {
        throw new Exception("vocab size does not match");
    }

    // Closing marker: same nesting depth and same name as the opening one.
    string closing = sr.Read();
    bool closedProperly = sr.NestLevel == openLevel && closing == typeName;
    if (!closedProperly)
    {
        throw new Exception("model is not closed!");
    }

    return result;
}
/// <summary>
/// Reads an <see cref="LAPCFGrammar"/> from a text model reader.
/// </summary>
/// <remarks>
/// The reader is consumed in this order: the type-name marker and "VER" option
/// (via <c>sr.Require</c>), the "NTCount", "PTCount" and "ROOTID" options, the
/// "TerminalRule", "UnaryRule" and "BinaryRule" sections, a tab-separated
/// "TraceCount" line, that many "TRACE" blocks, and the closing type-name marker.
/// Rule lines are collected into sets first and only turned into rules after
/// the closing marker has been consumed.
/// </remarks>
/// <param name="sr">Reader positioned at the start of the serialized grammar.</param>
/// <param name="vocab">Vocabulary passed to <c>BuildTerminalRule</c>.</param>
/// <param name="tagSet">Tag set passed to all three rule builders.</param>
/// <returns>The grammar populated from the reader.</returns>
/// <exception cref="Exception">
/// A section header is not the expected one, or the "TraceCount" line is
/// missing, has no value field, or carries a different key.
/// </exception>
/// <exception cref="FormatException">The "TraceCount" value is not an integer.</exception>
public static LAPCFGrammar LoadFromStream(TextModelReader sr, Vocabulary vocab, TagSet tagSet)
{
    var grammar = new LAPCFGrammar();
    var name = typeof(LAPCFGrammar).FullName;

    sr.Require(name);
    sr.Require("VER", VER);

    grammar.NTCount = sr.ReadOptionInt("NTCount");
    grammar.PTCount = sr.ReadOptionInt("PTCount");
    grammar.ROOTID = sr.ReadOptionInt("ROOTID");

    sr.Require("TerminalRule");

    // Section headers sit at this level; lines read while the reader is nested
    // deeper than this belong to the current section.
    int lvl = sr.NestLevel;

    // Sets, so a rule line that appears more than once is kept only once.
    var truleStrings = new HashSet<string>();
    var uruleStrings = new HashSet<string>();
    var bruleStrings = new HashSet<string>();

    // Terminal rules: read until the reader is back at the header level; the
    // line that ends the loop is the next section's header.
    string line = sr.Read();
    while (sr.NestLevel > lvl)
    {
        truleStrings.Add(line);
        line = sr.Read();
    }

    if (line != "UnaryRule")
    {
        throw new Exception("wrong model!");
    }

    line = sr.Read();
    while (sr.NestLevel > lvl)
    {
        uruleStrings.Add(line);
        line = sr.Read();
    }

    if (line != "BinaryRule")
    {
        throw new Exception("wrong model!");
    }

    line = sr.Read();
    while (sr.NestLevel > lvl)
    {
        bruleStrings.Add(line);
        line = sr.Read();
    }

    // The line that ended the binary section must be "TraceCount<TAB>n".
    // NOTE(review): whether sr.Read() can return null is not visible here; the
    // null check keeps a truncated model from surfacing as a NullReferenceException.
    if (line == null)
    {
        throw new Exception("error in model");
    }

    string[] parts = line.Split('\t');

    // Check the field count before indexing: a line without a tab yields a
    // single-element array, and parts[1] would throw IndexOutOfRangeException.
    if (parts.Length < 2 || parts[0] != "TraceCount")
    {
        throw new Exception("error in model");
    }

    // Model files are machine-readable, so parse independently of the current culture.
    int subtraceCount = int.Parse(parts[1], System.Globalization.CultureInfo.InvariantCulture);

    grammar.subtagTraces = new List<int[][]>();
    for (int i = 0; i < subtraceCount; ++i)
    {
        // Each trace is a "TRACE" option giving the row count, followed by that many int arrays.
        int tlen = sr.ReadOptionInt("TRACE");
        int[][] trace = new int[tlen][];
        for (int j = 0; j < tlen; ++j)
        {
            trace[j] = sr.ReadIntArray();
        }

        grammar.subtagTraces.Add(trace);
    }

    if (grammar.subtagTraces.Count == 0)
    {
        // No traces stored: one entry per tag, passed to ArrayHelper.Fill with the value 1.
        grammar.subTagCounts = new int[grammar.TotalTagCount];
        ArrayHelper.Fill(grammar.subTagCounts, 1);
    }
    else
    {
        // Otherwise the counts are the row lengths of the last trace.
        var lastTrace = grammar.subtagTraces[grammar.subtagTraces.Count - 1];
        grammar.subTagCounts = lastTrace.Select(x => x.Length).ToArray();
    }

    sr.Require(name);

    // Rules are built only after the whole model has been read, in the original
    // order (unary, terminal, binary). Whether the builders depend on that order
    // is not visible here, so it is preserved.
    foreach (var str in uruleStrings)
    {
        grammar.BuildUnaryRule(str, tagSet);
    }

    foreach (var str in truleStrings)
    {
        grammar.BuildTerminalRule(str, vocab, tagSet);
    }

    foreach (var str in bruleStrings)
    {
        grammar.BuildBinaryRule(str, tagSet);
    }

    return grammar;
}