コード例 #1
0
ファイル: Vocabulary.cs プロジェクト: nyanyanya/la-pcfg
        public static Vocabulary LoadFromStream(TextModelReader sr)
        {
            var v = new Vocabulary();
            int knownWordCount = 0;
            int sigCount = 0;
            string name = typeof(Vocabulary).FullName;
            int startLvl = 0;

            string line = sr.Read();
            startLvl = sr.NestLevel;
            if (line != name)
            {
                throw new Exception("error in model file!");
            }

            var xsig = sr.ReadOptionUInt64("SIG");
            var xver = sr.ReadOptionUInt64("VER");
            if (xsig != SIG || xver != VER)
            {
                throw new Exception("Signiture or version does not match!");
            }
            knownWordCount = sr.ReadOptionInt("knownWordCount");
            sigCount = sr.ReadOptionInt("sigCount");
            v.vocab = CodeBook32.LoadFromStream(sr);
            v.signitureVocab = CodeBook32.LoadFromStream(sr);

            if (v.vocab.Count != knownWordCount || v.signitureVocab.Count != sigCount)
            {
                throw new Exception("vocab size does not match");
            }

            string closeline = sr.Read();

            if (sr.NestLevel != startLvl || closeline != name)
            {
                throw new Exception("model is not closed!");
            }

            return v;
        }
コード例 #2
0
ファイル: LAPCFGrammar.cs プロジェクト: nyanyanya/la-pcfg
        public static LAPCFGrammar LoadFromStream(TextModelReader sr, Vocabulary vocab, TagSet tagSet)
        {
            var grammar = new LAPCFGrammar();
            var name = typeof(LAPCFGrammar).FullName;

            sr.Require(name);
            sr.Require("VER", VER);

            grammar.NTCount = sr.ReadOptionInt("NTCount");
            grammar.PTCount = sr.ReadOptionInt("PTCount");
            grammar.ROOTID = sr.ReadOptionInt("ROOTID");

            sr.Require("TerminalRule");

            int lvl = sr.NestLevel;
            var truleStrings = new HashSet<string>();
            var uruleStrings = new HashSet<string>();
            var bruleStrings = new HashSet<string>();

            string line = sr.Read();
            while (sr.NestLevel > lvl)
            {
                truleStrings.Add(line);
                line = sr.Read();
            }

            if (line != "UnaryRule")
            {
                throw new Exception("wrong model!");
            }
            line = sr.Read();
            while (sr.NestLevel > lvl)
            {
                uruleStrings.Add(line);
                line = sr.Read();
            }

            if (line != "BinaryRule")
            {
                throw new Exception("wrong model!");
            }
            line = sr.Read();
            while (sr.NestLevel > lvl)
            {
                bruleStrings.Add(line);
                line = sr.Read();
            }

            string[] parts = line.Split('\t');

            if (parts [0] != "TraceCount")
            {
                throw new Exception("error in model");
            }

            int subtraceCount = int.Parse(parts [1]);

            grammar.subtagTraces = new List<int[][]>();

            for (int i = 0; i < subtraceCount; ++i)
            {
                int tlen = sr.ReadOptionInt("TRACE");
                int[][] trace = new int[tlen][];

                for (int j = 0; j < tlen; ++j)
                {
                    trace [j] = sr.ReadIntArray();
                }

                grammar.subtagTraces.Add(trace);
            }

            if (grammar.subtagTraces.Count == 0)
            {
                grammar.subTagCounts = new int[grammar.TotalTagCount];
                ArrayHelper.Fill(grammar.subTagCounts, 1);
            } else
            {
                var trace = grammar.subtagTraces [grammar.subtagTraces.Count - 1];
                grammar.subTagCounts = trace.Select(x => x.Length).ToArray();
            }

            sr.Require(name);

            foreach (var str in uruleStrings)
            {
                grammar.BuildUnaryRule(str, tagSet);
            }

            foreach (var str in truleStrings)
            {
                grammar.BuildTerminalRule(str, vocab, tagSet);
            }

            foreach (var str in bruleStrings)
            {
                grammar.BuildBinaryRule(str, tagSet);
            }

            return grammar;
        }