Esempi di codice in C# (CSharp) per Vocabulary, PCFGParser

Esempio n. 1

0

Mostra file

File: Vocabulary.cs Progetto: nyanyanya/la-pcfg

        /// <summary>
        /// Reads a <see cref="Vocabulary"/> section from a text model reader.
        /// </summary>
        /// <remarks>
        /// The section is expected to open and close with a line equal to the full
        /// type name of <see cref="Vocabulary"/>. Between those lines come the
        /// "SIG" and "VER" options, the "knownWordCount" and "sigCount" options,
        /// and two <c>CodeBook32</c> sections (word vocabulary, then signature vocabulary).
        /// </remarks>
        /// <param name="sr">Reader positioned at the opening line of the section.</param>
        /// <returns>The vocabulary populated with both code books.</returns>
        /// <exception cref="Exception">
        /// Thrown when the opening line, signature/version, code book sizes, or the
        /// closing line / nesting level do not match what is expected.
        /// </exception>
        public static Vocabulary LoadFromStream(TextModelReader sr)
        {
            var result = new Vocabulary();
            string typeName = typeof(Vocabulary).FullName;

            // Capture the nesting level right after the opening line so that the
            // closing line can be validated against it.
            string header = sr.Read();
            int openLevel = sr.NestLevel;
            if (header != typeName)
            {
                throw new Exception("error in model file!");
            }

            var fileSig = sr.ReadOptionUInt64("SIG");
            var fileVer = sr.ReadOptionUInt64("VER");
            if (!(fileSig == SIG && fileVer == VER))
            {
                throw new Exception("Signiture or version does not match!");
            }

            // Declared sizes are read before the code books and checked afterwards.
            int expectedWords = sr.ReadOptionInt("knownWordCount");
            int expectedSigs = sr.ReadOptionInt("sigCount");

            result.vocab = CodeBook32.LoadFromStream(sr);
            result.signitureVocab = CodeBook32.LoadFromStream(sr);

            bool sizesMatch = result.vocab.Count == expectedWords
                && result.signitureVocab.Count == expectedSigs;
            if (!sizesMatch)
            {
                throw new Exception("vocab size does not match");
            }

            string footer = sr.Read();
            if (sr.NestLevel != openLevel || footer != typeName)
            {
                throw new Exception("model is not closed!");
            }

            return result;
        }

Esempio n. 2

0

Mostra file

File: Program.cs Progetto: nyanyanya/la-pcfg

        /// <summary>
        /// Builds per-word tag constraints for words the vocabulary reports as rare or unknown.
        /// </summary>
        /// <param name="vocab">Vocabulary queried through <c>IsRareOrUNK</c> for each word id.</param>
        /// <param name="tagSet">Tag set providing <c>PTCount</c> and the tag-name to id mapping.</param>
        /// <param name="words">Surface forms, indexed in parallel with <paramref name="wids"/>.</param>
        /// <param name="wids">Word ids; the result contains one slot per id.</param>
        /// <returns>
        /// An array with one slot per word. A slot is left <c>null</c> when the word is
        /// not rare/unknown or when the morphological analyzer yields no lemma; otherwise
        /// it is a <c>bool[]</c> of length <c>tagSet.PTCount</c> with <c>true</c> at every
        /// permitted tag id.
        /// </returns>
        /// <exception cref="Exception">Thrown when a lemma carries an unhandled part of speech.</exception>
        private static bool[][] AssignTagConstraints(Vocabulary vocab, TagSet tagSet, string[] words, int[] wids)
        {
            bool[][] allowedTags = new bool[wids.Length][];

            for (int i = 0; i < wids.Length; ++i)
            {
                if (!vocab.IsRareOrUNK(wids[i]))
                {
                    continue;
                }

                string word = words[i];
                var lemmas = EMorph.EnglishMorph.GetBaseForm(word);

                if (lemmas == null || lemmas.Count == 0)
                {
                    continue;
                }

                bool[] allowed = new bool[tagSet.PTCount];
                allowedTags[i] = allowed;

                // Capitalized words may always be proper nouns. The length guard
                // avoids indexing into an empty string.
                if (word.Length > 0 && char.IsUpper(word[0]))
                {
                    allowed[tagSet.GetID("NNP")] = true;
                    allowed[tagSet.GetID("NNPS")] = true;
                }

                // Culture-invariant lowercasing and ordinal suffix tests keep the
                // result independent of the current thread culture (for example the
                // Turkish dotless-i mapping would otherwise break the "ise" test).
                string lowered = word.ToLowerInvariant();

                foreach (var lemma in lemmas)
                {
                    switch (lemma.PoS)
                    {
                        case EMorph.MorphPoS.NN:
                            allowed[tagSet.GetID("NN")] = true;
                            // Nouns whose plural equals the singular, and words ending
                            // in "ese"/"ise", may also be tagged as plural.
                            if (EMorph.EnglishMorph.IsNoChangeNoun(lowered)
                                || lowered.EndsWith("ese", StringComparison.Ordinal)
                                || lowered.EndsWith("ise", StringComparison.Ordinal))
                            {
                                allowed[tagSet.GetID("NNS")] = true;
                            }
                            break;
                        case EMorph.MorphPoS.NNS:
                            allowed[tagSet.GetID("NNS")] = true;
                            break;
                        case EMorph.MorphPoS.JJ:
                            allowed[tagSet.GetID("JJ")] = true;
                            break;
                        case EMorph.MorphPoS.JJR:
                            allowed[tagSet.GetID("JJR")] = true;
                            break;
                        case EMorph.MorphPoS.JJS:
                            allowed[tagSet.GetID("JJS")] = true;
                            break;
                        case EMorph.MorphPoS.RB:
                            allowed[tagSet.GetID("RB")] = true;
                            break;
                        case EMorph.MorphPoS.RBR:
                            allowed[tagSet.GetID("RBR")] = true;
                            break;
                        case EMorph.MorphPoS.RBS:
                            allowed[tagSet.GetID("RBS")] = true;
                            break;
                        case EMorph.MorphPoS.VB:
                            // A base-form verb also permits the VBP tag.
                            allowed[tagSet.GetID("VB")] = true;
                            allowed[tagSet.GetID("VBP")] = true;
                            break;
                        case EMorph.MorphPoS.VBD:
                            // A VBD lemma also permits the VBN tag.
                            allowed[tagSet.GetID("VBD")] = true;
                            allowed[tagSet.GetID("VBN")] = true;
                            break;
                        case EMorph.MorphPoS.VBG:
                            allowed[tagSet.GetID("VBG")] = true;
                            break;
                        case EMorph.MorphPoS.VBZ:
                            allowed[tagSet.GetID("VBZ")] = true;
                            break;
                        default:
                            throw new Exception("not recognized morph lemma!");
                    }
                }
            }

            return allowedTags;
        }

Esempio n. 3

0

Mostra file

File: Program.cs Progetto: nyanyanya/la-pcfg

        /// <summary>
        /// Builds a hypergraph for every tree in the treebank, runs the forward and
        /// backward passes, and sums the resulting root scores.
        /// </summary>
        /// <param name="nthread">Number of parallel workers; worker <c>k</c> handles trees k, k + nthread, k + 2*nthread, ...</param>
        /// <param name="treebank">Trees to process.</param>
        /// <param name="rules">Grammar handed to every worker's parser.</param>
        /// <param name="vocab">Vocabulary handed to every worker's parser.</param>
        /// <param name="tagSet">Tag set handed to every worker's parser.</param>
        /// <param name="failed">
        /// Receives the number of trees that raised an exception or produced an
        /// infinite/NaN root score.
        /// </param>
        /// <returns>The sum of root scores over all trees that did not fail.</returns>
        private static double ParseGraphs(int nthread,
            List<PhrasalTree> treebank,
            LAPCFGrammar rules,
            Vocabulary vocab,
            TagSet tagSet,
            out int failed)
        {
            double totalScore = 0;
            int totalFailures = 0;
            var gate = new object();

            Parallel.For(0, nthread, worker =>
            {
                // Per-worker accumulators; merged under the lock once at the end.
                int localFailures = 0;
                double localScore = 0;
                var parser = new HyperGraphParser(vocab, tagSet, rules);

                int index = worker;
                while (index < treebank.Count)
                {
                    try
                    {
                        var graph = parser.BuildHyperGraph(treebank[index]);

                        graph.SumForward();
                        graph.SumBackward();

                        double score = graph.RootScore;
                        if (double.IsInfinity(score) || double.IsNaN(score))
                        {
                            localFailures++;
                        }
                        else
                        {
                            localScore += score;
                        }
                    }
                    catch
                    {
                        // Any exception while processing a tree counts as a failure.
                        localFailures++;
                    }

                    index += nthread;
                }

                lock (gate)
                {
                    totalFailures += localFailures;
                    totalScore += localScore;
                }
            });

            failed = totalFailures;
            return totalScore;
        }

Esempio n. 4

0

Mostra file

File: Program.cs Progetto: nyanyanya/la-pcfg

        /// <summary>
        /// Builds a hypergraph for every tree in the treebank, runs the forward and
        /// backward passes, collects expected counts, and sums the root scores.
        /// </summary>
        /// <remarks>
        /// Each worker gets its own grammar: worker 0 uses <paramref name="rules"/>
        /// itself, the others use clones made with <c>CloneWithSharedParameters</c>.
        /// After the parallel phase, the posterior counts of every clone are added
        /// into <paramref name="rules"/>.
        /// </remarks>
        /// <param name="nthread">Number of parallel workers; worker <c>k</c> handles trees k, k + nthread, k + 2*nthread, ...</param>
        /// <param name="treebank">Trees to process.</param>
        /// <param name="rules">Grammar that receives the merged posterior counts.</param>
        /// <param name="vocab">Vocabulary handed to every worker's parser.</param>
        /// <param name="tagSet">Tag set handed to every worker's parser.</param>
        /// <param name="failed">
        /// Receives the number of trees that raised an exception or produced an
        /// infinite/NaN root score.
        /// </param>
        /// <returns>The sum of root scores over all trees that did not fail.</returns>
        private static double ParseGraphAndCollect(int nthread,
            List<PhrasalTree> treebank,
            LAPCFGrammar rules,
            Vocabulary vocab,
            TagSet tagSet,
            out int failed)
        {
            double totalScore = 0;
            int totalFailures = 0;
            var gate = new object();

            // One grammar per worker: the original first, then clones.
            var grammars = new List<LAPCFGrammar> { rules };
            for (int k = 1; k < nthread; ++k)
            {
                grammars.Add(rules.CloneWithSharedParameters());
            }

            Parallel.For(0, nthread, worker =>
            {
                // Per-worker accumulators; merged under the lock once at the end.
                int localFailures = 0;
                double localScore = 0;
                var parser = new HyperGraphParser(vocab, tagSet, grammars[worker]);

                int index = worker;
                while (index < treebank.Count)
                {
                    try
                    {
                        var graph = parser.BuildHyperGraph(treebank[index]);

                        graph.SumForward();
                        graph.SumBackward();

                        double score = graph.RootScore;
                        if (double.IsInfinity(score) || double.IsNaN(score))
                        {
                            // Skip count collection for trees with an unusable score.
                            localFailures++;
                        }
                        else
                        {
                            graph.CollectExpectedCount();
                            localScore += graph.RootScore;
                        }
                    }
                    catch
                    {
                        // Any exception while processing a tree counts as a failure.
                        localFailures++;
                    }

                    index += nthread;
                }

                lock (gate)
                {
                    totalFailures += localFailures;
                    totalScore += localScore;
                }
            });

            // Fold the counts gathered by each clone back into the original grammar.
            for (int k = 1; k < grammars.Count; ++k)
            {
                var clone = grammars[k];
                LAPCFGrammar.ApplyToRules((x, y) => x.Add(y), rules.tposteriorCounts, clone.tposteriorCounts);
                LAPCFGrammar.ApplyToRules((x, y) => x.Add(y), rules.uposteriorCounts, clone.uposteriorCounts);
                LAPCFGrammar.ApplyToRules((x, y) => x.Add(y), rules.bposteriorCounts, clone.bposteriorCounts);
            }

            failed = totalFailures;
            return totalScore;
        }

Esempio n. 5

0

Mostra file