Пример #1
0
        /// <summary>
        /// Builds a <see cref="LexicalizedParser"/> from a training treebank.
        /// </summary>
        /// <remarks>
        /// The trees are binarized first; the unary and binary grammars and the lexicon are
        /// then extracted from the binarized trees. A dependency grammar is extracted only
        /// when <c>op.doDep</c> is set; otherwise the parser is constructed with a null
        /// dependency grammar.
        /// </remarks>
        /// <param name="trainTreebank">Treebank whose trees are used for training.</param>
        /// <returns>A parser holding the extracted grammars, lexicon and indices.</returns>
        public virtual LexicalizedParser GetParserDataFromTreebank(Treebank trainTreebank)
        {
            // Step 1: binarize the training trees.
            log.Info("Binarizing training trees...");
            IList<Tree> binarizedTrees = GetAnnotatedBinaryTreebankFromTreebank(trainTreebank);
            Timing.Tick("done.");

            // Step 2: extract the unary/binary grammar pair over a fresh state index.
            IIndex<string> states = new HashIndex<string>();
            log.Info("Extracting PCFG...");
            IExtractor<Pair<UnaryGrammar, BinaryGrammar>> grammarExtractor = new BinaryGrammarExtractor(op, states);
            Pair<UnaryGrammar, BinaryGrammar> grammarPair = grammarExtractor.Extract(binarizedTrees);
            BinaryGrammar binaryGrammar = grammarPair.second;
            UnaryGrammar unaryGrammar = grammarPair.first;
            binaryGrammar.SplitRules();
            unaryGrammar.PurgeRules();
            Timing.Tick("done.");

            // Step 3: train the lexicon over fresh word and tag indices.
            log.Info("Extracting Lexicon...");
            IIndex<string> words = new HashIndex<string>();
            IIndex<string> tags = new HashIndex<string>();
            ILexicon lexicon = op.tlpParams.Lex(op, words, tags);
            lexicon.InitializeTraining(binarizedTrees.Count);
            lexicon.Train(binarizedTrees);
            lexicon.FinishTraining();
            Timing.Tick("done.");

            // Step 4: the extractor is always created, but only run when dependencies are enabled.
            IExtractor<IDependencyGrammar> dependencyExtractor = op.tlpParams.DependencyGrammarExtractor(op, words, tags);
            IDependencyGrammar dependencyGrammar = null;
            if (op.doDep)
            {
                log.Info("Extracting Dependencies...");
                dependencyGrammar = dependencyExtractor.Extract(binarizedTrees);
                dependencyGrammar.SetLexicon(lexicon);
                Timing.Tick("done.");
            }

            log.Info("Done extracting grammars and lexicon.");
            return new LexicalizedParser(lexicon, binaryGrammar, unaryGrammar, dependencyGrammar, states, words, tags, op);
        }
        /// <summary>
        /// Creates a query object bound to the given parser, instantiating the PCFG,
        /// dependency and combined parsers selected by the parser's options.
        /// </summary>
        /// <remarks>
        /// The word index is wrapped in a <c>DeltaIndex</c> over the parser's word index;
        /// the state and tag indices are taken from the parser as they are.
        /// Sub-parsers that the options disable are explicitly set to null.
        /// </remarks>
        /// <param name="parser">Parser supplying the options, grammars, lexicon and indices.</param>
        internal LexicalizedParserQuery(LexicalizedParser parser)
        {
            this.op = parser.GetOp();

            BinaryGrammar binaryGrammar = parser.bg;
            UnaryGrammar unaryGrammar = parser.ug;
            ILexicon lexicon = parser.lex;
            IDependencyGrammar dependencyGrammar = parser.dg;
            IIndex<string> states = parser.stateIndex;
            IIndex<string> words = new DeltaIndex<string>(parser.wordIndex);
            IIndex<string> tags = parser.tagIndex;

            this.debinarizer = new Debinarizer(op.forceCNF);
            this.boundaryRemover = new BoundaryRemover();

            // PCFG parser: absent unless doPCFG is set; iterative CKY when requested.
            if (!op.doPCFG)
            {
                pparser = null;
            }
            else if (op.testOptions.iterativeCKY)
            {
                pparser = new IterativeCKYPCFGParser(binaryGrammar, unaryGrammar, lexicon, op, states, words, tags);
            }
            else
            {
                pparser = new ExhaustivePCFGParser(binaryGrammar, unaryGrammar, lexicon, op, states, words, tags);
            }

            // Dependency parser: only built when dependencies are on and the fast factored
            // parser is not in use. The lexicon is attached whenever dependencies are on.
            if (!op.doDep)
            {
                dparser = null;
            }
            else
            {
                dependencyGrammar.SetLexicon(lexicon);
                if (op.testOptions.useFastFactored)
                {
                    dparser = null;
                }
                else
                {
                    dparser = new ExhaustiveDependencyParser(dependencyGrammar, lexicon, op, words, tags);
                }
            }

            // Combined parser: requires both the PCFG and the dependency model.
            if (!op.doDep || !op.doPCFG)
            {
                bparser = null;
            }
            else if (op.testOptions.useFastFactored)
            {
                MLEDependencyGrammar mleGrammar = (MLEDependencyGrammar)dependencyGrammar;
                // Use printFactoredKGood as the number to find when it is positive, else 1.
                int numToFind = op.testOptions.printFactoredKGood > 0
                    ? op.testOptions.printFactoredKGood
                    : 1;
                bparser = new FastFactoredParser(pparser, mleGrammar, op, numToFind, words, tags);
            }
            else
            {
                IScorer twinScorer = new TwinScorer(pparser, dparser);
                if (op.testOptions.useN5)
                {
                    bparser = new BiLexPCFGParser.N5BiLexPCFGParser(twinScorer, pparser, dparser, binaryGrammar, unaryGrammar, dependencyGrammar, lexicon, op, states, words, tags);
                }
                else
                {
                    bparser = new BiLexPCFGParser(twinScorer, pparser, dparser, binaryGrammar, unaryGrammar, dependencyGrammar, lexicon, op, states, words, tags);
                }
            }

            subcategoryStripper = op.tlpParams.SubcategoryStripper();
        }