/// <summary>
/// Trains a full <see cref="LexicalizedParser"/> from a treebank: binarizes the
/// trees, extracts the PCFG (unary + binary rules), trains the lexicon, and —
/// when <c>op.doDep</c> is set — extracts a dependency grammar as well.
/// </summary>
/// <param name="trainTreebank">The raw training treebank.</param>
/// <returns>A parser bundling the trained grammars, lexicon, and indices.</returns>
public virtual LexicalizedParser GetParserDataFromTreebank(Treebank trainTreebank)
{
    // Binarize/annotate the training trees once; every extractor below
    // consumes this same list.
    log.Info("Binarizing training trees...");
    IList<Tree> binarizedTrees = GetAnnotatedBinaryTreebankFromTreebank(trainTreebank);
    Timing.Tick("done.");

    // PCFG: extract unary and binary grammar rules over a fresh state index.
    IIndex<string> stateIndex = new HashIndex<string>();
    log.Info("Extracting PCFG...");
    IExtractor<Pair<UnaryGrammar, BinaryGrammar>> grammarExtractor = new BinaryGrammarExtractor(op, stateIndex);
    Pair<UnaryGrammar, BinaryGrammar> grammars = grammarExtractor.Extract(binarizedTrees);
    BinaryGrammar binaryGrammar = grammars.second;
    binaryGrammar.SplitRules();
    UnaryGrammar unaryGrammar = grammars.first;
    unaryGrammar.PurgeRules();
    Timing.Tick("done.");

    // Lexicon: trained over the same binarized trees with fresh word/tag indices.
    log.Info("Extracting Lexicon...");
    IIndex<string> wordIndex = new HashIndex<string>();
    IIndex<string> tagIndex = new HashIndex<string>();
    ILexicon lexicon = op.tlpParams.Lex(op, wordIndex, tagIndex);
    lexicon.InitializeTraining(binarizedTrees.Count);
    lexicon.Train(binarizedTrees);
    lexicon.FinishTraining();
    Timing.Tick("done.");

    // Dependency grammar: the extractor is created unconditionally (it shares
    // the word/tag indices), but extraction only runs when dependencies are on.
    IExtractor<IDependencyGrammar> depExtractor = op.tlpParams.DependencyGrammarExtractor(op, wordIndex, tagIndex);
    IDependencyGrammar depGrammar = null;
    if (op.doDep)
    {
        log.Info("Extracting Dependencies...");
        depGrammar = depExtractor.Extract(binarizedTrees);
        depGrammar.SetLexicon(lexicon);
        Timing.Tick("done.");
    }

    log.Info("Done extracting grammars and lexicon.");
    return new LexicalizedParser(lexicon, binaryGrammar, unaryGrammar, depGrammar, stateIndex, wordIndex, tagIndex, op);
}
/// <summary>
/// Builds a query object over a trained <see cref="LexicalizedParser"/>,
/// wiring up the PCFG parser, dependency parser, and combined (factored)
/// parser according to the options carried by the parser.
/// </summary>
/// <param name="parser">The trained parser whose grammars and indices are reused.</param>
internal LexicalizedParserQuery(LexicalizedParser parser)
{
    // Pull the trained components out of the parser. The word index is
    // wrapped in a DeltaIndex so test-time additions do not mutate the
    // parser's own index.
    this.op = parser.GetOp();
    BinaryGrammar bg = parser.bg;
    UnaryGrammar ug = parser.ug;
    ILexicon lex = parser.lex;
    IDependencyGrammar dg = parser.dg;
    IIndex<string> stateIndex = parser.stateIndex;
    IIndex<string> wordIndex = new DeltaIndex<string>(parser.wordIndex);
    IIndex<string> tagIndex = parser.tagIndex;
    this.debinarizer = new Debinarizer(op.forceCNF);
    this.boundaryRemover = new BoundaryRemover();

    // PCFG chart parser (iterative-CKY variant when requested).
    if (!op.doPCFG)
    {
        pparser = null;
    }
    else if (op.testOptions.iterativeCKY)
    {
        pparser = new IterativeCKYPCFGParser(bg, ug, lex, op, stateIndex, wordIndex, tagIndex);
    }
    else
    {
        pparser = new ExhaustivePCFGParser(bg, ug, lex, op, stateIndex, wordIndex, tagIndex);
    }

    // Dependency parser. The fast-factored path skips the exhaustive
    // dependency parser entirely, but the grammar still needs the lexicon.
    if (op.doDep)
    {
        dg.SetLexicon(lex);
        dparser = op.testOptions.useFastFactored
            ? null
            : new ExhaustiveDependencyParser(dg, lex, op, wordIndex, tagIndex);
    }
    else
    {
        dparser = null;
    }

    // Combined (factored) parser: only built when both component models exist.
    bparser = null;
    if (op.doDep && op.doPCFG)
    {
        if (op.testOptions.useFastFactored)
        {
            MLEDependencyGrammar mledg = (MLEDependencyGrammar)dg;
            int numToFind = op.testOptions.printFactoredKGood > 0 ? op.testOptions.printFactoredKGood : 1;
            bparser = new FastFactoredParser(pparser, mledg, op, numToFind, wordIndex, tagIndex);
        }
        else
        {
            // Combine PCFG and dependency scores for the bilexical search.
            IScorer scorer = new TwinScorer(pparser, dparser);
            if (op.testOptions.useN5)
            {
                bparser = new BiLexPCFGParser.N5BiLexPCFGParser(scorer, pparser, dparser, bg, ug, dg, lex, op, stateIndex, wordIndex, tagIndex);
            }
            else
            {
                bparser = new BiLexPCFGParser(scorer, pparser, dparser, bg, ug, dg, lex, op, stateIndex, wordIndex, tagIndex);
            }
        }
    }

    subcategoryStripper = op.tlpParams.SubcategoryStripper();
}