/// <summary>
/// Creates a file parser bound to the given options, tree printer and parser.
/// </summary>
/// <remarks>
/// The language pack and the output/error writers are obtained from
/// <c>op.tlpParams</c>. When <c>op.testOptions.verbose</c> is set, the
/// sentence-final punctuation words and the input encoding are written to the
/// error writer. The log-likelihood evaluators (<c>pcfgLL</c>, <c>depLL</c>,
/// <c>factLL</c>) are created only when the matching entry in
/// <c>op.testOptions.evals</c> parses as <c>true</c>; otherwise they are set to
/// <c>null</c>.
/// </remarks>
/// <param name="op">Parser options; supplies language parameters and test/eval settings.</param>
/// <param name="treePrint">Tree printer stored for later use.</param>
/// <param name="pqFactory">Parser stored for later use.</param>
public ParseFiles(Options op, TreePrint treePrint, LexicalizedParser pqFactory)
{
    this.op = op;
    this.pqFactory = pqFactory;
    this.treePrint = treePrint;

    this.tlp = op.tlpParams.TreebankLanguagePack();
    this.pwOut = op.tlpParams.Pw();
    this.pwErr = op.tlpParams.Pw(System.Console.Error);

    if (op.testOptions.verbose)
    {
        pwErr.Println("Sentence final words are: " + Arrays.AsList(tlp.SentenceFinalPunctuationWords()));
        pwErr.Println("File encoding is: " + op.tlpParams.GetInputEncoding());
    }

    // Evaluation setup.
    this.runningAverages = ReadEvalFlag(op, "runningAverages");
    this.summary = ReadEvalFlag(op, "summary");

    this.pcfgLL = ReadEvalFlag(op, "pcfgLL")
        ? new AbstractEval.ScoreEval("pcfgLL", runningAverages)
        : null;
    this.depLL = ReadEvalFlag(op, "depLL")
        ? new AbstractEval.ScoreEval("depLL", runningAverages)
        : null;
    this.factLL = ReadEvalFlag(op, "factLL")
        ? new AbstractEval.ScoreEval("factLL", runningAverages)
        : null;
}

/// <summary>
/// Reads a boolean entry from <c>op.testOptions.evals</c>.
/// </summary>
/// <param name="op">Options holding the eval properties.</param>
/// <param name="name">Property key to look up.</param>
/// <returns>
/// <c>true</c> only when the property text parses as boolean true; <c>false</c>
/// when it is missing or not a boolean.
/// </returns>
private static bool ReadEvalFlag(Options op, string name)
{
    // System.Boolean has no ParseBoolean member, and bool.Parse throws on
    // null or unrecognized text. TryParse reports both as failure, which is
    // mapped to false here.
    bool parsed;
    return bool.TryParse(op.testOptions.evals.GetProperty(name), out parsed) && parsed;
}
/// <summary>
/// Creates a treebank evaluator and instantiates the evaluation metrics
/// selected in <c>op.testOptions.evals</c>.
/// </summary>
/// <remarks>
/// Each boolean eval property switches on one metric; a missing or
/// non-boolean value leaves that metric disabled instead of throwing.
/// The <c>pcfgChildSpecific</c> and <c>factChildSpecific</c> properties are
/// string filters and enable their metric whenever they are present.
/// </remarks>
/// <param name="op">Parser options; supplies language parameters and train/test settings.</param>
/// <param name="lex">Lexicon handed to the tagging evaluators.</param>
/// <param name="pqFactory">Parser grammar; also supplies extra evals and parser-query evals.</param>
/// <param name="tagger">Tagging function stored for later use.</param>
public EvaluateTreebank(Options op, ILexicon lex, ParserGrammar pqFactory, Func<IList<IHasWord>, IList<TaggedWord>> tagger)
{
    this.op = op;
    this.debinarizer = new Debinarizer(op.forceCNF);
    this.subcategoryStripper = op.tlpParams.SubcategoryStripper();

    this.evals = Generics.NewArrayList();
    Sharpen.Collections.AddAll(evals, pqFactory.GetExtraEvals());
    this.parserQueryEvals = pqFactory.GetParserQueryEvals();

    this.pqFactory = pqFactory;
    this.tagger = tagger;

    collinizer = op.tlpParams.Collinizer();
    boundaryRemover = new BoundaryRemover();

    bool runningAverages = IsEvalEnabled(op, "runningAverages");
    summary = IsEvalEnabled(op, "summary");
    tsv = IsEvalEnabled(op, "tsv");

    if (op.trainOptions.leftToRight)
    {
        binarizerOnly = new TreeAnnotatorAndBinarizer(op.tlpParams.HeadFinder(), new LeftHeadFinder(), op.tlpParams, op.forceCNF, false, false, op);
    }
    else
    {
        binarizerOnly = new TreeAnnotatorAndBinarizer(op.tlpParams, op.forceCNF, false, false, op);
    }

    if (IsEvalEnabled(op, "pcfgLB"))
    {
        pcfgLB = new Evalb("pcfg LP/LR", runningAverages);
    }

    // TODO: might be nice to allow more than one child-specific scorer
    string pcfgChildFilter = op.testOptions.evals.GetProperty("pcfgChildSpecific");
    if (pcfgChildFilter != null)
    {
        pcfgChildSpecific = FilteredEval.ChildFilteredEval("pcfg children matching " + pcfgChildFilter + " LP/LR", runningAverages, op.Langpack(), pcfgChildFilter);
    }

    if (IsEvalEnabled(op, "pcfgLA"))
    {
        pcfgLA = new LeafAncestorEval("pcfg LeafAncestor");
    }
    if (IsEvalEnabled(op, "pcfgCB"))
    {
        pcfgCB = new Evalb.CBEval("pcfg CB", runningAverages);
    }
    if (IsEvalEnabled(op, "pcfgDA"))
    {
        pcfgDA = new UnlabeledAttachmentEval("pcfg DA", runningAverages, op.Langpack().HeadFinder());
    }
    if (IsEvalEnabled(op, "pcfgTA"))
    {
        pcfgTA = new TaggingEval("pcfg Tag", runningAverages, lex);
    }

    if (IsEvalEnabled(op, "depDA"))
    {
        depDA = new UnlabeledAttachmentEval("dep DA", runningAverages, null, op.Langpack().PunctuationWordRejectFilter());
    }
    if (IsEvalEnabled(op, "depTA"))
    {
        depTA = new TaggingEval("dep Tag", runningAverages, lex);
    }

    if (IsEvalEnabled(op, "factLB"))
    {
        factLB = new Evalb("factor LP/LR", runningAverages);
    }

    string factChildFilter = op.testOptions.evals.GetProperty("factChildSpecific");
    if (factChildFilter != null)
    {
        factChildSpecific = FilteredEval.ChildFilteredEval("fact children matching " + factChildFilter + " LP/LR", runningAverages, op.Langpack(), factChildFilter);
    }

    if (IsEvalEnabled(op, "factLA"))
    {
        factLA = new LeafAncestorEval("factor LeafAncestor");
    }
    if (IsEvalEnabled(op, "factCB"))
    {
        factCB = new Evalb.CBEval("fact CB", runningAverages);
    }
    if (IsEvalEnabled(op, "factDA"))
    {
        factDA = new UnlabeledAttachmentEval("factor DA", runningAverages, null);
    }
    if (IsEvalEnabled(op, "factTA"))
    {
        factTA = new TaggingEval("factor Tag", runningAverages, lex);
    }

    if (IsEvalEnabled(op, "pcfgRUO"))
    {
        pcfgRUO = new AbstractEval.RuleErrorEval("pcfg Rule under/over");
    }
    if (IsEvalEnabled(op, "pcfgCUO"))
    {
        pcfgCUO = new AbstractEval.CatErrorEval("pcfg Category under/over");
    }
    if (IsEvalEnabled(op, "pcfgCatE"))
    {
        pcfgCatE = new EvalbByCat("pcfg Category Eval", runningAverages);
    }

    if (IsEvalEnabled(op, "pcfgLL"))
    {
        pcfgLL = new AbstractEval.ScoreEval("pcfgLL", runningAverages);
    }
    if (IsEvalEnabled(op, "depLL"))
    {
        depLL = new AbstractEval.ScoreEval("depLL", runningAverages);
    }
    if (IsEvalEnabled(op, "factLL"))
    {
        factLL = new AbstractEval.ScoreEval("factLL", runningAverages);
    }

    if (IsEvalEnabled(op, "topMatch"))
    {
        evals.Add(new TopMatchEval("topMatch", runningAverages));
    }

    // This one is for the various k Good/Best options. Just for individual results.
    kGoodLB = new Evalb("kGood LP/LR", false);

    if (IsEvalEnabled(op, "pcfgTopK"))
    {
        topKEvals.Add(new BestOfTopKEval(new Evalb("pcfg top k comparisons", false), new Evalb("pcfg top k LP/LR", runningAverages)));
    }

    // k-best size: the eval setting applies only when a top-k eval exists;
    // a positive print setting can only raise it.
    if (topKEvals.Count > 0)
    {
        kbestPCFG = op.testOptions.evalPCFGkBest;
    }
    if (op.testOptions.printPCFGkBest > 0)
    {
        kbestPCFG = Math.Max(kbestPCFG, op.testOptions.printPCFGkBest);
    }
}

/// <summary>
/// Reads a boolean entry from <c>op.testOptions.evals</c>.
/// </summary>
/// <param name="op">Options holding the eval properties.</param>
/// <param name="evalName">Property key to look up.</param>
/// <returns>
/// <c>true</c> only when the property text parses as boolean true; <c>false</c>
/// when it is missing or not a boolean.
/// </returns>
private static bool IsEvalEnabled(Options op, string evalName)
{
    // bool.Parse throws on a null or unrecognized value. TryParse accepts
    // the same inputs bool.Parse accepts and reports everything else as
    // failure, which is mapped to false here.
    bool enabled;
    return bool.TryParse(op.testOptions.evals.GetProperty(evalName), out enabled) && enabled;
}