Esempio n. 1
0
        /// <summary>
        /// Annotates every tree in <paramref name="testTreebank"/> and, for each local tree that has at
        /// least two children, prints its rule (via <c>LocalTreeToRule</c>) followed by the score
        /// returned by <c>ComputeLocalTreeScore</c>.
        /// </summary>
        /// <remarks>
        /// Overwrites <c>op.trainOptions.splitters</c> and <c>op.trainOptions.sisterSplitters</c>
        /// before iterating.
        /// </remarks>
        /// <param name="pd">Parser handed to <c>ComputeLocalTreeScore</c>.</param>
        /// <param name="tlpParams">Language parameters; supplies the head finder for the tree annotator.</param>
        /// <param name="testTreebank">Trees to annotate and score.</param>
        /// <param name="treebankRoot">Passed to <c>ParentAnnotationStats.GetEnglishSplitCategories</c>.</param>
        /// <param name="stateIndex">State index handed to <c>ComputeLocalTreeScore</c>.</param>
        private void TestOnTreebank(LexicalizedParser pd, ITreebankLangParserParams tlpParams, Treebank testTreebank, string treebankRoot, IIndex <string> stateIndex)
        {
            Timing.StartTime();
            ITreeTransformer annotator = new TreeAnnotator(tlpParams.HeadFinder(), tlpParams, op);

            // CDM: Aug 2004: With new implementation of treebank split categories,
            // I've hardwired this to load English ones.  Otherwise need training data.
            // op.trainOptions.splitters = new HashSet(Arrays.asList(op.tlpParams.splitters()));
            op.trainOptions.splitters       = ParentAnnotationStats.GetEnglishSplitCategories(treebankRoot);
            op.trainOptions.sisterSplitters = Generics.NewHashSet(Arrays.AsList(op.tlpParams.SisterSplitters()));
            foreach (Tree goldTree in testTreebank)
            {
                // A foreach iteration variable is read-only in C# (CS1656), so the
                // annotated tree is held in its own local instead of being reassigned.
                Tree annotatedTree = annotator.TransformTree(goldTree);
                foreach (Tree localTree in annotatedTree)
                {
                    // Only local trees with two or more children are scored;
                    // leaves, preterminals and unary nodes are skipped.
                    if (localTree.IsLeaf() || localTree.IsPreTerminal() || localTree.Children().Length < 2)
                    {
                        continue;
                    }
                    System.Console.Out.WriteLine(LocalTreeToRule(localTree));
                    double score = ComputeLocalTreeScore(localTree, stateIndex, pd);
                    System.Console.Out.WriteLine("score: " + score);
                }
            }
        }
 /// <summary>
 /// Configures <paramref name="op"/> in place and builds the tree transformer, the
 /// un-transformer and the collinizer from it.
 /// </summary>
 /// <param name="op">
 /// Options object. Its <c>trainOptions.splitters</c> and <c>trainOptions.sisterSplitters</c> are
 /// overwritten, the "-acl03pcfg" and "-cnf" flags are applied, and the instance is stored on this object.
 /// </param>
 /// <param name="treebankRoot">Passed to <c>ParentAnnotationStats.GetEnglishSplitCategories</c>.</param>
 public TreebankAnnotator(Options op, string treebankRoot)
 {
     // CDM, Aug 2004: since the treebank split categories were re-implemented, the English
     // ones are hard-wired here; anything else would need training data.
     // Earlier alternatives, kept for reference:
     //   op.tlpParams = new EnglishTreebankParserParams();
     //   op.trainOptions.splitters = Generics.newHashSet(Arrays.asList(op.tlpParams.splitters()));
     var trainOptions = op.trainOptions;
     trainOptions.splitters = ParentAnnotationStats.GetEnglishSplitCategories(treebankRoot);
     trainOptions.sisterSplitters = Generics.NewHashSet(Arrays.AsList(op.tlpParams.SisterSplitters()));

     op.SetOptions("-acl03pcfg", "-cnf");

     // Values are read only after SetOptions has run, in the same left-to-right
     // order the binarizer's argument list used.
     var langParams = op.tlpParams;
     var forceCnf = op.forceCNF;
     var insideFactor = !op.trainOptions.OutsideFactor();
     treeTransformer = new TreeAnnotatorAndBinarizer(langParams, forceCnf, insideFactor, true, op);
     // Previously also: BinarizerFactory.TreeAnnotator.setTreebankLang(op.tlpParams);

     treeUnTransformer = new Debinarizer(op.forceCNF);
     collinizer = op.tlpParams.Collinizer();
     this.op = op;
 }