Exemple #1
0
        /// <summary>
        /// demoAPI demonstrates other ways of calling the parser with
        /// already tokenized text, or in some cases, raw text that needs to
        /// be tokenized as a single sentence.
        /// </summary>
        /// <remarks>
        /// demoAPI demonstrates other ways of calling the parser with
        /// already tokenized text, or in some cases, raw text that needs to
        /// be tokenized as a single sentence.  Output is handled with a
        /// TreePrint object.  Note that the options used when creating the
        /// TreePrint can determine what results to print out.  Once again,
        /// one can capture the output by passing a PrintWriter to
        /// TreePrint.printTree. This code is for English.
        /// </remarks>
        public static void DemoAPI(LexicalizedParser lp)
        {
            // This option shows parsing a list of correctly tokenized words
            string[]          sent     = new string[] { "This", "is", "an", "easy", "sentence", "." };
            IList <CoreLabel> rawWords = SentenceUtils.ToCoreLabelList(sent);
            Tree parse = lp.Apply(rawWords);

            parse.PennPrint();
            System.Console.Out.WriteLine();
            // This option shows loading and using an explicit tokenizer
            string sent2 = "This is another sentence.";
            ITokenizerFactory <CoreLabel> tokenizerFactory = PTBTokenizer.Factory(new CoreLabelTokenFactory(), string.Empty);
            ITokenizer <CoreLabel>        tok       = tokenizerFactory.GetTokenizer(new StringReader(sent2));
            IList <CoreLabel>             rawWords2 = tok.Tokenize();

            parse = lp.Apply(rawWords2);
            ITreebankLanguagePack tlp = lp.TreebankLanguagePack();
            // PennTreebankLanguagePack for English
            IGrammaticalStructureFactory gsf = tlp.GrammaticalStructureFactory();
            GrammaticalStructure         gs  = gsf.NewGrammaticalStructure(parse);
            IList <TypedDependency>      tdl = gs.TypedDependenciesCCprocessed();

            System.Console.Out.WriteLine(tdl);
            System.Console.Out.WriteLine();
            // You can also use a TreePrint object to print trees and dependencies
            TreePrint tp = new TreePrint("penn,typedDependenciesCollapsed");

            tp.PrintTree(parse);
        }
Exemple #2
0
        public virtual void TestConll2007()
        {
            Tree test = Tree.ValueOf("((S (NP (PRP It)) (VP (VBZ is) (RB not) (ADJP (JJ normal)) (SBAR (IN for) (S (NP (NNS dogs)) (VP (TO to) (VP (VB be) (VP (VBG vomiting)))))))))");

            string[]     words     = new string[] { "It", "is", "not", "normal", "for", "dogs", "to", "be", "vomiting" };
            string[]     tags      = new string[] { "PRP", "VBZ", "RB", "JJ", "IN", "NNS", "TO", "VB", "VBG" };
            TreePrint    treePrint = new TreePrint("conll2007");
            StringWriter writer    = new StringWriter();
            PrintWriter  wrapped   = new PrintWriter(writer);

            treePrint.PrintTree(test, wrapped);
            wrapped.Close();
            string @out = writer.ToString();

            string[] lines = @out.Trim().Split("\n");
            for (int i = 0; i < lines.Length; ++i)
            {
                string[] pieces  = lines[i].Trim().Split("\\s+");
                int      lineNum = int.Parse(pieces[0]);
                NUnit.Framework.Assert.AreEqual((i + 1), lineNum);
                NUnit.Framework.Assert.AreEqual(words[i], pieces[1]);
                NUnit.Framework.Assert.AreEqual(tags[i], pieces[3]);
                NUnit.Framework.Assert.AreEqual(tags[i], pieces[4]);
            }
        }
        /// <summary>Parse a (speech) lattice with the PCFG parser.</summary>
        /// <param name="lr">a lattice to parse</param>
        /// <returns>Whether the lattice could be parsed by the grammar</returns>
        internal virtual bool Parse(HTKLatticeReader lr)
        {
            TreePrint   treePrint = GetTreePrint();
            PrintWriter pwOut     = op.tlpParams.Pw();

            parseSucceeded   = false;
            parseNoMemory    = false;
            parseUnparsable  = false;
            parseSkipped     = false;
            parseFallback    = false;
            whatFailed       = null;
            originalSentence = null;
            if (lr.GetNumStates() > op.testOptions.maxLength + 1)
            {
                // + 1 for boundary symbol
                parseSkipped = true;
                throw new NotSupportedException("Lattice too big: " + lr.GetNumStates());
            }
            if (op.doPCFG)
            {
                if (!pparser.Parse(lr))
                {
                    return(parseSucceeded);
                }
                if (op.testOptions.verbose)
                {
                    pwOut.Println("PParser output");
                    treePrint.PrintTree(GetBestPCFGParse(false), pwOut);
                }
            }
            parseSucceeded = true;
            return(true);
        }
 public virtual void ProduceTrees(PrintWriter pout, int numTrees)
 {
     for (int i = 0; i < numTrees; ++i)
     {
         Tree tree = ProduceTree("ROOT");
         Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ProcessPatternsOnTree(tsurgeons, tree);
         tp.PrintTree(tree, pout);
     }
 }
        /// <summary>Generates the XML content for a constituent tree</summary>
        private static void AddConstituentTreeInfo(Element treeInfo, Tree tree, TreePrint constituentTreePrinter)
        {
            StringWriter treeStrWriter = new StringWriter();

            constituentTreePrinter.PrintTree(tree, new PrintWriter(treeStrWriter, true));
            string temp = treeStrWriter.ToString();

            //log.info(temp);
            treeInfo.AppendChild(temp);
        }
Exemple #6
0
 private static void DisplayTree(Tree t, TreePrint tp, PrintWriter pw)
 {
     if (t == null)
     {
         pw.Println("null");
     }
     else
     {
         tp.PrintTree(t, pw);
     }
 }
Exemple #7
0
 static int Main(string[] args)
 {
     if (args.Length < 1 || !File.Exists(args[0]))
     {
         Console.WriteLine("FATAL: First command line parameter must be an existing file");
         return(-1);
     }
     try
     {
         FileLoader loader = new FileLoader(json_tree.json_tree.encodingClass, json_tree.json_tree.unicodeDetection, args[0]);
         Debug.Assert(!loader.IsBinaryFile());
         string src;
         if (loader.LoadFile(out src))
         {
             json_tree.json_tree jsonParser = new json_tree.json_tree(src, Console.Out);
             bool bMatches = jsonParser.json_text();
             if (bMatches)
             {
                 Console.WriteLine("SUCCESS: Json Parser matched input file '{0}'", args[0]);
                 TreePrint tprint = new TreePrint(Console.Out, src, 60, new NodePrinter(jsonParser).GetNodeName, false);
                 tprint.PrintTree(jsonParser.GetRoot(), 0, 0);
             }
             else
             {
                 Console.WriteLine("FAILURE: Json Parser did not match input file '{0]'", args[0]);
             }
             return(0);
         }
         else
         {
             Console.WriteLine("FATAL: File '{0}' could not be loaded", args[0]);
             return(-1);
         }
     }
     catch (Exception e)
     {
         Console.WriteLine("FATAL: Program terminated by exception '{0}'", e.Message);
         return(-1);
     }
 }
        public virtual void ProcessResults(IParserQuery pq, Tree goldTree, PrintWriter pwErr, PrintWriter pwOut, PrintWriter pwFileOut, PrintWriter pwStats, TreePrint treePrint)
        {
            if (pq.SaidMemMessage())
            {
                saidMemMessage = true;
            }
            Tree             tree;
            IList <IHasWord> sentence = pq.OriginalSentence();

            try
            {
                tree = pq.GetBestParse();
            }
            catch (NoSuchParseException)
            {
                tree = null;
            }
            IList <ScoredObject <Tree> > kbestPCFGTrees = null;

            if (tree != null && kbestPCFG > 0)
            {
                kbestPCFGTrees = pq.GetKBestPCFGParses(kbestPCFG);
            }
            //combo parse goes to pwOut (System.out)
            if (op.testOptions.verbose)
            {
                pwOut.Println("ComboParser best");
                Tree ot = tree;
                if (ot != null && !op.tlpParams.TreebankLanguagePack().IsStartSymbol(ot.Value()))
                {
                    ot = ot.TreeFactory().NewTreeNode(op.tlpParams.TreebankLanguagePack().StartSymbol(), Java.Util.Collections.SingletonList(ot));
                }
                treePrint.PrintTree(ot, pwOut);
            }
            else
            {
                treePrint.PrintTree(tree, pwOut);
            }
            // **OUTPUT**
            // print various n-best like outputs (including 1-best)
            // print various statistics
            if (tree != null)
            {
                if (op.testOptions.printAllBestParses)
                {
                    IList <ScoredObject <Tree> > parses = pq.GetBestPCFGParses();
                    int sz = parses.Count;
                    if (sz > 1)
                    {
                        pwOut.Println("There were " + sz + " best PCFG parses with score " + parses[0].Score() + '.');
                        Tree transGoldTree = collinizer.TransformTree(goldTree);
                        int  iii           = 0;
                        foreach (ScoredObject <Tree> sot in parses)
                        {
                            iii++;
                            Tree tb  = sot.Object();
                            Tree tbd = debinarizer.TransformTree(tb);
                            tbd = subcategoryStripper.TransformTree(tbd);
                            pq.RestoreOriginalWords(tbd);
                            pwOut.Println("PCFG Parse #" + iii + " with score " + tbd.Score());
                            tbd.PennPrint(pwOut);
                            Tree tbtr = collinizer.TransformTree(tbd);
                            // pwOut.println("Tree size = " + tbtr.size() + "; depth = " + tbtr.depth());
                            kGoodLB.Evaluate(tbtr, transGoldTree, pwErr);
                        }
                    }
                }
                else
                {
                    // Huang and Chiang (2006) Algorithm 3 output from the PCFG parser
                    if (op.testOptions.printPCFGkBest > 0 && op.testOptions.outputkBestEquivocation == null)
                    {
                        IList <ScoredObject <Tree> > trees = kbestPCFGTrees.SubList(0, op.testOptions.printPCFGkBest);
                        Tree transGoldTree = collinizer.TransformTree(goldTree);
                        int  i             = 0;
                        foreach (ScoredObject <Tree> tp in trees)
                        {
                            i++;
                            pwOut.Println("PCFG Parse #" + i + " with score " + tp.Score());
                            Tree tbd = tp.Object();
                            tbd.PennPrint(pwOut);
                            Tree tbtr = collinizer.TransformTree(tbd);
                            kGoodLB.Evaluate(tbtr, transGoldTree, pwErr);
                        }
                    }
                    else
                    {
                        // Chart parser (factored) n-best list
                        if (op.testOptions.printFactoredKGood > 0 && pq.HasFactoredParse())
                        {
                            // DZ: debug n best trees
                            IList <ScoredObject <Tree> > trees = pq.GetKGoodFactoredParses(op.testOptions.printFactoredKGood);
                            Tree transGoldTree = collinizer.TransformTree(goldTree);
                            int  ii            = 0;
                            foreach (ScoredObject <Tree> tp in trees)
                            {
                                ii++;
                                pwOut.Println("Factored Parse #" + ii + " with score " + tp.Score());
                                Tree tbd = tp.Object();
                                tbd.PennPrint(pwOut);
                                Tree tbtr = collinizer.TransformTree(tbd);
                                kGoodLB.Evaluate(tbtr, transGoldTree, pwOut);
                            }
                        }
                        else
                        {
                            //1-best output
                            if (pwFileOut != null)
                            {
                                pwFileOut.Println(tree.ToString());
                            }
                        }
                    }
                }
                //Print the derivational entropy
                if (op.testOptions.outputkBestEquivocation != null && op.testOptions.printPCFGkBest > 0)
                {
                    IList <ScoredObject <Tree> > trees = kbestPCFGTrees.SubList(0, op.testOptions.printPCFGkBest);
                    double[] logScores = new double[trees.Count];
                    int      treeId    = 0;
                    foreach (ScoredObject <Tree> kBestTree in trees)
                    {
                        logScores[treeId++] = kBestTree.Score();
                    }
                    //Re-normalize
                    double entropy = 0.0;
                    double denom   = ArrayMath.LogSum(logScores);
                    foreach (double logScore in logScores)
                    {
                        double logPr = logScore - denom;
                        entropy += System.Math.Exp(logPr) * (logPr / System.Math.Log(2));
                    }
                    entropy *= -1;
                    //Convert to bits
                    pwStats.Printf("%f\t%d\t%d\n", entropy, trees.Count, sentence.Count);
                }
            }
            // **EVALUATION**
            // Perform various evaluations specified by the user
            if (tree != null)
            {
                //Strip subcategories and remove punctuation for evaluation
                tree = subcategoryStripper.TransformTree(tree);
                Tree treeFact = collinizer.TransformTree(tree);
                //Setup the gold tree
                if (op.testOptions.verbose)
                {
                    pwOut.Println("Correct parse");
                    treePrint.PrintTree(goldTree, pwOut);
                }
                Tree transGoldTree = collinizer.TransformTree(goldTree);
                if (transGoldTree != null)
                {
                    transGoldTree = subcategoryStripper.TransformTree(transGoldTree);
                }
                //Can't do evaluation in these two cases
                if (transGoldTree == null)
                {
                    pwErr.Println("Couldn't transform gold tree for evaluation, skipping eval. Gold tree was:");
                    goldTree.PennPrint(pwErr);
                    numSkippedEvals++;
                    return;
                }
                else
                {
                    if (treeFact == null)
                    {
                        pwErr.Println("Couldn't transform hypothesis tree for evaluation, skipping eval. Tree was:");
                        tree.PennPrint(pwErr);
                        numSkippedEvals++;
                        return;
                    }
                    else
                    {
                        if (treeFact.Yield().Count != transGoldTree.Yield().Count)
                        {
                            IList <ILabel> fYield = treeFact.Yield();
                            IList <ILabel> gYield = transGoldTree.Yield();
                            pwErr.Println("WARNING: Evaluation could not be performed due to gold/parsed yield mismatch.");
                            pwErr.Printf("  sizes: gold: %d (transf) %d (orig); parsed: %d (transf) %d (orig).%n", gYield.Count, goldTree.Yield().Count, fYield.Count, tree.Yield().Count);
                            pwErr.Println("  gold: " + SentenceUtils.ListToString(gYield, true));
                            pwErr.Println("  pars: " + SentenceUtils.ListToString(fYield, true));
                            numSkippedEvals++;
                            return;
                        }
                    }
                }
                if (topKEvals.Count > 0)
                {
                    IList <Tree> transGuesses = new List <Tree>();
                    int          kbest        = System.Math.Min(op.testOptions.evalPCFGkBest, kbestPCFGTrees.Count);
                    foreach (ScoredObject <Tree> guess in kbestPCFGTrees.SubList(0, kbest))
                    {
                        transGuesses.Add(collinizer.TransformTree(guess.Object()));
                    }
                    foreach (BestOfTopKEval eval in topKEvals)
                    {
                        eval.Evaluate(transGuesses, transGoldTree, pwErr);
                    }
                }
                //PCFG eval
                Tree treePCFG = pq.GetBestPCFGParse();
                if (treePCFG != null)
                {
                    Tree treePCFGeval = collinizer.TransformTree(treePCFG);
                    if (pcfgLB != null)
                    {
                        pcfgLB.Evaluate(treePCFGeval, transGoldTree, pwErr);
                    }
                    if (pcfgChildSpecific != null)
                    {
                        pcfgChildSpecific.Evaluate(treePCFGeval, transGoldTree, pwErr);
                    }
                    if (pcfgLA != null)
                    {
                        pcfgLA.Evaluate(treePCFGeval, transGoldTree, pwErr);
                    }
                    if (pcfgCB != null)
                    {
                        pcfgCB.Evaluate(treePCFGeval, transGoldTree, pwErr);
                    }
                    if (pcfgDA != null)
                    {
                        // Re-index the leaves after Collinization, stripping traces, etc.
                        treePCFGeval.IndexLeaves(true);
                        transGoldTree.IndexLeaves(true);
                        pcfgDA.Evaluate(treePCFGeval, transGoldTree, pwErr);
                    }
                    if (pcfgTA != null)
                    {
                        pcfgTA.Evaluate(treePCFGeval, transGoldTree, pwErr);
                    }
                    if (pcfgLL != null && pq.GetPCFGParser() != null)
                    {
                        pcfgLL.RecordScore(pq.GetPCFGParser(), pwErr);
                    }
                    if (pcfgRUO != null)
                    {
                        pcfgRUO.Evaluate(treePCFGeval, transGoldTree, pwErr);
                    }
                    if (pcfgCUO != null)
                    {
                        pcfgCUO.Evaluate(treePCFGeval, transGoldTree, pwErr);
                    }
                    if (pcfgCatE != null)
                    {
                        pcfgCatE.Evaluate(treePCFGeval, transGoldTree, pwErr);
                    }
                }
                //Dependency eval
                // todo: is treeDep really useful here, or should we really use depDAEval tree (debinarized) throughout? We use it for parse, and it sure seems like we could use it for tag eval, but maybe not factDA?
                Tree treeDep = pq.GetBestDependencyParse(false);
                if (treeDep != null)
                {
                    Tree goldTreeB    = binarizerOnly.TransformTree(goldTree);
                    Tree goldTreeEval = goldTree.DeepCopy();
                    goldTreeEval.IndexLeaves(true);
                    goldTreeEval.PercolateHeads(op.Langpack().HeadFinder());
                    Tree depDAEval = pq.GetBestDependencyParse(true);
                    depDAEval.IndexLeaves(true);
                    depDAEval.PercolateHeadIndices();
                    if (depDA != null)
                    {
                        depDA.Evaluate(depDAEval, goldTreeEval, pwErr);
                    }
                    if (depTA != null)
                    {
                        Tree undoneTree = debinarizer.TransformTree(treeDep);
                        undoneTree = subcategoryStripper.TransformTree(undoneTree);
                        pq.RestoreOriginalWords(undoneTree);
                        // pwErr.println("subcategoryStripped tree: " + undoneTree.toStructureDebugString());
                        depTA.Evaluate(undoneTree, goldTree, pwErr);
                    }
                    if (depLL != null && pq.GetDependencyParser() != null)
                    {
                        depLL.RecordScore(pq.GetDependencyParser(), pwErr);
                    }
                    Tree factTreeB;
                    if (pq.HasFactoredParse())
                    {
                        factTreeB = pq.GetBestFactoredParse();
                    }
                    else
                    {
                        factTreeB = treeDep;
                    }
                    if (factDA != null)
                    {
                        factDA.Evaluate(factTreeB, goldTreeB, pwErr);
                    }
                }
                //Factored parser (1best) eval
                if (factLB != null)
                {
                    factLB.Evaluate(treeFact, transGoldTree, pwErr);
                }
                if (factChildSpecific != null)
                {
                    factChildSpecific.Evaluate(treeFact, transGoldTree, pwErr);
                }
                if (factLA != null)
                {
                    factLA.Evaluate(treeFact, transGoldTree, pwErr);
                }
                if (factTA != null)
                {
                    factTA.Evaluate(tree, boundaryRemover.TransformTree(goldTree), pwErr);
                }
                if (factLL != null && pq.GetFactoredParser() != null)
                {
                    factLL.RecordScore(pq.GetFactoredParser(), pwErr);
                }
                if (factCB != null)
                {
                    factCB.Evaluate(treeFact, transGoldTree, pwErr);
                }
                foreach (IEval eval_1 in evals)
                {
                    eval_1.Evaluate(treeFact, transGoldTree, pwErr);
                }
                if (parserQueryEvals != null)
                {
                    foreach (IParserQueryEval eval in parserQueryEvals)
                    {
                        eval_1.Evaluate(pq, transGoldTree, pwErr);
                    }
                }
                if (op.testOptions.evalb)
                {
                    // empty out scores just in case
                    NanScores(tree);
                    EvalbFormatWriter.WriteEVALBline(treeFact, transGoldTree);
                }
            }
            pwErr.Println();
        }
            // end parseFiles
            public virtual void ProcessResults(IParserQuery parserQuery, int num, PrintWriter pwo)
        {
            if (parserQuery.ParseSkipped())
            {
                IList <IHasWord> sentence = parserQuery.OriginalSentence();
                if (sentence != null)
                {
                    numWords -= sentence.Count;
                }
                numSkipped++;
            }
            if (parserQuery.ParseNoMemory())
            {
                numNoMemory++;
            }
            if (parserQuery.ParseUnparsable())
            {
                numUnparsable++;
            }
            if (parserQuery.ParseFallback())
            {
                numFallback++;
            }
            saidMemMessage = saidMemMessage || parserQuery.SaidMemMessage();
            Tree ansTree = parserQuery.GetBestParse();

            if (ansTree == null)
            {
                pwo.Println("(())");
                return;
            }
            if (pcfgLL != null && parserQuery.GetPCFGParser() != null)
            {
                pcfgLL.RecordScore(parserQuery.GetPCFGParser(), pwErr);
            }
            if (depLL != null && parserQuery.GetDependencyParser() != null)
            {
                depLL.RecordScore(parserQuery.GetDependencyParser(), pwErr);
            }
            if (factLL != null && parserQuery.GetFactoredParser() != null)
            {
                factLL.RecordScore(parserQuery.GetFactoredParser(), pwErr);
            }
            try
            {
                treePrint.PrintTree(ansTree, int.ToString(num), pwo);
            }
            catch (Exception re)
            {
                pwErr.Println("TreePrint.printTree skipped: out of memory (or other error)");
                Sharpen.Runtime.PrintStackTrace(re, pwErr);
                numNoMemory++;
                try
                {
                    treePrint.PrintTree(null, int.ToString(num), pwo);
                }
                catch (Exception e)
                {
                    pwErr.Println("Sentence skipped: out of memory or error calling TreePrint.");
                    pwo.Println("(())");
                    Sharpen.Runtime.PrintStackTrace(e, pwErr);
                }
            }
            // crude addition of k-best tree printing
            // TODO: interface with the RerankingParserQuery
            if (op.testOptions.printPCFGkBest > 0 && parserQuery.GetPCFGParser() != null && parserQuery.GetPCFGParser().HasParse())
            {
                IList <ScoredObject <Tree> > trees = parserQuery.GetKBestPCFGParses(op.testOptions.printPCFGkBest);
                treePrint.PrintTrees(trees, int.ToString(num), pwo);
            }
            else
            {
                if (op.testOptions.printFactoredKGood > 0 && parserQuery.GetFactoredParser() != null && parserQuery.GetFactoredParser().HasParse())
                {
                    // DZ: debug n best trees
                    IList <ScoredObject <Tree> > trees = parserQuery.GetKGoodFactoredParses(op.testOptions.printFactoredKGood);
                    treePrint.PrintTrees(trees, int.ToString(num), pwo);
                }
            }
        }
        /// <summary>Parse a sentence represented as a List of tokens.</summary>
        /// <remarks>
        /// Parse a sentence represented as a List of tokens.
        /// The text must already have been tokenized and
        /// normalized into tokens that are appropriate to the treebank
        /// which was used to train the parser.  The tokens can be of
        /// multiple types, and the list items need not be homogeneous as to type
        /// (in particular, only some words might be given tags):
        /// <ul>
        /// <li>If a token implements HasWord, then the word to be parsed is
        /// given by its word() value.</li>
        /// <li>If a token implements HasTag and the tag() value is not
        /// null or the empty String, then the parser is strongly advised to assign
        /// a part of speech tag that <i>begins</i> with this String.</li>
        /// </ul>
        /// </remarks>
        /// <param name="sentence">The sentence to parse</param>
        /// <returns>true Iff the sentence was accepted by the grammar</returns>
        /// <exception cref="System.NotSupportedException">
        /// If the Sentence is too long or
        /// of zero length or the parse
        /// otherwise fails for resource reasons
        /// </exception>
        private bool ParseInternal <_T0>(IList <_T0> sentence)
            where _T0 : IHasWord
        {
            parseSucceeded   = false;
            parseNoMemory    = false;
            parseUnparsable  = false;
            parseSkipped     = false;
            parseFallback    = false;
            whatFailed       = null;
            addedPunct       = false;
            originalSentence = sentence;
            int length = sentence.Count;

            if (length == 0)
            {
                parseSkipped = true;
                throw new NotSupportedException("Can't parse a zero-length sentence!");
            }
            IList <IHasWord> sentenceB;

            if (op.wordFunction != null)
            {
                sentenceB = Generics.NewArrayList();
                foreach (IHasWord word in originalSentence)
                {
                    if (word is ILabel)
                    {
                        ILabel label    = (ILabel)word;
                        ILabel newLabel = label.LabelFactory().NewLabel(label);
                        if (newLabel is IHasWord)
                        {
                            sentenceB.Add((IHasWord)newLabel);
                        }
                        else
                        {
                            throw new AssertionError("This should have been a HasWord");
                        }
                    }
                    else
                    {
                        if (word is IHasTag)
                        {
                            TaggedWord tw = new TaggedWord(word.Word(), ((IHasTag)word).Tag());
                            sentenceB.Add(tw);
                        }
                        else
                        {
                            sentenceB.Add(new Word(word.Word()));
                        }
                    }
                }
                foreach (IHasWord word_1 in sentenceB)
                {
                    word_1.SetWord(op.wordFunction.Apply(word_1.Word()));
                }
            }
            else
            {
                sentenceB = new List <IHasWord>(sentence);
            }
            if (op.testOptions.addMissingFinalPunctuation)
            {
                addedPunct = AddSentenceFinalPunctIfNeeded(sentenceB, length);
            }
            if (length > op.testOptions.maxLength)
            {
                parseSkipped = true;
                throw new NotSupportedException("Sentence too long: length " + length);
            }
            TreePrint   treePrint = GetTreePrint();
            PrintWriter pwOut     = op.tlpParams.Pw();

            //Insert the boundary symbol
            if (sentence[0] is CoreLabel)
            {
                CoreLabel boundary = new CoreLabel();
                boundary.SetWord(LexiconConstants.Boundary);
                boundary.SetValue(LexiconConstants.Boundary);
                boundary.SetTag(LexiconConstants.BoundaryTag);
                boundary.SetIndex(sentence.Count + 1);
                //1-based indexing used in the parser
                sentenceB.Add(boundary);
            }
            else
            {
                sentenceB.Add(new TaggedWord(LexiconConstants.Boundary, LexiconConstants.BoundaryTag));
            }
            if (Thread.Interrupted())
            {
                throw new RuntimeInterruptedException();
            }
            if (op.doPCFG)
            {
                if (!pparser.Parse(sentenceB))
                {
                    return(parseSucceeded);
                }
                if (op.testOptions.verbose)
                {
                    pwOut.Println("PParser output");
                    // getBestPCFGParse(false).pennPrint(pwOut); // with scores on nodes
                    treePrint.PrintTree(GetBestPCFGParse(false), pwOut);
                }
            }
            // without scores on nodes
            if (Thread.Interrupted())
            {
                throw new RuntimeInterruptedException();
            }
            if (op.doDep && !op.testOptions.useFastFactored)
            {
                if (!dparser.Parse(sentenceB))
                {
                    return(parseSucceeded);
                }
                // cdm nov 2006: should move these printing bits to the main printing section,
                // so don't calculate the best parse twice!
                if (op.testOptions.verbose)
                {
                    pwOut.Println("DParser output");
                    treePrint.PrintTree(dparser.GetBestParse(), pwOut);
                }
            }
            if (Thread.Interrupted())
            {
                throw new RuntimeInterruptedException();
            }
            if (op.doPCFG && op.doDep)
            {
                if (!bparser.Parse(sentenceB))
                {
                    return(parseSucceeded);
                }
                else
                {
                    parseSucceeded = true;
                }
            }
            return(true);
        }
Exemple #11
0
        /// <summary>
        /// Called by determineHead and may be overridden in subclasses
        /// if special treatment is necessary for particular categories.
        /// </summary>
        /// <param name="t">The tre to determine the head daughter of</param>
        /// <param name="parent">The parent of t (or may be null)</param>
        /// <returns>The head daughter of t</returns>
        protected internal virtual Tree DetermineNonTrivialHead(Tree t, Tree parent)
        {
            Tree   theHead   = null;
            string motherCat = tlp.BasicCategory(t.Label().Value());

            if (motherCat.StartsWith("@"))
            {
                motherCat = Sharpen.Runtime.Substring(motherCat, 1);
            }
            if (Debug)
            {
                log.Info("Looking for head of " + t.Label() + "; value is |" + t.Label().Value() + "|, " + " baseCat is |" + motherCat + '|');
            }
            // We know we have nonterminals underneath
            // (a bit of a Penn Treebank assumption, but).
            // Look at label.
            // a total special case....
            // first look for POS tag at end
            // this appears to be redundant in the Collins case since the rule already would do that
            //    Tree lastDtr = t.lastChild();
            //    if (tlp.basicCategory(lastDtr.label().value()).equals("POS")) {
            //      theHead = lastDtr;
            //    } else {
            string[][] how  = nonTerminalInfo[motherCat];
            Tree[]     kids = t.Children();
            if (how == null)
            {
                if (Debug)
                {
                    log.Info("Warning: No rule found for " + motherCat + " (first char: " + motherCat[0] + ')');
                    log.Info("Known nonterms are: " + nonTerminalInfo.Keys);
                }
                if (defaultRule != null)
                {
                    if (Debug)
                    {
                        log.Info("  Using defaultRule");
                    }
                    return(TraverseLocate(kids, defaultRule, true));
                }
                else
                {
                    // TreePrint because TreeGraphNode only prints the node number,
                    // doesn't print the tree structure
                    TreePrint    printer = new TreePrint("penn");
                    StringWriter buffer  = new StringWriter();
                    printer.PrintTree(t, new PrintWriter(buffer));
                    // TODO: we could get really fancy and define our own
                    // exception class to represent this
                    throw new ArgumentException("No head rule defined for " + motherCat + " using " + this.GetType() + " in " + buffer.ToString());
                }
            }
            for (int i = 0; i < how.Length; i++)
            {
                bool lastResort = (i == how.Length - 1);
                theHead = TraverseLocate(kids, how[i], lastResort);
                if (theHead != null)
                {
                    break;
                }
            }
            if (Debug)
            {
                log.Info("  Chose " + theHead.Label());
            }
            return(theHead);
        }