/// <summary>
/// Reparses the words of <paramref name="tree"/> with a fresh query from
/// <paramref name="parser"/> and collects the k-best PCFG parses.
/// </summary>
/// <param name="parser">Parser used to create the query.</param>
/// <param name="dvKBest">Number of PCFG parses requested from the query.</param>
/// <param name="tree">Tree whose word yield (minus its last word) is reparsed.</param>
/// <param name="transformer">Optional transformer applied to every parse; may be null.</param>
/// <returns>
/// The (optionally transformed) parses, or <c>null</c> when the yield has at
/// most one word or the parser fails on the sentence.
/// </returns>
public static IList <Tree> GetTopParsesForOneTree(LexicalizedParser parser, int dvKBest, Tree tree, ITreeTransformer transformer)
        {
            IParserQuery query = parser.ParserQuery();
            IList <Word> words = tree.YieldWords();

            // The trees are binarized and otherwise manipulated, so the last
            // word (the end-of-sentence symbol) has to be dropped. A yield of
            // one word or fewer leaves nothing to parse.
            if (words.Count <= 1)
            {
                return null;
            }
            words = words.SubList(0, words.Count - 1);

            bool parsed = query.Parse(words);
            if (!parsed)
            {
                log.Info("Failed to use the given parser to reparse sentence \"" + words + "\"");
                return null;
            }

            IList <ScoredObject <Tree> > scoredParses = query.GetKBestPCFGParses(dvKBest);
            IList <Tree> topParses = new List <Tree>();
            foreach (ScoredObject <Tree> scored in scoredParses)
            {
                Tree candidate = scored.Object();
                topParses.Add(transformer == null ? candidate : transformer.TransformTree(candidate));
            }
            return topParses;
        }
示例#2
0
 /// <summary>
 /// Creates a query that pairs an underlying parser query with a reranker.
 /// </summary>
 /// <param name="op">Options; its <c>rerankerKBest</c> value is copied into this instance.</param>
 /// <param name="parserQuery">The wrapped parser query.</param>
 /// <param name="reranker">The reranker stored on this instance.</param>
 public RerankingParserQuery(Options op, IParserQuery parserQuery, IReranker reranker)
 {
     // The first three fields are shadowed by the parameters and need "this.".
     this.op = op;
     this.parserQuery = parserQuery;
     this.reranker = reranker;

     // Snapshot of the k-best size taken from the options at construction time.
     rerankerKBest = op.rerankerKBest;
 }
示例#3
0
        /// <summary>
        /// Parses one sentence under the given constraints and returns the
        /// resulting tree(s), each with its score set.
        /// </summary>
        /// <param name="constraints">Constraints handed to the parser query before parsing.</param>
        /// <param name="words">The sentence to parse.</param>
        /// <returns>
        /// The best parse when <c>kBest</c> is 1, otherwise the k-best parses.
        /// The list is empty when parsing fails or an out-of-memory /
        /// no-such-parse condition is caught.
        /// </returns>
        private IList <Tree> DoOneSentence(IList <ParserConstraint> constraints, IList <CoreLabel> words)
        {
            IParserQuery query = parser.ParserQuery();

            query.SetConstraints(constraints);
            query.Parse(words);
            IList <Tree> results = Generics.NewLinkedList();

            try
            {
                if (this.kBest != 1)
                {
                    IList <ScoredObject <Tree> > candidates = query.GetKBestParses(this.kBest);
                    bool haveCandidates = candidates != null && candidates.Count >= 1;
                    if (haveCandidates)
                    {
                        foreach (ScoredObject <Tree> scored in candidates)
                        {
                            // -10000 denotes unknown words
                            Tree candidate = scored.Object();
                            candidate.SetScore(scored.Score() % -10000.0);
                            results.Add(candidate);
                        }
                    }
                    else
                    {
                        log.Warn("Parsing of sentence failed.  " + "Will ignore and continue: " + SentenceUtils.ListToString(words));
                    }
                }
                else
                {
                    // With kBest == 1 the single best parse is used directly.
                    Tree best = query.GetBestParse();
                    if (best != null)
                    {
                        double bestScore = query.GetBestScore();
                        best.SetScore(bestScore % -10000.0);
                        results.Add(best);
                    }
                    else
                    {
                        log.Warn("Parsing of sentence failed.  " + "Will ignore and continue: " + SentenceUtils.ListToString(words));
                    }
                }
            }
            catch (OutOfMemoryException oom)
            {
                log.Error(oom);
                // Logging itself may now run out of memory as well.
                log.Warn("Parsing of sentence ran out of memory (length=" + words.Count + ").  " + "Will ignore and try to continue.");
            }
            catch (NoSuchParseException)
            {
                log.Warn("Parsing of sentence failed, possibly because of out of memory.  " + "Will ignore and continue: " + SentenceUtils.ListToString(words));
            }
            return results;
        }
        /// <summary>
        /// Parses <paramref name="sentence"/> with a new query from the factory
        /// and returns that query.
        /// </summary>
        /// <param name="sentence">The sentence to parse.</param>
        /// <returns>The query used for the parse; the parse outcome is not inspected here.</returns>
        public virtual IParserQuery Process <_T0>(IList <_T0> sentence)
            where _T0 : IHasWord
        {
            IParserQuery query = pqFactory.ParserQuery();

            // Without an error writer there is nowhere to report to, so parse plainly.
            if (pwErr == null)
            {
                query.Parse(sentence);
                return query;
            }

            query.ParseAndReport(sentence, pwErr);
            return query;
        }
示例#5
0
        /// <summary>
        /// Copies the depth, start URL and start source from
        /// <paramref name="query"/> and, when the depth is positive, downloads
        /// and saves starting from the start URL.
        /// </summary>
        /// <param name="query">Supplies <c>Depth</c>, <c>StartUrl</c> and <c>StartSource</c>.</param>
        /// <returns>
        /// The parsed-links collection held by this instance. It is returned
        /// without downloading when the depth is zero or negative.
        /// </returns>
        public async Task <List <ParsedLink> > Parse(IParserQuery query)
        {
            _depth       = query.Depth;
            _startUrl    = query.StartUrl;
            _startSource = query.StartSource;

            // Nothing to crawl for a non-positive depth; only the message differs.
            if (_depth <= 0)
            {
                string reason = _depth < 0
                    ? "Maximum depth can not be negative!"
                    : "Maximum depth is reached!";
                Console.WriteLine(reason);
                return _parsedLinks;
            }

            await DownloadAndSaveAsync(_startUrl);
            return _parsedLinks;
        }
            /// <summary>
            /// Parses the stored sentence and updates the enclosing UI: status
            /// text, tree panel, clear button and optional scrolling.
            /// </summary>
            public override void Run()
            {
                var owner = this._enclosing;
                IParserQuery query = owner.parser.ParserQuery();
                bool parsed;

                try
                {
                    parsed = query.Parse(this.sentence);
                }
                catch (Exception)
                {
                    // Any exception during parsing is reported as a (probably too long) sentence.
                    owner.StopProgressMonitor();
                    JOptionPane.ShowMessageDialog(owner, "Could not parse selected sentence\n(sentence probably too long)", null, JOptionPane.ErrorMessage);
                    owner.SetStatus("Error parsing");
                    return;
                }

                owner.StopProgressMonitor();
                owner.SetStatus("Done");

                if (!parsed)
                {
                    JOptionPane.ShowMessageDialog(owner, "Could not parse selected sentence", null, JOptionPane.ErrorMessage);
                    owner.SetStatus("Error parsing");
                    owner.treePanel.SetTree(null);
                    owner.clearButton.SetEnabled(false);
                }
                else
                {
                    // Show the best parse and allow it to be cleared.
                    Tree best = query.GetBestParse();
                    owner.treePanel.SetTree(best);
                    owner.clearButton.SetEnabled(true);
                }

                if (owner.scrollWhenDone)
                {
                    owner.ScrollForward();
                }
            }
        /// <summary>
        /// Writes the query's best binarized parse or its debinarized tree
        /// (depending on <c>mode</c>) to the output, followed by a newline.
        /// </summary>
        /// <param name="query">Must be a <c>ShiftReduceParserQuery</c>.</param>
        /// <param name="gold">Not used by this method.</param>
        /// <param name="pw">Not used by this method.</param>
        /// <exception cref="System.ArgumentException">
        /// The query is not a <c>ShiftReduceParserQuery</c>, or <c>mode</c> is not a handled value.
        /// </exception>
        public virtual void Evaluate(IParserQuery query, Tree gold, PrintWriter pw)
        {
            ShiftReduceParserQuery srquery = query as ShiftReduceParserQuery;
            if (srquery == null)
            {
                throw new ArgumentException("This evaluator only works for the ShiftReduceParser");
            }

            try
            {
                if (mode == TreeRecorder.Mode.Binarized)
                {
                    @out.Write(srquery.GetBestBinarizedParse().ToString());
                }
                else if (mode == TreeRecorder.Mode.Debinarized)
                {
                    @out.Write(srquery.debinarized.ToString());
                }
                else
                {
                    throw new ArgumentException("Unknown mode " + mode);
                }
                @out.NewLine();
            }
            catch (IOException ioFailure)
            {
                // I/O failures are rethrown wrapped in RuntimeIOException.
                throw new RuntimeIOException(ioFailure);
            }
        }
        /// <summary>
        /// Command line entry point: loads a parser model, parses every
        /// sentence of the <c>-input</c> file and writes one
        /// <c>sentenceN.txt</c> file per sentence into the <c>-output</c>
        /// directory, containing the sentence, its tree, the word vectors and
        /// the tree matrices.
        /// </summary>
        /// <remarks>
        /// Recognized flags (case-insensitive): <c>-model</c>, <c>-output</c>,
        /// <c>-input</c>, <c>-testTreebank</c>. Any other argument is passed
        /// on to the model loader.
        /// </remarks>
        /// <param name="args">The command line arguments.</param>
        /// <exception cref="System.ArgumentException">
        /// A flag is missing its value, <c>-output</c> was not given, or the
        /// loaded parser/reranker is not of the expected type.
        /// </exception>
        /// <exception cref="System.IO.IOException"/>
        public static void Main(string[] args)
        {
            string         modelPath          = null;
            string         outputPath         = null;
            string         inputPath          = null;
            // NOTE(review): the test treebank path/filter are parsed but not used in this method.
            string         testTreebankPath   = null;
            IFileFilter    testTreebankFilter = null;
            IList <string> unusedArgs         = Generics.NewArrayList();

            for (int argIndex = 0; argIndex < args.Length;)
            {
                if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-model"))
                {
                    modelPath = RequireFlagValue(args, argIndex);
                    argIndex += 2;
                }
                else if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-output"))
                {
                    outputPath = RequireFlagValue(args, argIndex);
                    argIndex  += 2;
                }
                else if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-input"))
                {
                    inputPath = RequireFlagValue(args, argIndex);
                    argIndex += 2;
                }
                else if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-testTreebank"))
                {
                    Pair <string, IFileFilter> treebankDescription = ArgUtils.GetTreebankDescription(args, argIndex, "-testTreebank");
                    argIndex           = argIndex + ArgUtils.NumSubArgs(args, argIndex) + 1;
                    testTreebankPath   = treebankDescription.First();
                    testTreebankFilter = treebankDescription.Second();
                }
                else
                {
                    unusedArgs.Add(args[argIndex++]);
                }
            }

            // Fail before the model load when the output location is missing.
            if (outputPath == null)
            {
                throw new ArgumentException("An output directory must be specified with -output");
            }

            string[]          newArgs = Sharpen.Collections.ToArray(unusedArgs, new string[unusedArgs.Count]);
            LexicalizedParser parser  = ((LexicalizedParser)LexicalizedParser.LoadModel(modelPath, newArgs));
            DVModel           model   = DVParser.GetModelFromLexicalizedParser(parser);
            File outputFile           = new File(outputPath);

            FileSystem.CheckNotExistsOrFail(outputFile);
            FileSystem.MkdirOrFail(outputFile);
            int count = 0;

            if (inputPath != null)
            {
                // NOTE(review): this reader is never closed; confirm whether DocumentPreprocessor takes ownership.
                Reader input = new BufferedReader(new FileReader(inputPath));
                DocumentPreprocessor processor = new DocumentPreprocessor(input);
                foreach (IList <IHasWord> sentence in processor)
                {
                    count++;
                    // index from 1
                    IParserQuery pq = parser.ParserQuery();
                    if (!(pq is RerankingParserQuery))
                    {
                        throw new ArgumentException("Expected a RerankingParserQuery");
                    }
                    RerankingParserQuery rpq = (RerankingParserQuery)pq;
                    if (!rpq.Parse(sentence))
                    {
                        throw new InvalidOperationException("Unparsable sentence: " + sentence);
                    }
                    IRerankerQuery reranker = rpq.RerankerQuery();
                    if (!(reranker is DVModelReranker.Query))
                    {
                        throw new ArgumentException("Expected a DVModelReranker");
                    }
                    DeepTree deepTree = ((DVModelReranker.Query)reranker).GetDeepTrees()[0];
                    IdentityHashMap <Tree, SimpleMatrix> vectors = deepTree.GetVectors();
                    foreach (KeyValuePair <Tree, SimpleMatrix> entry in vectors)
                    {
                        log.Info(entry.Key + "   " + entry.Value);
                    }

                    FileWriter fout = new FileWriter(outputPath + File.separator + "sentence" + count + ".txt");
                    try
                    {
                        BufferedWriter bout = new BufferedWriter(fout);
                        bout.Write(SentenceUtils.ListToString(sentence));
                        bout.NewLine();
                        bout.Write(deepTree.GetTree().ToString());
                        bout.NewLine();
                        foreach (IHasWord word in sentence)
                        {
                            OutputMatrix(bout, model.GetWordVector(word.Word()));
                        }
                        Tree rootTree = FindRootTree(vectors);
                        OutputTreeMatrices(bout, rootTree, vectors);
                        bout.Flush();
                    }
                    finally
                    {
                        // Release the file handle even when writing a sentence fails part-way.
                        fout.Close();
                    }
                }
            }
        }

        /// <summary>Returns the value that follows the flag at <paramref name="flagIndex"/>.</summary>
        /// <exception cref="System.ArgumentException">The flag is the last argument and has no value.</exception>
        private static string RequireFlagValue(string[] args, int flagIndex)
        {
            if (flagIndex + 1 >= args.Length)
            {
                throw new ArgumentException("Missing value for command line flag " + args[flagIndex]);
            }
            return args[flagIndex + 1];
        }
        /// <summary>Test the parser on a treebank.</summary>
        /// <remarks>
        /// Test the parser on a treebank. Parses will be written to stdout, and
        /// various other information will be written to stderr and stdout,
        /// particularly if <code>op.testOptions.verbose</code> is true.
        /// When <code>op.testOptions.testingThreads</code> is not 1 the sentences
        /// are parsed through a <code>MulticoreWrapper</code>; otherwise a single
        /// parser query is reused for every tree.
        /// </remarks>
        /// <param name="testTreebank">The treebank to parse</param>
        /// <returns>
        /// The labeled precision/recall F<sub>1</sub> (EVALB measure)
        /// of the parser on the treebank, taken from <code>factLB</code>;
        /// 0.0 when <code>factLB</code> is null.
        /// </returns>
        public virtual double TestOnTreebank(Treebank testTreebank)
        {
            log.Info("Testing on treebank");
            Timing    treebankTotalTimer        = new Timing();
            TreePrint treePrint                 = op.testOptions.TreePrint(op.tlpParams);
            ITreebankLangParserParams tlpParams = op.tlpParams;
            ITreebankLanguagePack     tlp       = op.Langpack();
            PrintWriter pwOut;
            PrintWriter pwErr;

            // In quiet mode both writers are directed at a NullOutputStream while parsing.
            if (op.testOptions.quietEvaluation)
            {
                NullOutputStream quiet = new NullOutputStream();
                pwOut = tlpParams.Pw(quiet);
                pwErr = tlpParams.Pw(quiet);
            }
            else
            {
                pwOut = tlpParams.Pw();
                pwErr = tlpParams.Pw(System.Console.Error);
            }
            if (op.testOptions.verbose)
            {
                pwErr.Print("Testing ");
                pwErr.Println(testTreebank.TextualSummary(tlp));
            }
            if (op.testOptions.evalb)
            {
                EvalbFormatWriter.InitEVALBfiles(tlpParams);
            }
            PrintWriter pwFileOut = null;

            if (op.testOptions.writeOutputFiles)
            {
                string fname = op.testOptions.outputFilesPrefix + "." + op.testOptions.outputFilesExtension;
                try
                {
                    pwFileOut = op.tlpParams.Pw(new FileOutputStream(fname));
                }
                catch (IOException ioe)
                {
                    // Best effort: testing continues without the output file.
                    Sharpen.Runtime.PrintStackTrace(ioe);
                }
            }
            PrintWriter pwStats = null;

            if (op.testOptions.outputkBestEquivocation != null)
            {
                try
                {
                    pwStats = op.tlpParams.Pw(new FileOutputStream(op.testOptions.outputkBestEquivocation));
                }
                catch (IOException ioe)
                {
                    // Best effort: testing continues without the statistics file.
                    Sharpen.Runtime.PrintStackTrace(ioe);
                }
            }
            if (op.testOptions.testingThreads != 1)
            {
                MulticoreWrapper <IList <IHasWord>, IParserQuery> wrapper = new MulticoreWrapper <IList <IHasWord>, IParserQuery>(op.testOptions.testingThreads, new ParsingThreadsafeProcessor(pqFactory, pwErr));
                // Gold trees are queued in submission order and polled once per finished query.
                LinkedList <Tree> goldTrees = new LinkedList <Tree>();
                foreach (Tree goldTree in testTreebank)
                {
                    IList <IHasWord> sentence = GetInputSentence(goldTree);
                    goldTrees.Add(goldTree);
                    pwErr.Println("Parsing [len. " + sentence.Count + "]: " + SentenceUtils.ListToString(sentence));
                    wrapper.Put(sentence);
                    while (wrapper.Peek())
                    {
                        IParserQuery pq = wrapper.Poll();
                        // A foreach iteration variable cannot be assigned in C#, so the
                        // polled gold tree gets its own local.
                        Tree finishedGoldTree = goldTrees.Poll();
                        ProcessResults(pq, finishedGoldTree, pwErr, pwOut, pwFileOut, pwStats, treePrint);
                    }
                }
                // for tree iterator
                wrapper.Join();
                // Drain the results that completed after the last sentence was submitted.
                while (wrapper.Peek())
                {
                    IParserQuery pq         = wrapper.Poll();
                    Tree         goldTree_1 = goldTrees.Poll();
                    ProcessResults(pq, goldTree_1, pwErr, pwOut, pwFileOut, pwStats, treePrint);
                }
            }
            else
            {
                IParserQuery pq = pqFactory.ParserQuery();
                foreach (Tree goldTree in testTreebank)
                {
                    IList <CoreLabel> sentence = GetInputSentence(goldTree);
                    pwErr.Println("Parsing [len. " + sentence.Count + "]: " + SentenceUtils.ListToString(sentence));
                    pq.ParseAndReport(sentence, pwErr);
                    ProcessResults(pq, goldTree, pwErr, pwOut, pwFileOut, pwStats, treePrint);
                }
            }
            // for tree iterator
            //Done parsing...print the results of the evaluations
            treebankTotalTimer.Done("Testing on treebank");
            // The evaluation summary is printed to stderr even in quiet mode.
            if (op.testOptions.quietEvaluation)
            {
                pwErr = tlpParams.Pw(System.Console.Error);
            }
            if (saidMemMessage)
            {
                ParserUtils.PrintOutOfMemory(pwErr);
            }
            if (op.testOptions.evalb)
            {
                EvalbFormatWriter.CloseEVALBfiles();
            }
            if (numSkippedEvals != 0)
            {
                pwErr.Printf("Unable to evaluate %d parser hypotheses due to yield mismatch\n", numSkippedEvals);
            }
            // only created here so we know what parser types are supported...
            IParserQuery pq_1 = pqFactory.ParserQuery();

            if (summary)
            {
                if (pcfgLB != null)
                {
                    pcfgLB.Display(false, pwErr);
                }
                if (pcfgChildSpecific != null)
                {
                    pcfgChildSpecific.Display(false, pwErr);
                }
                if (pcfgLA != null)
                {
                    pcfgLA.Display(false, pwErr);
                }
                if (pcfgCB != null)
                {
                    pcfgCB.Display(false, pwErr);
                }
                if (pcfgDA != null)
                {
                    pcfgDA.Display(false, pwErr);
                }
                if (pcfgTA != null)
                {
                    pcfgTA.Display(false, pwErr);
                }
                if (pcfgLL != null && pq_1.GetPCFGParser() != null)
                {
                    pcfgLL.Display(false, pwErr);
                }
                if (depDA != null)
                {
                    depDA.Display(false, pwErr);
                }
                if (depTA != null)
                {
                    depTA.Display(false, pwErr);
                }
                if (depLL != null && pq_1.GetDependencyParser() != null)
                {
                    depLL.Display(false, pwErr);
                }
                if (factLB != null)
                {
                    factLB.Display(false, pwErr);
                }
                if (factChildSpecific != null)
                {
                    factChildSpecific.Display(false, pwErr);
                }
                if (factLA != null)
                {
                    factLA.Display(false, pwErr);
                }
                if (factCB != null)
                {
                    factCB.Display(false, pwErr);
                }
                if (factDA != null)
                {
                    factDA.Display(false, pwErr);
                }
                if (factTA != null)
                {
                    factTA.Display(false, pwErr);
                }
                if (factLL != null && pq_1.GetFactoredParser() != null)
                {
                    factLL.Display(false, pwErr);
                }
                if (pcfgCatE != null)
                {
                    pcfgCatE.Display(false, pwErr);
                }
                foreach (IEval eval in evals)
                {
                    eval.Display(false, pwErr);
                }
                foreach (BestOfTopKEval eval_1 in topKEvals)
                {
                    eval_1.Display(false, pwErr);
                }
            }
            // these ones only have a display mode, so display if turned on!!
            if (pcfgRUO != null)
            {
                pcfgRUO.Display(true, pwErr);
            }
            if (pcfgCUO != null)
            {
                pcfgCUO.Display(true, pwErr);
            }
            if (tsv)
            {
                // One tab-separated row; a column is left empty when its evaluator is unavailable.
                NumberFormat nf = new DecimalFormat("0.00");
                pwErr.Println("factF1\tfactDA\tfactEx\tpcfgF1\tdepDA\tfactTA\tnum");
                if (factLB != null)
                {
                    pwErr.Print(nf.Format(factLB.GetEvalbF1Percent()));
                }
                pwErr.Print("\t");
                if (pq_1.GetDependencyParser() != null && factDA != null)
                {
                    pwErr.Print(nf.Format(factDA.GetEvalbF1Percent()));
                }
                pwErr.Print("\t");
                if (factLB != null)
                {
                    pwErr.Print(nf.Format(factLB.GetExactPercent()));
                }
                pwErr.Print("\t");
                if (pcfgLB != null)
                {
                    pwErr.Print(nf.Format(pcfgLB.GetEvalbF1Percent()));
                }
                pwErr.Print("\t");
                if (pq_1.GetDependencyParser() != null && depDA != null)
                {
                    pwErr.Print(nf.Format(depDA.GetEvalbF1Percent()));
                }
                pwErr.Print("\t");
                if (pq_1.GetPCFGParser() != null && factTA != null)
                {
                    pwErr.Print(nf.Format(factTA.GetEvalbF1Percent()));
                }
                pwErr.Print("\t");
                if (factLB != null)
                {
                    pwErr.Print(factLB.GetNum());
                }
                pwErr.Println();
            }
            double f1 = 0.0;

            if (factLB != null)
            {
                f1 = factLB.GetEvalbF1();
            }
            //Close files (if necessary)
            if (pwFileOut != null)
            {
                pwFileOut.Close();
            }
            if (pwStats != null)
            {
                pwStats.Close();
            }
            if (parserQueryEvals != null)
            {
                foreach (IParserQueryEval parserQueryEval in parserQueryEvals)
                {
                    parserQueryEval.Display(false, pwErr);
                }
            }
            return(f1);
        }
        public virtual void ProcessResults(IParserQuery pq, Tree goldTree, PrintWriter pwErr, PrintWriter pwOut, PrintWriter pwFileOut, PrintWriter pwStats, TreePrint treePrint)
        {
            if (pq.SaidMemMessage())
            {
                saidMemMessage = true;
            }
            Tree             tree;
            IList <IHasWord> sentence = pq.OriginalSentence();

            try
            {
                tree = pq.GetBestParse();
            }
            catch (NoSuchParseException)
            {
                tree = null;
            }
            IList <ScoredObject <Tree> > kbestPCFGTrees = null;

            if (tree != null && kbestPCFG > 0)
            {
                kbestPCFGTrees = pq.GetKBestPCFGParses(kbestPCFG);
            }
            //combo parse goes to pwOut (System.out)
            if (op.testOptions.verbose)
            {
                pwOut.Println("ComboParser best");
                Tree ot = tree;
                if (ot != null && !op.tlpParams.TreebankLanguagePack().IsStartSymbol(ot.Value()))
                {
                    ot = ot.TreeFactory().NewTreeNode(op.tlpParams.TreebankLanguagePack().StartSymbol(), Java.Util.Collections.SingletonList(ot));
                }
                treePrint.PrintTree(ot, pwOut);
            }
            else
            {
                treePrint.PrintTree(tree, pwOut);
            }
            // **OUTPUT**
            // print various n-best like outputs (including 1-best)
            // print various statistics
            if (tree != null)
            {
                if (op.testOptions.printAllBestParses)
                {
                    IList <ScoredObject <Tree> > parses = pq.GetBestPCFGParses();
                    int sz = parses.Count;
                    if (sz > 1)
                    {
                        pwOut.Println("There were " + sz + " best PCFG parses with score " + parses[0].Score() + '.');
                        Tree transGoldTree = collinizer.TransformTree(goldTree);
                        int  iii           = 0;
                        foreach (ScoredObject <Tree> sot in parses)
                        {
                            iii++;
                            Tree tb  = sot.Object();
                            Tree tbd = debinarizer.TransformTree(tb);
                            tbd = subcategoryStripper.TransformTree(tbd);
                            pq.RestoreOriginalWords(tbd);
                            pwOut.Println("PCFG Parse #" + iii + " with score " + tbd.Score());
                            tbd.PennPrint(pwOut);
                            Tree tbtr = collinizer.TransformTree(tbd);
                            // pwOut.println("Tree size = " + tbtr.size() + "; depth = " + tbtr.depth());
                            kGoodLB.Evaluate(tbtr, transGoldTree, pwErr);
                        }
                    }
                }
                else
                {
                    // Huang and Chiang (2006) Algorithm 3 output from the PCFG parser
                    if (op.testOptions.printPCFGkBest > 0 && op.testOptions.outputkBestEquivocation == null)
                    {
                        IList <ScoredObject <Tree> > trees = kbestPCFGTrees.SubList(0, op.testOptions.printPCFGkBest);
                        Tree transGoldTree = collinizer.TransformTree(goldTree);
                        int  i             = 0;
                        foreach (ScoredObject <Tree> tp in trees)
                        {
                            i++;
                            pwOut.Println("PCFG Parse #" + i + " with score " + tp.Score());
                            Tree tbd = tp.Object();
                            tbd.PennPrint(pwOut);
                            Tree tbtr = collinizer.TransformTree(tbd);
                            kGoodLB.Evaluate(tbtr, transGoldTree, pwErr);
                        }
                    }
                    else
                    {
                        // Chart parser (factored) n-best list
                        if (op.testOptions.printFactoredKGood > 0 && pq.HasFactoredParse())
                        {
                            // DZ: debug n best trees
                            IList <ScoredObject <Tree> > trees = pq.GetKGoodFactoredParses(op.testOptions.printFactoredKGood);
                            Tree transGoldTree = collinizer.TransformTree(goldTree);
                            int  ii            = 0;
                            foreach (ScoredObject <Tree> tp in trees)
                            {
                                ii++;
                                pwOut.Println("Factored Parse #" + ii + " with score " + tp.Score());
                                Tree tbd = tp.Object();
                                tbd.PennPrint(pwOut);
                                Tree tbtr = collinizer.TransformTree(tbd);
                                kGoodLB.Evaluate(tbtr, transGoldTree, pwOut);
                            }
                        }
                        else
                        {
                            //1-best output
                            if (pwFileOut != null)
                            {
                                pwFileOut.Println(tree.ToString());
                            }
                        }
                    }
                }
                //Print the derivational entropy
                if (op.testOptions.outputkBestEquivocation != null && op.testOptions.printPCFGkBest > 0)
                {
                    IList <ScoredObject <Tree> > trees = kbestPCFGTrees.SubList(0, op.testOptions.printPCFGkBest);
                    double[] logScores = new double[trees.Count];
                    int      treeId    = 0;
                    foreach (ScoredObject <Tree> kBestTree in trees)
                    {
                        logScores[treeId++] = kBestTree.Score();
                    }
                    //Re-normalize
                    double entropy = 0.0;
                    double denom   = ArrayMath.LogSum(logScores);
                    foreach (double logScore in logScores)
                    {
                        double logPr = logScore - denom;
                        entropy += System.Math.Exp(logPr) * (logPr / System.Math.Log(2));
                    }
                    entropy *= -1;
                    //Convert to bits
                    pwStats.Printf("%f\t%d\t%d\n", entropy, trees.Count, sentence.Count);
                }
            }
            // **EVALUATION**
            // Perform various evaluations specified by the user
            if (tree != null)
            {
                //Strip subcategories and remove punctuation for evaluation
                tree = subcategoryStripper.TransformTree(tree);
                Tree treeFact = collinizer.TransformTree(tree);
                //Setup the gold tree
                if (op.testOptions.verbose)
                {
                    pwOut.Println("Correct parse");
                    treePrint.PrintTree(goldTree, pwOut);
                }
                Tree transGoldTree = collinizer.TransformTree(goldTree);
                if (transGoldTree != null)
                {
                    transGoldTree = subcategoryStripper.TransformTree(transGoldTree);
                }
                //Can't do evaluation in these two cases
                if (transGoldTree == null)
                {
                    pwErr.Println("Couldn't transform gold tree for evaluation, skipping eval. Gold tree was:");
                    goldTree.PennPrint(pwErr);
                    numSkippedEvals++;
                    return;
                }
                else
                {
                    if (treeFact == null)
                    {
                        pwErr.Println("Couldn't transform hypothesis tree for evaluation, skipping eval. Tree was:");
                        tree.PennPrint(pwErr);
                        numSkippedEvals++;
                        return;
                    }
                    else
                    {
                        if (treeFact.Yield().Count != transGoldTree.Yield().Count)
                        {
                            IList <ILabel> fYield = treeFact.Yield();
                            IList <ILabel> gYield = transGoldTree.Yield();
                            pwErr.Println("WARNING: Evaluation could not be performed due to gold/parsed yield mismatch.");
                            pwErr.Printf("  sizes: gold: %d (transf) %d (orig); parsed: %d (transf) %d (orig).%n", gYield.Count, goldTree.Yield().Count, fYield.Count, tree.Yield().Count);
                            pwErr.Println("  gold: " + SentenceUtils.ListToString(gYield, true));
                            pwErr.Println("  pars: " + SentenceUtils.ListToString(fYield, true));
                            numSkippedEvals++;
                            return;
                        }
                    }
                }
                if (topKEvals.Count > 0)
                {
                    IList <Tree> transGuesses = new List <Tree>();
                    int          kbest        = System.Math.Min(op.testOptions.evalPCFGkBest, kbestPCFGTrees.Count);
                    foreach (ScoredObject <Tree> guess in kbestPCFGTrees.SubList(0, kbest))
                    {
                        transGuesses.Add(collinizer.TransformTree(guess.Object()));
                    }
                    foreach (BestOfTopKEval eval in topKEvals)
                    {
                        eval.Evaluate(transGuesses, transGoldTree, pwErr);
                    }
                }
                //PCFG eval
                Tree treePCFG = pq.GetBestPCFGParse();
                if (treePCFG != null)
                {
                    Tree treePCFGeval = collinizer.TransformTree(treePCFG);
                    if (pcfgLB != null)
                    {
                        pcfgLB.Evaluate(treePCFGeval, transGoldTree, pwErr);
                    }
                    if (pcfgChildSpecific != null)
                    {
                        pcfgChildSpecific.Evaluate(treePCFGeval, transGoldTree, pwErr);
                    }
                    if (pcfgLA != null)
                    {
                        pcfgLA.Evaluate(treePCFGeval, transGoldTree, pwErr);
                    }
                    if (pcfgCB != null)
                    {
                        pcfgCB.Evaluate(treePCFGeval, transGoldTree, pwErr);
                    }
                    if (pcfgDA != null)
                    {
                        // Re-index the leaves after Collinization, stripping traces, etc.
                        treePCFGeval.IndexLeaves(true);
                        transGoldTree.IndexLeaves(true);
                        pcfgDA.Evaluate(treePCFGeval, transGoldTree, pwErr);
                    }
                    if (pcfgTA != null)
                    {
                        pcfgTA.Evaluate(treePCFGeval, transGoldTree, pwErr);
                    }
                    if (pcfgLL != null && pq.GetPCFGParser() != null)
                    {
                        pcfgLL.RecordScore(pq.GetPCFGParser(), pwErr);
                    }
                    if (pcfgRUO != null)
                    {
                        pcfgRUO.Evaluate(treePCFGeval, transGoldTree, pwErr);
                    }
                    if (pcfgCUO != null)
                    {
                        pcfgCUO.Evaluate(treePCFGeval, transGoldTree, pwErr);
                    }
                    if (pcfgCatE != null)
                    {
                        pcfgCatE.Evaluate(treePCFGeval, transGoldTree, pwErr);
                    }
                }
                //Dependency eval
                // todo: is treeDep really useful here, or should we really use depDAEval tree (debinarized) throughout? We use it for parse, and it sure seems like we could use it for tag eval, but maybe not factDA?
                Tree treeDep = pq.GetBestDependencyParse(false);
                if (treeDep != null)
                {
                    Tree goldTreeB    = binarizerOnly.TransformTree(goldTree);
                    Tree goldTreeEval = goldTree.DeepCopy();
                    goldTreeEval.IndexLeaves(true);
                    goldTreeEval.PercolateHeads(op.Langpack().HeadFinder());
                    Tree depDAEval = pq.GetBestDependencyParse(true);
                    depDAEval.IndexLeaves(true);
                    depDAEval.PercolateHeadIndices();
                    if (depDA != null)
                    {
                        depDA.Evaluate(depDAEval, goldTreeEval, pwErr);
                    }
                    if (depTA != null)
                    {
                        Tree undoneTree = debinarizer.TransformTree(treeDep);
                        undoneTree = subcategoryStripper.TransformTree(undoneTree);
                        pq.RestoreOriginalWords(undoneTree);
                        // pwErr.println("subcategoryStripped tree: " + undoneTree.toStructureDebugString());
                        depTA.Evaluate(undoneTree, goldTree, pwErr);
                    }
                    if (depLL != null && pq.GetDependencyParser() != null)
                    {
                        depLL.RecordScore(pq.GetDependencyParser(), pwErr);
                    }
                    Tree factTreeB;
                    if (pq.HasFactoredParse())
                    {
                        factTreeB = pq.GetBestFactoredParse();
                    }
                    else
                    {
                        factTreeB = treeDep;
                    }
                    if (factDA != null)
                    {
                        factDA.Evaluate(factTreeB, goldTreeB, pwErr);
                    }
                }
                //Factored parser (1best) eval
                if (factLB != null)
                {
                    factLB.Evaluate(treeFact, transGoldTree, pwErr);
                }
                if (factChildSpecific != null)
                {
                    factChildSpecific.Evaluate(treeFact, transGoldTree, pwErr);
                }
                if (factLA != null)
                {
                    factLA.Evaluate(treeFact, transGoldTree, pwErr);
                }
                if (factTA != null)
                {
                    factTA.Evaluate(tree, boundaryRemover.TransformTree(goldTree), pwErr);
                }
                if (factLL != null && pq.GetFactoredParser() != null)
                {
                    factLL.RecordScore(pq.GetFactoredParser(), pwErr);
                }
                if (factCB != null)
                {
                    factCB.Evaluate(treeFact, transGoldTree, pwErr);
                }
                foreach (IEval eval_1 in evals)
                {
                    eval_1.Evaluate(treeFact, transGoldTree, pwErr);
                }
                if (parserQueryEvals != null)
                {
                    foreach (IParserQueryEval eval in parserQueryEvals)
                    {
                        eval_1.Evaluate(pq, transGoldTree, pwErr);
                    }
                }
                if (op.testOptions.evalb)
                {
                    // empty out scores just in case
                    NanScores(tree);
                    EvalbFormatWriter.WriteEVALBline(treeFact, transGoldTree);
                }
            }
            pwErr.Println();
        }
            // end parseFiles
            /// <summary>
            /// Updates the run statistics for one finished parser query and prints its best parse
            /// (plus an optional k-best list) to <paramref name="pwo"/>.
            /// </summary>
            /// <param name="parserQuery">Query on which a parse has already been attempted.</param>
            /// <param name="num">Sentence number; handed to the tree printer as a decimal string id.</param>
            /// <param name="pwo">Writer that receives the printed trees, or "(())" when no parse is available.</param>
            public virtual void ProcessResults(IParserQuery parserQuery, int num, PrintWriter pwo)
        {
            // Outcome counters. A skipped sentence has its length taken back out of numWords.
            if (parserQuery.ParseSkipped())
            {
                IList <IHasWord> sentence = parserQuery.OriginalSentence();
                if (sentence != null)
                {
                    numWords -= sentence.Count;
                }
                numSkipped++;
            }
            if (parserQuery.ParseNoMemory())
            {
                numNoMemory++;
            }
            if (parserQuery.ParseUnparsable())
            {
                numUnparsable++;
            }
            if (parserQuery.ParseFallback())
            {
                numFallback++;
            }
            saidMemMessage = saidMemMessage || parserQuery.SaidMemMessage();

            Tree ansTree = parserQuery.GetBestParse();
            if (ansTree == null)
            {
                // No parse at all: emit the empty-tree placeholder and stop.
                pwo.Println("(())");
                return;
            }

            // Record scores for whichever sub-parsers are present and being tracked.
            if (pcfgLL != null && parserQuery.GetPCFGParser() != null)
            {
                pcfgLL.RecordScore(parserQuery.GetPCFGParser(), pwErr);
            }
            if (depLL != null && parserQuery.GetDependencyParser() != null)
            {
                depLL.RecordScore(parserQuery.GetDependencyParser(), pwErr);
            }
            if (factLL != null && parserQuery.GetFactoredParser() != null)
            {
                factLL.RecordScore(parserQuery.GetFactoredParser(), pwErr);
            }

            // C# has no static int.ToString(int); format once, culture-independently,
            // so the id is always plain decimal digits.
            string id = num.ToString(System.Globalization.CultureInfo.InvariantCulture);

            try
            {
                treePrint.PrintTree(ansTree, id, pwo);
            }
            catch (Exception re)
            {
                pwErr.Println("TreePrint.printTree skipped: out of memory (or other error)");
                Sharpen.Runtime.PrintStackTrace(re, pwErr);
                numNoMemory++;
                // Retry with a null tree so the output still gets an entry for this sentence.
                try
                {
                    treePrint.PrintTree(null, id, pwo);
                }
                catch (Exception e)
                {
                    pwErr.Println("Sentence skipped: out of memory or error calling TreePrint.");
                    pwo.Println("(())");
                    Sharpen.Runtime.PrintStackTrace(e, pwErr);
                }
            }

            // crude addition of k-best tree printing
            // TODO: interface with the RerankingParserQuery
            // The PCFG k-best list takes precedence; the factored k-good list is printed only otherwise.
            if (op.testOptions.printPCFGkBest > 0 && parserQuery.GetPCFGParser() != null && parserQuery.GetPCFGParser().HasParse())
            {
                IList <ScoredObject <Tree> > trees = parserQuery.GetKBestPCFGParses(op.testOptions.printPCFGkBest);
                treePrint.PrintTrees(trees, id, pwo);
            }
            else if (op.testOptions.printFactoredKGood > 0 && parserQuery.GetFactoredParser() != null && parserQuery.GetFactoredParser().HasParse())
            {
                // DZ: debug n best trees
                IList <ScoredObject <Tree> > trees = parserQuery.GetKGoodFactoredParses(op.testOptions.printFactoredKGood);
                treePrint.PrintTrees(trees, id, pwo);
            }
        }
 /// <summary>
 /// Parses every input named in <paramref name="args"/> from <paramref name="argIndex"/> onward, prints the
 /// resulting trees, and finally reports throughput and failure counts on the error writer.
 /// </summary>
 /// <param name="args">Argument array; entries from <paramref name="argIndex"/> on are input names (file names or URLs), with "-" meaning standard input.</param>
 /// <param name="argIndex">Index of the first input name in <paramref name="args"/>.</param>
 /// <param name="tokenized">Consulted only when <paramref name="tokenizerFactory"/> is null: true installs no tokenizer, false installs the one returned by tlp.GetTokenizerFactory().</param>
 /// <param name="tokenizerFactory">Tokenizer factory to install on the preprocessor, or null.</param>
 /// <param name="elementDelimiter">Passed to the preprocessor; a non-null value selects the XML document type, null selects plain text.</param>
 /// <param name="sentenceDelimiter">Passed to the preprocessor unchanged.</param>
 /// <param name="escaper">Passed to the preprocessor unchanged.</param>
 /// <param name="tagDelimiter">Passed to the preprocessor unchanged.</param>
 public virtual void ParseFiles <_T0>(string[] args, int argIndex, bool tokenized, ITokenizerFactory <_T0> tokenizerFactory, string elementDelimiter, string sentenceDelimiter, IFunction <IList <IHasWord>, IList <IHasWord> > escaper, string tagDelimiter)
     where _T0 : IHasWord
 {
     DocumentPreprocessor.DocType docType = (elementDelimiter == null) ? DocumentPreprocessor.DocType.Plain : DocumentPreprocessor.DocType.Xml;
     if (op.testOptions.verbose && tokenizerFactory != null)
     {
         pwErr.Println("parseFiles: Tokenizer factory is: " + tokenizerFactory);
     }
     // No explicit start call: the Timing constructor already starts the clock.
     Timing timer = new Timing();

     // Loop over the files
     for (int i = argIndex; i < args.Length; i++)
     {
         string filename = args[i];
         DocumentPreprocessor documentPreprocessor;
         if (filename.Equals("-"))
         {
             try
             {
                 documentPreprocessor = new DocumentPreprocessor(IOUtils.ReaderFromStdin(op.tlpParams.GetInputEncoding()), docType);
             }
             catch (IOException e)
             {
                 throw new RuntimeIOException(e);
             }
         }
         else
         {
             documentPreprocessor = new DocumentPreprocessor(filename, docType, op.tlpParams.GetInputEncoding());
         }
         // Unused values are null per the main() method invocation below
         // null is the default for these properties
         documentPreprocessor.SetSentenceFinalPuncWords(tlp.SentenceFinalPunctuationWords());
         documentPreprocessor.SetEscaper(escaper);
         documentPreprocessor.SetSentenceDelimiter(sentenceDelimiter);
         documentPreprocessor.SetTagDelimiter(tagDelimiter);
         documentPreprocessor.SetElementDelimiter(elementDelimiter);
         if (tokenizerFactory == null)
         {
             documentPreprocessor.SetTokenizerFactory((tokenized) ? null : tlp.GetTokenizerFactory());
         }
         else
         {
             documentPreprocessor.SetTokenizerFactory(tokenizerFactory);
         }

         // Set up the output: the shared writer by default, or a per-input file we own and must close.
         PrintWriter pwo         = pwOut;
         bool        closeOutput = false;
         if (op.testOptions.writeOutputFiles)
         {
             string normalizedName = filename;
             try
             {
                 new URL(normalizedName);
                 // this will exception if not a URL
                 normalizedName = normalizedName.ReplaceAll("/", "_");
             }
             catch (MalformedURLException)
             {
                 // It isn't a URL, so silently ignore
             }
             string ext   = (op.testOptions.outputFilesExtension == null) ? "stp" : op.testOptions.outputFilesExtension;
             string fname = normalizedName + '.' + ext;
             if (!string.IsNullOrEmpty(op.testOptions.outputFilesDirectory))
             {
                 string fseparator = Runtime.GetProperty("file.separator");
                 if (string.IsNullOrEmpty(fseparator))
                 {
                     fseparator = "/";
                 }
                 // Keep only the final name component so the file lands directly in the output directory.
                 File fnameFile = new File(fname);
                 fname          = op.testOptions.outputFilesDirectory + fseparator + fnameFile.GetName();
             }
             try
             {
                 pwo = op.tlpParams.Pw(new FileOutputStream(fname));
             }
             catch (IOException ioe)
             {
                 throw new RuntimeIOException(ioe);
             }
             closeOutput = true;
         }

         int num = 0;
         try
         {
             treePrint.PrintHeader(pwo, op.tlpParams.GetOutputEncoding());
             pwErr.Println("Parsing file: " + filename);
             int numProcessed = 0;
             if (op.testOptions.testingThreads != 1)
             {
                 MulticoreWrapper <IList <IHasWord>, IParserQuery> wrapper = new MulticoreWrapper <IList <IHasWord>, IParserQuery>(op.testOptions.testingThreads, new ParsingThreadsafeProcessor(pqFactory, pwErr));
                 foreach (IList <IHasWord> sentence in documentPreprocessor)
                 {
                     num++;
                     numSents++;
                     int len   = sentence.Count;
                     numWords += len;
                     pwErr.Println("Parsing [sent. " + num + " len. " + len + "]: " + SentenceUtils.ListToString(sentence, true));
                     wrapper.Put(sentence);
                     // Drain whatever results the wrapper reports as available after each submission.
                     while (wrapper.Peek())
                     {
                         IParserQuery pq = wrapper.Poll();
                         ProcessResults(pq, numProcessed++, pwo);
                     }
                 }
                 // NOTE(review): Join() presumably waits for outstanding work - confirm; then drain the rest.
                 wrapper.Join();
                 while (wrapper.Peek())
                 {
                     IParserQuery pq = wrapper.Poll();
                     ProcessResults(pq, numProcessed++, pwo);
                 }
             }
             else
             {
                 // Single-threaded: one query object is reused for every sentence.
                 IParserQuery pq = pqFactory.ParserQuery();
                 foreach (IList <IHasWord> sentence in documentPreprocessor)
                 {
                     num++;
                     numSents++;
                     int len   = sentence.Count;
                     numWords += len;
                     pwErr.Println("Parsing [sent. " + num + " len. " + len + "]: " + SentenceUtils.ListToString(sentence, true));
                     pq.ParseAndReport(sentence, pwErr);
                     ProcessResults(pq, numProcessed++, pwo);
                 }
             }
             treePrint.PrintFooter(pwo);
         }
         finally
         {
             // Close the per-input file even when parsing or printing throws; never close the shared pwOut.
             if (closeOutput)
             {
                 pwo.Close();
             }
         }
         pwErr.Println("Parsed file: " + filename + " [" + num + " sentences].");
     }

     long millis = timer.Stop();
     if (summary)
     {
         if (pcfgLL != null)
         {
             pcfgLL.Display(false, pwErr);
         }
         if (depLL != null)
         {
             depLL.Display(false, pwErr);
         }
         if (factLL != null)
         {
             factLL.Display(false, pwErr);
         }
     }
     if (saidMemMessage)
     {
         ParserUtils.PrintOutOfMemory(pwErr);
     }

     double seconds     = ((double)millis) / 1000;
     double wordspersec = numWords / seconds;
     double sentspersec = numSents / seconds;
     NumberFormat nf    = new DecimalFormat("0.00");
     // easier way!
     pwErr.Println("Parsed " + numWords + " words in " + numSents + " sentences (" + nf.Format(wordspersec) + " wds/sec; " + nf.Format(sentspersec) + " sents/sec).");
     if (numFallback > 0)
     {
         pwErr.Println("  " + numFallback + " sentences were parsed by fallback to PCFG.");
     }
     if (numUnparsable > 0 || numNoMemory > 0 || numSkipped > 0)
     {
         pwErr.Println("  " + (numUnparsable + numNoMemory + numSkipped) + " sentences were not parsed:");
         if (numUnparsable > 0)
         {
             pwErr.Println("    " + numUnparsable + " were not parsable with non-zero probability.");
         }
         if (numNoMemory > 0)
         {
             pwErr.Println("    " + numNoMemory + " were skipped because of insufficient memory.");
         }
         if (numSkipped > 0)
         {
             pwErr.Println("    " + numSkipped + " were skipped as length 0 or greater than " + op.testOptions.maxLength);
         }
     }
 }
        /// <summary>
        /// Command-line entry point. Parses every tree of a test treebank with a reranking parser,
        /// writes each sentence, its parse and its ROOT node vector to the output file, then logs,
        /// for every sufficiently short subtree, the closest other subtrees by Frobenius distance
        /// between their node vectors.
        /// </summary>
        /// <param name="args">
        /// Requires <c>-model path</c>, <c>-testTreebank ...</c> and <c>-output path</c>; every other
        /// argument is forwarded to the parser model loader.
        /// </param>
        /// <exception cref="System.ArgumentException">
        /// A required option is missing or has no value, or the loaded parser has no DVModel reranker attached.
        /// </exception>
        /// <exception cref="System.Exception"/>
        public static void Main(string[] args)
        {
            // Largest number of nearest matches reported per subtree.
            const int maxMatches = 100;

            string         modelPath          = null;
            string         outputPath         = null;
            string         testTreebankPath   = null;
            IFileFilter    testTreebankFilter = null;
            IList <string> unusedArgs         = new List <string>();

            int argIndex = 0;
            while (argIndex < args.Length)
            {
                string flag = args[argIndex];
                if (Sharpen.Runtime.EqualsIgnoreCase(flag, "-model"))
                {
                    // Fail with a usage error rather than an index-out-of-range when the value is missing.
                    if (argIndex + 1 >= args.Length)
                    {
                        throw new ArgumentException("-model requires a value");
                    }
                    modelPath = args[argIndex + 1];
                    argIndex += 2;
                }
                else if (Sharpen.Runtime.EqualsIgnoreCase(flag, "-testTreebank"))
                {
                    Pair <string, IFileFilter> treebankDescription = ArgUtils.GetTreebankDescription(args, argIndex, "-testTreebank");
                    argIndex           = argIndex + ArgUtils.NumSubArgs(args, argIndex) + 1;
                    testTreebankPath   = treebankDescription.First();
                    testTreebankFilter = treebankDescription.Second();
                }
                else if (Sharpen.Runtime.EqualsIgnoreCase(flag, "-output"))
                {
                    if (argIndex + 1 >= args.Length)
                    {
                        throw new ArgumentException("-output requires a value");
                    }
                    outputPath = args[argIndex + 1];
                    argIndex  += 2;
                }
                else
                {
                    // Anything unrecognized is passed through to the model loader.
                    unusedArgs.Add(flag);
                    argIndex++;
                }
            }
            if (modelPath == null)
            {
                throw new ArgumentException("Need to specify -model");
            }
            if (testTreebankPath == null)
            {
                throw new ArgumentException("Need to specify -testTreebank");
            }
            if (outputPath == null)
            {
                throw new ArgumentException("Need to specify -output");
            }
            string[]          newArgs   = Sharpen.Collections.ToArray(unusedArgs, new string[unusedArgs.Count]);
            LexicalizedParser lexparser = ((LexicalizedParser)LexicalizedParser.LoadModel(modelPath, newArgs));

            log.Info("Reading in trees from " + testTreebankPath);
            if (testTreebankFilter != null)
            {
                log.Info("Filtering on " + testTreebankFilter);
            }
            Treebank testTreebank = lexparser.GetOp().tlpParams.MemoryTreebank();
            testTreebank.LoadPath(testTreebankPath, testTreebankFilter);
            log.Info("Read in " + testTreebank.Count + " trees for testing");

            // All output goes through the buffered writer; closing it in the finally block
            // releases the file even if parsing fails part-way through.
            BufferedWriter bout = new BufferedWriter(new FileWriter(outputPath));
            try
            {
                log.Info("Parsing " + testTreebank.Count + " trees");
                int count = 0;
                IList <FindNearestNeighbors.ParseRecord> records = Generics.NewArrayList();

                foreach (Tree goldTree in testTreebank)
                {
                    IList <Word> tokens      = goldTree.YieldWords();
                    IParserQuery parserQuery = lexparser.ParserQuery();
                    if (!parserQuery.Parse(tokens))
                    {
                        throw new AssertionError("Could not parse: " + tokens);
                    }
                    if (!(parserQuery is RerankingParserQuery))
                    {
                        throw new ArgumentException("Expected a LexicalizedParser with a Reranker attached");
                    }
                    RerankingParserQuery rpq = (RerankingParserQuery)parserQuery;
                    if (!(rpq.RerankerQuery() is DVModelReranker.Query))
                    {
                        throw new ArgumentException("Expected a LexicalizedParser with a DVModel attached");
                    }
                    // Only the first deep tree returned by the reranker query is used.
                    DeepTree     tree       = ((DVModelReranker.Query)rpq.RerankerQuery()).GetDeepTrees()[0];
                    SimpleMatrix rootVector = null;
                    foreach (KeyValuePair <Tree, SimpleMatrix> entry in tree.GetVectors())
                    {
                        if (entry.Key.Label().Value().Equals("ROOT"))
                        {
                            rootVector = entry.Value;
                            break;
                        }
                    }
                    if (rootVector == null)
                    {
                        throw new AssertionError("Could not find root nodevector");
                    }
                    bout.Write(tokens + "\n");
                    bout.Write(tree.GetTree() + "\n");
                    for (int i = 0; i < rootVector.GetNumElements(); ++i)
                    {
                        bout.Write("  " + rootVector.Get(i));
                    }
                    bout.Write("\n\n\n");
                    count++;
                    if (count % 10 == 0)
                    {
                        log.Info("  " + count);
                    }
                    records.Add(new FindNearestNeighbors.ParseRecord(tokens, goldTree, tree.GetTree(), rootVector, tree.GetVectors()));
                }
                log.Info("  done parsing");

                // Collect every subtree (with its vector) that has at most maxLength leaves.
                IList <Pair <Tree, SimpleMatrix> > subtrees = Generics.NewArrayList();
                foreach (FindNearestNeighbors.ParseRecord record in records)
                {
                    foreach (KeyValuePair <Tree, SimpleMatrix> entry in record.nodeVectors)
                    {
                        if (entry.Key.GetLeaves().Count <= maxLength)
                        {
                            subtrees.Add(Pair.MakePair(entry.Key, entry.Value));
                        }
                    }
                }
                log.Info("There are " + subtrees.Count + " subtrees in the set of trees");

                // Descending order puts the worst (largest-distance) match at the head, so polling
                // whenever the queue exceeds maxMatches keeps only the closest ones.
                PriorityQueue <ScoredObject <Pair <Tree, Tree> > > bestmatches = new PriorityQueue <ScoredObject <Pair <Tree, Tree> > >(maxMatches + 1, ScoredComparator.DescendingComparator);

                for (int i_1 = 0; i_1 < subtrees.Count; ++i_1)
                {
                    log.Info(subtrees[i_1].First().YieldWords());
                    log.Info(subtrees[i_1].First());
                    for (int j = 0; j < subtrees.Count; ++j)
                    {
                        if (i_1 == j)
                        {
                            continue;
                        }
                        // TODO: look at basic category?
                        double normF = subtrees[i_1].Second().Minus(subtrees[j].Second()).NormF();
                        bestmatches.Add(new ScoredObject <Pair <Tree, Tree> >(Pair.MakePair(subtrees[i_1].First(), subtrees[j].First()), normF));
                        if (bestmatches.Count > maxMatches)
                        {
                            bestmatches.Poll();
                        }
                    }
                    // Draining yields worst-first; reverse to report the closest match first.
                    IList <ScoredObject <Pair <Tree, Tree> > > ordered = Generics.NewArrayList();
                    while (bestmatches.Count > 0)
                    {
                        ordered.Add(bestmatches.Poll());
                    }
                    Java.Util.Collections.Reverse(ordered);
                    foreach (ScoredObject <Pair <Tree, Tree> > pair in ordered)
                    {
                        log.Info(" MATCHED " + pair.Object().Second().YieldWords() + " ... " + pair.Object().Second() + " with a score of " + pair.Score());
                    }
                    log.Info();
                    log.Info();
                    bestmatches.Clear();
                }

                /*
                 * for (int i = 0; i < records.size(); ++i) {
                 * if (i % 10 == 0) {
                 * log.info("  " + i);
                 * }
                 * List<ScoredObject<ParseRecord>> scored = Generics.newArrayList();
                 * for (int j = 0; j < records.size(); ++j) {
                 * if (i == j) continue;
                 *
                 * double score = 0.0;
                 * int matches = 0;
                 * for (Map.Entry<Tree, SimpleMatrix> first : records.get(i).nodeVectors.entrySet()) {
                 * for (Map.Entry<Tree, SimpleMatrix> second : records.get(j).nodeVectors.entrySet()) {
                 * String firstBasic = dvparser.dvModel.basicCategory(first.getKey().label().value());
                 * String secondBasic = dvparser.dvModel.basicCategory(second.getKey().label().value());
                 * if (firstBasic.equals(secondBasic)) {
                 ++matches;
                 * double normF = first.getValue().minus(second.getValue()).normF();
                 * score += normF * normF;
                 * }
                 * }
                 * }
                 * if (matches == 0) {
                 * score = Double.POSITIVE_INFINITY;
                 * } else {
                 * score = score / matches;
                 * }
                 * //double score = records.get(i).vector.minus(records.get(j).vector).normF();
                 * scored.add(new ScoredObject<ParseRecord>(records.get(j), score));
                 * }
                 * Collections.sort(scored, ScoredComparator.ASCENDING_COMPARATOR);
                 *
                 * out.write(records.get(i).sentence.toString() + "\n");
                 * for (int j = 0; j < numNeighbors; ++j) {
                 * out.write("   " + scored.get(j).score() + ": " + scored.get(j).object().sentence + "\n");
                 * }
                 * out.write("\n\n");
                 * }
                 * log.info();
                 */
                bout.Flush();
            }
            finally
            {
                // Closing the buffered writer flushes it and closes the underlying file writer.
                bout.Close();
            }
        }