/// <summary>
/// Reparses the words of <paramref name="tree"/> (with the final word removed) and collects
/// the k-best PCFG parses, optionally passing each one through <paramref name="transformer"/>.
/// </summary>
/// <param name="parser">Parser that supplies the parser query.</param>
/// <param name="dvKBest">Number of PCFG parses requested from the query.</param>
/// <param name="tree">Tree whose yield is reparsed.</param>
/// <param name="transformer">Applied to every parse when not null.</param>
/// <returns>
/// The collected parses, or null when the yield has at most one word or the reparse fails.
/// </returns>
public static IList <Tree> GetTopParsesForOneTree(LexicalizedParser parser, int dvKBest, Tree tree, ITreeTransformer transformer)
        {
            IParserQuery query = parser.ParserQuery();
            IList <Word> words = tree.YieldWords();
            int          total = words.Count;

            // The trees are binarized and otherwise manipulated, so the final
            // word is the end-of-sentence symbol; without at least one other
            // word there is nothing left to parse.
            if (total <= 1)
            {
                return null;
            }

            // Drop the trailing end-of-sentence symbol before reparsing.
            words = words.SubList(0, total - 1);
            bool parsed = query.Parse(words);
            if (!parsed)
            {
                log.Info("Failed to use the given parser to reparse sentence \"" + words + "\"");
                return null;
            }

            IList <Tree> collected = new List <Tree>();
            IList <ScoredObject <Tree> > candidates = query.GetKBestPCFGParses(dvKBest);

            foreach (ScoredObject <Tree> candidate in candidates)
            {
                Tree parse = candidate.Object();
                collected.Add(transformer == null ? parse : transformer.TransformTree(parse));
            }
            return collected;
        }
예제 #2
0
        /// <summary>
        /// Parses one sentence under the given constraints and returns the resulting trees,
        /// each with its score (taken modulo -10000.0) attached.
        /// </summary>
        /// <param name="constraints">Constraints handed to the parser query before parsing.</param>
        /// <param name="words">The sentence to parse.</param>
        /// <returns>
        /// The best tree when <c>kBest</c> is 1, otherwise the k-best trees. The list is empty
        /// when no parse was produced; failures are logged instead of being propagated.
        /// </returns>
        private IList <Tree> DoOneSentence(IList <ParserConstraint> constraints, IList <CoreLabel> words)
        {
            IParserQuery query = parser.ParserQuery();

            query.SetConstraints(constraints);
            query.Parse(words);
            IList <Tree> results = Generics.NewLinkedList();

            try
            {
                if (this.kBest != 1)
                {
                    IList <ScoredObject <Tree> > candidates = query.GetKBestParses(this.kBest);
                    if (candidates != null && candidates.Count >= 1)
                    {
                        foreach (ScoredObject <Tree> candidate in candidates)
                        {
                            // -10000 denotes unknown words
                            Tree parse = candidate.Object();
                            parse.SetScore(candidate.Score() % -10000.0);
                            results.Add(parse);
                        }
                    }
                    else
                    {
                        log.Warn("Parsing of sentence failed.  " + "Will ignore and continue: " + SentenceUtils.ListToString(words));
                    }
                }
                else
                {
                    // A single requested parse goes through the best-parse accessors.
                    Tree best = query.GetBestParse();
                    if (best != null)
                    {
                        best.SetScore(query.GetBestScore() % -10000.0);
                        results.Add(best);
                    }
                    else
                    {
                        log.Warn("Parsing of sentence failed.  " + "Will ignore and continue: " + SentenceUtils.ListToString(words));
                    }
                }
            }
            catch (OutOfMemoryException oom)
            {
                log.Error(oom);
                // Logging itself may run out of memory at this point.
                log.Warn("Parsing of sentence ran out of memory (length=" + words.Count + ").  " + "Will ignore and try to continue.");
            }
            catch (NoSuchParseException)
            {
                log.Warn("Parsing of sentence failed, possibly because of out of memory.  " + "Will ignore and continue: " + SentenceUtils.ListToString(words));
            }
            return results;
        }
        /// <summary>
        /// Creates a fresh parser query, parses <paramref name="sentence"/> with it and returns
        /// the query. When <c>pwErr</c> is set, the parse is run through
        /// <c>ParseAndReport</c> with that writer instead of the plain <c>Parse</c>.
        /// </summary>
        /// <param name="sentence">The sentence to parse.</param>
        /// <returns>The parser query that was used, regardless of the parse outcome.</returns>
        public virtual IParserQuery Process <_T0>(IList <_T0> sentence)
            where _T0 : IHasWord
        {
            IParserQuery query = pqFactory.ParserQuery();

            if (pwErr == null)
            {
                query.Parse(sentence);
                return query;
            }

            query.ParseAndReport(sentence, pwErr);
            return query;
        }
예제 #4
0
        /// <summary>
        /// Parses <paramref name="sentence"/> with the underlying parser query and, on success,
        /// reranks its k-best PCFG parses into <c>scoredTrees</c>.
        /// </summary>
        /// <param name="sentence">The sentence to parse.</param>
        /// <returns>
        /// False when the underlying parse fails or yields no k-best parses; true once the
        /// reranked trees have been stored.
        /// </returns>
        public virtual bool Parse <_T0>(IList <_T0> sentence)
            where _T0 : IHasWord
        {
            if (!parserQuery.Parse(sentence))
            {
                return false;
            }

            IList <ScoredObject <Tree> > candidates = parserQuery.GetKBestPCFGParses(rerankerKBest);
            bool haveCandidates = !candidates.IsEmpty();

            if (haveCandidates)
            {
                scoredTrees = Rerank(sentence, candidates);
            }
            return haveCandidates;
        }
예제 #5
0
            /// <summary>
            /// Parses the stored sentence, then updates the enclosing panel: the progress
            /// monitor is stopped, the status text is set, and either the best parse is shown
            /// or an error dialog is displayed.
            /// </summary>
            public override void Run()
            {
                var          panel = this._enclosing;
                IParserQuery query = panel.parser.ParserQuery();
                bool         parsed;

                try
                {
                    parsed = query.Parse(this.sentence);
                }
                catch (Exception)
                {
                    // Any exception from the parser is reported to the user and ends the run.
                    panel.StopProgressMonitor();
                    JOptionPane.ShowMessageDialog(panel, "Could not parse selected sentence\n(sentence probably too long)", null, JOptionPane.ErrorMessage);
                    panel.SetStatus("Error parsing");
                    return;
                }

                panel.StopProgressMonitor();
                panel.SetStatus("Done");
                if (!parsed)
                {
                    JOptionPane.ShowMessageDialog(panel, "Could not parse selected sentence", null, JOptionPane.ErrorMessage);
                    panel.SetStatus("Error parsing");
                    panel.treePanel.SetTree(null);
                    panel.clearButton.SetEnabled(false);
                }
                else
                {
                    // Show the best parse and allow it to be cleared again.
                    panel.treePanel.SetTree(query.GetBestParse());
                    panel.clearButton.SetEnabled(true);
                }

                if (panel.scrollWhenDone)
                {
                    panel.ScrollForward();
                }
            }
        /// <summary>
        /// Command-line entry point. Parses every tree of the test treebank with the loaded
        /// parser, writes each sentence, its parse tree and its ROOT vector to the output file,
        /// and then logs, for every subtree with at most <c>maxLength</c> leaves, its closest
        /// other subtrees ranked by <c>NormF()</c> of the vector difference (at most 100 matches
        /// are retained per subtree).
        /// </summary>
        /// <remarks>
        /// Recognised options: <c>-model &lt;path&gt;</c>, <c>-testTreebank &lt;description&gt;</c>
        /// and <c>-output &lt;path&gt;</c> (all required). Every other argument is forwarded to
        /// <c>LexicalizedParser.LoadModel</c>.
        /// The output file is flushed and closed as soon as the parsing pass finishes, and is
        /// closed even when that pass throws.
        /// </remarks>
        /// <param name="args">Command-line arguments.</param>
        /// <exception cref="System.ArgumentException">
        /// A required option is missing or has no value, or the loaded parser does not carry a
        /// reranker with a DVModel attached.
        /// </exception>
        /// <exception cref="System.Exception"/>
        public static void Main(string[] args)
        {
            string         modelPath          = null;
            string         outputPath         = null;
            string         testTreebankPath   = null;
            IFileFilter    testTreebankFilter = null;
            IList <string> unusedArgs         = new List <string>();

            for (int argIndex = 0; argIndex < args.Length;)
            {
                string option = args[argIndex];
                if (Sharpen.Runtime.EqualsIgnoreCase(option, "-model"))
                {
                    modelPath = RequireOptionValue(args, argIndex);
                    argIndex += 2;
                }
                else if (Sharpen.Runtime.EqualsIgnoreCase(option, "-testTreebank"))
                {
                    Pair <string, IFileFilter> treebankDescription = ArgUtils.GetTreebankDescription(args, argIndex, "-testTreebank");
                    argIndex           = argIndex + ArgUtils.NumSubArgs(args, argIndex) + 1;
                    testTreebankPath   = treebankDescription.First();
                    testTreebankFilter = treebankDescription.Second();
                }
                else if (Sharpen.Runtime.EqualsIgnoreCase(option, "-output"))
                {
                    outputPath = RequireOptionValue(args, argIndex);
                    argIndex  += 2;
                }
                else
                {
                    // Anything unrecognised is passed on to the parser loader.
                    unusedArgs.Add(option);
                    argIndex++;
                }
            }
            if (modelPath == null)
            {
                throw new ArgumentException("Need to specify -model");
            }
            if (testTreebankPath == null)
            {
                throw new ArgumentException("Need to specify -testTreebank");
            }
            if (outputPath == null)
            {
                throw new ArgumentException("Need to specify -output");
            }
            string[]          newArgs   = Sharpen.Collections.ToArray(unusedArgs, new string[unusedArgs.Count]);
            LexicalizedParser lexparser = ((LexicalizedParser)LexicalizedParser.LoadModel(modelPath, newArgs));

            // testTreebankPath was validated above, so the treebank is always loaded here.
            log.Info("Reading in trees from " + testTreebankPath);
            if (testTreebankFilter != null)
            {
                log.Info("Filtering on " + testTreebankFilter);
            }
            Treebank testTreebank = lexparser.GetOp().tlpParams.MemoryTreebank();
            testTreebank.LoadPath(testTreebankPath, testTreebankFilter);
            log.Info("Read in " + testTreebank.Count + " trees for testing");

            IList <FindNearestNeighbors.ParseRecord> records = Generics.NewArrayList();
            FileWriter     @out = new FileWriter(outputPath);
            BufferedWriter bout = new BufferedWriter(@out);

            try
            {
                log.Info("Parsing " + testTreebank.Count + " trees");
                int count = 0;

                foreach (Tree goldTree in testTreebank)
                {
                    IList <Word> tokens      = goldTree.YieldWords();
                    IParserQuery parserQuery = lexparser.ParserQuery();
                    if (!parserQuery.Parse(tokens))
                    {
                        throw new AssertionError("Could not parse: " + tokens);
                    }
                    if (!(parserQuery is RerankingParserQuery))
                    {
                        throw new ArgumentException("Expected a LexicalizedParser with a Reranker attached");
                    }
                    RerankingParserQuery rpq = (RerankingParserQuery)parserQuery;
                    if (!(rpq.RerankerQuery() is DVModelReranker.Query))
                    {
                        throw new ArgumentException("Expected a LexicalizedParser with a DVModel attached");
                    }
                    DeepTree     tree       = ((DVModelReranker.Query)rpq.RerankerQuery()).GetDeepTrees()[0];
                    SimpleMatrix rootVector = null;

                    // Locate the vector attached to the node labelled ROOT.
                    foreach (KeyValuePair <Tree, SimpleMatrix> entry in tree.GetVectors())
                    {
                        if (entry.Key.Label().Value().Equals("ROOT"))
                        {
                            rootVector = entry.Value;
                            break;
                        }
                    }
                    if (rootVector == null)
                    {
                        throw new AssertionError("Could not find root nodevector");
                    }

                    // All output goes through the buffered writer rather than the raw file writer.
                    bout.Write(tokens + "\n");
                    bout.Write(tree.GetTree() + "\n");
                    for (int i = 0; i < rootVector.GetNumElements(); ++i)
                    {
                        bout.Write("  " + rootVector.Get(i));
                    }
                    bout.Write("\n\n\n");
                    count++;
                    if (count % 10 == 0)
                    {
                        log.Info("  " + count);
                    }
                    records.Add(new FindNearestNeighbors.ParseRecord(tokens, goldTree, tree.GetTree(), rootVector, tree.GetVectors()));
                }
                log.Info("  done parsing");

                // Push the buffered text into the file before it is closed.
                bout.Flush();
                @out.Flush();
            }
            finally
            {
                // Release the file handle even when parsing one of the trees throws.
                @out.Close();
            }

            IList <Pair <Tree, SimpleMatrix> > subtrees = Generics.NewArrayList();

            foreach (FindNearestNeighbors.ParseRecord record in records)
            {
                foreach (KeyValuePair <Tree, SimpleMatrix> entry in record.nodeVectors)
                {
                    if (entry.Key.GetLeaves().Count <= maxLength)
                    {
                        subtrees.Add(Pair.MakePair(entry.Key, entry.Value));
                    }
                }
            }
            log.Info("There are " + subtrees.Count + " subtrees in the set of trees");
            PriorityQueue <ScoredObject <Pair <Tree, Tree> > > bestmatches = new PriorityQueue <ScoredObject <Pair <Tree, Tree> > >(101, ScoredComparator.DescendingComparator);

            for (int i = 0; i < subtrees.Count; ++i)
            {
                Pair <Tree, SimpleMatrix> current = subtrees[i];
                log.Info(current.First().YieldWords());
                log.Info(current.First());
                for (int j = 0; j < subtrees.Count; ++j)
                {
                    if (i == j)
                    {
                        continue;
                    }
                    // TODO: look at basic category?
                    double normF = current.Second().Minus(subtrees[j].Second()).NormF();
                    bestmatches.Add(new ScoredObject <Pair <Tree, Tree> >(Pair.MakePair(current.First(), subtrees[j].First()), normF));

                    // Cap the queue at 100 entries by discarding its head.
                    if (bestmatches.Count > 100)
                    {
                        bestmatches.Poll();
                    }
                }

                // Drain the queue and report the matches in the reverse of poll order.
                IList <ScoredObject <Pair <Tree, Tree> > > ordered = Generics.NewArrayList();
                while (bestmatches.Count > 0)
                {
                    ordered.Add(bestmatches.Poll());
                }
                Java.Util.Collections.Reverse(ordered);
                foreach (ScoredObject <Pair <Tree, Tree> > pair in ordered)
                {
                    log.Info(" MATCHED " + pair.Object().second.YieldWords() + " ... " + pair.Object().Second() + " with a score of " + pair.Score());
                }
                log.Info();
                log.Info();
                bestmatches.Clear();
            }

            // NOTE: the disabled per-record variant below wrote to the output file;
            // re-enabling it requires keeping the writer open past the parsing pass.

            /*
             * for (int i = 0; i < records.size(); ++i) {
             * if (i % 10 == 0) {
             * log.info("  " + i);
             * }
             * List<ScoredObject<ParseRecord>> scored = Generics.newArrayList();
             * for (int j = 0; j < records.size(); ++j) {
             * if (i == j) continue;
             *
             * double score = 0.0;
             * int matches = 0;
             * for (Map.Entry<Tree, SimpleMatrix> first : records.get(i).nodeVectors.entrySet()) {
             * for (Map.Entry<Tree, SimpleMatrix> second : records.get(j).nodeVectors.entrySet()) {
             * String firstBasic = dvparser.dvModel.basicCategory(first.getKey().label().value());
             * String secondBasic = dvparser.dvModel.basicCategory(second.getKey().label().value());
             * if (firstBasic.equals(secondBasic)) {
             ++matches;
             * double normF = first.getValue().minus(second.getValue()).normF();
             * score += normF * normF;
             * }
             * }
             * }
             * if (matches == 0) {
             * score = Double.POSITIVE_INFINITY;
             * } else {
             * score = score / matches;
             * }
             * //double score = records.get(i).vector.minus(records.get(j).vector).normF();
             * scored.add(new ScoredObject<ParseRecord>(records.get(j), score));
             * }
             * Collections.sort(scored, ScoredComparator.ASCENDING_COMPARATOR);
             *
             * out.write(records.get(i).sentence.toString() + "\n");
             * for (int j = 0; j < numNeighbors; ++j) {
             * out.write("   " + scored.get(j).score() + ": " + scored.get(j).object().sentence + "\n");
             * }
             * out.write("\n\n");
             * }
             * log.info();
             */
        }

        /// <summary>
        /// Returns the argument that follows the option at <paramref name="optionIndex"/>,
        /// or throws when the option is the last argument and therefore has no value.
        /// </summary>
        private static string RequireOptionValue(string[] args, int optionIndex)
        {
            if (optionIndex + 1 >= args.Length)
            {
                throw new ArgumentException("Missing value for option " + args[optionIndex]);
            }
            return args[optionIndex + 1];
        }