/// <summary>
/// Reparses the words of <paramref name="tree"/> with <paramref name="parser"/> and collects
/// the trees returned by <c>GetKBestPCFGParses(dvKBest)</c>, optionally transformed.
/// </summary>
/// <param name="parser">Parser that supplies the parser query used for reparsing.</param>
/// <param name="dvKBest">Number of parses requested from the parser query.</param>
/// <param name="tree">Tree whose yield (minus its last word) is reparsed.</param>
/// <param name="transformer">Applied to every resulting tree when not null.</param>
/// <returns>
/// The collected parses, or <c>null</c> when the yield has at most one word or the parse fails.
/// </returns>
public static IList<Tree> GetTopParsesForOneTree(LexicalizedParser parser, int dvKBest, Tree tree, ITreeTransformer transformer)
{
    IParserQuery query = parser.ParserQuery();
    IList<Word> words = tree.YieldWords();

    // The trees are binarized and otherwise manipulated, so the final word is the
    // end-of-sentence symbol; it has to be dropped before reparsing.
    if (words.Count <= 1)
    {
        return null;
    }
    words = words.SubList(0, words.Count - 1);

    bool parsed = pq_Parse(query, words);
    if (!parsed)
    {
        log.Info("Failed to use the given parser to reparse sentence \"" + words + "\"");
        return null;
    }

    IList<Tree> collected = new List<Tree>();
    IList<ScoredObject<Tree>> candidates = query.GetKBestPCFGParses(dvKBest);
    foreach (ScoredObject<Tree> candidate in candidates)
    {
        Tree parse = candidate.Object();
        collected.Add(transformer != null ? transformer.TransformTree(parse) : parse);
    }
    return collected;
}

/// <summary>Runs <paramref name="query"/> on <paramref name="words"/> and reports whether it succeeded.</summary>
private static bool pq_Parse(IParserQuery query, IList<Word> words)
{
    return query.Parse(words);
}
/// <summary>
/// Parses one sentence under the given constraints and returns the resulting trees with
/// their scores attached.
/// </summary>
/// <param name="constraints">Constraints handed to the parser query before parsing.</param>
/// <param name="words">Tokens of the sentence to parse.</param>
/// <returns>
/// The best parse when <c>kBest</c> is 1, otherwise the k-best parses; the list is empty when
/// no parse was produced, on <c>OutOfMemoryException</c>, or on <c>NoSuchParseException</c>.
/// </returns>
private IList<Tree> DoOneSentence(IList<ParserConstraint> constraints, IList<CoreLabel> words)
{
    IParserQuery query = parser.ParserQuery();
    query.SetConstraints(constraints);
    // The success flag of Parse is not consulted; failure is detected from the results below.
    query.Parse(words);

    IList<Tree> results = Generics.NewLinkedList();
    try
    {
        if (this.kBest != 1)
        {
            IList<ScoredObject<Tree>> candidates = query.GetKBestParses(this.kBest);
            bool haveCandidates = candidates != null && candidates.Count >= 1;
            if (haveCandidates)
            {
                foreach (ScoredObject<Tree> candidate in candidates)
                {
                    // -10000 denotes unknown words
                    Tree parse = candidate.Object();
                    parse.SetScore(candidate.Score() % -10000.0);
                    results.Add(parse);
                }
            }
            else
            {
                log.Warn("Parsing of sentence failed. " + "Will ignore and continue: " + SentenceUtils.ListToString(words));
            }
        }
        else
        {
            // With kBest set to 1 the single best parse is used directly.
            Tree best = query.GetBestParse();
            if (best != null)
            {
                best.SetScore(query.GetBestScore() % -10000.0);
                results.Add(best);
            }
            else
            {
                log.Warn("Parsing of sentence failed. " + "Will ignore and continue: " + SentenceUtils.ListToString(words));
            }
        }
    }
    catch (OutOfMemoryException e)
    {
        log.Error(e);
        // Beware that we can now get an OOM in logging, too.
        log.Warn("Parsing of sentence ran out of memory (length=" + words.Count + "). " + "Will ignore and try to continue.");
    }
    catch (NoSuchParseException)
    {
        log.Warn("Parsing of sentence failed, possibly because of out of memory. " + "Will ignore and continue: " + SentenceUtils.ListToString(words));
    }
    return results;
}
/// <summary>
/// Creates a fresh parser query from <c>pqFactory</c> and runs it on <paramref name="sentence"/>.
/// </summary>
/// <param name="sentence">The words to parse.</param>
/// <returns>
/// The parser query after parsing; the parse outcome itself is not inspected here.
/// </returns>
public virtual IParserQuery Process<_T0>(IList<_T0> sentence)
    where _T0 : IHasWord
{
    IParserQuery query = pqFactory.ParserQuery();

    // Without an error writer there is nothing to report to, so parse plainly.
    if (pwErr == null)
    {
        query.Parse(sentence);
        return query;
    }

    query.ParseAndReport(sentence, pwErr);
    return query;
}
/// <summary>
/// Parses <paramref name="sentence"/> with the wrapped parser query, then reranks its
/// k-best PCFG parses and stores the outcome in <c>scoredTrees</c>.
/// </summary>
/// <param name="sentence">The words to parse.</param>
/// <returns>
/// <c>true</c> when the underlying parse succeeded and produced at least one candidate;
/// <c>false</c> otherwise, in which case <c>scoredTrees</c> is left untouched.
/// </returns>
public virtual bool Parse<_T0>(IList<_T0> sentence)
    where _T0 : IHasWord
{
    if (parserQuery.Parse(sentence))
    {
        IList<ScoredObject<Tree>> candidates = parserQuery.GetKBestPCFGParses(rerankerKBest);
        if (!candidates.IsEmpty())
        {
            scoredTrees = Rerank(sentence, candidates);
            return true;
        }
    }
    return false;
}
/// <summary>
/// Parses the selected sentence and updates the enclosing UI: progress monitor, status text,
/// tree panel and clear button. Shows an error dialog when parsing throws or fails.
/// </summary>
public override void Run()
{
    var owner = this._enclosing;
    IParserQuery query = owner.parser.ParserQuery();

    bool parsed;
    try
    {
        parsed = query.Parse(this.sentence);
    }
    catch (Exception)
    {
        // Any exception from the parser is reported to the user as a failed parse.
        owner.StopProgressMonitor();
        JOptionPane.ShowMessageDialog(owner, "Could not parse selected sentence\n(sentence probably too long)", null, JOptionPane.ErrorMessage);
        owner.SetStatus("Error parsing");
        return;
    }

    owner.StopProgressMonitor();
    owner.SetStatus("Done");

    if (!parsed)
    {
        JOptionPane.ShowMessageDialog(owner, "Could not parse selected sentence", null, JOptionPane.ErrorMessage);
        owner.SetStatus("Error parsing");
        owner.treePanel.SetTree(null);
        owner.clearButton.SetEnabled(false);
    }
    else
    {
        // Show the best parse and allow the user to clear it again.
        Tree best = query.GetBestParse();
        owner.treePanel.SetTree(best);
        owner.clearButton.SetEnabled(true);
    }

    if (owner.scrollWhenDone)
    {
        owner.ScrollForward();
    }
}
/// <summary>
/// Command-line entry point. Loads a parser model, parses every tree of the test treebank,
/// writes the tokens, the parsed tree and the ROOT node vector of each sentence to the output
/// file, and then logs, for every subtree with at most <c>maxLength</c> leaves, the other
/// subtrees ranked by <c>NormF()</c> of the difference of their vectors.
/// </summary>
/// <param name="args">
/// Recognized flags: <c>-model &lt;path&gt;</c>, <c>-testTreebank &lt;description&gt;</c> and
/// <c>-output &lt;path&gt;</c> (all required). Every other argument is forwarded to
/// <c>LexicalizedParser.LoadModel</c>.
/// </param>
/// <exception cref="System.ArgumentException">
/// A required flag is absent, a flag is missing its value, or the loaded parser does not have
/// a DVModel reranker attached.
/// </exception>
/// <exception cref="System.Exception"/>
public static void Main(string[] args)
{
    string modelPath = null;
    string outputPath = null;
    string testTreebankPath = null;
    IFileFilter testTreebankFilter = null;
    IList<string> unusedArgs = new List<string>();

    for (int argIndex = 0; argIndex < args.Length;)
    {
        if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-model"))
        {
            // Fail with a usage error rather than indexing past the end of args.
            if (argIndex + 1 >= args.Length)
            {
                throw new ArgumentException("Missing value for -model");
            }
            modelPath = args[argIndex + 1];
            argIndex += 2;
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-testTreebank"))
        {
            Pair<string, IFileFilter> treebankDescription = ArgUtils.GetTreebankDescription(args, argIndex, "-testTreebank");
            argIndex = argIndex + ArgUtils.NumSubArgs(args, argIndex) + 1;
            testTreebankPath = treebankDescription.First();
            testTreebankFilter = treebankDescription.Second();
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-output"))
        {
            if (argIndex + 1 >= args.Length)
            {
                throw new ArgumentException("Missing value for -output");
            }
            outputPath = args[argIndex + 1];
            argIndex += 2;
        }
        else
        {
            unusedArgs.Add(args[argIndex++]);
        }
    }

    if (modelPath == null)
    {
        throw new ArgumentException("Need to specify -model");
    }
    if (testTreebankPath == null)
    {
        throw new ArgumentException("Need to specify -testTreebank");
    }
    if (outputPath == null)
    {
        throw new ArgumentException("Need to specify -output");
    }

    string[] newArgs = Sharpen.Collections.ToArray(unusedArgs, new string[unusedArgs.Count]);
    LexicalizedParser lexparser = ((LexicalizedParser)LexicalizedParser.LoadModel(modelPath, newArgs));

    // testTreebankPath is known to be non-null here, so the treebank is always loaded.
    log.Info("Reading in trees from " + testTreebankPath);
    if (testTreebankFilter != null)
    {
        log.Info("Filtering on " + testTreebankFilter);
    }
    Treebank testTreebank = lexparser.GetOp().tlpParams.MemoryTreebank();
    testTreebank.LoadPath(testTreebankPath, testTreebankFilter);
    log.Info("Read in " + testTreebank.Count + " trees for testing");

    FileWriter @out = new FileWriter(outputPath);
    try
    {
        // All output goes through the buffered writer; it is flushed before the file is closed.
        BufferedWriter bout = new BufferedWriter(@out);

        log.Info("Parsing " + testTreebank.Count + " trees");
        int count = 0;
        IList<FindNearestNeighbors.ParseRecord> records = Generics.NewArrayList();
        foreach (Tree goldTree in testTreebank)
        {
            IList<Word> tokens = goldTree.YieldWords();
            IParserQuery parserQuery = lexparser.ParserQuery();
            if (!parserQuery.Parse(tokens))
            {
                throw new AssertionError("Could not parse: " + tokens);
            }
            if (!(parserQuery is RerankingParserQuery))
            {
                throw new ArgumentException("Expected a LexicalizedParser with a Reranker attached");
            }
            RerankingParserQuery rpq = (RerankingParserQuery)parserQuery;
            if (!(rpq.RerankerQuery() is DVModelReranker.Query))
            {
                throw new ArgumentException("Expected a LexicalizedParser with a DVModel attached");
            }

            // Only the first deep tree of the reranker query is used.
            DeepTree tree = ((DVModelReranker.Query)rpq.RerankerQuery()).GetDeepTrees()[0];

            // Locate the vector attached to the node labelled ROOT.
            SimpleMatrix rootVector = null;
            foreach (KeyValuePair<Tree, SimpleMatrix> entry in tree.GetVectors())
            {
                if (entry.Key.Label().Value().Equals("ROOT"))
                {
                    rootVector = entry.Value;
                    break;
                }
            }
            if (rootVector == null)
            {
                throw new AssertionError("Could not find root nodevector");
            }

            bout.Write(tokens + "\n");
            bout.Write(tree.GetTree() + "\n");
            for (int element = 0; element < rootVector.GetNumElements(); ++element)
            {
                bout.Write(" " + rootVector.Get(element));
            }
            bout.Write("\n\n\n");

            count++;
            if (count % 10 == 0)
            {
                log.Info(" " + count);
            }
            records.Add(new FindNearestNeighbors.ParseRecord(tokens, goldTree, tree.GetTree(), rootVector, tree.GetVectors()));
        }
        log.Info(" done parsing");

        // Gather every (subtree, vector) pair whose subtree has at most maxLength leaves.
        IList<Pair<Tree, SimpleMatrix>> subtrees = Generics.NewArrayList();
        foreach (FindNearestNeighbors.ParseRecord record in records)
        {
            foreach (KeyValuePair<Tree, SimpleMatrix> entry in record.nodeVectors)
            {
                if (entry.Key.GetLeaves().Count <= maxLength)
                {
                    subtrees.Add(Pair.MakePair(entry.Key, entry.Value));
                }
            }
        }
        log.Info("There are " + subtrees.Count + " subtrees in the set of trees");

        // The queue is capped at 100 entries: once it grows past that, one entry is polled off.
        // NOTE(review): which entry Poll() removes depends on ScoredComparator.DescendingComparator;
        // presumably the largest distance is evicted so the closest matches remain - confirm.
        PriorityQueue<ScoredObject<Pair<Tree, Tree>>> bestmatches = new PriorityQueue<ScoredObject<Pair<Tree, Tree>>>(101, ScoredComparator.DescendingComparator);
        for (int i = 0; i < subtrees.Count; ++i)
        {
            log.Info(subtrees[i].First().YieldWords());
            log.Info(subtrees[i].First());
            for (int j = 0; j < subtrees.Count; ++j)
            {
                if (i == j)
                {
                    continue;
                }
                // TODO: look at basic category?
                double normF = subtrees[i].Second().Minus(subtrees[j].Second()).NormF();
                bestmatches.Add(new ScoredObject<Pair<Tree, Tree>>(Pair.MakePair(subtrees[i].First(), subtrees[j].First()), normF));
                if (bestmatches.Count > 100)
                {
                    bestmatches.Poll();
                }
            }

            // Drain the queue and reverse the drained order before reporting.
            IList<ScoredObject<Pair<Tree, Tree>>> ordered = Generics.NewArrayList();
            while (bestmatches.Count > 0)
            {
                ordered.Add(bestmatches.Poll());
            }
            Java.Util.Collections.Reverse(ordered);
            foreach (ScoredObject<Pair<Tree, Tree>> pair in ordered)
            {
                log.Info(" MATCHED " + pair.Object().second.YieldWords() + " ... " + pair.Object().Second() + " with a score of " + pair.Score());
            }
            log.Info();
            log.Info();
            bestmatches.Clear();
        }

        /*
         * Earlier sentence-level neighbour search, kept for reference (original Java):
         *
         * for (int i = 0; i < records.size(); ++i) {
         *   if (i % 10 == 0) {
         *     log.info(" " + i);
         *   }
         *   List<ScoredObject<ParseRecord>> scored = Generics.newArrayList();
         *   for (int j = 0; j < records.size(); ++j) {
         *     if (i == j) continue;
         *
         *     double score = 0.0;
         *     int matches = 0;
         *     for (Map.Entry<Tree, SimpleMatrix> first : records.get(i).nodeVectors.entrySet()) {
         *       for (Map.Entry<Tree, SimpleMatrix> second : records.get(j).nodeVectors.entrySet()) {
         *         String firstBasic = dvparser.dvModel.basicCategory(first.getKey().label().value());
         *         String secondBasic = dvparser.dvModel.basicCategory(second.getKey().label().value());
         *         if (firstBasic.equals(secondBasic)) {
         *           ++matches;
         *           double normF = first.getValue().minus(second.getValue()).normF();
         *           score += normF * normF;
         *         }
         *       }
         *     }
         *     if (matches == 0) {
         *       score = Double.POSITIVE_INFINITY;
         *     } else {
         *       score = score / matches;
         *     }
         *     //double score = records.get(i).vector.minus(records.get(j).vector).normF();
         *     scored.add(new ScoredObject<ParseRecord>(records.get(j), score));
         *   }
         *   Collections.sort(scored, ScoredComparator.ASCENDING_COMPARATOR);
         *
         *   out.write(records.get(i).sentence.toString() + "\n");
         *   for (int j = 0; j < numNeighbors; ++j) {
         *     out.write(" " + scored.get(j).score() + ": " + scored.get(j).object().sentence + "\n");
         *   }
         *   out.write("\n\n");
         * }
         * log.info();
         */

        bout.Flush();
        @out.Flush();
    }
    finally
    {
        // Release the output file even when parsing or the neighbour search throws.
        @out.Close();
    }
}