/// <summary>
/// Rewrites the label value of every node yielded by enumerating
/// <paramref name="tree"/>, then hands the tree to the normalizer's
/// whole-tree pass.
/// </summary>
/// <param name="tree">Tree whose node labels are updated in place.</param>
/// <param name="tn">
/// Normalizer supplying <c>NormalizeTerminal</c> (used for leaves),
/// <c>NormalizeNonterminal</c> (used for every other node) and
/// <c>NormalizeWholeTree</c>.
/// </param>
/// <param name="tf">Tree factory forwarded to <c>NormalizeWholeTree</c>.</param>
/// <returns>Whatever <c>tn.NormalizeWholeTree(tree, tf)</c> returns.</returns>
public static Tree NormalizeTree(Tree tree, TreeNormalizer tn, ITreeFactory tf)
{
    foreach (Tree subtree in tree)
    {
        bool isTerminal = subtree.IsLeaf();
        string rawValue = subtree.Label().Value();

        // Leaves and internal nodes go through different normalizer hooks.
        string normalizedValue = isTerminal
            ? tn.NormalizeTerminal(rawValue)
            : tn.NormalizeNonterminal(rawValue);

        subtree.Label().SetValue(normalizedValue);
    }

    return tn.NormalizeWholeTree(tree, tf);
}
/// <summary>
/// Consumes tokens from <c>tokenizer</c> and assembles the next complete
/// parenthesized tree. <c>currentTree</c> is the node being filled in and
/// <c>stack</c> holds the nodes that are still open above it.
/// </summary>
/// <remarks>
/// A left parenthesis opens a new node, a right parenthesis closes the
/// current one, and any other token becomes a leaf of the current node.
/// Leaves are numbered from 1 within a single call; the number is stored
/// only when the leaf's label implements <c>IHasIndex</c>.
/// </remarks>
/// <returns>
/// The completed tree once the closing parenthesis that empties
/// <c>stack</c> is read. Returns <c>null</c> when the tokens run out, when
/// a right parenthesis arrives with nothing open, or when a plain token
/// appears outside any tree.
/// </returns>
/// <exception cref="Java.Util.NoSuchElementException">
/// Declared by the original signature; presumably raised by the tokenizer
/// when it is exhausted mid-read (TODO confirm against the tokenizer).
/// </exception>
private Tree GetTreeFromInputStream()
{
    int wordIndex = 1;

    // FSA
    while (tokenizer.MoveNext())
    {
        string token = tokenizer.Current;
        switch (token)
        {
            case leftParen:
            {
                // cdm 20100225: This next line used to have "" instead of null, but the
                // traditional and current tree normalizers depend on the label being null
                // not "" when there is no label on a tree (like the outermost English PTB level)
                string label = null;

                // Constant-first comparison so a null result from Peek() cannot throw here.
                if (!leftParen.Equals(tokenizer.Peek()))
                {
                    // The label is the token AFTER the parenthesis, so the tokenizer has to
                    // be advanced; reading Current without MoveNext() would only re-read "(".
                    if (!tokenizer.MoveNext())
                    {
                        // Input ended right after a left parenthesis: treat as truncated.
                        goto reject;
                    }
                    label = tokenizer.Current;
                }

                if (rightParen.Equals(label))
                {
                    // Skip past empty trees
                    continue;
                }

                if (treeNormalizer != null)
                {
                    label = treeNormalizer.NormalizeNonterminal(label);
                }

                if (label != null)
                {
                    // Collapse whatever StarPattern / SlashPattern match to a bare "*" / "/".
                    label = StarPattern.Matcher(label).ReplaceAll("*");
                    label = SlashPattern.Matcher(label).ReplaceAll("/");
                }

                // dtrs are added below
                Tree newTree = treeFactory.NewTreeNode(label, null);
                if (currentTree == null)
                {
                    // Root of a new tree: push the node itself so that its matching right
                    // parenthesis pops it back and leaves the stack empty.
                    stack.Add(newTree);
                }
                else
                {
                    currentTree.AddChild(newTree);
                    stack.Add(currentTree);
                }
                currentTree = newTree;
                break;
            }

            case rightParen:
            {
                if (stack.IsEmpty())
                {
                    // Warn that file has too many right parentheses
                    log.Info("PennTreeReader: warning: file has extra non-matching right parenthesis [ignored]");
                    goto reject;
                }

                // Accept
                currentTree = stack.Remove(stack.Count - 1); // i.e., stack.pop()
                if (stack.IsEmpty())
                {
                    return currentTree;
                }
                break;
            }

            default:
            {
                if (currentTree == null)
                {
                    // A careful Reader should warn here, but it's kind of useful to
                    // suppress this because then the TreeReader doesn't print a ton of
                    // messages if there is a README file in a directory of Trees.
                    // log.info("PennTreeReader: warning: file has extra token not in a s-expression tree: " + token + " [ignored]");
                    goto reject;
                }

                string terminal = (treeNormalizer == null) ? token : treeNormalizer.NormalizeTerminal(token);
                terminal = StarPattern.Matcher(terminal).ReplaceAll("*");
                terminal = SlashPattern.Matcher(terminal).ReplaceAll("/");

                Tree leaf = treeFactory.NewLeaf(terminal);

                // Fill in the optional label facets only when the label supports them.
                IHasIndex indexed = leaf.Label() as IHasIndex;
                if (indexed != null)
                {
                    indexed.SetIndex(wordIndex);
                }

                IHasWord worded = leaf.Label() as IHasWord;
                if (worded != null)
                {
                    worded.SetWord(leaf.Label().Value());
                }

                IHasTag tagged = leaf.Label() as IHasTag;
                if (tagged != null)
                {
                    // The tag is taken from the label value of the node the leaf hangs under.
                    tagged.SetTag(currentTree.Label().Value());
                }

                wordIndex++;
                currentTree.AddChild(leaf);
                // cdm: Note: this implementation just isn't as efficient as the old recursive
                // descent parser (see 2008 code), where all the daughters are gathered before
                // the tree is made....
                break;
            }
        }
    }

reject:
    // Reject
    if (currentTree != null)
    {
        log.Info("PennTreeReader: warning: incomplete tree (extra left parentheses in input): " + currentTree);
    }
    return null;
}