/// <summary>
/// Reads a single tree in standard Penn Treebank format from the input stream.
/// </summary>
/// <remarks>
/// <para>
/// The method supports additional parentheses around the tree (an unnamed
/// ROOT node) so long as they are balanced. If the token stream ends before
/// the current tree is complete, an <see cref="System.IO.IOException"/> is thrown.
/// </para>
/// <para>
/// Whenever a read attempt yields no tree, the method tries again for as long
/// as the tokenizer reports more input, so malformed trees are skipped. It is
/// possible, however, that a malformed tree will corrupt the token stream; in
/// that case an <see cref="System.IO.IOException"/> will eventually be thrown.
/// </para>
/// <para>
/// When both a tree normalizer and a tree factory are configured, the tree is
/// passed through <c>NormalizeWholeTree</c>; a non-null result then has its
/// leaves indexed via <c>IndexLeaves(true)</c> before being returned.
/// </para>
/// </remarks>
/// <returns>A single tree, or <c>null</c> at end of token stream.</returns>
/// <exception cref="System.IO.IOException">
/// The token stream ended before parsing of the current tree could complete.
/// </exception>
public virtual Tree ReadTree()
{
    Tree t = null;

    // Check "t == null" first: once a tree has been obtained the tokenizer
    // must not be probed again. If MoveNext() advances the tokenizer (as
    // IEnumerator.MoveNext does), probing it here would silently consume a
    // token that belongs to the next tree.
    while (t == null && tokenizer.MoveNext())
    {
        // Set up the PDA (parser state) for a fresh read attempt.
        this.currentTree = null;
        this.stack = new List<Tree>();

        try
        {
            t = GetTreeFromInputStream();
        }
        catch (NoSuchElementException e)
        {
            // Surface premature end of input as an I/O failure, keeping the
            // original exception attached for diagnostics.
            throw new IOException("End of token stream encountered before parsing could complete.", e);
        }

        if (t == null)
        {
            // Nothing was produced by this attempt; try again while input remains.
            continue;
        }

        // cdm 20100618: Do NOT unwrap empty trees such as (()) here (i.e. do not
        // replace t with its first child while its value is null/empty and it has
        // at most one child). That was never the historical behavior.
        if (treeNormalizer != null && treeFactory != null)
        {
            t = treeNormalizer.NormalizeWholeTree(t, treeFactory);
        }

        // The normalizer's result is re-checked for null before indexing.
        if (t != null)
        {
            t.IndexLeaves(true);
        }
    }

    return t;
}
/// <summary>
/// Normalizes the label value of every node obtained by enumerating
/// <paramref name="tree"/>, then applies whole-tree normalization.
/// </summary>
/// <remarks>
/// Labels are rewritten in place: leaf nodes go through
/// <c>NormalizeTerminal</c>, all other nodes through
/// <c>NormalizeNonterminal</c>.
/// </remarks>
/// <param name="tree">The tree whose node labels are normalized in place.</param>
/// <param name="tn">The normalizer applied to each label and to the whole tree.</param>
/// <param name="tf">The tree factory handed to <c>NormalizeWholeTree</c>.</param>
/// <returns>The result of <c>tn.NormalizeWholeTree(tree, tf)</c>.</returns>
public static Tree NormalizeTree(Tree tree, TreeNormalizer tn, ITreeFactory tf)
{
    foreach (Tree current in tree)
    {
        // Pick the normalization rule by node kind, then write the result back.
        var normalizedValue = current.IsLeaf()
            ? tn.NormalizeTerminal(current.Label().Value())
            : tn.NormalizeNonterminal(current.Label().Value());

        current.Label().SetValue(normalizedValue);
    }

    Tree normalizedTree = tn.NormalizeWholeTree(tree, tf);
    return normalizedTree;
}