/// <summary>
/// Creates the preterminal subtree (a POS node over a single word leaf) for a word element.
/// </summary>
/// <param name="root">The XML node describing the word; it is cast to <c>IElement</c>.</param>
/// <returns>
/// A tree node labeled with the normalized POS string whose only child is a leaf
/// holding the normalized word.
/// </returns>
private Tree BuildWordNode(INode root)
{
    IElement wordElement = (IElement)root;
    string tag = treeNormalizer.NormalizeNonterminal(GetPOS(wordElement));
    string lemma = wordElement.GetAttribute(AttrLemma);
    string token = treeNormalizer.NormalizeTerminal(GetWord(wordElement));

    // Build the leaf and copy the word (and lemma, when present) onto labels that can carry them.
    Tree leaf = treeFactory.NewLeaf(token);
    IHasWord wordLabel = leaf.Label() as IHasWord;
    if (wordLabel != null)
    {
        wordLabel.SetWord(token);
    }
    IHasLemma lemmaLabel = leaf.Label() as IHasLemma;
    if (lemmaLabel != null && lemma != null)
    {
        lemmaLabel.SetLemma(lemma);
    }

    // Wrap the leaf in a POS node and record the tag on labels that support it.
    IList<Tree> leafHolder = new List<Tree> { leaf };
    Tree posNode = treeFactory.NewTreeNode(tag, leafHolder);
    IHasTag tagLabel = posNode.Label() as IHasTag;
    if (tagLabel != null)
    {
        tagLabel.SetTag(tag);
    }
    return posNode;
}
/// <summary>
/// Rebuilds the subtree rooted at <paramref name="t"/>, splicing out intermediate nodes
/// whose label value contains <c>'@'</c>.
/// </summary>
/// <remarks>
/// Leaves are copied with their label and score. For an internal node every child is
/// transformed recursively; when a transformed child is not a leaf and its label value
/// contains <c>'@'</c>, that child's own children are added in its place.
/// </remarks>
/// <param name="t">The tree to transform; must not be null.</param>
/// <returns>A new tree built with the factory <c>tf</c>, carrying the original labels and scores.</returns>
protected internal virtual Tree TransformTreeHelper(Tree t)
{
    if (t.IsLeaf())
    {
        Tree leaf = tf.NewLeaf(t.Label());
        leaf.SetScore(t.Score());
        return leaf;
    }

    IList<Tree> newChildren = new List<Tree>();
    // The loop bound was previously an undeclared name; take it from the node itself.
    int numKids = t.NumChildren();
    for (int childNum = 0; childNum < numKids; childNum++)
    {
        Tree child = t.GetChild(childNum);
        Tree newChild = TransformTreeHelper(child);
        if (!newChild.IsLeaf() && newChild.Label().Value().IndexOf('@') >= 0)
        {
            // Intermediate ('@') node: promote its children instead of keeping the node.
            Sharpen.Collections.AddAll(newChildren, newChild.GetChildrenAsList());
        }
        else
        {
            newChildren.Add(newChild);
        }
    }

    Tree node = tf.NewTreeNode(t.Label(), newChildren);
    node.SetScore(t.Score());
    return node;
}
/// <summary>
/// Merges per-character preterminals back into a single word leaf, modifying the tree in place.
/// </summary>
/// <remarks>
/// At a pre-preterminal node whose first child's label value matches <c>.*_.</c>, the
/// leaf values below each child are concatenated and the node's children are replaced
/// by one new leaf holding the concatenation. A pre-preterminal that does not match is
/// left untouched. Any other node is handled by recursing into each of its children.
/// </remarks>
/// <param name="tree">The tree to restore; must not be null.</param>
/// <returns>The same <paramref name="tree"/> instance.</returns>
public virtual Tree UntransformTree(Tree tree)
{
    ITreeFactory factory = tree.TreeFactory();

    if (!tree.IsPrePreTerminal())
    {
        foreach (Tree kid in tree.Children())
        {
            UntransformTree(kid);
        }
        return tree;
    }

    string firstTag = tree.FirstChild().Label().Value();
    if (firstTag.Matches(".*_."))
    {
        StringBuilder joined = new StringBuilder();
        foreach (Tree charNode in tree.Children())
        {
            joined.Append(charNode.FirstChild().Label().Value());
        }
        Tree wordLeaf = factory.NewLeaf(joined.ToString());
        tree.SetChildren(Java.Util.Collections.SingletonList(wordLeaf));
    }
    return tree;
}
/// <summary>
/// Recursively copies a tree using the factory <c>tf</c>, keeping only each label's string value.
/// </summary>
/// <param name="t">The tree to copy; may be null.</param>
/// <returns>
/// Null when <paramref name="t"/> is null; otherwise a new tree with the same shape whose
/// nodes are created from the label values of the original nodes.
/// </returns>
public virtual Tree Helper(Tree t)
{
    if (t == null)
    {
        return null;
    }

    string value = t.Label().Value();
    if (t.IsLeaf())
    {
        return tf.NewLeaf(value);
    }

    if (t.IsPreTerminal())
    {
        // A preterminal has exactly the one leaf child we copy here.
        Tree copiedLeaf = Helper(t.Children()[0]);
        return tf.NewTreeNode(value, Java.Util.Collections.SingletonList(copiedLeaf));
    }

    int count = t.NumChildren();
    IList<Tree> copies = new List<Tree>(count);
    int index = 0;
    while (index < count)
    {
        copies.Add(Helper(t.Children()[index]));
        index++;
    }
    return tf.NewTreeNode(value, copies);
}
/// <summary>Find the best (partial) parse within the parameter constraints.</summary>
/// <remarks>
/// The method looks up the score of the requested item with <c>IScore</c> and then searches
/// every split point, attached word and attached tag for a combination whose summed
/// component scores pass <c>Matches</c> against that score. The first such combination is
/// expanded recursively into a two-child node. A span of width 1 yields a node over a single
/// word leaf. If no combination is found, a message is logged and null is returned.
/// </remarks>
/// <param name="start">Sentence index of start of span (fenceposts, from 0 up)</param>
/// <param name="end">Sentence index of end of span (right side fencepost)</param>
/// <param name="hWord">Sentence index of head word (left side fencepost)</param>
/// <param name="hTag">Tag assigned to hWord</param>
/// <returns>The best parse tree within the parameter constraints, or null if no backtrace is found</returns>
private Tree ExtractBestParse(int start, int end, int hWord, int hTag)
{
    string headWordStr = wordIndex.Get(words[hWord]);
    string headTagStr = tagIndex.Get(hTag);
    // The head word string is passed for both the first and second label arguments.
    ILabel headLabel = new CategoryWordTag(headWordStr, headWordStr, headTagStr);
    int numTags = tagIndex.Size();
    // deal with span 1
    if (end - start == 1)
    {
        Tree leaf = tf.NewLeaf(new Word(headWordStr));
        return(tf.NewTreeNode(headLabel, Java.Util.Collections.SingletonList(leaf)));
    }
    // find backtrace
    IList <Tree> children = new List <Tree>();
    // Score of the item being reconstructed; candidate decompositions are compared against it.
    double bestScore = IScore(start, end, hWord, hTag);
    for (int split = start + 1; split < end; split++)
    {
        int binD = binDistance[hWord][split];
        if (hWord < split)
        {
            // Head is in the left half [start, split); the attached word comes from the right half.
            for (int aWord = split; aWord < end; aWord++)
            {
                for (int aTag = 0; aTag < numTags; aTag++)
                {
                    // NOTE(review): Matches presumably compares the two scores within a tolerance -- confirm.
                    if (Matches(IScore(start, split, hWord, hTag) + IScore(split, end, aWord, aTag) + headScore[binD][hWord][dg.TagBin(hTag)][aWord][dg.TagBin(aTag)] + headStop[aWord][dg.TagBin(aTag)][split] + headStop[aWord][dg.TagBin(aTag)][end], bestScore))
                    {
                        // build it
                        children.Add(ExtractBestParse(start, split, hWord, hTag));
                        children.Add(ExtractBestParse(split, end, aWord, aTag));
                        return(tf.NewTreeNode(headLabel, children));
                    }
                }
            }
        }
        else
        {
            // Head is in the right half [split, end); the attached word comes from the left half.
            for (int aWord = start; aWord < split; aWord++)
            {
                for (int aTag = 0; aTag < numTags; aTag++)
                {
                    if (Matches(IScore(start, split, aWord, aTag) + IScore(split, end, hWord, hTag) + headScore[binD][hWord][dg.TagBin(hTag)][aWord][dg.TagBin(aTag)] + headStop[aWord][dg.TagBin(aTag)][start] + headStop[aWord][dg.TagBin(aTag)][split], bestScore))
                    {
                        children.Add(ExtractBestParse(start, split, aWord, aTag));
                        children.Add(ExtractBestParse(split, end, hWord, hTag));
                        // build it
                        return(tf.NewTreeNode(headLabel, children));
                    }
                }
            }
        }
    }
    // No decomposition reproduced the item's score: report it and signal failure with null.
    log.Info("Problem in ExhaustiveDependencyParser::extractBestParse");
    return(null);
}
/// <summary>
/// Produces a simplified copy of a tree: labels are reduced to their basic category,
/// ignored punctuation preterminals are optionally dropped, a TOPP child is spliced out,
/// and unary start-symbol nodes are removed.
/// </summary>
/// <remarks>
/// When the first child of <paramref name="tree"/> is labeled "TOPP", the children of
/// <paramref name="tree"/> are replaced in place by that child's children before the
/// copy is built.
/// </remarks>
/// <param name="tree">The tree to transform; must not be null.</param>
/// <returns>
/// The transformed tree, or null when the node is a deleted punctuation preterminal or
/// when all of its children were deleted.
/// </returns>
public virtual Tree TransformTree(Tree tree)
{
    ILabel l = tree.Label();
    if (tree.IsLeaf())
    {
        return tf.NewLeaf(l);
    }

    string s = l.Value();
    s = tlpp.TreebankLanguagePack().BasicCategory(s);
    if (deletePunct)
    {
        // this is broken as it's not the right thing to do when there
        // is any tag ambiguity -- and there is for ' (POS/''). Sentences
        // can then have more or less words. It's also unnecessary for EVALB,
        // since it ignores punctuation anyway
        if (tree.IsPreTerminal() && tlpp.TreebankLanguagePack().IsEvalBIgnoredPunctuationTag(s))
        {
            return null;
        }
    }

    // TEMPORARY: eliminate the TOPP constituent
    if (tree.Children()[0].Label().Value().Equals("TOPP"))
    {
        log.Info("Found a TOPP");
        tree.SetChildren(tree.Children()[0].Children());
    }

    // Negra has lots of non-unary roots; delete unary roots
    if (tlpp.TreebankLanguagePack().IsStartSymbol(s) && tree.NumChildren() == 1)
    {
        // NB: This deletes the boundary symbol, which is in the tree!
        return TransformTree(tree.GetChild(0));
    }

    IList<Tree> children = new List<Tree>();
    // The loop bound was previously an undeclared name. It is read here, after the
    // TOPP splice above, so that it reflects the node's current children.
    int numC = tree.NumChildren();
    for (int cNum = 0; cNum < numC; cNum++)
    {
        Tree child = tree.GetChild(cNum);
        Tree newChild = TransformTree(child);
        if (newChild != null)
        {
            children.Add(newChild);
        }
    }

    if (children.IsEmpty())
    {
        return null;
    }
    return tf.NewTreeNode(new StringLabel(s), children);
}
/// <summary>
/// Builds a simplified copy of a tree: optionally removes punctuation, skips unary ROOT
/// nodes, strips label annotations and relabels PRN as ADVP.
/// </summary>
/// <param name="tree">The tree to transform; must not be null.</param>
/// <param name="isRoot">
/// True when this node should be kept even if all of its children were deleted.
/// </param>
/// <returns>
/// The transformed tree, or null when the node is deleted punctuation or a non-root
/// node left without children.
/// </returns>
private Tree TransformTree(Tree tree, bool isRoot)
{
    string label = tree.Label().Value();

    if (tree.IsLeaf())
    {
        bool dropWord = deletePunct && ctlp.IsPunctuationWord(label);
        return dropWord ? null : tf.NewLeaf(new StringLabel(label));
    }

    if (tree.IsPreTerminal() && deletePunct && ctlp.IsPunctuationTag(label))
    {
        return null;
    }

    // keep non-unary roots for now; a unary ROOT is replaced by its only child
    if (label.Matches("ROOT.*") && tree.NumChildren() == 1)
    {
        return TransformTree(tree.Children()[0], true);
    }

    // remove all functional and machine-generated annotations
    label = label.ReplaceFirst("[^A-Z].*$", string.Empty);
    // merge parentheticals with adverb phrases
    label = label.ReplaceFirst("PRN", "ADVP");

    IList<Tree> kept = new List<Tree>();
    foreach (Tree kid in tree.Children())
    {
        Tree transformedKid = TransformTree(kid, false);
        if (transformedKid != null)
        {
            kept.Add(transformedKid);
        }
    }

    // We don't delete the root because there are trees in the
    // Chinese treebank that only have punctuation in them!!!
    if (!isRoot && kept.IsEmpty())
    {
        return null;
    }
    return tf.NewTreeNode(new StringLabel(label), kept);
}
// static Set preterminals = new HashSet();
/// <summary>
/// Copies a tree, expanding every preterminal into one preterminal per character of its word.
/// </summary>
/// <remarks>
/// Each character becomes a leaf under a node labeled with the original tag plus a
/// position suffix: <c>_S</c> for a one-character word, otherwise <c>_B</c> for the first
/// character, <c>_E</c> for the last and <c>_M</c> for any character in between. The
/// per-character nodes are grouped under a node carrying the original tag.
/// </remarks>
/// <param name="tree">The tree to transform; must not be null.</param>
/// <returns>A new tree built with the factory of <paramref name="tree"/>.</returns>
public override Tree TransformTree(Tree tree)
{
    ITreeFactory tf = tree.TreeFactory();
    string tag = tree.Label().Value();

    if (tree.IsPreTerminal())
    {
        string word = tree.FirstChild().Label().Value();
        IList<Tree> newPreterms = new List<Tree>();
        // The loop bound was previously an undeclared name; it is the number of characters in the word.
        int size = word.Length;
        for (int i = 0; i < size; i++)
        {
            string singleCharLabel = new string(new char[] { word[i] });
            Tree newLeaf = tf.NewLeaf(singleCharLabel);
            string suffix;
            if (size == 1)
            {
                suffix = "_S";
            }
            else if (i == 0)
            {
                suffix = "_B";
            }
            else if (i == size - 1)
            {
                suffix = "_E";
            }
            else
            {
                suffix = "_M";
            }
            newPreterms.Add(tf.NewTreeNode(tag + suffix, Java.Util.Collections.SingletonList <Tree>(newLeaf)));
        }
        return tf.NewTreeNode(tag, newPreterms);
    }

    IList<Tree> newChildren = new List<Tree>();
    for (int i = 0; i < tree.Children().Length; i++)
    {
        Tree child = tree.Children()[i];
        newChildren.Add(TransformTree(child));
    }
    return tf.NewTreeNode(tag, newChildren);
}
/// <summary>
/// Copies a node while moving punctuation found at the edges of each child up to this
/// level, then trims punctuation from both ends of the resulting child list.
/// </summary>
/// <param name="tree">The node to transform; must not be null.</param>
/// <param name="tf">Factory used to create the new leaves and nodes.</param>
/// <returns>A new node with the label of <paramref name="tree"/> and the rearranged children.</returns>
internal virtual Tree TransformNode(Tree tree, ITreeFactory tf)
{
    if (tree.IsLeaf())
    {
        return tf.NewLeaf(tree.Label());
    }
    if (tree.IsPreTerminal())
    {
        return tf.NewTreeNode(tree.Label(), Java.Util.Collections.SingletonList(tf.NewLeaf(tree.Children()[0].Label())));
    }

    LinkedList<Tree> rebuilt = new LinkedList<Tree>();

    // promote lower punctuation
    foreach (Tree kid in tree.GetChildrenAsList())
    {
        LinkedList<Tree> kidPreTerms = PreTerms(kid);

        // Leading punctuation of the child goes in front of the child at this level.
        while (!kidPreTerms.IsEmpty())
        {
            Tree head = kidPreTerms.GetFirst();
            if (!IsPunc(head))
            {
                break;
            }
            rebuilt.Add(head);
            kidPreTerms.RemoveFirst();
        }

        Tree transformedKid = TransformNode(kid, tf);
        if (transformedKid.Children().Length > 0)
        {
            rebuilt.Add(transformedKid);
        }

        // Trailing punctuation of the child follows it, kept in its original order.
        LinkedList<Tree> trailing = new LinkedList<Tree>();
        while (!kidPreTerms.IsEmpty())
        {
            Tree tail = kidPreTerms.GetLast();
            if (!IsPunc(tail))
            {
                break;
            }
            trailing.AddFirst(tail);
            kidPreTerms.RemoveLast();
        }
        Sharpen.Collections.AddAll(rebuilt, trailing);
    }

    // remove local punctuation
    while (!rebuilt.IsEmpty() && IsPunc(rebuilt.GetFirst()))
    {
        rebuilt.RemoveFirst();
    }
    while (!rebuilt.IsEmpty() && IsPunc(rebuilt.GetLast()))
    {
        rebuilt.RemoveLast();
    }

    return tf.NewTreeNode(tree.Label(), rebuilt);
}
/// <summary>
/// Recursively copies a subtree and reports the copy of the foot node if it lies inside it.
/// </summary>
/// <remarks>
/// For every original node present in <c>nodesToNames</c>, the copy is stored in
/// <paramref name="newNamesToNodes"/> under the same name. If more than one foot copy is
/// found among the children, a message is logged and the last one found is used.
/// </remarks>
/// <param name="node">The node to copy.</param>
/// <param name="newNamesToNodes">Receives the name-to-copy mapping for named nodes.</param>
/// <param name="treeFactory">Factory used to create the copied nodes.</param>
/// <param name="labelFactory">Factory used to create the copied labels.</param>
/// <returns>A pair of (copied node, copied foot or null).</returns>
private Pair <Tree, Tree> CopyHelper(Tree node, IDictionary <string, Tree> newNamesToNodes, ITreeFactory treeFactory, ILabelFactory labelFactory)
{
    Tree copy;
    Tree footCopy = null;

    if (!node.IsLeaf())
    {
        IList<Tree> copiedKids = new List<Tree>(node.Children().Length);
        foreach (Tree kid in node.Children())
        {
            Pair<Tree, Tree> kidResult = CopyHelper(kid, newNamesToNodes, treeFactory, labelFactory);
            copiedKids.Add(kidResult.First());
            Tree kidFoot = kidResult.Second();
            if (kidFoot == null)
            {
                continue;
            }
            if (footCopy != null)
            {
                log.Info("Error -- two feet found when copying auxiliary tree " + tree.ToString() + "; using last foot found.");
            }
            footCopy = kidFoot;
        }
        copy = treeFactory.NewTreeNode(labelFactory.NewLabel(node.Label()), copiedKids);
    }
    else if (node == foot)
    {
        // found the foot node; pass it up. It is copied as a childless node that reuses the original label.
        copy = treeFactory.NewTreeNode(node.Label(), new List<Tree>(0));
        footCopy = copy;
    }
    else
    {
        copy = treeFactory.NewLeaf(labelFactory.NewLabel(node.Label()));
    }

    if (nodesToNames.Contains(node))
    {
        newNamesToNodes[nodesToNames[node]] = copy;
    }
    return new Pair<Tree, Tree>(copy, footCopy);
}
/// <summary>Binarizes the tree according to options set up in the constructor.</summary>
/// <remarks>
/// Works recursively over the whole tree. Leaves become <c>Word</c>-labeled leaves,
/// preterminals become <c>CategoryWordTag</c> nodes, and other nodes are rebuilt from
/// their transformed children. Nodes whose label starts with the start symbol are rebuilt
/// as-is; every other internal node is passed to <c>BinarizeLocalTree</c> together with
/// the index and word/tag of its head child.
/// </remarks>
/// <param name="t">
/// A tree to be binarized. The non-leaf nodes must already have
/// CategoryWordTag labels, with heads percolated.
/// </param>
/// <returns>A binary tree, or null when <paramref name="t"/> is null.</returns>
public virtual Tree TransformTree(Tree t)
{
    // handle null
    if (t == null)
    {
        return null;
    }

    string cat = t.Label().Value();

    // handle words
    if (t.IsLeaf())
    {
        return tf.NewLeaf(new Word(cat));
    }

    // handle tags
    if (t.IsPreTerminal())
    {
        Tree leafResult = TransformTree(t.GetChild(0));
        string leafWord = leafResult.Value();
        IList<Tree> single = new List<Tree>(1);
        single.Add(leafResult);
        return tf.NewTreeNode(new CategoryWordTag(cat, leafWord, cat), single);
    }

    // handle categories
    Tree headChild = hf.DetermineHead(t);
    if (headChild == null && !t.Label().Value().StartsWith(tlp.StartSymbol()))
    {
        log.Info("### No head found for:");
        t.PennPrint();
    }

    // Transform every child and remember the position of the head child.
    int headIndex = -1;
    Tree[] kids = t.Children();
    IList<Tree> transformedKids = new List<Tree>(kids.Length);
    for (int i = 0; i < kids.Length; i++)
    {
        Tree kid = kids[i];
        Tree kidResult = TransformTree(kid);
        if (kid == headChild)
        {
            headIndex = i;
        }
        transformedKids.Add(kidResult);
    }

    // handle the ROOT tree properly: it is rebuilt without binarization
    if (t.Label().Value().StartsWith(tlp.StartSymbol()))
    {
        return tf.NewTreeNode(t.Label(), transformedKids);
    }

    // label shouldn't have changed
    string headWord = ((IHasWord)headChild.Label()).Word();
    string headTag = ((IHasTag)headChild.Label()).Tag();
    Tree unbinarized = tf.NewTreeNode(new CategoryWordTag(cat, headWord, headTag), transformedKids);

    // cdm Mar 2005: invent a head so I don't have to rewrite all this
    // code, but with the removal of TreeHeadPair, some of the rest of
    // this should probably be rewritten too to not use this head variable
    TaggedWord inventedHead = new TaggedWord(headWord, headTag);
    return BinarizeLocalTree(unbinarized, headIndex, inventedHead);
}
/// <summary>
/// Produces a simplified copy of a tree for evaluation: removes start-symbol nodes,
/// reduces labels to basic categories, optionally rewrites WH categories and tags,
/// optionally deletes punctuation, optionally collapses NP-over-NP unary chains, and
/// relabels PRT as ADVP.
/// </summary>
/// <remarks>
/// <c>whOption</c> is read as a bit mask: bits 1 and 4 each strip a leading "WH" from the
/// category; bit 2 rewrites labels beginning with WP, WDT and WRB to PRP, DT and RB.
/// </remarks>
/// <param name="tree">The tree to transform; may be null.</param>
/// <returns>
/// The transformed tree, or null when <paramref name="tree"/> is null, is a deleted
/// punctuation preterminal, or has no surviving children.
/// </returns>
public virtual Tree TransformTree(Tree tree)
{
    if (tree == null)
    {
        return null;
    }

    ITreeFactory tf = tree.TreeFactory();
    string s = tree.Value();
    if (tlp.IsStartSymbol(s))
    {
        return TransformTree(tree.FirstChild());
    }
    if (tree.IsLeaf())
    {
        return tf.NewLeaf(tree.Label());
    }

    s = tlp.BasicCategory(s);
    if (((whOption & 1) != 0) && s.StartsWith("WH"))
    {
        s = Sharpen.Runtime.Substring(s, 2);
    }
    if ((whOption & 2) != 0)
    {
        s = s.ReplaceAll("^WP", "PRP"); // does both WP and WP$ !!
        s = s.ReplaceAll("^WDT", "DT");
        s = s.ReplaceAll("^WRB", "RB");
    }
    if (((whOption & 4) != 0) && s.StartsWith("WH"))
    {
        s = Sharpen.Runtime.Substring(s, 2);
    }

    // wsg2010: Might need a better way to deal with tag ambiguity. This still doesn't handle the
    // case where the GOLD tree does not label a punctuation mark as such (common in French), and
    // the guess tree does.
    if (deletePunct && tree.IsPreTerminal() && (tlp.IsEvalBIgnoredPunctuationTag(s) || tlp.IsPunctuationWord(tree.FirstChild().Value())))
    {
        return null;
    }

    // remove the extra NPs inserted in the collinsBaseNP option
    if (fixCollinsBaseNP && s.Equals("NP"))
    {
        Tree[] kids = tree.Children();
        if (kids.Length == 1 && tlp.BasicCategory(kids[0].Value()).Equals("NP"))
        {
            return TransformTree(kids[0]);
        }
    }

    // Magerman erased this distinction, and everyone else has followed like sheep...
    if (s.Equals("PRT"))
    {
        s = "ADVP";
    }

    IList<Tree> children = new List<Tree>();
    // The loop bound was previously an undeclared name; take it from the node itself.
    int numKids = tree.NumChildren();
    for (int cNum = 0; cNum < numKids; cNum++)
    {
        Tree child = tree.Children()[cNum];
        Tree newChild = TransformTree(child);
        if (newChild != null)
        {
            children.Add(newChild);
        }
    }

    if (children.IsEmpty())
    {
        return null;
    }
    Tree node = tf.NewTreeNode(tree.Label(), children);
    node.SetValue(s);
    return node;
}
/// <summary>
/// Reads tokens from <c>tokenizer</c> and assembles the next complete bracketed tree.
/// </summary>
/// <remarks>
/// A left parenthesis opens a new node (labeled with the following token unless that token
/// is another left parenthesis), a right parenthesis closes the current node, and any
/// other token becomes a leaf of the current node. Open nodes are tracked in <c>stack</c>
/// and <c>currentTree</c>, which are declared outside this method.
/// </remarks>
/// <returns>
/// The completed tree once the outermost node is closed; null when the tokens run out,
/// when a right parenthesis arrives with nothing open, or when a token appears outside
/// any tree.
/// </returns>
private Tree GetTreeFromInputStream()
{
    // Index assigned to the next leaf whose label implements IHasIndex; starts at 0.
    int wordIndex = 0;
    // FSA
    //label:
    while (tokenizer.HasNext())
    {
        string token = tokenizer.Next();
        switch (token)
        {
        case LeftParen:
            // cdm 20100225: This next line used to have "" instead of null, but the traditional and current tree normalizers depend on the label being null not "" when there is no label on a tree (like the outermost English PTB level)
            string label = (tokenizer.Peek().Equals(LeftParen)) ? null : tokenizer.Next();
            if (RightParen.Equals(label))
            {
                //Skip past empty trees
                continue;
            }
            else if (treeNormalizer != null)
            {
                label = treeNormalizer.NormalizeNonterminal(label);
            }
            if (label != null)
            {
                label = StarPattern.Replace(label, "*");
                label = SlashPattern.Replace(label, "/");
            }
            Tree newTree = treeFactory.NewTreeNode(label, null);
            // dtrs are added below
            if (currentTree == null)
            {
                // Outermost node: it is pushed itself, so the closing parenthesis pops it back.
                stack.Add(newTree);
            }
            else
            {
                currentTree.AddChild(newTree);
                stack.Add(currentTree);
            }
            currentTree = newTree;
            break;
        case RightParen:
            if (!stack.Any())
            {
                // Warn that file has too many right parens
                //break label;
                goto post_while_label;
            }
            //Accept
            currentTree = stack.Last();
            stack.RemoveAt(stack.Count - 1);
            // i.e., stack.pop()
            if (!stack.Any())
            {
                // Nothing left open: the tree is complete.
                return(currentTree);
            }
            break;
        default:
            if (currentTree == null)
            {
                // A careful Reader should warn here, but it's kind of useful to
                // suppress this because then the TreeReader doesn't print a ton of
                // messages if there is a README file in a directory of Trees.
                //break label;
                goto post_while_label;
            }
            string terminal = (treeNormalizer == null) ? token : treeNormalizer.NormalizeTerminal(token);
            terminal = StarPattern.Replace(terminal, "*");
            terminal = SlashPattern.Replace(terminal, "/");
            Tree leaf = treeFactory.NewLeaf(terminal);
            if (leaf.Label() is IHasIndex)
            {
                var hi = (IHasIndex)leaf.Label();
                hi.SetIndex(wordIndex);
            }
            if (leaf.Label() is IHasWord)
            {
                var hw = (IHasWord)leaf.Label();
                hw.SetWord(leaf.Label().Value());
            }
            wordIndex++;
            currentTree.AddChild(leaf);
            // cdm: Note: this implementation just isn't as efficient as the old recursive descent parser (see 2008 code), where all the daughters are gathered before the tree is made....
            break;
        }
    }
// Jump target standing in for a labeled break out of the while loop.
post_while_label:
    {
    }
    //Reject
    return(null);
}
/// <summary>
/// Converts an XML element into a tree, recursing through its element children.
/// </summary>
/// <remarks>
/// An element named <c>NodeWord</c> that contains no nested <c>NodeWord</c> elements is a
/// terminal. A terminal with several tokens becomes a <c>MissingPhrasal</c> node over one
/// <c>MissingPos</c> preterminal per token; a terminal with a single token becomes a
/// preterminal labeled with the normalized POS. Any other element becomes a node over the
/// trees of its element children, and is passed to <c>PostProcessMWE</c> when it is a
/// "w" element carrying the <c>AttrPos</c> attribute.
/// </remarks>
/// <param name="root">The XML node to convert; it is cast to <c>IElement</c>.</param>
/// <returns>The tree for this element, or null when a non-terminal element yields no children.</returns>
private Tree GetTreeFromXML(INode root)
{
    IElement element = (IElement)root;

    bool isTerminal = element.GetNodeName().Equals(NodeWord) && element.GetElementsByTagName(NodeWord).GetLength() == 0;
    if (isTerminal)
    {
        string pos = treeNormalizer.NormalizeNonterminal(GetPOS(element));
        IList<string> lemmaList = GetLemma(element);
        string morphology = GetMorph(element);
        IList<string> tokens = GetWordString(element.GetTextContent().Trim());
        string subcategory = GetSubcat(element);

        if (lemmaList != null && lemmaList.Count != tokens.Count)
        {
            // If this happens (and it does for a few poorly edited trees)
            // we assume something has gone wrong and ignore the lemmas.
            log.Info("Lemmas don't match tokens, ignoring lemmas: " + "lemmas " + lemmaList + ", tokens " + tokens);
            lemmaList = null;
        }

        IList<Tree> terminalKids = new List<Tree>();

        if (tokens.Count <= 1)
        {
            // Single token: an ordinary preterminal carrying the element's POS.
            string text = treeNormalizer.NormalizeTerminal(tokens[0]);
            Tree onlyLeaf = treeFactory.NewLeaf(text);
            IHasWord asWord = onlyLeaf.Label() as IHasWord;
            if (asWord != null)
            {
                asWord.SetWord(text);
            }
            CoreLabel asCore = onlyLeaf.Label() as CoreLabel;
            if (asCore != null && lemmaList != null)
            {
                asCore.SetLemma(lemmaList[0]);
            }
            IHasContext asContext = onlyLeaf.Label() as IHasContext;
            if (asContext != null)
            {
                asContext.SetOriginalText(morphology);
            }
            IHasCategory asCategory = onlyLeaf.Label() as IHasCategory;
            if (asCategory != null)
            {
                asCategory.SetCategory(subcategory);
            }
            terminalKids.Add(onlyLeaf);

            Tree preterminal = treeFactory.NewTreeNode(pos, terminalKids);
            IHasTag asTag = preterminal.Label() as IHasTag;
            if (asTag != null)
            {
                asTag.SetTag(pos);
            }
            return preterminal;
        }

        //Terminals can have multiple tokens (MWEs). Make these into a
        //flat structure for now.
        for (int index = 0; index < tokens.Count; ++index)
        {
            string text = treeNormalizer.NormalizeTerminal(tokens[index]);
            IList<Tree> holder = new List<Tree>();
            Tree tokenLeaf = treeFactory.NewLeaf(text);
            IHasWord asWord = tokenLeaf.Label() as IHasWord;
            if (asWord != null)
            {
                asWord.SetWord(text);
            }
            CoreLabel asCore = tokenLeaf.Label() as CoreLabel;
            if (asCore != null && lemmaList != null)
            {
                asCore.SetLemma(lemmaList[index]);
            }
            IHasContext asContext = tokenLeaf.Label() as IHasContext;
            if (asContext != null)
            {
                asContext.SetOriginalText(morphology);
            }
            IHasCategory asCategory = tokenLeaf.Label() as IHasCategory;
            if (asCategory != null)
            {
                asCategory.SetCategory(subcategory);
            }
            holder.Add(tokenLeaf);

            Tree tokenPos = treeFactory.NewTreeNode(MissingPos, holder);
            IHasTag asTag = tokenPos.Label() as IHasTag;
            if (asTag != null)
            {
                asTag.SetTag(MissingPos);
            }
            terminalKids.Add(tokenPos);
        }
        return treeFactory.NewTreeNode(MissingPhrasal, terminalKids);
    }

    // Non-terminal: convert every element child, skipping other node types.
    IList<Tree> childTrees = new List<Tree>();
    for (INode xmlChild = element.GetFirstChild(); xmlChild != null; xmlChild = xmlChild.GetNextSibling())
    {
        if (xmlChild.GetNodeType() == NodeConstants.ElementNode)
        {
            Tree childTree = GetTreeFromXML(xmlChild);
            if (childTree != null)
            {
                childTrees.Add(childTree);
            }
            else
            {
                System.Console.Error.Printf("%s: Discarding empty tree (root: %s)%n", this.GetType().FullName, xmlChild.GetNodeName());
            }
        }
    }

    // A "w" element with a POS attribute is treated as an MWE and takes that POS as its label.
    string nodeLabel = element.GetNodeName().Trim();
    bool multiWord = nodeLabel.Equals("w") && element.HasAttribute(AttrPos);
    if (multiWord)
    {
        nodeLabel = element.GetAttribute(AttrPos).Trim();
    }

    Tree phrase = null;
    if (childTrees.Count != 0)
    {
        phrase = treeFactory.NewTreeNode(treeNormalizer.NormalizeNonterminal(nodeLabel), childTrees);
    }
    if (phrase != null && multiWord)
    {
        phrase = PostProcessMWE(phrase);
    }
    return phrase;
}
/// <summary>
/// Copies a subtree, giving each node a <c>CategoryWordTag</c> label and optionally
/// appending parent ('^') and grandparent ('~') annotations to the category of
/// non-preterminal nodes.
/// </summary>
/// <remarks>
/// The label value of every non-leaf node visited is counted in <c>nonTerms</c>.
/// Annotation is controlled by <c>trainOptions</c> (<c>postPA</c>, <c>postGPA</c>,
/// <c>smoothing</c>, <c>postSplitWithBaseCategory</c>, <c>selectivePostSplit</c>,
/// <c>postSplitters</c>).
/// </remarks>
/// <param name="t">The node to transform.</param>
/// <param name="root">The root used to look up parents; when null, no parent context is used.</param>
/// <param name="tf">Factory used to create the new nodes.</param>
/// <returns>The transformed copy of <paramref name="t"/>.</returns>
public virtual Tree TransformTreeHelper(Tree t, Tree root, ITreeFactory tf)
{
    // Work out the parent and grandparent category strings (empty when unavailable).
    Tree parent = null;
    string parentStr = string.Empty;
    if (root != null && !t.Equals(root))
    {
        parent = t.Parent(root);
        parentStr = parent.Label().Value();
    }

    string grandParentStr = string.Empty;
    if (parent != null && !parent.Equals(root))
    {
        Tree grandParent = parent.Parent(root);
        grandParentStr = grandParent.Label().Value();
    }

    string cat = t.Label().Value();
    string baseParentStr = tlpParams.TreebankLanguagePack().BasicCategory(parentStr);
    string baseGrandParentStr = tlpParams.TreebankLanguagePack().BasicCategory(grandParentStr);

    if (t.IsLeaf())
    {
        return tf.NewLeaf(new Word(t.Label().Value()));
    }

    string word = t.HeadTerminal(hf).Value();
    bool isPreTerminal = t.IsPreTerminal();
    nonTerms.IncrementCount(t.Label().Value());

    if (!isPreTerminal)
    {
        // Parent annotation.
        if (trainOptions.postPA && !trainOptions.smoothing && baseParentStr.Length > 0)
        {
            string withParent = trainOptions.postSplitWithBaseCategory
                ? cat + '^' + baseParentStr
                : cat + '^' + parentStr;
            if (!trainOptions.selectivePostSplit || trainOptions.postSplitters.Contains(withParent))
            {
                cat = withParent;
            }
        }

        // Grandparent annotation; when selective, only applied to categories that
        // contain a '^' and appear in the splitter set.
        if (trainOptions.postGPA && !trainOptions.smoothing && grandParentStr.Length > 0)
        {
            string withGrandParent = trainOptions.postSplitWithBaseCategory
                ? cat + '~' + baseGrandParentStr
                : cat + '~' + grandParentStr;
            if (!trainOptions.selectivePostSplit || (cat.Contains("^") && trainOptions.postSplitters.Contains(withGrandParent)))
            {
                cat = withGrandParent;
            }
        }
    }

    Tree copy = tf.NewTreeNode(new CategoryWordTag(cat, word, cat), Collections.EmptyList <Tree>());
    List<Tree> copiedKids = new List<Tree>();
    foreach (Tree kid in t.Children())
    {
        copiedKids.Add(TransformTreeHelper(kid, root, tf));
    }
    copy.SetChildren(copiedKids);
    return copy;
}
/// <summary>
/// Reads tokens from <c>tokenizer</c> and assembles the next complete bracketed tree.
/// </summary>
/// <remarks>
/// A left parenthesis opens a new node (labeled with the following token unless that token
/// is another left parenthesis), a right parenthesis closes the current node, and any
/// other token becomes a leaf of the current node. Open nodes are tracked in <c>stack</c>
/// and <c>currentTree</c>, which are declared outside this method. Leaf indices start at 1.
/// </remarks>
/// <returns>
/// The completed tree once the outermost node is closed; null when the tokens run out,
/// when a right parenthesis arrives with nothing open, or when a token appears outside
/// any tree.
/// </returns>
/// <exception cref="Java.Util.NoSuchElementException"/>
private Tree GetTreeFromInputStream()
{
    int wordIndex = 1;
    // FSA
    while (tokenizer.MoveNext())
    {
        string token = tokenizer.Current;
        switch (token)
        {
            case leftParen:
            {
                // cdm 20100225: This next line used to have "" instead of null, but the traditional and current tree normalizers depend on the label being null not "" when there is no label on a tree (like the outermost English PTB level)
                string label = null;
                if (!tokenizer.Peek().Equals(leftParen))
                {
                    // Consume the label token. Reading Current without advancing would
                    // return the "(" that was just read instead of the label.
                    tokenizer.MoveNext();
                    label = tokenizer.Current;
                }
                if (rightParen.Equals(label))
                {
                    //Skip past empty trees
                    continue;
                }
                else
                {
                    if (treeNormalizer != null)
                    {
                        label = treeNormalizer.NormalizeNonterminal(label);
                    }
                }
                if (label != null)
                {
                    label = StarPattern.Matcher(label).ReplaceAll("*");
                    label = SlashPattern.Matcher(label).ReplaceAll("/");
                }
                Tree newTree = treeFactory.NewTreeNode(label, null);
                // dtrs are added below
                if (currentTree == null)
                {
                    // Outermost node: it is pushed itself, so the closing parenthesis pops it back.
                    stack.Add(newTree);
                }
                else
                {
                    currentTree.AddChild(newTree);
                    stack.Add(currentTree);
                }
                currentTree = newTree;
                break;
            }

            case rightParen:
            {
                if (stack.IsEmpty())
                {
                    // Warn that file has too many right parentheses
                    log.Info("PennTreeReader: warning: file has extra non-matching right parenthesis [ignored]");
                    goto label_break;
                }
                //Accept
                currentTree = stack.Remove(stack.Count - 1);
                // i.e., stack.pop()
                if (stack.IsEmpty())
                {
                    // Nothing left open: the tree is complete.
                    return currentTree;
                }
                break;
            }

            default:
            {
                if (currentTree == null)
                {
                    // A careful Reader should warn here, but it's kind of useful to
                    // suppress this because then the TreeReader doesn't print a ton of
                    // messages if there is a README file in a directory of Trees.
                    // log.info("PennTreeReader: warning: file has extra token not in a s-expression tree: " + token + " [ignored]");
                    goto label_break;
                }
                string terminal = (treeNormalizer == null) ? token : treeNormalizer.NormalizeTerminal(token);
                terminal = StarPattern.Matcher(terminal).ReplaceAll("*");
                terminal = SlashPattern.Matcher(terminal).ReplaceAll("/");
                Tree leaf = treeFactory.NewLeaf(terminal);
                if (leaf.Label() is IHasIndex)
                {
                    IHasIndex hi = (IHasIndex)leaf.Label();
                    hi.SetIndex(wordIndex);
                }
                if (leaf.Label() is IHasWord)
                {
                    IHasWord hw = (IHasWord)leaf.Label();
                    hw.SetWord(leaf.Label().Value());
                }
                if (leaf.Label() is IHasTag)
                {
                    // The enclosing node's label value is recorded as the leaf's tag.
                    IHasTag ht = (IHasTag)leaf.Label();
                    ht.SetTag(currentTree.Label().Value());
                }
                wordIndex++;
                currentTree.AddChild(leaf);
                // cdm: Note: this implementation just isn't as efficient as the old recursive descent parser (see 2008 code), where all the daughters are gathered before the tree is made....
                break;
            }
        }
    }
    // Jump target standing in for a labeled break out of the while loop.
label_break :;
    //Reject
    if (currentTree != null)
    {
        log.Info("PennTreeReader: warning: incomplete tree (extra left parentheses in input): " + currentTree);
    }
    return null;
}