/// <summary>
/// Recursively copies the subtree rooted at <paramref name="node"/>, keeping track of the copy
/// of the foot node and registering copies of named nodes.
/// </summary>
/// <param name="node">Root of the subtree to copy.</param>
/// <param name="newNamesToNodes">
/// Receives an entry (name -> copied node) for every visited node that has an entry in
/// <c>nodesToNames</c>.
/// </param>
/// <param name="treeFactory">Factory used to build the copied nodes.</param>
/// <param name="labelFactory">Factory used to copy labels (not used for the foot node's label).</param>
/// <returns>
/// A pair (copy of <paramref name="node"/>, copy of the foot found in this subtree);
/// the second element is <c>null</c> when the subtree contains no foot.
/// </returns>
private Pair<Tree, Tree> CopyHelper(Tree node, IDictionary<string, Tree> newNamesToNodes, ITreeFactory treeFactory, ILabelFactory labelFactory)
{
    Tree copiedFoot = null;
    Tree copy;

    if (!node.IsLeaf())
    {
        // Interior node: copy every child first, then wrap the copies in a fresh node.
        Tree[] kids = node.Children();
        IList<Tree> copiedKids = new List<Tree>(kids.Length);
        foreach (Tree kid in kids)
        {
            Pair<Tree, Tree> kidResult = CopyHelper(kid, newNamesToNodes, treeFactory, labelFactory);
            copiedKids.Add(kidResult.First());

            Tree kidFoot = kidResult.Second();
            if (kidFoot != null)
            {
                if (copiedFoot != null)
                {
                    log.Info("Error -- two feet found when copying auxiliary tree " + tree.ToString() + "; using last foot found.");
                }
                // The most recently found foot wins.
                copiedFoot = kidFoot;
            }
        }
        copy = treeFactory.NewTreeNode(labelFactory.NewLabel(node.Label()), copiedKids);
    }
    else if (node == foot)
    {
        // Found the foot node; pass it up. It is built as a tree node with an
        // empty child list and reuses the original label object.
        copy = treeFactory.NewTreeNode(node.Label(), new List<Tree>(0));
        copiedFoot = copy;
    }
    else
    {
        copy = treeFactory.NewLeaf(labelFactory.NewLabel(node.Label()));
    }

    if (nodesToNames.Contains(node))
    {
        newNamesToNodes[nodesToNames[node]] = copy;
    }

    return new Pair<Tree, Tree>(copy, copiedFoot);
}
/// <summary>Builds a flat, one-phrasal-level tree from a sentence.</summary>
/// <remarks>
/// The resulting structure is S -> tag*, where each tag node dominates exactly one word leaf.
/// Word leaves are labeled through <paramref name="lf"/>. For a <c>TaggedWord</c> the tag node
/// carries a <c>StringLabel</c> holding the word's tag; for any other word the tag node is
/// labeled "WD" through <paramref name="lf"/>. The root is a <c>StringLabel</c> "S".
/// </remarks>
/// <param name="s">The sentence to make the tree from.</param>
/// <param name="lf">The label factory used for the word labels and the "WD" tag labels.</param>
/// <returns>The one-phrasal-level tree.</returns>
public static Tree ToFlatTree<_T0>(IList<_T0> s, ILabelFactory lf) where _T0 : IHasWord
{
    IList<Tree> preterminals = new List<Tree>(s.Count);

    foreach (IHasWord token in s)
    {
        // The word leaf is always created first.
        Tree leaf = new LabeledScoredTreeNode(lf.NewLabel(token.Word()));

        Tree preterminal;
        if (token is TaggedWord)
        {
            TaggedWord tagged = (TaggedWord)token;
            preterminal = new LabeledScoredTreeNode(new StringLabel(tagged.Tag()), Java.Util.Collections.SingletonList(leaf));
        }
        else
        {
            preterminal = new LabeledScoredTreeNode(lf.NewLabel("WD"), Java.Util.Collections.SingletonList(leaf));
        }

        preterminals.Add(preterminal);
    }

    return new LabeledScoredTreeNode(new StringLabel("S"), preterminals);
}
/// <summary>Add -TMP when not present within an NP.</summary>
/// <remarks>
/// Works in two passes over <paramref name="tree"/>:
/// first the head child (as chosen by <c>headFinder</c>, or the child just before it when the
/// head is a "POS" node) is marked and, if it is an NP/PP/ADVP, processed recursively;
/// then every child is visited, marking all children of a pre-preterminal tree and marking
/// and recursing into NP children otherwise.
/// Labels are replaced in place on the existing nodes; no new tree nodes are created.
/// Category prefixes are compared ordinally so that the result does not depend on the
/// current culture.
/// </remarks>
/// <param name="tree">The tree to add temporal info to.</param>
private void AddTMP9(Tree tree)
{
    // do the head chain under it
    Tree ht = headFinder.DetermineHead(tree);

    // special fix for possessives! -- make noun before head
    if (ht.Value().Equals("POS"))
    {
        int j = tree.ObjectIndexOf(ht);
        if (j > 0)
        {
            ht = tree.GetChild(j - 1);
        }
    }

    if (ht.IsPreTerminal() || IsTmpPhraseCategory(ht.Value()))
    {
        if (!TmpPattern.Matcher(ht.Value()).Matches())
        {
            AppendTmpSuffix(ht);
        }
        // The value is read again here because the label may just have been replaced.
        if (IsTmpPhraseCategory(ht.Value()))
        {
            AddTMP9(ht);
        }
    }

    // do the NPs under it (which may or may not be the head chain)
    foreach (Tree kidlet in tree.Children())
    {
        if (tree.IsPrePreTerminal() && !TmpPattern.Matcher(kidlet.Value()).Matches())
        {
            AppendTmpSuffix(kidlet);
        }
        else if (kidlet.Value().StartsWith("NP", System.StringComparison.Ordinal))
        {
            // don't add -TMP twice!
            if (!TmpPattern.Matcher(kidlet.Value()).Matches())
            {
                AppendTmpSuffix(kidlet);
            }
            AddTMP9(kidlet);
        }
    }
}

/// <summary>Returns true when <paramref name="value"/> starts with "NP", "PP" or "ADVP" (ordinal comparison).</summary>
private static bool IsTmpPhraseCategory(string value)
{
    return value.StartsWith("NP", System.StringComparison.Ordinal)
        || value.StartsWith("PP", System.StringComparison.Ordinal)
        || value.StartsWith("ADVP", System.StringComparison.Ordinal);
}

/// <summary>
/// Replaces the label of <paramref name="node"/> with one whose value is the old value plus "-TMP".
/// Note: this changes the label of the existing node rather than creating a new tree node. Beware!
/// </summary>
private static void AppendTmpSuffix(Tree node)
{
    ILabelFactory lf = node.LabelFactory();
    node.SetLabel(lf.NewLabel(node.Value() + "-TMP"));
}
/// <summary>
/// If things match, this method destructively changes the children list
/// of the tree t. When this method is called, t is an NP and there must
/// be at least two children to the right of ccIndex.
/// </summary>
/// <remarks>
/// Four rewrites are tried in order, and exactly one is applied:
/// <list type="number">
/// <item><description><c>a CC b c ...</c> becomes <c>(a CC b) c ...</c> (CC is the second child);</description></item>
/// <item><description><c>DT a CC b c</c> becomes <c>DT (a CC b) c</c>;</description></item>
/// <item><description><c>... a, b CC c ...</c> becomes <c>... (a, b CC c) ...</c>;</description></item>
/// <item><description>otherwise the children are split into a left conjunct, the CC and a right conjunct.</description></item>
/// </list>
/// The child at <c>ccIndex - 1</c> is read unconditionally, so <paramref name="ccIndex"/> must be at least 1.
/// Tag prefixes are compared ordinally and the preconjunct word is lower-cased with the
/// invariant culture, so the result does not depend on the current culture.
/// </remarks>
/// <param name="t">The tree to transform a conjunction in</param>
/// <param name="ccIndex">The index of the CC child</param>
/// <returns>t</returns>
private static Tree TransformCc(Tree t, int ccIndex)
{
    // use the factories of t to create new nodes
    ITreeFactory tf = t.TreeFactory();
    ILabelFactory lf = t.Label().LabelFactory();

    // Snapshot of the children taken before any mutation; all indices below refer to it.
    Tree[] ccSiblings = t.Children();

    // check if other CC
    var ccPositions = new List<int>();
    for (int i = ccIndex + 1; i < ccSiblings.Length; i++)
    {
        if (ccSiblings[i].Value().StartsWith(PartsOfSpeech.CoordinatingConjunction, System.StringComparison.Ordinal) && i < ccSiblings.Length - 1)
        {
            // second conjunct to ensure that a CC we add isn't the last child
            ccPositions.Add(i);
        }
    }

    // a CC b c ... -> (a CC b) c ...  with b not a DT
    string beforeSibling = ccSiblings[ccIndex - 1].Value();
    if (ccIndex == 1
        && (beforeSibling == PartsOfSpeech.Determiner
            || beforeSibling == PartsOfSpeech.Adjective
            || beforeSibling == PartsOfSpeech.Adverb
            || !(ccSiblings[ccIndex + 1].Value() == PartsOfSpeech.Determiner))
        && !(beforeSibling.StartsWith("NP", System.StringComparison.Ordinal)
            || beforeSibling.Equals("ADJP")
            || beforeSibling == PartsOfSpeech.NounPlural))
    {
        // something like "soya or maize oil"
        string leftHead = GetHeadTag(ccSiblings[ccIndex - 1]);

        // create a new tree to be inserted as first child of t
        Tree left = tf.NewTreeNode(lf.NewLabel(leftHead), null);
        for (int i = 0; i < ccIndex + 2; i++)
        {
            left.AddChild(ccSiblings[i]);
        }

        // remove all the children of t before ccIndex+2
        for (int i = 0; i < ccIndex + 2; i++)
        {
            t.RemoveChild(0);
        }

        // if stuff after (like "soya or maize oil and vegetables")
        // we need to put the tree in another tree
        if (ccPositions.Any())
        {
            bool comma = false;
            int index = ccPositions[0];
            if (ccSiblings[index - 1].Value() == PartsOfSpeech.Comma)
            {
                // to handle the case of a comma ("soya and maize oil, and vegetables")
                index = index - 1;
                comma = true;
            }

            string head = GetHeadTag(ccSiblings[index - 1]);
            if (ccIndex + 2 < index)
            {
                Tree tree = tf.NewTreeNode(lf.NewLabel(head), null);
                tree.AddChild(0, left);
                int k = 1;
                for (int j = ccIndex + 2; j < index; j++)
                {
                    t.RemoveChild(0);
                    tree.AddChild(k, ccSiblings[j]);
                    k++;
                }
                t.AddChild(0, tree);
            }
            else
            {
                t.AddChild(0, left);
            }

            // Everything from position 2 on (3 when a comma precedes the next CC) moves under a new node.
            Tree rightTree = tf.NewTreeNode(lf.NewLabel(Noun), null);
            int start = 2;
            if (comma)
            {
                start++;
            }
            while (start < t.NumChildren())
            {
                Tree sib = t.GetChild(start);
                t.RemoveChild(start);
                rightTree.AddChild(sib);
            }
            t.AddChild(rightTree);
        }
        else
        {
            t.AddChild(0, left);
        }
    }
    // DT a CC b c -> DT (a CC b) c
    else if (ccIndex == 2
        && ccSiblings[0].Value().StartsWith("DT", System.StringComparison.Ordinal)
        && ccSiblings[ccIndex - 1].Value() != PartsOfSpeech.NounPlural
        && (ccSiblings.Length == 5 || (ccPositions.Any() && ccPositions[0] == 5)))
    {
        string head = GetHeadTag(ccSiblings[ccIndex - 1]);

        // create a new tree to be inserted as second child of t (after the determiner)
        Tree child = tf.NewTreeNode(lf.NewLabel(head), null);
        for (int i = 1; i < ccIndex + 2; i++)
        {
            child.AddChild(ccSiblings[i]);
        }

        // remove all the children of t between the determiner and ccIndex+2
        for (int i = 1; i < ccIndex + 2; i++)
        {
            t.RemoveChild(1);
        }
        t.AddChild(1, child);
    }
    // ... a, b CC c ... -> ... (a, b CC c) ...
    else if (ccIndex > 2
        && ccSiblings[ccIndex - 2].Value() == PartsOfSpeech.Comma
        && ccSiblings[ccIndex - 1].Value() != PartsOfSpeech.NounPlural)
    {
        string head = GetHeadTag(ccSiblings[ccIndex - 1]);
        Tree child = tf.NewTreeNode(lf.NewLabel(head), null);
        for (int j = ccIndex - 3; j < ccIndex + 2; j++)
        {
            child.AddChild(ccSiblings[j]);
        }

        // Walk further left over additional "word ," pairs and pull them in as well.
        int i = ccIndex - 4;
        while (i > 0 && ccSiblings[i].Value() == PartsOfSpeech.Comma)
        {
            child.AddChild(0, ccSiblings[i]);     // add the comma
            child.AddChild(0, ccSiblings[i - 1]); // add the word before the comma
            i = i - 2;
        }
        if (i < 0)
        {
            i = -1;
        }

        // remove the old children
        for (int j = i + 1; j < ccIndex + 2; j++)
        {
            t.RemoveChild(i + 1);
        }

        // put the new tree
        t.AddChild(i + 1, child);
    }
    // something like "the new phone book and tour guide" -> multiple heads
    // we want (NP the new phone book) (CC and) (NP tour guide)
    else
    {
        bool commaLeft = false;
        bool commaRight = false;
        bool preconj = false;
        int indexBegin = 0;
        Tree conjT = tf.NewTreeNode(lf.NewLabel(PartsOfSpeech.CoordinatingConjunction), null);

        // create the left tree
        string leftHead = GetHeadTag(ccSiblings[ccIndex - 1]);
        Tree left = tf.NewTreeNode(lf.NewLabel(leftHead), null);

        // handle the case of a preconjunct (either, both, neither)
        Tree first = ccSiblings[0];
        // Invariant casing: a culture-sensitive ToLower() can map 'I' to a dotless i and miss "either".
        string leaf = first.FirstChild().Value().ToLowerInvariant();
        if (leaf.Equals("either") || leaf.Equals("neither") || leaf.Equals("both"))
        {
            preconj = true;
            indexBegin = 1;
            conjT.AddChild(first.FirstChild());
        }

        for (int i = indexBegin; i < ccIndex - 1; i++)
        {
            left.AddChild(ccSiblings[i]);
        }

        // handle the case of a comma ("GM soya and maize, and food ingredients")
        if (ccSiblings[ccIndex - 1].Value() == PartsOfSpeech.Comma)
        {
            commaLeft = true;
        }
        else
        {
            left.AddChild(ccSiblings[ccIndex - 1]);
        }

        // create the CC tree
        Tree cc = ccSiblings[ccIndex];

        // create the right tree
        int nextCc;
        if (!ccPositions.Any())
        {
            nextCc = ccSiblings.Length;
        }
        else
        {
            nextCc = ccPositions[0];
        }
        string rightHead = GetHeadTag(ccSiblings[nextCc - 1]);
        Tree right = tf.NewTreeNode(lf.NewLabel(rightHead), null);
        for (int i = ccIndex + 1; i < nextCc - 1; i++)
        {
            right.AddChild(ccSiblings[i]);
        }

        // handle the case of a comma ("GM soya and maize, and food ingredients")
        if (ccSiblings[nextCc - 1].Value() == PartsOfSpeech.Comma)
        {
            commaRight = true;
        }
        else
        {
            right.AddChild(ccSiblings[nextCc - 1]);
        }

        // put trees together in old t, first we remove the old nodes
        for (int i = 0; i < nextCc; i++)
        {
            t.RemoveChild(0);
        }

        if (ccPositions.Any())
        {
            // need an extra level
            Tree tree = tf.NewTreeNode(lf.NewLabel(Noun), null);
            if (preconj)
            {
                tree.AddChild(conjT);
            }
            if (left.NumChildren() > 0)
            {
                tree.AddChild(left);
            }
            if (commaLeft)
            {
                tree.AddChild(ccSiblings[ccIndex - 1]);
            }
            tree.AddChild(cc);
            if (right.NumChildren() > 0)
            {
                tree.AddChild(right);
            }
            // The trailing comma stays a direct child of t, right after the new node.
            if (commaRight)
            {
                t.AddChild(0, ccSiblings[nextCc - 1]);
            }
            t.AddChild(0, tree);
        }
        else
        {
            if (preconj)
            {
                t.AddChild(conjT);
            }
            if (left.NumChildren() > 0)
            {
                t.AddChild(left);
            }
            if (commaLeft)
            {
                t.AddChild(ccSiblings[ccIndex - 1]);
            }
            t.AddChild(cc);
            if (right.NumChildren() > 0)
            {
                t.AddChild(right);
            }
            if (commaRight)
            {
                t.AddChild(ccSiblings[nextCc - 1]);
            }
        }
    }

    return t;
}
/// <summary>
/// Creates a new <c>LabeledScoredTreeNode</c> whose label is produced from
/// <paramref name="word"/> by the <c>lf</c> label factory.
/// </summary>
/// <param name="word">The text handed to the label factory.</param>
/// <returns>The newly created node.</returns>
public override Tree NewLeaf(string word)
{
    var leafLabel = lf.NewLabel(word);
    return new LabeledScoredTreeNode(leafLabel);
}
/// <summary>
/// Creates a leaf for <paramref name="word"/> by turning it into a label with the
/// <c>mlf</c> label factory and delegating to the label-based <c>NewLeaf</c> overload.
/// </summary>
/// <param name="word">The text handed to the label factory.</param>
/// <returns>The leaf returned by the label-based overload.</returns>
public Tree NewLeaf(string word)
{
    var wordLabel = mlf.NewLabel(word);
    return NewLeaf(wordLabel);
}
/// <summary>
/// Create a new <c>TreeGraphNode</c> with the supplied label.
/// </summary>
/// <remarks>
/// The supplied label is not stored directly: it is passed through <c>Mlf.NewLabel</c>
/// and the result, cast to <c>CoreLabel</c>, becomes this node's label.
/// </remarks>
/// <param name="label">the label for this node</param>
public TreeGraphNode(ILabel label)
{
    var coreLabel = (CoreLabel)Mlf.NewLabel(label);
    this._label = coreLabel;
}
/// <summary>
/// If things match, this method destructively changes the children list
/// of the tree t.
/// </summary>
/// <remarks>
/// When this method is called, t is an NP and there must
/// be at least two children to the right of ccIndex.
/// Four rewrites are tried in order, and exactly one is applied:
/// <list type="number">
/// <item><description><c>a CC b c ...</c> becomes <c>(a CC b) c ...</c> (CC is the second child);</description></item>
/// <item><description><c>DT a CC b c</c> becomes <c>DT (a CC b) c</c>;</description></item>
/// <item><description><c>... a, b CC c ...</c> becomes <c>... (a, b CC c) ...</c>;</description></item>
/// <item><description>otherwise the children are split into a left conjunct, the CC and a right conjunct.</description></item>
/// </list>
/// The child at <c>ccIndex - 1</c> is read unconditionally, so <paramref name="ccIndex"/> must be at least 1.
/// When <c>Verbose</c> is set, diagnostics are written to the log and to the console.
/// Tag prefixes are compared ordinally and the preconjunct word is lower-cased with the
/// invariant culture, so the result does not depend on the current culture.
/// </remarks>
/// <param name="t">The tree to transform a conjunction in</param>
/// <param name="ccIndex">The index of the CC child</param>
/// <returns>t</returns>
private static Tree TransformCC(Tree t, int ccIndex)
{
    if (Verbose)
    {
        log.Info("transformCC in: " + t);
    }

    // use the factories of t to create new nodes
    ITreeFactory tf = t.TreeFactory();
    ILabelFactory lf = t.Label().LabelFactory();

    // Snapshot of the children taken before any mutation; all indices below refer to it.
    Tree[] ccSiblings = t.Children();

    // check if other CC
    IList<int> ccPositions = new List<int>();
    for (int i = ccIndex + 1; i < ccSiblings.Length; i++)
    {
        if (ccSiblings[i].Value().StartsWith("CC", System.StringComparison.Ordinal) && i < ccSiblings.Length - 1)
        {
            // second conjunct to ensure that a CC we add isn't the last child
            ccPositions.Add(i);
        }
    }
    bool hasMoreCc = ccPositions.Count != 0;

    // a CC b c ... -> (a CC b) c ...  with b not a DT
    string beforeSibling = ccSiblings[ccIndex - 1].Value();
    if (ccIndex == 1
        && (beforeSibling.Equals("DT")
            || beforeSibling.Equals("JJ")
            || beforeSibling.Equals("RB")
            || !(ccSiblings[ccIndex + 1].Value().Equals("DT")))
        && !(beforeSibling.StartsWith("NP", System.StringComparison.Ordinal)
            || beforeSibling.Equals("ADJP")
            || beforeSibling.Equals("NNS")))
    {
        // something like "soya or maize oil"
        string leftHead = GetHeadTag(ccSiblings[ccIndex - 1]);

        // create a new tree to be inserted as first child of t
        Tree left = tf.NewTreeNode(lf.NewLabel(leftHead), null);
        for (int i = 0; i < ccIndex + 2; i++)
        {
            left.AddChild(ccSiblings[i]);
        }
        if (Verbose)
        {
            System.Console.Out.WriteLine("print left tree");
            left.PennPrint();
            System.Console.Out.WriteLine();
        }

        // remove all the children of t before ccIndex+2
        for (int i = 0; i < ccIndex + 2; i++)
        {
            t.RemoveChild(0);
        }
        if (Verbose)
        {
            if (t.NumChildren() == 0)
            {
                System.Console.Out.WriteLine("Youch! No t children");
            }
        }

        // if stuff after (like "soya or maize oil and vegetables")
        // we need to put the tree in another tree
        if (hasMoreCc)
        {
            bool comma = false;
            int index = ccPositions[0];
            if (Verbose)
            {
                log.Info("more CC index " + index);
            }
            if (ccSiblings[index - 1].Value().Equals(","))
            {
                // to handle the case of a comma ("soya and maize oil, and vegetables")
                index = index - 1;
                comma = true;
            }
            if (Verbose)
            {
                log.Info("more CC index " + index);
            }

            string head = GetHeadTag(ccSiblings[index - 1]);
            if (ccIndex + 2 < index)
            {
                Tree tree = tf.NewTreeNode(lf.NewLabel(head), null);
                tree.AddChild(0, left);
                int k = 1;
                for (int j = ccIndex + 2; j < index; j++)
                {
                    if (Verbose)
                    {
                        ccSiblings[j].PennPrint();
                    }
                    t.RemoveChild(0);
                    tree.AddChild(k, ccSiblings[j]);
                    k++;
                }
                if (Verbose)
                {
                    System.Console.Out.WriteLine("print t");
                    t.PennPrint();
                    System.Console.Out.WriteLine("print tree");
                    tree.PennPrint();
                    System.Console.Out.WriteLine();
                }
                t.AddChild(0, tree);
            }
            else
            {
                t.AddChild(0, left);
            }

            // Everything from position 2 on (3 when a comma precedes the next CC) moves under a new NP.
            Tree rightTree = tf.NewTreeNode(lf.NewLabel("NP"), null);
            int start = 2;
            if (comma)
            {
                start++;
            }
            while (start < t.NumChildren())
            {
                Tree sib = t.GetChild(start);
                t.RemoveChild(start);
                rightTree.AddChild(sib);
            }
            t.AddChild(rightTree);
        }
        else
        {
            t.AddChild(0, left);
        }
    }
    // DT a CC b c -> DT (a CC b) c
    else if (ccIndex == 2
        && ccSiblings[0].Value().StartsWith("DT", System.StringComparison.Ordinal)
        && !ccSiblings[ccIndex - 1].Value().Equals("NNS")
        && (ccSiblings.Length == 5 || (hasMoreCc && ccPositions[0] == 5)))
    {
        string head = GetHeadTag(ccSiblings[ccIndex - 1]);

        // create a new tree to be inserted as second child of t (after the determiner)
        Tree child = tf.NewTreeNode(lf.NewLabel(head), null);
        for (int i = 1; i < ccIndex + 2; i++)
        {
            child.AddChild(ccSiblings[i]);
        }
        if (Verbose)
        {
            if (child.NumChildren() == 0)
            {
                System.Console.Out.WriteLine("Youch! No child children");
            }
        }

        // remove all the children of t between the determiner and ccIndex+2
        for (int i = 1; i < ccIndex + 2; i++)
        {
            t.RemoveChild(1);
        }
        t.AddChild(1, child);
    }
    // ... a, b CC c ... -> ... (a, b CC c) ...
    else if (ccIndex > 2
        && ccSiblings[ccIndex - 2].Value().Equals(",")
        && !ccSiblings[ccIndex - 1].Value().Equals("NNS"))
    {
        string head = GetHeadTag(ccSiblings[ccIndex - 1]);
        Tree child = tf.NewTreeNode(lf.NewLabel(head), null);
        for (int j = ccIndex - 3; j < ccIndex + 2; j++)
        {
            child.AddChild(ccSiblings[j]);
        }
        if (Verbose)
        {
            if (child.NumChildren() == 0)
            {
                System.Console.Out.WriteLine("Youch! No child children");
            }
        }

        // Walk further left over additional "word ," pairs and pull them in as well.
        int i = ccIndex - 4;
        while (i > 0 && ccSiblings[i].Value().Equals(","))
        {
            child.AddChild(0, ccSiblings[i]);     // add the comma
            child.AddChild(0, ccSiblings[i - 1]); // add the word before the comma
            i = i - 2;
        }
        if (i < 0)
        {
            i = -1;
        }

        // remove the old children
        for (int j = i + 1; j < ccIndex + 2; j++)
        {
            t.RemoveChild(i + 1);
        }

        // put the new tree
        t.AddChild(i + 1, child);
    }
    // something like "the new phone book and tour guide" -> multiple heads
    // we want (NP the new phone book) (CC and) (NP tour guide)
    else
    {
        bool commaLeft = false;
        bool commaRight = false;
        bool preconj = false;
        int indexBegin = 0;
        Tree conjT = tf.NewTreeNode(lf.NewLabel("CC"), null);

        // create the left tree
        string leftHead = GetHeadTag(ccSiblings[ccIndex - 1]);
        Tree left = tf.NewTreeNode(lf.NewLabel(leftHead), null);

        // handle the case of a preconjunct (either, both, neither)
        Tree first = ccSiblings[0];
        // Invariant casing: a culture-sensitive ToLower() can map 'I' to a dotless i and miss "either".
        string leaf = first.FirstChild().Value().ToLowerInvariant();
        if (leaf.Equals("either") || leaf.Equals("neither") || leaf.Equals("both"))
        {
            preconj = true;
            indexBegin = 1;
            conjT.AddChild(first.FirstChild());
        }

        for (int i = indexBegin; i < ccIndex - 1; i++)
        {
            left.AddChild(ccSiblings[i]);
        }

        // handle the case of a comma ("GM soya and maize, and food ingredients")
        if (ccSiblings[ccIndex - 1].Value().Equals(","))
        {
            commaLeft = true;
        }
        else
        {
            left.AddChild(ccSiblings[ccIndex - 1]);
        }

        // create the CC tree
        Tree cc = ccSiblings[ccIndex];

        // create the right tree
        int nextCC;
        if (!hasMoreCc)
        {
            nextCC = ccSiblings.Length;
        }
        else
        {
            nextCC = ccPositions[0];
        }
        string rightHead = GetHeadTag(ccSiblings[nextCC - 1]);
        Tree right = tf.NewTreeNode(lf.NewLabel(rightHead), null);
        for (int i = ccIndex + 1; i < nextCC - 1; i++)
        {
            right.AddChild(ccSiblings[i]);
        }

        // handle the case of a comma ("GM soya and maize, and food ingredients")
        if (ccSiblings[nextCC - 1].Value().Equals(","))
        {
            commaRight = true;
        }
        else
        {
            right.AddChild(ccSiblings[nextCC - 1]);
        }

        if (Verbose)
        {
            if (left.NumChildren() == 0)
            {
                System.Console.Out.WriteLine("Youch! No left children");
            }
            if (right.NumChildren() == 0)
            {
                System.Console.Out.WriteLine("Youch! No right children");
            }
        }

        // put trees together in old t, first we remove the old nodes
        for (int i = 0; i < nextCC; i++)
        {
            t.RemoveChild(0);
        }

        if (hasMoreCc)
        {
            // need an extra level
            Tree tree = tf.NewTreeNode(lf.NewLabel("NP"), null);
            if (preconj)
            {
                tree.AddChild(conjT);
            }
            if (left.NumChildren() > 0)
            {
                tree.AddChild(left);
            }
            if (commaLeft)
            {
                tree.AddChild(ccSiblings[ccIndex - 1]);
            }
            tree.AddChild(cc);
            if (right.NumChildren() > 0)
            {
                tree.AddChild(right);
            }
            // The trailing comma stays a direct child of t, right after the new NP.
            if (commaRight)
            {
                t.AddChild(0, ccSiblings[nextCC - 1]);
            }
            t.AddChild(0, tree);
        }
        else
        {
            if (preconj)
            {
                t.AddChild(conjT);
            }
            if (left.NumChildren() > 0)
            {
                t.AddChild(left);
            }
            if (commaLeft)
            {
                t.AddChild(ccSiblings[ccIndex - 1]);
            }
            t.AddChild(cc);
            if (right.NumChildren() > 0)
            {
                t.AddChild(right);
            }
            if (commaRight)
            {
                t.AddChild(ccSiblings[nextCC - 1]);
            }
        }
    }

    if (Verbose)
    {
        log.Info("transformCC out: " + t);
    }
    return t;
}
/// <summary>
/// Create a new <c>TreeGraphNode</c> with the supplied label.
/// </summary>
/// <remarks>
/// The supplied label is not stored directly: it is passed through <c>mlf.NewLabel</c>
/// and the result, cast to <c>CoreLabel</c>, becomes this node's label.
/// </remarks>
/// <param name="label">the label for this node.</param>
public TreeGraphNode(ILabel label)
{
    var coreLabel = (CoreLabel)mlf.NewLabel(label);
    // "this." is required here: the parameter shadows the field of the same name.
    this.label = coreLabel;
}