/// <summary>
/// Binarizes one local tree, choosing the routine from the
/// <c>markovFactor</c> and <c>insideFactor</c> settings.
/// </summary>
/// <param name="t">
/// The local tree to binarize. When <c>markovFactor</c> is set, its label is
/// replaced in place by a <c>CategoryWordTag</c> built from the old category
/// and the head's word and tag.
/// </param>
/// <param name="headNum">Head position, forwarded to the selected routine.</param>
/// <param name="head">Supplies the word and tag used for new labels.</param>
/// <returns>The tree returned by the selected binarization routine.</returns>
internal virtual Tree BinarizeLocalTree(Tree t, int headNum, TaggedWord head)
{
    if (!markovFactor)
    {
        if (insideFactor)
        {
            return InsideBinarizeLocalTree(t, headNum, head, 0, 0);
        }
        return OutsideBinarizeLocalTree(t, t.Label().Value(), t.Label().Value(), headNum, head, 0, string.Empty, 0, string.Empty);
    }

    // Markov factoring: read the category first, because the label is overwritten next.
    string topCat = t.Label().Value();
    ILabel relabeled = new CategoryWordTag(topCat, head.Word(), head.Tag());
    t.SetLabel(relabeled);

    Tree binarized = insideFactor
        ? MarkovInsideBinarizeLocalTreeNew(t, headNum, 0, t.NumChildren() - 1, true)
        : MarkovOutsideBinarizeLocalTree(t, head, headNum, topCat, new LinkedList<Tree>(), false);
    return binarized;
}
/// <summary>Creates a label whose contents are set from the given string.</summary>
/// <param name="labelStr">Text passed to <c>CategoryWordTag.SetFromString</c>.</param>
/// <returns>A fresh <c>CategoryWordTag</c> initialised from <paramref name="labelStr"/>.</returns>
public virtual ILabel NewLabelFromString(string labelStr)
{
    var parsed = new CategoryWordTag();
    parsed.SetFromString(labelStr);
    return parsed;
}
/// <summary>
/// Reconstructs a parse for the span whose combined score matches the score
/// reported by <c>IScore</c> for the same span, head word and head tag.
/// </summary>
/// <param name="start">Sentence index of start of span (fenceposts, from 0 up)</param>
/// <param name="end">Sentence index of end of span (right side fencepost)</param>
/// <param name="hWord">Sentence index of head word (left side fencepost)</param>
/// <param name="hTag">Tag assigned to hWord</param>
/// <returns>
/// The reconstructed tree, or <c>null</c> (after logging) when no split and
/// argument reproduce the span's score.
/// </returns>
private Tree ExtractBestParse(int start, int end, int hWord, int hTag)
{
    string headWordStr = wordIndex.Get(words[hWord]);
    string headTagStr = tagIndex.Get(hTag);
    // The head word is used both as the category and as the word of the label.
    ILabel headLabel = new CategoryWordTag(headWordStr, headWordStr, headTagStr);
    int numTags = tagIndex.Size();

    // Width-one span: a single node over a single word leaf.
    if (end - start == 1)
    {
        Tree leaf = tf.NewLeaf(new Word(headWordStr));
        return tf.NewTreeNode(headLabel, Java.Util.Collections.SingletonList(leaf));
    }

    // Backtrace: look for the split / argument word / argument tag whose
    // combined score matches the score of the whole span.
    IList<Tree> children = new List<Tree>();
    double bestScore = IScore(start, end, hWord, hTag);
    for (int mid = start + 1; mid < end; mid++)
    {
        int binD = binDistance[hWord][mid];
        bool headOnLeft = hWord < mid;
        // The argument lives in whichever half does not contain the head.
        int argFrom = headOnLeft ? mid : start;
        int argTo = headOnLeft ? end : mid;
        for (int argWord = argFrom; argWord < argTo; argWord++)
        {
            for (int argTag = 0; argTag < numTags; argTag++)
            {
                if (headOnLeft)
                {
                    var total = IScore(start, mid, hWord, hTag)
                        + IScore(mid, end, argWord, argTag)
                        + headScore[binD][hWord][dg.TagBin(hTag)][argWord][dg.TagBin(argTag)]
                        + headStop[argWord][dg.TagBin(argTag)][mid]
                        + headStop[argWord][dg.TagBin(argTag)][end];
                    if (!Matches(total, bestScore))
                    {
                        continue;
                    }
                    children.Add(ExtractBestParse(start, mid, hWord, hTag));
                    children.Add(ExtractBestParse(mid, end, argWord, argTag));
                    return tf.NewTreeNode(headLabel, children);
                }
                else
                {
                    var total = IScore(start, mid, argWord, argTag)
                        + IScore(mid, end, hWord, hTag)
                        + headScore[binD][hWord][dg.TagBin(hTag)][argWord][dg.TagBin(argTag)]
                        + headStop[argWord][dg.TagBin(argTag)][start]
                        + headStop[argWord][dg.TagBin(argTag)][mid];
                    if (!Matches(total, bestScore))
                    {
                        continue;
                    }
                    children.Add(ExtractBestParse(start, mid, argWord, argTag));
                    children.Add(ExtractBestParse(mid, end, hWord, hTag));
                    return tf.NewTreeNode(headLabel, children);
                }
            }
        }
    }

    log.Info("Problem in ExhaustiveDependencyParser::extractBestParse");
    return null;
}
/// <summary>
/// Recursively binarizes the children of <paramref name="t"/> that have not
/// yet been consumed, taking one child from the left while the head lies to
/// the right of the processed prefix and otherwise one child from the right.
/// </summary>
/// <param name="t">The local tree whose children are being grouped.</param>
/// <param name="labelStr">Category string for the node created by this call.</param>
/// <param name="finalCat">Category used after '@' in intermediate labels.</param>
/// <param name="headNum">Head position, compared against <paramref name="leftProcessed"/>.</param>
/// <param name="head">Supplies the word and tag for every created label.</param>
/// <param name="leftProcessed">Number of children already consumed on the left.</param>
/// <param name="leftStr">Accumulated description of consumed left children.</param>
/// <param name="rightProcessed">Number of children already consumed on the right.</param>
/// <param name="rightStr">Accumulated description of consumed right children.</param>
/// <returns>A node with at most two children covering the unconsumed children.</returns>
private Tree OutsideBinarizeLocalTree(Tree t, string labelStr, string finalCat, int headNum, TaggedWord head, int leftProcessed, string leftStr, int rightProcessed, string rightStr)
{
    IList<Tree> pair = new List<Tree>(2);
    ILabel label = new CategoryWordTag(labelStr, head.Word(), head.Tag());

    // Base case: two or fewer children remain, so attach them directly.
    int remaining = t.NumChildren() - leftProcessed - rightProcessed;
    if (remaining <= 2)
    {
        pair.Add(t.GetChild(leftProcessed));
        if (remaining == 2)
        {
            pair.Add(t.GetChild(leftProcessed + 1));
        }
        return tf.NewTreeNode(label, pair);
    }

    Tree first;
    Tree second;
    if (headNum > leftProcessed)
    {
        // Consume the leftmost unprocessed child; the rest goes under a new right node.
        first = t.GetChild(leftProcessed);
        string extendedLeft = leftStr + ' ' + first.Label().Value();
        string restLabel = simpleLabels
            ? '@' + finalCat
            : '@' + finalCat + " :" + extendedLeft + " ..." + rightStr;
        second = OutsideBinarizeLocalTree(t, restLabel, finalCat, headNum, head, leftProcessed + 1, extendedLeft, rightProcessed, rightStr);
    }
    else
    {
        // Consume the rightmost unprocessed child; the rest goes under a new left node.
        second = t.GetChild(t.NumChildren() - rightProcessed - 1);
        string extendedRight = ' ' + second.Label().Value() + rightStr;
        string restLabel = simpleLabels
            ? '@' + finalCat
            : '@' + finalCat + " :" + leftStr + " ..." + extendedRight;
        first = OutsideBinarizeLocalTree(t, restLabel, finalCat, headNum, head, leftProcessed, leftStr, rightProcessed + 1, extendedRight);
    }

    pair.Add(first);
    pair.Add(second);
    return tf.NewTreeNode(label, pair);
}
/// <summary>
/// Verifies that constructing a <c>CategoryWordTag</c> from another one
/// yields the same category, word and tag.
/// </summary>
public virtual void TestCopy()
{
    // Source label, checked first so a constructor problem is reported on its own.
    CategoryWordTag source = new CategoryWordTag("A", "B", "C");
    NUnit.Framework.Assert.AreEqual("A", source.Category());
    NUnit.Framework.Assert.AreEqual("B", source.Word());
    NUnit.Framework.Assert.AreEqual("C", source.Tag());

    // The copy must carry over all three components.
    CategoryWordTag duplicate = new CategoryWordTag(source);
    NUnit.Framework.Assert.AreEqual("A", duplicate.Category());
    NUnit.Framework.Assert.AreEqual("B", duplicate.Word());
    NUnit.Framework.Assert.AreEqual("C", duplicate.Tag());
}
/// <summary>
/// Relabels <paramref name="t"/> and all of its descendants in place: leaves
/// get a <c>Word</c> label, and every other node gets a <c>CategoryWordTag</c>
/// whose word and tag come from the head child chosen by <c>hf</c>.
/// </summary>
/// <param name="t">Tree to relabel; may be <c>null</c>.</param>
/// <returns>The same <paramref name="t"/> instance (or <c>null</c> if it was <c>null</c>).</returns>
private Tree TransformTreeHelper(Tree t)
{
    if (t == null)
    {
        return t;
    }

    string cat = t.Label().Value();
    if (t.IsLeaf())
    {
        ILabel leafLabel = new Word(cat);
        t.SetLabel(leafLabel);
        return t;
    }

    // Children are relabeled first, so a non-leaf head child already carries a CategoryWordTag.
    foreach (Tree kid in t.Children())
    {
        TransformTreeHelper(kid);
    }

    string word = null;
    string tag = null;
    Tree headChild = hf.DetermineHead(t);
    if (headChild == null)
    {
        // No head: report it and fall through with a null word and tag.
        log.Error("null head for tree\n" + t.ToString());
    }
    else if (headChild.IsLeaf())
    {
        // A node directly above a word: its own category serves as the tag.
        tag = cat;
        word = headChild.Label().Value();
    }
    else
    {
        CategoryWordTag headLabel = (CategoryWordTag)headChild.Label();
        word = headLabel.Word();
        tag = headLabel.Tag();
    }

    ILabel nodeLabel = new CategoryWordTag(cat, word, tag);
    t.SetLabel(nodeLabel);
    return t;
}
/// <summary>
/// Verifies that <c>CategoryWordTagFactory.NewLabel</c>, given an existing
/// <c>CategoryWordTag</c>, returns a <c>CategoryWordTag</c> with the same
/// category, word and tag.
/// </summary>
public virtual void TestCopy()
{
    CategoryWordTag source = new CategoryWordTag("A", "B", "C");
    NUnit.Framework.Assert.AreEqual("A", source.Category());
    NUnit.Framework.Assert.AreEqual("B", source.Word());
    NUnit.Framework.Assert.AreEqual("C", source.Tag());

    CategoryWordTagFactory factory = new CategoryWordTagFactory();
    ILabel produced = factory.NewLabel(source);

    // The factory must hand back the concrete label type before the parts are compared.
    NUnit.Framework.Assert.IsTrue(produced is CategoryWordTag);
    CategoryWordTag duplicate = (CategoryWordTag)produced;
    NUnit.Framework.Assert.AreEqual("A", duplicate.Category());
    NUnit.Framework.Assert.AreEqual("B", duplicate.Word());
    NUnit.Framework.Assert.AreEqual("C", duplicate.Tag());
}
/// <summary>
/// Rebuilds the tree for <paramref name="edge"/> by following its
/// <c>backEdge</c> and <c>backHook</c> back-pointers.
/// </summary>
/// <param name="edge">Edge to expand; its head, tag and state indices determine the node label.</param>
/// <returns>
/// A node labelled with the edge's state, head word and tag, having one leaf
/// child (no back-pointers), one child (back edge only) or two children
/// (back edge and back hook).
/// </returns>
protected internal virtual Tree ExtractParse(Edge edge)
{
    string head = wordIndex.Get(words[edge.head]);
    string tag = tagIndex.Get(edge.tag);
    string state = stateIndex.Get(edge.state);
    ILabel label = new CategoryWordTag(state, head, tag);

    bool hasHook = edge.backHook != null;

    if (!hasHook && edge.backEdge == null)
    {
        // No back-pointers at all: this is a tag node and needs a word terminal below it.
        Tree leaf;
        if (originalLabels[edge.head] != null)
        {
            leaf = tf.NewLeaf(originalLabels[edge.head]);
        }
        else
        {
            leaf = tf.NewLeaf(head);
        }
        IList<Tree> leafOnly = Java.Util.Collections.SingletonList(leaf);
        return tf.NewTreeNode(label, leafOnly);
    }

    if (!hasHook)
    {
        // Unary: a single back edge.
        IList<Tree> onlyChild = Java.Util.Collections.SingletonList(ExtractParse(edge.backEdge));
        return tf.NewTreeNode(label, onlyChild);
    }

    // Binary: the hook kind decides which sub-parse comes first.
    IList<Tree> children = new List<Tree>();
    if (edge.backHook.IsPreHook())
    {
        children.Add(ExtractParse(edge.backEdge));
        children.Add(ExtractParse(edge.backHook.backEdge));
    }
    else
    {
        children.Add(ExtractParse(edge.backHook.backEdge));
        children.Add(ExtractParse(edge.backEdge));
    }
    return tf.NewTreeNode(label, children);
}
/// <summary>
/// Builds a copy of <paramref name="tree"/> in which every non-leaf label is
/// a <c>CategoryWordTag</c> whose category (and tag, when the source label
/// has one) has been passed through the language pack's <c>BasicCategory</c>.
/// Scores are carried over to the new nodes.
/// </summary>
/// <param name="tree">Tree to copy and relabel.</param>
/// <returns>The newly built tree.</returns>
public virtual Tree TransformTree(Tree tree)
{
    ILabel lab = tree.Label();

    // Leaves keep their label object; only the node itself is rebuilt.
    if (tree.IsLeaf())
    {
        Tree leafCopy = this.tf.NewLeaf(lab);
        leafCopy.SetScore(tree.Score());
        return leafCopy;
    }

    string basicCat = this._enclosing.TreebankLanguagePack().BasicCategory(lab.Value());

    // Children are transformed in order; every result is kept.
    int numKids = tree.NumChildren();
    IList<Tree> children = new List<Tree>(numKids);
    int kidIndex = 0;
    while (kidIndex < numKids)
    {
        children.Add(this.TransformTree(tree.GetChild(kidIndex)));
        kidIndex++;
    }

    CategoryWordTag newLabel = new CategoryWordTag(lab);
    newLabel.SetCategory(basicCat);
    IHasTag tagged = lab as IHasTag;
    if (tagged != null)
    {
        string basicTag = this._enclosing.TreebankLanguagePack().BasicCategory(tagged.Tag());
        newLabel.SetTag(basicTag);
    }

    Tree node = this.tf.NewTreeNode(newLabel, children);
    node.SetScore(tree.Score());
    return node;
}
// end class SubcategoryStripper
/// <summary>
/// Builds a copy of <paramref name="tree"/> in which every non-leaf label is
/// a <c>CategoryWordTag</c> whose category (and tag, when the source label
/// has one) has been passed through the language pack's <c>BasicCategory</c>
/// and then <c>StripGF</c>. Scores are carried over to the new nodes.
/// </summary>
/// <param name="tree">Tree to copy and relabel.</param>
/// <returns>The newly built tree.</returns>
public virtual Tree TransformTree(Tree tree)
{
    ILabel lab = tree.Label();

    // Leaves keep their label object; only the node itself is rebuilt.
    if (tree.IsLeaf())
    {
        Tree leafCopy = this.tf.NewLeaf(lab);
        leafCopy.SetScore(tree.Score());
        return leafCopy;
    }

    string strippedCat = this._enclosing.TreebankLanguagePack().BasicCategory(lab.Value());
    strippedCat = this._enclosing.TreebankLanguagePack().StripGF(strippedCat);

    int numKids = tree.NumChildren();
    IList<Tree> children = new List<Tree>(numKids);
    int kidIndex = 0;
    while (kidIndex < numKids)
    {
        children.Add(this.TransformTree(tree.GetChild(kidIndex)));
        kidIndex++;
    }

    CategoryWordTag newLabel = new CategoryWordTag(lab);
    newLabel.SetCategory(strippedCat);
    IHasTag tagged = lab as IHasTag;
    if (tagged != null)
    {
        string strippedTag = this._enclosing.TreebankLanguagePack().BasicCategory(tagged.Tag());
        strippedTag = this._enclosing.TreebankLanguagePack().StripGF(strippedTag);
        newLabel.SetTag(strippedTag);
    }

    Tree node = this.tf.NewTreeNode(newLabel, children);
    node.SetScore(tree.Score());
    return node;
}
/// <summary>Binarizes the tree according to options set up in the constructor.</summary>
/// <remarks>
/// Does the whole tree by calling itself recursively. Leaves become
/// <c>Word</c>-labelled leaves, preterminals become <c>CategoryWordTag</c>
/// nodes over their word, and other nodes are rebuilt from their transformed
/// children and then passed to <c>BinarizeLocalTree</c>. Nodes whose category
/// starts with the language pack's start symbol are rebuilt with their
/// original label and are not binarized.
/// </remarks>
/// <param name="t">
/// A tree to be binarized. The non-leaf nodes must already have
/// CategoryWordTag labels, with heads percolated.
/// </param>
/// <returns>A binary tree, or <c>null</c> when <paramref name="t"/> is <c>null</c>.</returns>
public virtual Tree TransformTree(Tree t)
{
    // handle null
    if (t == null)
    {
        return null;
    }

    string cat = t.Label().Value();

    // handle words
    if (t.IsLeaf())
    {
        ILabel label = new Word(cat);
        return tf.NewLeaf(label);
    }

    // handle tags: the preterminal's category doubles as its tag
    if (t.IsPreTerminal())
    {
        Tree childResult = TransformTree(t.GetChild(0));
        string word = childResult.Value();
        IList<Tree> newChildren = new List<Tree>(1);
        newChildren.Add(childResult);
        return tf.NewTreeNode(new CategoryWordTag(cat, word, cat), newChildren);
    }

    // handle categories
    Tree headChild = hf.DetermineHead(t);

    // Category symbols are identifiers, not natural-language text, so the
    // prefix test must be ordinal. The single-argument StartsWith overload is
    // culture-sensitive and can give locale-dependent answers.
    bool isRoot = cat.StartsWith(tlp.StartSymbol(), System.StringComparison.Ordinal);

    if (headChild == null && !isRoot)
    {
        log.Info("### No head found for:");
        t.PennPrint();
    }

    int headNum = -1;
    Tree[] kids = t.Children();
    IList<Tree> transformedKids = new List<Tree>(kids.Length);
    for (int childNum = 0; childNum < kids.Length; childNum++)
    {
        Tree child = kids[childNum];
        Tree childResult = TransformTree(child); // recursive call
        if (child == headChild)
        {
            headNum = childNum;
        }
        transformedKids.Add(childResult);
    }

    if (isRoot)
    {
        // The ROOT tree keeps its label and is not binarized.
        return tf.NewTreeNode(t.Label(), transformedKids);
    }

    // NOTE(review): when no head was found above, headChild is null here and
    // the next two lines throw NullReferenceException (unchanged behaviour).
    string headWord = ((IHasWord)headChild.Label()).Word();
    string headTag = ((IHasTag)headChild.Label()).Tag();
    ILabel nodeLabel = new CategoryWordTag(cat, headWord, headTag);
    Tree result = tf.NewTreeNode(nodeLabel, transformedKids);

    // cdm Mar 2005: invent a head so the binarization code, which still
    // expects a TaggedWord head, does not have to be rewritten.
    TaggedWord head = new TaggedWord(headWord, headTag);
    return BinarizeLocalTree(result, headNum, head);
}
/// <summary>
/// Recursively binarizes a local tree, peeling children off the left until
/// the head is the first child, then inserting a unary node (skipped when
/// <paramref name="topCat"/> is a start symbol), then peeling children off
/// the right. Intermediate labels are '@' + <paramref name="topCat"/>, plus,
/// unless <c>simpleLabels</c> is set, the head category and the joined
/// contents of <paramref name="ll"/>.
/// </summary>
/// <param name="t">Local tree (or intermediate node) still to be binarized.</param>
/// <param name="head">Supplies the word and tag for every intermediate label.</param>
/// <param name="headLoc">Index of the head child within the current <paramref name="t"/>.</param>
/// <param name="topCat">Category of the original local tree.</param>
/// <param name="ll">
/// Window of recently peeled children, trimmed to <c>markovOrder</c> entries.
/// It is mutated by this call.
/// </param>
/// <param name="doneLeft">True once the unary separating the two sides has been handled.</param>
/// <returns>The binarized node, or <paramref name="t"/> itself when <paramref name="headLoc"/> is negative.</returns>
private Tree MarkovOutsideBinarizeLocalTree(Tree t, TaggedWord head, int headLoc, string topCat, LinkedList<Tree> ll, bool doneLeft)
{
    string word = head.Word();
    string tag = head.Tag();

    if (headLoc < 0)
    {
        return t;
    }

    IList<Tree> pair = new List<Tree>(2);

    if (headLoc > 0)
    {
        // Children remain to the left of the head: peel off the first one.
        ll.AddLast(t.GetChild(0));
        if (ll.Count > markovOrder)
        {
            ll.RemoveFirst();
        }

        string leftLabelStr;
        if (simpleLabels)
        {
            leftLabelStr = '@' + topCat;
        }
        else
        {
            string headStr = t.GetChild(headLoc).Label().Value();
            string leftStr = Join(ll) + (headLoc > markovOrder - 1 ? " ..." : string.Empty);
            leftLabelStr = '@' + topCat + ": " + leftStr + ' ' + headStr + " ]";
        }

        ILabel leftLabel = new CategoryWordTag(leftLabelStr, word, tag);
        Tree rest = tf.NewTreeNode(leftLabel, t.GetChildrenAsList().SubList(1, t.NumChildren()));
        pair.Add(t.GetChild(0));
        pair.Add(MarkovOutsideBinarizeLocalTree(rest, head, headLoc - 1, topCat, ll, false));
        return tf.NewTreeNode(t.Label(), pair);
    }

    // From here on headLoc == 0: the head is the first child.
    if (!doneLeft)
    {
        // A start-symbol category gets no separating unary; restart on the right side.
        if (tlp.IsStartSymbol(topCat))
        {
            return MarkovOutsideBinarizeLocalTree(t, head, headLoc, topCat, new LinkedList<Tree>(), true);
        }

        // Insert a unary to separate the sides.
        string unaryLabelStr;
        if (simpleLabels)
        {
            unaryLabelStr = '@' + topCat;
        }
        else
        {
            string headStr = t.GetChild(headLoc).Label().Value();
            unaryLabelStr = '@' + topCat + ": " + headStr + " ]";
        }

        ILabel unaryLabel = new CategoryWordTag(unaryLabelStr, word, tag);
        Tree below = tf.NewTreeNode(unaryLabel, t.GetChildrenAsList());
        pair.Add(MarkovOutsideBinarizeLocalTree(below, head, headLoc, topCat, new LinkedList<Tree>(), true));
        return tf.NewTreeNode(t.Label(), pair);
    }

    int len = t.NumChildren();
    if (len == 1)
    {
        // Only the head is left.
        return tf.NewTreeNode(t.Label(), Java.Util.Collections.SingletonList(t.GetChild(0)));
    }

    // Peel off the last child on the right.
    ll.AddFirst(t.GetChild(len - 1));
    if (ll.Count > markovOrder)
    {
        ll.RemoveLast();
    }

    string rightLabelStr;
    if (simpleLabels)
    {
        rightLabelStr = '@' + topCat;
    }
    else
    {
        string headStr = t.GetChild(headLoc).Label().Value();
        string rightStr = (len > markovOrder - 1 ? "... " : string.Empty) + Join(ll);
        rightLabelStr = '@' + topCat + ": " + headStr + ' ' + rightStr;
    }

    ILabel rightLabel = new CategoryWordTag(rightLabelStr, word, tag);
    Tree front = tf.NewTreeNode(rightLabel, t.GetChildrenAsList().SubList(0, len - 1));
    pair.Add(MarkovOutsideBinarizeLocalTree(front, head, headLoc, topCat, ll, true));
    pair.Add(t.GetChild(len - 1));
    return tf.NewTreeNode(t.Label(), pair);
}
/// <summary>Builds a label by handing the given string to <c>CategoryWordTag.SetFromString</c>.</summary>
/// <param name="labelStr">String from which the label's contents are set.</param>
/// <returns>The newly created <c>CategoryWordTag</c>.</returns>
public virtual ILabel NewLabelFromString(string labelStr)
{
    var fresh = new CategoryWordTag();
    fresh.SetFromString(labelStr);
    return fresh;
}