/// <summary>
/// Recursively binarizes one local tree. At each step a single child is split off and the
/// remaining children are wrapped in an intermediate node whose label starts with "@" followed
/// by <paramref name="topCat"/>.
/// </summary>
/// <remarks>
/// While <paramref name="headLoc"/> is positive, the leftmost child is split off and the rest is
/// binarized with <c>headLoc - 1</c>. Once the head is the first child, a unary intermediate node
/// is inserted first (skipped when <c>tlp.IsStartSymbol(topCat)</c> is true), and then children
/// are split off from the right until a single child remains.
/// <paramref name="ll"/> is mutated by this method: it is kept to at most <c>markovOrder</c>
/// entries and is rendered with <c>Join</c> into the intermediate labels when
/// <c>simpleLabels</c> is false.
/// </remarks>
/// <param name="t">The local tree to binarize; its label and children are read.</param>
/// <param name="head">Supplies the word and tag stored in every intermediate label.</param>
/// <param name="headLoc">Index of the head child within <paramref name="t"/>. A negative value returns <paramref name="t"/> unchanged.</param>
/// <param name="topCat">Category string embedded in every intermediate label.</param>
/// <param name="ll">Window of recently split-off siblings; modified in place.</param>
/// <param name="doneLeft">Only consulted when <paramref name="headLoc"/> is 0: false means the unary separator node has not been inserted yet.</param>
/// <returns>A new tree carrying the label of <paramref name="t"/>, or <paramref name="t"/> itself when <paramref name="headLoc"/> is negative.</returns>
private Tree MarkovOutsideBinarizeLocalTree(Tree t, TaggedWord head, int headLoc, string topCat, LinkedList<Tree> ll, bool doneLeft)
{
    string headWord = head.Word();
    string headTag = head.Tag();
    IList<Tree> pair = new List<Tree>(2);

    if (headLoc < 0)
    {
        // Neither the head-first nor the head-later case applies.
        return t;
    }

    if (headLoc > 0)
    {
        // The head is not the first child yet: split off the leftmost child and remember it.
        ll.AddLast(t.GetChild(0));
        if (ll.Count > markovOrder)
        {
            ll.RemoveFirst();
        }

        string leftLabelText;
        if (simpleLabels)
        {
            leftLabelText = "@" + topCat;
        }
        else
        {
            string headCategory = t.GetChild(headLoc).Label().Value();
            string leftContext = Join(ll) + (headLoc > markovOrder - 1 ? " ..." : string.Empty);
            leftLabelText = "@" + topCat + ": " + leftContext + " " + headCategory + " ]";
        }

        ILabel leftLabel = new CategoryWordTag(leftLabelText, headWord, headTag);
        Tree remainder = tf.NewTreeNode(leftLabel, t.GetChildrenAsList().SubList(1, t.NumChildren()));
        pair.Add(t.GetChild(0));
        pair.Add(MarkovOutsideBinarizeLocalTree(remainder, head, headLoc - 1, topCat, ll, false));
        return tf.NewTreeNode(t.Label(), pair);
    }

    // From here on the head is the first child (headLoc == 0).
    if (!doneLeft)
    {
        // Left side is finished: restart with a fresh window, normally below a unary separator node.
        if (tlp.IsStartSymbol(topCat))
        {
            return MarkovOutsideBinarizeLocalTree(t, head, headLoc, topCat, new LinkedList<Tree>(), true);
        }

        string unaryLabelText;
        if (simpleLabels)
        {
            unaryLabelText = "@" + topCat;
        }
        else
        {
            string headCategory = t.GetChild(headLoc).Label().Value();
            unaryLabelText = "@" + topCat + ": " + headCategory + " ]";
        }

        ILabel unaryLabel = new CategoryWordTag(unaryLabelText, headWord, headTag);
        Tree unaryNode = tf.NewTreeNode(unaryLabel, t.GetChildrenAsList());
        pair.Add(MarkovOutsideBinarizeLocalTree(unaryNode, head, headLoc, topCat, new LinkedList<Tree>(), true));
        return tf.NewTreeNode(t.Label(), pair);
    }

    int childCount = t.NumChildren();
    if (childCount == 1)
    {
        // Only the head is left, so the recursion ends here.
        return tf.NewTreeNode(t.Label(), Java.Util.Collections.SingletonList(t.GetChild(0)));
    }

    // Split off the rightmost child and remember it at the front of the window.
    ll.AddFirst(t.GetChild(childCount - 1));
    if (ll.Count > markovOrder)
    {
        ll.RemoveLast();
    }

    string rightLabelText;
    if (simpleLabels)
    {
        rightLabelText = "@" + topCat;
    }
    else
    {
        string headCategory = t.GetChild(headLoc).Label().Value();
        string rightContext = (childCount > markovOrder - 1 ? "... " : string.Empty) + Join(ll);
        rightLabelText = "@" + topCat + ": " + headCategory + " " + rightContext;
    }

    ILabel rightLabel = new CategoryWordTag(rightLabelText, headWord, headTag);
    Tree leftPart = tf.NewTreeNode(rightLabel, t.GetChildrenAsList().SubList(0, childCount - 1));
    pair.Add(MarkovOutsideBinarizeLocalTree(leftPart, head, headLoc, topCat, ll, true));
    pair.Add(t.GetChild(childCount - 1));
    return tf.NewTreeNode(t.Label(), pair);
}
/// <summary>
/// Builds a normalized copy of <paramref name="tree"/>: labels are reduced to their basic
/// category, optional WH and PRT rewrites are applied, and punctuation preterminals and nodes
/// left without children are dropped.
/// </summary>
/// <remarks>
/// Steps, in order:
/// a start-symbol node is replaced by the transform of its first child;
/// a leaf is copied with its label;
/// the label value is passed through <c>tlp.BasicCategory</c>;
/// <c>whOption</c> bit 1 strips a leading "WH", bit 2 rewrites leading WP/WDT/WRB to PRP/DT/RB,
/// and bit 4 strips a leading "WH" if one is still present;
/// when <c>deletePunct</c> is set, a preterminal whose tag or word is punctuation is removed;
/// when <c>fixCollinsBaseNP</c> is set, an NP whose only child is also an NP is replaced by the
/// transform of that child;
/// "PRT" is renamed to "ADVP".
/// </remarks>
/// <param name="tree">The tree to transform; may be null.</param>
/// <returns>
/// The transformed tree, or null when <paramref name="tree"/> is null, is a deleted punctuation
/// preterminal, or has no children left after transformation.
/// </returns>
public virtual Tree TransformTree(Tree tree)
{
    if (tree == null)
    {
        return null;
    }
    ITreeFactory tf = tree.TreeFactory();
    string s = tree.Value();
    if (tlp.IsStartSymbol(s))
    {
        return TransformTree(tree.FirstChild());
    }
    if (tree.IsLeaf())
    {
        return tf.NewLeaf(tree.Label());
    }
    s = tlp.BasicCategory(s);
    if (((whOption & 1) != 0) && s.StartsWith("WH"))
    {
        s = Sharpen.Runtime.Substring(s, 2);
    }
    if ((whOption & 2) != 0)
    {
        // NOTE(review): ReplaceAll is assumed to treat its first argument as a regular
        // expression, so '^' anchors at the start of the label - confirm against the helper.
        s = s.ReplaceAll("^WP", "PRP"); // does both WP and WP$ !!
        s = s.ReplaceAll("^WDT", "DT");
        s = s.ReplaceAll("^WRB", "RB");
    }
    if (((whOption & 4) != 0) && s.StartsWith("WH"))
    {
        s = Sharpen.Runtime.Substring(s, 2);
    }
    // wsg2010: Might need a better way to deal with tag ambiguity. This still doesn't handle the
    // case where the GOLD tree does not label a punctuation mark as such (common in French), and
    // the guess tree does.
    if (deletePunct && tree.IsPreTerminal() && (tlp.IsEvalBIgnoredPunctuationTag(s) || tlp.IsPunctuationWord(tree.FirstChild().Value())))
    {
        return null;
    }
    // remove the extra NPs inserted in the collinsBaseNP option
    if (fixCollinsBaseNP && s.Equals("NP"))
    {
        Tree[] kids = tree.Children();
        if (kids.Length == 1 && tlp.BasicCategory(kids[0].Value()).Equals("NP"))
        {
            return TransformTree(kids[0]);
        }
    }
    // Magerman erased this distinction, and everyone else has followed like sheep...
    if (s.Equals("PRT"))
    {
        s = "ADVP";
    }
    // Take the child array once. The previous loop was bounded by an undeclared 'numKids'
    // and called tree.Children() again on every iteration.
    Tree[] originalChildren = tree.Children();
    IList<Tree> children = new List<Tree>();
    foreach (Tree child in originalChildren)
    {
        Tree newChild = TransformTree(child);
        if (newChild != null)
        {
            children.Add(newChild);
        }
    }
    if (children.IsEmpty())
    {
        // Every child was deleted, so this node is deleted as well.
        return null;
    }
    Tree node = tf.NewTreeNode(tree.Label(), children);
    node.SetValue(s);
    return node;
}