/// <summary>
/// Removes empty-element terminals (tag <c>-NONE-</c>) from the tree rooted at
/// <paramref name="root"/> and then prunes every internal node that is left
/// without children as a consequence. The tree is modified in place.
/// </summary>
/// <remarks>
/// The traversal is iterative: each internal node is pushed twice, first unvisited
/// (to schedule its children) and then visited (to check, after all descendants
/// have been handled, whether it became empty). A node without a parent is never
/// detached, and <paramref name="root"/> is never pruned for having become empty.
/// </remarks>
/// <param name="root">Root of the tree to clean up; a null root is ignored.</param>
public static void RemoveNullProduction(PhrasalNode root)
{
    if (root == null)
    {
        return;
    }

    var nodestack = new Stack<PhrasalNode>();
    var visitflags = new Stack<bool>();
    nodestack.Push(root);
    visitflags.Push(false);

    while (nodestack.Count > 0)
    {
        var node = nodestack.Pop();
        var visited = visitflags.Pop();

        if (visited)
        {
            // Second visit: every descendant has been processed, so an empty
            // child list here means all children were removed.
            if (node.Children.Count == 0 && node.Parent != null && node != root)
            {
                node.Parent.Children.Remove(node);
            }

            continue;
        }

        if (node.Children.Count == 0)
        {
            // Terminal: drop it when it is an empty element. A terminal without
            // a parent (e.g. a single-node tree) has nothing to be detached from.
            if (node.Tag == "-NONE-" && node.Parent != null)
            {
                node.Parent.Children.Remove(node);
            }

            continue;
        }

        // Internal node: revisit it after its children. The removals above only
        // happen after this enumeration has finished, so the list is not
        // modified while it is being iterated.
        nodestack.Push(node);
        visitflags.Push(true);
        foreach (var chd in node.Children)
        {
            nodestack.Push(chd);
            visitflags.Push(false);
        }
    }
}
/// <summary>
/// Undoes X-bar style binarization in the subtree rooted at <paramref name="node"/>.
/// Children whose tag ends in <c>\</c> or <c>/</c> are treated as binarization
/// fragments: a fragment that belongs to <paramref name="node"/> is replaced by its
/// own children; any other fragment just loses the trailing marker character.
/// </summary>
/// <param name="node">Subtree root to restore in place; null is ignored.</param>
public static void RecoverFromXBarBinarize(PhrasalNode node)
{
    if (node == null)
    {
        return;
    }

    // Work bottom-up so that nested fragments are already flattened.
    foreach (var child in node.Children)
    {
        RecoverFromXBarBinarize(child);
    }

    if (node.Children.Count == 0)
    {
        return;
    }

    var rebuilt = new List<PhrasalNode>();
    foreach (var child in node.Children)
    {
        bool isFragment = child.Tag.EndsWith("\\") || child.Tag.EndsWith("/");
        if (!isFragment)
        {
            rebuilt.Add(child);
            continue;
        }

        // Tag with the trailing marker character stripped.
        string baseTag = child.Tag.Substring(0, child.Tag.Length - 1);
        if (baseTag != node.Tag && child.Tag != node.Tag)
        {
            // Fragment of some other category: keep the node, drop the marker.
            child.Tag = baseTag;
            rebuilt.Add(child);
            continue;
        }

        // Fragment of this node: hoist its children into this node.
        foreach (var grandchild in child.Children)
        {
            rebuilt.Add(grandchild);
            grandchild.Parent = node;
        }
    }

    node.Children = rebuilt;
}
/// <summary>
/// Appends the lexical item of every leaf below <paramref name="node"/> to
/// <paramref name="sb"/>, in child order, each followed by a single space.
/// </summary>
/// <param name="sb">Builder that receives the words.</param>
/// <param name="node">Subtree whose leaves are written out.</param>
private void GetSentence(StringBuilder sb, PhrasalNode node)
{
    if (node.Children.Count > 0)
    {
        foreach (var child in node.Children)
        {
            GetSentence(sb, child);
        }

        return;
    }

    // Leaf: emit the word and a trailing separator.
    sb.Append(node.Lex).Append(' ');
}
/// <summary>
/// Appends the bracketed form of the subtree rooted at <paramref name="node"/>
/// to <paramref name="sb"/>: a leaf is written as <c>(Tag Lex)</c>, an internal
/// node as <c>(Tag </c> followed by its children and a closing <c>)</c>.
/// </summary>
/// <param name="sb">Builder that receives the bracketed text.</param>
/// <param name="node">Subtree to serialize.</param>
private void GetParseLine(StringBuilder sb, PhrasalNode node)
{
    if (node.Children.Count == 0)
    {
        sb.Append('(').Append(node.Tag).Append(' ').Append(node.Lex).Append(')');
        return;
    }

    sb.Append("(").Append(node.Tag).Append(" ");
    foreach (var child in node.Children)
    {
        GetParseLine(sb, child);
    }

    sb.Append(")");
}
/// <summary>
/// Collects one <c>"Tag left right"</c> entry per internal node below
/// <paramref name="node"/>, after shrinking each span so that it neither starts
/// nor ends on a position that <c>IsPUNC</c> accepts. Leaves add no entry.
/// </summary>
/// <param name="pos">Per-position strings passed to <c>IsPUNC</c>, indexed by span position.</param>
/// <param name="bset">Set that receives the bracket strings.</param>
/// <param name="node">Subtree to process.</param>
/// <param name="left">Index of the first position covered by <paramref name="node"/>.</param>
/// <param name="right">Receives the index of the last position covered by <paramref name="node"/> (inclusive).</param>
private void GetBracketsIgnorePunc(string[] pos, HashSet<string> bset, PhrasalNode node, int left, out int right)
{
    // A leaf covers exactly one position.
    right = left;
    if (node.Children.Count == 0)
    {
        return;
    }

    int cursor = left;
    foreach (var child in node.Children)
    {
        GetBracketsIgnorePunc(pos, bset, child, cursor, out right);
        cursor = right + 1;
    }

    // Trim from both ends; the span never shrinks below one position.
    int first = left;
    int last = right;
    while (first < last && IsPUNC(pos[first]))
    {
        first += 1;
    }

    while (first < last && IsPUNC(pos[last]))
    {
        last -= 1;
    }

    bset.Add(string.Format("{0} {1} {2}", node.Tag, first, last));
}
/// <summary>
/// Collects one <c>"Tag left right"</c> entry per internal node below
/// <paramref name="node"/>, where left and right are the inclusive positions of
/// the first and last leaf it covers. Leaves add no entry.
/// </summary>
/// <param name="bset">Set that receives the bracket strings.</param>
/// <param name="node">Subtree to process.</param>
/// <param name="left">Index of the first position covered by <paramref name="node"/>.</param>
/// <param name="right">Receives the index of the last position covered by <paramref name="node"/> (inclusive).</param>
private void GetBrackets(HashSet<string> bset, PhrasalNode node, int left, out int right)
{
    // A leaf covers exactly one position.
    right = left;
    if (node.Children.Count == 0)
    {
        return;
    }

    int cursor = left;
    foreach (var child in node.Children)
    {
        GetBrackets(bset, child, cursor, out right);
        cursor = right + 1;
    }

    bset.Add(string.Format("{0} {1} {2}", node.Tag, left, right));
}
/// <summary>
/// Renders the subtree rooted at <paramref name="node"/> as text by joining the
/// rows produced by <c>BuildTextTree</c>.
/// </summary>
/// <param name="node">Root of the subtree to draw.</param>
/// <returns>All rows joined with the separator <c>"\r\r"</c>.</returns>
public static string DrawTextTree(PhrasalNode node)
{
    // NOTE(review): the separator is two carriage returns, not "\r\n" or
    // Environment.NewLine -- confirm this is intentional.
    List<StringBuilder> rows = BuildTextTree(node);
    return string.Join("\r\r", rows);
}
/// <summary>
/// Assigns a word to every leaf below <paramref name="node"/>: each leaf's
/// <c>Lex</c> becomes <c>words[leaf.Start]</c>.
/// </summary>
/// <param name="words">Words indexed by leaf start position.</param>
/// <param name="node">Subtree whose leaves are annotated in place.</param>
private static void AnnotateLex(string[] words, PhrasalNode node)
{
    if (node.Children.Count == 0)
    {
        node.Lex = words[node.Start];
        return;
    }

    foreach (var child in node.Children)
    {
        AnnotateLex(words, child);
    }
}
/// <summary>
/// Assigns <c>Start</c> and <c>End</c> to every node of the subtree rooted at
/// <paramref name="node"/>. A leaf spans <c>[start, start + 1)</c>; an internal
/// node starts at <paramref name="start"/> and ends where its last child ends.
/// </summary>
/// <param name="start">Position at which <paramref name="node"/> begins.</param>
/// <param name="node">Subtree root; null is ignored.</param>
private static void ComputeStartEnd(int start, PhrasalNode node)
{
    if (node == null)
    {
        return;
    }

    int end;
    bool isLeaf = node.Children == null || node.Children.Count == 0;
    if (isLeaf)
    {
        end = start + 1;
    }
    else
    {
        // Each child begins where the previous one ended.
        end = start;
        foreach (var child in node.Children)
        {
            ComputeStartEnd(end, child);
            end = child.End;
        }
    }

    node.Start = start;
    node.End = end;
}
/// <summary>
/// Builds a tree from a bracketed parse line such as <c>(S (NP (DT the)) ...)</c>.
/// </summary>
/// <remarks>
/// The line is scanned character by character. <c>(</c> opens a new child of the
/// current node, <c>)</c> closes the current node (a pending token becomes its
/// <c>Lex</c>), and a space or tab ends a pending token, which becomes the current
/// node's <c>Tag</c>. The single child created under the initial <c>TOP</c> node
/// becomes the root. A null, empty or whitespace-only line leaves the initial
/// root in place, tagged <c>TOP</c>.
/// NOTE(review): whitespace between a word and its closing parenthesis
/// (<c>(DT the )</c>) stores the word as the Tag and leaves Lex unset -- confirm
/// such input never occurs.
/// </remarks>
/// <param name="line">Bracketed representation of one tree.</param>
/// <exception cref="Exception">
/// The brackets are unbalanced, the line does not contain exactly one top-level
/// tree, or scanning failed; in the last case the original failure is available
/// as <see cref="Exception.InnerException"/>.
/// </exception>
public PhrasalTree(string line)
{
    Root.Tag = "TOP";
    if (string.IsNullOrWhiteSpace(line))
    {
        return;
    }

    PhrasalNode thisnode = Root;
    try
    {
        bool insideStr = false;
        StringBuilder sb = new StringBuilder();
        foreach (char c in line)
        {
            switch (c)
            {
                case '(':
                    // A token directly followed by '(' is the current node's tag.
                    if (insideStr)
                    {
                        thisnode.Tag = sb.ToString();
                        sb.Clear();
                        insideStr = false;
                    }

                    var chd = new PhrasalNode();
                    chd.Parent = thisnode;
                    thisnode.Children.Add(chd);
                    thisnode = chd;
                    break;

                case ')':
                    // A token directly followed by ')' is the node's word.
                    if (insideStr)
                    {
                        thisnode.Lex = sb.ToString();
                        sb.Clear();
                        insideStr = false;
                    }

                    // Becomes null after a surplus ')'; any further access then
                    // fails and is reported as an invalid format below.
                    thisnode = thisnode.Parent;
                    break;

                case ' ':
                case '\t':
                    if (insideStr)
                    {
                        thisnode.Tag = sb.ToString();
                        sb.Clear();
                        insideStr = false;
                    }

                    break;

                default:
                    insideStr = true;
                    sb.Append(c);
                    break;
            }
        }
    }
    catch (Exception ex)
    {
        // Same exception type and message as before, but keep the original
        // failure attached so the cause is not lost.
        throw new Exception("Invalid Tree format!", ex);
    }

    if (thisnode != Root)
    {
        throw new Exception("Invalid Tree format!");
    }

    if (Root.Children.Count != 1)
    {
        throw new Exception("Invalid Tree format! Multiple Root!");
    }

    // Drop the artificial TOP node.
    Root = Root.Children[0];
    Root.Parent = null;
    ComputeStartEnd();
}
/// <summary>
/// Collapses every unary chain in the subtree rooted at <paramref name="node"/>
/// and records each collapsed chain in <paramref name="unaryRuleCount"/>.
/// </summary>
/// <remarks>
/// For a node with exactly one child, the chain of single children is followed
/// down to the first node that does not have exactly one child. The chain is
/// recorded as the tab-separated tags from <paramref name="node"/> down to that
/// last node. The intermediate nodes are then removed: if the last node has a
/// different tag it becomes the only child of <paramref name="node"/>, otherwise
/// its children are attached to <paramref name="node"/> directly. Re-attached
/// nodes get their <c>Parent</c> updated to <paramref name="node"/>.
/// NOTE(review): when the chain ends in a leaf with the same tag as
/// <paramref name="node"/>, the node ends up with no children and the leaf's
/// Lex is not copied -- confirm whether that case can occur.
/// </remarks>
/// <param name="node">Subtree root, modified in place; null is ignored.</param>
/// <param name="unaryRuleCount">Counter that receives one entry per collapsed chain.</param>
public static void CollapseUnaryRules(PhrasalNode node, Counter<string> unaryRuleCount)
{
    if (node == null || node.Children.Count == 0)
    {
        return;
    }

    if (node.Children.Count == 1)
    {
        // Follow the unary chain to its last node.
        var ulist = new List<PhrasalNode>();
        var xnode = node.Children[0];
        ulist.Add(xnode);
        while (xnode.Children.Count == 1)
        {
            xnode = xnode.Children[0];
            ulist.Add(xnode);
        }

        StringBuilder sb = new StringBuilder(node.Tag);
        foreach (var x in ulist)
        {
            sb.Append('\t');
            sb.Append(x.Tag);
        }

        unaryRuleCount.Add(sb.ToString());

        node.Children.Clear();
        if (node.Tag != xnode.Tag)
        {
            // Keep the end of the chain and re-link it to its new parent.
            xnode.Parent = node;
            node.Children.Add(xnode);
        }
        else
        {
            // Same category at both ends: adopt the grandchildren directly.
            foreach (var c in xnode.Children)
            {
                c.Parent = node;
                node.Children.Add(c);
            }
        }
    }

    foreach (var chd in node.Children)
    {
        CollapseUnaryRules(chd, unaryRuleCount);
    }
}
/// <summary>
/// Wraps an existing node structure as a tree and runs <c>ComputeStartEnd</c> on it.
/// </summary>
/// <param name="root">Node that becomes the root of this tree.</param>
public PhrasalTree(PhrasalNode root)
{
    this.Root = root;
    this.ComputeStartEnd();
}
/// <summary>
/// Adds the vertices and edges for the subtree rooted at <paramref name="node"/>
/// to <paramref name="g"/> and returns the vertex created for <paramref name="node"/>.
/// </summary>
/// <remarks>
/// A leaf yields a word vertex, a tag vertex and an edge scored with the terminal
/// rule tables. A node with one or two children is built after its children and
/// yields one vertex plus one edge scored with the unary or binary rule tables.
/// </remarks>
/// <param name="node">Subtree root; for null nothing is added.</param>
/// <param name="g">Graph that receives the new vertices and edges.</param>
/// <param name="v">Receives the vertex for <paramref name="node"/>, or null when <paramref name="node"/> is null.</param>
/// <exception cref="Exception">A node has more than two children.</exception>
private void BuildHyperGraph(PhrasalNode node, HyperGraph g, out HyperVertex v)
{
    v = null;
    if (node == null)
    {
        return;
    }

    switch (node.Children.Count)
    {
        case 0:
        {
            int posTag = tagset.GetPTID(node.Tag);
            int wordId = vocab.GetId(node.Lex, node.Start == 0);
            var wordVertex = new HyperVertex(true, wordId, 1);
            var tagVertex = new HyperVertex(false, posTag, rules.GetSubTagCount(posTag));
            var edge = new HyperEdge(
                tagVertex,
                wordVertex,
                rules.GetTerminalRuleScores(posTag, wordId),
                rules.GetTerminalPosteriorCounts(posTag, wordId));

            g.Es.Add(edge);
            g.Vs.Add(wordVertex);
            g.Vs.Add(tagVertex);
            v = tagVertex;
            return;
        }

        case 1:
        {
            // Children first, so their vertices are added before the parent's.
            HyperVertex childVertex;
            BuildHyperGraph(node.Children[0], g, out childVertex);

            int tag = tagset.GetID(node.Tag);
            var parentVertex = new HyperVertex(false, tag, rules.GetSubTagCount(tag));
            var edge = new HyperEdge(
                parentVertex,
                childVertex,
                rules.GetRuleScores(tag, childVertex.tag),
                rules.GetPosteriorCounts(tag, childVertex.tag));

            g.Es.Add(edge);
            g.Vs.Add(parentVertex);
            v = parentVertex;
            return;
        }

        case 2:
        {
            HyperVertex leftVertex;
            HyperVertex rightVertex;
            BuildHyperGraph(node.Children[0], g, out leftVertex);
            BuildHyperGraph(node.Children[1], g, out rightVertex);

            int tag = tagset.GetID(node.Tag);
            var parentVertex = new HyperVertex(false, tag, rules.GetSubTagCount(tag));
            var edge = new HyperEdge(
                parentVertex,
                leftVertex,
                rightVertex,
                rules.GetRuleScores(parentVertex.tag, leftVertex.tag, rightVertex.tag),
                rules.GetPosteriorCounts(parentVertex.tag, leftVertex.tag, rightVertex.tag));

            g.Es.Add(edge);
            g.Vs.Add(parentVertex);
            v = parentVertex;
            return;
        }

        default:
            throw new Exception("tree node can only have at most 2 children");
    }
}
/// <summary>
/// Removes identity unary productions (a node whose only child carries the same
/// tag) throughout the subtree rooted at <paramref name="node"/>.
/// </summary>
/// <remarks>
/// When the only child has the same tag and is not a leaf, the child is dropped
/// and its children are attached to <paramref name="node"/>; the node is then
/// examined again so that longer identity chains collapse completely. A
/// same-tag child that is a leaf is kept. The recursion calls this method itself
/// so that only identity productions are removed at every depth.
/// </remarks>
/// <param name="node">Subtree root, modified in place.</param>
private void RemoveIdentityUnaryRule(PhrasalNode node)
{
    if (node.Children.Count == 0)
    {
        return;
    }

    if (node.Children.Count == 1 && node.Children[0].Tag == node.Tag)
    {
        var chd = node.Children[0];
        if (chd.Children.Count == 0)
        {
            // The child is a leaf; removing it would lose the leaf.
            return;
        }

        node.Children.Clear();
        foreach (var gc in chd.Children)
        {
            gc.Parent = node;
            node.Children.Add(gc);
        }

        // The adopted children may form another identity production.
        RemoveIdentityUnaryRule(node);
    }
    else
    {
        foreach (var chd in node.Children)
        {
            RemoveIdentityUnaryRule(chd);
        }
    }
}
/// <summary>
/// Binarizes the subtree rooted at <paramref name="node"/> in place by left
/// branching: a node with more than two children keeps its last child and gets
/// a new fragment node, tagged with the node's tag plus a trailing <c>/</c>,
/// that holds all the other children. Fragments are binarized in turn.
/// </summary>
/// <param name="node">Subtree root to binarize.</param>
private static void LeftBranchBinarization(PhrasalNode node)
{
    int count = node.Children.Count;
    if (count > 2)
    {
        var fragment = new PhrasalNode();
        fragment.Parent = node;

        // A fragment of a fragment reuses the marked tag instead of stacking markers.
        fragment.Tag = node.Tag.EndsWith("/") ? node.Tag : node.Tag + "/";

        int idx = 0;
        while (idx < count - 1)
        {
            var moved = node.Children[idx];
            moved.Parent = fragment;
            fragment.Children.Add(moved);
            idx++;
        }

        var last = node.Children[count - 1];
        node.Children.Clear();
        node.Children.Add(fragment);
        node.Children.Add(last);
    }

    foreach (var child in node.Children)
    {
        LeftBranchBinarization(child);
    }
}
/// <summary>
/// Creates the <c>PhraseBox</c> entries for the subtree rooted at
/// <paramref name="node"/>, appends them to <paramref name="blist"/> and returns
/// the box that represents <paramref name="node"/> itself.
/// </summary>
/// <remarks>
/// A leaf produces two boxes in the current column: one for its tag and, one
/// level lower, one for its word. A node with a single child takes the column of
/// that child. Otherwise the first half of the children is laid out, then the
/// node is placed (in its own column for an even child count, in the middle
/// child's column for an odd count), then the remaining children follow. The
/// column counter advances by one after every child of a multi-child node and
/// once more after the node itself is placed.
/// </remarks>
/// <param name="blist">List that receives every created box; children are added before their parent.</param>
/// <param name="parentNode">Box recorded as the parent of the box for <paramref name="node"/>.</param>
/// <param name="node">Subtree root to lay out.</param>
/// <param name="hLvl">Running column counter, updated as boxes are placed.</param>
/// <param name="vLvl">Row assigned to the box for <paramref name="node"/>.</param>
/// <returns>The box created for <paramref name="node"/>.</returns>
static PhraseBox GetPhraseBoxes(List<PhraseBox> blist, PhraseBox parentNode, PhrasalNode node, ref int hLvl, int vLvl)
{
    int count = node.Children.Count;

    if (count == 0)
    {
        var tagBox = new PhraseBox();
        tagBox.horizontalLvl = hLvl;
        tagBox.verticalLvl = vLvl;
        tagBox.parent = parentNode;
        tagBox.description = node.Tag;

        var wordBox = new PhraseBox();
        wordBox.horizontalLvl = hLvl;
        wordBox.verticalLvl = vLvl + 1;
        wordBox.parent = tagBox;
        wordBox.description = node.Lex;

        blist.Add(tagBox);
        blist.Add(wordBox);
        return tagBox;
    }

    if (count == 1)
    {
        // A unary node sits directly above its child and does not consume a column.
        var unaryBox = new PhraseBox();
        var onlyChild = GetPhraseBoxes(blist, unaryBox, node.Children[0], ref hLvl, vLvl + 1);
        unaryBox.verticalLvl = vLvl;
        unaryBox.horizontalLvl = onlyChild.horizontalLvl;
        unaryBox.description = node.Tag;
        unaryBox.parent = parentNode;
        blist.Add(unaryBox);
        return unaryBox;
    }

    int half = count / 2;
    bool hasMiddle = count % 2 != 0;
    var box = new PhraseBox();

    // Children left of the node.
    for (int i = 0; i < half; ++i)
    {
        GetPhraseBoxes(blist, box, node.Children[i], ref hLvl, vLvl + 1);
        hLvl += 1;
    }

    // With an odd count the middle child is laid out now and shares its column
    // with the node; with an even count the node takes the next free column.
    PhraseBox middle = hasMiddle
        ? GetPhraseBoxes(blist, box, node.Children[half], ref hLvl, vLvl + 1)
        : null;

    box.verticalLvl = vLvl;
    box.horizontalLvl = hasMiddle ? middle.horizontalLvl : hLvl;
    box.description = node.Tag;
    box.parent = parentNode;
    hLvl += 1;

    // Children right of the node.
    for (int i = hasMiddle ? half + 1 : half; i < count; ++i)
    {
        GetPhraseBoxes(blist, box, node.Children[i], ref hLvl, vLvl + 1);
        hLvl += 1;
    }

    blist.Add(box);
    return box;
}
/// <summary>
/// Undoes left-branching binarization in the subtree rooted at
/// <paramref name="node"/>: while a node has more than one child and its first
/// child's tag contains <c>/</c>, that first child is replaced by its own
/// children. The resulting children are then processed the same way.
/// </summary>
/// <param name="node">Subtree root, restored in place.</param>
private static void RecoverFromLBCNF(PhrasalNode node)
{
    if (node.Children.Count == 0)
    {
        return;
    }

    // Keep splicing until the first child is no longer a fragment.
    while (node.Children.Count > 1 && node.Children[0].Tag.IndexOf("/") >= 0)
    {
        var spliced = new List<PhrasalNode>();
        foreach (var promoted in node.Children[0].Children)
        {
            promoted.Parent = node;
            spliced.Add(promoted);
        }

        for (int i = 1; i < node.Children.Count; ++i)
        {
            spliced.Add(node.Children[i]);
        }

        node.Children = spliced;
    }

    foreach (var child in node.Children)
    {
        RecoverFromLBCNF(child);
    }
}
/// <summary>
/// Rebuilds the tree below vertex <paramref name="v"/> by following the edge
/// stored in <c>v.traces[subtag]</c> and, recursively, the sub-tags stored with it.
/// </summary>
/// <remarks>
/// Binary and unary edges produce child nodes, a terminal edge produces a node
/// without children, and a dummy edge is skipped: the node built for its source
/// vertex is returned instead.
/// </remarks>
/// <param name="v">Vertex to expand.</param>
/// <param name="subtag">Index into <c>v.traces</c> selecting the trace to follow.</param>
/// <param name="tagSet">Tag set used to turn vertex tags into tag strings.</param>
/// <returns>
/// The reconstructed node, or null when <paramref name="v"/> is null or a
/// terminal vertex.
/// </returns>
/// <exception cref="Exception">The traced edge has an unrecognized type.</exception>
private PhrasalNode ExtractViterbiParse(HyperVertex v, int subtag, TagSet tagSet)
{
    if (v == null || v.TYPE == VTYPE.TERMINAL)
    {
        return null;
    }

    var result = new PhrasalNode();
    result.Tag = tagSet.GetTagString(v.tag);

    var trace = v.traces[subtag];
    var edge = trace.edge;
    if (edge == null)
    {
        return result;
    }

    switch (edge.TYPE)
    {
        case ETYPE.BINARY:
            var left = ExtractViterbiParse(edge.from0, trace.subtag0, tagSet);
            var right = ExtractViterbiParse(edge.from1, trace.subtag1, tagSet);
            result.Children.Add(left);
            result.Children.Add(right);
            left.Parent = result;
            right.Parent = result;
            return result;

        case ETYPE.UNARY:
            var only = ExtractViterbiParse(edge.from0, trace.subtag0, tagSet);
            result.Children.Add(only);
            only.Parent = result;
            return result;

        case ETYPE.TERMINAL:
            return result;

        case ETYPE.DUMMY:
            // Dummy edges contribute no node of their own.
            return ExtractViterbiParse(edge.from0, trace.subtag0, tagSet);

        default:
            throw new Exception("unknown edge type!");
    }
}
/// <summary>
/// Reduces every tag in the subtree rooted at <paramref name="node"/> to its
/// first segment, where segments are separated by <c>-</c>, <c>/</c> or <c>=</c>.
/// </summary>
/// <remarks>
/// Null tags and the tag <c>-NONE-</c> are left untouched. A tag that is empty or
/// consists only of separator characters has no segment and is also left as it
/// is, instead of failing on an empty split result.
/// </remarks>
/// <param name="node">Subtree root, modified in place.</param>
private static void RemoveFunctionTags(PhrasalNode node)
{
    if (node.Tag != null && node.Tag != "-NONE-")
    {
        string[] parts = node.Tag.Split(new string[] { "-", "/", "=" }, StringSplitOptions.RemoveEmptyEntries);

        // RemoveEmptyEntries can yield an empty array (e.g. for "" or "-").
        if (parts.Length > 0)
        {
            node.Tag = parts[0];
        }
    }

    foreach (var chd in node.Children)
    {
        RemoveFunctionTags(chd);
    }
}
/// <summary>
/// Builds the rows of a sideways text drawing of the subtree rooted at
/// <paramref name="node"/>; each returned StringBuilder is one row.
/// </summary>
/// <remarks>
/// A leaf yields a single row: its Lex, a space, and its Tag.
/// For an internal node the rows of all children are built first and padded to a
/// common length; a row is padded with spaces when it ends in a space and with
/// '-' otherwise. A connector is then appended to every row:
/// - children in the first half: rows up to the first one that does not end in a
///   space get " " (first child) or " |" (other children), that row gets "--\",
///   and the rows after it get " |";
/// - the middle: for an even child count a new row of spaces ending in " -" is
///   inserted; for an odd count the middle child's rows get "---" when they do
///   not end in a space, otherwise " |" (or " " when it is the only child);
/// - children in the second half: rows up to the first one that does not end in
///   a space get " |", that row gets "--/", and the rows after it get " " (last
///   child) or " |" (other children).
/// Finally every row ending in '-' gets the node's Tag appended and every other
/// row gets as many spaces as the Tag is long.
/// The child row builders are mutated and reused in the result.
/// NOTE(review): the blank/bar connectors (" ", " |", " -") are shorter than the
/// three-character "--\", "---" and "--/" -- verify the literal widths against
/// the original file in case runs of spaces were collapsed.
/// </remarks>
/// <param name="node">Root of the subtree to draw.</param>
/// <returns>The rows of the drawing, in child order from first to last.</returns>
private static List<StringBuilder> BuildTextTree(PhrasalNode node) { var sbs = new List<StringBuilder> (); if (node.Children.Count == 0) { var sb = new StringBuilder (); sb.Append (node.Lex); sb.Append (' '); sb.Append (node.Tag); sbs.Add (sb); } else { var chdSbs = new List<List<StringBuilder>> (); foreach (var chd in node.Children) { var chdsb = BuildTextTree (chd); chdSbs.Add (chdsb); } int maxLen = 0; foreach (var csbs in chdSbs) { foreach (var sb in csbs) { maxLen = Math.Max (maxLen, sb.Length); } } foreach (var csbs in chdSbs) { foreach (var sb in csbs) { char filler = sb [sb.Length - 1] == ' ' ? ' ' : '-'; while (sb.Length < maxLen) { sb.Append (filler); } } } for (int i = 0; i < chdSbs.Count / 2; ++i) { var csbs = chdSbs [i]; bool flag = false; foreach (var sb in csbs) { if (flag) { sb.Append (" |"); } else { if (sb [sb.Length - 1] != ' ') { sb.Append ("--\\"); flag = true; } else { if (i == 0) { sb.Append (" "); } else { sb.Append (" |"); } } } sbs.Add (sb); } } if (chdSbs.Count % 2 == 0) { var xsb = new StringBuilder (); for (int i = 0; i < maxLen; ++i) { xsb.Append (' '); } xsb.Append (" -"); sbs.Add (xsb); } else { var xsbs = chdSbs [chdSbs.Count / 2]; bool flag = chdSbs.Count > 1; foreach (var sb in xsbs) { if (sb [sb.Length - 1] != ' ') { sb.Append ("---"); } else { if (flag) { sb.Append (" |"); } else { sb.Append (" "); } } sbs.Add (sb); } } for (int i = chdSbs.Count / 2 + chdSbs.Count % 2; i < chdSbs.Count; ++i) { var csbs = chdSbs [i]; bool flag = false; foreach (var sb in csbs) { if (flag) { if (i == chdSbs.Count - 1) { sb.Append (" "); } else { sb.Append (" |"); } } else { if (sb [sb.Length - 1] != ' ') { sb.Append ("--/"); flag = true; } else { sb.Append (" |"); } } sbs.Add (sb); } } string xtag = node.Tag; int xlen = xtag.Length; foreach (var sb in sbs) { if (sb [sb.Length - 1] == '-') { sb.Append (xtag); } else { for (int i = 0; i < xlen; ++i) { sb.Append (' '); } } } } return sbs; }
/// <summary>
/// Binarizes the subtree rooted at <paramref name="node"/> in place. A node with
/// more than two children keeps one outer child and gets a new fragment node
/// that holds all the others; fragments are binarized in turn.
/// </summary>
/// <remarks>
/// With <paramref name="isLeft"/> set, the last child is kept and the fragment
/// (tag suffix <c>/</c>) becomes the first child. Otherwise the first child is
/// kept and the fragment (tag suffix <c>\</c>) becomes the second child. A node
/// whose tag already ends in the suffix passes its tag on unchanged.
/// </remarks>
/// <param name="node">Subtree root to binarize.</param>
/// <param name="isLeft">True to group the leading children, false to group the trailing ones.</param>
public static void XBarBinarize(PhrasalNode node, bool isLeft)
{
    int count = node.Children.Count;
    if (count > 2)
    {
        var fragment = new PhrasalNode();
        fragment.Parent = node;

        string marker = isLeft ? "/" : "\\";
        fragment.Tag = node.Tag.EndsWith(marker) ? node.Tag : node.Tag + marker;

        // Range of children that move into the fragment.
        int from = isLeft ? 0 : 1;
        int to = isLeft ? count - 1 : count;
        for (int i = from; i < to; ++i)
        {
            var moved = node.Children[i];
            moved.Parent = fragment;
            fragment.Children.Add(moved);
        }

        var kept = node.Children[isLeft ? count - 1 : 0];
        node.Children.Clear();
        if (isLeft)
        {
            node.Children.Add(fragment);
            node.Children.Add(kept);
        }
        else
        {
            node.Children.Add(kept);
            node.Children.Add(fragment);
        }
    }

    foreach (var child in node.Children)
    {
        XBarBinarize(child, isLeft);
    }
}