/// <summary>
/// Removes every childless node tagged "-NONE-" from the tree under
/// <paramref name="root"/>, then removes inner nodes that end up with no children.
/// </summary>
/// <param name="root">
/// Top of the subtree to clean. The root itself is never detached from its parent;
/// a null root is ignored.
/// </param>
public static void RemoveNullProduction(PhrasalNode root)
        {
            if (root == null)
            {
                return;
            }

            // Two parallel stacks: a node and a flag telling whether its children
            // have already been scheduled (true = second, post-order visit).
            Stack<PhrasalNode> nodestack = new Stack<PhrasalNode>();
            Stack<bool> visitflags = new Stack<bool>();

            nodestack.Push(root);
            visitflags.Push(false);

            while (nodestack.Count > 0)
            {
                var node = nodestack.Pop();
                var visited = visitflags.Pop();

                if (!visited && node.Children.Count > 0)
                {
                    // First visit of an inner node: revisit it after all of its children.
                    nodestack.Push(node);
                    visitflags.Push(true);

                    foreach (var chd in node.Children)
                    {
                        nodestack.Push(chd);
                        visitflags.Push(false);
                    }

                    continue;
                }

                // Removal candidates are childless nodes that are either a "-NONE-"
                // leaf (first visit) or an inner node emptied by earlier removals.
                bool removable = node.Children.Count == 0
                    && (visited || node.Tag == "-NONE-");

                // Same guard for both cases: never touch a missing parent and never
                // detach the root (previously the leaf case dereferenced Parent unguarded).
                if (removable && node.Parent != null && node != root)
                {
                    node.Parent.Children.Remove(node);
                }
            }
        }
        /// <summary>
        /// Walks the tree bottom-up and undoes fragment nodes, i.e. children whose
        /// tag ends with "/" or "\". A fragment whose tag (with or without the
        /// trailing marker) equals the parent's tag is replaced by its own children;
        /// any other fragment is kept and only loses the trailing marker.
        /// </summary>
        /// <param name="node">Subtree to restore; null is ignored.</param>
        public static void RecoverFromXBarBinarize(PhrasalNode node)
        {
            if (node == null || node.Children.Count == 0)
            {
                return;
            }

            // Children first, so each child is already restored when inspected below.
            foreach (var child in node.Children)
            {
                RecoverFromXBarBinarize(child);
            }

            var rebuilt = new List<PhrasalNode>();

            foreach (var child in node.Children)
            {
                bool isFragment = child.Tag.EndsWith("\\") || child.Tag.EndsWith("/");

                if (!isFragment)
                {
                    rebuilt.Add(child);
                    continue;
                }

                var baseTag = child.Tag.Substring(0, child.Tag.Length - 1);

                if (baseTag != node.Tag && child.Tag != node.Tag)
                {
                    // Fragment of some other label: keep the node, drop the marker.
                    child.Tag = baseTag;
                    rebuilt.Add(child);
                    continue;
                }

                // Fragment of this node: splice its children in place of it.
                foreach (var grandchild in child.Children)
                {
                    rebuilt.Add(grandchild);
                    grandchild.Parent = node;
                }
            }

            node.Children = rebuilt;
        }
Exemple #3
0
 /// <summary>
 /// Appends the <c>Lex</c> of every childless node under <paramref name="node"/>
 /// to <paramref name="sb"/>, in child order, each followed by a single space.
 /// </summary>
 private void GetSentence(StringBuilder sb, PhrasalNode node)
 {
     if (node.Children.Count > 0)
     {
         foreach (var child in node.Children)
         {
             GetSentence(sb, child);
         }

         return;
     }

     sb.Append(node.Lex).Append(' ');
 }
Exemple #4
0
 /// <summary>
 /// Writes the subtree under <paramref name="node"/> to <paramref name="sb"/> in
 /// bracketed form: "(Tag Lex)" for a childless node, otherwise "(Tag " followed
 /// by the children (written back to back) and a closing ")".
 /// </summary>
 private void GetParseLine(StringBuilder sb, PhrasalNode node)
 {
     if (node.Children.Count == 0)
     {
         sb.AppendFormat("({0} {1})", node.Tag, node.Lex);
         return;
     }

     sb.Append("(").Append(node.Tag).Append(" ");

     foreach (var child in node.Children)
     {
         GetParseLine(sb, child);
     }

     sb.Append(")");
 }
Exemple #5
0
        /// <summary>
        /// Collects one "Tag left right" entry per node that has children, with the
        /// span shrunk from both ends while the boundary position is punctuation
        /// according to <c>IsPUNC</c>. Childless nodes occupy one position and add
        /// no entry.
        /// </summary>
        /// <param name="pos">Per-position strings passed to <c>IsPUNC</c>.</param>
        /// <param name="bset">Set receiving the bracket strings.</param>
        /// <param name="node">Subtree to process.</param>
        /// <param name="left">Index of the first position covered by <paramref name="node"/>.</param>
        /// <param name="right">Receives the index of the last position covered by <paramref name="node"/>.</param>
        private void GetBracketsIgnorePunc(string[] pos, HashSet<string> bset, PhrasalNode node, int left, out int right)
        {
            right = left;

            if (node.Children.Count == 0)
            {
                return;
            }

            int spanStart = left;

            foreach (var child in node.Children)
            {
                GetBracketsIgnorePunc(pos, bset, child, left, out right);
                left = right + 1;
            }

            int trimmedLeft = spanStart;
            int trimmedRight = right;

            // Trim leading, then trailing punctuation; the span never shrinks below
            // a single position because both loops stop once the bounds meet.
            while (trimmedLeft < trimmedRight && IsPUNC(pos[trimmedLeft]))
            {
                trimmedLeft += 1;
            }

            while (trimmedLeft < trimmedRight && IsPUNC(pos[trimmedRight]))
            {
                trimmedRight -= 1;
            }

            bset.Add(string.Format("{0} {1} {2}", node.Tag, trimmedLeft, trimmedRight));
        }
Exemple #6
0
        /// <summary>
        /// Collects one "Tag left right" entry per node that has children, where
        /// left/right are the first and last positions it covers. Childless nodes
        /// occupy one position and add no entry.
        /// </summary>
        /// <param name="bset">Set receiving the bracket strings.</param>
        /// <param name="node">Subtree to process.</param>
        /// <param name="left">Index of the first position covered by <paramref name="node"/>.</param>
        /// <param name="right">Receives the index of the last position covered by <paramref name="node"/>.</param>
        private void GetBrackets(HashSet<string> bset, PhrasalNode node, int left, out int right)
        {
            right = left;

            if (node.Children.Count == 0)
            {
                return;
            }

            int spanStart = left;

            foreach (var child in node.Children)
            {
                GetBrackets(bset, child, left, out right);

                // The next sibling starts right after the one just measured.
                left = right + 1;
            }

            bset.Add(string.Format("{0} {1} {2}", node.Tag, spanStart, right));
        }
Exemple #7
0
        /// <summary>
        /// Renders the tree under <paramref name="node"/> as multi-line text by
        /// joining the lines produced by <c>BuildTextTree</c>.
        /// </summary>
        /// <param name="node">Root of the subtree to draw.</param>
        /// <returns>The drawing, one text line per entry, separated by CR LF.</returns>
        public static string DrawTextTree(PhrasalNode node)
        {
            var sbs = BuildTextTree (node);

            // Was "\r\r": two carriage returns and no line feed, which does not
            // start a new line on consoles and most editors.
            return string.Join ("\r\n", sbs);
        }
 /// <summary>
 /// Sets <c>Lex</c> of every childless node under <paramref name="node"/> to
 /// <c>words[node.Start]</c>.
 /// </summary>
 private static void AnnotateLex(string[] words, PhrasalNode node)
 {
     if (node.Children.Count > 0) {
         foreach (var child in node.Children) {
             AnnotateLex (words, child);
         }

         return;
     }

     node.Lex = words [node.Start];
 }
Exemple #9
0
        /// <summary>
        /// Assigns <c>Start</c>/<c>End</c> to every node under <paramref name="node"/>.
        /// A node without children gets the span [start, start + 1); a node with
        /// children starts at <paramref name="start"/> and ends where its last child
        /// ends, each child starting at the previous sibling's <c>End</c>.
        /// </summary>
        /// <param name="start">Start position for <paramref name="node"/>.</param>
        /// <param name="node">Subtree to annotate; null is ignored.</param>
        private static void ComputeStartEnd(int start, PhrasalNode node)
        {
            if (node == null)
            {
                return;
            }

            int end;

            if (node.Children == null || node.Children.Count == 0)
            {
                end = start + 1;
            }
            else
            {
                end = start;

                foreach (var child in node.Children)
                {
                    ComputeStartEnd(end, child);
                    end = child.End;
                }
            }

            node.Start = start;
            node.End = end;
        }
Exemple #10
0
        /// <summary>
        /// Builds a tree from bracketed text such as <c>(S (NP (DT the) (NN dog)))</c>.
        /// Inside a bracket, a token ended by a space, tab or "(" becomes the node's
        /// <c>Tag</c>; a token ended by ")" becomes its <c>Lex</c>.
        /// </summary>
        /// <param name="line">
        /// Bracketed tree text. When null or whitespace the constructor returns
        /// early, leaving <c>Root</c> tagged "TOP" with whatever children it already had.
        /// </param>
        /// <exception cref="Exception">
        /// The brackets are unbalanced, parsing failed (the cause is kept in
        /// <c>InnerException</c>), or the text does not have exactly one top-level bracket.
        /// </exception>
        public PhrasalTree(string line)
        {
            Root.Tag = "TOP";

            if (string.IsNullOrWhiteSpace(line))
            {
                return;
            }

            // Node currently being filled; starts at the temporary "TOP" root.
            PhrasalNode thisnode = Root;

            try
            {
                bool insideStr = false;

                StringBuilder sb = new StringBuilder();

                foreach (char c in line)
                {
                    switch (c)
                    {
                        case ('('):
                            if (insideStr)
                            {
                                // "(TAG(" - the pending token is the tag of the open node.
                                thisnode.Tag = sb.ToString();
                                sb.Clear();
                                insideStr = false;
                            }
                            var chd = new PhrasalNode();
                            chd.Parent = thisnode;
                            thisnode.Children.Add(chd);
                            thisnode = chd;
                            break;
                        case (')'):
                            if (insideStr)
                            {
                                // A token directly before ")" is the node's word.
                                thisnode.Lex = sb.ToString();
                                sb.Clear();
                                insideStr = false;
                            }

                            // One ")" too many makes this null; the next access then
                            // fails and is reported by the catch below.
                            thisnode = thisnode.Parent;
                            break;

                        case (' '):
                        case ('\t'):

                            if (insideStr)
                            {
                                thisnode.Tag = sb.ToString();
                                sb.Clear();
                                insideStr = false;
                            }
                            break;
                        default:
                            insideStr = true;
                            sb.Append(c);
                            break;
                    }
                }
            }
            catch (Exception ex)
            {
                // Same exception type and message as before, but keep the cause for diagnosis.
                throw new Exception("Invalid Tree format!", ex);
            }

            // Every "(" must have been closed, bringing us back to the temporary root.
            if (thisnode != Root)
            {
                throw new Exception("Invalid Tree format!");
            }

            if (Root.Children.Count != 1)
            {
                throw new Exception("Invalid Tree format! Multiple Root!");
            }

            // Drop the temporary "TOP" wrapper and promote its only child.
            Root = Root.Children[0];

            Root.Parent = null;

            ComputeStartEnd();
        }
Exemple #11
0
        /// <summary>
        /// Collapses every unary chain (node with one child, which has one child, ...)
        /// under <paramref name="node"/> so that the top node is linked directly to
        /// the bottom of the chain, and records each collapsed chain in
        /// <paramref name="unaryRuleCount"/> as the chain's tags joined by tabs.
        /// When the bottom node has the same tag as the top node, the bottom node
        /// is skipped too and its children are attached to the top node.
        /// </summary>
        /// <param name="node">Subtree to process; null or childless nodes are ignored.</param>
        /// <param name="unaryRuleCount">Counter that receives one entry per collapsed chain.</param>
        public static void CollapseUnaryRules(PhrasalNode node, Counter<string> unaryRuleCount)
        {
            if (node == null || node.Children.Count == 0)
            {
                return;
            }

            if (node.Children.Count == 1)
            {
                // Follow the chain down to the first node that is not unary.
                var ulist = new List<PhrasalNode>();
                var xnode = node.Children[0];
                ulist.Add(xnode);

                while (xnode.Children.Count == 1)
                {
                    xnode = xnode.Children[0];
                    ulist.Add(xnode);
                }

                StringBuilder sb = new StringBuilder(node.Tag);

                foreach (var x in ulist)
                {
                    sb.Append('\t');
                    sb.Append(x.Tag);
                }

                unaryRuleCount.Add(sb.ToString());

                node.Children.Clear();

                // xnode is now the bottom of the chain.
                if (node.Tag != xnode.Tag)
                {
                    // Re-point Parent: it previously kept referencing the discarded
                    // intermediate node, which broke Parent-based tree edits.
                    xnode.Parent = node;
                    node.Children.Add(xnode);
                }
                else
                {
                    // NOTE(review): if xnode is childless, node ends up childless as
                    // well and xnode's Lex is not copied - confirm this is intended.
                    foreach (var c in xnode.Children)
                    {
                        c.Parent = node;
                        node.Children.Add(c);
                    }
                }
            }

            foreach (var chd in node.Children)
            {
                CollapseUnaryRules(chd, unaryRuleCount);
            }
        }
Exemple #12
0
 /// <summary>
 /// Creates a tree around an existing node: stores it in <c>Root</c> and then
 /// calls the parameterless <c>ComputeStartEnd()</c>.
 /// </summary>
 /// <param name="root">Node to use as the root of this tree.</param>
 public PhrasalTree(PhrasalNode root)
 {
     Root = root;
     ComputeStartEnd();
 }
Exemple #13
0
        /// <summary>
        /// Recursively adds the vertices and edges for the subtree under
        /// <paramref name="node"/> to <paramref name="g"/>. A childless node yields a
        /// word vertex, a tag vertex and an edge between them; a node with one or two
        /// children yields one tag vertex and one edge to its child vertices.
        /// </summary>
        /// <param name="node">Subtree to convert; null leaves <paramref name="v"/> null.</param>
        /// <param name="g">Graph whose <c>Vs</c> and <c>Es</c> lists are appended to.</param>
        /// <param name="v">Receives the tag vertex created for <paramref name="node"/>.</param>
        /// <exception cref="Exception">A node has more than two children.</exception>
        private void BuildHyperGraph(
            PhrasalNode node,
            HyperGraph g,
            out HyperVertex v)
        {
            v = null;

            if (node == null)
            {
                return;
            }

            switch (node.Children.Count)
            {
                case 0:
                {
                    // Childless node: its tag is looked up with GetPTID, unlike inner nodes.
                    int leafTag = tagset.GetPTID(node.Tag);
                    int wordId = vocab.GetId(node.Lex, node.Start == 0);

                    var wordVertex = new HyperVertex(true, wordId, 1);
                    var tagVertex = new HyperVertex(false, leafTag, rules.GetSubTagCount(leafTag));

                    var edge = new HyperEdge(
                        tagVertex,
                        wordVertex,
                        rules.GetTerminalRuleScores(leafTag, wordId),
                        rules.GetTerminalPosteriorCounts(leafTag, wordId));

                    g.Es.Add(edge);
                    g.Vs.Add(wordVertex);
                    g.Vs.Add(tagVertex);
                    v = tagVertex;
                    return;
                }

                case 1:
                {
                    HyperVertex childVertex;
                    BuildHyperGraph(node.Children [0], g, out childVertex);

                    int parentTag = tagset.GetID(node.Tag);
                    var parentVertex = new HyperVertex(false, parentTag, rules.GetSubTagCount(parentTag));

                    var edge = new HyperEdge(
                        parentVertex,
                        childVertex,
                        rules.GetRuleScores(parentTag, childVertex.tag),
                        rules.GetPosteriorCounts(parentTag, childVertex.tag));

                    g.Es.Add(edge);
                    g.Vs.Add(parentVertex);
                    v = parentVertex;
                    return;
                }

                case 2:
                {
                    HyperVertex leftVertex;
                    HyperVertex rightVertex;

                    BuildHyperGraph(node.Children [0], g, out leftVertex);
                    BuildHyperGraph(node.Children [1], g, out rightVertex);

                    int parentTag = tagset.GetID(node.Tag);
                    var parentVertex = new HyperVertex(false, parentTag, rules.GetSubTagCount(parentTag));

                    var edge = new HyperEdge(
                        parentVertex,
                        leftVertex,
                        rightVertex,
                        rules.GetRuleScores(parentVertex.tag, leftVertex.tag, rightVertex.tag),
                        rules.GetPosteriorCounts(parentVertex.tag, leftVertex.tag, rightVertex.tag));

                    g.Es.Add(edge);
                    g.Vs.Add(parentVertex);
                    v = parentVertex;
                    return;
                }

                default:
                    throw new Exception("tree node can only have at most 2 children");
            }
        }
Exemple #14
0
        /// <summary>
        /// If <paramref name="node"/> has a single child with the same tag and that
        /// child has children of its own, the child is removed and its children are
        /// attached directly to <paramref name="node"/>. Processing then continues
        /// through <c>RemoveUnaryRule</c>.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the follow-up calls go to <c>RemoveUnaryRule</c>, which is not
        /// visible here, rather than to this method - confirm that is intended.
        /// </remarks>
        private void RemoveIdentityUnaryRule(PhrasalNode node)
        {
            if (node.Children.Count == 0)
            {
                return;
            }

            bool isIdentityUnary = node.Children.Count == 1 && node.Children[0].Tag == node.Tag;

            if (!isIdentityUnary)
            {
                foreach (var child in node.Children)
                {
                    RemoveUnaryRule(child);
                }

                return;
            }

            var onlyChild = node.Children[0];

            // A childless duplicate is kept as it is.
            if (onlyChild.Children.Count == 0)
            {
                return;
            }

            node.Children.Clear();

            foreach (var grandchild in onlyChild.Children)
            {
                grandchild.Parent = node;
                node.Children.Add(grandchild);
            }

            RemoveUnaryRule(node);
        }
Exemple #15
0
        /// <summary>
        /// Rewrites every node with more than two children so that it has exactly
        /// two: a new fragment node holding all children but the last, and the last
        /// child. The fragment's tag is the node's tag with "/" appended (unless it
        /// already ends with "/"). Applied recursively to the whole subtree.
        /// </summary>
        private static void LeftBranchBinarization(PhrasalNode node)
        {
            int childCount = node.Children.Count;

            if (childCount > 2)
            {
                var fragment = new PhrasalNode();
                fragment.Parent = node;
                fragment.Tag = node.Tag.EndsWith("/") ? node.Tag : node.Tag + "/";

                int lastIndex = childCount - 1;

                for (int i = 0; i < lastIndex; ++i)
                {
                    var moved = node.Children[i];
                    moved.Parent = fragment;
                    fragment.Children.Add(moved);
                }

                var lastChild = node.Children[lastIndex];

                node.Children.Clear();
                node.Children.Add(fragment);
                node.Children.Add(lastChild);
            }

            // The fragment may still have more than two children; recursion handles it.
            foreach (var child in node.Children)
            {
                LeftBranchBinarization(child);
            }
        }
Exemple #16
0
        /// <summary>
        /// Creates the <c>PhraseBox</c> entries for the subtree under
        /// <paramref name="node"/> and appends them to <paramref name="blist"/>.
        /// A childless node produces a tag box plus a word box one level below it.
        /// A node with children is placed at its middle child's column (odd child
        /// count) or at a column of its own between the two halves (even count).
        /// </summary>
        /// <param name="blist">List receiving every created box.</param>
        /// <param name="parentNode">Box to record as parent of the box for <paramref name="node"/>.</param>
        /// <param name="node">Subtree to lay out.</param>
        /// <param name="hLvl">Running horizontal position, advanced as columns are used.</param>
        /// <param name="vLvl">Vertical level of <paramref name="node"/>.</param>
        /// <returns>The box created for <paramref name="node"/> (the tag box for a childless node).</returns>
        static PhraseBox GetPhraseBoxes(List<PhraseBox> blist, PhraseBox parentNode, PhrasalNode node, ref int hLvl, int vLvl)
        {
            int childCount = node.Children.Count;

            if (childCount == 0)
            {
                var tagBox = new PhraseBox
                {
                    horizontalLvl = hLvl,
                    verticalLvl = vLvl,
                    parent = parentNode,
                    description = node.Tag
                };

                var wordBox = new PhraseBox
                {
                    horizontalLvl = hLvl,
                    verticalLvl = vLvl + 1,
                    parent = tagBox,
                    description = node.Lex
                };

                blist.Add(tagBox);
                blist.Add(wordBox);

                return tagBox;
            }

            var box = new PhraseBox();

            if (childCount == 1)
            {
                // Single child: sit directly above it; hLvl is not advanced here.
                var onlyBox = GetPhraseBoxes(blist, box, node.Children[0], ref hLvl, vLvl + 1);

                box.verticalLvl = vLvl;
                box.horizontalLvl = onlyBox.horizontalLvl;
                box.description = node.Tag;
                box.parent = parentNode;

                blist.Add(box);

                return box;
            }

            int half = childCount / 2;
            int rightStart;

            // Left half of the children.
            for (int i = 0; i < half; ++i)
            {
                GetPhraseBoxes(blist, box, node.Children[i], ref hLvl, vLvl + 1);
                hLvl += 1;
            }

            if (childCount % 2 == 0)
            {
                // Even count: this node takes the next free column between the halves.
                box.horizontalLvl = hLvl;
                rightStart = half;
            }
            else
            {
                // Odd count: this node shares the column of its middle child.
                var middleBox = GetPhraseBoxes(blist, box, node.Children[half], ref hLvl, vLvl + 1);
                box.horizontalLvl = middleBox.horizontalLvl;
                rightStart = half + 1;
            }

            box.verticalLvl = vLvl;
            box.description = node.Tag;
            box.parent = parentNode;

            hLvl += 1;

            // Right half of the children.
            for (int i = rightStart; i < childCount; ++i)
            {
                GetPhraseBoxes(blist, box, node.Children[i], ref hLvl, vLvl + 1);
                hLvl += 1;
            }

            blist.Add(box);

            return box;
        }
Exemple #17
0
        /// <summary>
        /// Undoes left-branching fragments: while <paramref name="node"/> has more
        /// than one child and its first child's tag contains "/", that first child
        /// is replaced by its own children. The remaining children are then
        /// processed recursively.
        /// </summary>
        private static void RecoverFromLBCNF(PhrasalNode node)
        {
            if (node.Children.Count == 0)
            {
                return;
            }

            // Keep unpacking: the first grandchild may itself be a fragment.
            while (node.Children.Count > 1 && node.Children[0].Tag.IndexOf("/") >= 0)
            {
                var merged = new List<PhrasalNode>();

                foreach (var grandchild in node.Children[0].Children)
                {
                    grandchild.Parent = node;
                    merged.Add(grandchild);
                }

                for (int i = 1; i < node.Children.Count; ++i)
                {
                    merged.Add(node.Children[i]);
                }

                node.Children = merged;
            }

            foreach (var child in node.Children)
            {
                RecoverFromLBCNF(child);
            }
        }
        /// <summary>
        /// Rebuilds a <c>PhrasalNode</c> tree by following, from vertex
        /// <paramref name="v"/> and sub-tag <paramref name="subtag"/>, the edge and
        /// child sub-tags stored in <c>v.traces</c>.
        /// </summary>
        /// <param name="v">Vertex to start from.</param>
        /// <param name="subtag">Index into <c>v.traces</c>.</param>
        /// <param name="tagSet">Used to turn <c>v.tag</c> into the node's tag string.</param>
        /// <returns>
        /// The node for <paramref name="v"/>, or null when <paramref name="v"/> is null
        /// or a terminal vertex. For a DUMMY edge the result is the node of the edge's source.
        /// </returns>
        /// <exception cref="Exception">The traced edge has an unrecognized type.</exception>
        private PhrasalNode ExtractViterbiParse(HyperVertex v, int subtag, TagSet tagSet)
        {
            if (v == null || v.TYPE == VTYPE.TERMINAL)
            {
                return null;
            }

            var result = new PhrasalNode();
            result.Tag = tagSet.GetTagString(v.tag);

            var trace = v.traces[subtag];
            var edge = trace.edge;

            if (edge == null)
            {
                return result;
            }

            if (edge.TYPE == ETYPE.BINARY)
            {
                var left = ExtractViterbiParse(edge.from0, trace.subtag0, tagSet);
                var right = ExtractViterbiParse(edge.from1, trace.subtag1, tagSet);
                result.Children.Add(left);
                result.Children.Add(right);
                left.Parent = result;
                right.Parent = result;
            }
            else if (edge.TYPE == ETYPE.UNARY)
            {
                var child = ExtractViterbiParse(edge.from0, trace.subtag0, tagSet);
                result.Children.Add(child);
                child.Parent = result;
            }
            else if (edge.TYPE == ETYPE.TERMINAL)
            {
                // Nothing to attach below this node.
            }
            else if (edge.TYPE == ETYPE.DUMMY)
            {
                // Skip this vertex entirely and return what lies beneath it.
                result = ExtractViterbiParse(edge.from0, trace.subtag0, tagSet);
            }
            else
            {
                throw new Exception("unknown edge type!");
            }

            return result;
        }
Exemple #19
0
 /// <summary>
 /// Shortens the tag of every node under <paramref name="node"/> to its first
 /// non-empty piece when split on "-", "/" and "=" (for example "NP-SBJ"
 /// becomes "NP"). Null tags and the tag "-NONE-" are left unchanged.
 /// </summary>
 private static void RemoveFunctionTags(PhrasalNode node)
 {
     if (node.Tag != null && node.Tag != "-NONE-")
     {
         string[] parts = node.Tag.Split(new string[] { "-", "/", "=" }, StringSplitOptions.RemoveEmptyEntries);

         // A tag made only of separators (e.g. "-" or "=") splits into nothing;
         // keep it unchanged instead of indexing into an empty array.
         if (parts.Length > 0)
         {
             node.Tag = parts[0];
         }
     }

     foreach (var chd in node.Children)
     {
         RemoveFunctionTags(chd);
     }
 }
Exemple #20
0
        /// <summary>
        /// Builds the text lines of a sideways drawing of the subtree under
        /// <paramref name="node"/>. A childless node yields the single line
        /// "Lex Tag". For other nodes the children's lines are stacked, padded to
        /// a common width, given connector suffixes, and the node's tag is
        /// appended to the line that ends in '-'.
        /// </summary>
        /// <param name="node">Subtree to draw.</param>
        /// <returns>The drawing's lines, top to bottom; all lines returned for one node have equal length.</returns>
        private static List<StringBuilder> BuildTextTree(PhrasalNode node)
        {
            var sbs = new List<StringBuilder> ();
            if (node.Children.Count == 0) {
                var sb = new StringBuilder ();
                sb.Append (node.Lex);
                sb.Append (' ');
                sb.Append (node.Tag);
                sbs.Add (sb);
            } else {
                // Draw each child first; the builders are reused and extended below.
                var chdSbs = new List<List<StringBuilder>> ();

                foreach (var chd in node.Children) {
                    var chdsb = BuildTextTree (chd);
                    chdSbs.Add (chdsb);
                }

                int maxLen = 0;

                foreach (var csbs in chdSbs) {
                    foreach (var sb in csbs) {
                        maxLen = Math.Max (maxLen, sb.Length);
                    }
                }

                // Pad every line to the same width: lines ending in a space are
                // filled with spaces, all others (they end in a tag) with '-'.
                foreach (var csbs in chdSbs) {
                    foreach (var sb in csbs) {
                        char filler = sb [sb.Length - 1] == ' ' ? ' ' : '-';
                        while (sb.Length < maxLen) {
                            sb.Append (filler);
                        }
                    }
                }

                // Children in the first half: the line that does not end in a space
                // gets "--\"; lines after it get "  |"; lines before it get "   "
                // for the first child and "  |" for later children.
                for (int i = 0; i < chdSbs.Count / 2; ++i) {
                    var csbs = chdSbs [i];
                    bool flag = false;
                    foreach (var sb in csbs) {
                        if (flag) {
                            sb.Append ("  |");
                        } else {
                            if (sb [sb.Length - 1] != ' ') {
                                sb.Append ("--\\");
                                flag = true;
                            } else {
                                if (i == 0) {
                                    sb.Append ("   ");
                                } else {
                                    sb.Append ("  |");
                                }
                            }
                        }

                        sbs.Add (sb);
                    }
                }

                if (chdSbs.Count % 2 == 0) {
                    // Even number of children: insert an extra line between the two
                    // halves; it ends in '-' so it receives this node's tag below.
                    var xsb = new StringBuilder ();

                    for (int i = 0; i < maxLen; ++i) {
                        xsb.Append (' ');
                    }

                    xsb.Append ("  -");
                    sbs.Add (xsb);
                } else {
                    // Odd number of children: the middle child's non-space-ending line
                    // is extended with "---" and receives this node's tag below.
                    var xsbs = chdSbs [chdSbs.Count / 2];
                    bool flag = chdSbs.Count > 1;
                    foreach (var sb in xsbs) {
                        if (sb [sb.Length - 1] != ' ') {
                            sb.Append ("---");
                        } else {
                            if (flag) {
                                sb.Append ("  |");
                            } else {
                                sb.Append ("   ");
                            }
                        }

                        sbs.Add (sb);
                    }
                }

                // Children in the second half: mirror of the first half, using "--/";
                // lines after it get "   " for the last child and "  |" otherwise.
                for (int i = chdSbs.Count / 2 + chdSbs.Count % 2; i < chdSbs.Count; ++i) {
                    var csbs = chdSbs [i];
                    bool flag = false;
                    foreach (var sb in csbs) {
                        if (flag) {
                            if (i == chdSbs.Count - 1) {
                                sb.Append ("   ");
                            } else {
                                sb.Append ("  |");
                            }
                        } else {
                            if (sb [sb.Length - 1] != ' ') {
                                sb.Append ("--/");
                                flag = true;
                            } else {
                                sb.Append ("  |");
                            }
                        }

                        sbs.Add (sb);
                    }
                }

                string xtag = node.Tag;
                int xlen = xtag.Length;

                // Append this node's tag to the line ending in '-' and pad all other
                // lines with spaces of the same width so every line stays equally long.
                foreach (var sb in sbs) {
                    if (sb [sb.Length - 1] == '-') {
                        sb.Append (xtag);
                    } else {
                        for (int i = 0; i < xlen; ++i) {
                            sb.Append (' ');
                        }
                    }
                }
            }

            return sbs;
        }
        /// <summary>
        /// Rewrites every node with more than two children so that it has exactly
        /// two, one of which is a new fragment node holding the rest. With
        /// <paramref name="isLeft"/> the fragment comes first, takes all children but
        /// the last, and is tagged with the node's tag plus "/"; otherwise the
        /// fragment comes second, takes all children but the first, and is tagged
        /// with the node's tag plus "\". A tag already ending in the marker is
        /// reused unchanged. Applied recursively to the whole subtree.
        /// </summary>
        /// <param name="node">Subtree to binarize.</param>
        /// <param name="isLeft">True to group the leading children, false to group the trailing ones.</param>
        public static void XBarBinarize(PhrasalNode node, bool isLeft)
        {
            int childCount = node.Children.Count;

            if (childCount > 2)
            {
                string marker = isLeft ? "/" : "\\";

                var fragment = new PhrasalNode();
                fragment.Parent = node;
                fragment.Tag = node.Tag.EndsWith(marker) ? node.Tag : node.Tag + marker;

                // Children moved into the fragment: [0, n-2] when left, [1, n-1] otherwise.
                int firstMoved = isLeft ? 0 : 1;
                int lastMoved = isLeft ? childCount - 2 : childCount - 1;

                for (int i = firstMoved; i <= lastMoved; ++i)
                {
                    var moved = node.Children[i];
                    moved.Parent = fragment;
                    fragment.Children.Add(moved);
                }

                // The one child that stays directly under the node.
                var kept = node.Children[isLeft ? childCount - 1 : 0];

                node.Children.Clear();

                if (isLeft)
                {
                    node.Children.Add(fragment);
                    node.Children.Add(kept);
                }
                else
                {
                    node.Children.Add(kept);
                    node.Children.Add(fragment);
                }
            }

            foreach (var child in node.Children)
            {
                XBarBinarize(child, isLeft);
            }
        }