Exemple #1
0
 /// <summary>
 /// Binarizes one local tree, choosing the routine from the <c>markovFactor</c>
 /// and <c>insideFactor</c> settings.
 /// </summary>
 /// <param name="t">Local tree to binarize; its label is replaced in place when <c>markovFactor</c> is set.</param>
 /// <param name="headNum">Head child position, passed through to the selected routine.</param>
 /// <param name="head">Word and tag of the head, used to build the lexicalized labels.</param>
 /// <returns>The tree returned by the selected binarization routine.</returns>
 internal virtual Tree BinarizeLocalTree(Tree t, int headNum, TaggedWord head)
 {
     if (!markovFactor)
     {
         // Plain (non-Markov) binarization: inside or outside factoring.
         if (insideFactor)
         {
             return InsideBinarizeLocalTree(t, headNum, head, 0, 0);
         }
         string category = t.Label().Value();
         return OutsideBinarizeLocalTree(t, category, category, headNum, head, 0, string.Empty, 0, string.Empty);
     }

     // Markov binarization: relabel the node with its head word and tag first.
     string topCat      = t.Label().Value();
     ILabel lexicalized = new CategoryWordTag(topCat, head.Word(), head.Tag());
     t.SetLabel(lexicalized);

     return insideFactor
         ? MarkovInsideBinarizeLocalTreeNew(t, headNum, 0, t.NumChildren() - 1, true)
         : MarkovOutsideBinarizeLocalTree(t, head, headNum, topCat, new LinkedList <Tree>(), false);
 }
Exemple #2
0
        /// <summary>Builds a new label whose contents are read from a string.</summary>
        /// <param name="labelStr">Text handed to <c>CategoryWordTag.SetFromString</c> to populate the label</param>
        /// <returns>The freshly created <c>CategoryWordTag</c>, returned as an <c>ILabel</c></returns>
        public virtual ILabel NewLabelFromString(string labelStr)
        {
            var parsed = new CategoryWordTag();
            parsed.SetFromString(labelStr);
            return parsed;
        }
        /// <summary>Find the best (partial) parse within the parameter constraints.</summary>
        /// <remarks>
        /// A span of length one yields a single node over the head word. Otherwise every
        /// split point, dependent word and dependent tag is tried in turn, and the first
        /// combination whose total score <c>Matches</c> the span's <c>IScore</c> is expanded
        /// recursively.
        /// </remarks>
        /// <param name="start">Sentence index of start of span (fenceposts, from 0 up)</param>
        /// <param name="end">Sentence index of end of span (right side fencepost)</param>
        /// <param name="hWord">Sentence index of head word (left side fencepost)</param>
        /// <param name="hTag">Tag assigned to hWord</param>
        /// <returns>
        /// The best parse tree within the parameter constraints, or <c>null</c> (after
        /// logging a message) when no combination reproduces the span's score
        /// </returns>
        private Tree ExtractBestParse(int start, int end, int hWord, int hTag)
        {
            string headWord  = wordIndex.Get(words[hWord]);
            string headTag   = tagIndex.Get(hTag);
            ILabel nodeLabel = new CategoryWordTag(headWord, headWord, headTag);
            int    tagCount  = tagIndex.Size();

            // Span of one word: a single node directly over the word leaf.
            if (end - start == 1)
            {
                Tree wordLeaf = tf.NewLeaf(new Word(headWord));
                return tf.NewTreeNode(nodeLabel, Java.Util.Collections.SingletonList(wordLeaf));
            }

            // Otherwise search for the split / dependent that reproduces the span score.
            IList <Tree> kids   = new List <Tree>();
            double       target = IScore(start, end, hWord, hTag);

            for (int split = start + 1; split < end; split++)
            {
                int binD = binDistance[hWord][split];
                if (hWord < split)
                {
                    // Head is in the left part; the dependent comes from the right part.
                    for (int aWord = split; aWord < end; aWord++)
                    {
                        for (int aTag = 0; aTag < tagCount; aTag++)
                        {
                            if (!Matches(IScore(start, split, hWord, hTag)
                                         + IScore(split, end, aWord, aTag)
                                         + headScore[binD][hWord][dg.TagBin(hTag)][aWord][dg.TagBin(aTag)]
                                         + headStop[aWord][dg.TagBin(aTag)][split]
                                         + headStop[aWord][dg.TagBin(aTag)][end], target))
                            {
                                continue;
                            }
                            kids.Add(ExtractBestParse(start, split, hWord, hTag));
                            kids.Add(ExtractBestParse(split, end, aWord, aTag));
                            return tf.NewTreeNode(nodeLabel, kids);
                        }
                    }
                }
                else
                {
                    // Head is in the right part; the dependent comes from the left part.
                    for (int aWord = start; aWord < split; aWord++)
                    {
                        for (int aTag = 0; aTag < tagCount; aTag++)
                        {
                            if (!Matches(IScore(start, split, aWord, aTag)
                                         + IScore(split, end, hWord, hTag)
                                         + headScore[binD][hWord][dg.TagBin(hTag)][aWord][dg.TagBin(aTag)]
                                         + headStop[aWord][dg.TagBin(aTag)][start]
                                         + headStop[aWord][dg.TagBin(aTag)][split], target))
                            {
                                continue;
                            }
                            kids.Add(ExtractBestParse(start, split, aWord, aTag));
                            kids.Add(ExtractBestParse(split, end, hWord, hTag));
                            return tf.NewTreeNode(nodeLabel, kids);
                        }
                    }
                }
            }

            // No combination matched the recorded score.
            log.Info("Problem in ExhaustiveDependencyParser::extractBestParse");
            return null;
        }
Exemple #4
0
        /// <summary>
        /// Recursively binarizes the not-yet-processed children of <paramref name="t"/>,
        /// taking one child per call from the left while <paramref name="headNum"/> exceeds
        /// <paramref name="leftProcessed"/>, and from the right otherwise.
        /// </summary>
        /// <param name="t">Tree whose children are being binarized.</param>
        /// <param name="labelStr">Category string for the node created by this call.</param>
        /// <param name="finalCat">Category placed after the '@' in intermediate labels.</param>
        /// <param name="headNum">Head child position, compared against <paramref name="leftProcessed"/>.</param>
        /// <param name="head">Word and tag used for every label created here.</param>
        /// <param name="leftProcessed">Number of leftmost children already taken.</param>
        /// <param name="leftStr">Accumulated label text of the children taken from the left.</param>
        /// <param name="rightProcessed">Number of rightmost children already taken.</param>
        /// <param name="rightStr">Accumulated label text of the children taken from the right.</param>
        /// <returns>A node with at most two children covering the unprocessed children.</returns>
        private Tree OutsideBinarizeLocalTree(Tree t, string labelStr, string finalCat, int headNum, TaggedWord head, int leftProcessed, string leftStr, int rightProcessed, string rightStr)
        {
            IList <Tree> pair      = new List <Tree>(2);
            ILabel       nodeLabel = new CategoryWordTag(labelStr, head.Word(), head.Tag());
            int          pending   = t.NumChildren() - leftProcessed - rightProcessed;

            if (pending <= 2)
            {
                // Base case: at most two children remain, so attach them directly.
                pair.Add(t.GetChild(leftProcessed));
                if (pending == 2)
                {
                    pair.Add(t.GetChild(leftProcessed + 1));
                }
                return tf.NewTreeNode(nodeLabel, pair);
            }

            Tree leftKid;
            Tree rightKid;
            if (headNum > leftProcessed)
            {
                // Take a child from the left; the remainder becomes the right subtree.
                leftKid = t.GetChild(leftProcessed);
                string grownLeft = leftStr + ' ' + leftKid.Label().Value();
                string restLabel = simpleLabels
                    ? '@' + finalCat
                    : '@' + finalCat + " :" + grownLeft + " ..." + rightStr;
                rightKid = OutsideBinarizeLocalTree(t, restLabel, finalCat, headNum, head, leftProcessed + 1, grownLeft, rightProcessed, rightStr);
            }
            else
            {
                // Take a child from the right; the remainder becomes the left subtree.
                rightKid = t.GetChild(t.NumChildren() - rightProcessed - 1);
                string grownRight = ' ' + rightKid.Label().Value() + rightStr;
                string restLabel  = simpleLabels
                    ? '@' + finalCat
                    : '@' + finalCat + " :" + leftStr + " ..." + grownRight;
                leftKid = OutsideBinarizeLocalTree(t, restLabel, finalCat, headNum, head, leftProcessed, leftStr, rightProcessed + 1, grownRight);
            }

            pair.Add(leftKid);
            pair.Add(rightKid);
            return tf.NewTreeNode(nodeLabel, pair);
        }
Exemple #5
0
        /// <summary>
        /// Checks that the <c>CategoryWordTag</c> copy constructor carries over the
        /// category, word and tag of the source label.
        /// </summary>
        public virtual void TestCopy()
        {
            // The source label exposes the three values it was built with.
            var source = new CategoryWordTag("A", "B", "C");
            NUnit.Framework.Assert.AreEqual("A", source.Category());
            NUnit.Framework.Assert.AreEqual("B", source.Word());
            NUnit.Framework.Assert.AreEqual("C", source.Tag());

            // The copy exposes the same three values.
            var copy = new CategoryWordTag(source);
            NUnit.Framework.Assert.AreEqual("A", copy.Category());
            NUnit.Framework.Assert.AreEqual("B", copy.Word());
            NUnit.Framework.Assert.AreEqual("C", copy.Tag());
        }
Exemple #6
0
 /// <summary>
 /// Relabels a tree in place: leaves receive a <c>Word</c> label, every other node a
 /// <c>CategoryWordTag</c> holding its category plus the word and tag of its head.
 /// </summary>
 /// <param name="t">Tree to relabel; may be null.</param>
 /// <returns>The same tree instance that was passed in (null for null input).</returns>
 private Tree TransformTreeHelper(Tree t)
 {
     if (t == null)
     {
         return t;
     }

     string cat = t.Label().Value();
     if (t.IsLeaf())
     {
         ILabel leafLabel = new Word(cat);
         t.SetLabel(leafLabel);
         return t;
     }

     // Children first, so that a non-leaf head child already carries a CategoryWordTag.
     foreach (Tree kid in t.Children())
     {
         TransformTreeHelper(kid);
     }

     Tree   headKid  = hf.DetermineHead(t);
     string headWord = null;
     string headTag  = null;
     if (headKid == null)
     {
         // No head found: report it and fall back to a null word and tag.
         log.Error("null head for tree\n" + t.ToString());
     }
     else if (headKid.IsLeaf())
     {
         // Head is the word itself, so this node's category serves as the tag.
         headTag  = cat;
         headWord = headKid.Label().Value();
     }
     else
     {
         CategoryWordTag headLabel = (CategoryWordTag)headKid.Label();
         headWord = headLabel.Word();
         headTag  = headLabel.Tag();
     }

     ILabel nodeLabel = new CategoryWordTag(cat, headWord, headTag);
     t.SetLabel(nodeLabel);
     return t;
 }
        /// <summary>
        /// Checks that <c>CategoryWordTagFactory.NewLabel</c> returns a <c>CategoryWordTag</c>
        /// with the same category, word and tag as the label it was given.
        /// </summary>
        public virtual void TestCopy()
        {
            // The source label exposes the three values it was built with.
            var source = new CategoryWordTag("A", "B", "C");
            NUnit.Framework.Assert.AreEqual("A", source.Category());
            NUnit.Framework.Assert.AreEqual("B", source.Word());
            NUnit.Framework.Assert.AreEqual("C", source.Tag());

            // The factory must hand back a CategoryWordTag.
            var    factory  = new CategoryWordTagFactory();
            ILabel produced = factory.NewLabel(source);
            NUnit.Framework.Assert.IsTrue(produced is CategoryWordTag);

            // The produced label exposes the same three values.
            var copy = (CategoryWordTag)produced;
            NUnit.Framework.Assert.AreEqual("A", copy.Category());
            NUnit.Framework.Assert.AreEqual("B", copy.Word());
            NUnit.Framework.Assert.AreEqual("C", copy.Tag());
        }
        /// <summary>Rebuilds the tree represented by an edge by following its back pointers.</summary>
        /// <param name="edge">
        /// Edge to expand. With a <c>backHook</c> the node is binary; with only a
        /// <c>backEdge</c> it is unary; with neither it sits directly over a word leaf.
        /// </param>
        /// <returns>A tree node labelled with the edge's state, head word and tag.</returns>
        protected internal virtual Tree ExtractParse(Edge edge)
        {
            string headWord  = wordIndex.Get(words[edge.head]);
            string headTag   = tagIndex.Get(edge.tag);
            string stateName = stateIndex.Get(edge.state);
            ILabel nodeLabel = new CategoryWordTag(stateName, headWord, headTag);

            if (edge.backHook != null)
            {
                // binary: a pre-hook puts the edge's subtree first, otherwise the hook's
                IList <Tree> pair = new List <Tree>();
                if (edge.backHook.IsPreHook())
                {
                    pair.Add(ExtractParse(edge.backEdge));
                    pair.Add(ExtractParse(edge.backHook.backEdge));
                }
                else
                {
                    pair.Add(ExtractParse(edge.backHook.backEdge));
                    pair.Add(ExtractParse(edge.backEdge));
                }
                return tf.NewTreeNode(nodeLabel, pair);
            }

            Tree onlyChild;
            if (edge.backEdge != null)
            {
                // unary
                onlyChild = ExtractParse(edge.backEdge);
            }
            else if (originalLabels[edge.head] != null)
            {
                // leaf, using the stored original label for the word terminal
                onlyChild = tf.NewLeaf(originalLabels[edge.head]);
            }
            else
            {
                // leaf, falling back to the head word string
                onlyChild = tf.NewLeaf(headWord);
            }

            IList <Tree> single = Java.Util.Collections.SingletonList(onlyChild);
            return tf.NewTreeNode(nodeLabel, single);
        }
Exemple #9
0
            /// <summary>
            /// Returns a rebuilt copy of the tree in which every non-leaf label has its category
            /// (and its tag, when the label carries one) reduced with <c>BasicCategory</c>.
            /// </summary>
            /// <param name="tree">Tree to rebuild; its label is read immediately, so it must not be null.</param>
            /// <returns>The rebuilt tree; each node's score is copied from the corresponding input node.</returns>
            public virtual Tree TransformTree(Tree tree)
            {
                ILabel lab = tree.Label();

                if (tree.IsLeaf())
                {
                    // Leaves keep their label as it is.
                    Tree leafCopy = this.tf.NewLeaf(lab);
                    leafCopy.SetScore(tree.Score());
                    return leafCopy;
                }

                string rawCat   = lab.Value();
                string basicCat = this._enclosing.TreebankLanguagePack().BasicCategory(rawCat);

                // Rebuild every child recursively, keeping their order.
                int          kidCount    = tree.NumChildren();
                IList <Tree> rebuiltKids = new List <Tree>(kidCount);
                for (int i = 0; i < kidCount; i++)
                {
                    rebuiltKids.Add(this.TransformTree(tree.GetChild(i)));
                }

                CategoryWordTag stripped = new CategoryWordTag(lab);
                stripped.SetCategory(basicCat);

                // Only labels that expose a tag get their tag reduced as well.
                IHasTag tagged = lab as IHasTag;
                if (tagged != null)
                {
                    string rawTag = tagged.Tag();
                    stripped.SetTag(this._enclosing.TreebankLanguagePack().BasicCategory(rawTag));
                }

                Tree rebuilt = this.tf.NewTreeNode(stripped, rebuiltKids);
                rebuilt.SetScore(tree.Score());
                return rebuilt;
            }
Exemple #10
0
            // end class SubcategoryStripper
            /// <summary>
            /// Returns a rebuilt copy of the tree in which every non-leaf label has its category
            /// (and its tag, when the label carries one) passed through <c>BasicCategory</c>
            /// and then <c>StripGF</c>.
            /// </summary>
            /// <param name="tree">Tree to rebuild; its label is read immediately, so it must not be null.</param>
            /// <returns>The rebuilt tree; each node's score is copied from the corresponding input node.</returns>
            public virtual Tree TransformTree(Tree tree)
            {
                ILabel lab = tree.Label();

                if (tree.IsLeaf())
                {
                    // Leaves keep their label as it is.
                    Tree leafCopy = this.tf.NewLeaf(lab);
                    leafCopy.SetScore(tree.Score());
                    return leafCopy;
                }

                string rawCat   = lab.Value();
                string basicCat = this._enclosing.TreebankLanguagePack().BasicCategory(rawCat);
                string cleanCat = this._enclosing.TreebankLanguagePack().StripGF(basicCat);

                // Rebuild every child recursively, keeping their order.
                int          kidCount    = tree.NumChildren();
                IList <Tree> rebuiltKids = new List <Tree>(kidCount);
                for (int i = 0; i < kidCount; i++)
                {
                    rebuiltKids.Add(this.TransformTree(tree.GetChild(i)));
                }

                CategoryWordTag stripped = new CategoryWordTag(lab);
                stripped.SetCategory(cleanCat);

                // Only labels that expose a tag get their tag cleaned as well.
                IHasTag tagged = lab as IHasTag;
                if (tagged != null)
                {
                    string rawTag   = tagged.Tag();
                    string basicTag = this._enclosing.TreebankLanguagePack().BasicCategory(rawTag);
                    stripped.SetTag(this._enclosing.TreebankLanguagePack().StripGF(basicTag));
                }

                Tree rebuilt = this.tf.NewTreeNode(stripped, rebuiltKids);
                rebuilt.SetScore(tree.Score());
                return rebuilt;
            }
Exemple #11
0
        /// <summary>Binarizes the tree according to options set up in the constructor.</summary>
        /// <remarks>
        /// Does the whole tree by calling itself recursively. Leaves are rebuilt with a
        /// <c>Word</c> label, preterminals get a <c>CategoryWordTag</c> whose tag is their own
        /// category, and every other node has its children transformed first and is then
        /// handed to <c>BinarizeLocalTree</c>. A node whose category starts with the language
        /// pack's start symbol is treated as the root: it keeps its label and its transformed
        /// children are attached without binarization. The prefix test is ordinal, so it does
        /// not depend on the current culture.
        /// If no head is found for a non-root node, a message is logged and the head-label
        /// lookup that follows fails on the null head.
        /// </remarks>
        /// <param name="t">
        /// A tree to be binarized. The non-leaf nodes must already have
        /// CategoryWordTag labels, with heads percolated.
        /// </param>
        /// <returns>A binary tree, or <c>null</c> when <paramref name="t"/> is null.</returns>
        public virtual Tree TransformTree(Tree t)
        {
            // handle null
            if (t == null)
            {
                return(null);
            }
            string cat = t.Label().Value();

            // handle words
            if (t.IsLeaf())
            {
                ILabel label = new Word(cat);
                return(tf.NewLeaf(label));
            }
            // handle tags
            if (t.IsPreTerminal())
            {
                Tree   childResult = TransformTree(t.GetChild(0));
                string word        = childResult.Value();
                // would be nicer if Word/CWT ??
                IList <Tree> singleChild = new List <Tree>(1);
                singleChild.Add(childResult);
                return(tf.NewTreeNode(new CategoryWordTag(cat, word, cat), singleChild));
            }
            // handle categories
            Tree headChild = hf.DetermineHead(t);

            // Category labels are identifiers, not linguistic text, so compare ordinally;
            // the single-argument StartsWith overload would use the current culture.
            bool isRoot = cat.StartsWith(tlp.StartSymbol(), System.StringComparison.Ordinal);

            if (headChild == null && !isRoot)
            {
                log.Info("### No head found for:");
                t.PennPrint();
            }
            // Stays -1 when the head child is not among the children.
            int headNum = -1;

            Tree[]       kids        = t.Children();
            IList <Tree> newChildren = new List <Tree>(kids.Length);

            for (int childNum = 0; childNum < kids.Length; childNum++)
            {
                Tree child       = kids[childNum];
                Tree childResult = TransformTree(child);
                // recursive call
                if (child == headChild)
                {
                    headNum = childNum;
                }
                newChildren.Add(childResult);
            }
            Tree result;

            if (isRoot)
            {
                // handle the ROOT tree properly: keep its label, do not binarize it
                result = tf.NewTreeNode(t.Label(), newChildren);
            }
            else
            {
                // label shouldn't have changed
                string word  = ((IHasWord)headChild.Label()).Word();
                string tag   = ((IHasTag)headChild.Label()).Tag();
                ILabel label = new CategoryWordTag(cat, word, tag);
                result = tf.NewTreeNode(label, newChildren);
                // cdm Mar 2005: invent a head so I don't have to rewrite all this
                // code, but with the removal of TreeHeadPair, some of the rest of
                // this should probably be rewritten too to not use this head variable
                TaggedWord head = new TaggedWord(word, tag);
                result = BinarizeLocalTree(result, headNum, head);
            }
            return(result);
        }
Exemple #12
0
        /// <summary>
        /// Recursively binarizes the children of <paramref name="t"/>, splitting off one child
        /// per call and labelling the intermediate nodes with '@'-prefixed categories.
        /// </summary>
        /// <remarks>
        /// While <paramref name="headLoc"/> is positive, the first child is split off to the left
        /// and the call recurses on the remaining children with <c>headLoc - 1</c>. Once
        /// <paramref name="headLoc"/> is 0 and <paramref name="doneLeft"/> is false, a unary node is
        /// inserted (skipped when <c>tlp.IsStartSymbol(topCat)</c> holds) and the call restarts
        /// with an empty list and <paramref name="doneLeft"/> set. After that the last child is
        /// split off to the right on each call until a single child remains.
        /// </remarks>
        /// <param name="t">Node whose children are binarized; its label is reused for the node returned.</param>
        /// <param name="head">Supplies the word and tag for every intermediate label.</param>
        /// <param name="headLoc">Position of the head child among the children of <paramref name="t"/>.</param>
        /// <param name="topCat">Category placed after the '@' in intermediate labels.</param>
        /// <param name="ll">
        /// Children split off so far, trimmed to at most <c>markovOrder</c> entries. It is modified
        /// by this call and passed on to the recursive call.
        /// </param>
        /// <param name="doneLeft">True once the unary separating the two sides has been handled.</param>
        /// <returns>
        /// A node with at most two children, or <paramref name="t"/> itself when
        /// <paramref name="headLoc"/> is negative.
        /// </returns>
        private Tree MarkovOutsideBinarizeLocalTree(Tree t, TaggedWord head, int headLoc, string topCat, LinkedList <Tree> ll, bool doneLeft)
        {
            string       word        = head.Word();
            string       tag         = head.Tag();
            IList <Tree> newChildren = new List <Tree>(2);

            // call with t, headNum, head, topCat, false
            if (headLoc == 0)
            {
                // The head is now the first child: nothing is left to split off on the left.
                if (!doneLeft)
                {
                    // insert a unary to separate the sides
                    if (tlp.IsStartSymbol(topCat))
                    {
                        // No unary for a start-symbol category; just restart with a fresh list.
                        return(MarkovOutsideBinarizeLocalTree(t, head, headLoc, topCat, new LinkedList <Tree>(), true));
                    }
                    string subLabelStr;
                    if (simpleLabels)
                    {
                        subLabelStr = '@' + topCat;
                    }
                    else
                    {
                        string headStr = t.GetChild(headLoc).Label().Value();
                        subLabelStr = '@' + topCat + ": " + headStr + " ]";
                    }
                    ILabel subLabel = new CategoryWordTag(subLabelStr, word, tag);
                    // The unary child keeps all children of t and is binarized with a fresh list.
                    Tree   subTree  = tf.NewTreeNode(subLabel, t.GetChildrenAsList());
                    newChildren.Add(MarkovOutsideBinarizeLocalTree(subTree, head, headLoc, topCat, new LinkedList <Tree>(), true));
                    return(tf.NewTreeNode(t.Label(), newChildren));
                }
                int len = t.NumChildren();
                // len = 1
                if (len == 1)
                {
                    // Only one child is left: return it under a new node carrying t's label.
                    return(tf.NewTreeNode(t.Label(), Java.Util.Collections.SingletonList(t.GetChild(0))));
                }
                // Record the last child at the front of the list and drop the entry at the
                // back once the list holds more than markovOrder trees.
                ll.AddFirst(t.GetChild(len - 1));
                if (ll.Count > markovOrder)
                {
                    ll.RemoveLast();
                }
                // generate a right
                string subLabelStr_1;
                if (simpleLabels)
                {
                    subLabelStr_1 = '@' + topCat;
                }
                else
                {
                    string headStr  = t.GetChild(headLoc).Label().Value();
                    // The "... " prefix is added when len exceeds markovOrder - 1.
                    string rightStr = (len > markovOrder - 1 ? "... " : string.Empty) + Join(ll);
                    subLabelStr_1 = '@' + topCat + ": " + headStr + ' ' + rightStr;
                }
                ILabel subLabel_1 = new CategoryWordTag(subLabelStr_1, word, tag);
                // Left subtree: every child except the last; the last child becomes the right child.
                Tree   subTree_1  = tf.NewTreeNode(subLabel_1, t.GetChildrenAsList().SubList(0, len - 1));
                newChildren.Add(MarkovOutsideBinarizeLocalTree(subTree_1, head, headLoc, topCat, ll, true));
                newChildren.Add(t.GetChild(len - 1));
                return(tf.NewTreeNode(t.Label(), newChildren));
            }
            if (headLoc > 0)
            {
                // Record the first child at the back of the list and drop the entry at the
                // front once the list holds more than markovOrder trees.
                ll.AddLast(t.GetChild(0));
                if (ll.Count > markovOrder)
                {
                    ll.RemoveFirst();
                }
                // generate a left
                string subLabelStr;
                if (simpleLabels)
                {
                    subLabelStr = '@' + topCat;
                }
                else
                {
                    string headStr = t.GetChild(headLoc).Label().Value();
                    // The " ..." suffix is added when headLoc exceeds markovOrder - 1.
                    string leftStr = Join(ll) + (headLoc > markovOrder - 1 ? " ..." : string.Empty);
                    subLabelStr = '@' + topCat + ": " + leftStr + ' ' + headStr + " ]";
                }
                ILabel subLabel = new CategoryWordTag(subLabelStr, word, tag);
                // Right subtree: every child except the first, so the head position shifts by one.
                Tree   subTree  = tf.NewTreeNode(subLabel, t.GetChildrenAsList().SubList(1, t.NumChildren()));
                newChildren.Add(t.GetChild(0));
                newChildren.Add(MarkovOutsideBinarizeLocalTree(subTree, head, headLoc - 1, topCat, ll, false));
                return(tf.NewTreeNode(t.Label(), newChildren));
            }
            // Negative headLoc: the tree is returned unchanged.
            return(t);
        }
 /// <summary>Builds a new label whose contents are read from a string.</summary>
 /// <param name="labelStr">Text handed to <c>CategoryWordTag.SetFromString</c> to populate the label</param>
 /// <returns>The freshly created <c>CategoryWordTag</c>, returned as an <c>ILabel</c></returns>
 public virtual ILabel NewLabelFromString(string labelStr)
 {
     var parsed = new CategoryWordTag();
     parsed.SetFromString(labelStr);
     return parsed;
 }