Пример #1
0
        /// <summary>
        /// Recursively binarizes a single local tree around its head child, peeling off one
        /// non-head child per call and hanging the rest under a freshly created intermediate node.
        /// </summary>
        /// <remarks>
        /// Intermediate nodes are labelled with a <c>CategoryWordTag</c> carrying the head's word and tag.
        /// Their category is <c>"@" + topCat</c> when <c>simpleLabels</c> is set; otherwise it also
        /// includes the head child's label value and the joined contents of <paramref name="ll"/>.
        /// <list type="bullet">
        /// <item><description>Head to the right of position 0: the leftmost child is split off, appended to
        /// <paramref name="ll"/> (dropping the oldest entry once the list exceeds <c>markovOrder</c>), and the
        /// remaining children are processed with <c>headLoc - 1</c>.</description></item>
        /// <item><description>Head at position 0, left side not yet marked done: a unary intermediate node is
        /// inserted over all children (skipped when <paramref name="topCat"/> is a start symbol) and processing
        /// restarts with an empty history list.</description></item>
        /// <item><description>Head at position 0, left side done: a single remaining child ends the recursion;
        /// otherwise the rightmost child is split off, pushed onto the front of <paramref name="ll"/>
        /// (dropping the last entry once the list exceeds <c>markovOrder</c>), and the rest is processed.</description></item>
        /// <item><description>Negative head position: the tree is returned as is.</description></item>
        /// </list>
        /// </remarks>
        /// <param name="t">Local tree whose children are to be binarized.</param>
        /// <param name="head">Supplies the word and tag stored on every intermediate label.</param>
        /// <param name="headLoc">Index of the head child within <paramref name="t"/>.</param>
        /// <param name="topCat">Category used as the stem of intermediate labels.</param>
        /// <param name="ll">History of recently split-off children; mutated by this call.</param>
        /// <param name="doneLeft">Whether the separating unary step for head position 0 has already happened.</param>
        /// <returns>A node with the label of <paramref name="t"/> and at most two children, or <paramref name="t"/> itself when <paramref name="headLoc"/> is negative.</returns>
        private Tree MarkovOutsideBinarizeLocalTree(Tree t, TaggedWord head, int headLoc, string topCat, LinkedList <Tree> ll, bool doneLeft)
        {
            string headWord = head.Word();
            string headTag = head.Tag();
            IList<Tree> built = new List<Tree>(2);
            // Every intermediate category starts with this stem.
            string stem = '@' + topCat;

            if (headLoc > 0)
            {
                // Material remains to the left of the head: split off the leftmost child.
                Tree firstKid = t.GetChild(0);
                ll.AddLast(firstKid);
                if (ll.Count > markovOrder)
                {
                    ll.RemoveFirst();
                }

                string leftCat = stem;
                if (!simpleLabels)
                {
                    string leftHeadStr = t.GetChild(headLoc).Label().Value();
                    string leftContext = Join(ll) + (headLoc > markovOrder - 1 ? " ..." : string.Empty);
                    leftCat = stem + ": " + leftContext + ' ' + leftHeadStr + " ]";
                }

                ILabel leftLabel = new CategoryWordTag(leftCat, headWord, headTag);
                Tree remainder = tf.NewTreeNode(leftLabel, t.GetChildrenAsList().SubList(1, t.NumChildren()));
                built.Add(firstKid);
                built.Add(MarkovOutsideBinarizeLocalTree(remainder, head, headLoc - 1, topCat, ll, false));
                return tf.NewTreeNode(t.Label(), built);
            }

            if (headLoc < 0)
            {
                return t;
            }

            // From here on the head is the leftmost child (headLoc == 0).
            if (!doneLeft)
            {
                // Insert a unary node to separate the two sides, except under a start symbol.
                if (tlp.IsStartSymbol(topCat))
                {
                    return MarkovOutsideBinarizeLocalTree(t, head, headLoc, topCat, new LinkedList<Tree>(), true);
                }

                string unaryCat = stem;
                if (!simpleLabels)
                {
                    string unaryHeadStr = t.GetChild(headLoc).Label().Value();
                    unaryCat = stem + ": " + unaryHeadStr + " ]";
                }

                ILabel unaryLabel = new CategoryWordTag(unaryCat, headWord, headTag);
                Tree wrapped = tf.NewTreeNode(unaryLabel, t.GetChildrenAsList());
                built.Add(MarkovOutsideBinarizeLocalTree(wrapped, head, headLoc, topCat, new LinkedList<Tree>(), true));
                return tf.NewTreeNode(t.Label(), built);
            }

            int childCount = t.NumChildren();
            if (childCount == 1)
            {
                // Only the head is left: recursion bottoms out.
                return tf.NewTreeNode(t.Label(), Java.Util.Collections.SingletonList(t.GetChild(0)));
            }

            // Split off the rightmost child and remember it at the front of the history.
            Tree lastKid = t.GetChild(childCount - 1);
            ll.AddFirst(lastKid);
            if (ll.Count > markovOrder)
            {
                ll.RemoveLast();
            }

            string rightCat = stem;
            if (!simpleLabels)
            {
                string rightHeadStr = t.GetChild(headLoc).Label().Value();
                string rightContext = (childCount > markovOrder - 1 ? "... " : string.Empty) + Join(ll);
                rightCat = stem + ": " + rightHeadStr + ' ' + rightContext;
            }

            ILabel rightLabel = new CategoryWordTag(rightCat, headWord, headTag);
            Tree prefixKids = tf.NewTreeNode(rightLabel, t.GetChildrenAsList().SubList(0, childCount - 1));
            built.Add(MarkovOutsideBinarizeLocalTree(prefixKids, head, headLoc, topCat, ll, true));
            built.Add(lastKid);
            return tf.NewTreeNode(t.Label(), built);
        }
        /// <summary>
        /// Builds a normalized copy of <paramref name="tree"/>, rewriting node categories and
        /// dropping nodes according to the options configured on this instance.
        /// </summary>
        /// <remarks>
        /// Processing per node, in order:
        /// <list type="number">
        /// <item><description>A node whose value is a start symbol is replaced by the transformation of its first child.</description></item>
        /// <item><description>A leaf is copied through the tree's own factory.</description></item>
        /// <item><description>The category is reduced with <c>tlp.BasicCategory</c>, then rewritten according to the
        /// <c>whOption</c> bits: bits 1 and 4 strip a leading "WH"; bit 2 maps WP/WDT/WRB prefixes to PRP/DT/RB.</description></item>
        /// <item><description>With <c>deletePunct</c>, a preterminal whose tag or word is punctuation is removed.</description></item>
        /// <item><description>With <c>fixCollinsBaseNP</c>, an NP whose only child is also an NP is replaced by that child's transformation.</description></item>
        /// <item><description>PRT is relabelled as ADVP.</description></item>
        /// <item><description>Children are transformed recursively; removed children are skipped.</description></item>
        /// </list>
        /// </remarks>
        /// <param name="tree">The tree to transform; may be <c>null</c>.</param>
        /// <returns>
        /// The transformed tree, or <c>null</c> when <paramref name="tree"/> is <c>null</c>, when the node
        /// is deleted as punctuation, or when none of its children survive the transformation.
        /// </returns>
        public virtual Tree TransformTree(Tree tree)
        {
            if (tree == null)
            {
                return(null);
            }
            ITreeFactory tf = tree.TreeFactory();
            string       s  = tree.Value();

            if (tlp.IsStartSymbol(s))
            {
                return(TransformTree(tree.FirstChild()));
            }
            if (tree.IsLeaf())
            {
                return(tf.NewLeaf(tree.Label()));
            }
            s = tlp.BasicCategory(s);
            // Category prefixes are plain identifiers, so compare ordinally rather than by culture.
            if (((whOption & 1) != 0) && s.StartsWith("WH", System.StringComparison.Ordinal))
            {
                s = Sharpen.Runtime.Substring(s, 2);
            }
            if ((whOption & 2) != 0)
            {
                s = s.ReplaceAll("^WP", "PRP");
                // does both WP and WP$ !!
                s = s.ReplaceAll("^WDT", "DT");
                s = s.ReplaceAll("^WRB", "RB");
            }
            if (((whOption & 4) != 0) && s.StartsWith("WH", System.StringComparison.Ordinal))
            {
                s = Sharpen.Runtime.Substring(s, 2);
            }
            // wsg2010: Might need a better way to deal with tag ambiguity. This still doesn't handle the
            // case where the GOLD tree does not label a punctuation mark as such (common in French), and
            // the guess tree does.
            if (deletePunct && tree.IsPreTerminal() && (tlp.IsEvalBIgnoredPunctuationTag(s) || tlp.IsPunctuationWord(tree.FirstChild().Value())))
            {
                return(null);
            }
            // remove the extra NPs inserted in the collinsBaseNP option
            if (fixCollinsBaseNP && s.Equals("NP"))
            {
                Tree[] kids = tree.Children();
                if (kids.Length == 1 && tlp.BasicCategory(kids[0].Value()).Equals("NP"))
                {
                    return(TransformTree(kids[0]));
                }
            }
            // Magerman erased this distinction, and everyone else has followed like sheep...
            if (s.Equals("PRT"))
            {
                s = "ADVP";
            }
            IList <Tree> children = new List <Tree>();

            // Walk the child array directly; it is fetched once and no separate count is needed.
            foreach (Tree child in tree.Children())
            {
                Tree newChild = TransformTree(child);
                if (newChild != null)
                {
                    children.Add(newChild);
                }
            }
            // A node left with no children is itself removed.
            if (children.IsEmpty())
            {
                return(null);
            }
            Tree node = tf.NewTreeNode(tree.Label(), children);

            node.SetValue(s);
            return(node);
        }