Esempio n. 1
0
 /// <summary>
 /// Overrides the post-operation fix for coordinated daughters.
 /// For "a, b and c" or similar: we want "a" to be the head.
 /// </summary>
 /// <param name="headIdx">Index of the head daughter chosen so far.</param>
 /// <param name="daughterTrees">The daughters of the node whose head is being determined.</param>
 /// <returns>
 /// The index of a conjunct further to the left when the daughter directly before
 /// <paramref name="headIdx"/> is a coordinating conjunction or CONJP and such a conjunct
 /// is found; otherwise <paramref name="headIdx"/> unchanged.
 /// </returns>
 protected override int PostOperationFix(int headIdx, Tree[] daughterTrees)
 {
     // We need at least "X CC head" for the fix to apply.
     if (headIdx < 2)
     {
         return headIdx;
     }

     string precedingCategory = Tlp.BasicCategory(daughterTrees[headIdx - 1].Value());
     if (!(precedingCategory.Equals(PartsOfSpeech.CoordinatingConjunction) || precedingCategory.Equals(CONJP)))
     {
         return headIdx;
     }

     // Don't allow INTJ unless conjoined with INTJ - important in informal genres "Oh and don't forget to call!"
     bool origWasInterjection = PartsOfSpeech.Interjection.Equals(Tlp.BasicCategory(daughterTrees[headIdx].Value()));

     // Start immediately left of the conjunction and step over every daughter that ShouldSkip rejects.
     int candidateIdx;
     for (candidateIdx = headIdx - 2; candidateIdx >= 0; candidateIdx--)
     {
         if (!ShouldSkip(daughterTrees[candidateIdx], origWasInterjection))
         {
             break;
         }
     }

     // candidateIdx is now either negative or points at a left conjunct.
     // For 3+ conjuncts keep hopping left over separators for as long as FindPreviousHead finds one.
     while (candidateIdx >= 2)
     {
         int earlierIdx = FindPreviousHead(candidateIdx, daughterTrees, origWasInterjection);
         if (earlierIdx < 0)
         {
             break;
         }
         candidateIdx = earlierIdx;
     }

     // A negative index means no usable conjunct was found: keep the original head.
     return candidateIdx >= 0 ? candidateIdx : headIdx;
 }
Esempio n. 2
0
        /// <summary>
        /// Scans leftwards from <paramref name="headIdx"/> for an earlier conjunct.
        /// Daughters tagged as comma or colon/semicolon count as separators; punctuation
        /// pre-terminals are stepped over, as are interjections when the original head was
        /// not itself an interjection.
        /// </summary>
        /// <param name="headIdx">Index to start from; scanning begins at the daughter to its left.</param>
        /// <param name="daughterTrees">The daughters being scanned.</param>
        /// <param name="origWasInterjection">Whether the originally chosen head was an interjection.</param>
        /// <returns>
        /// The index of the first non-skippable daughter to the left, provided at least one
        /// separator was passed on the way; -1 when no such daughter exists. A negative
        /// <paramref name="headIdx"/> is returned unchanged.
        /// </returns>
        private int FindPreviousHead(int headIdx, Tree[] daughterTrees, bool origWasInterjection)
        {
            // Nothing to scan: hand the negative index straight back.
            if (headIdx < 0)
            {
                return headIdx;
            }

            bool passedSeparator = false;

            for (int idx = headIdx - 1; idx >= 0; idx--)
            {
                Tree daughter = daughterTrees[idx];
                string category = Tlp.BasicCategory(daughter.Value());

                if (PartsOfSpeech.Comma.Equals(category) || PartsOfSpeech.ColonSemiColon.Equals(category))
                {
                    passedSeparator = true;
                    continue;
                }

                bool skippablePreTerminal =
                    daughter.IsPreTerminal() &&
                    (Tlp.IsPunctuationTag(category) ||
                     (!origWasInterjection && PartsOfSpeech.Interjection.Equals(category)));

                if (skippablePreTerminal || (INTJ.Equals(category) && !origWasInterjection))
                {
                    // keep scanning to the left
                    continue;
                }

                // A real candidate: it only counts if a separator lies between it and the start.
                return passedSeparator ? idx : -1;
            }

            // Ran off the left edge without finding a candidate.
            return -1;
        }
Esempio n. 3
0
        /// <summary>
        /// Checks whether the tree t is an existential constituent.
        /// Two cases are handled, both of which scan the children of <paramref name="parent"/>:
        /// -- affirmative sentences (t is a VP): the children preceding the first child whose
        ///    value is the VP label are searched for an existential "there" tag;
        /// -- questions (t's category starts with SQ): every child whose value is not a verb
        ///    tag is searched for an existential "there" tag.
        /// </summary>
        /// <param name="t">The constituent being tested.</param>
        /// <param name="parent">The parent of t; when null the result is false.</param>
        /// <returns>True when an existential "there" pre-terminal was found.</returns>
        private bool IsExistential(Tree t, Tree parent)
        {
            string motherCat = Tlp.BasicCategory(t.Label().Value());

            bool affirmativeCase = motherCat.Equals(AbstractCollinsHeadFinder.VerbPhrase) && parent != null;
            bool questionCase    = !affirmativeCase && motherCat.StartsWith(SQ) && parent != null;

            if (!affirmativeCase && !questionCase)
            {
                return false;
            }

            bool foundThere = false;

            foreach (Tree child in parent.Children())
            {
                string childValue = child.Value();

                if (affirmativeCase)
                {
                    // only the sisters before the VP are of interest
                    if (childValue.Equals(AbstractCollinsHeadFinder.VerbPhrase))
                    {
                        break;
                    }
                }
                else if (PartsOfSpeech.IsVerb(childValue))
                {
                    // not necessary to look into the verb
                    continue;
                }

                foreach (ILabel preTerminalTag in child.PreTerminalYield())
                {
                    if (preTerminalTag.Value().Equals(PartsOfSpeech.ExistentialThere))
                    {
                        foundThere = true;
                    }
                }
            }

            return foundThere;
        }
Esempio n. 4
0
 /// <summary>
 /// Adjusts the head of a coordinated structure: when the daughter directly before the
 /// chosen head is a coordinating conjunction or CONJP, the head is moved to the nearest
 /// daughter left of the conjunction that is not a punctuation pre-terminal.
 /// </summary>
 /// <param name="headIdx">Index of the head daughter chosen so far.</param>
 /// <param name="daughterTrees">The daughters of the node whose head is being determined.</param>
 /// <returns>
 /// The index of the conjunct left of the conjunction, or <paramref name="headIdx"/>
 /// unchanged when the fix does not apply or only punctuation lies to the left.
 /// </returns>
 protected override int PostOperationFix(int headIdx, Tree[] daughterTrees)
 {
     if (headIdx >= 2)
     {
         string prevLab = Tlp.BasicCategory(daughterTrees[headIdx - 1].Value());
         if (prevLab.Equals(PartsOfSpeech.CoordinatingConjunction) || prevLab.Equals(AbstractCollinsHeadFinder.CONJP))
         {
             int newHeadIdx = headIdx - 2;
             // Re-read the daughter on every step. Previously the tree was fetched once
             // before the loop, so a punctuation daughter at headIdx - 2 drove the index
             // to -1 and the head was never moved.
             while (newHeadIdx >= 0)
             {
                 Tree candidate = daughterTrees[newHeadIdx];
                 if (!candidate.IsPreTerminal() || !Tlp.IsPunctuationTag(candidate.Value()))
                 {
                     break;
                 }
                 newHeadIdx--;
             }
             if (newHeadIdx >= 0)
             {
                 headIdx = newHeadIdx;
             }
         }
     }
     return headIdx;
 }
Esempio n. 5
0
        /// <summary>
        /// Determine which daughter of the current parse tree is the head.
        /// It assumes that the daughters already have had their heads determined.
        /// Special rules are tried in order for CONJP, for SBARQ/SINV (only when
        /// makeCopulaHead is false) and for VP/SQ/SINV with auxiliaries or copulas;
        /// if none of them yields a head, the base implementation decides.
        /// </summary>
        /// <param name="t">
        /// The parse tree to examine the daughters of.
        /// This is assumed to never be a leaf
        /// </param>
        /// <param name="parent">
        /// The parent of <paramref name="t"/>. It is passed to IsExistential, IsWhQ and the
        /// base implementation. IsExistential tolerates null; whether the other two do is
        /// not visible here - TODO confirm.
        /// </param>
        /// <returns>The parse tree that is the head</returns>
        protected override Tree DetermineNonTrivialHead(Tree t, Tree parent)
        {
            string motherCat = Tlp.BasicCategory(t.Label().Value());

            // Some conj expressions seem to make more sense with the "not" or
            // other key words as the head.  For example, "and not" means
            // something completely different than "and".  Furthermore,
            // downstream code was written assuming "not" would be the head...
            if (motherCat.Equals(CONJP))
            {
                // NOTE(review): these patterns are compiled on every call that reaches this
                // branch; caching them in static fields would avoid the repeated work.
                var headOfConjpTregex = new TregexPattern[]
                {
                    TregexPattern.Compile("CONJP < (CC <: /^(?i:but|and)$/ $+ (RB=head <: /^(?i:not)$/))"),
                    TregexPattern.Compile(
                        "CONJP < (CC <: /^(?i:but)$/ [ ($+ (RB=head <: /^(?i:also|rather)$/)) | ($+ (ADVP=head <: (RB <: /^(?i:also|rather)$/))) ])"),
                    TregexPattern.Compile(
                        "CONJP < (CC <: /^(?i:and)$/ [ ($+ (RB=head <: /^(?i:yet)$/)) | ($+ (ADVP=head <: (RB <: /^(?i:yet)$/))) ])"),
                };
                // The first pattern that matches at t wins; its node named "head" is returned.
                foreach (TregexPattern pattern in headOfConjpTregex)
                {
                    TregexMatcher matcher = pattern.Matcher(t);
                    if (matcher.MatchesAt(t))
                    {
                        return(matcher.GetNode("head"));
                    }
                }
                // if none of the above patterns match, use the standard method
            }

            if (motherCat.Equals(SBARQ) || motherCat.Equals(SINV))
            {
                // These overrides are only applied when the copula is NOT supposed to be the head.
                if (!makeCopulaHead)
                {
                    // NOTE(review): compiled on every call that reaches this branch, like the
                    // CONJP patterns above.
                    var headOfCopulaTregex = new TregexPattern[]
                    {
                        // Matches phrases such as "what is wrong"
                        TregexPattern.Compile("SBARQ < (WHNP $++ (/^VB/ < " + EnglishPatterns.CopularWordRegex +
                                              " $++ ADJP=head))"),

                        // matches WHNP $+ VB<copula $+ NP
                        // for example, "Who am I to judge?"
                        // !$++ ADJP matches against "Why is the dog pink?"
                        TregexPattern.Compile("SBARQ < (WHNP=head $++ (/^VB/ < " + EnglishPatterns.CopularWordRegex +
                                              " $+ NP !$++ ADJP))"),

                        // Actually somewhat limited in scope, this detects "Tuesday it is",
                        // "Such a great idea this was", etc
                        TregexPattern.Compile("SINV < (NP=head $++ (NP $++ (VP < (/^(?:VB|AUX)/ < " +
                                              EnglishPatterns.CopularWordRegex + "))))"),
                    };
                    // The first pattern that matches at t wins; its node named "head" is returned.
                    foreach (TregexPattern pattern in headOfCopulaTregex)
                    {
                        TregexMatcher matcher = pattern.Matcher(t);
                        if (matcher.MatchesAt(t))
                        {
                            return(matcher.GetNode("head"));
                        }
                    }
                }
                // if none of the above patterns match, use the standard method
            }

            // Filtered copy of the children, computed at most once and shared by the
            // auxiliary and copular branches below.
            Tree[] tmpFilteredChildren = null;

            // do VPs with auxiliary as special case
            // NOTE(review): this condition uses the literals "SQ"/"SINV" while other lines in
            // this method use the SQ/SINV constants - presumably the same values; confirm.
            if ((motherCat.Equals(AbstractCollinsHeadFinder.VerbPhrase) || motherCat.Equals("SQ") || motherCat.Equals("SINV")))
            {
                Tree[] kids = t.Children();
                // try to find if there is an auxiliary verb
                // looks for auxiliaries
                if (HasVerbalAuxiliary(kids, verbalAuxiliaries, true) || HasPassiveProgressiveAuxiliary(kids))
                {
                    // string[] how = new string[] {Left, AbstractCollinsHeadFinder.VerbPhrase, CoordinationTransformer.Adjective, CoordinationTransformer.Noun};
                    // Including NP etc seems okay for copular sentences but is
                    // problematic for other auxiliaries, like 'he has an answer'
                    // But maybe doing ADJP is fine!
                    string[] how = { Left, AbstractCollinsHeadFinder.VerbPhrase, CoordinationTransformer.Adjective };

                    //tmpFilteredChildren = ArrayUtils.filter(kids, REMOVE_TMP_AND_ADV);
                    // Keeps the children for which RemoveTmpAndAdv returns true (presumably the
                    // ones that are not temporal/adverbial - confirm against the helper).
                    tmpFilteredChildren = kids.Where(k => RemoveTmpAndAdv(k)).ToArray();

                    Tree pti = TraverseLocate(tmpFilteredChildren, how, false);
                    if (pti != null)
                    {
                        return(pti);
                    }
                }

                // looks for copular verbs
                // Existential constructions and wh-questions are excluded from the copular rule.
                if (HasVerbalAuxiliary(kids, copulars, false) && !IsExistential(t, parent) && !IsWhQ(t, parent))
                {
                    // The two rules differ only in their first element: Right for SQ, Left otherwise.
                    string[] how;
                    if (motherCat.Equals("SQ"))
                    {
                        how = new string[] { Right, AbstractCollinsHeadFinder.VerbPhrase, CoordinationTransformer.Adjective, CoordinationTransformer.Noun, WHADJP, WHNP };
                    }
                    else
                    {
                        how = new string[] { Left, AbstractCollinsHeadFinder.VerbPhrase, CoordinationTransformer.Adjective, CoordinationTransformer.Noun, WHADJP, WHNP };
                    }
                    // Avoid undesirable heads by filtering them from the list of potential children
                    // (reuses the filtered array when the auxiliary branch above already built it)
                    if (tmpFilteredChildren == null)
                    {
                        //tmpFilteredChildren = ArrayUtils.filter(kids, REMOVE_TMP_AND_ADV);
                        tmpFilteredChildren = kids.Where(k => RemoveTmpAndAdv(k)).ToArray();
                    }
                    Tree pti = TraverseLocate(tmpFilteredChildren, how, false);
                    // In SQ, only allow an NP to become head if there is another one to the left (then it's probably predicative)
                    if (motherCat.Equals(SQ) && pti != null && pti.Label() != null &&
                        pti.Label().Value().StartsWith(CoordinationTransformer.Noun))
                    {
                        // Walks the unfiltered kids from the left and stops at pti itself or at
                        // the first earlier kid whose label starts with the noun category.
                        bool foundAnotherNp = false;
                        foreach (Tree kid in kids)
                        {
                            if (kid == pti)
                            {
                                break;
                            }
                            else if (kid.Label() != null && kid.Label().Value().StartsWith(CoordinationTransformer.Noun))
                            {
                                foundAnotherNp = true;
                                break;
                            }
                        }
                        // No NP to the left: reject the candidate so the base rules decide.
                        if (!foundAnotherNp)
                        {
                            pti = null;
                        }
                    }

                    if (pti != null)
                    {
                        return(pti);
                    }
                }
            }

            // None of the special cases produced a head: defer to the base implementation.
            Tree hd = base.DetermineNonTrivialHead(t, parent);

            /* ----
             * // This should now be handled at the AbstractCollinsHeadFinder level, so see if we can comment this out
             * // Heuristically repair punctuation heads
             * Tree[] hdChildren = hd.children();
             * if (hdChildren != null && hdChildren.length > 0 &&
             * hdChildren[0].isLeaf()) {
             * if (tlp.isPunctuationWord(hdChildren[0].label().value())) {
             * Tree[] tChildren = t.children();
             * for (int i = tChildren.length - 1; i >= 0; i--) {
             * if (!tlp.isPunctuationWord(tChildren[i].children()[0].label().value())) {
             * hd = tChildren[i];
             * break;
             * }
             * }
             * }
             * }
             */
            return(hd);
        }