/// <summary>
/// Coordination-aware adjustment of the head index. For "a, b and c" or
/// similar, where the daughter immediately left of the chosen head is a
/// coordinating conjunction or a CONJP, the head is moved leftwards so that
/// "a" becomes the head.
/// </summary>
/// <param name="headIdx">Index of the head chosen so far.</param>
/// <param name="daughterTrees">The daughters among which the head was chosen.</param>
/// <returns>
/// The relocated head index, or <paramref name="headIdx"/> unchanged when the
/// head does not follow a conjunction or no usable left conjunct is found.
/// </returns>
protected override int PostOperationFix(int headIdx, Tree[] daughterTrees)
{
    // A conjunction plus a left conjunct require at least two daughters before the head.
    if (headIdx < 2)
    {
        return headIdx;
    }

    string precedingCategory = Tlp.BasicCategory(daughterTrees[headIdx - 1].Value());
    bool followsConjunction = precedingCategory.Equals(PartsOfSpeech.CoordinatingConjunction)
                              || precedingCategory.Equals(CONJP);
    if (!followsConjunction)
    {
        return headIdx;
    }

    // Don't allow INTJ unless conjoined with INTJ - important in informal
    // genres ("Oh and don't forget to call!").
    bool origWasInterjection =
        PartsOfSpeech.Interjection.Equals(Tlp.BasicCategory(daughterTrees[headIdx].Value()));

    // Start just left of the conjunction and step back over everything
    // ShouldSkip rejects (commas etc., for 3+ conjuncts).
    int candidate = headIdx - 2;
    while (candidate >= 0 && ShouldSkip(daughterTrees[candidate], origWasInterjection))
    {
        candidate--;
    }

    // candidate is now negative or sits on a left head. Keep hopping further
    // left for as long as FindPreviousHead reports an earlier conjunct.
    while (candidate >= 2)
    {
        int earlier = FindPreviousHead(candidate, daughterTrees, origWasInterjection);
        if (earlier < 0)
        {
            break;
        }
        candidate = earlier;
    }

    // A negative candidate means nothing suitable was found: keep the original head.
    return candidate >= 0 ? candidate : headIdx;
}
/// <summary>
/// Scans leftwards from <paramref name="headIdx"/> for an earlier conjunct
/// that is set off by a comma- or colon/semicolon-tagged separator.
/// </summary>
/// <param name="headIdx">Index to start from; the scan begins at the daughter just left of it.</param>
/// <param name="daughterTrees">The daughters being scanned.</param>
/// <param name="origWasInterjection">
/// When false, interjection-tagged preterminals and INTJ daughters are stepped
/// over instead of being accepted as a head.
/// </param>
/// <returns>
/// The index of the first daughter that is neither a separator nor skippable,
/// provided at least one separator was crossed on the way; a negative value otherwise.
/// </returns>
private int FindPreviousHead(int headIdx, Tree[] daughterTrees, bool origWasInterjection)
{
    // Nothing to scan: hand the (negative) index straight back.
    if (headIdx < 0)
    {
        return headIdx;
    }

    bool crossedSeparator = false;
    for (int idx = headIdx - 1; idx >= 0; idx--)
    {
        Tree daughter = daughterTrees[idx];
        string label = Tlp.BasicCategory(daughter.Value());

        if (PartsOfSpeech.Comma.Equals(label) || PartsOfSpeech.ColonSemiColon.Equals(label))
        {
            crossedSeparator = true;
            continue;
        }

        // Punctuation preterminals are always stepped over; interjections only
        // when the original head was not itself an interjection.
        bool skippablePreTerminal = daughter.IsPreTerminal()
            && (Tlp.IsPunctuationTag(label)
                || (!origWasInterjection && PartsOfSpeech.Interjection.Equals(label)));
        if (skippablePreTerminal || (INTJ.Equals(label) && !origWasInterjection))
        {
            continue;
        }

        // A real constituent: it only counts as a previous head when a
        // separator lies between it and the starting position.
        return crossedSeparator ? idx : -1;
    }

    // Ran off the left edge without finding a candidate.
    return -1;
}
/// <summary>
/// Checks whether the tree t is an existential constituent.
/// There are two cases:
/// -- affirmative sentences in which "there" is a left sister of the VP
/// -- questions in which "there" is a daughter of the SQ.
/// </summary>
/// <param name="t">The constituent whose category selects which case applies.</param>
/// <param name="parent">The parent whose children are inspected; may be null.</param>
/// <returns>
/// True when an existential-"there" tag is found among the inspected children
/// of <paramref name="parent"/>; false otherwise, including when
/// <paramref name="parent"/> is null or the category matches neither case.
/// </returns>
private bool IsExistential(Tree t, Tree parent)
{
    string category = Tlp.BasicCategory(t.Label().Value());

    bool affirmativeCase = category.Equals(AbstractCollinsHeadFinder.VerbPhrase);
    bool questionCase = !affirmativeCase && category.StartsWith(SQ);
    if (parent == null || !(affirmativeCase || questionCase))
    {
        return false;
    }

    bool foundExistential = false;
    foreach (Tree child in parent.Children())
    {
        if (affirmativeCase)
        {
            // Only the children up to the first VP are examined.
            if (child.Value().Equals(AbstractCollinsHeadFinder.VerbPhrase))
            {
                break;
            }
        }
        else if (PartsOfSpeech.IsVerb(child.Value()))
        {
            // Not necessary to look into the verb.
            continue;
        }

        foreach (ILabel tag in child.PreTerminalYield())
        {
            if (tag.Value().Equals(PartsOfSpeech.ExistentialThere))
            {
                foundExistential = true;
            }
        }
    }

    return foundExistential;
}
/// <summary>
/// Coordination-aware adjustment of the head index. When the daughter
/// immediately left of the chosen head is a coordinating conjunction or a
/// CONJP, the head is moved to the nearest daughter left of that conjunction
/// that is not a punctuation preterminal.
/// </summary>
/// <param name="headIdx">Index of the head chosen so far.</param>
/// <param name="daughterTrees">The daughters among which the head was chosen.</param>
/// <returns>
/// The relocated head index, or <paramref name="headIdx"/> unchanged when the
/// head does not follow a conjunction or only punctuation lies to its left.
/// </returns>
protected override int PostOperationFix(int headIdx, Tree[] daughterTrees)
{
    // A conjunction plus a left conjunct require at least two daughters before the head.
    if (headIdx < 2)
    {
        return headIdx;
    }

    string prevLab = Tlp.BasicCategory(daughterTrees[headIdx - 1].Value());
    if (!prevLab.Equals(PartsOfSpeech.CoordinatingConjunction)
        && !prevLab.Equals(AbstractCollinsHeadFinder.CONJP))
    {
        return headIdx;
    }

    // Step left over punctuation preterminals. The daughter must be re-read on
    // every iteration: testing a single daughter fetched before the loop would
    // run the index all the way down to -1 as soon as that one daughter is
    // punctuation, and the head would never be moved.
    int newHeadIdx = headIdx - 2;
    while (newHeadIdx >= 0)
    {
        Tree candidate = daughterTrees[newHeadIdx];
        if (!(candidate.IsPreTerminal() && Tlp.IsPunctuationTag(candidate.Value())))
        {
            break;
        }
        newHeadIdx--;
    }

    // A negative index means only punctuation was found: keep the original head.
    return newHeadIdx >= 0 ? newHeadIdx : headIdx;
}
/// <summary>
/// Patterns that pick the head of a CONJP. Some conj expressions seem to make
/// more sense with the "not" or other key words as the head. For example,
/// "and not" means something completely different than "and". Furthermore,
/// downstream code was written assuming "not" would be the head...
/// Compiled once instead of on every head lookup.
/// NOTE(review): sharing these assumes a compiled TregexPattern carries no
/// per-match state (Matcher() is called per tree) - confirm.
/// </summary>
private static readonly TregexPattern[] ConjpHeadPatterns =
{
    TregexPattern.Compile("CONJP < (CC <: /^(?i:but|and)$/ $+ (RB=head <: /^(?i:not)$/))"),
    TregexPattern.Compile(
        "CONJP < (CC <: /^(?i:but)$/ [ ($+ (RB=head <: /^(?i:also|rather)$/)) | ($+ (ADVP=head <: (RB <: /^(?i:also|rather)$/))) ])"),
    TregexPattern.Compile(
        "CONJP < (CC <: /^(?i:and)$/ [ ($+ (RB=head <: /^(?i:yet)$/)) | ($+ (ADVP=head <: (RB <: /^(?i:yet)$/))) ])"),
};

/// <summary>
/// Patterns that pick the head of copular SBARQ / SINV constructions.
/// Compiled once instead of on every head lookup.
/// </summary>
private static readonly TregexPattern[] CopulaHeadPatterns =
{
    // Matches phrases such as "what is wrong"
    TregexPattern.Compile("SBARQ < (WHNP $++ (/^VB/ < " + EnglishPatterns.CopularWordRegex + " $++ ADJP=head))"),
    // matches WHNP $+ VB<copula $+ NP
    // for example, "Who am I to judge?"
    // !$++ ADJP matches against "Why is the dog pink?"
    TregexPattern.Compile("SBARQ < (WHNP=head $++ (/^VB/ < " + EnglishPatterns.CopularWordRegex + " $+ NP !$++ ADJP))"),
    // Actually somewhat limited in scope, this detects "Tuesday it is",
    // "Such a great idea this was", etc
    TregexPattern.Compile("SINV < (NP=head $++ (NP $++ (VP < (/^(?:VB|AUX)/ < " + EnglishPatterns.CopularWordRegex + "))))"),
};

/// <summary>
/// Tries each pattern in order against <paramref name="t"/> and reports the
/// node named "head" of the first pattern that matches at <paramref name="t"/>.
/// </summary>
/// <param name="patterns">Patterns to try, in priority order.</param>
/// <param name="t">The tree to match at.</param>
/// <param name="head">The "head" node of the first matching pattern; null when nothing matched.</param>
/// <returns>True when some pattern matched.</returns>
private static bool TryFindPatternHead(TregexPattern[] patterns, Tree t, out Tree head)
{
    foreach (TregexPattern pattern in patterns)
    {
        TregexMatcher matcher = pattern.Matcher(t);
        if (matcher.MatchesAt(t))
        {
            head = matcher.GetNode("head");
            return true;
        }
    }

    head = null;
    return false;
}

/// <summary>
/// Determine which daughter of the current parse tree is the head.
/// It assumes that the daughters already have had their heads determined.
/// Uses special rules for CONJP, copular SBARQ/SINV and VP/SQ/SINV heads
/// before falling back to the base implementation.
/// </summary>
/// <param name="t">
/// The parse tree to examine the daughters of.
/// This is assumed to never be a leaf
/// </param>
/// <param name="parent">The parent of <paramref name="t"/>, passed on to the existential / wh-question checks and to the base implementation.</param>
/// <returns>The parse tree that is the head</returns>
protected override Tree DetermineNonTrivialHead(Tree t, Tree parent)
{
    string motherCat = Tlp.BasicCategory(t.Label().Value());
    Tree patternHead;

    // CONJP: prefer the key word ("not", "also", "rather", "yet") as head.
    // If none of the patterns match, use the standard method.
    if (motherCat.Equals(CONJP) && TryFindPatternHead(ConjpHeadPatterns, t, out patternHead))
    {
        return patternHead;
    }

    // Copular SBARQ / SINV, only when the copula itself must not be the head.
    // If none of the patterns match, use the standard method.
    if ((motherCat.Equals(SBARQ) || motherCat.Equals(SINV))
        && !makeCopulaHead
        && TryFindPatternHead(CopulaHeadPatterns, t, out patternHead))
    {
        return patternHead;
    }

    // do VPs with auxiliary as special case
    if (motherCat.Equals(AbstractCollinsHeadFinder.VerbPhrase) || motherCat.Equals("SQ") || motherCat.Equals("SINV"))
    {
        Tree[] kids = t.Children();

        // Filtered view of the daughters, built on first use and shared by both branches below.
        Tree[] filteredKids = null;

        // looks for auxiliaries
        if (HasVerbalAuxiliary(kids, verbalAuxiliaries, true) || HasPassiveProgressiveAuxiliary(kids))
        {
            // Including NP etc seems okay for copular sentences but is
            // problematic for other auxiliaries, like 'he has an answer'
            // But maybe doing ADJP is fine!
            string[] how = { Left, AbstractCollinsHeadFinder.VerbPhrase, CoordinationTransformer.Adjective };
            filteredKids = kids.Where(k => RemoveTmpAndAdv(k)).ToArray();

            Tree auxiliaryHead = TraverseLocate(filteredKids, how, false);
            if (auxiliaryHead != null)
            {
                return auxiliaryHead;
            }
        }

        // looks for copular verbs
        if (HasVerbalAuxiliary(kids, copulars, false) && !IsExistential(t, parent) && !IsWhQ(t, parent))
        {
            // SQ is searched from the right, everything else from the left.
            string direction = motherCat.Equals("SQ") ? Right : Left;
            string[] how =
            {
                direction,
                AbstractCollinsHeadFinder.VerbPhrase,
                CoordinationTransformer.Adjective,
                CoordinationTransformer.Noun,
                WHADJP,
                WHNP
            };

            // Avoid undesirable heads by filtering them from the list of potential children
            if (filteredKids == null)
            {
                filteredKids = kids.Where(k => RemoveTmpAndAdv(k)).ToArray();
            }

            Tree copularHead = TraverseLocate(filteredKids, how, false);

            // In SQ, only allow an NP to become head if there is another one to the left (then it's probably predicative)
            if (motherCat.Equals(SQ)
                && copularHead != null
                && copularHead.Label() != null
                && copularHead.Label().Value().StartsWith(CoordinationTransformer.Noun))
            {
                bool foundAnotherNp = false;
                foreach (Tree kid in kids)
                {
                    if (kid == copularHead)
                    {
                        break;
                    }

                    if (kid.Label() != null && kid.Label().Value().StartsWith(CoordinationTransformer.Noun))
                    {
                        foundAnotherNp = true;
                        break;
                    }
                }

                if (!foundAnotherNp)
                {
                    copularHead = null;
                }
            }

            if (copularHead != null)
            {
                return copularHead;
            }
        }
    }

    // No special rule applied. The old heuristic that repaired punctuation
    // heads here was disabled with the note that this should now be handled
    // at the AbstractCollinsHeadFinder level.
    return base.DetermineNonTrivialHead(t, parent);
}