// TODO: IDENTIFIER should not allow | after the first character, but it breaks some | queries to allow it.

/// <summary>
/// Entry production of the grammar. Each BNF rule of the grammar is
/// implemented as one method, and the value a method returns is the
/// result handed back up the parse tree for that production.
/// </summary>
/// <returns>
/// The single parsed sub-node when only one was read; otherwise a
/// <c>CoordinationPattern</c> built over all parsed sub-nodes with its
/// second argument set to <c>false</c>.
/// </returns>
public TregexPattern Root()
{
    // The first alternative is mandatory.
    var alternatives = new List<TregexPattern> { SubNode(TRegex.Relation.Root) };

    // While the two-token lookahead succeeds, consume the separator
    // (token kind 12) and parse one more alternative.
    while (Jj_2_1(2))
    {
        Jj_consume_token(12);
        alternatives.Add(SubNode(TRegex.Relation.Root));
    }

    // Token kind 13 must close the pattern.
    Jj_consume_token(13);

    if (alternatives.Count > 1)
    {
        return new CoordinationPattern(alternatives, false);
    }

    return alternatives[0];
}
/// <summary>
/// Parses one or more <c>ModChild</c> productions, optionally separated
/// by token kind 22, and combines them.
/// </summary>
/// <returns>
/// The only parsed child when exactly one was read; otherwise a
/// <c>CoordinationPattern</c> over all children with its second argument
/// set to <c>true</c>.
/// </returns>
public TregexPattern ChildrenConj()
{
    var conjuncts = new List<TregexPattern> { ModChild() };

    while (true)
    {
        // Decide from the next token kind whether another child follows.
        bool anotherChildFollows;
        switch ((jj_ntk == -1) ? Jj_ntk_f() : jj_ntk)
        {
            case RELATION:
            case MultiRelation:
            case RelWStrArg:
            case 14:
            case 16:
            case 22:
            case 23:
            case 24:
                anotherChildFollows = true;
                break;

            default:
                // Record the generation at which this lookahead choice failed.
                jj_la1[11] = jj_gen;
                anotherChildFollows = false;
                break;
        }

        if (!anotherChildFollows)
        {
            break;
        }

        // The separator (token kind 22) is optional between children.
        var upcoming = (jj_ntk == -1) ? Jj_ntk_f() : jj_ntk;
        if (upcoming == 22)
        {
            Jj_consume_token(22);
        }
        else
        {
            jj_la1[12] = jj_gen;
        }

        conjuncts.Add(ModChild());
    }

    if (conjuncts.Count == 1)
    {
        return conjuncts[0];
    }

    return new CoordinationPattern(conjuncts, true);
}
/// <summary>
/// Creates a pattern from the given string using the default compiler
/// (<c>TregexPatternCompiler.defaultCompiler</c>). If you want to use a
/// different HeadFinder or BasicCategoryFunction, use a
/// <see cref="TregexPatternCompiler"/> object.
/// Rather than throwing an exception when the string does not parse,
/// simply returns null.
/// </summary>
/// <param name="tregex">the pattern string</param>
/// <param name="verbose">
/// whether to log errors when the string doesn't parse; when true, the
/// parse failure is written to the standard error stream
/// </param>
/// <returns>a TregexPattern for the string, or null if the string does not parse.</returns>
public static TregexPattern SafeCompile(string tregex, bool verbose)
{
    try
    {
        return TregexPatternCompiler.defaultCompiler.Compile(tregex);
    }
    catch (TregexParseException ex)
    {
        // Only parse failures are converted to null; any other exception
        // type still propagates to the caller.
        if (verbose)
        {
            // Honour the documented contract of the verbose flag instead
            // of silently discarding the error.
            System.Console.Error.WriteLine("Could not parse " + tregex + ": " + ex);
        }

        return null;
    }
}
/// <summary>
/// Parses one or more <c>ChildrenConj</c> productions separated by token
/// kind 12 and combines them as a disjunction, while keeping the
/// <c>knownVariables</c> bookkeeping separate per branch.
/// </summary>
/// <returns>
/// The only parsed branch when exactly one was read; otherwise a
/// <c>CoordinationPattern</c> over all branches with its second argument
/// set to <c>false</c>.
/// </returns>
public TregexPattern ChildrenDisj()
{
    // Variables set in one branch of a disjunction are not set in the next
    // one (a disjunction exits as soon as one branch is known), so each
    // branch must start from the variables known before the disjunction.
    var variablesBeforeDisjunction = new Set<string>(knownVariables);

    // After the whole disjunction, however, every variable seen in any
    // branch counts as known, so accumulate the union here.
    var variablesFromAllBranches = new Set<string>(knownVariables);

    var branches = new List<TregexPattern>();

    branches.Add(ChildrenConj());
    variablesFromAllBranches.AddAll(knownVariables);

    // Each further branch is introduced by token kind 12, detected with a
    // two-token lookahead.
    while (Jj_2_2(2))
    {
        // Reset to the pre-disjunction state before parsing the branch.
        knownVariables = new Set<string>(variablesBeforeDisjunction);
        Jj_consume_token(12);
        branches.Add(ChildrenConj());
        variablesFromAllBranches.AddAll(knownVariables);
    }

    knownVariables = variablesFromAllBranches;

    if (branches.Count == 1)
    {
        return branches[0];
    }

    return new CoordinationPattern(branches, false);
}
/// <summary>
/// Parses a node description, either wrapped in token kinds 14 ... 15
/// (a nested <c>SubNode</c>) or given directly via <c>ModDescription</c>,
/// followed by an optional <c>ChildrenDisj</c>.
/// </summary>
/// <param name="r">The relation handed on to the nested SubNode / ModDescription call.</param>
/// <returns>
/// The parsed description. When trailing children were parsed they are
/// attached to it: for the wrapped form they are appended to the existing
/// children inside a new <c>CoordinationPattern</c> (second argument
/// <c>true</c>); for the direct form they are set as the child as-is.
/// </returns>
/// <exception cref="ParseException">
/// Thrown when the next token starts neither form, unless
/// <c>Jj_consume_token(-1)</c> has already thrown.
/// </exception>
public DescriptionPattern SubNode(Relation r)
{
    DescriptionPattern description;
    bool wrapped;
    int lookaheadSlot; // index into jj_la1 used if no children follow

    switch ((jj_ntk == -1) ? Jj_ntk_f() : jj_ntk)
    {
        case 14:
            Jj_consume_token(14);
            description = SubNode(r);
            Jj_consume_token(15);
            wrapped = true;
            lookaheadSlot = 1;
            break;

        case Identifier:
        case Blank:
        case Regex:
        case 16:
        case 17:
        case 20:
        case 21:
            description = ModDescription(r);
            wrapped = false;
            lookaheadSlot = 2;
            break;

        default:
            jj_la1[3] = jj_gen;
            Jj_consume_token(-1);
            throw new ParseException();
    }

    // Optional trailing children, recognised by the next token kind.
    TregexPattern trailingChildren = null;
    switch ((jj_ntk == -1) ? Jj_ntk_f() : jj_ntk)
    {
        case RELATION:
        case MultiRelation:
        case RelWStrArg:
        case 14:
        case 16:
        case 23:
        case 24:
            trailingChildren = ChildrenDisj();
            break;

        default:
            jj_la1[lookaheadSlot] = jj_gen;
            break;
    }

    if (trailingChildren == null)
    {
        return description;
    }

    if (wrapped)
    {
        // The nested description may already have children; keep them and
        // add the new ones alongside.
        var combined = new List<TregexPattern>();
        combined.AddRange(description.GetChildren());
        combined.Add(trailingChildren);
        description.SetChild(new CoordinationPattern(combined, true));
    }
    else
    {
        description.SetChild(trailingChildren);
    }

    return description;
}
/// <summary>
/// Determine which daughter of the current parse tree is the head.
/// It assumes that the daughters already have had their heads determined.
/// Special rules are tried first for CONJP, for SBARQ/SINV copular
/// constructions (only when <c>makeCopulaHead</c> is false), and for
/// VP/SQ/SINV nodes containing auxiliaries or copular verbs; if none of
/// them yields a head, the base class implementation decides.
/// </summary>
/// <param name="t">
/// The parse tree to examine the daughters of.
/// This is assumed to never be a leaf
/// </param>
/// <param name="parent">
/// Passed on to <c>IsExistential</c>, <c>IsWhQ</c> and the base
/// implementation; presumably the parent node of <paramref name="t"/>.
/// </param>
/// <returns>The parse tree that is the head</returns>
protected override Tree DetermineNonTrivialHead(Tree t, Tree parent)
{
    string motherCat = Tlp.BasicCategory(t.Label().Value());

    // Some conj expressions seem to make more sense with the "not" or
    // other key words as the head. For example, "and not" means
    // something completely different than "and". Furthermore,
    // downstream code was written assuming "not" would be the head...
    if (motherCat.Equals(CONJP))
    {
        var headOfConjpTregex = new TregexPattern[]
        {
            TregexPattern.Compile("CONJP < (CC <: /^(?i:but|and)$/ $+ (RB=head <: /^(?i:not)$/))"),
            TregexPattern.Compile(
                "CONJP < (CC <: /^(?i:but)$/ [ ($+ (RB=head <: /^(?i:also|rather)$/)) | ($+ (ADVP=head <: (RB <: /^(?i:also|rather)$/))) ])"),
            TregexPattern.Compile(
                "CONJP < (CC <: /^(?i:and)$/ [ ($+ (RB=head <: /^(?i:yet)$/)) | ($+ (ADVP=head <: (RB <: /^(?i:yet)$/))) ])"),
        };

        foreach (TregexPattern pattern in headOfConjpTregex)
        {
            TregexMatcher matcher = pattern.Matcher(t);
            if (matcher.MatchesAt(t))
            {
                return matcher.GetNode("head");
            }
        }
        // if none of the above patterns match, use the standard method
    }

    if (motherCat.Equals(SBARQ) || motherCat.Equals(SINV))
    {
        if (!makeCopulaHead)
        {
            var headOfCopulaTregex = new TregexPattern[]
            {
                // Matches phrases such as "what is wrong"
                TregexPattern.Compile("SBARQ < (WHNP $++ (/^VB/ < " + EnglishPatterns.CopularWordRegex + " $++ ADJP=head))"),

                // matches WHNP $+ VB<copula $+ NP
                // for example, "Who am I to judge?"
                // !$++ ADJP matches against "Why is the dog pink?"
                TregexPattern.Compile("SBARQ < (WHNP=head $++ (/^VB/ < " + EnglishPatterns.CopularWordRegex + " $+ NP !$++ ADJP))"),

                // Actually somewhat limited in scope, this detects "Tuesday it is",
                // "Such a great idea this was", etc
                TregexPattern.Compile("SINV < (NP=head $++ (NP $++ (VP < (/^(?:VB|AUX)/ < " + EnglishPatterns.CopularWordRegex + "))))"),
            };

            foreach (TregexPattern pattern in headOfCopulaTregex)
            {
                TregexMatcher matcher = pattern.Matcher(t);
                if (matcher.MatchesAt(t))
                {
                    return matcher.GetNode("head");
                }
            }
        }
        // if none of the above patterns match, use the standard method
    }

    // Filtered view of the children, computed at most once and shared by
    // the auxiliary and the copular branch below.
    Tree[] tmpFilteredChildren = null;

    // do VPs with auxiliary as special case
    if (motherCat.Equals(AbstractCollinsHeadFinder.VerbPhrase) || motherCat.Equals("SQ") || motherCat.Equals("SINV"))
    {
        Tree[] kids = t.Children();

        // try to find if there is an auxiliary verb
        // looks for auxiliaries
        if (HasVerbalAuxiliary(kids, verbalAuxiliaries, true) || HasPassiveProgressiveAuxiliary(kids))
        {
            // Including NP etc seems okay for copular sentences but is
            // problematic for other auxiliaries, like 'he has an answer'
            // But maybe doing ADJP is fine!
            string[] how = { Left, AbstractCollinsHeadFinder.VerbPhrase, CoordinationTransformer.Adjective };

            tmpFilteredChildren = kids.Where(k => RemoveTmpAndAdv(k)).ToArray();

            Tree pti = TraverseLocate(tmpFilteredChildren, how, false);
            if (pti != null)
            {
                return pti;
            }
        }

        // looks for copular verbs
        if (HasVerbalAuxiliary(kids, copulars, false) && !IsExistential(t, parent) && !IsWhQ(t, parent))
        {
            // SQ is searched in the Right direction, everything else Left;
            // the candidate categories are the same in both cases.
            string[] how;
            if (motherCat.Equals("SQ"))
            {
                how = new string[]
                {
                    Right, AbstractCollinsHeadFinder.VerbPhrase, CoordinationTransformer.Adjective,
                    CoordinationTransformer.Noun, WHADJP, WHNP
                };
            }
            else
            {
                how = new string[]
                {
                    Left, AbstractCollinsHeadFinder.VerbPhrase, CoordinationTransformer.Adjective,
                    CoordinationTransformer.Noun, WHADJP, WHNP
                };
            }

            // Avoid undesirable heads by filtering them from the list of potential children
            if (tmpFilteredChildren == null)
            {
                tmpFilteredChildren = kids.Where(k => RemoveTmpAndAdv(k)).ToArray();
            }

            Tree pti = TraverseLocate(tmpFilteredChildren, how, false);

            // In SQ, only allow an NP to become head if there is another one to the left (then it's probably predicative).
            // Category labels are identifiers, so they are compared ordinally
            // rather than with the current culture's linguistic rules.
            if (motherCat.Equals(SQ) && pti != null && pti.Label() != null
                && pti.Label().Value().StartsWith(CoordinationTransformer.Noun, System.StringComparison.Ordinal))
            {
                bool foundAnotherNp = false;
                foreach (Tree kid in kids)
                {
                    if (kid == pti)
                    {
                        // Reached the candidate itself: nothing to its left qualified.
                        break;
                    }

                    if (kid.Label() != null
                        && kid.Label().Value().StartsWith(CoordinationTransformer.Noun, System.StringComparison.Ordinal))
                    {
                        foundAnotherNp = true;
                        break;
                    }
                }

                if (!foundAnotherNp)
                {
                    pti = null;
                }
            }

            if (pti != null)
            {
                return pti;
            }
        }
    }

    // No special rule applied: defer to the base head finder. The heuristic
    // repair of punctuation heads that used to follow here is, per the
    // original note, expected to be handled at the AbstractCollinsHeadFinder
    // level instead.
    return base.DetermineNonTrivialHead(t, parent);
}
/// <summary>
/// Replaces this pattern's child with the given pattern.
/// </summary>
/// <param name="n">The pattern to store as the child.</param>
public void SetChild(TregexPattern n) => child = n;