예제 #1
0
        // TODO: IDENTIFIER should not allow | after the first character, but it breaks some | queries to allow it.

        /// <summary>
        /// The grammar starts here.  Each BNF rule of the grammar is converted into
        /// a method such as this one, and the value it returns is passed up the tree
        /// after the production.
        /// This rule reads one sub-node, then further sub-nodes each preceded by
        /// token 12 for as long as the <c>Jj_2_1(2)</c> lookahead check succeeds,
        /// and finally consumes token 13.
        /// </summary>
        /// <returns>
        /// The single parsed sub-node when only one was read; otherwise a
        /// <c>CoordinationPattern</c> over all of them, constructed with <c>false</c>.
        /// </returns>
        public TregexPattern Root()
        {
            var alternatives = new List<TregexPattern>();
            alternatives.Add(SubNode(TRegex.Relation.Root));

            // Each further alternative is introduced by token 12.
            while (Jj_2_1(2))
            {
                Jj_consume_token(12);
                alternatives.Add(SubNode(TRegex.Relation.Root));
            }

            // The production is closed by token 13.
            Jj_consume_token(13);

            if (alternatives.Count == 1)
            {
                return alternatives[0];
            }

            return new CoordinationPattern(alternatives, false);
        }
예제 #2
0
        /// <summary>
        /// Parses one <c>ModChild</c> and then keeps parsing further ones for as long
        /// as the next token kind is one of the kinds listed in the switch below.
        /// Before each additional child an optional token 22 is consumed.
        /// </summary>
        /// <returns>
        /// The only child when exactly one was parsed; otherwise a
        /// <c>CoordinationPattern</c> over all parsed children, constructed with <c>true</c>.
        /// </returns>
        public TregexPattern ChildrenConj()
        {
            var conjuncts = new List<TregexPattern>();
            TregexPattern latest = ModChild();
            conjuncts.Add(latest);

            bool expectMore = true;
            while (expectMore)
            {
                switch ((jj_ntk == -1) ? Jj_ntk_f() : jj_ntk)
                {
                case RELATION:
                case MultiRelation:
                case RelWStrArg:
                case 14:
                case 16:
                case 22:
                case 23:
                case 24:
                {
                    // Token 22 may precede the next child; when it is absent the
                    // generation counter is recorded in slot 12 instead.
                    if (((jj_ntk == -1) ? Jj_ntk_f() : jj_ntk) == 22)
                    {
                        Jj_consume_token(22);
                    }
                    else
                    {
                        jj_la1[12] = jj_gen;
                    }

                    latest = ModChild();
                    conjuncts.Add(latest);
                    break;
                }

                default:
                    // No further child can start here: record the generation
                    // counter in slot 11 and leave the loop.
                    jj_la1[11] = jj_gen;
                    expectMore = false;
                    break;
                }
            }

            if (conjuncts.Count == 1)
            {
                return latest;
            }

            return new CoordinationPattern(conjuncts, true);
        }
예제 #3
0
        /// <summary>
        /// Creates a pattern from the given string using the default HeadFinder and
        /// BasicCategoryFunction.  If you want to use a different HeadFinder or
        /// BasicCategoryFunction, use a <see cref="TregexPatternCompiler"/> object.
        /// Rather than throwing an exception when the string does not parse,
        /// simply returns null.
        /// </summary>
        /// <param name="tregex">the pattern string</param>
        /// <param name="verbose">
        /// whether to log errors when the string doesn't parse; when true, the
        /// parse exception is written to the standard error stream
        /// </param>
        /// <returns>a TregexPattern for the string, or null if the string does not parse.</returns>
        public static TregexPattern SafeCompile(string tregex, bool verbose)
        {
            try
            {
                return TregexPatternCompiler.defaultCompiler.Compile(tregex);
            }
            catch (TregexParseException ex)
            {
                // Only parse failures are converted into a null result; any other
                // exception type still propagates to the caller.
                if (verbose)
                {
                    // Previously this branch was empty, so "verbose" had no effect.
                    System.Console.Error.WriteLine(ex);
                }

                return null;
            }
        }
예제 #4
0
        /// <summary>
        /// Parses one <c>ChildrenConj</c> and then further ones, each preceded by
        /// token 12, for as long as the <c>Jj_2_2(2)</c> lookahead check succeeds.
        /// </summary>
        /// <returns>
        /// The only branch when exactly one was parsed; otherwise a
        /// <c>CoordinationPattern</c> over all branches, constructed with <c>false</c>.
        /// </returns>
        public TregexPattern ChildrenDisj()
        {
            // Known variables are tracked to assert that variables are not
            // redefined and that links are only set to known variables.  Each part
            // of the disjunction must be checked separately: variables set in one
            // part won't be set in the next part if it gets there, since
            // disjunctions exit once known.  Hence every part starts from this
            // snapshot.
            var variablesBefore = new Set <string>(knownVariables);

            // All variables seen in any part are still accumulated, so that they
            // are all known once the disjunction is over.
            var variablesSeen = new Set <string>(knownVariables);

            var branches = new List <TregexPattern>();

            TregexPattern latest = ChildrenConj();
            branches.Add(latest);
            variablesSeen.AddAll(knownVariables);

            while (Jj_2_2(2))
            {
                // Reset to the snapshot before parsing the next part.
                knownVariables = new Set <string>(variablesBefore);
                Jj_consume_token(12);
                latest = ChildrenConj();
                branches.Add(latest);
                variablesSeen.AddAll(knownVariables);
            }

            knownVariables = variablesSeen;

            if (branches.Count == 1)
            {
                return latest;
            }

            return new CoordinationPattern(branches, false);
        }
예제 #5
0
        /// <summary>
        /// Parses a single node production and returns its description pattern.
        /// Two forms are accepted, selected by the kind of the next token:
        /// a nested sub-node enclosed between token 14 and token 15, or a
        /// <c>ModDescription</c>.  Either form may be followed by a
        /// <c>ChildrenDisj</c>, which is attached to the result as its child.
        /// </summary>
        /// <param name="r">The relation passed on to the nested <c>SubNode</c> or to <c>ModDescription</c>.</param>
        /// <returns>The parsed description pattern, with any trailing children attached.</returns>
        /// <exception cref="ParseException">
        /// Thrown when the next token kind starts neither of the two accepted forms.
        /// </exception>
        public DescriptionPattern SubNode(Relation r)
        {
            DescriptionPattern result = null;
            TregexPattern      child  = null;

            // Use the cached next-token kind, or fetch it when it is not cached (-1).
            switch ((jj_ntk == -1) ? Jj_ntk_f() : jj_ntk)
            {
            case 14:
            {
                // Form 1: token 14, a nested sub-node, token 15.
                Jj_consume_token(14);
                result = SubNode(r);
                Jj_consume_token(15);
                // Optional trailing children: parsed only when the next token kind
                // is one of those listed below.
                switch ((jj_ntk == -1) ? Jj_ntk_f() : jj_ntk)
                {
                case RELATION:
                case MultiRelation:
                case RelWStrArg:
                case 14:
                case 16:
                case 23:
                case 24:
                {
                    child = ChildrenDisj();
                    break;
                }

                default:
                    // No children follow; record the generation counter in slot 1.
                    jj_la1[1] = jj_gen;
                    break;
                }
                if (child != null)
                {
                    // The nested sub-node may already have children of its own, so
                    // they are kept and combined with the new child in a
                    // CoordinationPattern constructed with true.
                    var newChildren = new List <TregexPattern>();
                    newChildren.AddRange(result.GetChildren());
                    newChildren.Add(child);
                    result.SetChild(new CoordinationPattern(newChildren, true));
                }
                return(result);
            }

            case Identifier:
            case Blank:
            case Regex:
            case 16:
            case 17:
            case 20:
            case 21:
            {
                // Form 2: a ModDescription, optionally followed by children.
                result = ModDescription(r);
                switch ((jj_ntk == -1) ? Jj_ntk_f() : jj_ntk)
                {
                case RELATION:
                case MultiRelation:
                case RelWStrArg:
                case 14:
                case 16:
                case 23:
                case 24:
                {
                    child = ChildrenDisj();
                    break;
                }

                default:
                    // No children follow; record the generation counter in slot 2.
                    jj_la1[2] = jj_gen;
                    break;
                }
                if (child != null)
                {
                    result.SetChild(child);
                }
                return(result);
            }

            default:
                // Neither form can start with this token kind.
                // NOTE(review): Jj_consume_token(-1) presumably raises the detailed
                // parse error itself, making the throw below a fallback - confirm.
                jj_la1[3] = jj_gen;
                Jj_consume_token(-1);
                throw new ParseException();
            }
        }
예제 #6
0
        /// <summary>
        /// Determine which daughter of the current parse tree is the head.
        /// It assumes that the daughters already have had their heads determined.
        /// Special rules are tried in order: CONJP patterns, SBARQ/SINV copular
        /// patterns (only when <c>makeCopulaHead</c> is false), and auxiliary /
        /// copular handling for VP, SQ and SINV.  When none of them selects a head,
        /// the base class implementation decides.
        /// </summary>
        /// <param name="t">
        /// The parse tree to examine the daughters of.
        /// This is assumed to never be a leaf
        /// </param>
        /// <param name="parent">
        /// The parent of <paramref name="t"/>; it is handed to the existential and
        /// wh-question checks and to the base implementation.
        /// </param>
        /// <returns>The parse tree that is the head</returns>
        protected override Tree DetermineNonTrivialHead(Tree t, Tree parent)
        {
            string motherCat = Tlp.BasicCategory(t.Label().Value());

            // Some conj expressions seem to make more sense with the "not" or
            // other key words as the head.  For example, "and not" means
            // something completely different than "and".  Furthermore,
            // downstream code was written assuming "not" would be the head...
            if (motherCat.Equals(CONJP))
            {
                // NOTE(review): these patterns are recompiled on every call;
                // consider caching them in static readonly fields.
                var headOfConjpTregex = new TregexPattern[]
                {
                    TregexPattern.Compile("CONJP < (CC <: /^(?i:but|and)$/ $+ (RB=head <: /^(?i:not)$/))"),
                    TregexPattern.Compile(
                        "CONJP < (CC <: /^(?i:but)$/ [ ($+ (RB=head <: /^(?i:also|rather)$/)) | ($+ (ADVP=head <: (RB <: /^(?i:also|rather)$/))) ])"),
                    TregexPattern.Compile(
                        "CONJP < (CC <: /^(?i:and)$/ [ ($+ (RB=head <: /^(?i:yet)$/)) | ($+ (ADVP=head <: (RB <: /^(?i:yet)$/))) ])"),
                };
                foreach (TregexPattern pattern in headOfConjpTregex)
                {
                    TregexMatcher matcher = pattern.Matcher(t);
                    if (matcher.MatchesAt(t))
                    {
                        return matcher.GetNode("head");
                    }
                }
                // if none of the above patterns match, use the standard method
            }

            if (motherCat.Equals(SBARQ) || motherCat.Equals(SINV))
            {
                if (!makeCopulaHead)
                {
                    // NOTE(review): recompiled on every call as well; see above.
                    var headOfCopulaTregex = new TregexPattern[]
                    {
                        // Matches phrases such as "what is wrong"
                        TregexPattern.Compile("SBARQ < (WHNP $++ (/^VB/ < " + EnglishPatterns.CopularWordRegex +
                                              " $++ ADJP=head))"),

                        // matches WHNP $+ VB<copula $+ NP
                        // for example, "Who am I to judge?"
                        // !$++ ADJP matches against "Why is the dog pink?"
                        TregexPattern.Compile("SBARQ < (WHNP=head $++ (/^VB/ < " + EnglishPatterns.CopularWordRegex +
                                              " $+ NP !$++ ADJP))"),

                        // Actually somewhat limited in scope, this detects "Tuesday it is",
                        // "Such a great idea this was", etc
                        TregexPattern.Compile("SINV < (NP=head $++ (NP $++ (VP < (/^(?:VB|AUX)/ < " +
                                              EnglishPatterns.CopularWordRegex + "))))"),
                    };
                    foreach (TregexPattern pattern in headOfCopulaTregex)
                    {
                        TregexMatcher matcher = pattern.Matcher(t);
                        if (matcher.MatchesAt(t))
                        {
                            return matcher.GetNode("head");
                        }
                    }
                }
                // if none of the above patterns match, use the standard method
            }

            // Filtered daughters, computed at most once and shared by both checks below.
            Tree[] tmpFilteredChildren = null;

            // do VPs with auxiliary as special case
            if (motherCat.Equals(AbstractCollinsHeadFinder.VerbPhrase) || motherCat.Equals("SQ") || motherCat.Equals("SINV"))
            {
                Tree[] kids = t.Children();
                // try to find if there is an auxiliary verb
                // looks for auxiliaries
                if (HasVerbalAuxiliary(kids, verbalAuxiliaries, true) || HasPassiveProgressiveAuxiliary(kids))
                {
                    // Including NP etc seems okay for copular sentences but is
                    // problematic for other auxiliaries, like 'he has an answer'
                    // But maybe doing ADJP is fine!
                    string[] how = { Left, AbstractCollinsHeadFinder.VerbPhrase, CoordinationTransformer.Adjective };

                    tmpFilteredChildren = kids.Where(k => RemoveTmpAndAdv(k)).ToArray();

                    Tree pti = TraverseLocate(tmpFilteredChildren, how, false);
                    if (pti != null)
                    {
                        return pti;
                    }
                }

                // looks for copular verbs
                if (HasVerbalAuxiliary(kids, copulars, false) && !IsExistential(t, parent) && !IsWhQ(t, parent))
                {
                    string[] how;
                    if (motherCat.Equals("SQ"))
                    {
                        how = new string[] { Right, AbstractCollinsHeadFinder.VerbPhrase, CoordinationTransformer.Adjective, CoordinationTransformer.Noun, WHADJP, WHNP };
                    }
                    else
                    {
                        how = new string[] { Left, AbstractCollinsHeadFinder.VerbPhrase, CoordinationTransformer.Adjective, CoordinationTransformer.Noun, WHADJP, WHNP };
                    }
                    // Avoid undesirable heads by filtering them from the list of potential children
                    if (tmpFilteredChildren == null)
                    {
                        tmpFilteredChildren = kids.Where(k => RemoveTmpAndAdv(k)).ToArray();
                    }
                    Tree pti = TraverseLocate(tmpFilteredChildren, how, false);
                    // In SQ, only allow an NP to become head if there is another one to the left (then it's probably predicative)
                    // Category labels are compared ordinally: they are identifiers,
                    // not linguistic text, so culture rules must not apply.
                    if (motherCat.Equals(SQ) && pti != null && pti.Label() != null &&
                        pti.Label().Value().StartsWith(CoordinationTransformer.Noun, System.StringComparison.Ordinal))
                    {
                        bool foundAnotherNp = false;
                        foreach (Tree kid in kids)
                        {
                            if (kid == pti)
                            {
                                // Reached the candidate itself: nothing to its left qualified.
                                break;
                            }
                            else if (kid.Label() != null &&
                                     kid.Label().Value().StartsWith(CoordinationTransformer.Noun, System.StringComparison.Ordinal))
                            {
                                foundAnotherNp = true;
                                break;
                            }
                        }
                        if (!foundAnotherNp)
                        {
                            pti = null;
                        }
                    }

                    if (pti != null)
                    {
                        return pti;
                    }
                }
            }

            // No special rule applied.  Punctuation heads are not repaired here; the
            // removed legacy code noted that this should now be handled at the
            // AbstractCollinsHeadFinder level.
            return base.DetermineNonTrivialHead(t, parent);
        }
예제 #7
0
 /// <summary>
 /// Stores the given pattern as this object's child, replacing any previous value.
 /// </summary>
 /// <param name="n">The pattern to store as the child.</param>
 public void SetChild(TregexPattern n)
 {
     this.child = n;
 }
예제 #8
0
 /// <summary>
 /// Stores the given pattern as this object's child, replacing any previous value.
 /// </summary>
 /// <param name="n">The pattern to store as the child.</param>
 public void SetChild(TregexPattern n)
 {
     this.child = n;
 }