コード例 #1
0
 /// <summary>
 /// Construct a HeadFinder.
 /// The TreebankLanguagePack is used to get basic categories. The remaining arguments
 /// set categories which, if it comes to last resort processing (i.e., none of
 /// the rules matched), will be avoided as heads. In last resort processing,
 /// it will attempt to match the leftmost or rightmost constituent not in this
 /// set but will fall back to the left or rightmost constituent if necessary.
 /// </summary>
 /// <param name="tlp">TreebankLanguagePack used to determine basic category</param>
 /// <param name="categoriesToAvoid">
 /// Constituent types to avoid as head. May be empty, in which case the plain
 /// left/right default rules are built; must not be null.
 /// </param>
 /// <exception cref="ArgumentNullException">
 /// Thrown when <paramref name="categoriesToAvoid"/> is null.
 /// </exception>
 protected AbstractCollinsHeadFinder(AbstractTreebankLanguagePack tlp, string[] categoriesToAvoid)
 {
     // Fail with a descriptive exception instead of a bare NullReferenceException
     // on the first ".Length" access below.
     if (categoriesToAvoid == null)
     {
         throw new ArgumentNullException("categoriesToAvoid");
     }

     this.Tlp = tlp;

     // Automatically build DefaultLeftRule and DefaultRightRule: slot 0 holds the
     // direction keyword, the remaining slots hold the categories to avoid.
     int avoidCount = categoriesToAvoid.Length;
     DefaultLeftRule = new string[avoidCount + 1];
     DefaultRightRule = new string[avoidCount + 1];

     if (avoidCount > 0)
     {
         DefaultLeftRule[0] = LeftExcept;
         DefaultRightRule[0] = RightExcept;
         Array.Copy(categoriesToAvoid, 0, DefaultLeftRule, 1, avoidCount);
         Array.Copy(categoriesToAvoid, 0, DefaultRightRule, 1, avoidCount);
     }
     else
     {
         // Nothing to avoid: the rules consist of the direction keyword only.
         DefaultLeftRule[0] = Left;
         DefaultRightRule[0] = Right;
     }
 }
コード例 #2
0
        /// <summary>
        /// Create a SemanticHeadFinder.
        /// </summary>
        /// <param name="tlp">
        /// The TreebankLanguagePack, handed to the base constructor (used there to get
        /// the basic category of constituents)
        /// </param>
        /// <param name="noCopulaHead">
        /// If true, a copular verb (be, seem, appear, stay, remain, resemble, become)
        /// is not treated as head when it has an AdjP or NP complement.  If false,
        /// a copula verb is still always treated as a head.  But it will still
        /// be treated as an auxiliary in periphrastic tenses with a VP complement.
        /// </param>
        public SemanticHeadFinder(AbstractTreebankLanguagePack tlp, bool noCopulaHead)
            : base(tlp)
        {
            RuleChanges();

            // TODO: reverse the polarity of noCopulaHead
            makeCopulaHead = !noCopulaHead;

            // Tag sets.
            verbalTags = new Util.HashSet<string>(VerbTags);
            unambiguousAuxiliaryTags = new Util.HashSet<string>(UnambiguousAuxTags);

            // Auxiliaries are kept apart from copula verbs so that the NP can be the
            // semantic head in sentences like "Bill is an honest man".
            // (Added "sha" for "shan't" May 2009)
            verbalAuxiliaries = new Util.HashSet<string>(Auxiliaries);
            passiveAuxiliaries = new Util.HashSet<string>(BeGetVerbs);

            // Copula verbs having an NP complement; the set stays empty unless
            // copulas are being demoted from head position.
            copulars = new Util.HashSet<string>();
            if (noCopulaHead)
            {
                copulars.AddAll(CopulaVerbs);
            }
        }
コード例 #3
0
        /// <summary>
        /// Create a SemanticHeadFinder.
        /// </summary>
        /// <param name="tlp">
        /// The TreebankLanguagePack, forwarded to the superclass, which uses it to get
        /// the basic category of constituents
        /// </param>
        /// <param name="noCopulaHead">
        /// If true, a copular verb (be, seem, appear, stay, remain, resemble, become)
        /// is not treated as head when it has an AdjP or NP complement.  If false,
        /// a copula verb is still always treated as a head.  But it will still
        /// be treated as an auxiliary in periphrastic tenses with a VP complement.
        /// </param>
        public SemanticHeadFinder(AbstractTreebankLanguagePack tlp, bool noCopulaHead)
            : base(tlp)
        {
            RuleChanges();

            // Distinguish auxiliaries from copula verbs so that the NP is the semantic
            // head in sentences like "Bill is an honest man".
            // (Added "sha" for "shan't" May 2009)
            this.verbalAuxiliaries = new Util.HashSet<string>(Auxiliaries);
            this.passiveAuxiliaries = new Util.HashSet<string>(BeGetVerbs);

            // Start from "copulas are heads" with an empty copular set, then switch
            // both over together when the caller asked for copulas not to be heads.
            // TODO: reverse the polarity of noCopulaHead
            this.copulars = new Util.HashSet<string>();
            this.makeCopulaHead = true;
            if (noCopulaHead)
            {
                // copula verbs having an NP complement
                this.copulars.AddAll(CopulaVerbs);
                this.makeCopulaHead = false;
            }

            this.verbalTags = new Util.HashSet<string>(VerbTags);
            this.unambiguousAuxiliaryTags = new Util.HashSet<string>(UnambiguousAuxTags);
        }
コード例 #4
0
 /// <summary>
 /// Construct a HeadFinder.
 /// The TreebankLanguagePack is used to get basic categories. The remaining arguments
 /// set categories which, if it comes to last resort processing (i.e., none of
 /// the rules matched), will be avoided as heads. In last resort processing,
 /// it will attempt to match the leftmost or rightmost constituent not in this
 /// set but will fall back to the left or rightmost constituent if necessary.
 /// </summary>
 /// <param name="tlp">TreebankLanguagePack used to determine basic category</param>
 /// <param name="categoriesToAvoid">
 /// Constituent types to avoid as head; an empty array selects the plain
 /// left/right default rules. Null is rejected.
 /// </param>
 /// <exception cref="ArgumentNullException">
 /// Thrown when <paramref name="categoriesToAvoid"/> is null.
 /// </exception>
 protected AbstractCollinsHeadFinder(AbstractTreebankLanguagePack tlp, string[] categoriesToAvoid)
 {
     // Reject null up front: every statement below reads categoriesToAvoid.Length,
     // which would otherwise surface as an uninformative NullReferenceException.
     if (categoriesToAvoid == null)
     {
         throw new ArgumentNullException("categoriesToAvoid");
     }

     this.Tlp = tlp;

     // Automatically build the default rules. Each rule is one direction keyword
     // followed by the categories to avoid (if any).
     bool hasCategoriesToAvoid = categoriesToAvoid.Length > 0;
     DefaultLeftRule = new string[categoriesToAvoid.Length + 1];
     DefaultRightRule = new string[categoriesToAvoid.Length + 1];

     DefaultLeftRule[0] = hasCategoriesToAvoid ? LeftExcept : Left;
     DefaultRightRule[0] = hasCategoriesToAvoid ? RightExcept : Right;

     if (hasCategoriesToAvoid)
     {
         Array.Copy(categoriesToAvoid, 0, DefaultLeftRule, 1, categoriesToAvoid.Length);
         Array.Copy(categoriesToAvoid, 0, DefaultRightRule, 1, categoriesToAvoid.Length);
     }
 }
コード例 #5
0
 /// <summary>
 /// Creates a DependencyTreeTransformer whose language pack is a newly
 /// constructed <c>PennTreebankLanguagePack</c>.
 /// </summary>
 public DependencyTreeTransformer()
 {
     this.Tlp = new PennTreebankLanguagePack();
 }
コード例 #6
0
 /// <summary>
 /// Creates a CollinsHeadFinder and fills <c>nonTerminalInfo</c> with the head rules
 /// from Collins' dissertation (1999: 236-238).
 /// </summary>
 /// <param name="tlp">TreebankLanguagePack forwarded to the base constructor</param>
 /// <param name="categoriesToAvoid">Categories forwarded to the base constructor</param>
 public CollinsHeadFinder(AbstractTreebankLanguagePack tlp, string[] categoriesToAvoid)
     : base(tlp, categoriesToAvoid)
 {
     // Key: parent category. Value: one or more rules, each starting with a
     // direction keyword ("left", "right", "rightdis") followed by category labels.
     // Entries are added in the order listed.
     nonTerminalInfo = new Dictionary<string, string[][]>
     {
         {
             "ADJP", new[]
             {
                 new[]
                 {
                     "left", "NNS", "QP", "NN", "$", "ADVP", "JJ", "VBN", "VBG", "ADJP", "JJR", "NP", "JJS",
                     "DT", "FW", "RBR", "RBS", "SBAR", "RB"
                 }
             }
         },
         {
             "ADVP", new[]
             {
                 new[]
                 {
                     "right", "RB", "RBR", "RBS", "FW", "ADVP", "TO", "CD", "JJR", "JJ", "IN", "NP", "JJS", "NN"
                 }
             }
         },
         { "CONJP", new[] { new[] { "right", "CC", "RB", "IN" } } },
         // placeholder rule
         { "FRAG", new[] { new[] { "right" } } },
         { "INTJ", new[] { new[] { "left" } } },
         { "LST", new[] { new[] { "right", "LS", ":" } } },
         {
             "NAC", new[]
             {
                 new[]
                 {
                     "left", "NN", "NNS", "NNP", "NNPS", "NP", "NAC", "EX", "$", "CD", "QP", "PRP", "VBG", "JJ",
                     "JJS", "JJR", "ADJP", "FW"
                 }
             }
         },
         // placeholder rule
         { "NX", new[] { new[] { "left" } } },
         // should prefer JJ? (PP (JJ such) (IN as) (NP (NN crocidolite)))
         { "PP", new[] { new[] { "right", "IN", "TO", "VBG", "VBN", "RP", "FW" } } },
         { "PRN", new[] { new[] { "left" } } },
         { "PRT", new[] { new[] { "right", "RP" } } },
         {
             "QP", new[]
             {
                 new[] { "left", "$", "IN", "NNS", "NN", "JJ", "RB", "DT", "CD", "NCD", "QP", "JJR", "JJS" }
             }
         },
         { "RRC", new[] { new[] { "right", "VP", "NP", "ADVP", "ADJP", "PP" } } },
         { "S", new[] { new[] { "left", "TO", "IN", "VP", "S", "SBAR", "ADJP", "UCP", "NP" } } },
         {
             "SBAR", new[]
             {
                 new[]
                 {
                     "left", "WHNP", "WHPP", "WHADVP", "WHADJP", "IN", "DT", "S", "SQ", "SINV", "SBAR", "FRAG"
                 }
             }
         },
         { "SBARQ", new[] { new[] { "left", "SQ", "S", "SINV", "SBARQ", "FRAG" } } },
         {
             "SINV", new[]
             {
                 new[] { "left", "VBZ", "VBD", "VBP", "VB", "MD", "VP", "S", "SINV", "ADJP", "NP" }
             }
         },
         { "SQ", new[] { new[] { "left", "VBZ", "VBD", "VBP", "VB", "MD", "VP", "SQ" } } },
         { "UCP", new[] { new[] { "right" } } },
         {
             "VP", new[]
             {
                 new[]
                 {
                     "left", "TO", "VBD", "VBN", "MD", "VBZ", "VB", "VBG", "VBP", "AUX", "AUXG", "VP", "ADJP",
                     "NN", "NNS", "NP"
                 }
             }
         },
         { "WHADJP", new[] { new[] { "left", "CC", "WRB", "JJ", "ADJP" } } },
         { "WHADVP", new[] { new[] { "right", "CC", "WRB" } } },
         { "WHNP", new[] { new[] { "left", "WDT", "WP", "WP$", "WHADJP", "WHPP", "WHNP" } } },
         { "WHPP", new[] { new[] { "right", "IN", "TO", "FW" } } },
         // placeholder rule
         { "X", new[] { new[] { "right" } } },
         {
             "NP", new[]
             {
                 new[] { "rightdis", "NN", "NNP", "NNPS", "NNS", "NX", "POS", "JJR" },
                 new[] { "left", "NP" },
                 new[] { "rightdis", "$", "ADJP", "PRN" },
                 new[] { "right", "CD" },
                 new[] { "rightdis", "JJ", "JJS", "RB", "QP" }
             }
         },
         // another placeholder rule, for Brown (Roger)
         { "TYPO", new[] { new[] { "left" } } },
         // placeholder rule for Switchboard (if don't delete EDITED nodes)
         { "EDITED", new[] { new[] { "left" } } },
         // rule for new structure in QP
         { "XS", new[] { new[] { "right", "IN" } } }
     };
 }
コード例 #7
0
 /// <summary>
 /// Traditional-behavior constructor: no punctuation categories are singled out
 /// for avoidance. Delegates to the two-argument constructor, passing
 /// <c>EmptyStringArray</c> as the categories to avoid.
 /// </summary>
 /// <param name="tlp">TreebankLanguagePack used for basic category function</param>
 public CollinsHeadFinder(AbstractTreebankLanguagePack tlp)
     : this(tlp, EmptyStringArray)
 {
     // All initialization happens in the delegated constructor.
 }
コード例 #8
0
 /// <summary>
 /// Creates the function object and stores the supplied language pack in the
 /// <c>tlp</c> field for later use.
 /// </summary>
 /// <param name="tlp">The treebank language pack to store; no validation is performed here</param>
 public BasicCategoryStringFunction(AbstractTreebankLanguagePack tlp)
 {
     this.tlp = tlp;
 }
コード例 #9
0
        /// <summary>
        /// Creates a ModCollinsHeadFinder and populates <c>NonTerminalInfo</c> with its head rules.
        /// The language pack's punctuation tags are passed to the base constructor as the
        /// categories to avoid.
        /// </summary>
        /// <remarks>
        /// Each entry maps a parent category to one or more rules. Every rule is a string array
        /// whose first element is a direction constant (<c>Left</c>, <c>Right</c>, <c>RightDis</c>)
        /// followed by category labels. How the rules are evaluated is not shown in this
        /// constructor; see the code that consumes <c>NonTerminalInfo</c>.
        /// </remarks>
        /// <param name="tlp">
        /// TreebankLanguagePack passed to the base constructor; it is dereferenced here
        /// (<c>tlp.PunctuationTags()</c>), so it must not be null.
        /// </param>
        public ModCollinsHeadFinder(AbstractTreebankLanguagePack tlp) : base(tlp, tlp.PunctuationTags())
        {
            // avoid punctuation as head in readonly default rule

            NonTerminalInfo = new Dictionary <string, string[][]>();

            // This version from Collins' diss (1999: 236-238)
            // NNS, NN is actually sensible (money, etc.)!
            // QP early isn't; should prefer JJR NN RB
            // remove ADVP; it just shouldn't be there.
            // if two JJ, should take right one (e.g. South Korean)
            // Earlier version of the adjective-phrase rule, kept for reference:
            // NonTerminalInfo.Add(CoordinationTransformer.Adjective, new string[][]{{Left, PartsOfSpeech.NounPlural, PartsOfSpeech.NounSingularOrMass, PartsOfSpeech.DollarSign, "QP"}, {Right, PartsOfSpeech.Adjective}, {Left, PartsOfSpeech.VerbPastParticiple, PartsOfSpeech.VerbGerundOrPresentParticiple, CoordinationTransformer.Adjective, "JJP", PartsOfSpeech.AdjectiveComparative, CoordinationTransformer.Noun, PartsOfSpeech.AdjectiveSuperlative, PartsOfSpeech.Determiner, PartsOfSpeech.ForeignWord, PartsOfSpeech.AdverbComparative, PartsOfSpeech.AdverbSuperlative, "SBAR", PartsOfSpeech.Adverb}});
            NonTerminalInfo.Add(CoordinationTransformer.Adjective,
                                new string[][]
            {
                new string[] { Left, PartsOfSpeech.DollarSign },
                new string[]
                {
                    RightDis, PartsOfSpeech.NounPlural, PartsOfSpeech.NounSingularOrMass, PartsOfSpeech.Adjective,
                    QP, PartsOfSpeech.VerbPastParticiple, PartsOfSpeech.VerbGerundOrPresentParticiple
                },
                new string[] { Left, CoordinationTransformer.Adjective },
                new string[]
                {
                    RightDis, JJP, PartsOfSpeech.AdjectiveComparative, PartsOfSpeech.AdjectiveSuperlative,
                    PartsOfSpeech.Determiner, PartsOfSpeech.Adverb, PartsOfSpeech.AdverbComparative,
                    PartsOfSpeech.CardinalNumber, PartsOfSpeech.PrepositionOrSubordinateConjunction,
                    PartsOfSpeech.VerbPastTense
                },
                new string[] { Left, ADVP, CoordinationTransformer.Noun }
            });
            NonTerminalInfo.Add(JJP,
                                new string[][]
            {
                new string[]
                {
                    Left, PartsOfSpeech.NounPlural, PartsOfSpeech.NounSingularOrMass, PartsOfSpeech.DollarSign,
                    QP, PartsOfSpeech.Adjective, PartsOfSpeech.VerbPastParticiple,
                    PartsOfSpeech.VerbGerundOrPresentParticiple, CoordinationTransformer.Adjective, JJP, PartsOfSpeech.AdjectiveComparative,
                    CoordinationTransformer.Noun, PartsOfSpeech.AdjectiveSuperlative, PartsOfSpeech.Determiner, PartsOfSpeech.ForeignWord,
                    PartsOfSpeech.AdverbComparative, PartsOfSpeech.AdverbSuperlative, SBAR, PartsOfSpeech.Adverb
                }
            });
            // JJP is introduced for NML-like adjective phrases in Vadas' treebank; Chris wishes he hadn't used JJP which should be a POS-tag.
            // ADVP rule rewritten by Chris in Nov 2010 to be rightdis.  This is right! JJ.* is often head and rightmost.
            NonTerminalInfo.Add(ADVP, new string[][]
            {
                new string[] { Left, ADVP, PartsOfSpeech.PrepositionOrSubordinateConjunction },
                new string[]
                {
                    RightDis, PartsOfSpeech.Adverb, PartsOfSpeech.AdverbComparative, PartsOfSpeech.AdverbSuperlative,
                    PartsOfSpeech.Adjective, PartsOfSpeech.AdjectiveComparative, PartsOfSpeech.AdjectiveSuperlative
                },
                new string[]
                {
                    RightDis, PartsOfSpeech.Particle, PartsOfSpeech.Determiner, PartsOfSpeech.NounSingularOrMass,
                    PartsOfSpeech.CardinalNumber, CoordinationTransformer.Noun, PartsOfSpeech.VerbPastParticiple,
                    PartsOfSpeech.ProperNounSingular, PartsOfSpeech.CoordinatingConjunction, PartsOfSpeech.ForeignWord,
                    PartsOfSpeech.NounPlural, CoordinationTransformer.Adjective, NML
                }
            });
            NonTerminalInfo.Add(CONJP,
                                new string[][]
            {
                new string[]
                {
                    Right, PartsOfSpeech.CoordinatingConjunction, PartsOfSpeech.Adverb,
                    PartsOfSpeech.PrepositionOrSubordinateConjunction
                }
            });
            NonTerminalInfo.Add(FRAG, new string[][] { new string[] { Right } }); // placeholder rule: direction only, no preferred categories
            NonTerminalInfo.Add(INTJ, new string[][] { new string[] { Left } });
            NonTerminalInfo.Add(LST,
                                new string[][] { new string[] { Right, PartsOfSpeech.ListItemMarker, PartsOfSpeech.ColonSemiColon } });

            // NML is head in: (NAC-LOC (NML San Antonio) (, ,) (NNP Texas))
            // TODO: NNP should be head (rare cases, could be ignored):
            //   (NAC (NML New York) (NNP Court) (PP of Appeals))
            //   (NAC (NML Prudential Insurance) (NNP Co.) (PP Of America))
            // Chris: This could maybe still do with more thought, but NAC is rare.
            NonTerminalInfo.Add(NAC,
                                new string[][]
            {
                new string[]
                {
                    Left, PartsOfSpeech.NounSingularOrMass, PartsOfSpeech.NounPlural, NML,
                    PartsOfSpeech.ProperNounSingular, PartsOfSpeech.ProperNounPlural, CoordinationTransformer.Noun, NAC,
                    PartsOfSpeech.ExistentialThere, PartsOfSpeech.DollarSign, PartsOfSpeech.CardinalNumber, QP,
                    PartsOfSpeech.PersonalPronoun, PartsOfSpeech.VerbGerundOrPresentParticiple,
                    PartsOfSpeech.Adjective,
                    PartsOfSpeech.AdjectiveSuperlative, PartsOfSpeech.AdjectiveComparative, CoordinationTransformer.Adjective, JJP,
                    PartsOfSpeech.ForeignWord
                }
            });

            // Added JJ to PP head table, since it is a head in several cases, e.g.:
            // (PP (JJ next) (PP to them))
            // When you have both JJ and IN daughters, it is invariably "such as" -- not so clear which should be head, but leave as IN
            // should prefer JJ? (PP (JJ such) (IN as) (NP (NN crocidolite)))  Michel thinks we should make JJ a head of PP
            // added SYM as used in new treebanks for symbols filling role of IN
            // Changed PP search to left -- just what you want for conjunction (and consistent with SemanticHeadFinder)
            // NOTE(review): the first rule below still uses Right; only the second rule (nested PP) uses Left -- confirm the comment above refers to that second rule.
            NonTerminalInfo.Add(PP,
                                new string[][]
            {
                new string[]
                {
                    Right, PartsOfSpeech.PrepositionOrSubordinateConjunction, PartsOfSpeech.To,
                    PartsOfSpeech.VerbGerundOrPresentParticiple, PartsOfSpeech.VerbPastParticiple,
                    PartsOfSpeech.Particle, PartsOfSpeech.ForeignWord, PartsOfSpeech.Adjective, PartsOfSpeech.Symbol
                },
                new string[] { Left, PP }
            });

            NonTerminalInfo.Add(PRN,
                                new string[][]
            {
                new string[]
                {
                    Left, VP, CoordinationTransformer.Noun, PP, SQ, S, SINV, SBAR, CoordinationTransformer.Adjective, JJP, ADVP, INTJ, WHNP,
                    NAC,
                    PartsOfSpeech.VerbNon3rdPersSingPresent, PartsOfSpeech.Adjective,
                    PartsOfSpeech.NounSingularOrMass, PartsOfSpeech.ProperNounSingular
                }
            });
            NonTerminalInfo.Add(PRT, new string[][] { new string[] { Right, PartsOfSpeech.Particle } });
            // add '#' for pounds!!
            NonTerminalInfo.Add(QP,
                                new string[][]
            {
                new string[]
                {
                    Left, PartsOfSpeech.DollarSign, PartsOfSpeech.PrepositionOrSubordinateConjunction,
                    PartsOfSpeech.NounPlural, PartsOfSpeech.NounSingularOrMass, PartsOfSpeech.Adjective,
                    PartsOfSpeech.CardinalNumber, PartsOfSpeech.Predeterminer, PartsOfSpeech.Determiner,
                    PartsOfSpeech.Adverb, NCD, QP, PartsOfSpeech.AdjectiveComparative,
                    PartsOfSpeech.AdjectiveSuperlative
                }
            });
            // reduced relative clause can be any predicate VP, ADJP, NP, PP.
            // For choosing between NP and PP, really need to know which one is temporal and to choose the other.
            // It's not clear ADVP needs to be in the list at all (delete?).
            NonTerminalInfo.Add(RRC,
                                new string[][] { new string[] { Left, RRC }, new string[] { Right, VP, CoordinationTransformer.Adjective, JJP, CoordinationTransformer.Noun, PP, ADVP } });

            // delete IN -- go for main part of sentence; add FRAG

            NonTerminalInfo.Add(S,
                                new string[][] { new string[] { Left, PartsOfSpeech.To, VP, S, FRAG, SBAR, CoordinationTransformer.Adjective, JJP, UCP, CoordinationTransformer.Noun } });
            NonTerminalInfo.Add(SBAR,
                                new string[][]
            {
                new string[]
                {
                    Left, WHNP, WHPP, WHADVP, WHADJP, PartsOfSpeech.PrepositionOrSubordinateConjunction,
                    PartsOfSpeech.Determiner, S, SQ, SINV, SBAR, FRAG
                }
            });
            NonTerminalInfo.Add(SBARQ,
                                new string[][] { new string[] { Left, SQ, S, SINV, SBARQ, FRAG, SBAR } });
            // cdm: if you have 2 VP under an SINV, you should really take the 2nd as syntactic head, because the first is a topicalized VP complement of the second, but for now I didn't change this, since it didn't help parsing.  (If it were changed, it'd need to be also changed to the opposite in SemanticHeadFinder.)
            NonTerminalInfo.Add(SINV,
                                new string[][]
            {
                new string[]
                {
                    Left, PartsOfSpeech.Verb3rdPersSingPresent, PartsOfSpeech.VerbPastTense,
                    PartsOfSpeech.VerbNon3rdPersSingPresent, PartsOfSpeech.VerbBaseForm, PartsOfSpeech.Modal,
                    PartsOfSpeech.VerbPastParticiple, VP, S, SINV, CoordinationTransformer.Adjective, JJP, CoordinationTransformer.Noun
                }
            });
            NonTerminalInfo.Add(SQ,
                                new string[][]
            {
                new string[]
                {
                    Left, PartsOfSpeech.Verb3rdPersSingPresent, PartsOfSpeech.VerbPastTense,
                    PartsOfSpeech.VerbNon3rdPersSingPresent, PartsOfSpeech.VerbBaseForm, PartsOfSpeech.Modal, AUX,
                    AUXG, VP, SQ
                }
            });
            // TODO: Should maybe put S before SQ for tag questions. Check.
            NonTerminalInfo.Add(UCP, new string[][] { new string[] { Right } });
            // below is weird!! Make 2 lists, one for good and one for bad heads??
            // VP: added AUX and AUXG to work with Charniak tags
            NonTerminalInfo.Add(VP,
                                new string[][]
            {
                new string[]
                {
                    Left, PartsOfSpeech.To, PartsOfSpeech.VerbPastTense, PartsOfSpeech.VerbPastParticiple,
                    PartsOfSpeech.Modal, PartsOfSpeech.Verb3rdPersSingPresent, PartsOfSpeech.VerbBaseForm,
                    PartsOfSpeech.VerbGerundOrPresentParticiple, PartsOfSpeech.VerbNon3rdPersSingPresent, VP,
                    AUX, AUXG, CoordinationTransformer.Adjective, JJP,
                    PartsOfSpeech.NounSingularOrMass, PartsOfSpeech.NounPlural, PartsOfSpeech.Adjective, CoordinationTransformer.Noun,
                    PartsOfSpeech.ProperNounSingular
                }
            });
            NonTerminalInfo.Add(WHADJP,
                                new string[][]
            {
                new string[]
                {
                    Left, PartsOfSpeech.WhAdverb, WHADVP, PartsOfSpeech.Adverb, PartsOfSpeech.Adjective, CoordinationTransformer.Adjective,
                    JJP, PartsOfSpeech.AdjectiveComparative
                }
            });
            NonTerminalInfo.Add(WHADVP, new string[][] { new string[] { Right, PartsOfSpeech.WhAdverb, WHADVP } });
            NonTerminalInfo.Add(WHNP,
                                new string[][]
            {
                new string[]
                {
                    Left, PartsOfSpeech.WhDeterminer, PartsOfSpeech.WhPronoun, PartsOfSpeech.PossessiveWhPronoun,
                    WHADJP, WHPP, WHNP
                }
            });
            NonTerminalInfo.Add(WHPP,
                                new string[][]
            {
                new string[]
                {
                    Right, PartsOfSpeech.PrepositionOrSubordinateConjunction, PartsOfSpeech.To,
                    PartsOfSpeech.ForeignWord
                }
            });
            NonTerminalInfo.Add(X,
                                new string[][] { new string[] { Right, S, VP, CoordinationTransformer.Adjective, JJP, CoordinationTransformer.Noun, SBAR, PP, X } });
            NonTerminalInfo.Add(CoordinationTransformer.Noun,
                                new string[][]
            {
                new string[]
                {
                    RightDis, PartsOfSpeech.NounSingularOrMass, PartsOfSpeech.ProperNounSingular,
                    PartsOfSpeech.ProperNounPlural, PartsOfSpeech.NounPlural, NML, NX,
                    PartsOfSpeech.PossessiveEnding, PartsOfSpeech.AdjectiveComparative
                },
                new string[] { Left, CoordinationTransformer.Noun, PartsOfSpeech.PersonalPronoun },
                new string[] { RightDis, PartsOfSpeech.DollarSign, CoordinationTransformer.Adjective, JJP, PRN, PartsOfSpeech.ForeignWord },
                new string[] { Right, PartsOfSpeech.CardinalNumber },
                new string[]
                {
                    RightDis, PartsOfSpeech.Adjective, PartsOfSpeech.AdjectiveSuperlative, PartsOfSpeech.Adverb, QP,
                    PartsOfSpeech.Determiner, PartsOfSpeech.WhDeterminer, PartsOfSpeech.AdverbComparative, ADVP
                }
            });
            // NX reuses the very same rule array instance as the noun-phrase entry above (shared reference, not a copy).
            NonTerminalInfo.Add(NX, NonTerminalInfo[CoordinationTransformer.Noun]);
            // TODO: seems JJ should be head of NML in this case:
            // (NP (NML (JJ former) (NML Red Sox) (JJ great)) (NNP Luis) (NNP Tiant)),
            // (although JJ great is tagged wrong)
            // NML likewise shares the noun-phrase rule array instance.
            NonTerminalInfo.Add(NML, NonTerminalInfo[CoordinationTransformer.Noun]);


            NonTerminalInfo.Add(POSSP, new string[][] { new string[] { Right, PartsOfSpeech.PossessiveEnding } });

            /* HJT: Adding the following to deal with oddly formed data in (for example) the Brown corpus */
            NonTerminalInfo.Add(ROOT, new string[][] { new string[] { Left, S, SQ, SINV, SBAR, FRAG } });
            // Just to handle trees which have TOP instead of ROOT at the root
            NonTerminalInfo.Add(TOP, NonTerminalInfo[ROOT]);
            NonTerminalInfo.Add(TYPO, new string[][]
            {
                new string[]
                {
                    Left, PartsOfSpeech.NounSingularOrMass, CoordinationTransformer.Noun, NML, PartsOfSpeech.ProperNounSingular,
                    PartsOfSpeech.ProperNounPlural, PartsOfSpeech.To,
                    PartsOfSpeech.VerbPastTense, PartsOfSpeech.VerbPastParticiple, PartsOfSpeech.Modal,
                    PartsOfSpeech.Verb3rdPersSingPresent, PartsOfSpeech.VerbBaseForm,
                    PartsOfSpeech.VerbGerundOrPresentParticiple, PartsOfSpeech.VerbNon3rdPersSingPresent, VP, CoordinationTransformer.Adjective,
                    JJP, FRAG
                }
            }); // for Brown (Roger)
            // NOTE(review): this rule uses the string literal "NML" where every other rule in this
            // constructor uses the NML constant -- presumably the same value; confirm and unify.
            NonTerminalInfo.Add(ADV, new string[][]
            {
                new string[]
                {
                    Right, PartsOfSpeech.Adverb, PartsOfSpeech.AdverbComparative, PartsOfSpeech.AdverbSuperlative,
                    PartsOfSpeech.ForeignWord,
                    ADVP, PartsOfSpeech.To, PartsOfSpeech.CardinalNumber, PartsOfSpeech.AdjectiveComparative,
                    PartsOfSpeech.Adjective, PartsOfSpeech.PrepositionOrSubordinateConjunction, CoordinationTransformer.Noun, "NML",
                    PartsOfSpeech.AdjectiveSuperlative, PartsOfSpeech.NounSingularOrMass
                }
            });

            // SWBD
            NonTerminalInfo.Add(EDITED, new string[][] { new string[] { Left } });
            // placeholder rule for Switchboard (if don't delete EDITED nodes)
            // in sw2756, a PartsOfSpeech.VerbBaseForm. (copy "VP" to handle this problem, though should really fix it on reading)
            // The rule below lists the same elements as the VP rule above, built as a separate array.
            NonTerminalInfo.Add(PartsOfSpeech.VerbBaseForm,
                                new string[][]
            {
                new string[]
                {
                    Left, PartsOfSpeech.To, PartsOfSpeech.VerbPastTense, PartsOfSpeech.VerbPastParticiple,
                    PartsOfSpeech.Modal, PartsOfSpeech.Verb3rdPersSingPresent, PartsOfSpeech.VerbBaseForm,
                    PartsOfSpeech.VerbGerundOrPresentParticiple, PartsOfSpeech.VerbNon3rdPersSingPresent, VP,
                    AUX, AUXG, CoordinationTransformer.Adjective, JJP,
                    PartsOfSpeech.NounSingularOrMass, PartsOfSpeech.NounPlural, PartsOfSpeech.Adjective, CoordinationTransformer.Noun,
                    PartsOfSpeech.ProperNounSingular
                }
            });

            NonTerminalInfo.Add(META, new string[][] { new string[] { Left } });
            // rule for OntoNotes, but maybe should just be deleted in TreeReader??
            NonTerminalInfo.Add(XS, new string[][] { new string[] { Right, PartsOfSpeech.PrepositionOrSubordinateConjunction } });
            // rule for new structure in QP, introduced by Stanford in QPTreeTransformer
            // NonTerminalInfo.Add(null, new string[][] {{Left}});  // rule for OntoNotes from Michel, but it would be better to fix this in TreeReader or to use a default rule?

            // todo: Uncomment this line if we always want to take the leftmost if no head rule is defined for the mother category.
            // defaultRule = defaultLeftRule; // Don't throw an exception; take leftmost if no rule is defined for a certain parent category
        }
コード例 #10
0
        public ModCollinsHeadFinder(AbstractTreebankLanguagePack tlp) : base(tlp, tlp.PunctuationTags())
        {
            // avoid punctuation as head in readonly default rule

            NonTerminalInfo = new Dictionary<string, string[][]>();

            // This version from Collins' diss (1999: 236-238)
            // NNS, NN is actually sensible (money, etc.)!
            // QP early isn't; should prefer JJR NN RB
            // remove ADVP; it just shouldn't be there.
            // if two JJ, should take right one (e.g. South Korean)
            // NonTerminalInfo.Add(CoordinationTransformer.Adjective, new string[][]{{Left, PartsOfSpeech.NounPlural, PartsOfSpeech.NounSingularOrMass, PartsOfSpeech.DollarSign, "QP"}, {Right, PartsOfSpeech.Adjective}, {Left, PartsOfSpeech.VerbPastParticiple, PartsOfSpeech.VerbGerundOrPresentParticiple, CoordinationTransformer.Adjective, "JJP", PartsOfSpeech.AdjectiveComparative, CoordinationTransformer.Noun, PartsOfSpeech.AdjectiveSuperlative, PartsOfSpeech.Determiner, PartsOfSpeech.ForeignWord, PartsOfSpeech.AdverbComparative, PartsOfSpeech.AdverbSuperlative, "SBAR", PartsOfSpeech.Adverb}});
            NonTerminalInfo.Add(CoordinationTransformer.Adjective,
                new string[][]
                {
                    new string[] {Left, PartsOfSpeech.DollarSign},
                    new string[]
                    {
                        RightDis, PartsOfSpeech.NounPlural, PartsOfSpeech.NounSingularOrMass, PartsOfSpeech.Adjective,
                        QP, PartsOfSpeech.VerbPastParticiple, PartsOfSpeech.VerbGerundOrPresentParticiple
                    },
                    new string[] {Left, CoordinationTransformer.Adjective},
                    new string[]
                    {
                        RightDis, JJP, PartsOfSpeech.AdjectiveComparative, PartsOfSpeech.AdjectiveSuperlative,
                        PartsOfSpeech.Determiner, PartsOfSpeech.Adverb, PartsOfSpeech.AdverbComparative,
                        PartsOfSpeech.CardinalNumber, PartsOfSpeech.PrepositionOrSubordinateConjunction,
                        PartsOfSpeech.VerbPastTense
                    },
                    new string[] {Left, ADVP, CoordinationTransformer.Noun}
                });
            NonTerminalInfo.Add(JJP,
                new string[][]
                {
                    new string[]
                    {
                        Left, PartsOfSpeech.NounPlural, PartsOfSpeech.NounSingularOrMass, PartsOfSpeech.DollarSign,
                        QP, PartsOfSpeech.Adjective, PartsOfSpeech.VerbPastParticiple,
                        PartsOfSpeech.VerbGerundOrPresentParticiple, CoordinationTransformer.Adjective, JJP, PartsOfSpeech.AdjectiveComparative,
                        CoordinationTransformer.Noun, PartsOfSpeech.AdjectiveSuperlative, PartsOfSpeech.Determiner, PartsOfSpeech.ForeignWord,
                        PartsOfSpeech.AdverbComparative, PartsOfSpeech.AdverbSuperlative, SBAR, PartsOfSpeech.Adverb
                    }
                });
            // JJP is introduced for NML-like adjective phrases in Vadas' treebank; Chris wishes he hadn't used JJP which should be a POS-tag.
            // ADVP rule rewritten by Chris in Nov 2010 to be rightdis.  This is right! JJ.* is often head and rightmost.
            NonTerminalInfo.Add(ADVP, new string[][]
            {
                new string[] {Left, ADVP, PartsOfSpeech.PrepositionOrSubordinateConjunction},
                new string[]
                {
                    RightDis, PartsOfSpeech.Adverb, PartsOfSpeech.AdverbComparative, PartsOfSpeech.AdverbSuperlative,
                    PartsOfSpeech.Adjective, PartsOfSpeech.AdjectiveComparative, PartsOfSpeech.AdjectiveSuperlative
                },
                new string[]
                {
                    RightDis, PartsOfSpeech.Particle, PartsOfSpeech.Determiner, PartsOfSpeech.NounSingularOrMass,
                    PartsOfSpeech.CardinalNumber, CoordinationTransformer.Noun, PartsOfSpeech.VerbPastParticiple,
                    PartsOfSpeech.ProperNounSingular, PartsOfSpeech.CoordinatingConjunction, PartsOfSpeech.ForeignWord,
                    PartsOfSpeech.NounPlural, CoordinationTransformer.Adjective, NML
                }
            });
            NonTerminalInfo.Add(CONJP,
                new string[][]
                {
                    new string[]
                    {
                        Right, PartsOfSpeech.CoordinatingConjunction, PartsOfSpeech.Adverb,
                        PartsOfSpeech.PrepositionOrSubordinateConjunction
                    }
                });
            NonTerminalInfo.Add(FRAG, new string[][] {new string[] {Right}}); // crap
            NonTerminalInfo.Add(INTJ, new string[][] {new string[] {Left}});
            NonTerminalInfo.Add(LST,
                new string[][] {new string[] {Right, PartsOfSpeech.ListItemMarker, PartsOfSpeech.ColonSemiColon}});

            // NML is head in: (NAC-LOC (NML San Antonio) (, ,) (NNP Texas))
            // TODO: NNP should be head (rare cases, could be ignored):
            //   (NAC (NML New York) (NNP Court) (PP of Appeals))
            //   (NAC (NML Prudential Insurance) (NNP Co.) (PP Of America))
            // Chris: This could maybe still do with more thought, but NAC is rare.
            NonTerminalInfo.Add(NAC,
                new string[][]
                {
                    new string[]
                    {
                        Left, PartsOfSpeech.NounSingularOrMass, PartsOfSpeech.NounPlural, NML,
                        PartsOfSpeech.ProperNounSingular, PartsOfSpeech.ProperNounPlural, CoordinationTransformer.Noun, NAC,
                        PartsOfSpeech.ExistentialThere, PartsOfSpeech.DollarSign, PartsOfSpeech.CardinalNumber, QP,
                        PartsOfSpeech.PersonalPronoun, PartsOfSpeech.VerbGerundOrPresentParticiple,
                        PartsOfSpeech.Adjective,
                        PartsOfSpeech.AdjectiveSuperlative, PartsOfSpeech.AdjectiveComparative, CoordinationTransformer.Adjective, JJP,
                        PartsOfSpeech.ForeignWord
                    }
                });

            // Added JJ to PP head table, since it is a head in several cases, e.g.:
            // (PP (JJ next) (PP to them))
            // When you have both JJ and IN daughters, it is invariably "such as" -- not so clear which should be head, but leave as IN
            // should prefer JJ? (PP (JJ such) (IN as) (NP (NN crocidolite)))  Michel thinks we should make JJ a head of PP
            // added SYM as used in new treebanks for symbols filling role of IN
            // Changed PP search to left -- just what you want for conjunction (and consistent with SemanticHeadFinder)
            NonTerminalInfo.Add(PP,
                new string[][]
                {
                    new string[]
                    {
                        Right, PartsOfSpeech.PrepositionOrSubordinateConjunction, PartsOfSpeech.To,
                        PartsOfSpeech.VerbGerundOrPresentParticiple, PartsOfSpeech.VerbPastParticiple,
                        PartsOfSpeech.Particle, PartsOfSpeech.ForeignWord, PartsOfSpeech.Adjective, PartsOfSpeech.Symbol
                    },
                    new string[] {Left, PP}
                });

            NonTerminalInfo.Add(PRN,
                new string[][]
                {
                    new string[]
                    {
                        Left, VP, CoordinationTransformer.Noun, PP, SQ, S, SINV, SBAR, CoordinationTransformer.Adjective, JJP, ADVP, INTJ, WHNP,
                        NAC,
                        PartsOfSpeech.VerbNon3rdPersSingPresent, PartsOfSpeech.Adjective,
                        PartsOfSpeech.NounSingularOrMass, PartsOfSpeech.ProperNounSingular
                    }
                });
            NonTerminalInfo.Add(PRT, new string[][] {new string[] {Right, PartsOfSpeech.Particle}});
            // add '#' for pounds!!
            NonTerminalInfo.Add(QP,
                new string[][]
                {
                    new string[]
                    {
                        Left, PartsOfSpeech.DollarSign, PartsOfSpeech.PrepositionOrSubordinateConjunction,
                        PartsOfSpeech.NounPlural, PartsOfSpeech.NounSingularOrMass, PartsOfSpeech.Adjective,
                        PartsOfSpeech.CardinalNumber, PartsOfSpeech.Predeterminer, PartsOfSpeech.Determiner,
                        PartsOfSpeech.Adverb, NCD, QP, PartsOfSpeech.AdjectiveComparative,
                        PartsOfSpeech.AdjectiveSuperlative
                    }
                });
            // reduced relative clause can be any predicate VP, ADJP, NP, PP.
            // For choosing between NP and PP, really need to know which one is temporal and to choose the other.
            // It's not clear ADVP needs to be in the list at all (delete?).
            NonTerminalInfo.Add(RRC,
                new string[][] { new string[] { Left, RRC }, new string[] { Right, VP, CoordinationTransformer.Adjective, JJP, CoordinationTransformer.Noun, PP, ADVP } });

            // delete IN -- go for main part of sentence; add FRAG

            NonTerminalInfo.Add(S,
                new string[][] { new string[] { Left, PartsOfSpeech.To, VP, S, FRAG, SBAR, CoordinationTransformer.Adjective, JJP, UCP, CoordinationTransformer.Noun } });
            NonTerminalInfo.Add(SBAR,
                new string[][]
                {
                    new string[]
                    {
                        Left, WHNP, WHPP, WHADVP, WHADJP, PartsOfSpeech.PrepositionOrSubordinateConjunction,
                        PartsOfSpeech.Determiner, S, SQ, SINV, SBAR, FRAG
                    }
                });
            NonTerminalInfo.Add(SBARQ,
                new string[][] {new string[] {Left, SQ, S, SINV, SBARQ, FRAG, SBAR}});
            // cdm: if you have 2 VP under an SINV, you should really take the 2nd as syntactic head, because the first is a topicalized VP complement of the second, but for now I didn't change this, since it didn't help parsing.  (If it were changed, it'd need to be also changed to the opposite in SemanticHeadFinder.)
            NonTerminalInfo.Add(SINV,
                new string[][]
                {
                    new string[]
                    {
                        Left, PartsOfSpeech.Verb3rdPersSingPresent, PartsOfSpeech.VerbPastTense,
                        PartsOfSpeech.VerbNon3rdPersSingPresent, PartsOfSpeech.VerbBaseForm, PartsOfSpeech.Modal,
                        PartsOfSpeech.VerbPastParticiple, VP, S, SINV, CoordinationTransformer.Adjective, JJP, CoordinationTransformer.Noun
                    }
                });
            NonTerminalInfo.Add(SQ,
                new string[][]
                {
                    new string[]
                    {
                        Left, PartsOfSpeech.Verb3rdPersSingPresent, PartsOfSpeech.VerbPastTense,
                        PartsOfSpeech.VerbNon3rdPersSingPresent, PartsOfSpeech.VerbBaseForm, PartsOfSpeech.Modal, AUX,
                        AUXG, VP, SQ
                    }
                });
                // TODO: Should maybe put S before SQ for tag questions. Check.
            NonTerminalInfo.Add(UCP, new string[][] {new string[] {Right}});
            // below is weird!! Make 2 lists, one for good and one for bad heads??
            // VP: added AUX and AUXG to work with Charniak tags
            NonTerminalInfo.Add(VP,
                new string[][]
                {
                    new string[]
                    {
                        Left, PartsOfSpeech.To, PartsOfSpeech.VerbPastTense, PartsOfSpeech.VerbPastParticiple,
                        PartsOfSpeech.Modal, PartsOfSpeech.Verb3rdPersSingPresent, PartsOfSpeech.VerbBaseForm,
                        PartsOfSpeech.VerbGerundOrPresentParticiple, PartsOfSpeech.VerbNon3rdPersSingPresent, VP,
                        AUX, AUXG, CoordinationTransformer.Adjective, JJP,
                        PartsOfSpeech.NounSingularOrMass, PartsOfSpeech.NounPlural, PartsOfSpeech.Adjective, CoordinationTransformer.Noun,
                        PartsOfSpeech.ProperNounSingular
                    }
                });
            NonTerminalInfo.Add(WHADJP,
                new string[][]
                {
                    new string[]
                    {
                        Left, PartsOfSpeech.WhAdverb, WHADVP, PartsOfSpeech.Adverb, PartsOfSpeech.Adjective, CoordinationTransformer.Adjective,
                        JJP, PartsOfSpeech.AdjectiveComparative
                    }
                });
            NonTerminalInfo.Add(WHADVP, new string[][] {new string[] {Right, PartsOfSpeech.WhAdverb, WHADVP}});
            NonTerminalInfo.Add(WHNP,
                new string[][]
                {
                    new string[]
                    {
                        Left, PartsOfSpeech.WhDeterminer, PartsOfSpeech.WhPronoun, PartsOfSpeech.PossessiveWhPronoun,
                        WHADJP, WHPP, WHNP
                    }
                });
            NonTerminalInfo.Add(WHPP,
                new string[][]
                {
                    new string[]
                    {
                        Right, PartsOfSpeech.PrepositionOrSubordinateConjunction, PartsOfSpeech.To,
                        PartsOfSpeech.ForeignWord
                    }
                });
            NonTerminalInfo.Add(X,
                new string[][] { new string[] { Right, S, VP, CoordinationTransformer.Adjective, JJP, CoordinationTransformer.Noun, SBAR, PP, X } });
            NonTerminalInfo.Add(CoordinationTransformer.Noun,
                new string[][]
                {
                    new string[]
                    {
                        RightDis, PartsOfSpeech.NounSingularOrMass, PartsOfSpeech.ProperNounSingular,
                        PartsOfSpeech.ProperNounPlural, PartsOfSpeech.NounPlural, NML, NX,
                        PartsOfSpeech.PossessiveEnding, PartsOfSpeech.AdjectiveComparative
                    },
                    new string[] {Left, CoordinationTransformer.Noun, PartsOfSpeech.PersonalPronoun},
                    new string[] {RightDis, PartsOfSpeech.DollarSign, CoordinationTransformer.Adjective, JJP, PRN, PartsOfSpeech.ForeignWord},
                    new string[] {Right, PartsOfSpeech.CardinalNumber},
                    new string[]
                    {
                        RightDis, PartsOfSpeech.Adjective, PartsOfSpeech.AdjectiveSuperlative, PartsOfSpeech.Adverb, QP,
                        PartsOfSpeech.Determiner, PartsOfSpeech.WhDeterminer, PartsOfSpeech.AdverbComparative, ADVP
                    }
                });
            NonTerminalInfo.Add(NX, NonTerminalInfo[CoordinationTransformer.Noun]);
            // TODO: seems JJ should be head of NML in this case:
            // (NP (NML (JJ former) (NML Red Sox) (JJ great)) (NNP Luis) (NNP Tiant)),
            // (although JJ great is tagged wrong)
            NonTerminalInfo.Add(NML, NonTerminalInfo[CoordinationTransformer.Noun]);


            NonTerminalInfo.Add(POSSP, new string[][] {new string[] {Right, PartsOfSpeech.PossessiveEnding}});

            /* HJT: Adding the following to deal with oddly formed data in (for example) the Brown corpus */
            NonTerminalInfo.Add(ROOT, new string[][] {new string[] {Left, S, SQ, SINV, SBAR, FRAG}});
            // Just to handle trees which have TOP instead of ROOT at the root
            NonTerminalInfo.Add(TOP, NonTerminalInfo[ROOT]);
            NonTerminalInfo.Add(TYPO, new string[][]
            {
                new string[]
                {
                    Left, PartsOfSpeech.NounSingularOrMass, CoordinationTransformer.Noun, NML, PartsOfSpeech.ProperNounSingular,
                    PartsOfSpeech.ProperNounPlural, PartsOfSpeech.To,
                    PartsOfSpeech.VerbPastTense, PartsOfSpeech.VerbPastParticiple, PartsOfSpeech.Modal,
                    PartsOfSpeech.Verb3rdPersSingPresent, PartsOfSpeech.VerbBaseForm,
                    PartsOfSpeech.VerbGerundOrPresentParticiple, PartsOfSpeech.VerbNon3rdPersSingPresent, VP, CoordinationTransformer.Adjective,
                    JJP, FRAG
                }
            }); // for Brown (Roger)
            NonTerminalInfo.Add(ADV, new string[][]
            {
                new string[]
                {
                    Right, PartsOfSpeech.Adverb, PartsOfSpeech.AdverbComparative, PartsOfSpeech.AdverbSuperlative,
                    PartsOfSpeech.ForeignWord,
                    ADVP, PartsOfSpeech.To, PartsOfSpeech.CardinalNumber, PartsOfSpeech.AdjectiveComparative,
                    PartsOfSpeech.Adjective, PartsOfSpeech.PrepositionOrSubordinateConjunction, CoordinationTransformer.Noun, "NML",
                    PartsOfSpeech.AdjectiveSuperlative, PartsOfSpeech.NounSingularOrMass
                }
            });

            // SWBD
            NonTerminalInfo.Add(EDITED, new string[][] {new string[] {Left}});
                // crap rule for Switchboard (if don't delete EDITED nodes)
            // in sw2756, a PartsOfSpeech.VerbBaseForm. (copy "VP" to handle this problem, though should really fix it on reading)
            NonTerminalInfo.Add(PartsOfSpeech.VerbBaseForm,
                new string[][]
                {
                    new string[]
                    {
                        Left, PartsOfSpeech.To, PartsOfSpeech.VerbPastTense, PartsOfSpeech.VerbPastParticiple,
                        PartsOfSpeech.Modal, PartsOfSpeech.Verb3rdPersSingPresent, PartsOfSpeech.VerbBaseForm,
                        PartsOfSpeech.VerbGerundOrPresentParticiple, PartsOfSpeech.VerbNon3rdPersSingPresent, VP,
                        AUX, AUXG, CoordinationTransformer.Adjective, JJP,
                        PartsOfSpeech.NounSingularOrMass, PartsOfSpeech.NounPlural, PartsOfSpeech.Adjective, CoordinationTransformer.Noun,
                        PartsOfSpeech.ProperNounSingular
                    }
                });

            NonTerminalInfo.Add(META, new string[][] {new string[] {Left}});
                // rule for OntoNotes, but maybe should just be deleted in TreeReader??
            NonTerminalInfo.Add(XS, new string[][] {new string[] {Right, PartsOfSpeech.PrepositionOrSubordinateConjunction}});
                // rule for new structure in QP, introduced by Stanford in QPTreeTransformer
            // NonTerminalInfo.Add(null, new string[][] {{Left}});  // rule for OntoNotes from Michel, but it would be better to fix this in TreeReader or to use a default rule?

            // todo: Uncomment this line if we always want to take the leftmost if no head rule is defined for the mother category.
            // defaultRule = defaultLeftRule; // Don't exception, take leftmost if no rule defined for a certain parent category
        }
コード例 #11
0
ファイル: CollinsHeadFinder.cs プロジェクト: gblosser/OpenNlp
        /// <summary>
        /// Creates a CollinsHeadFinder and fills <c>NonTerminalInfo</c> with its rule table.
        /// Both arguments are forwarded unchanged to the base constructor.
        /// </summary>
        /// <param name="tlp">Treebank language pack forwarded to the base constructor</param>
        /// <param name="categoriesToAvoid">Categories forwarded to the base constructor</param>
        public CollinsHeadFinder(AbstractTreebankLanguagePack tlp, string[] categoriesToAvoid)
            : base(tlp, categoriesToAvoid)
        {
            // Table taken from Collins' dissertation (1999: 236-238).
            // Key: parent category. Value: one or more rule arrays, each starting with a
            // direction constant (Left, Right or RightDis) followed by the categories it lists.
            // The collection initializer calls Add once per entry, in the order written.
            NonTerminalInfo = new Dictionary<string, string[][]>
            {
                {
                    CoordinationTransformer.Adjective, new[]
                    {
                        new string[]
                        {
                            Left, PartsOfSpeech.NounPlural, QP, PartsOfSpeech.NounSingularOrMass,
                            PartsOfSpeech.DollarSign, ADVP, PartsOfSpeech.Adjective,
                            PartsOfSpeech.VerbPastParticiple, PartsOfSpeech.VerbGerundOrPresentParticiple,
                            CoordinationTransformer.Adjective, PartsOfSpeech.AdjectiveComparative,
                            CoordinationTransformer.Noun, PartsOfSpeech.AdjectiveSuperlative,
                            PartsOfSpeech.Determiner, PartsOfSpeech.ForeignWord,
                            PartsOfSpeech.AdverbComparative, PartsOfSpeech.AdverbSuperlative, SBAR,
                            PartsOfSpeech.Adverb
                        }
                    }
                },
                {
                    ADVP, new[]
                    {
                        new string[]
                        {
                            Right, PartsOfSpeech.Adverb, PartsOfSpeech.AdverbComparative,
                            PartsOfSpeech.AdverbSuperlative, PartsOfSpeech.ForeignWord, ADVP,
                            PartsOfSpeech.To, PartsOfSpeech.CardinalNumber,
                            PartsOfSpeech.AdjectiveComparative, PartsOfSpeech.Adjective,
                            PartsOfSpeech.PrepositionOrSubordinateConjunction,
                            CoordinationTransformer.Noun, PartsOfSpeech.AdjectiveSuperlative,
                            PartsOfSpeech.NounSingularOrMass
                        }
                    }
                },
                {
                    CONJP, new[]
                    {
                        new string[]
                        {
                            Right, PartsOfSpeech.CoordinatingConjunction, PartsOfSpeech.Adverb,
                            PartsOfSpeech.PrepositionOrSubordinateConjunction
                        }
                    }
                },
                // Direction-only rule: no categories are listed for FRAG.
                { FRAG, new[] { new string[] { Right } } },
                { INTJ, new[] { new string[] { Left } } },
                {
                    LST, new[]
                    {
                        new string[] { Right, PartsOfSpeech.ListItemMarker, PartsOfSpeech.ColonSemiColon }
                    }
                },
                {
                    NAC, new[]
                    {
                        new string[]
                        {
                            Left, PartsOfSpeech.NounSingularOrMass, PartsOfSpeech.NounPlural,
                            PartsOfSpeech.ProperNounSingular, PartsOfSpeech.ProperNounPlural,
                            CoordinationTransformer.Noun, NAC, PartsOfSpeech.ExistentialThere,
                            PartsOfSpeech.DollarSign, PartsOfSpeech.CardinalNumber, QP,
                            PartsOfSpeech.PersonalPronoun, PartsOfSpeech.VerbGerundOrPresentParticiple,
                            PartsOfSpeech.Adjective, PartsOfSpeech.AdjectiveSuperlative,
                            PartsOfSpeech.AdjectiveComparative, CoordinationTransformer.Adjective,
                            PartsOfSpeech.ForeignWord
                        }
                    }
                },
                // Direction-only rule: no categories are listed for NX.
                { NX, new[] { new string[] { Left } } },
                // Open question from the original author: should JJ be preferred here?
                // Example: (PP (JJ such) (IN as) (NP (NN crocidolite)))
                {
                    PP, new[]
                    {
                        new string[]
                        {
                            Right, PartsOfSpeech.PrepositionOrSubordinateConjunction, PartsOfSpeech.To,
                            PartsOfSpeech.VerbGerundOrPresentParticiple, PartsOfSpeech.VerbPastParticiple,
                            PartsOfSpeech.Particle, PartsOfSpeech.ForeignWord
                        }
                    }
                },
                { PRN, new[] { new string[] { Left } } },
                { PRT, new[] { new string[] { Right, PartsOfSpeech.Particle } } },
                {
                    QP, new[]
                    {
                        new string[]
                        {
                            Left, PartsOfSpeech.DollarSign, PartsOfSpeech.PrepositionOrSubordinateConjunction,
                            PartsOfSpeech.NounPlural, PartsOfSpeech.NounSingularOrMass,
                            PartsOfSpeech.Adjective, PartsOfSpeech.Adverb, PartsOfSpeech.Determiner,
                            PartsOfSpeech.CardinalNumber, NCD, QP, PartsOfSpeech.AdjectiveComparative,
                            PartsOfSpeech.AdjectiveSuperlative
                        }
                    }
                },
                {
                    RRC, new[]
                    {
                        new string[]
                        {
                            Right, AbstractCollinsHeadFinder.VerbPhrase, CoordinationTransformer.Noun,
                            ADVP, CoordinationTransformer.Adjective, PP
                        }
                    }
                },
                {
                    S, new[]
                    {
                        new string[]
                        {
                            Left, PartsOfSpeech.To, PartsOfSpeech.PrepositionOrSubordinateConjunction,
                            AbstractCollinsHeadFinder.VerbPhrase, S, SBAR,
                            CoordinationTransformer.Adjective, UCP, CoordinationTransformer.Noun
                        }
                    }
                },
                {
                    SBAR, new[]
                    {
                        new string[]
                        {
                            Left, WHNP, WHPP, WHADVP, WHADJP,
                            PartsOfSpeech.PrepositionOrSubordinateConjunction, PartsOfSpeech.Determiner,
                            S, SQ, SINV, SBAR, FRAG
                        }
                    }
                },
                { SBARQ, new[] { new string[] { Left, SQ, S, SINV, SBARQ, FRAG } } },
                {
                    SINV, new[]
                    {
                        new string[]
                        {
                            Left, PartsOfSpeech.Verb3rdPersSingPresent, PartsOfSpeech.VerbPastTense,
                            PartsOfSpeech.VerbNon3rdPersSingPresent, PartsOfSpeech.VerbBaseForm,
                            PartsOfSpeech.Modal, AbstractCollinsHeadFinder.VerbPhrase, S, SINV,
                            CoordinationTransformer.Adjective, CoordinationTransformer.Noun
                        }
                    }
                },
                {
                    SQ, new[]
                    {
                        new string[]
                        {
                            Left, PartsOfSpeech.Verb3rdPersSingPresent, PartsOfSpeech.VerbPastTense,
                            PartsOfSpeech.VerbNon3rdPersSingPresent, PartsOfSpeech.VerbBaseForm,
                            PartsOfSpeech.Modal, AbstractCollinsHeadFinder.VerbPhrase, SQ
                        }
                    }
                },
                { "UCP", new[] { new string[] { Right } } },
                {
                    AbstractCollinsHeadFinder.VerbPhrase, new[]
                    {
                        new string[]
                        {
                            Left, PartsOfSpeech.To, PartsOfSpeech.VerbPastTense,
                            PartsOfSpeech.VerbPastParticiple, PartsOfSpeech.Modal,
                            PartsOfSpeech.Verb3rdPersSingPresent, PartsOfSpeech.VerbBaseForm,
                            PartsOfSpeech.VerbGerundOrPresentParticiple,
                            PartsOfSpeech.VerbNon3rdPersSingPresent, AUX, AUXG,
                            AbstractCollinsHeadFinder.VerbPhrase, CoordinationTransformer.Adjective,
                            PartsOfSpeech.NounSingularOrMass, PartsOfSpeech.NounPlural,
                            CoordinationTransformer.Noun
                        }
                    }
                },
                {
                    WHADJP, new[]
                    {
                        new string[]
                        {
                            Left, PartsOfSpeech.CoordinatingConjunction, PartsOfSpeech.WhAdverb,
                            PartsOfSpeech.Adjective, CoordinationTransformer.Adjective
                        }
                    }
                },
                {
                    WHADVP, new[]
                    {
                        new string[] { Right, PartsOfSpeech.CoordinatingConjunction, PartsOfSpeech.WhAdverb }
                    }
                },
                {
                    WHNP, new[]
                    {
                        new string[]
                        {
                            Left, PartsOfSpeech.WhDeterminer, PartsOfSpeech.WhPronoun,
                            PartsOfSpeech.PossessiveWhPronoun, WHADJP, WHPP, WHNP
                        }
                    }
                },
                {
                    WHPP, new[]
                    {
                        new string[]
                        {
                            Right, PartsOfSpeech.PrepositionOrSubordinateConjunction, PartsOfSpeech.To,
                            PartsOfSpeech.ForeignWord
                        }
                    }
                },
                // Direction-only rule: no categories are listed for X.
                { X, new[] { new string[] { Right } } },
                {
                    // The only entry with several rule arrays; their order is kept as written.
                    CoordinationTransformer.Noun, new[]
                    {
                        new string[]
                        {
                            RightDis, PartsOfSpeech.NounSingularOrMass, PartsOfSpeech.ProperNounSingular,
                            PartsOfSpeech.ProperNounPlural, PartsOfSpeech.NounPlural, NX,
                            PartsOfSpeech.PossessiveEnding, PartsOfSpeech.AdjectiveComparative
                        },
                        new string[] { Left, CoordinationTransformer.Noun },
                        new string[] { RightDis, PartsOfSpeech.DollarSign, CoordinationTransformer.Adjective, PRN },
                        new string[] { Right, PartsOfSpeech.CardinalNumber },
                        new string[]
                        {
                            RightDis, PartsOfSpeech.Adjective, PartsOfSpeech.AdjectiveSuperlative,
                            PartsOfSpeech.Adverb, QP
                        }
                    }
                },
                // Direction-only rule, added for the Brown corpus (Roger).
                { TYPO, new[] { new string[] { Left } } },
                // Direction-only rule for Switchboard, relevant if EDITED nodes are not deleted.
                { EDITED, new[] { new string[] { Left } } },
                // Rule for the new structure in QP.
                { XS, new[] { new string[] { Right, PartsOfSpeech.PrepositionOrSubordinateConjunction } } }
            };
        }
コード例 #12
0
ファイル: CollinsHeadFinder.cs プロジェクト: gblosser/OpenNlp
 /// <summary>
 /// Single-argument convenience constructor. It delegates to the two-argument
 /// constructor, passing <c>EmptyStringArray</c> as the categories to avoid, which
 /// gives the traditional behavior with no special avoidance of punctuation categories.
 /// </summary>
 /// <param name="tlp">TreebankLanguagePack used for the basic category function</param>
 public CollinsHeadFinder(AbstractTreebankLanguagePack tlp)
     : this(tlp, EmptyStringArray)
 {
 }
コード例 #13
0
 /// <summary>
 /// Creates the function object and stores the supplied language pack in the
 /// <c>tlp</c> member of this instance.
 /// </summary>
 /// <param name="tlp">Treebank language pack to store; no validation is performed here</param>
 public BasicCategoryStringFunction(AbstractTreebankLanguagePack tlp)
 {
     this.tlp = tlp;
 }
コード例 #14
0
 public CollinsHeadFinder(AbstractTreebankLanguagePack tlp, string[] categoriesToAvoid)
     : base(tlp, categoriesToAvoid)
 {
     NonTerminalInfo = new Dictionary <string, string[][]>();
     // This version from Collins' diss (1999: 236-238)
     NonTerminalInfo.Add(CoordinationTransformer.Adjective,
                         new string[][]
     {
         new string[]
         {
             Left, PartsOfSpeech.NounPlural, QP, PartsOfSpeech.NounSingularOrMass,
             PartsOfSpeech.DollarSign, ADVP, PartsOfSpeech.Adjective, PartsOfSpeech.VerbPastParticiple,
             PartsOfSpeech.VerbGerundOrPresentParticiple, CoordinationTransformer.Adjective, PartsOfSpeech.AdjectiveComparative, CoordinationTransformer.Noun,
             PartsOfSpeech.AdjectiveSuperlative, PartsOfSpeech.Determiner, PartsOfSpeech.ForeignWord,
             PartsOfSpeech.AdverbComparative, PartsOfSpeech.AdverbSuperlative, SBAR, PartsOfSpeech.Adverb
         }
     });
     NonTerminalInfo.Add(ADVP,
                         new string[][]
     {
         new string[]
         {
             Right, PartsOfSpeech.Adverb, PartsOfSpeech.AdverbComparative, PartsOfSpeech.AdverbSuperlative,
             PartsOfSpeech.ForeignWord, ADVP, PartsOfSpeech.To, PartsOfSpeech.CardinalNumber,
             PartsOfSpeech.AdjectiveComparative, PartsOfSpeech.Adjective,
             PartsOfSpeech.PrepositionOrSubordinateConjunction, CoordinationTransformer.Noun, PartsOfSpeech.AdjectiveSuperlative,
             PartsOfSpeech.NounSingularOrMass
         }
     });
     NonTerminalInfo.Add(CONJP,
                         new string[][]
     {
         new string[]
         {
             Right, PartsOfSpeech.CoordinatingConjunction, PartsOfSpeech.Adverb,
             PartsOfSpeech.PrepositionOrSubordinateConjunction
         }
     });
     NonTerminalInfo.Add(FRAG, new string[][] { new string[] { Right } }); // crap
     NonTerminalInfo.Add(INTJ, new string[][] { new string[] { Left } });
     NonTerminalInfo.Add(LST,
                         new string[][] { new string[] { Right, PartsOfSpeech.ListItemMarker, PartsOfSpeech.ColonSemiColon } });
     NonTerminalInfo.Add(NAC,
                         new string[][]
     {
         new string[]
         {
             Left, PartsOfSpeech.NounSingularOrMass, PartsOfSpeech.NounPlural,
             PartsOfSpeech.ProperNounSingular, PartsOfSpeech.ProperNounPlural, CoordinationTransformer.Noun, NAC,
             PartsOfSpeech.ExistentialThere, PartsOfSpeech.DollarSign, PartsOfSpeech.CardinalNumber, QP,
             PartsOfSpeech.PersonalPronoun, PartsOfSpeech.VerbGerundOrPresentParticiple,
             PartsOfSpeech.Adjective, PartsOfSpeech.AdjectiveSuperlative,
             PartsOfSpeech.AdjectiveComparative, CoordinationTransformer.Adjective, PartsOfSpeech.ForeignWord
         }
     });
     NonTerminalInfo.Add(NX, new string[][] { new string[] { Left } }); // crap
     NonTerminalInfo.Add(PP,
                         new string[][]
     {
         new string[]
         {
             Right, PartsOfSpeech.PrepositionOrSubordinateConjunction, PartsOfSpeech.To,
             PartsOfSpeech.VerbGerundOrPresentParticiple, PartsOfSpeech.VerbPastParticiple,
             PartsOfSpeech.Particle, PartsOfSpeech.ForeignWord
         }
     });
     // should prefer JJ? (PP (JJ such) (IN as) (NP (NN crocidolite)))
     NonTerminalInfo.Add(PRN, new string[][] { new string[] { Left } });
     NonTerminalInfo.Add(PRT, new string[][] { new string[] { Right, PartsOfSpeech.Particle } });
     NonTerminalInfo.Add(QP,
                         new string[][]
     {
         new string[]
         {
             Left, PartsOfSpeech.DollarSign, PartsOfSpeech.PrepositionOrSubordinateConjunction,
             PartsOfSpeech.NounPlural, PartsOfSpeech.NounSingularOrMass, PartsOfSpeech.Adjective,
             PartsOfSpeech.Adverb, PartsOfSpeech.Determiner, PartsOfSpeech.CardinalNumber, NCD, QP,
             PartsOfSpeech.AdjectiveComparative, PartsOfSpeech.AdjectiveSuperlative
         }
     });
     NonTerminalInfo.Add(RRC,
                         new string[][]
     {
         new string[]
         {
             Right, AbstractCollinsHeadFinder.VerbPhrase, CoordinationTransformer.Noun, ADVP,
             CoordinationTransformer.Adjective, PP
         }
     });
     NonTerminalInfo.Add(S,
                         new string[][]
     {
         new string[]
         {
             Left, PartsOfSpeech.To, PartsOfSpeech.PrepositionOrSubordinateConjunction, AbstractCollinsHeadFinder.VerbPhrase, S, SBAR,
             CoordinationTransformer.Adjective, UCP, CoordinationTransformer.Noun
         }
     });
     NonTerminalInfo.Add(SBAR,
                         new string[][]
     {
         new string[]
         {
             Left, WHNP, WHPP, WHADVP, WHADJP, PartsOfSpeech.PrepositionOrSubordinateConjunction,
             PartsOfSpeech.Determiner, S, SQ, SINV, SBAR, FRAG
         }
     });
     NonTerminalInfo.Add(SBARQ, new string[][] { new string[] { Left, SQ, S, SINV, SBARQ, FRAG } });
     NonTerminalInfo.Add(SINV,
                         new string[][]
     {
         new string[]
         {
             Left, PartsOfSpeech.Verb3rdPersSingPresent, PartsOfSpeech.VerbPastTense,
             PartsOfSpeech.VerbNon3rdPersSingPresent, PartsOfSpeech.VerbBaseForm, PartsOfSpeech.Modal, AbstractCollinsHeadFinder.VerbPhrase,
             S, SINV, CoordinationTransformer.Adjective, CoordinationTransformer.Noun
         }
     });
     NonTerminalInfo.Add(SQ,
                         new string[][]
     {
         new string[]
         {
             Left, PartsOfSpeech.Verb3rdPersSingPresent, PartsOfSpeech.VerbPastTense,
             PartsOfSpeech.VerbNon3rdPersSingPresent, PartsOfSpeech.VerbBaseForm, PartsOfSpeech.Modal, AbstractCollinsHeadFinder.VerbPhrase,
             SQ
         }
     });
     NonTerminalInfo.Add("UCP", new string[][] { new string[] { Right } });
     NonTerminalInfo.Add(AbstractCollinsHeadFinder.VerbPhrase,
                         new string[][]
     {
         new string[]
         {
             Left, PartsOfSpeech.To, PartsOfSpeech.VerbPastTense, PartsOfSpeech.VerbPastParticiple,
             PartsOfSpeech.Modal, PartsOfSpeech.Verb3rdPersSingPresent, PartsOfSpeech.VerbBaseForm,
             PartsOfSpeech.VerbGerundOrPresentParticiple, PartsOfSpeech.VerbNon3rdPersSingPresent, AUX,
             AUXG, AbstractCollinsHeadFinder.VerbPhrase, CoordinationTransformer.Adjective, PartsOfSpeech.NounSingularOrMass,
             PartsOfSpeech.NounPlural, CoordinationTransformer.Noun
         }
     });
     NonTerminalInfo.Add(WHADJP,
                         new string[][]
     {
         new string[]
         {
             Left, PartsOfSpeech.CoordinatingConjunction, PartsOfSpeech.WhAdverb, PartsOfSpeech.Adjective,
             CoordinationTransformer.Adjective
         }
     });
     NonTerminalInfo.Add(WHADVP, new string[][] { new string[] { Right, PartsOfSpeech.CoordinatingConjunction, PartsOfSpeech.WhAdverb } });
     NonTerminalInfo.Add(WHNP,
                         new string[][]
     {
         new string[]
         {
             Left, PartsOfSpeech.WhDeterminer, PartsOfSpeech.WhPronoun, PartsOfSpeech.PossessiveWhPronoun,
             WHADJP, WHPP, WHNP
         }
     });
     NonTerminalInfo.Add(WHPP,
                         new string[][]
     {
         new string[]
         {
             Right, PartsOfSpeech.PrepositionOrSubordinateConjunction, PartsOfSpeech.To,
             PartsOfSpeech.ForeignWord
         }
     });
     NonTerminalInfo.Add(X, new string[][] { new string[] { Right } }); // crap rule
     NonTerminalInfo.Add(CoordinationTransformer.Noun,
                         new string[][]
     {
         new string[]
         {
             RightDis, PartsOfSpeech.NounSingularOrMass, PartsOfSpeech.ProperNounSingular,
             PartsOfSpeech.ProperNounPlural, PartsOfSpeech.NounPlural, NX, PartsOfSpeech.PossessiveEnding,
             PartsOfSpeech.AdjectiveComparative
         },
         new string[] { Left, CoordinationTransformer.Noun },
         new string[] { RightDis, PartsOfSpeech.DollarSign, CoordinationTransformer.Adjective, PRN },
         new string[] { Right, PartsOfSpeech.CardinalNumber },
         new string[]
         {
             RightDis, PartsOfSpeech.Adjective, PartsOfSpeech.AdjectiveSuperlative, PartsOfSpeech.Adverb, QP
         }
     });
     NonTerminalInfo.Add(TYPO, new string[][] { new string[] { Left } }); // another crap rule, for Brown (Roger)
     NonTerminalInfo.Add(EDITED, new string[][] { new string[] { Left } });
     // crap rule for Switchboard (if don't delete EDITED nodes)
     NonTerminalInfo.Add(XS, new string[][] { new string[] { Right, PartsOfSpeech.PrepositionOrSubordinateConjunction } }); // rule for new structure in QP
 }
コード例 #15
0
 /// <summary>
 /// Creates a transformer whose language pack (<c>Tlp</c>) is a newly
 /// constructed <see cref="PennTreebankLanguagePack"/>.
 /// </summary>
 public DependencyTreeTransformer()
 {
     var pennLanguagePack = new PennTreebankLanguagePack();
     Tlp = pennLanguagePack;
 }
コード例 #16
0
        /// <summary>
        /// Creates a head finder and fills <c>nonTerminalInfo</c> with the head rules for each
        /// parent category. The table is based on Collins' dissertation (1999: 236-238) with the
        /// modifications noted inline. Each rule is an array whose first element is the search
        /// direction keyword and whose remaining elements are the candidate head categories.
        /// </summary>
        /// <param name="tlp">
        /// Language pack forwarded to the base constructor. Its punctuation tags are forwarded as
        /// well, so that punctuation is avoided as head in the final default rule.
        /// </param>
        public ModCollinsHeadFinder(AbstractTreebankLanguagePack tlp) : base(tlp, tlp.PunctuationTags())
        {
            nonTerminalInfo = new Dictionary<string, string[][]>();

            // ADJP notes relative to Collins' original table:
            //   - NNS, NN is actually sensible (money, etc.)
            //   - QP early isn't; should prefer JJR NN RB
            //   - ADVP removed from the main lists; it just shouldn't be there
            //   - if there are two JJ, the right one should win (e.g. South Korean)
            nonTerminalInfo.Add("ADJP", new[]
            {
                new[] { "left", "$" },
                new[] { "rightdis", "NNS", "NN", "JJ", "QP", "VBN", "VBG" },
                new[] { "left", "ADJP" },
                new[] { "rightdis", "JJP", "JJR", "JJS", "DT", "RB", "RBR", "CD", "IN", "VBD" },
                new[] { "left", "ADVP", "NP" }
            });

            // JJP was introduced for NML-like adjective phrases in Vadas' treebank
            // (Chris wishes he hadn't used JJP, which should be a POS tag).
            nonTerminalInfo.Add("JJP", new[]
            {
                new[]
                {
                    "left", "NNS", "NN", "$", "QP", "JJ", "VBN", "VBG", "ADJP", "JJP", "JJR", "NP", "JJS", "DT",
                    "FW", "RBR", "RBS", "SBAR", "RB"
                }
            });

            // ADVP was rewritten by Chris in Nov 2010 to be rightdis: JJ.* is often head and rightmost.
            nonTerminalInfo.Add("ADVP", new[]
            {
                new[] { "left", "ADVP", "IN" },
                new[] { "rightdis", "RB", "RBR", "RBS", "JJ", "JJR", "JJS" },
                new[]
                {
                    "rightdis", "RP", "DT", "NN", "CD", "NP", "VBN", "NNP", "CC", "FW", "NNS", "ADJP", "NML"
                }
            });

            nonTerminalInfo.Add("CONJP", new[] { new[] { "right", "CC", "RB", "IN" } });
            nonTerminalInfo.Add("FRAG", new[] { new[] { "right" } }); // placeholder-quality rule
            nonTerminalInfo.Add("INTJ", new[] { new[] { "left" } });
            nonTerminalInfo.Add("LST", new[] { new[] { "right", "LS", ":" } });

            // NML is head in: (NAC-LOC (NML San Antonio) (, ,) (NNP Texas))
            // TODO: NNP should be head in rare cases that could be ignored:
            //   (NAC (NML New York) (NNP Court) (PP of Appeals))
            //   (NAC (NML Prudential Insurance) (NNP Co.) (PP Of America))
            // Chris: this could maybe still do with more thought, but NAC is rare.
            nonTerminalInfo.Add("NAC", new[]
            {
                new[]
                {
                    "left", "NN", "NNS", "NML", "NNP", "NNPS", "NP", "NAC", "EX", "$", "CD", "QP", "PRP", "VBG",
                    "JJ", "JJS", "JJR", "ADJP", "JJP", "FW"
                }
            });

            // PP notes:
            //   - JJ was added since it is a head in several cases, e.g. (PP (JJ next) (PP to them)).
            //   - With both JJ and IN daughters it is invariably "such as"; not so clear which should
            //     be head, but IN is kept. (Michel thinks JJ should be made a head of PP:
            //     (PP (JJ such) (IN as) (NP (NN crocidolite))).)
            //   - SYM was added because new treebanks use it for symbols filling the role of IN.
            //   - The nested-PP search goes left: just what you want for conjunction, and consistent
            //     with SemanticHeadFinder.
            nonTerminalInfo.Add("PP", new[]
            {
                new[] { "right", "IN", "TO", "VBG", "VBN", "RP", "FW", "JJ", "SYM" },
                new[] { "left", "PP" }
            });

            nonTerminalInfo.Add("PRN", new[]
            {
                new[]
                {
                    "left", "VP", "NP", "PP", "SQ", "S", "SINV", "SBAR", "ADJP", "JJP", "ADVP", "INTJ", "WHNP",
                    "NAC", "VBP", "JJ", "NN", "NNP"
                }
            });
            nonTerminalInfo.Add("PRT", new[] { new[] { "right", "RP" } });

            // TODO: add '#' for pounds.
            nonTerminalInfo.Add("QP", new[]
            {
                new[]
                {
                    "left", "$", "IN", "NNS", "NN", "JJ", "CD", "PDT", "DT", "RB", "NCD", "QP", "JJR", "JJS"
                }
            });

            // A reduced relative clause can be any predicate: VP, ADJP, NP, PP.
            // To choose between NP and PP one would really need to know which is temporal and pick the other.
            // It's not clear ADVP needs to be in the list at all (delete?).
            nonTerminalInfo.Add("RRC", new[]
            {
                new[] { "left", "RRC" },
                new[] { "right", "VP", "ADJP", "JJP", "NP", "PP", "ADVP" }
            });

            // S: IN deleted (go for the main part of the sentence); FRAG added.
            nonTerminalInfo.Add("S", new[]
            {
                new[] { "left", "TO", "VP", "S", "FRAG", "SBAR", "ADJP", "JJP", "UCP", "NP" }
            });
            nonTerminalInfo.Add("SBAR", new[]
            {
                new[]
                {
                    "left", "WHNP", "WHPP", "WHADVP", "WHADJP", "IN", "DT", "S", "SQ", "SINV", "SBAR", "FRAG"
                }
            });
            nonTerminalInfo.Add("SBARQ", new[]
            {
                new[] { "left", "SQ", "S", "SINV", "SBARQ", "FRAG", "SBAR" }
            });

            // cdm: with 2 VPs under an SINV, the 2nd should really be the syntactic head, because the
            // first is a topicalized VP complement of the second. Left unchanged since changing it did
            // not help parsing. (If it were changed, SemanticHeadFinder would need the opposite change.)
            nonTerminalInfo.Add("SINV", new[]
            {
                new[]
                {
                    "left", "VBZ", "VBD", "VBP", "VB", "MD", "VBN", "VP", "S", "SINV", "ADJP", "JJP", "NP"
                }
            });

            // TODO: Should maybe put S before SQ for tag questions. Check.
            nonTerminalInfo.Add("SQ", new[]
            {
                new[] { "left", "VBZ", "VBD", "VBP", "VB", "MD", "AUX", "AUXG", "VP", "SQ" }
            });
            nonTerminalInfo.Add("UCP", new[] { new[] { "right" } });

            // The VP list is weird; maybe make 2 lists, one for good and one for bad heads?
            // AUX and AUXG were added to work with Charniak tags.
            nonTerminalInfo.Add("VP", new[]
            {
                new[]
                {
                    "left", "TO", "VBD", "VBN", "MD", "VBZ", "VB", "VBG", "VBP", "VP", "AUX", "AUXG", "ADJP",
                    "JJP", "NN", "NNS", "JJ", "NP", "NNP"
                }
            });

            nonTerminalInfo.Add("WHADJP", new[]
            {
                new[] { "left", "WRB", "WHADVP", "RB", "JJ", "ADJP", "JJP", "JJR" }
            });
            nonTerminalInfo.Add("WHADVP", new[] { new[] { "right", "WRB", "WHADVP" } });
            nonTerminalInfo.Add("WHNP", new[]
            {
                new[] { "left", "WDT", "WP", "WP$", "WHADJP", "WHPP", "WHNP" }
            });
            nonTerminalInfo.Add("WHPP", new[] { new[] { "right", "IN", "TO", "FW" } });
            nonTerminalInfo.Add("X", new[]
            {
                new[] { "right", "S", "VP", "ADJP", "JJP", "NP", "SBAR", "PP", "X" }
            });

            // NP, NX and NML all share one and the same rule array instance.
            string[][] nounPhraseRules =
            {
                new[] { "rightdis", "NN", "NNP", "NNPS", "NNS", "NML", "NX", "POS", "JJR" },
                new[] { "left", "NP", "PRP" },
                new[] { "rightdis", "$", "ADJP", "JJP", "PRN", "FW" },
                new[] { "right", "CD" },
                new[] { "rightdis", "JJ", "JJS", "RB", "QP", "DT", "WDT", "RBR", "ADVP" }
            };
            nonTerminalInfo.Add("NP", nounPhraseRules);
            nonTerminalInfo.Add("NX", nounPhraseRules);
            // TODO: seems JJ should be head of NML in this case:
            // (NP (NML (JJ former) (NML Red Sox) (JJ great)) (NNP Luis) (NNP Tiant)),
            // (although JJ great is tagged wrong)
            nonTerminalInfo.Add("NML", nounPhraseRules);

            nonTerminalInfo.Add("POSSP", new[] { new[] { "right", "POS" } });

            // HJT: the following entries deal with oddly formed data in (for example) the Brown corpus.
            // TOP reuses the ROOT rule array to handle trees which have TOP instead of ROOT at the root.
            string[][] rootRules = { new[] { "left", "S", "SQ", "SINV", "SBAR", "FRAG" } };
            nonTerminalInfo.Add("ROOT", rootRules);
            nonTerminalInfo.Add("TOP", rootRules);

            // TYPO: for Brown (Roger)
            nonTerminalInfo.Add("TYPO", new[]
            {
                new[]
                {
                    "left", "NN", "NP", "NML", "NNP", "NNPS", "TO", "VBD", "VBN", "MD", "VBZ", "VB", "VBG",
                    "VBP", "VP", "ADJP", "JJP", "FRAG"
                }
            });
            nonTerminalInfo.Add("ADV", new[]
            {
                new[]
                {
                    "right", "RB", "RBR", "RBS", "FW", "ADVP", "TO", "CD", "JJR", "JJ", "IN", "NP", "NML",
                    "JJS", "NN"
                }
            });

            // Switchboard (SWBD): placeholder-quality rule, needed if EDITED nodes are not deleted.
            nonTerminalInfo.Add("EDITED", new[] { new[] { "left" } });

            // sw2756 contains a "VB" used as a phrasal category; the VP list is copied here to handle
            // it, though it should really be fixed on reading.
            nonTerminalInfo.Add("VB", new[]
            {
                new[]
                {
                    "left", "TO", "VBD", "VBN", "MD", "VBZ", "VB", "VBG", "VBP", "VP", "AUX", "AUXG", "ADJP",
                    "JJP", "NN", "NNS", "JJ", "NP", "NNP"
                }
            });

            // META: rule for OntoNotes, but maybe it should just be deleted in TreeReader?
            nonTerminalInfo.Add("META", new[] { new[] { "left" } });

            // XS: rule for the new structure in QP, introduced by Stanford in QPTreeTransformer.
            nonTerminalInfo.Add("XS", new[] { new[] { "right", "IN" } });

            // Deliberately not registered: a rule keyed on null (suggested by Michel for OntoNotes);
            // it would be better to fix that in TreeReader or to use a default rule.

            // TODO: if the leftmost child should always be taken when no head rule is defined for the
            // mother category, set defaultRule = defaultLeftRule here instead of raising an exception.
        }