/// <summary>
/// Creates a normalizer for the given language pack and node cleanup setting,
/// and adds the supplied tree normalizers to this instance's <c>tns</c> collection.
/// </summary>
/// <param name="tlp">Language pack; stored and asked for its start symbol.</param>
/// <param name="nodeCleanup">Node cleanup setting, stored as-is.</param>
/// <param name="tns">Tree normalizers appended to <c>this.tns</c>.</param>
public TueBaDZPennTreeNormalizer(ITreebankLanguagePack tlp, int nodeCleanup, IList <TreeNormalizer> tns)
     : this(tlp, nodeCleanup)
 {
     // The two-argument constructor has already set tlp, nodeCleanup and root;
     // all that is left is to append the extra normalizers.
     Sharpen.Collections.AddAll(this.tns, tns);
 }
 /// <summary>
 /// Creates a normalizer for the given language pack and node cleanup setting.
 /// </summary>
 /// <param name="tlp">Language pack; stored and asked for its start symbol.</param>
 /// <param name="nodeCleanup">Node cleanup setting, stored as-is.</param>
 public TueBaDZPennTreeNormalizer(ITreebankLanguagePack tlp, int nodeCleanup)
 {
     this.nodeCleanup = nodeCleanup;
     this.tlp = tlp;

     // The root label is whatever the language pack reports as its start symbol.
     root = tlp.StartSymbol();
 }
Пример #3
0
 /// <summary>
 /// Fills <c>nonTerminalInfo</c> with one rule set per French phrasal category.
 /// </summary>
 /// <remarks>
 /// Every rule set is an array of priority groups; each group starts with a
 /// direction keyword ("left", "right", "leftdis" or "rightdis") followed by the
 /// category labels it applies to. How the groups are consumed is up to the base
 /// class (not visible here).
 ///
 /// French POS tags: A (adjective), ADV (adverb), C (conjunction and subordinating
 /// conjunction), CL (clitics), CS (subordinating conjunction, occurs only once),
 /// D (determiner), ET (foreign word), I (interjection), N (noun), P (preposition),
 /// PREF (prefix), PRO (strong pronoun), V (verb), PUNC (punctuation).
 /// In the expanded French CC tagset V, A, ADV, PRO, C, CL, N and D are all split
 /// into multiple tags; see
 /// http://www.linguist.univ-paris-diderot.fr/~mcandito/Publications/crabbecandi-taln2008-final.pdf
 /// </remarks>
 /// <param name="tlp">Language pack handed to the base constructor; its start symbol keys the sentence rules.</param>
 public DybroFrenchHeadFinder(ITreebankLanguagePack tlp)
     : base(tlp)
 {
     nonTerminalInfo = Generics.NewHashMap();

     // Sentence level: the start symbol and "SENT" get equal (but separate) rule sets.
     nonTerminalInfo[tlp.StartSymbol()] = new string[][]
     {
         new[] { "right", "VN", "AP", "NP", "Srel", "VPpart", "AdP", "I", "Ssub", "VPinf", "PP" },
         new[] { "rightdis", "ADV", "ADVWH" },
         new[] { "right" }
     };
     nonTerminalInfo["SENT"] = new string[][]
     {
         new[] { "right", "VN", "AP", "NP", "Srel", "VPpart", "AdP", "I", "Ssub", "VPinf", "PP" },
         new[] { "rightdis", "ADV", "ADVWH" },
         new[] { "right" }
     };

     // Adjectival phrases.
     nonTerminalInfo["AP"] = new string[][]
     {
         new[] { "rightdis", "A", "ADJ", "ADJWH" },
         new[] { "right", "ET" },
         new[] { "rightdis", "V", "VIMP", "VINF", "VS", "VPP", "VPR" },
         new[] { "rightdis", "ADV", "ADVWH" }
     };

     // Adverbial phrases.
     nonTerminalInfo["AdP"] = new string[][]
     {
         new[] { "rightdis", "ADV", "ADVWH" },
         new[] { "right" }
     };

     // Coordinated phrases.
     nonTerminalInfo["COORD"] = new string[][]
     {
         new[] { "leftdis", "C", "CC", "CS" },
         new[] { "left" }
     };

     // Noun phrases.
     nonTerminalInfo["NP"] = new string[][]
     {
         new[] { "leftdis", "N", "NPP", "NC", "PRO", "PROWH", "PROREL" },
         new[] { "left", "NP" },
         new[] { "leftdis", "A", "ADJ", "ADJWH" },
         new[] { "left", "AP", "I", "VPpart" },
         new[] { "leftdis", "ADV", "ADVWH" },
         new[] { "left", "AdP", "ET" },
         new[] { "leftdis", "D", "DET", "DETWH" }
     };

     // Prepositional phrases.
     nonTerminalInfo["PP"] = new string[][]
     {
         new[] { "left", "P" },
         new[] { "left" }
     };

     // Verbal nucleus.
     nonTerminalInfo["VN"] = new string[][]
     {
         new[] { "right", "V", "VPinf" },
         new[] { "right" }
     };

     // Infinitive clauses.
     nonTerminalInfo["VPinf"] = new string[][]
     {
         new[] { "left", "VN" },
         new[] { "leftdis", "V", "VIMP", "VINF", "VS", "VPP", "VPR" },
         new[] { "left" }
     };

     // Nonfinite clauses.
     nonTerminalInfo["VPpart"] = new string[][]
     {
         new[] { "leftdis", "V", "VIMP", "VINF", "VS", "VPP", "VPR" },
         new[] { "left", "VN" },
         new[] { "left" }
     };

     // Relative clauses.
     nonTerminalInfo["Srel"] = new string[][]
     {
         new[] { "right", "VN", "AP", "NP" },
         new[] { "right" }
     };

     // Subordinate clauses.
     nonTerminalInfo["Ssub"] = new string[][]
     {
         new[] { "right", "VN", "AP", "NP", "PP", "VPinf", "Ssub", "VPpart" },
         new[] { "rightdis", "A", "ADJ", "ADJWH" },
         new[] { "rightdis", "ADV", "ADVWH" },
         new[] { "right" }
     };

     // Parenthetical clauses.
     nonTerminalInfo["Sint"] = new string[][]
     {
         new[] { "right", "VN", "AP", "NP", "PP", "VPinf", "Ssub", "VPpart" },
         new[] { "rightdis", "A", "ADJ", "ADJWH" },
         new[] { "rightdis", "ADV", "ADVWH" },
         new[] { "right" }
     };

     // Compound categories, all prefixed with MW: D, A, C, N, ADV, V, P, PRO, CL.
     nonTerminalInfo["MWD"] = new string[][]
     {
         new[] { "leftdis", "D", "DET", "DETWH" },
         new[] { "left" }
     };
     nonTerminalInfo["MWA"] = new string[][]
     {
         new[] { "left", "P" },
         new[] { "leftdis", "N", "NPP", "NC" },
         new[] { "rightdis", "A", "ADJ", "ADJWH" },
         new[] { "right" }
     };
     nonTerminalInfo["MWC"] = new string[][]
     {
         new[] { "leftdis", "C", "CC", "CS" },
         new[] { "left" }
     };
     nonTerminalInfo["MWN"] = new string[][]
     {
         new[] { "rightdis", "N", "NPP", "NC" },
         new[] { "rightdis", "ET" },
         new[] { "right" }
     };
     nonTerminalInfo["MWV"] = new string[][]
     {
         new[] { "leftdis", "V", "VIMP", "VINF", "VS", "VPP", "VPR" },
         new[] { "left" }
     };
     nonTerminalInfo["MWP"] = new string[][]
     {
         new[] { "left", "P" },
         new[] { "leftdis", "ADV", "ADVWH" },
         new[] { "leftdis", "PRO", "PROWH", "PROREL" },
         new[] { "left" }
     };
     nonTerminalInfo["MWPRO"] = new string[][]
     {
         new[] { "leftdis", "PRO", "PROWH", "PROREL" },
         new[] { "leftdis", "CL", "CLS", "CLR", "CLO" },
         new[] { "leftdis", "N", "NPP", "NC" },
         new[] { "leftdis", "A", "ADJ", "ADJWH" },
         new[] { "left" }
     };
     nonTerminalInfo["MWCL"] = new string[][]
     {
         new[] { "leftdis", "CL", "CLS", "CLR", "CLO" },
         new[] { "right" }
     };
     nonTerminalInfo["MWADV"] = new string[][]
     {
         new[] { "left", "P" },
         new[] { "leftdis", "ADV", "ADVWH" },
         new[] { "left" }
     };
     nonTerminalInfo["MWI"] = new string[][]
     {
         new[] { "leftdis", "N", "NPP", "NC" },
         new[] { "leftdis", "ADV", "ADVWH" },
         new[] { "left", "P" },
         new[] { "left" }
     };
     nonTerminalInfo["MWET"] = new string[][]
     {
         new[] { "left", "ET" },
         new[] { "leftdis", "N", "NPP", "NC" },
         new[] { "left" }
     };

     // TODO: wsg2011: For phrasal nodes that lacked a label.
     nonTerminalInfo[FrenchXMLTreeReader.MissingPhrasal] = new string[][] { new[] { "left" } };
 }
 /// <summary>
 /// Fills <c>nonTerminalInfo</c> with one rule set per Spanish phrasal category.
 /// </summary>
 /// <remarks>
 /// Every rule set is an array of priority groups; each group starts with a
 /// direction keyword ("left", "right", "leftdis" or "rightdis") followed by the
 /// labels it applies to. Some groups are produced by <c>InsertVerbs</c>, which is
 /// defined elsewhere in the class (presumably it places verb tags between the
 /// two arrays it is given; confirm against its definition).
 /// </remarks>
 /// <param name="tlp">Language pack handed to the base constructor; its start symbol keys the sentence rules.</param>
 public SpanishHeadFinder(ITreebankLanguagePack tlp)
     : base(tlp)
 {
     nonTerminalInfo = Generics.NewHashMap();

     // Sentence level: one rule-set instance shared by four keys.
     string[][] sentenceRules = new string[][]
     {
         new[] { "right", "grup.verb", "s.a", "sn" },
         new[] { "left", "S" },
         new[] { "right", "sadv", "grup.adv", "neg", "interjeccio", "i", "sp", "grup.prep" },
         InsertVerbs(new[] { "rightdis" }, new[] { "nc0s000", "nc0p000", "nc00000", "np00000", "rg", "rn" })
     };
     nonTerminalInfo[tlp.StartSymbol()] = sentenceRules;
     nonTerminalInfo["S"] = sentenceRules;
     nonTerminalInfo["sentence"] = sentenceRules;
     nonTerminalInfo["inc"] = sentenceRules;

     // Adjectival phrases: "s.a" and "sa" share one rule-set instance.
     string[][] adjPhraseRules = new string[][]
     {
         new[] { "leftdis", "grup.a", "s.a", "spec" }
     };
     nonTerminalInfo["s.a"] = adjPhraseRules;
     nonTerminalInfo["sa"] = adjPhraseRules;
     nonTerminalInfo["grup.a"] = new string[][]
     {
         new[] { "rightdis", "aq0000", "ao0000" },
         InsertVerbs(new[] { "right" }, new string[] { }),
         new[] { "right", "rg", "rn" }
     };

     // Adverbial phrases.
     nonTerminalInfo["sadv"] = new string[][]
     {
         new[] { "left", "grup.adv", "sadv" }
     };
     nonTerminalInfo["grup.adv"] = new string[][]
     {
         new[] { "left", "conj" },
         new[] { "rightdis", "rg", "rn", "neg", "grup.adv" },
         new[] { "rightdis", "pr000000", "pi000000", "nc0s000", "nc0p000", "nc00000", "np00000" }
     };
     nonTerminalInfo["neg"] = new string[][]
     {
         new[] { "leftdis", "rg", "rn" }
     };

     // Noun phrases.
     nonTerminalInfo["sn"] = new string[][]
     {
         new[] { "leftdis", "nc0s000", "nc0p000", "nc00000" },
         new[] { "left", "grup.nom", "grup.w", "grup.z", "sn" },
         new[] { "leftdis", "spec" }
     };
     nonTerminalInfo["grup.nom"] = new string[][]
     {
         new[] { "leftdis", "nc0s000", "nc0p000", "nc00000", "np00000", "w", "grup.w" },
         new[] { "leftdis", "pi000000", "pd000000" },
         new[] { "left", "grup.nom", "sp" },
         new[] { "leftdis", "pn000000", "aq0000", "ao0000" },
         new[] { "left", "grup.a", "i", "grup.verb" },
         new[] { "leftdis", "grup.adv" }
     };

     // Verb phrases.
     nonTerminalInfo["grup.verb"] = new string[][]
     {
         InsertVerbs(new[] { "left" }, new string[] { })
     };
     nonTerminalInfo["infinitiu"] = new string[][]
     {
         InsertVerbs(new[] { "left" }, new[] { "infinitiu" })
     };
     nonTerminalInfo["gerundi"] = new string[][]
     {
         new[] { "left", "vmg0000", "vag0000", "vsg0000", "gerundi" }
     };
     nonTerminalInfo["participi"] = new string[][]
     {
         new[] { "left", "aq", "vmp0000", "vap0000", "vsp0000", "grup.a" }
     };

     // Specifiers.
     nonTerminalInfo["spec"] = new string[][]
     {
         new[] { "left", "conj", "spec" },
         new[] { "leftdis", "da0000", "de0000", "di0000", "dd0000", "dp0000", "dn0000", "dt0000" },
         new[] { "leftdis", "z0", "grup.z" },
         new[] { "left", "rg", "rn" },
         new[] { "leftdis", "pt000000", "pe000000", "pd000000", "pp000000", "pi000000", "pn000000", "pr000000" },
         new[] { "left", "grup.adv", "w" }
     };

     // Conjunctions, interjections and relatives.
     nonTerminalInfo["conj"] = new string[][]
     {
         new[] { "leftdis", "cs", "cc" },
         new[] { "leftdis", "grup.cc", "grup.cs" },
         new[] { "left", "sp" }
     };
     nonTerminalInfo["interjeccio"] = new string[][]
     {
         new[] { "leftdis", "i", "nc0s000", "nc0p000", "nc00000", "np00000", "pi000000" },
         new[] { "left", "interjeccio" }
     };
     nonTerminalInfo["relatiu"] = new string[][]
     {
         new[] { "left", "pr000000" }
     };

     // Prepositional phrases.
     nonTerminalInfo["sp"] = new string[][]
     {
         new[] { "left", "prep", "sp" }
     };
     nonTerminalInfo["prep"] = new string[][]
     {
         new[] { "leftdis", "sp000", "prep", "grup.prep" }
     };

     // Custom categories.
     nonTerminalInfo["grup.cc"] = new string[][]
     {
         new[] { "left", "cs" }
     };
     nonTerminalInfo["grup.cs"] = new string[][]
     {
         new[] { "left", "cs" }
     };
     nonTerminalInfo["grup.prep"] = new string[][]
     {
         new[] { "left", "prep", "grup.prep", "s" }
     };
     nonTerminalInfo["grup.pron"] = new string[][]
     {
         new[] { "rightdis", "px000000" }
     };
     nonTerminalInfo["grup.w"] = new string[][]
     {
         new[] { "right", "w" },
         new[] { "leftdis", "z0" },
         new[] { "left" }
     };
     nonTerminalInfo["grup.z"] = new string[][]
     {
         new[] { "leftdis", "z0", "zu", "zp", "zd", "zm" },
         new[] { "right", "nc0s000", "nc0p000", "nc00000", "np00000" }
     };
 }
Пример #5
0
 /// <summary>
 /// Stores the tag set and fills <c>nonTerminalInfo</c> with one rule set per
 /// Arabic treebank category.
 /// </summary>
 /// <remarks>
 /// Every rule set is an array of priority groups; each group starts with a
 /// direction keyword ("left", "right" or "rightdis") followed by the labels it
 /// applies to. The noun, adjective, preposition and determiner-plus-noun tag
 /// names are not hard-coded; they are read from <paramref name="tagSet"/>.
 /// </remarks>
 /// <param name="tlp">Language pack handed to the base constructor; its start symbol gets a plain "left" rule.</param>
 /// <param name="tagSet">Source of the tag names spliced into the rules; also stored in <c>this.tagSet</c>.</param>
 protected internal ArabicHeadFinder(ITreebankLanguagePack tlp, ArabicHeadFinder.TagSet tagSet)
     : base(tlp)
 {
     this.tagSet = tagSet;
     nonTerminalInfo = Generics.NewHashMap();

     nonTerminalInfo["NX"] = new string[][]
     {
         new[] { "left", "DT", "DTNN", "DTNNS", "DTNNP", "DTNNPS", "DTJJ", "DTNOUN_QUANT", "NOUN_QUANT", "MWNP" }
     };

     nonTerminalInfo["ADJP"] = new string[][]
     {
         new[] { "rightdis", tagSet.Adj(), "DTJJ", "ADJ_NUM", "DTADJ_NUM", "JJR", "DTJJR", "MWADJP" },
         new[] { "right", "ADJP", "VN", tagSet.Noun(), "MWNP", "NNP", "NNPS", "NNS", "DTNN", "DTNNS", "DTNNP", "DTNNPS", "DTJJ", "DTNOUN_QUANT", "NOUN_QUANT" },
         new[] { "right", "RB", "MWADVP", "CD", "DTRB", "DTCD" },
         new[] { "right", "DT" }
     };
     // (ADJP) sometimes right, sometimes left headed??
     nonTerminalInfo["MWADJP"] = new string[][]
     {
         new[] { "rightdis", tagSet.Adj(), "DTJJ", "ADJ_NUM", "DTADJ_NUM", "JJR", "DTJJR" },
         new[] { "right", tagSet.Noun(), "MWNP", "NNP", "NNPS", "NNS", "DTNN", "DTNNS", "DTNNP", "DTNNPS", "DTJJ", "DTNOUN_QUANT", "NOUN_QUANT" },
         new[] { "right", "RB", "MWADVP", "CD", "DTRB", "DTCD" },
         new[] { "right", "DT" }
     };
     // (MWADJP) sometimes right, sometimes left headed??

     nonTerminalInfo["ADVP"] = new string[][]
     {
         new[] { "left", "WRB", "RB", "MWADVP", "ADVP", "WHADVP", "DTRB" },
         new[] { "left", "CD", "RP", tagSet.Noun(), "MWNP", "CC", "MWCONJP", tagSet.Adj(), "MWADJP", "DTJJ", "ADJ_NUM", "DTADJ_NUM", "IN", "MWPP", "NP", "NNP", "NOFUNC", "DTRP", "DTNN", "DTNNP", "DTNNPS", "DTNNS", "DTJJ", "DTNOUN_QUANT", "NOUN_QUANT" }
     };
     // (ADVP) NNP is a gerund that they called an unknown (=NNP, believe it or not...)
     nonTerminalInfo["MWADVP"] = new string[][]
     {
         new[] { "left", "WRB", "RB", "ADVP", "WHADVP", "DTRB" },
         new[] { "left", "CD", "RP", tagSet.Noun(), "MWNP", "CC", "MWCONJP", tagSet.Adj(), "MWADJP", "DTJJ", "ADJ_NUM", "DTADJ_NUM", "IN", "MWPP", "NP", "NNP", "NOFUNC", "DTRP", "DTNN", "DTNNP", "DTNNPS", "DTNNS", "DTJJ", "DTNOUN_QUANT", "NOUN_QUANT" }
     };
     // (MWADVP) NNP is a gerund that they called an unknown (=NNP, believe it or not...)

     nonTerminalInfo["CONJP"] = new string[][]
     {
         new[] { "right", "IN", "RB", "MWADVP", tagSet.Noun(), "MWNP", "NNS", "NNP", "NNPS", "DTRB", "DTNN", "DTNNS", "DTNNP", "DTNNPS", "DTNOUN_QUANT", "NOUN_QUANT" }
     };
     nonTerminalInfo["MWCONJP"] = new string[][]
     {
         new[] { "right", "IN", "RB", "MWADVP", tagSet.Noun(), "MWNP", "NNS", "NNP", "NNPS", "DTRB", "DTNN", "DTNNS", "DTNNP", "DTNNPS", "DTNOUN_QUANT", "NOUN_QUANT" }
     };
     nonTerminalInfo["FRAG"] = new string[][]
     {
         new[] { "left", tagSet.Noun(), "MWNP", "NNPS", "NNP", "NNS", "DTNN", "DTNNS", "DTNNP", "DTNNPS", "DTNOUN_QUANT", "NOUN_QUANT" },
         new[] { "left", "VBP" }
     };
     nonTerminalInfo["MWFRAG"] = new string[][]
     {
         new[] { "left", tagSet.Noun(), "MWNP", "NNPS", "NNP", "NNS", "DTNN", "DTNNS", "DTNNP", "DTNNPS", "DTNOUN_QUANT", "NOUN_QUANT" },
         new[] { "left", "VBP" }
     };
     nonTerminalInfo["INTJ"] = new string[][]
     {
         new[] { "left", "RP", "UH", "DTRP" }
     };
     nonTerminalInfo["LST"] = new string[][]
     {
         new[] { "left" }
     };
     nonTerminalInfo["NAC"] = new string[][]
     {
         new[] { "left", "NP", "SBAR", "PP", "MWP", "ADJP", "S", "PRT", "UCP" },
         new[] { "left", "ADVP" }
     };
     // (NAC) note: maybe CC, RB should be the heads?

     nonTerminalInfo["NP"] = new string[][]
     {
         new[] { "left", tagSet.Noun(), "MWNP", tagSet.DetPlusNoun(), "NNS", "NNP", "NNPS", "NP", "PRP", "WHNP", "QP", "WP", "DTNNS", "DTNNPS", "DTNNP", "NOFUNC", "NO_FUNC", "DTNOUN_QUANT", "NOUN_QUANT" },
         new[] { "left", tagSet.Adj(), "MWADJP", "DTJJ", "JJR", "DTJJR", "ADJ_NUM", "DTADJ_NUM" },
         new[] { "right", "CD", "DTCD" },
         new[] { "left", "PRP$" },
         new[] { "right", "DT" }
     };
     // (NP) should the JJ rule be left or right?
     nonTerminalInfo["MWNP"] = new string[][]
     {
         new[] { "left", tagSet.Noun(), "MWNP", tagSet.DetPlusNoun(), "NNS", "NNP", "NNPS", "PRP", "QP", "WP", "DTNNS", "DTNNPS", "DTNNP", "DTNOUN_QUANT", "NOUN_QUANT" },
         new[] { "left", tagSet.Adj(), "MWADJP", "DTJJ", "JJR", "DTJJR", "ADJ_NUM", "DTADJ_NUM" },
         new[] { "right", "CD", "DTCD" },
         new[] { "left", "PRP$" },
         new[] { "right", "DT" }
     };
     // (MWNP) should the JJ rule be left or right?

     nonTerminalInfo["PP"] = new string[][]
     {
         new[] { "left", tagSet.Prep(), "MWPP", "PP", "MWP", "PRT", "X" },
         new[] { "left", "NNP", "RP", tagSet.Noun(), "MWNP" },
         new[] { "left", "NP" }
     };
     // (PP) NN is for a mistaken "fy", and many wsT
     nonTerminalInfo["MWPP"] = new string[][]
     {
         new[] { "left", tagSet.Prep(), "PP", "MWP", "PRT", "X" },
         new[] { "left", "NNP", "RP", tagSet.Noun(), "MWNP" },
         new[] { "left", "NP" }
     };
     // (MWPP) NN is for a mistaken "fy", and many wsT

     nonTerminalInfo["PRN"] = new string[][]
     {
         new[] { "left", "NP" }
     };
     // (PRN) don't get PUNC
     nonTerminalInfo["MWPRN"] = new string[][]
     {
         new[] { "left", "IN" }
     };
     // (MWPRN) don't get PUNC
     nonTerminalInfo["PRT"] = new string[][]
     {
         new[] { "left", "RP", "PRT", "IN", "DTRP" }
     };
     nonTerminalInfo["QP"] = new string[][]
     {
         new[] { "right", "CD", "DTCD", tagSet.Noun(), "MWNP", tagSet.Adj(), "MWADJP", "NNS", "NNP", "NNPS", "DTNN", "DTNNS", "DTNNP", "DTNNPS", "DTJJ", "DTNOUN_QUANT", "NOUN_QUANT" }
     };

     nonTerminalInfo["S"] = new string[][]
     {
         new[] { "left", "VP", "MWVP", "S" },
         new[] { "right", "PP", "MWP", "ADVP", "SBAR", "UCP", "ADJP" }
     };
     // (S) really important to put in -PRD sensitivity here!
     nonTerminalInfo["MWS"] = new string[][]
     {
         new[] { "left", "VP", "MWVP", "S" },
         new[] { "right", "PP", "MWP", "ADVP", "SBAR", "UCP", "ADJP" }
     };
     // (MWS) really important to put in -PRD sensitivity here!
     nonTerminalInfo["SQ"] = new string[][]
     {
         new[] { "left", "VP", "MWVP", "PP", "MWP" }
     };
     // (SQ) to be principled, we need -PRD sensitivity here too.

     nonTerminalInfo["SBAR"] = new string[][]
     {
         new[] { "left", "WHNP", "WHADVP", "WRB", "RP", "IN", "SBAR", "CC", "MWCONJP", "WP", "WHPP", "ADVP", "PRT", "RB", "MWADVP", "X", "DTRB", "DTRP" },
         new[] { "left", tagSet.Noun(), "MWNP", "NNP", "NNS", "NNPS", "DTNN", "DTNNS", "DTNNP", "DTNNPS", "DTNOUN_QUANT", "NOUN_QUANT" },
         new[] { "left", "S" }
     };
     nonTerminalInfo["MWSBAR"] = new string[][]
     {
         new[] { "left", "WHNP", "WHADVP", "WRB", "RP", "IN", "SBAR", "CC", "MWCONJP", "WP", "WHPP", "ADVP", "PRT", "RB", "MWADVP", "X", "DTRB", "DTRP" },
         new[] { "left", tagSet.Noun(), "MWNP", "NNP", "NNS", "NNPS", "DTNN", "DTNNS", "DTNNP", "DTNNPS", "DTNOUN_QUANT", "NOUN_QUANT" },
         new[] { "left", "S" }
     };
     nonTerminalInfo["SBARQ"] = new string[][]
     {
         new[] { "left", "WHNP", "WHADVP", "RP", "IN", "SBAR", "CC", "MWCONJP", "WP", "WHPP", "ADVP", "PRT", "RB", "MWADVP", "X" },
         new[] { "left", tagSet.Noun(), "MWNP", "NNP", "NNS", "NNPS", "DTNN", "DTNNS", "DTNNP", "DTNNPS", "DTNOUN_QUANT", "NOUN_QUANT" },
         new[] { "left", "S" }
     };
     // (SBARQ) copied from SBAR rule -- look more closely when there's time

     nonTerminalInfo["UCP"] = new string[][]
     {
         new[] { "left" }
     };
     nonTerminalInfo["VP"] = new string[][]
     {
         new[] { "left", "VBD", "VBN", "VBP", "VBG", "DTVBG", "VN", "DTVN", "VP", "RB", "MWADVP", "X", "VB" },
         new[] { "left", "IN" },
         new[] { "left", "NNP", tagSet.Noun(), "MWNP", "DTNN", "DTNNP", "DTNNPS", "DTNNS", "DTNOUN_QUANT", "NOUN_QUANT" }
     };
     // (VP) exclude RP because we don't want negation markers as heads -- no useful information?
     nonTerminalInfo["MWVP"] = new string[][]
     {
         new[] { "left", "VBD", "VBN", "VBP", "VBG", "DTVBG", "VN", "DTVN", "VP", "MWVP", "RB", "MWADVP", "X", "VB" },
         new[] { "left", "IN" },
         new[] { "left", "NNP", tagSet.Noun(), "MWNP", "DTNN", "DTNNP", "DTNNPS", "DTNNS", "DTNOUN_QUANT", "NOUN_QUANT" }
     };
     // (MWVP) exclude RP because we don't want negation markers as heads -- no useful information?
     // Also, RB is used as gerunds.

     nonTerminalInfo["WHADVP"] = new string[][]
     {
         new[] { "left", "WRB", "WP" },
         new[] { "right", "CC", "MWCONJP" },
         new[] { "left", "IN" }
     };
     nonTerminalInfo["WHNP"] = new string[][]
     {
         new[] { "right", "WP" }
     };
     nonTerminalInfo["WHPP"] = new string[][]
     {
         new[] { "left", "IN", "MWPP", "RB", "MWADVP" }
     };
     nonTerminalInfo["X"] = new string[][]
     {
         new[] { "left" }
     };

     // Added by Mona 12/7/04 for the newly created DT nonterm cat.
     nonTerminalInfo["DTNN"] = new string[][] { new[] { "right" } };
     nonTerminalInfo["DTNNS"] = new string[][] { new[] { "right" } };
     nonTerminalInfo["DTNNP"] = new string[][] { new[] { "right" } };
     nonTerminalInfo["DTNNPS"] = new string[][] { new[] { "right" } };
     nonTerminalInfo["DTJJ"] = new string[][] { new[] { "right" } };
     nonTerminalInfo["DTRP"] = new string[][] { new[] { "right" } };
     nonTerminalInfo["DTRB"] = new string[][] { new[] { "right" } };
     nonTerminalInfo["DTCD"] = new string[][] { new[] { "right" } };
     nonTerminalInfo["DTIN"] = new string[][] { new[] { "right" } };

     // Stand-in dependency.
     nonTerminalInfo["EDITED"] = new string[][] { new[] { "left" } };
     nonTerminalInfo[tlp.StartSymbol()] = new string[][] { new[] { "left" } };

     // One stray SINV in the training set...garbage head rule here.
     nonTerminalInfo["SINV"] = new string[][]
     {
         new[] { "left", "ADJP", "VP" }
     };
 }
Пример #6
0
        /// <summary>Binarizes the tree according to options set up in the constructor.</summary>
        /// <remarks>
        /// Works recursively over the whole tree:
        /// a leaf is rebuilt as a new leaf carrying a <c>Word</c> label;
        /// a preterminal is rebuilt as a one-child node labelled with a
        /// <c>CategoryWordTag</c> (category, word, category);
        /// any other node has all of its children transformed and is then handed to
        /// <c>BinarizeLocalTree</c>, except when its label starts with the language
        /// pack's start symbol, in which case it is rebuilt with its original label
        /// and is not binarized itself.
        /// The start-symbol prefix test is an ordinal (culture-independent) comparison.
        /// </remarks>
        /// <param name="t">
        /// A tree to be binarized. The non-leaf nodes must already have
        /// CategoryWordTag labels, with heads percolated.
        /// </param>
        /// <returns>
        /// The transformed tree, or <c>null</c> when <paramref name="t"/> is <c>null</c>.
        /// </returns>
        public virtual Tree TransformTree(Tree t)
        {
            // handle null
            if (t == null)
            {
                return(null);
            }
            string cat = t.Label().Value();

            // handle words
            if (t.IsLeaf())
            {
                ILabel label = new Word(cat);
                return(tf.NewLeaf(label));
            }
            // handle tags: the single child is the word, the node's category is the tag
            if (t.IsPreTerminal())
            {
                Tree   childResult = TransformTree(t.GetChild(0));
                string word        = childResult.Value();
                IList <Tree> newChildren = new List <Tree>(1);
                newChildren.Add(childResult);
                return(tf.NewTreeNode(new CategoryWordTag(cat, word, cat), newChildren));
            }
            // handle categories
            Tree headChild = hf.DetermineHead(t);

            // Labels are symbolic identifiers, so the prefix test must not depend on
            // the current culture (string.StartsWith(string) is culture-sensitive).
            bool isRoot = cat.StartsWith(tlp.StartSymbol(), System.StringComparison.Ordinal);

            if (headChild == null && !isRoot)
            {
                log.Info("### No head found for:");
                t.PennPrint();
            }

            // Index of the head among the children; stays -1 when no child is
            // reference-equal to the head returned by the head finder.
            int headNum = -1;

            Tree[]       kids          = t.Children();
            IList <Tree> newChildren_1 = new List <Tree>(kids.Length);

            for (int childNum = 0; childNum < kids.Length; childNum++)
            {
                Tree child       = kids[childNum];
                Tree childResult = TransformTree(child);
                // recursive call
                if (child == headChild)
                {
                    headNum = childNum;
                }
                newChildren_1.Add(childResult);
            }

            if (isRoot)
            {
                // The ROOT tree is rebuilt with its original label and is not binarized.
                return(tf.NewTreeNode(t.Label(), newChildren_1));
            }

            // NOTE: when no head was found for a non-root node, headChild is null here
            // and the next line throws; the log output above is the only diagnostic.
            string headWord = ((IHasWord)headChild.Label()).Word();
            string headTag  = ((IHasTag)headChild.Label()).Tag();
            ILabel nodeLabel = new CategoryWordTag(cat, headWord, headTag);
            Tree   result    = tf.NewTreeNode(nodeLabel, newChildren_1);
            // cdm Mar 2005: invent a head so I don't have to rewrite all this
            // code, but with the removal of TreeHeadPair, some of the rest of
            // this should probably be rewritten too to not use this head variable
            TaggedWord head = new TaggedWord(headWord, headTag);
            return(BinarizeLocalTree(result, headNum, head));
        }
Пример #7
0
 /// <summary>Returns the start symbol reported by the language pack.</summary>
 /// <returns>The value of <c>tlp.StartSymbol()</c>.</returns>
 public virtual string RootSymbol()
 {
     return tlp.StartSymbol();
 }
Пример #8
0
 /// <summary>
 /// Creates a normalizer for the given language pack and node cleanup setting.
 /// </summary>
 /// <param name="tlp">Language pack; stored and asked for its start symbol.</param>
 /// <param name="nodeCleanup">Node cleanup setting, stored as-is.</param>
 public GrammaticalFunctionTreeNormalizer(ITreebankLanguagePack tlp, int nodeCleanup)
 {
     this.nodeCleanup = nodeCleanup;
     this.tlp = tlp;

     // The root label is whatever the language pack reports as its start symbol.
     root = tlp.StartSymbol();
 }