/// <summary>
/// Creates a normalizer bound to a language pack and a node-cleanup setting,
/// and registers additional tree normalizers.
/// </summary>
/// <param name="tlp">Language pack; its start symbol is stored as the root label.</param>
/// <param name="nodeCleanup">Node-cleanup setting stored on the instance.</param>
/// <param name="tns">Normalizers that are all added to this instance's <c>tns</c> collection.</param>
public TueBaDZPennTreeNormalizer(ITreebankLanguagePack tlp, int nodeCleanup, IList<TreeNormalizer> tns)
{
    this.tlp = tlp;
    this.nodeCleanup = nodeCleanup;

    // The root label is whatever the language pack reports as its start symbol.
    var startSymbol = tlp.StartSymbol();
    root = startSymbol;

    // Append the caller-supplied normalizers to the ones held by this instance.
    Sharpen.Collections.AddAll(this.tns, tns);
}
/// <summary>
/// Creates a normalizer bound to a language pack and a node-cleanup setting.
/// </summary>
/// <remarks>
/// A parameterless overload that delegated with <c>new TueBaDZLanguagePack()</c>
/// and <c>0</c> is disabled in the original source.
/// </remarks>
/// <param name="tlp">Language pack; its start symbol is stored as the root label.</param>
/// <param name="nodeCleanup">Node-cleanup setting stored on the instance.</param>
public TueBaDZPennTreeNormalizer(ITreebankLanguagePack tlp, int nodeCleanup)
{
    this.tlp = tlp;
    this.nodeCleanup = nodeCleanup;

    // The root label is whatever the language pack reports as its start symbol.
    var startSymbol = tlp.StartSymbol();
    root = startSymbol;
}
/// <summary>
/// Builds the head-rule table (<c>nonTerminalInfo</c>) for French treebank categories.
/// </summary>
/// <remarks>
/// Every entry maps a parent category to an ordered list of rules. Each rule is a
/// string array whose first element is a direction keyword ("left", "right",
/// "leftdis" or "rightdis") followed by zero or more candidate child categories.
/// The rules are presumably interpreted by the base head finder; confirm there.
///
/// French POS tags: A (adjective), ADV (adverb), C (conjunction and subordinating
/// conjunction), CL (clitics), CS (subordinating conjunction, occurs only once),
/// D (determiner), ET (foreign word), I (interjection), N (noun), P (preposition),
/// PREF (prefix), PRO (strong pronoun -- very confusing), V (verb), PUNC (punctuation).
/// There is also the expanded French CC tagset, in which V, A, ADV, PRO, C, CL, N
/// and D are all split into multiple tags. See
/// http://www.linguist.univ-paris-diderot.fr/~mcandito/Publications/crabbecandi-taln2008-final.pdf
/// </remarks>
/// <param name="tlp">Language pack passed to the base class; its start symbol keys the sentence rule.</param>
public DybroFrenchHeadFinder(ITreebankLanguagePack tlp)
    : base(tlp)
{
    nonTerminalInfo = Generics.NewHashMap();

    // "sentence": the start symbol and "SENT" get the same rules (separate array instances).
    foreach (string sentenceLabel in new[] { tlp.StartSymbol(), "SENT" })
    {
        nonTerminalInfo[sentenceLabel] = new string[][]
        {
            new[] { "right", "VN", "AP", "NP", "Srel", "VPpart", "AdP", "I", "Ssub", "VPinf", "PP" },
            new[] { "rightdis", "ADV", "ADVWH" },
            new[] { "right" }
        };
    }

    // adjectival phrases
    nonTerminalInfo["AP"] = new string[][]
    {
        new[] { "rightdis", "A", "ADJ", "ADJWH" },
        new[] { "right", "ET" },
        new[] { "rightdis", "V", "VIMP", "VINF", "VS", "VPP", "VPR" },
        new[] { "rightdis", "ADV", "ADVWH" }
    };

    // adverbial phrases
    nonTerminalInfo["AdP"] = new string[][]
    {
        new[] { "rightdis", "ADV", "ADVWH" },
        new[] { "right" }
    };

    // coordinated phrases
    nonTerminalInfo["COORD"] = new string[][]
    {
        new[] { "leftdis", "C", "CC", "CS" },
        new[] { "left" }
    };

    // noun phrases
    nonTerminalInfo["NP"] = new string[][]
    {
        new[] { "leftdis", "N", "NPP", "NC", "PRO", "PROWH", "PROREL" },
        new[] { "left", "NP" },
        new[] { "leftdis", "A", "ADJ", "ADJWH" },
        new[] { "left", "AP", "I", "VPpart" },
        new[] { "leftdis", "ADV", "ADVWH" },
        new[] { "left", "AdP", "ET" },
        new[] { "leftdis", "D", "DET", "DETWH" }
    };

    // prepositional phrases
    nonTerminalInfo["PP"] = new string[][]
    {
        new[] { "left", "P" },
        new[] { "left" }
    };

    // verbal nucleus
    nonTerminalInfo["VN"] = new string[][]
    {
        new[] { "right", "V", "VPinf" },
        new[] { "right" }
    };

    // infinitive clauses
    nonTerminalInfo["VPinf"] = new string[][]
    {
        new[] { "left", "VN" },
        new[] { "leftdis", "V", "VIMP", "VINF", "VS", "VPP", "VPR" },
        new[] { "left" }
    };

    // nonfinite clauses
    nonTerminalInfo["VPpart"] = new string[][]
    {
        new[] { "leftdis", "V", "VIMP", "VINF", "VS", "VPP", "VPR" },
        new[] { "left", "VN" },
        new[] { "left" }
    };

    // relative clauses
    nonTerminalInfo["Srel"] = new string[][]
    {
        new[] { "right", "VN", "AP", "NP" },
        new[] { "right" }
    };

    // subordinate clauses ("Ssub") and parenthetical clauses ("Sint") use the same rules
    foreach (string clauseLabel in new[] { "Ssub", "Sint" })
    {
        nonTerminalInfo[clauseLabel] = new string[][]
        {
            new[] { "right", "VN", "AP", "NP", "PP", "VPinf", "Ssub", "VPpart" },
            new[] { "rightdis", "A", "ADJ", "ADJWH" },
            new[] { "rightdis", "ADV", "ADVWH" },
            new[] { "right" }
        };
    }

    // No rule is registered for adverbs ("ADV"); the original source keeps one disabled:
    //   {"left", "ADV", "PP", "P"}

    // compound categories: start with MW: D, A, C, N, ADV, V, P, PRO, CL
    nonTerminalInfo["MWD"] = new string[][]
    {
        new[] { "leftdis", "D", "DET", "DETWH" },
        new[] { "left" }
    };
    nonTerminalInfo["MWA"] = new string[][]
    {
        new[] { "left", "P" },
        new[] { "leftdis", "N", "NPP", "NC" },
        new[] { "rightdis", "A", "ADJ", "ADJWH" },
        new[] { "right" }
    };
    nonTerminalInfo["MWC"] = new string[][]
    {
        new[] { "leftdis", "C", "CC", "CS" },
        new[] { "left" }
    };
    nonTerminalInfo["MWN"] = new string[][]
    {
        new[] { "rightdis", "N", "NPP", "NC" },
        new[] { "rightdis", "ET" },
        new[] { "right" }
    };
    nonTerminalInfo["MWV"] = new string[][]
    {
        new[] { "leftdis", "V", "VIMP", "VINF", "VS", "VPP", "VPR" },
        new[] { "left" }
    };
    nonTerminalInfo["MWP"] = new string[][]
    {
        new[] { "left", "P" },
        new[] { "leftdis", "ADV", "ADVWH" },
        new[] { "leftdis", "PRO", "PROWH", "PROREL" },
        new[] { "left" }
    };
    nonTerminalInfo["MWPRO"] = new string[][]
    {
        new[] { "leftdis", "PRO", "PROWH", "PROREL" },
        new[] { "leftdis", "CL", "CLS", "CLR", "CLO" },
        new[] { "leftdis", "N", "NPP", "NC" },
        new[] { "leftdis", "A", "ADJ", "ADJWH" },
        new[] { "left" }
    };
    nonTerminalInfo["MWCL"] = new string[][]
    {
        new[] { "leftdis", "CL", "CLS", "CLR", "CLO" },
        new[] { "right" }
    };
    nonTerminalInfo["MWADV"] = new string[][]
    {
        new[] { "left", "P" },
        new[] { "leftdis", "ADV", "ADVWH" },
        new[] { "left" }
    };
    nonTerminalInfo["MWI"] = new string[][]
    {
        new[] { "leftdis", "N", "NPP", "NC" },
        new[] { "leftdis", "ADV", "ADVWH" },
        new[] { "left", "P" },
        new[] { "left" }
    };
    nonTerminalInfo["MWET"] = new string[][]
    {
        new[] { "left", "ET" },
        new[] { "leftdis", "N", "NPP", "NC" },
        new[] { "left" }
    };

    // TODO: wsg2011: For phrasal nodes that lacked a label.
    nonTerminalInfo[FrenchXMLTreeReader.MissingPhrasal] = new string[][]
    {
        new[] { "left" }
    };
}
/// <summary>
/// Builds the head-rule table (<c>nonTerminalInfo</c>) for Spanish treebank categories.
/// </summary>
/// <remarks>
/// Every entry maps a parent category to an ordered list of rules. Each rule is a
/// string array whose first element is a direction keyword ("left", "right",
/// "leftdis" or "rightdis") followed by candidate child categories. Some rules are
/// produced by <c>InsertVerbs</c>, which is defined elsewhere; presumably it adds the
/// verb tags to the supplied lists -- confirm against its definition.
/// </remarks>
/// <param name="tlp">Language pack passed to the base class; its start symbol keys the root rules.</param>
public SpanishHeadFinder(ITreebankLanguagePack tlp)
    : base(tlp)
{
    nonTerminalInfo = Generics.NewHashMap();

    // "sentence": one shared rule set registered under four labels
    string[][] rootRules = new string[][]
    {
        new[] { "right", "grup.verb", "s.a", "sn" },
        new[] { "left", "S" },
        new[] { "right", "sadv", "grup.adv", "neg", "interjeccio", "i", "sp", "grup.prep" },
        InsertVerbs(
            new string[] { "rightdis" },
            new string[] { "nc0s000", "nc0p000", "nc00000", "np00000", "rg", "rn" })
    };
    nonTerminalInfo[tlp.StartSymbol()] = rootRules;
    nonTerminalInfo["S"] = rootRules;
    nonTerminalInfo["sentence"] = rootRules;
    nonTerminalInfo["inc"] = rootRules;

    // adjectival phrases: "s.a" and "sa" share one rule set
    string[][] adjectivePhraseRules = new string[][]
    {
        new[] { "leftdis", "grup.a", "s.a", "spec" }
    };
    nonTerminalInfo["s.a"] = adjectivePhraseRules;
    nonTerminalInfo["sa"] = adjectivePhraseRules;
    nonTerminalInfo["grup.a"] = new string[][]
    {
        new[] { "rightdis", "aq0000", "ao0000" },
        InsertVerbs(new string[] { "right" }, new string[0]),
        new[] { "right", "rg", "rn" }
    };

    // adverbial phrases
    nonTerminalInfo["sadv"] = new string[][]
    {
        new[] { "left", "grup.adv", "sadv" }
    };
    nonTerminalInfo["grup.adv"] = new string[][]
    {
        new[] { "left", "conj" },
        new[] { "rightdis", "rg", "rn", "neg", "grup.adv" },
        new[] { "rightdis", "pr000000", "pi000000", "nc0s000", "nc0p000", "nc00000", "np00000" }
    };
    nonTerminalInfo["neg"] = new string[][]
    {
        new[] { "leftdis", "rg", "rn" }
    };

    // noun phrases
    nonTerminalInfo["sn"] = new string[][]
    {
        new[] { "leftdis", "nc0s000", "nc0p000", "nc00000" },
        new[] { "left", "grup.nom", "grup.w", "grup.z", "sn" },
        new[] { "leftdis", "spec" }
    };
    nonTerminalInfo["grup.nom"] = new string[][]
    {
        new[] { "leftdis", "nc0s000", "nc0p000", "nc00000", "np00000", "w", "grup.w" },
        new[] { "leftdis", "pi000000", "pd000000" },
        new[] { "left", "grup.nom", "sp" },
        new[] { "leftdis", "pn000000", "aq0000", "ao0000" },
        new[] { "left", "grup.a", "i", "grup.verb" },
        new[] { "leftdis", "grup.adv" }
    };

    // verb phrases
    nonTerminalInfo["grup.verb"] = new string[][]
    {
        InsertVerbs(new string[] { "left" }, new string[0])
    };
    nonTerminalInfo["infinitiu"] = new string[][]
    {
        InsertVerbs(new string[] { "left" }, new string[] { "infinitiu" })
    };
    nonTerminalInfo["gerundi"] = new string[][]
    {
        new[] { "left", "vmg0000", "vag0000", "vsg0000", "gerundi" }
    };
    nonTerminalInfo["participi"] = new string[][]
    {
        new[] { "left", "aq", "vmp0000", "vap0000", "vsp0000", "grup.a" }
    };

    // specifiers
    nonTerminalInfo["spec"] = new string[][]
    {
        new[] { "left", "conj", "spec" },
        new[] { "leftdis", "da0000", "de0000", "di0000", "dd0000", "dp0000", "dn0000", "dt0000" },
        new[] { "leftdis", "z0", "grup.z" },
        new[] { "left", "rg", "rn" },
        new[] { "leftdis", "pt000000", "pe000000", "pd000000", "pp000000", "pi000000", "pn000000", "pr000000" },
        // e.g. Spanish "entre A y B" ("between A and B")
        new[] { "left", "grup.adv", "w" }
    };

    // etc.
    nonTerminalInfo["conj"] = new string[][]
    {
        new[] { "leftdis", "cs", "cc" },
        new[] { "leftdis", "grup.cc", "grup.cs" },
        new[] { "left", "sp" }
    };
    nonTerminalInfo["interjeccio"] = new string[][]
    {
        new[] { "leftdis", "i", "nc0s000", "nc0p000", "nc00000", "np00000", "pi000000" },
        new[] { "left", "interjeccio" }
    };
    nonTerminalInfo["relatiu"] = new string[][]
    {
        new[] { "left", "pr000000" }
    };

    // prepositional phrases
    nonTerminalInfo["sp"] = new string[][]
    {
        new[] { "left", "prep", "sp" }
    };
    nonTerminalInfo["prep"] = new string[][]
    {
        new[] { "leftdis", "sp000", "prep", "grup.prep" }
    };

    // custom categories
    nonTerminalInfo["grup.cc"] = new string[][]
    {
        new[] { "left", "cs" }
    };
    nonTerminalInfo["grup.cs"] = new string[][]
    {
        new[] { "left", "cs" }
    };
    nonTerminalInfo["grup.prep"] = new string[][]
    {
        new[] { "left", "prep", "grup.prep", "s" }
    };
    nonTerminalInfo["grup.pron"] = new string[][]
    {
        new[] { "rightdis", "px000000" }
    };
    nonTerminalInfo["grup.w"] = new string[][]
    {
        new[] { "right", "w" },
        new[] { "leftdis", "z0" },
        new[] { "left" }
    };
    nonTerminalInfo["grup.z"] = new string[][]
    {
        new[] { "leftdis", "z0", "zu", "zp", "zd", "zm" },
        new[] { "right", "nc0s000", "nc0p000", "nc00000", "np00000" }
    };
}
/// <summary>
/// Builds the head-rule table (<c>nonTerminalInfo</c>) for Arabic treebank categories
/// using the tag names supplied by <paramref name="tagSet"/>.
/// </summary>
/// <remarks>
/// Every entry maps a parent category to an ordered list of rules. Each rule is a
/// string array whose first element is a direction keyword ("left", "right" or
/// "rightdis") followed by zero or more candidate child categories. Tag names that
/// vary by tag set are obtained from <c>tagSet.Noun()</c>, <c>tagSet.Adj()</c>,
/// <c>tagSet.Prep()</c> and <c>tagSet.DetPlusNoun()</c>.
/// </remarks>
/// <param name="tlp">Language pack passed to the base class; its start symbol gets a bare "left" rule.</param>
/// <param name="tagSet">Tag set stored on the instance and queried for tag names.</param>
protected internal ArabicHeadFinder(ITreebankLanguagePack tlp, ArabicHeadFinder.TagSet tagSet)
    : base(tlp)
{
    this.tagSet = tagSet;
    nonTerminalInfo = Generics.NewHashMap();

    nonTerminalInfo["NX"] = new string[][]
    {
        new[] { "left", "DT", "DTNN", "DTNNS", "DTNNP", "DTNNPS", "DTJJ", "DTNOUN_QUANT", "NOUN_QUANT", "MWNP" }
    };

    // sometimes right, sometimes left headed??
    nonTerminalInfo["ADJP"] = new string[][]
    {
        new[] { "rightdis", tagSet.Adj(), "DTJJ", "ADJ_NUM", "DTADJ_NUM", "JJR", "DTJJR", "MWADJP" },
        new[] { "right", "ADJP", "VN", tagSet.Noun(), "MWNP", "NNP", "NNPS", "NNS", "DTNN", "DTNNS", "DTNNP", "DTNNPS", "DTJJ", "DTNOUN_QUANT", "NOUN_QUANT" },
        new[] { "right", "RB", "MWADVP", "CD", "DTRB", "DTCD" },
        new[] { "right", "DT" }
    };

    // sometimes right, sometimes left headed??
    nonTerminalInfo["MWADJP"] = new string[][]
    {
        new[] { "rightdis", tagSet.Adj(), "DTJJ", "ADJ_NUM", "DTADJ_NUM", "JJR", "DTJJR" },
        new[] { "right", tagSet.Noun(), "MWNP", "NNP", "NNPS", "NNS", "DTNN", "DTNNS", "DTNNP", "DTNNPS", "DTJJ", "DTNOUN_QUANT", "NOUN_QUANT" },
        new[] { "right", "RB", "MWADVP", "CD", "DTRB", "DTCD" },
        new[] { "right", "DT" }
    };

    // NNP is a gerund that they called an unknown (=NNP, believe it or not...)
    nonTerminalInfo["ADVP"] = new string[][]
    {
        new[] { "left", "WRB", "RB", "MWADVP", "ADVP", "WHADVP", "DTRB" },
        new[] { "left", "CD", "RP", tagSet.Noun(), "MWNP", "CC", "MWCONJP", tagSet.Adj(), "MWADJP", "DTJJ", "ADJ_NUM", "DTADJ_NUM", "IN", "MWPP", "NP", "NNP", "NOFUNC", "DTRP", "DTNN", "DTNNP", "DTNNPS", "DTNNS", "DTJJ", "DTNOUN_QUANT", "NOUN_QUANT" }
    };

    // NNP is a gerund that they called an unknown (=NNP, believe it or not...)
    nonTerminalInfo["MWADVP"] = new string[][]
    {
        new[] { "left", "WRB", "RB", "ADVP", "WHADVP", "DTRB" },
        new[] { "left", "CD", "RP", tagSet.Noun(), "MWNP", "CC", "MWCONJP", tagSet.Adj(), "MWADJP", "DTJJ", "ADJ_NUM", "DTADJ_NUM", "IN", "MWPP", "NP", "NNP", "NOFUNC", "DTRP", "DTNN", "DTNNP", "DTNNPS", "DTNNS", "DTJJ", "DTNOUN_QUANT", "NOUN_QUANT" }
    };

    // "CONJP" and "MWCONJP" use the same rules (separate array instances).
    foreach (string conjLabel in new[] { "CONJP", "MWCONJP" })
    {
        nonTerminalInfo[conjLabel] = new string[][]
        {
            new[] { "right", "IN", "RB", "MWADVP", tagSet.Noun(), "MWNP", "NNS", "NNP", "NNPS", "DTRB", "DTNN", "DTNNS", "DTNNP", "DTNNPS", "DTNOUN_QUANT", "NOUN_QUANT" }
        };
    }

    // "FRAG" and "MWFRAG" use the same rules (separate array instances).
    foreach (string fragLabel in new[] { "FRAG", "MWFRAG" })
    {
        nonTerminalInfo[fragLabel] = new string[][]
        {
            new[] { "left", tagSet.Noun(), "MWNP", "NNPS", "NNP", "NNS", "DTNN", "DTNNS", "DTNNP", "DTNNPS", "DTNOUN_QUANT", "NOUN_QUANT" },
            new[] { "left", "VBP" }
        };
    }

    nonTerminalInfo["INTJ"] = new string[][]
    {
        new[] { "left", "RP", "UH", "DTRP" }
    };
    nonTerminalInfo["LST"] = new string[][]
    {
        new[] { "left" }
    };

    // note: maybe CC, RB should be the heads?
    nonTerminalInfo["NAC"] = new string[][]
    {
        new[] { "left", "NP", "SBAR", "PP", "MWP", "ADJP", "S", "PRT", "UCP" },
        new[] { "left", "ADVP" }
    };

    // should the JJ rule be left or right?
    nonTerminalInfo["NP"] = new string[][]
    {
        new[] { "left", tagSet.Noun(), "MWNP", tagSet.DetPlusNoun(), "NNS", "NNP", "NNPS", "NP", "PRP", "WHNP", "QP", "WP", "DTNNS", "DTNNPS", "DTNNP", "NOFUNC", "NO_FUNC", "DTNOUN_QUANT", "NOUN_QUANT" },
        new[] { "left", tagSet.Adj(), "MWADJP", "DTJJ", "JJR", "DTJJR", "ADJ_NUM", "DTADJ_NUM" },
        new[] { "right", "CD", "DTCD" },
        new[] { "left", "PRP$" },
        new[] { "right", "DT" }
    };

    // should the JJ rule be left or right?
    nonTerminalInfo["MWNP"] = new string[][]
    {
        new[] { "left", tagSet.Noun(), "MWNP", tagSet.DetPlusNoun(), "NNS", "NNP", "NNPS", "PRP", "QP", "WP", "DTNNS", "DTNNPS", "DTNNP", "DTNOUN_QUANT", "NOUN_QUANT" },
        new[] { "left", tagSet.Adj(), "MWADJP", "DTJJ", "JJR", "DTJJR", "ADJ_NUM", "DTADJ_NUM" },
        new[] { "right", "CD", "DTCD" },
        new[] { "left", "PRP$" },
        new[] { "right", "DT" }
    };

    // NN is for a mistaken "fy", and many wsT
    nonTerminalInfo["PP"] = new string[][]
    {
        new[] { "left", tagSet.Prep(), "MWPP", "PP", "MWP", "PRT", "X" },
        new[] { "left", "NNP", "RP", tagSet.Noun(), "MWNP" },
        new[] { "left", "NP" }
    };

    // NN is for a mistaken "fy", and many wsT
    nonTerminalInfo["MWPP"] = new string[][]
    {
        new[] { "left", tagSet.Prep(), "PP", "MWP", "PRT", "X" },
        new[] { "left", "NNP", "RP", tagSet.Noun(), "MWNP" },
        new[] { "left", "NP" }
    };

    // don't get PUNC
    nonTerminalInfo["PRN"] = new string[][]
    {
        new[] { "left", "NP" }
    };

    // don't get PUNC
    nonTerminalInfo["MWPRN"] = new string[][]
    {
        new[] { "left", "IN" }
    };

    nonTerminalInfo["PRT"] = new string[][]
    {
        new[] { "left", "RP", "PRT", "IN", "DTRP" }
    };
    nonTerminalInfo["QP"] = new string[][]
    {
        new[] { "right", "CD", "DTCD", tagSet.Noun(), "MWNP", tagSet.Adj(), "MWADJP", "NNS", "NNP", "NNPS", "DTNN", "DTNNS", "DTNNP", "DTNNPS", "DTJJ", "DTNOUN_QUANT", "NOUN_QUANT" }
    };

    // really important to put in -PRD sensitivity here!
    // "S" and "MWS" use the same rules (separate array instances).
    foreach (string sentenceLabel in new[] { "S", "MWS" })
    {
        nonTerminalInfo[sentenceLabel] = new string[][]
        {
            new[] { "left", "VP", "MWVP", "S" },
            new[] { "right", "PP", "MWP", "ADVP", "SBAR", "UCP", "ADJP" }
        };
    }

    // to be principled, we need -PRD sensitivity here too.
    nonTerminalInfo["SQ"] = new string[][]
    {
        new[] { "left", "VP", "MWVP", "PP", "MWP" }
    };

    // "SBAR" and "MWSBAR" use the same rules (separate array instances).
    foreach (string sbarLabel in new[] { "SBAR", "MWSBAR" })
    {
        nonTerminalInfo[sbarLabel] = new string[][]
        {
            new[] { "left", "WHNP", "WHADVP", "WRB", "RP", "IN", "SBAR", "CC", "MWCONJP", "WP", "WHPP", "ADVP", "PRT", "RB", "MWADVP", "X", "DTRB", "DTRP" },
            new[] { "left", tagSet.Noun(), "MWNP", "NNP", "NNS", "NNPS", "DTNN", "DTNNS", "DTNNP", "DTNNPS", "DTNOUN_QUANT", "NOUN_QUANT" },
            new[] { "left", "S" }
        };
    }

    // copied from SBAR rule -- look more closely when there's time
    nonTerminalInfo["SBARQ"] = new string[][]
    {
        new[] { "left", "WHNP", "WHADVP", "RP", "IN", "SBAR", "CC", "MWCONJP", "WP", "WHPP", "ADVP", "PRT", "RB", "MWADVP", "X" },
        new[] { "left", tagSet.Noun(), "MWNP", "NNP", "NNS", "NNPS", "DTNN", "DTNNS", "DTNNP", "DTNNPS", "DTNOUN_QUANT", "NOUN_QUANT" },
        new[] { "left", "S" }
    };

    nonTerminalInfo["UCP"] = new string[][]
    {
        new[] { "left" }
    };

    // exclude RP because we don't want negation markers as heads -- no useful information?
    nonTerminalInfo["VP"] = new string[][]
    {
        new[] { "left", "VBD", "VBN", "VBP", "VBG", "DTVBG", "VN", "DTVN", "VP", "RB", "MWADVP", "X", "VB" },
        new[] { "left", "IN" },
        new[] { "left", "NNP", tagSet.Noun(), "MWNP", "DTNN", "DTNNP", "DTNNPS", "DTNNS", "DTNOUN_QUANT", "NOUN_QUANT" }
    };

    // exclude RP because we don't want negation markers as heads -- no useful information?
    // also, RB is used as gerunds
    nonTerminalInfo["MWVP"] = new string[][]
    {
        new[] { "left", "VBD", "VBN", "VBP", "VBG", "DTVBG", "VN", "DTVN", "VP", "MWVP", "RB", "MWADVP", "X", "VB" },
        new[] { "left", "IN" },
        new[] { "left", "NNP", tagSet.Noun(), "MWNP", "DTNN", "DTNNP", "DTNNPS", "DTNNS", "DTNOUN_QUANT", "NOUN_QUANT" }
    };

    nonTerminalInfo["WHADVP"] = new string[][]
    {
        new[] { "left", "WRB", "WP" },
        new[] { "right", "CC", "MWCONJP" },
        new[] { "left", "IN" }
    };
    nonTerminalInfo["WHNP"] = new string[][]
    {
        new[] { "right", "WP" }
    };
    nonTerminalInfo["WHPP"] = new string[][]
    {
        new[] { "left", "IN", "MWPP", "RB", "MWADVP" }
    };
    nonTerminalInfo["X"] = new string[][]
    {
        new[] { "left" }
    };

    // Added by Mona 12/7/04 for the newly created DT nonterm cat
    foreach (string dtCategory in new[] { "DTNN", "DTNNS", "DTNNP", "DTNNPS", "DTJJ", "DTRP", "DTRB", "DTCD", "DTIN" })
    {
        nonTerminalInfo[dtCategory] = new string[][]
        {
            new[] { "right" }
        };
    }

    // stand-in dependency:
    nonTerminalInfo["EDITED"] = new string[][]
    {
        new[] { "left" }
    };
    nonTerminalInfo[tlp.StartSymbol()] = new string[][]
    {
        new[] { "left" }
    };

    // one stray SINV in the training set...garbage head rule here.
    nonTerminalInfo["SINV"] = new string[][]
    {
        new[] { "left", "ADJP", "VP" }
    };
}
/// <summary>Binarizes the tree according to options set up in the constructor.</summary>
/// <remarks>
/// Does the whole tree by calling itself recursively. Leaves become <c>Word</c>
/// leaves, preterminals become <c>CategoryWordTag</c> nodes over their word, and
/// other nodes are rebuilt from their transformed children. A node whose label
/// value starts with the language pack's start symbol is treated as the root and
/// is rebuilt without local binarization; every other node is labelled with the
/// head child's word and tag and passed to <c>BinarizeLocalTree</c>.
/// The start-symbol prefix test uses ordinal comparison so that it does not vary
/// with the current culture.
/// </remarks>
/// <param name="t">
/// A tree to be binarized. The non-leaf nodes must already have
/// CategoryWordTag labels, with heads percolated.
/// </param>
/// <returns>A binary tree, or <c>null</c> when <paramref name="t"/> is <c>null</c>.</returns>
public virtual Tree TransformTree(Tree t)
{
    // handle null
    if (t == null)
    {
        return null;
    }

    string cat = t.Label().Value();

    // handle words
    if (t.IsLeaf())
    {
        ILabel label = new Word(cat);
        return tf.NewLeaf(label);
    }

    // handle tags
    if (t.IsPreTerminal())
    {
        Tree childResult = TransformTree(t.GetChild(0));
        string word = childResult.Value();
        IList<Tree> newChildren = new List<Tree>(1);
        newChildren.Add(childResult);
        return tf.NewTreeNode(new CategoryWordTag(cat, word, cat), newChildren);
    }

    // handle categories
    Tree headChild = hf.DetermineHead(t);

    // Ordinal comparison: category labels are identifiers, not linguistic text,
    // so the prefix test must not depend on the current culture.
    bool isRoot = cat.StartsWith(tlp.StartSymbol(), System.StringComparison.Ordinal);

    if (headChild == null && !isRoot)
    {
        // Only reported here; the head dereference in the non-root path below
        // will still fail for such a node.
        log.Info("### No head found for:");
        t.PennPrint();
    }

    // Transform every child, remembering the position of the head child.
    int headNum = -1;
    Tree[] kids = t.Children();
    IList<Tree> transformedKids = new List<Tree>(kids.Length);
    for (int childNum = 0; childNum < kids.Length; childNum++)
    {
        Tree child = kids[childNum];
        Tree childResult = TransformTree(child); // recursive call
        if (child == headChild)
        {
            headNum = childNum;
        }
        transformedKids.Add(childResult);
    }

    if (isRoot)
    {
        // handle the ROOT tree properly: keep its label and do not binarize it
        return tf.NewTreeNode(t.Label(), transformedKids);
    }

    // label shouldn't have changed
    string headWord = ((IHasWord)headChild.Label()).Word();
    string headTag = ((IHasTag)headChild.Label()).Tag();
    ILabel nodeLabel = new CategoryWordTag(cat, headWord, headTag);
    Tree result = tf.NewTreeNode(nodeLabel, transformedKids);

    // cdm Mar 2005: invent a head so I don't have to rewrite all this
    // code, but with the removal of TreeHeadPair, some of the rest of
    // this should probably be rewritten too to not use this head variable
    TaggedWord head = new TaggedWord(headWord, headTag);
    return BinarizeLocalTree(result, headNum, head);
}
/// <summary>Returns the start symbol reported by the treebank language pack.</summary>
/// <returns>The value of <c>tlp.StartSymbol()</c>.</returns>
public virtual string RootSymbol()
{
    string startSymbol = tlp.StartSymbol();
    return startSymbol;
}
/// <summary>
/// Creates a normalizer bound to a language pack and a node-cleanup setting.
/// </summary>
/// <param name="tlp">Language pack; its start symbol is stored as the root label.</param>
/// <param name="nodeCleanup">Node-cleanup setting stored on the instance.</param>
public GrammaticalFunctionTreeNormalizer(ITreebankLanguagePack tlp, int nodeCleanup)
{
    this.tlp = tlp;
    this.nodeCleanup = nodeCleanup;

    // The root label is whatever the language pack reports as its start symbol.
    var startSymbol = tlp.StartSymbol();
    root = startSymbol;
}