Esempio n. 1
0
 public ArabicHeadFinder(ArabicHeadFinder.TagSet tagSet)
     : this(tagSet.LangPack(), tagSet)
 {
 }
Esempio n. 2
0
 protected internal ArabicHeadFinder(ITreebankLanguagePack tlp, ArabicHeadFinder.TagSet tagSet)
     : base(tlp)
 {
     //this(new ArabicTreebankLanguagePack(), tagSet);
     this.tagSet = tagSet;
     //log.info("##testing: noun tag is " + tagSet.noun());
     nonTerminalInfo         = Generics.NewHashMap();
     nonTerminalInfo["NX"]   = new string[][] { new string[] { "left", "DT", "DTNN", "DTNNS", "DTNNP", "DTNNPS", "DTJJ", "DTNOUN_QUANT", "NOUN_QUANT", "MWNP" } };
     nonTerminalInfo["ADJP"] = new string[][] { new string[] { "rightdis", tagSet.Adj(), "DTJJ", "ADJ_NUM", "DTADJ_NUM", "JJR", "DTJJR", "MWADJP" }, new string[] { "right", "ADJP", "VN", tagSet.Noun(), "MWNP", "NNP", "NNPS", "NNS", "DTNN", "DTNNS"
                                                                                                                                                                    , "DTNNP", "DTNNPS", "DTJJ", "DTNOUN_QUANT", "NOUN_QUANT" }, new string[] { "right", "RB", "MWADVP", "CD", "DTRB", "DTCD" }, new string[] { "right", "DT" } };
     // sometimes right, sometimes left headed??
     nonTerminalInfo["MWADJP"] = new string[][] { new string[] { "rightdis", tagSet.Adj(), "DTJJ", "ADJ_NUM", "DTADJ_NUM", "JJR", "DTJJR" }, new string[] { "right", tagSet.Noun(), "MWNP", "NNP", "NNPS", "NNS", "DTNN", "DTNNS", "DTNNP", "DTNNPS",
                                                                                                                                                            "DTJJ", "DTNOUN_QUANT", "NOUN_QUANT" }, new string[] { "right", "RB", "MWADVP", "CD", "DTRB", "DTCD" }, new string[] { "right", "DT" } };
     // sometimes right, sometimes left headed??
     nonTerminalInfo["ADVP"] = new string[][] { new string[] { "left", "WRB", "RB", "MWADVP", "ADVP", "WHADVP", "DTRB" }, new string[] { "left", "CD", "RP", tagSet.Noun(), "MWNP", "CC", "MWCONJP", tagSet.Adj(), "MWADJP", "DTJJ", "ADJ_NUM", "DTADJ_NUM"
                                                                                                                                         , "IN", "MWPP", "NP", "NNP", "NOFUNC", "DTRP", "DTNN", "DTNNP", "DTNNPS", "DTNNS", "DTJJ", "DTNOUN_QUANT", "NOUN_QUANT" } };
     // NNP is a gerund that they called an unknown (=NNP, believe it or not...)
     nonTerminalInfo["MWADVP"] = new string[][] { new string[] { "left", "WRB", "RB", "ADVP", "WHADVP", "DTRB" }, new string[] { "left", "CD", "RP", tagSet.Noun(), "MWNP", "CC", "MWCONJP", tagSet.Adj(), "MWADJP", "DTJJ", "ADJ_NUM", "DTADJ_NUM", "IN"
                                                                                                                                 , "MWPP", "NP", "NNP", "NOFUNC", "DTRP", "DTNN", "DTNNP", "DTNNPS", "DTNNS", "DTJJ", "DTNOUN_QUANT", "NOUN_QUANT" } };
     // NNP is a gerund that they called an unknown (=NNP, believe it or not...)
     nonTerminalInfo["CONJP"]   = new string[][] { new string[] { "right", "IN", "RB", "MWADVP", tagSet.Noun(), "MWNP", "NNS", "NNP", "NNPS", "DTRB", "DTNN", "DTNNS", "DTNNP", "DTNNPS", "DTNOUN_QUANT", "NOUN_QUANT" } };
     nonTerminalInfo["MWCONJP"] = new string[][] { new string[] { "right", "IN", "RB", "MWADVP", tagSet.Noun(), "MWNP", "NNS", "NNP", "NNPS", "DTRB", "DTNN", "DTNNS", "DTNNP", "DTNNPS", "DTNOUN_QUANT", "NOUN_QUANT" } };
     nonTerminalInfo["FRAG"]    = new string[][] { new string[] { "left", tagSet.Noun(), "MWNP", "NNPS", "NNP", "NNS", "DTNN", "DTNNS", "DTNNP", "DTNNPS", "DTNOUN_QUANT", "NOUN_QUANT" }, new string[] { "left", "VBP" } };
     nonTerminalInfo["MWFRAG"]  = new string[][] { new string[] { "left", tagSet.Noun(), "MWNP", "NNPS", "NNP", "NNS", "DTNN", "DTNNS", "DTNNP", "DTNNPS", "DTNOUN_QUANT", "NOUN_QUANT" }, new string[] { "left", "VBP" } };
     nonTerminalInfo["INTJ"]    = new string[][] { new string[] { "left", "RP", "UH", "DTRP" } };
     nonTerminalInfo["LST"]     = new string[][] { new string[] { "left" } };
     nonTerminalInfo["NAC"]     = new string[][] { new string[] { "left", "NP", "SBAR", "PP", "MWP", "ADJP", "S", "PRT", "UCP" }, new string[] { "left", "ADVP" } };
     // note: maybe CC, RB should be the heads?
     nonTerminalInfo["NP"] = new string[][] { new string[] { "left", tagSet.Noun(), "MWNP", tagSet.DetPlusNoun(), "NNS", "NNP", "NNPS", "NP", "PRP", "WHNP", "QP", "WP", "DTNNS", "DTNNPS", "DTNNP", "NOFUNC", "NO_FUNC", "DTNOUN_QUANT", "NOUN_QUANT" }, new string[] { "left", tagSet.Adj(), "MWADJP", "DTJJ", "JJR", "DTJJR", "ADJ_NUM", "DTADJ_NUM" }, new string[] { "right", "CD", "DTCD" }, new string[] { "left", "PRP$" }, new string[] { "right", "DT" } };
     // should the JJ rule be left or right?
     nonTerminalInfo["MWNP"] = new string[][] { new string[] { "left", tagSet.Noun(), "MWNP", tagSet.DetPlusNoun(), "NNS", "NNP", "NNPS", "PRP", "QP", "WP", "DTNNS", "DTNNPS", "DTNNP", "DTNOUN_QUANT", "NOUN_QUANT" }, new string[] { "left", tagSet
                                                                                                                                                                                                                                        .Adj(), "MWADJP", "DTJJ", "JJR", "DTJJR", "ADJ_NUM", "DTADJ_NUM" }, new string[] { "right", "CD", "DTCD" }, new string[] { "left", "PRP$" }, new string[] { "right", "DT" } };
     // should the JJ rule be left or right?
     nonTerminalInfo["PP"] = new string[][] { new string[] { "left", tagSet.Prep(), "MWPP", "PP", "MWP", "PRT", "X" }, new string[] { "left", "NNP", "RP", tagSet.Noun(), "MWNP" }, new string[] { "left", "NP" } };
     // NN is for a mistaken "fy", and many wsT
     nonTerminalInfo["MWPP"] = new string[][] { new string[] { "left", tagSet.Prep(), "PP", "MWP", "PRT", "X" }, new string[] { "left", "NNP", "RP", tagSet.Noun(), "MWNP" }, new string[] { "left", "NP" } };
     // NN is for a mistaken "fy", and many wsT
     nonTerminalInfo["PRN"] = new string[][] { new string[] { "left", "NP" } };
     // don't get PUNC
     nonTerminalInfo["MWPRN"] = new string[][] { new string[] { "left", "IN" } };
     // don't get PUNC
     nonTerminalInfo["PRT"] = new string[][] { new string[] { "left", "RP", "PRT", "IN", "DTRP" } };
     nonTerminalInfo["QP"]  = new string[][] { new string[] { "right", "CD", "DTCD", tagSet.Noun(), "MWNP", tagSet.Adj(), "MWADJP", "NNS", "NNP", "NNPS", "DTNN", "DTNNS", "DTNNP", "DTNNPS", "DTJJ", "DTNOUN_QUANT", "NOUN_QUANT" } };
     nonTerminalInfo["S"]   = new string[][] { new string[] { "left", "VP", "MWVP", "S" }, new string[] { "right", "PP", "MWP", "ADVP", "SBAR", "UCP", "ADJP" } };
     // really important to put in -PRD sensitivity here!
     nonTerminalInfo["MWS"] = new string[][] { new string[] { "left", "VP", "MWVP", "S" }, new string[] { "right", "PP", "MWP", "ADVP", "SBAR", "UCP", "ADJP" } };
     // really important to put in -PRD sensitivity here!
     nonTerminalInfo["SQ"] = new string[][] { new string[] { "left", "VP", "MWVP", "PP", "MWP" } };
     // to be principled, we need -PRD sensitivity here too.
     nonTerminalInfo["SBAR"] = new string[][] { new string[] { "left", "WHNP", "WHADVP", "WRB", "RP", "IN", "SBAR", "CC", "MWCONJP", "WP", "WHPP", "ADVP", "PRT", "RB", "MWADVP", "X", "DTRB", "DTRP" }, new string[] { "left", tagSet.Noun(), "MWNP",
                                                                                                                                                                                                                        "NNP", "NNS", "NNPS", "DTNN", "DTNNS", "DTNNP", "DTNNPS", "DTNOUN_QUANT", "NOUN_QUANT" }, new string[] { "left", "S" } };
     nonTerminalInfo["MWSBAR"] = new string[][] { new string[] { "left", "WHNP", "WHADVP", "WRB", "RP", "IN", "SBAR", "CC", "MWCONJP", "WP", "WHPP", "ADVP", "PRT", "RB", "MWADVP", "X", "DTRB", "DTRP" }, new string[] { "left", tagSet.Noun(), "MWNP"
                                                                                                                                                                                                                          , "NNP", "NNS", "NNPS", "DTNN", "DTNNS", "DTNNP", "DTNNPS", "DTNOUN_QUANT", "NOUN_QUANT" }, new string[] { "left", "S" } };
     nonTerminalInfo["SBARQ"] = new string[][] { new string[] { "left", "WHNP", "WHADVP", "RP", "IN", "SBAR", "CC", "MWCONJP", "WP", "WHPP", "ADVP", "PRT", "RB", "MWADVP", "X" }, new string[] { "left", tagSet.Noun(), "MWNP", "NNP", "NNS", "NNPS",
                                                                                                                                                                                                  "DTNN", "DTNNS", "DTNNP", "DTNNPS", "DTNOUN_QUANT", "NOUN_QUANT" }, new string[] { "left", "S" } };
     // copied from SBAR rule -- look more closely when there's time
     nonTerminalInfo["UCP"] = new string[][] { new string[] { "left" } };
     nonTerminalInfo["VP"]  = new string[][] { new string[] { "left", "VBD", "VBN", "VBP", "VBG", "DTVBG", "VN", "DTVN", "VP", "RB", "MWADVP", "X", "VB" }, new string[] { "left", "IN" }, new string[] { "left", "NNP", tagSet.Noun(), "MWNP", "DTNN",
                                                                                                                                                                                                          "DTNNP", "DTNNPS", "DTNNS", "DTNOUN_QUANT", "NOUN_QUANT" } };
     // exclude RP because we don't want negation markers as heads -- no useful information?
     nonTerminalInfo["MWVP"] = new string[][] { new string[] { "left", "VBD", "VBN", "VBP", "VBG", "DTVBG", "VN", "DTVN", "VP", "MWVP", "RB", "MWADVP", "X", "VB" }, new string[] { "left", "IN" }, new string[] { "left", "NNP", tagSet.Noun(), "MWNP"
                                                                                                                                                                                                                   , "DTNN", "DTNNP", "DTNNPS", "DTNNS", "DTNOUN_QUANT", "NOUN_QUANT" } };
     // exclude RP because we don't want negation markers as heads -- no useful information?
     //also, RB is used as gerunds
     nonTerminalInfo["WHADVP"] = new string[][] { new string[] { "left", "WRB", "WP" }, new string[] { "right", "CC", "MWCONJP" }, new string[] { "left", "IN" } };
     nonTerminalInfo["WHNP"]   = new string[][] { new string[] { "right", "WP" } };
     nonTerminalInfo["WHPP"]   = new string[][] { new string[] { "left", "IN", "MWPP", "RB", "MWADVP" } };
     nonTerminalInfo["X"]      = new string[][] { new string[] { "left" } };
     //Added by Mona 12/7/04 for the newly created DT nonterm cat
     nonTerminalInfo["DTNN"]   = new string[][] { new string[] { "right" } };
     nonTerminalInfo["DTNNS"]  = new string[][] { new string[] { "right" } };
     nonTerminalInfo["DTNNP"]  = new string[][] { new string[] { "right" } };
     nonTerminalInfo["DTNNPS"] = new string[][] { new string[] { "right" } };
     nonTerminalInfo["DTJJ"]   = new string[][] { new string[] { "right" } };
     nonTerminalInfo["DTRP"]   = new string[][] { new string[] { "right" } };
     nonTerminalInfo["DTRB"]   = new string[][] { new string[] { "right" } };
     nonTerminalInfo["DTCD"]   = new string[][] { new string[] { "right" } };
     nonTerminalInfo["DTIN"]   = new string[][] { new string[] { "right" } };
     // stand-in dependency:
     nonTerminalInfo["EDITED"]          = new string[][] { new string[] { "left" } };
     nonTerminalInfo[tlp.StartSymbol()] = new string[][] { new string[] { "left" } };
     // one stray SINV in the training set...garbage head rule here.
     nonTerminalInfo["SINV"] = new string[][] { new string[] { "left", "ADJP", "VP" } };
 }