Esempi in C# (CSharp) per ITreebankLanguagePack.PunctuationTags

Linguaggio di programmazione: C# (CSharp)

Metodo/funzione: PunctuationTags

Esempi su hotexamples.com: 2

ITreebankLanguagePack.PunctuationTags in C# (CSharp): 2 esempi trovati. Questi sono i migliori esempi reali in C# (CSharp) per ITreebankLanguagePack.PunctuationTags, estratti da progetti open source. Li puoi valutare, per aiutarci a migliorare la qualità dei nostri esempi.

Metodi utilizzati di frequente

Mostra Nascondi

BasicCategory(19)

StartSymbol(8)

GrammaticalStructureFactory(5)

CategoryAndFunction(3)

GetTokenizerFactory(3)

SentenceFinalPunctuationWords(3)

PunctuationWordRejectFilter(3)

PunctuationTags(2)

IsStartSymbol(2)

IsSentenceFinalPunctuationTag(2)

IsPunctuationWord(2)

IsEvalBIgnoredPunctuationTag(2)

GetEncoding(2)

PunctuationTagAcceptFilter(1)

IsPunctuationTag(1)

SupportsGrammaticalStructures(1)

TreeReaderFactory(1)

TreebankFileExtension(1)

Esempio n. 1

Mostra file

 private ICollection <string> GetPunctuationTags()
 {
     if (tlp is PennTreebankLanguagePack)
     {
         // Hack for English: match punctuation tags used in Danqi's paper
         return(new HashSet <string>(Arrays.AsList("''", ",", ".", ":", "``", "-LRB-", "-RRB-")));
     }
     else
     {
         return(CollectionUtils.AsSet(tlp.PunctuationTags()));
     }
 }

Esempio n. 2

Mostra file

 public ModCollinsHeadFinder(ITreebankLanguagePack tlp)
     : base(tlp, tlp.PunctuationTags())
 {
     // avoid punctuation as head in final default rule
     nonTerminalInfo = Generics.NewHashMap();
     // This version from Collins' diss (1999: 236-238)
     // NNS, NN is actually sensible (money, etc.)!
     // QP early isn't; should prefer JJR NN RB
     // remove ADVP; it just shouldn't be there.
     // if two JJ, should take right one (e.g. South Korean)
     // nonTerminalInfo.put("ADJP", new String[][]{{"left", "NNS", "NN", "$", "QP"}, {"right", "JJ"}, {"left", "VBN", "VBG", "ADJP", "JJP", "JJR", "NP", "JJS", "DT", "FW", "RBR", "RBS", "SBAR", "RB"}});
     nonTerminalInfo["ADJP"] = new string[][] { new string[] { "left", "$" }, new string[] { "rightdis", "NNS", "NN", "JJ", "QP", "VBN", "VBG" }, new string[] { "left", "ADJP" }, new string[] { "rightdis", "JJP", "JJR", "JJS", "DT", "RB", "RBR",
                                                                                                                                                                                                  "CD", "IN", "VBD" }, new string[] { "left", "ADVP", "NP" } };
     nonTerminalInfo["JJP"] = new string[][] { new string[] { "left", "NNS", "NN", "$", "QP", "JJ", "VBN", "VBG", "ADJP", "JJP", "JJR", "NP", "JJS", "DT", "FW", "RBR", "RBS", "SBAR", "RB" } };
     // JJP is introduced for NML-like adjective phrases in Vadas' treebank; Chris wishes he hadn't used JJP which should be a POS-tag.
     // ADVP rule rewritten by Chris in Nov 2010 to be rightdis.  This is right! JJ.* is often head and rightmost.
     nonTerminalInfo["ADVP"] = new string[][] { new string[] { "left", "ADVP", "IN" }, new string[] { "rightdis", "RB", "RBR", "RBS", "JJ", "JJR", "JJS" }, new string[] { "rightdis", "RP", "DT", "NN", "CD", "NP", "VBN", "NNP", "CC", "FW", "NNS",
                                                                                                                                                                           "ADJP", "NML" } };
     nonTerminalInfo["CONJP"] = new string[][] { new string[] { "right", "CC", "RB", "IN" } };
     nonTerminalInfo["FRAG"]  = new string[][] { new string[] { "right" } };
     // crap
     nonTerminalInfo["INTJ"] = new string[][] { new string[] { "left" } };
     nonTerminalInfo["LST"]  = new string[][] { new string[] { "right", "LS", ":" } };
     // NML is head in: (NAC-LOC (NML San Antonio) (, ,) (NNP Texas))
     // TODO: NNP should be head (rare cases, could be ignored):
     //   (NAC (NML New York) (NNP Court) (PP of Appeals))
     //   (NAC (NML Prudential Insurance) (NNP Co.) (PP Of America))
     // Chris: This could maybe still do with more thought, but NAC is rare.
     nonTerminalInfo["NAC"] = new string[][] { new string[] { "left", "NN", "NNS", "NML", "NNP", "NNPS", "NP", "NAC", "EX", "$", "CD", "QP", "PRP", "VBG", "JJ", "JJS", "JJR", "ADJP", "JJP", "FW" } };
     // Added JJ to PP head table, since it is a head in several cases, e.g.:
     // (PP (JJ next) (PP to them))
     // When you have both JJ and IN daughters, it is invariably "such as" -- not so clear which should be head, but leave as IN
     // should prefer JJ? (PP (JJ such) (IN as) (NP (NN crocidolite)))  Michel thinks we should make JJ a head of PP
     // added SYM as used in new treebanks for symbols filling role of IN
     // Changed PP search to left -- just what you want for conjunction (and consistent with SemanticHeadFinder)
     nonTerminalInfo["PP"]  = new string[][] { new string[] { "right", "IN", "TO", "VBG", "VBN", "RP", "FW", "JJ", "SYM" }, new string[] { "left", "PP" } };
     nonTerminalInfo["PRN"] = new string[][] { new string[] { "left", "VP", "NP", "PP", "SQ", "S", "SINV", "SBAR", "ADJP", "JJP", "ADVP", "INTJ", "WHNP", "NAC", "VBP", "JJ", "NN", "NNP" } };
     nonTerminalInfo["PRT"] = new string[][] { new string[] { "right", "RP" } };
     // add '#' for pounds!!
     nonTerminalInfo["QP"] = new string[][] { new string[] { "left", "$", "IN", "NNS", "NN", "JJ", "CD", "PDT", "DT", "RB", "NCD", "QP", "JJR", "JJS" } };
     // reduced relative clause can be any predicate VP, ADJP, NP, PP.
     // For choosing between NP and PP, really need to know which one is temporal and to choose the other.
     // It's not clear ADVP needs to be in the list at all (delete?).
     nonTerminalInfo["RRC"] = new string[][] { new string[] { "left", "RRC" }, new string[] { "right", "VP", "ADJP", "JJP", "NP", "PP", "ADVP" } };
     // delete IN -- go for main part of sentence; add FRAG
     nonTerminalInfo["S"]     = new string[][] { new string[] { "left", "TO", "VP", "S", "FRAG", "SBAR", "ADJP", "JJP", "UCP", "NP" } };
     nonTerminalInfo["SBAR"]  = new string[][] { new string[] { "left", "WHNP", "WHPP", "WHADVP", "WHADJP", "IN", "DT", "S", "SQ", "SINV", "SBAR", "FRAG" } };
     nonTerminalInfo["SBARQ"] = new string[][] { new string[] { "left", "SQ", "S", "SINV", "SBARQ", "FRAG", "SBAR" } };
     // cdm: if you have 2 VP under an SINV, you should really take the 2nd as syntactic head, because the first is a topicalized VP complement of the second, but for now I didn't change this, since it didn't help parsing.  (If it were changed, it'd need to be also changed to the opposite in SemanticHeadFinder.)
     nonTerminalInfo["SINV"] = new string[][] { new string[] { "left", "VBZ", "VBD", "VBP", "VB", "MD", "VBN", "VP", "S", "SINV", "ADJP", "JJP", "NP" } };
     nonTerminalInfo["SQ"]   = new string[][] { new string[] { "left", "VBZ", "VBD", "VBP", "VB", "MD", "AUX", "AUXG", "VP", "SQ" } };
     // TODO: Should maybe put S before SQ for tag questions. Check.
     nonTerminalInfo["UCP"] = new string[][] { new string[] { "right" } };
     // below is weird!! Make 2 lists, one for good and one for bad heads??
     // VP: added AUX and AUXG to work with Charniak tags
     nonTerminalInfo["VP"]     = new string[][] { new string[] { "left", "TO", "VBD", "VBN", "MD", "VBZ", "VB", "VBG", "VBP", "VP", "AUX", "AUXG", "ADJP", "JJP", "NN", "NNS", "JJ", "NP", "NNP" } };
     nonTerminalInfo["WHADJP"] = new string[][] { new string[] { "left", "WRB", "WHADVP", "RB", "JJ", "ADJP", "JJP", "JJR" } };
     nonTerminalInfo["WHADVP"] = new string[][] { new string[] { "right", "WRB", "WHADVP" } };
     nonTerminalInfo["WHNP"]   = new string[][] { new string[] { "left", "WDT", "WP", "WP$", "WHADJP", "WHPP", "WHNP" } };
     nonTerminalInfo["WHPP"]   = new string[][] { new string[] { "right", "IN", "TO", "FW" } };
     nonTerminalInfo["X"]      = new string[][] { new string[] { "right", "S", "VP", "ADJP", "JJP", "NP", "SBAR", "PP", "X" } };
     nonTerminalInfo["NP"]     = new string[][] { new string[] { "rightdis", "NN", "NNP", "NNPS", "NNS", "NML", "NX", "POS", "JJR" }, new string[] { "left", "NP", "PRP" }, new string[] { "rightdis", "$", "ADJP", "JJP", "PRN", "FW" }, new string[] { "right"
                                                                                                                                                                                                                                                         , "CD" }, new string[] { "rightdis", "JJ", "JJS", "RB", "QP", "DT", "WDT", "RBR", "ADVP" } };
     nonTerminalInfo["NX"] = nonTerminalInfo["NP"];
     // TODO: seems JJ should be head of NML in this case:
     // (NP (NML (JJ former) (NML Red Sox) (JJ great)) (NNP Luis) (NNP Tiant)),
     // (although JJ great is tagged wrong)
     nonTerminalInfo["NML"]   = nonTerminalInfo["NP"];
     nonTerminalInfo["POSSP"] = new string[][] { new string[] { "right", "POS" } };
     /* HJT: Adding the following to deal with oddly formed data in (for example) the Brown corpus */
     nonTerminalInfo["ROOT"] = new string[][] { new string[] { "left", "S", "SQ", "SINV", "SBAR", "FRAG" } };
     // Just to handle trees which have TOP instead of ROOT at the root
     nonTerminalInfo["TOP"]  = nonTerminalInfo["ROOT"];
     nonTerminalInfo["TYPO"] = new string[][] { new string[] { "left", "NN", "NP", "NML", "NNP", "NNPS", "TO", "VBD", "VBN", "MD", "VBZ", "VB", "VBG", "VBP", "VP", "ADJP", "JJP", "FRAG" } };
     // for Brown (Roger)
     nonTerminalInfo["ADV"] = new string[][] { new string[] { "right", "RB", "RBR", "RBS", "FW", "ADVP", "TO", "CD", "JJR", "JJ", "IN", "NP", "NML", "JJS", "NN" } };
     // SWBD
     nonTerminalInfo["EDITED"] = new string[][] { new string[] { "left" } };
     // crap rule for Switchboard (if don't delete EDITED nodes)
     // in sw2756, a "VB". (copy "VP" to handle this problem, though should really fix it on reading)
     nonTerminalInfo["VB"]   = new string[][] { new string[] { "left", "TO", "VBD", "VBN", "MD", "VBZ", "VB", "VBG", "VBP", "VP", "AUX", "AUXG", "ADJP", "JJP", "NN", "NNS", "JJ", "NP", "NNP" } };
     nonTerminalInfo["META"] = new string[][] { new string[] { "left" } };
     // rule for OntoNotes, but maybe should just be deleted in TreeReader??
     nonTerminalInfo["XS"] = new string[][] { new string[] { "right", "IN" } };
 }