/// <summary>
/// Called by determineHead and may be overridden in subclasses
/// if special treatment is necessary for particular categories.
/// </summary>
/// <remarks>
/// The head rules are looked up in <c>NonTerminalInfo</c> under the basic category of
/// <paramref name="t"/>'s label (with one leading "@" removed, if present). The rules
/// are tried in order and the first one that yields a daughter wins; only the final
/// rule is applied with the last-resort flag set. When no rules exist for the category,
/// <c>DefaultRule</c> is used instead if it is set.
/// </remarks>
/// <param name="t">The tree to determine the head daughter of</param>
/// <param name="parent">The parent of t (or may be null); not consulted by this implementation</param>
/// <returns>
/// The head daughter of t, or null if no rule produced one
/// </returns>
/// <exception cref="ArgumentException">
/// No head rule is defined for the category of <paramref name="t"/> and no default rule is set.
/// </exception>
protected virtual Tree DetermineNonTrivialHead(Tree t, Tree parent)
{
    string motherCat = Tlp.BasicCategory(t.Label().Value());

    // Ordinal comparison: a culture-sensitive StartsWith can report a match when the
    // string begins with an ignorable character (e.g. a soft hyphen) followed by "@",
    // in which case Substring(1) would drop the wrong character.
    if (motherCat.StartsWith("@", StringComparison.Ordinal))
    {
        motherCat = motherCat.Substring(1);
    }

    // We know we have nonterminals underneath
    // (a bit of a Penn Treebank assumption, but).
    // Note: an earlier special case that looked for a POS tag on the last daughter was
    // dropped because it appeared redundant in the Collins case (the rule already does that).
    string[][] how;
    bool hasRules = NonTerminalInfo.TryGetValue(motherCat, out how);
    Tree[] kids = t.Children();

    if (!hasRules)
    {
        if (DefaultRule != null)
        {
            return TraverseLocate(kids, DefaultRule, true);
        }

        // Include the concrete head finder type so the message identifies which rule set is missing the category.
        throw new ArgumentException("No head rule defined for " + motherCat + " using " + GetType() + " in " + t);
    }

    for (int i = 0; i < how.Length; i++)
    {
        // Only the final rule is applied as a last resort.
        bool lastResort = (i == how.Length - 1);
        Tree candidate = TraverseLocate(kids, how[i], lastResort);
        if (candidate != null)
        {
            return candidate;
        }
    }

    // No rule matched (or the rule list was empty).
    return null;
}
// only leaves NP-TMP and NP-ADV
/// <summary>
/// Reduces a label to its basic category (via <c>Tlp.BasicCategory</c>), re-appending
/// "-TMP" when the original label matches <c>TmpPattern</c>, or otherwise "-ADV" when it
/// matches <c>AdvPattern</c>.
/// </summary>
/// <param name="label">The raw node label; may be null</param>
/// <returns>The cleaned-up label, or the empty string when <paramref name="label"/> is null</returns>
protected string CleanUpLabel(string label)
{
    if (label == null)
    {
        // This shouldn't really happen, but can happen if there are unlabeled nodes
        // further down a tree, as apparently happens in at least the 20100730 era
        // American National Corpus
        return "";
    }

    // Both patterns are tested against the original label, before it is reduced.
    bool isTemporal = TmpPattern.IsMatch(label);
    bool isAdverbial = AdvPattern.IsMatch(label);
    string basic = Tlp.BasicCategory(label);

    // The temporal marker takes precedence over the adverbial one.
    if (isTemporal)
    {
        return basic + "-TMP";
    }

    if (isAdverbial)
    {
        return basic + "-ADV";
    }

    return basic;
}
/// <summary>
/// Maps a label string to the value returned by <c>tlp.BasicCategory</c>.
/// </summary>
/// <param name="input">The label to convert</param>
/// <returns>The result of <c>tlp.BasicCategory</c> for <paramref name="input"/></returns>
public string Apply(string input)
{
    string basicCategory = tlp.BasicCategory(input);
    return basicCategory;
}