/// <summary>
/// Removes from op.trainOptions.splitters every splitter named in
/// op.trainOptions.deleteSplitters.  A deletion spec matches a splitter
/// either exactly, or by basic category when the spec is itself a basic
/// category (i.e. has no functional annotation).
/// </summary>
/// <param name="tlp">Language pack used to compute basic categories</param>
/// <param name="op">Options holding the splitter sets; mutated in place</param>
private static void RemoveDeleteSplittersFromSplitters(ITreebankLanguagePack tlp, Options op)
{
    if (op.trainOptions.deleteSplitters == null)
    {
        return;
    }
    IList<string> deleted = new List<string>();
    foreach (string del in op.trainOptions.deleteSplitters)
    {
        string baseDel = tlp.BasicCategory(del);
        bool checkBasic = del.Equals(baseDel);
        // Iterate over a snapshot: the original code removed elements through the
        // live enumerator (a Java Iterator.remove() leftover), which is invalid in C#.
        foreach (string elem in new List<string>(op.trainOptions.splitters))
        {
            string baseElem = tlp.BasicCategory(elem);
            // Exact match always deletes; a basic-category spec also deletes
            // any splitter sharing that basic category.
            bool delStr = checkBasic && baseElem.Equals(baseDel) || elem.Equals(del);
            if (delStr)
            {
                op.trainOptions.splitters.Remove(elem);
                deleted.Add(elem);
            }
        }
    }
    if (op.testOptions.verbose)
    {
        log.Info("Removed from vertical splitters: " + deleted);
    }
}
/// <summary>
/// Returns true if any child of the given tree has a label whose value —
/// either raw or reduced to its basic category — matches the pattern.
/// </summary>
/// <param name="tree">Tree whose immediate children are inspected; may be null</param>
public virtual bool Test(Tree tree)
{
    if (tree == null)
    {
        return false;
    }
    foreach (Tree kid in tree.Children())
    {
        ILabel kidLabel = kid.Label();
        if (kidLabel == null)
        {
            continue;
        }
        string labelValue = kidLabel.Value();
        if (labelValue == null)
        {
            continue;
        }
        // Try the raw label first; only compute the basic category if it fails.
        if (pattern.Matcher(labelValue).Matches() || pattern.Matcher(tlp.BasicCategory(labelValue)).Matches())
        {
            return true;
        }
    }
    return false;
}
/// <summary>
/// Remove things like hyphened functional tags and equals from the
/// end of a node label, according to the nodeCleanup level:
/// 1 keeps category and function, 2 keeps only the basic category,
/// anything else leaves the label untouched.  A null label maps to root.
/// </summary>
/// <param name="label">The label to clean; may be null</param>
/// <returns>The cleaned-up label</returns>
protected internal virtual string CleanUpLabel(string label)
{
    if (label == null)
    {
        return root;
    }
    switch (nodeCleanup)
    {
        case 1:
            return tlp.CategoryAndFunction(label);
        case 2:
            return tlp.BasicCategory(label);
        default:
            return label;
    }
}
/// <summary>
/// Replaces a nonterminal's label with a new label holding only its
/// basic category; returns null when the node has no label.
/// </summary>
public override ILabel TransformNonterminalLabel(Tree tree)
{
    ILabel current = tree.Label();
    if (current == null)
    {
        return null;
    }
    string basic = tlp.BasicCategory(current.Value());
    return current.LabelFactory().NewLabel(basic);
}
public virtual string Project(string tagStr) { // return tagStr; string ret = tlp.BasicCategory(tagStr); // log.info("BCTP mapped " + tagStr + " to " + ret); return(ret); }
// static only
/// <summary>
/// Counts how many spans are present in goldTree, including
/// preterminals, but not present in guessTree, along with how many
/// spans are present in guessTree and not goldTree.
/// </summary>
/// <remarks>
/// Each mismatched span counts as one error, so a mislabeled span or
/// preterminal counts as two errors (missing on one side, extra on the
/// other).  Span labels are compared using the basicCategory() function
/// from the passed in TreebankLanguagePack.
/// </remarks>
public static int CountSpanErrors(ITreebankLanguagePack tlp, Tree goldTree, Tree guessTree)
{
    ICollection<Constituent> simpleGold = SimplifyConstituents(tlp, goldTree.Constituents(LabeledConstituent.Factory()));
    ICollection<Constituent> simpleGuess = SimplifyConstituents(tlp, guessTree.Constituents(LabeledConstituent.Factory()));
    int errors = 0;
    // One error per span present on only one side.
    foreach (Constituent span in simpleGold)
    {
        if (!simpleGuess.Contains(span))
        {
            ++errors;
        }
    }
    foreach (Constituent span in simpleGuess)
    {
        if (!simpleGold.Contains(span))
        {
            ++errors;
        }
    }
    // constituents() does not include preterminal spans, so compare the
    // POS tags of the yields directly.
    IList<TaggedWord> goldWords = goldTree.TaggedYield();
    IList<TaggedWord> guessWords = guessTree.TaggedYield();
    int limit = Math.Min(goldWords.Count, guessWords.Count);
    for (int i = 0; i < limit; ++i)
    {
        string goldTag = tlp.BasicCategory(goldWords[i].Tag());
        string guessTag = tlp.BasicCategory(guessWords[i].Tag());
        if (!goldTag.Equals(guessTag))
        {
            // A wrong preterminal is a span missing from the guess plus a
            // span missing from the gold: two errors.
            errors += 2;
        }
    }
    return errors;
}
// pcfgPE.printGoodBad();
/// <summary>
/// Returns a new list of TaggedWords in which every tag has been reduced
/// to its basic category; the input list is not modified.
/// </summary>
private static IList<TaggedWord> CleanTags(IList<TaggedWord> twList, ITreebankLanguagePack tlp)
{
    // Presize to the input length to avoid growth reallocations.
    IList<TaggedWord> cleaned = new List<TaggedWord>(twList.Count);
    foreach (TaggedWord tagged in twList)
    {
        cleaned.Add(new TaggedWord(tagged.Word(), tlp.BasicCategory(tagged.Tag())));
    }
    return cleaned;
}
/// <summary>
/// Builds the tagsToBaseTags array, mapping each tag index to the index
/// of its basic category (adding the basic category to tagIndex if needed).
/// </summary>
private void PopulateTagsToBaseTags(ITreebankLanguagePack tlp)
{
    int numTags = tagIndex.Size();
    tagsToBaseTags = new int[numTags];
    for (int i = 0; i < numTags; i++)
    {
        string baseTag = tlp.BasicCategory(tagIndex.Get(i));
        // AddToIndex returns the (possibly newly created) index of baseTag.
        tagsToBaseTags[i] = tagIndex.AddToIndex(baseTag);
    }
}
/// <summary>
/// If t is a PP node, groups the daughters between its leading preposition
/// tags and trailing postposition tags into a newly inserted NP node,
/// replacing those daughters in t with the single NP.  No-op for non-PP nodes.
/// </summary>
/// <param name="t">The tree node to (possibly) restructure in place</param>
private void InsertNPinPP(Tree t)
{
    if (tlp.BasicCategory(t.Label().Value()).Equals("PP"))
    {
        Tree[] kids = t.Children();
        int i = 0;
        int j = kids.Length - 1;
        // Advance i past any leading preposition-tagged daughters.
        while (i < j && prepositionTags.Contains(tlp.BasicCategory(kids[i].Label().Value())))
        {
            i++;
        }
        // i now indexes first dtr of new NP
        // Retreat j past any trailing postposition-tagged daughters.
        while (i < j && postpositionTags.Contains(tlp.BasicCategory(kids[j].Label().Value())))
        {
            j--;
        }
        // j now indexes last dtr of new NP
        if (i > j)
        {
            log.Info("##### Warning -- no NP material here!");
            return;
        }
        // there is no NP material!
        // Daughters i..j (inclusive) become the children of the new NP.
        int npKidsLength = j - i + 1;
        Tree[] npKids = new Tree[npKidsLength];
        System.Array.Copy(kids, i, npKids, 0, npKidsLength);
        Tree np = t.TreeFactory().NewTreeNode(t.Label().LabelFactory().NewLabel("NP"), Arrays.AsList(npKids));
        // New PP children: kids[0..i-1], the NP, then kids[j+1..].
        Tree[] newPPkids = new Tree[kids.Length - npKidsLength + 1];
        // Copies i+1 elements (one more than the prefix); slot i is immediately
        // overwritten with the NP below, so the extra copy is harmless.
        System.Array.Copy(kids, 0, newPPkids, 0, i + 1);
        newPPkids[i] = np;
        System.Array.Copy(kids, j + 1, newPPkids, i + 1, kids.Length - j - 1);
        t.SetChildren(newPPkids);
        System.Console.Out.WriteLine("#### inserted NP in PP");
        t.PennPrint();
    }
}
/// <summary>
/// Ensures logProbs holds the tag log-probabilities for the given word,
/// recomputing only when the word differs from the last one scored.
/// Function words are pinned to their fixed tag (0 log-prob for any tag
/// whose basic category matches, -infinity otherwise); other words are
/// scored by the classifier, optionally normalized by the tag prior.
/// </summary>
/// <param name="word">Index of the word in wordIndex</param>
/// <param name="subtractTagScore">Whether to subtract log P(tag) from each score</param>
private void EnsureProbs(int word, bool subtractTagScore)
{
    if (word == lastWord)
    {
        // Cached from the previous call for the same word.
        return;
    }
    lastWord = word;
    string wordStr = wordIndex.Get(word);
    if (functionWordTags.Contains(wordStr))
    {
        logProbs = new ClassicCounter<string>();
        string trueTag = functionWordTags[wordStr];
        foreach (string tag in tagIndex.ObjectsList())
        {
            if (ctlp.BasicCategory(tag).Equals(trueTag))
            {
                logProbs.SetCount(tag, 0);
            }
            else
            {
                logProbs.SetCount(tag, double.NegativeInfinity);
            }
        }
        return;
    }
    IDatum datum = new BasicDatum(featExtractor.MakeFeatures(wordStr));
    logProbs = scorer.LogProbabilityOf(datum);
    if (subtractTagScore)
    {
        // Convert P(tag|word)-style scores toward P(word|tag) by removing the tag prior.
        foreach (string tag in logProbs.KeySet())
        {
            logProbs.IncrementCount(tag, -Math.Log(tagDist.ProbabilityOf(tag)));
        }
    }
}
/// <summary>
/// Collapses a node-set (or block of members) to a single representative:
/// a singleton set yields its sole member (recursively processed if it is
/// itself a block); a set whose first member is a passive category string
/// (not starting with '@') yields that basic category suffixed with the
/// set's hash; anything else yields a synthetic "@NodeSet-" name.
/// </summary>
/// <param name="node">An ISet or IBlock to process</param>
/// <exception cref="Exception">If node is neither an ISet nor an IBlock</exception>
public virtual object ProcessNode(object node)
{
    ISet s = null;
    if (node is ISet)
    {
        s = (ISet)node;
    }
    else
    {
        if (node is IBlock)
        {
            IBlock b = (IBlock)node;
            s = b.GetMembers();
        }
        else
        {
            throw new Exception("Unexpected node class");
        }
    }
    // Bug fix: Current is undefined before MoveNext() in C# (the old code was a
    // literal translation of Java's iterator().next()).  Advance first; an empty
    // set yields a null sample and falls through to the synthetic name below.
    var en = s.GetEnumerator();
    object sampleNode = en.MoveNext() ? en.Current : null;
    if (s.Count == 1)
    {
        if (sampleNode is IBlock)
        {
            return ProcessNode(sampleNode);
        }
        else
        {
            return sampleNode;
        }
    }
    // nope there's a set of things
    if (sampleNode is string)
    {
        string str = (string)sampleNode;
        if (str[0] != '@')
        {
            // passive category...
            return tlp.BasicCategory(str) + "-" + s.GetHashCode();
        }
    }
    // TODO remove b/c there could be collisions
    // return tlp.basicCategory(str) + "-" + System.identityHashCode(s);
    return "@NodeSet-" + s.GetHashCode();
}
// only leaves NP-TMP and NP-ADV
/// <summary>
/// Reduces a label to its basic category while preserving the -TMP and
/// -ADV functional annotations (detected on the original label before
/// stripping).  A null label maps to the empty string.
/// </summary>
/// <param name="label">The label to clean; may be null</param>
protected internal virtual string CleanUpLabel(string label)
{
    if (label == null)
    {
        // Shouldn't really happen, but unlabeled nodes occur further down trees
        // in at least the 20100730-era American National Corpus.
        return string.Empty;
    }
    // Check the functional annotations before BasicCategory strips them.
    bool keepTmp = TmpPattern.Matcher(label).Matches();
    bool keepAdv = AdvPattern.Matcher(label).Matches();
    string cleaned = tlp.BasicCategory(label);
    if (keepTmp)
    {
        return cleaned + "-TMP";
    }
    if (keepAdv)
    {
        return cleaned + "-ADV";
    }
    return cleaned;
}
/// <summary>
/// Recursively tallies rule counts for the subtree t: for each nonterminal
/// (or, when doTags is set, each preterminal) it increments the counter for
/// its child-label sequence keyed by the node label alone, by (node, parent),
/// and by (node, parent, grandparent).  Labels are reduced to basic
/// categories when a language pack is present.
/// </summary>
/// <param name="gP">Grandparent label of t</param>
/// <param name="p">Parent label of t</param>
/// <param name="t">The subtree to process</param>
public virtual void ProcessTreeHelper(string gP, string p, Tree t)
{
    if (t.IsLeaf() || (!doTags && t.IsPreTerminal()))
    {
        // stop at words/tags
        return;
    }
    IDictionary<string, ClassicCounter<IList<string>>> nr;
    IDictionary<IList<string>, ClassicCounter<IList<string>>> pr;
    IDictionary<IList<string>, ClassicCounter<IList<string>>> gpr;
    if (t.IsPreTerminal())
    {
        nr = tagNodeRules;
        pr = tagPRules;
        gpr = tagGPRules;
    }
    else
    {
        nr = nodeRules;
        pr = pRules;
        gpr = gPRules;
    }
    string n = t.Label().Value();
    if (tlp != null)
    {
        p = tlp.BasicCategory(p);
        gP = tlp.BasicCategory(gP);
    }
    IList<string> kidn = KidLabels(t);
    IncrementRuleCount(nr, n, kidn);
    IList<string> pairStr = new List<string>(2) { n, p };
    IncrementRuleCount(pr, pairStr, kidn);
    IList<string> tripleStr = new List<string>(3) { n, p, gP };
    IncrementRuleCount(gpr, tripleStr, kidn);
    foreach (Tree kid in t.Children())
    {
        ProcessTreeHelper(p, n, kid);
    }
}

/// <summary>
/// Increments the count of kids under the counter stored at key, creating
/// the counter if absent.  Uses TryGetValue: the previous indexer-then-null
/// pattern throws KeyNotFoundException on missing keys with a standard
/// generic dictionary (a Java map.get() mistranslation).
/// </summary>
private static void IncrementRuleCount<TKey>(IDictionary<TKey, ClassicCounter<IList<string>>> rules, TKey key, IList<string> kids)
{
    ClassicCounter<IList<string>> cntr;
    if (!rules.TryGetValue(key, out cntr))
    {
        cntr = new ClassicCounter<IList<string>>();
        rules[key] = cntr;
    }
    cntr.IncrementCount(kids);
}
/// <summary>
/// Runs the exhaustive dependency parse over the sentence, filling the
/// inside chart (iScoreH), the outside chart (oScoreH, only when doPCFG),
/// and the possible-span filter arrays, then reports whether a parse exists.
/// </summary>
/// <param name="sentence">The words to parse; elements may also carry a gold
/// tag (IHasTag), which constrains tagging, and word context (IHasContext).</param>
/// <returns>HasParse(): whether the chart admits a complete parse</returns>
/// <exception cref="OutOfMemoryException">If the sentence would need arrays
/// larger than the configured/previously-failed limits allow.</exception>
public virtual bool Parse<_T0>(IList<_T0> sentence)
    where _T0 : IHasWord
{
    if (op.testOptions.verbose)
    {
        Timing.Tick("Starting dependency parse.");
    }
    this.sentence = sentence;
    int length = sentence.Count;
    // Grow the chart arrays if this sentence is longer than any seen so far;
    // on failure, restore the old arrays and remember the length that failed.
    if (length > arraySize)
    {
        if (length > op.testOptions.maxLength + 1 || length >= myMaxLength)
        {
            throw new OutOfMemoryException("Refusal to create such large arrays.");
        }
        else
        {
            try
            {
                CreateArrays(length + 1);
            }
            catch (OutOfMemoryException e)
            {
                myMaxLength = length;
                if (arraySize > 0)
                {
                    try
                    {
                        CreateArrays(arraySize);
                    }
                    catch (OutOfMemoryException)
                    {
                        throw new Exception("CANNOT EVEN CREATE ARRAYS OF ORIGINAL SIZE!!! " + arraySize);
                    }
                }
                throw;
            }
            arraySize = length + 1;
            if (op.testOptions.verbose)
            {
                log.Info("Created dparser arrays of size " + arraySize);
            }
        }
    }
    if (op.testOptions.verbose)
    {
        log.Info("Initializing...");
    }
    // map to words
    words = new int[length];
    int numTags = dg.NumTagBins();
    //tagIndex.size();
    //System.out.println("\nNumTags: "+numTags);
    //System.out.println(tagIndex);
    bool[][] hasTag = new bool[length][];
    // NOTE(review): the rows of hasTag are not visibly allocated before being
    // indexed below (hasTag[hWord][hTag] = ...); presumably handled by the
    // translation layer or elsewhere — verify this does not NRE.
    for (int i = 0; i < length; i++)
    {
        //if (wordIndex.contains(sentence.get(i).toString()))
        words[i] = wordIndex.AddToIndex(sentence[i].Word());
    }
    //else
    //words[i] = wordIndex.indexOf(Lexicon.UNKNOWN_WORD);
    // Clear the inside/outside head-score charts.
    for (int head = 0; head < length; head++)
    {
        for (int tag = 0; tag < numTags; tag++)
        {
            Arrays.Fill(iScoreH[head][tag], float.NegativeInfinity);
            Arrays.Fill(oScoreH[head][tag], float.NegativeInfinity);
        }
    }
    // Precompute raw and binned distances from each head to each chart position.
    for (int head_1 = 0; head_1 < length; head_1++)
    {
        for (int loc = 0; loc <= length; loc++)
        {
            rawDistance[head_1][loc] = (head_1 >= loc ? head_1 - loc : loc - head_1 - 1);
            binDistance[head_1][loc] = dg.DistanceBin(rawDistance[head_1][loc]);
        }
    }
    if (Thread.Interrupted())
    {
        throw new RuntimeInterruptedException();
    }
    // do tags
    for (int start = 0; start + 1 <= length; start++)
    {
        //Force tags
        string trueTagStr = null;
        if (sentence[start] is IHasTag)
        {
            trueTagStr = ((IHasTag)sentence[start]).Tag();
            if (string.Empty.Equals(trueTagStr))
            {
                trueTagStr = null;
            }
        }
        //Word context (e.g., morphosyntactic info)
        string wordContextStr = null;
        if (sentence[start] is IHasContext)
        {
            wordContextStr = ((IHasContext)sentence[start]).OriginalText();
            if (string.Empty.Equals(wordContextStr))
            {
                wordContextStr = null;
            }
        }
        int word = words[start];
        for (IEnumerator<IntTaggedWord> taggingI = lex.RuleIteratorByWord(word, start, wordContextStr); taggingI.MoveNext();)
        {
            IntTaggedWord tagging = taggingI.Current;
            if (trueTagStr != null)
            {
                // A gold tag was supplied: skip lexicon taggings that disagree.
                if (!tlp.BasicCategory(tagging.TagString(tagIndex)).Equals(trueTagStr))
                {
                    continue;
                }
            }
            float score = lex.Score(tagging, start, wordIndex.Get(tagging.word), wordContextStr);
            //iScoreH[start][tag][start] = (op.dcTags ? (float)op.testOptions.depWeight*score : 0.0f);
            if (score > float.NegativeInfinity)
            {
                int tag = tagging.tag;
                iScoreH[start][dg.TagBin(tag)][start] = 0.0f;
                iScoreH[start][dg.TagBin(tag)][start + 1] = 0.0f;
            }
        }
    }
    // Record which (word, tag-bin) pairs are viable and clear the per-pair caches.
    for (int hWord = 0; hWord < length; hWord++)
    {
        for (int hTag = 0; hTag < numTags; hTag++)
        {
            hasTag[hWord][hTag] = (iScoreH[hWord][hTag][hWord] + iScoreH[hWord][hTag][hWord + 1] > float.NegativeInfinity);
            Arrays.Fill(headStop[hWord][hTag], float.NegativeInfinity);
            for (int aWord = 0; aWord < length; aWord++)
            {
                for (int dist = 0; dist < dg.NumDistBins(); dist++)
                {
                    Arrays.Fill(headScore[dist][hWord][hTag][aWord], float.NegativeInfinity);
                }
            }
        }
    }
    // score and cache all pairs -- headScores and stops
    //int hit = 0;
    for (int hWord_1 = 0; hWord_1 < length; hWord_1++)
    {
        for (int hTag = 0; hTag < numTags; hTag++)
        {
            //Arrays.fill(headStopL[hWord][hTag], Float.NEGATIVE_INFINITY);
            //Arrays.fill(headStopR[hWord][hTag], Float.NEGATIVE_INFINITY);
            //Arrays.fill(headStop[hWord][hTag], Float.NEGATIVE_INFINITY);
            if (!hasTag[hWord_1][hTag])
            {
                continue;
            }
            // Cache the stop scores for this head at every split position.
            for (int split = 0; split <= length; split++)
            {
                if (split <= hWord_1)
                {
                    headStop[hWord_1][hTag][split] = (float)dg.ScoreTB(words[hWord_1], hTag, -2, -2, false, hWord_1 - split);
                }
                else
                {
                    //System.out.println("headstopL " + hWord +" " + hTag + " " + split + " " + headStopL[hWord][hTag][split]); // debugging
                    headStop[hWord_1][hTag][split] = (float)dg.ScoreTB(words[hWord_1], hTag, -2, -2, true, split - hWord_1 - 1);
                }
            }
            //System.out.println("headstopR " + hWord +" " + hTag + " " + split + " " + headStopR[hWord][hTag][split]); // debugging
            //hit++;
            //Timing.tick("hWord: "+hWord+" hTag: "+hTag+" piddle count: "+hit);
            // Cache the head-argument attachment scores for every argument word/tag.
            for (int aWord = 0; aWord < length; aWord++)
            {
                if (aWord == hWord_1)
                {
                    continue;
                }
                // can't be argument of yourself
                bool leftHeaded = hWord_1 < aWord;
                int start_1;
                int end;
                if (leftHeaded)
                {
                    start_1 = hWord_1 + 1;
                    end = aWord + 1;
                }
                else
                {
                    start_1 = aWord + 1;
                    end = hWord_1 + 1;
                }
                for (int aTag = 0; aTag < numTags; aTag++)
                {
                    if (!hasTag[aWord][aTag])
                    {
                        continue;
                    }
                    for (int split_1 = start_1; split_1 < end; split_1++)
                    {
                        // Moved this stuff out two loops- GMA
                        // for (int split = 0; split <= length; split++) {
                        // if leftHeaded, go from hWord+1 to aWord
                        // else go from aWord+1 to hWord
                        // if ((leftHeaded && (split <= hWord || split > aWord)) ||
                        // ((!leftHeaded) && (split <= aWord || split > hWord)))
                        // continue;
                        int headDistance = rawDistance[hWord_1][split_1];
                        int binDist = binDistance[hWord_1][split_1];
                        headScore[binDist][hWord_1][hTag][aWord][aTag] = (float)dg.ScoreTB(words[hWord_1], hTag, words[aWord], aTag, leftHeaded, headDistance);
                        //hit++;
                        // skip other splits with same binDist
                        while (split_1 + 1 < end && binDistance[hWord_1][split_1 + 1] == binDist)
                        {
                            split_1++;
                        }
                    }
                }
            }
        }
    }
    // end split
    // end aTag
    // end aWord
    // end hTag
    // end hWord
    if (op.testOptions.verbose)
    {
        Timing.Tick("done.");
        // displayHeadScores();
        log.Info("Starting insides...");
    }
    // do larger spans: inside pass over spans of increasing width.
    for (int diff = 2; diff <= length; diff++)
    {
        if (Thread.Interrupted())
        {
            throw new RuntimeInterruptedException();
        }
        for (int start_1 = 0; start_1 + diff <= length; start_1++)
        {
            int end = start_1 + diff;
            // left extension: the head is at the right edge and takes a left argument.
            int endHead = end - 1;
            for (int endTag = 0; endTag < numTags; endTag++)
            {
                if (!hasTag[endHead][endTag])
                {
                    continue;
                }
                // bestScore is max for iScoreH
                float bestScore = float.NegativeInfinity;
                for (int argHead = start_1; argHead < endHead; argHead++)
                {
                    for (int argTag = 0; argTag < numTags; argTag++)
                    {
                        if (!hasTag[argHead][argTag])
                        {
                            continue;
                        }
                        float argLeftScore = iScoreH[argHead][argTag][start_1];
                        if (argLeftScore == float.NegativeInfinity)
                        {
                            continue;
                        }
                        float stopLeftScore = headStop[argHead][argTag][start_1];
                        if (stopLeftScore == float.NegativeInfinity)
                        {
                            continue;
                        }
                        for (int split = argHead + 1; split < end; split++)
                        {
                            // short circuit if dependency is impossible
                            float depScore = headScore[binDistance[endHead][split]][endHead][endTag][argHead][argTag];
                            if (depScore == float.NegativeInfinity)
                            {
                                continue;
                            }
                            float score = iScoreH[endHead][endTag][split] + argLeftScore + iScoreH[argHead][argTag][split] + depScore + stopLeftScore + headStop[argHead][argTag][split];
                            if (score > bestScore)
                            {
                                bestScore = score;
                            }
                        }
                    }
                }
                // end for split
                // sum for iScoreHSum
                // end for argTag : tags
                // end for argHead
                iScoreH[endHead][endTag][start_1] = bestScore;
            }
            // end for endTag : tags
            // right extension: the head is at the left edge and takes a right argument.
            int startHead = start_1;
            for (int startTag = 0; startTag < numTags; startTag++)
            {
                if (!hasTag[startHead][startTag])
                {
                    continue;
                }
                // bestScore is max for iScoreH
                float bestScore = float.NegativeInfinity;
                for (int argHead = start_1 + 1; argHead < end; argHead++)
                {
                    for (int argTag = 0; argTag < numTags; argTag++)
                    {
                        if (!hasTag[argHead][argTag])
                        {
                            continue;
                        }
                        float argRightScore = iScoreH[argHead][argTag][end];
                        if (argRightScore == float.NegativeInfinity)
                        {
                            continue;
                        }
                        float stopRightScore = headStop[argHead][argTag][end];
                        if (stopRightScore == float.NegativeInfinity)
                        {
                            continue;
                        }
                        for (int split = start_1 + 1; split <= argHead; split++)
                        {
                            // short circuit if dependency is impossible
                            float depScore = headScore[binDistance[startHead][split]][startHead][startTag][argHead][argTag];
                            if (depScore == float.NegativeInfinity)
                            {
                                continue;
                            }
                            float score = iScoreH[startHead][startTag][split] + iScoreH[argHead][argTag][split] + argRightScore + depScore + stopRightScore + headStop[argHead][argTag][split];
                            if (score > bestScore)
                            {
                                bestScore = score;
                            }
                        }
                    }
                }
                // sum for iScoreHSum
                // end for argTag: tags
                // end for argHead
                iScoreH[startHead][startTag][end] = bestScore;
            }
        }
    }
    // end for startTag: tags
    // end for start
    // end for diff (i.e., span)
    int goalTag = dg.TagBin(tagIndex.IndexOf(LexiconConstants.BoundaryTag));
    if (op.testOptions.verbose)
    {
        Timing.Tick("done.");
        log.Info("Dep parsing " + length + " words (incl. stop): insideScore " + (iScoreH[length - 1][goalTag][0] + iScoreH[length - 1][goalTag][length]));
    }
    if (!op.doPCFG)
    {
        // The outside pass is only needed for the combined PCFG+dep parser.
        return(HasParse());
    }
    if (op.testOptions.verbose)
    {
        log.Info("Starting outsides...");
    }
    // Outside pass, seeded at the goal (boundary) tag spanning the whole sentence.
    oScoreH[length - 1][goalTag][0] = 0.0f;
    oScoreH[length - 1][goalTag][length] = 0.0f;
    for (int diff_1 = length; diff_1 > 1; diff_1--)
    {
        if (Thread.Interrupted())
        {
            throw new RuntimeInterruptedException();
        }
        for (int start_1 = 0; start_1 + diff_1 <= length; start_1++)
        {
            int end = start_1 + diff_1;
            // left half
            int endHead = end - 1;
            for (int endTag = 0; endTag < numTags; endTag++)
            {
                if (!hasTag[endHead][endTag])
                {
                    continue;
                }
                for (int argHead = start_1; argHead < endHead; argHead++)
                {
                    for (int argTag = 0; argTag < numTags; argTag++)
                    {
                        if (!hasTag[argHead][argTag])
                        {
                            continue;
                        }
                        for (int split = argHead; split <= endHead; split++)
                        {
                            float subScore = (oScoreH[endHead][endTag][start_1] + headScore[binDistance[endHead][split]][endHead][endTag][argHead][argTag] + headStop[argHead][argTag][start_1] + headStop[argHead][argTag][split]);
                            float scoreRight = (subScore + iScoreH[argHead][argTag][start_1] + iScoreH[argHead][argTag][split]);
                            float scoreMid = (subScore + iScoreH[argHead][argTag][start_1] + iScoreH[endHead][endTag][split]);
                            float scoreLeft = (subScore + iScoreH[argHead][argTag][split] + iScoreH[endHead][endTag][split]);
                            if (scoreRight > oScoreH[endHead][endTag][split])
                            {
                                oScoreH[endHead][endTag][split] = scoreRight;
                            }
                            if (scoreMid > oScoreH[argHead][argTag][split])
                            {
                                oScoreH[argHead][argTag][split] = scoreMid;
                            }
                            if (scoreLeft > oScoreH[argHead][argTag][start_1])
                            {
                                oScoreH[argHead][argTag][start_1] = scoreLeft;
                            }
                        }
                    }
                }
            }
            // right half
            int startHead = start_1;
            for (int startTag = 0; startTag < numTags; startTag++)
            {
                if (!hasTag[startHead][startTag])
                {
                    continue;
                }
                for (int argHead = startHead + 1; argHead < end; argHead++)
                {
                    for (int argTag = 0; argTag < numTags; argTag++)
                    {
                        if (!hasTag[argHead][argTag])
                        {
                            continue;
                        }
                        for (int split = startHead + 1; split <= argHead; split++)
                        {
                            float subScore = (oScoreH[startHead][startTag][end] + headScore[binDistance[startHead][split]][startHead][startTag][argHead][argTag] + headStop[argHead][argTag][split] + headStop[argHead][argTag][end]);
                            float scoreLeft = (subScore + iScoreH[argHead][argTag][split] + iScoreH[argHead][argTag][end]);
                            float scoreMid = (subScore + iScoreH[startHead][startTag][split] + iScoreH[argHead][argTag][end]);
                            float scoreRight = (subScore + iScoreH[startHead][startTag][split] + iScoreH[argHead][argTag][split]);
                            if (scoreLeft > oScoreH[startHead][startTag][split])
                            {
                                oScoreH[startHead][startTag][split] = scoreLeft;
                            }
                            if (scoreMid > oScoreH[argHead][argTag][split])
                            {
                                oScoreH[argHead][argTag][split] = scoreMid;
                            }
                            if (scoreRight > oScoreH[argHead][argTag][end])
                            {
                                oScoreH[argHead][argTag][end] = scoreRight;
                            }
                        }
                    }
                }
            }
        }
    }
    if (op.testOptions.verbose)
    {
        Timing.Tick("done.");
        log.Info("Starting half-filters...");
    }
    // Reset the possible-span filter arrays.
    for (int loc_1 = 0; loc_1 <= length; loc_1++)
    {
        for (int head_2 = 0; head_2 < length; head_2++)
        {
            Arrays.Fill(iPossibleByL[loc_1][head_2], false);
            Arrays.Fill(iPossibleByR[loc_1][head_2], false);
            Arrays.Fill(oPossibleByL[loc_1][head_2], false);
            Arrays.Fill(oPossibleByR[loc_1][head_2], false);
        }
    }
    if (Thread.Interrupted())
    {
        throw new RuntimeInterruptedException();
    }
    // Mark spans whose inside and outside scores are both finite as possible.
    for (int head_3 = 0; head_3 < length; head_3++)
    {
        for (int tag = 0; tag < numTags; tag++)
        {
            if (!hasTag[head_3][tag])
            {
                continue;
            }
            for (int start_1 = 0; start_1 <= head_3; start_1++)
            {
                for (int end = head_3 + 1; end <= length; end++)
                {
                    if (iScoreH[head_3][tag][start_1] + iScoreH[head_3][tag][end] > float.NegativeInfinity && oScoreH[head_3][tag][start_1] + oScoreH[head_3][tag][end] > float.NegativeInfinity)
                    {
                        iPossibleByR[end][head_3][tag] = true;
                        iPossibleByL[start_1][head_3][tag] = true;
                        oPossibleByR[end][head_3][tag] = true;
                        oPossibleByL[start_1][head_3][tag] = true;
                    }
                }
            }
        }
    }
    if (op.testOptions.verbose)
    {
        Timing.Tick("done.");
    }
    return(HasParse());
}
/// <summary>
/// Transforms a tree for evaluation: skips the start symbol, strips labels
/// to basic categories, optionally rewrites/strips WH categories (per the
/// whOption bits), optionally deletes punctuation preterminals, collapses
/// the extra unary NPs added by the collinsBaseNP option, and maps PRT to
/// ADVP.  Returns null when the (sub)tree is deleted entirely.
/// </summary>
/// <param name="tree">The tree to transform; may be null</param>
/// <returns>The transformed tree, or null if it was deleted</returns>
public virtual Tree TransformTree(Tree tree)
{
    if (tree == null)
    {
        return(null);
    }
    ITreeFactory tf = tree.TreeFactory();
    string s = tree.Value();
    if (tlp.IsStartSymbol(s))
    {
        // Drop the ROOT/start symbol and transform its first child instead.
        return(TransformTree(tree.FirstChild()));
    }
    if (tree.IsLeaf())
    {
        return(tf.NewLeaf(tree.Label()));
    }
    s = tlp.BasicCategory(s);
    // whOption bit 1: strip a leading "WH" before the regex rewrites.
    if (((whOption & 1) != 0) && s.StartsWith("WH"))
    {
        s = Sharpen.Runtime.Substring(s, 2);
    }
    // whOption bit 2: rewrite WH POS tags to their non-WH counterparts.
    if ((whOption & 2) != 0)
    {
        s = s.ReplaceAll("^WP", "PRP");
        // does both WP and WP$ !!
        s = s.ReplaceAll("^WDT", "DT");
        s = s.ReplaceAll("^WRB", "RB");
    }
    // whOption bit 4: strip a leading "WH" after the regex rewrites.
    if (((whOption & 4) != 0) && s.StartsWith("WH"))
    {
        s = Sharpen.Runtime.Substring(s, 2);
    }
    // wsg2010: Might need a better way to deal with tag ambiguity. This still doesn't handle the
    // case where the GOLD tree does not label a punctuation mark as such (common in French), and
    // the guess tree does.
    if (deletePunct && tree.IsPreTerminal() && (tlp.IsEvalBIgnoredPunctuationTag(s) || tlp.IsPunctuationWord(tree.FirstChild().Value())))
    {
        return(null);
    }
    // remove the extra NPs inserted in the collinsBaseNP option
    if (fixCollinsBaseNP && s.Equals("NP"))
    {
        Tree[] kids = tree.Children();
        if (kids.Length == 1 && tlp.BasicCategory(kids[0].Value()).Equals("NP"))
        {
            return(TransformTree(kids[0]));
        }
    }
    // Magerman erased this distinction, and everyone else has followed like sheep...
    if (s.Equals("PRT"))
    {
        s = "ADVP";
    }
    IList <Tree> children = new List <Tree>();
    // NOTE(review): numKids is not declared anywhere in this method — presumably
    // a field, or a local lost in translation (likely tree.Children().Length in
    // the original); verify it always equals the child count here.
    for (int cNum = 0; cNum < numKids; cNum++)
    {
        Tree child = tree.Children()[cNum];
        Tree newChild = TransformTree(child);
        if (newChild != null)
        {
            children.Add(newChild);
        }
    }
    if (children.IsEmpty())
    {
        // Every child was deleted, so delete this node as well.
        return(null);
    }
    Tree node = tf.NewTreeNode(tree.Label(), children);
    node.SetValue(s);
    return(node);
}
/// <summary>
/// Maps a label to its basic category via the language pack.
/// </summary>
/// <param name="in">The label string to reduce</param>
/// <returns>The basic category of the label</returns>
public virtual string Apply(string @in)
{
    return tlp.BasicCategory(@in);
}
/// <summary>
/// Called by determineHead and may be overridden in subclasses
/// if special treatment is necessary for particular categories.
/// Looks up the head rules for this node's basic category and tries
/// each in turn, falling back to defaultRule (or throwing) when no
/// rules are known for the category.
/// </summary>
/// <param name="t">The tree to determine the head daughter of</param>
/// <param name="parent">The parent of t (or may be null)</param>
/// <returns>The head daughter of t</returns>
protected internal virtual Tree DetermineNonTrivialHead(Tree t, Tree parent)
{
    string motherCat = tlp.BasicCategory(t.Label().Value());
    // Binarization-internal categories carry a leading '@'; strip it.
    if (motherCat.StartsWith("@"))
    {
        motherCat = Sharpen.Runtime.Substring(motherCat, 1);
    }
    if (Debug)
    {
        log.Info("Looking for head of " + t.Label() + "; value is |" + t.Label().Value() + "|, " + " baseCat is |" + motherCat + '|');
    }
    // We know we have nonterminals underneath
    // (a bit of a Penn Treebank assumption, but).
    string[][] how = nonTerminalInfo[motherCat];
    Tree[] kids = t.Children();
    if (how == null)
    {
        if (Debug)
        {
            log.Info("Warning: No rule found for " + motherCat + " (first char: " + motherCat[0] + ')');
            log.Info("Known nonterms are: " + nonTerminalInfo.Keys);
        }
        if (defaultRule == null)
        {
            // TreePrint because TreeGraphNode only prints the node number,
            // doesn't print the tree structure
            TreePrint printer = new TreePrint("penn");
            StringWriter buffer = new StringWriter();
            printer.PrintTree(t, new PrintWriter(buffer));
            // TODO: we could get really fancy and define our own
            // exception class to represent this
            throw new ArgumentException("No head rule defined for " + motherCat + " using " + this.GetType() + " in " + buffer.ToString());
        }
        if (Debug)
        {
            log.Info(" Using defaultRule");
        }
        return TraverseLocate(kids, defaultRule, true);
    }
    // Try each rule in order; the final rule runs in last-resort mode.
    Tree theHead = null;
    for (int ruleNum = 0; ruleNum < how.Length; ruleNum++)
    {
        theHead = TraverseLocate(kids, how[ruleNum], ruleNum == how.Length - 1);
        if (theHead != null)
        {
            break;
        }
    }
    if (Debug)
    {
        log.Info(" Chose " + theHead.Label());
    }
    return theHead;
}
/* some documentation for Roger's convenience * {pcfg,dep,combo}{PE,DE,TE} are precision/dep/tagging evals for the models * * parser is the PCFG parser * dparser is the dependency parser * bparser is the combining parser * * during testing: * tree is the test tree (gold tree) * binaryTree is the gold tree binarized * tree2b is the best PCFG paser, binarized * tree2 is the best PCFG parse (debinarized) * tree3 is the dependency parse, binarized * tree3db is the dependency parser, debinarized * tree4 is the best combo parse, binarized and then debinarized * tree4b is the best combo parse, binarized */ public static void Main(string[] args) { Options op = new Options(new EnglishTreebankParserParams()); // op.tlpParams may be changed to something else later, so don't use it till // after options are parsed. StringUtils.LogInvocationString(log, args); string path = "/u/nlp/stuff/corpora/Treebank3/parsed/mrg/wsj"; int trainLow = 200; int trainHigh = 2199; int testLow = 2200; int testHigh = 2219; string serializeFile = null; int i = 0; while (i < args.Length && args[i].StartsWith("-")) { if (Sharpen.Runtime.EqualsIgnoreCase(args[i], "-path") && (i + 1 < args.Length)) { path = args[i + 1]; i += 2; } else { if (Sharpen.Runtime.EqualsIgnoreCase(args[i], "-train") && (i + 2 < args.Length)) { trainLow = System.Convert.ToInt32(args[i + 1]); trainHigh = System.Convert.ToInt32(args[i + 2]); i += 3; } else { if (Sharpen.Runtime.EqualsIgnoreCase(args[i], "-test") && (i + 2 < args.Length)) { testLow = System.Convert.ToInt32(args[i + 1]); testHigh = System.Convert.ToInt32(args[i + 2]); i += 3; } else { if (Sharpen.Runtime.EqualsIgnoreCase(args[i], "-serialize") && (i + 1 < args.Length)) { serializeFile = args[i + 1]; i += 2; } else { if (Sharpen.Runtime.EqualsIgnoreCase(args[i], "-tLPP") && (i + 1 < args.Length)) { try { op.tlpParams = (ITreebankLangParserParams)System.Activator.CreateInstance(Sharpen.Runtime.GetType(args[i + 1])); } catch (TypeLoadException e) { log.Info("Class not 
found: " + args[i + 1]); throw new Exception(e); } catch (InstantiationException e) { log.Info("Couldn't instantiate: " + args[i + 1] + ": " + e.ToString()); throw new Exception(e); } catch (MemberAccessException e) { log.Info("illegal access" + e); throw new Exception(e); } i += 2; } else { if (args[i].Equals("-encoding")) { // sets encoding for TreebankLangParserParams op.tlpParams.SetInputEncoding(args[i + 1]); op.tlpParams.SetOutputEncoding(args[i + 1]); i += 2; } else { i = op.SetOptionOrWarn(args, i); } } } } } } } // System.out.println(tlpParams.getClass()); ITreebankLanguagePack tlp = op.tlpParams.TreebankLanguagePack(); op.trainOptions.sisterSplitters = Generics.NewHashSet(Arrays.AsList(op.tlpParams.SisterSplitters())); // BinarizerFactory.TreeAnnotator.setTreebankLang(tlpParams); PrintWriter pw = op.tlpParams.Pw(); op.testOptions.Display(); op.trainOptions.Display(); op.Display(); op.tlpParams.Display(); // setup tree transforms Treebank trainTreebank = op.tlpParams.MemoryTreebank(); MemoryTreebank testTreebank = op.tlpParams.TestMemoryTreebank(); // Treebank blippTreebank = ((EnglishTreebankParserParams) tlpParams).diskTreebank(); // String blippPath = "/afs/ir.stanford.edu/data/linguistic-data/BLLIP-WSJ/"; // blippTreebank.loadPath(blippPath, "", true); Timing.StartTime(); log.Info("Reading trees..."); testTreebank.LoadPath(path, new NumberRangeFileFilter(testLow, testHigh, true)); if (op.testOptions.increasingLength) { testTreebank.Sort(new TreeLengthComparator()); } trainTreebank.LoadPath(path, new NumberRangeFileFilter(trainLow, trainHigh, true)); Timing.Tick("done."); log.Info("Binarizing trees..."); TreeAnnotatorAndBinarizer binarizer; if (!op.trainOptions.leftToRight) { binarizer = new TreeAnnotatorAndBinarizer(op.tlpParams, op.forceCNF, !op.trainOptions.OutsideFactor(), true, op); } else { binarizer = new TreeAnnotatorAndBinarizer(op.tlpParams.HeadFinder(), new LeftHeadFinder(), op.tlpParams, op.forceCNF, !op.trainOptions.OutsideFactor(), true, 
op); } CollinsPuncTransformer collinsPuncTransformer = null; if (op.trainOptions.collinsPunc) { collinsPuncTransformer = new CollinsPuncTransformer(tlp); } ITreeTransformer debinarizer = new Debinarizer(op.forceCNF); IList <Tree> binaryTrainTrees = new List <Tree>(); if (op.trainOptions.selectiveSplit) { op.trainOptions.splitters = ParentAnnotationStats.GetSplitCategories(trainTreebank, op.trainOptions.tagSelectiveSplit, 0, op.trainOptions.selectiveSplitCutOff, op.trainOptions.tagSelectiveSplitCutOff, op.tlpParams.TreebankLanguagePack()); if (op.trainOptions.deleteSplitters != null) { IList <string> deleted = new List <string>(); foreach (string del in op.trainOptions.deleteSplitters) { string baseDel = tlp.BasicCategory(del); bool checkBasic = del.Equals(baseDel); for (IEnumerator <string> it = op.trainOptions.splitters.GetEnumerator(); it.MoveNext();) { string elem = it.Current; string baseElem = tlp.BasicCategory(elem); bool delStr = checkBasic && baseElem.Equals(baseDel) || elem.Equals(del); if (delStr) { it.Remove(); deleted.Add(elem); } } } log.Info("Removed from vertical splitters: " + deleted); } } if (op.trainOptions.selectivePostSplit) { ITreeTransformer myTransformer = new TreeAnnotator(op.tlpParams.HeadFinder(), op.tlpParams, op); Treebank annotatedTB = trainTreebank.Transform(myTransformer); op.trainOptions.postSplitters = ParentAnnotationStats.GetSplitCategories(annotatedTB, true, 0, op.trainOptions.selectivePostSplitCutOff, op.trainOptions.tagSelectivePostSplitCutOff, op.tlpParams.TreebankLanguagePack()); } if (op.trainOptions.hSelSplit) { binarizer.SetDoSelectiveSplit(false); foreach (Tree tree in trainTreebank) { if (op.trainOptions.collinsPunc) { tree = collinsPuncTransformer.TransformTree(tree); } //tree.pennPrint(tlpParams.pw()); tree = binarizer.TransformTree(tree); } //binaryTrainTrees.add(tree); binarizer.SetDoSelectiveSplit(true); } foreach (Tree tree_1 in trainTreebank) { if (op.trainOptions.collinsPunc) { tree_1 = 
collinsPuncTransformer.TransformTree(tree_1); } tree_1 = binarizer.TransformTree(tree_1); binaryTrainTrees.Add(tree_1); } if (op.testOptions.verbose) { binarizer.DumpStats(); } IList <Tree> binaryTestTrees = new List <Tree>(); foreach (Tree tree_2 in testTreebank) { if (op.trainOptions.collinsPunc) { tree_2 = collinsPuncTransformer.TransformTree(tree_2); } tree_2 = binarizer.TransformTree(tree_2); binaryTestTrees.Add(tree_2); } Timing.Tick("done."); // binarization BinaryGrammar bg = null; UnaryGrammar ug = null; IDependencyGrammar dg = null; // DependencyGrammar dgBLIPP = null; ILexicon lex = null; IIndex <string> stateIndex = new HashIndex <string>(); // extract grammars IExtractor <Pair <UnaryGrammar, BinaryGrammar> > bgExtractor = new BinaryGrammarExtractor(op, stateIndex); //Extractor bgExtractor = new SmoothedBinaryGrammarExtractor();//new BinaryGrammarExtractor(); // Extractor lexExtractor = new LexiconExtractor(); //Extractor dgExtractor = new DependencyMemGrammarExtractor(); if (op.doPCFG) { log.Info("Extracting PCFG..."); Pair <UnaryGrammar, BinaryGrammar> bgug = null; if (op.trainOptions.cheatPCFG) { IList <Tree> allTrees = new List <Tree>(binaryTrainTrees); Sharpen.Collections.AddAll(allTrees, binaryTestTrees); bgug = bgExtractor.Extract(allTrees); } else { bgug = bgExtractor.Extract(binaryTrainTrees); } bg = bgug.second; bg.SplitRules(); ug = bgug.first; ug.PurgeRules(); Timing.Tick("done."); } log.Info("Extracting Lexicon..."); IIndex <string> wordIndex = new HashIndex <string>(); IIndex <string> tagIndex = new HashIndex <string>(); lex = op.tlpParams.Lex(op, wordIndex, tagIndex); lex.InitializeTraining(binaryTrainTrees.Count); lex.Train(binaryTrainTrees); lex.FinishTraining(); Timing.Tick("done."); if (op.doDep) { log.Info("Extracting Dependencies..."); binaryTrainTrees.Clear(); IExtractor <IDependencyGrammar> dgExtractor = new MLEDependencyGrammarExtractor(op, wordIndex, tagIndex); // dgBLIPP = (DependencyGrammar) dgExtractor.extract(new 
ConcatenationIterator(trainTreebank.iterator(),blippTreebank.iterator()),new TransformTreeDependency(tlpParams,true)); // DependencyGrammar dg1 = dgExtractor.extract(trainTreebank.iterator(), new TransformTreeDependency(op.tlpParams, true)); //dgBLIPP=(DependencyGrammar)dgExtractor.extract(blippTreebank.iterator(),new TransformTreeDependency(tlpParams)); //dg = (DependencyGrammar) dgExtractor.extract(new ConcatenationIterator(trainTreebank.iterator(),blippTreebank.iterator()),new TransformTreeDependency(tlpParams)); // dg=new DependencyGrammarCombination(dg1,dgBLIPP,2); dg = dgExtractor.Extract(binaryTrainTrees); //uses information whether the words are known or not, discards unknown words Timing.Tick("done."); //System.out.print("Extracting Unknown Word Model..."); //UnknownWordModel uwm = (UnknownWordModel)uwmExtractor.extract(binaryTrainTrees); //Timing.tick("done."); System.Console.Out.Write("Tuning Dependency Model..."); dg.Tune(binaryTestTrees); //System.out.println("TUNE DEPS: "+tuneDeps); Timing.Tick("done."); } BinaryGrammar boundBG = bg; UnaryGrammar boundUG = ug; IGrammarProjection gp = new NullGrammarProjection(bg, ug); // serialization if (serializeFile != null) { log.Info("Serializing parser..."); LexicalizedParser parser = new LexicalizedParser(lex, bg, ug, dg, stateIndex, wordIndex, tagIndex, op); parser.SaveParserToSerialized(serializeFile); Timing.Tick("done."); } // test: pcfg-parse and output ExhaustivePCFGParser parser_1 = null; if (op.doPCFG) { parser_1 = new ExhaustivePCFGParser(boundBG, boundUG, lex, op, stateIndex, wordIndex, tagIndex); } ExhaustiveDependencyParser dparser = ((op.doDep && !op.testOptions.useFastFactored) ? new ExhaustiveDependencyParser(dg, lex, op, wordIndex, tagIndex) : null); IScorer scorer = (op.doPCFG ? new TwinScorer(new ProjectionScorer(parser_1, gp, op), dparser) : null); //Scorer scorer = parser; BiLexPCFGParser bparser = null; if (op.doPCFG && op.doDep) { bparser = (op.testOptions.useN5) ? 
new BiLexPCFGParser.N5BiLexPCFGParser(scorer, parser_1, dparser, bg, ug, dg, lex, op, gp, stateIndex, wordIndex, tagIndex) : new BiLexPCFGParser(scorer, parser_1, dparser, bg, ug, dg, lex, op, gp, stateIndex , wordIndex, tagIndex); } Evalb pcfgPE = new Evalb("pcfg PE", true); Evalb comboPE = new Evalb("combo PE", true); AbstractEval pcfgCB = new Evalb.CBEval("pcfg CB", true); AbstractEval pcfgTE = new TaggingEval("pcfg TE"); AbstractEval comboTE = new TaggingEval("combo TE"); AbstractEval pcfgTEnoPunct = new TaggingEval("pcfg nopunct TE"); AbstractEval comboTEnoPunct = new TaggingEval("combo nopunct TE"); AbstractEval depTE = new TaggingEval("depnd TE"); AbstractEval depDE = new UnlabeledAttachmentEval("depnd DE", true, null, tlp.PunctuationWordRejectFilter()); AbstractEval comboDE = new UnlabeledAttachmentEval("combo DE", true, null, tlp.PunctuationWordRejectFilter()); if (op.testOptions.evalb) { EvalbFormatWriter.InitEVALBfiles(op.tlpParams); } // int[] countByLength = new int[op.testOptions.maxLength+1]; // Use a reflection ruse, so one can run this without needing the // tagger. Using a function rather than a MaxentTagger means we // can distribute a version of the parser that doesn't include the // entire tagger. 
IFunction <IList <IHasWord>, List <TaggedWord> > tagger = null; if (op.testOptions.preTag) { try { Type[] argsClass = new Type[] { typeof(string) }; object[] arguments = new object[] { op.testOptions.taggerSerializedFile }; tagger = (IFunction <IList <IHasWord>, List <TaggedWord> >)Sharpen.Runtime.GetType("edu.stanford.nlp.tagger.maxent.MaxentTagger").GetConstructor(argsClass).NewInstance(arguments); } catch (Exception e) { log.Info(e); log.Info("Warning: No pretagging of sentences will be done."); } } for (int tNum = 0; tNum < ttSize; tNum++) { Tree tree = testTreebank[tNum]; int testTreeLen = tree_2.Yield().Count; if (testTreeLen > op.testOptions.maxLength) { continue; } Tree binaryTree = binaryTestTrees[tNum]; // countByLength[testTreeLen]++; System.Console.Out.WriteLine("-------------------------------------"); System.Console.Out.WriteLine("Number: " + (tNum + 1)); System.Console.Out.WriteLine("Length: " + testTreeLen); //tree.pennPrint(pw); // System.out.println("XXXX The binary tree is"); // binaryTree.pennPrint(pw); //System.out.println("Here are the tags in the lexicon:"); //System.out.println(lex.showTags()); //System.out.println("Here's the tagnumberer:"); //System.out.println(Numberer.getGlobalNumberer("tags").toString()); long timeMil1 = Runtime.CurrentTimeMillis(); Timing.Tick("Starting parse."); if (op.doPCFG) { //log.info(op.testOptions.forceTags); if (op.testOptions.forceTags) { if (tagger != null) { //System.out.println("Using a tagger to set tags"); //System.out.println("Tagged sentence as: " + tagger.processSentence(cutLast(wordify(binaryTree.yield()))).toString(false)); parser_1.Parse(AddLast(tagger.Apply(CutLast(Wordify(binaryTree.Yield()))))); } else { //System.out.println("Forcing tags to match input."); parser_1.Parse(CleanTags(binaryTree.TaggedYield(), tlp)); } } else { // System.out.println("XXXX Parsing " + binaryTree.yield()); parser_1.Parse(binaryTree.YieldHasWord()); } } //Timing.tick("Done with pcfg phase."); if (op.doDep) { 
dparser.Parse(binaryTree.YieldHasWord()); } //Timing.tick("Done with dependency phase."); bool bothPassed = false; if (op.doPCFG && op.doDep) { bothPassed = bparser.Parse(binaryTree.YieldHasWord()); } //Timing.tick("Done with combination phase."); long timeMil2 = Runtime.CurrentTimeMillis(); long elapsed = timeMil2 - timeMil1; log.Info("Time: " + ((int)(elapsed / 100)) / 10.00 + " sec."); //System.out.println("PCFG Best Parse:"); Tree tree2b = null; Tree tree2 = null; //System.out.println("Got full best parse..."); if (op.doPCFG) { tree2b = parser_1.GetBestParse(); tree2 = debinarizer.TransformTree(tree2b); } //System.out.println("Debinarized parse..."); //tree2.pennPrint(); //System.out.println("DepG Best Parse:"); Tree tree3 = null; Tree tree3db = null; if (op.doDep) { tree3 = dparser.GetBestParse(); // was: but wrong Tree tree3db = debinarizer.transformTree(tree2); tree3db = debinarizer.TransformTree(tree3); tree3.PennPrint(pw); } //tree.pennPrint(); //((Tree)binaryTrainTrees.get(tNum)).pennPrint(); //System.out.println("Combo Best Parse:"); Tree tree4 = null; if (op.doPCFG && op.doDep) { try { tree4 = bparser.GetBestParse(); if (tree4 == null) { tree4 = tree2b; } } catch (ArgumentNullException) { log.Info("Blocked, using PCFG parse!"); tree4 = tree2b; } } if (op.doPCFG && !bothPassed) { tree4 = tree2b; } //tree4.pennPrint(); if (op.doDep) { depDE.Evaluate(tree3, binaryTree, pw); depTE.Evaluate(tree3db, tree_2, pw); } ITreeTransformer tc = op.tlpParams.Collinizer(); ITreeTransformer tcEvalb = op.tlpParams.CollinizerEvalb(); if (op.doPCFG) { // System.out.println("XXXX Best PCFG was: "); // tree2.pennPrint(); // System.out.println("XXXX Transformed best PCFG is: "); // tc.transformTree(tree2).pennPrint(); //System.out.println("True Best Parse:"); //tree.pennPrint(); //tc.transformTree(tree).pennPrint(); pcfgPE.Evaluate(tc.TransformTree(tree2), tc.TransformTree(tree_2), pw); pcfgCB.Evaluate(tc.TransformTree(tree2), tc.TransformTree(tree_2), pw); Tree tree4b = 
null; if (op.doDep) { comboDE.Evaluate((bothPassed ? tree4 : tree3), binaryTree, pw); tree4b = tree4; tree4 = debinarizer.TransformTree(tree4); if (op.nodePrune) { NodePruner np = new NodePruner(parser_1, debinarizer); tree4 = np.Prune(tree4); } //tree4.pennPrint(); comboPE.Evaluate(tc.TransformTree(tree4), tc.TransformTree(tree_2), pw); } //pcfgTE.evaluate(tree2, tree); pcfgTE.Evaluate(tcEvalb.TransformTree(tree2), tcEvalb.TransformTree(tree_2), pw); pcfgTEnoPunct.Evaluate(tc.TransformTree(tree2), tc.TransformTree(tree_2), pw); if (op.doDep) { comboTE.Evaluate(tcEvalb.TransformTree(tree4), tcEvalb.TransformTree(tree_2), pw); comboTEnoPunct.Evaluate(tc.TransformTree(tree4), tc.TransformTree(tree_2), pw); } System.Console.Out.WriteLine("PCFG only: " + parser_1.ScoreBinarizedTree(tree2b, 0)); //tc.transformTree(tree2).pennPrint(); tree2.PennPrint(pw); if (op.doDep) { System.Console.Out.WriteLine("Combo: " + parser_1.ScoreBinarizedTree(tree4b, 0)); // tc.transformTree(tree4).pennPrint(pw); tree4.PennPrint(pw); } System.Console.Out.WriteLine("Correct:" + parser_1.ScoreBinarizedTree(binaryTree, 0)); /* * if (parser.scoreBinarizedTree(tree2b,true) < parser.scoreBinarizedTree(binaryTree,true)) { * System.out.println("SCORE INVERSION"); * parser.validateBinarizedTree(binaryTree,0); * } */ tree_2.PennPrint(pw); } // end if doPCFG if (op.testOptions.evalb) { if (op.doPCFG && op.doDep) { EvalbFormatWriter.WriteEVALBline(tcEvalb.TransformTree(tree_2), tcEvalb.TransformTree(tree4)); } else { if (op.doPCFG) { EvalbFormatWriter.WriteEVALBline(tcEvalb.TransformTree(tree_2), tcEvalb.TransformTree(tree2)); } else { if (op.doDep) { EvalbFormatWriter.WriteEVALBline(tcEvalb.TransformTree(tree_2), tcEvalb.TransformTree(tree3db)); } } } } } // end for each tree in test treebank if (op.testOptions.evalb) { EvalbFormatWriter.CloseEVALBfiles(); } // op.testOptions.display(); if (op.doPCFG) { pcfgPE.Display(false, pw); System.Console.Out.WriteLine("Grammar size: " + stateIndex.Size()); 
pcfgCB.Display(false, pw); if (op.doDep) { comboPE.Display(false, pw); } pcfgTE.Display(false, pw); pcfgTEnoPunct.Display(false, pw); if (op.doDep) { comboTE.Display(false, pw); comboTEnoPunct.Display(false, pw); } } if (op.doDep) { depTE.Display(false, pw); depDE.Display(false, pw); } if (op.doPCFG && op.doDep) { comboDE.Display(false, pw); } }
/// <summary>
/// Produces a copy of each constituent whose label has been reduced to its
/// basic category via <c>tlp.BasicCategory()</c>, preserving the original span.
/// Results are collected in a set, so constituents that become identical after
/// simplification collapse to one entry.
/// </summary>
/// <param name="tlp">Language pack used to strip functional annotations from the label.</param>
/// <param name="constituents">Input constituents; every element must be a <c>LabeledConstituent</c>.</param>
/// <returns>A set of span-preserving, basic-category constituents.</returns>
/// <exception cref="AssertionError">If any element is not a <c>LabeledConstituent</c>.</exception>
public static ICollection <Constituent> SimplifyConstituents(ITreebankLanguagePack tlp, ICollection <Constituent> constituents)
{
    ICollection <Constituent> simplified = new HashSet <Constituent>();
    foreach (Constituent c in constituents)
    {
        LabeledConstituent lc = c as LabeledConstituent;
        if (lc == null)
        {
            // Only labeled constituents carry a category we can simplify.
            throw new AssertionError("Unexpected constituent type " + c.GetType());
        }
        string basicCategory = tlp.BasicCategory(lc.Value());
        simplified.Add(new LabeledConstituent(lc.Start(), lc.End(), basicCategory));
    }
    return(simplified);
}