/// <summary>
/// Locates the tree node that serves as the syntactic head of a mention.
/// </summary>
/// <remarks>
/// Strategies are tried in this order:
/// <list type="number">
/// <item><description>Look up a constituent via <c>FindTreeWithSpan</c> for the mention span
/// (with a trailing possessive marker dropped) and take its head.</description></item>
/// <item><description>If <c>allowReparsing</c> is set, parse the mention on its own, wrapped as
/// "It was ... .", take the head of that parse and map it back onto a leaf of
/// <paramref name="root"/>.</description></item>
/// <item><description>Otherwise take the head of the result of <c>FindTreeWithSmallestSpan</c>,
/// provided that head lies inside the mention.</description></item>
/// <item><description>As a last resort pick a leaf by POS tag: the last token whose tag starts
/// with "N" before any token whose tag starts with "W", defaulting to the last token of the
/// span.</description></item>
/// </list>
/// </remarks>
/// <param name="m">The mention; its <c>startIndex</c>, <c>endIndex</c> and <c>originalSpan</c> are read.</param>
/// <param name="root">The parse tree searched for the head.</param>
/// <param name="tokens">Tokens indexed by the same positions as the mention's start/end indices.</param>
/// <returns>The tree node chosen as the head of the mention.</returns>
protected internal virtual Tree FindSyntacticHead(Mention m, Tree root, IList<CoreLabel> tokens)
{
    // If the mention ends with a possessive marker ('s or '), leave that token out of the
    // span used for head finding -- unless the marker is the whole mention.
    int endIdx = m.endIndex;
    if (m.originalSpan.Count > 0)
    {
        string lastWord = m.originalSpan[m.originalSpan.Count - 1].Get(typeof(CoreAnnotations.TextAnnotation));
        bool endsWithPossessive = lastWord.Equals("'s") || lastWord.Equals("'");
        if (endsWithPossessive && m.originalSpan.Count != 1)
        {
            endIdx--;
        }
    }

    // Strategy 1: a constituent found for exactly this span.
    Tree exactMatch = FindTreeWithSpan(root, m.startIndex, endIdx);
    if (exactMatch != null)
    {
        return SafeHead(exactMatch, endIdx);
    }

    // Strategy 2: no exact match found. Parse the actual extent of the mention, embedded in
    // a sentence context, so as to make the parser work better.
    if (allowReparsing)
    {
        // Number of words prepended to the extent ("It was").
        const int AddedWords = 2;

        // Counts the dashes dropped from the extent; passed on to the leaf lookup below.
        int approximateness = 0;

        IList<CoreLabel> extentTokens = new List<CoreLabel>();
        extentTokens.Add(InitCoreLabel("It"));
        extentTokens.Add(InitCoreLabel("was"));
        for (int i = m.startIndex; i < endIdx; i++)
        {
            // Add everything except separated dashes! The separated dashes mess with the
            // parser too badly.
            CoreLabel label = tokens[i];
            if ("-".Equals(label.Word()))
            {
                approximateness++;
            }
            else
            {
                // Necessary to copy tokens in case the parser does things like put new
                // indices on the tokens.
                extentTokens.Add((CoreLabel)label.LabelFactory().NewLabel(label));
            }
        }
        extentTokens.Add(InitCoreLabel("."));

        // Constrain the parse to the part we're interested in.
        // Starting from AddedWords comes from skipping "It was".
        // -1 to exclude the period.
        // The pattern accepts any constituent label, since there are VP and S ones too.
        ParserConstraint constraint = new ParserConstraint(AddedWords, extentTokens.Count - 1, Pattern.Compile(".*"));
        IList<ParserConstraint> constraints = Java.Util.Collections.SingletonList(constraint);
        Tree tree = Parse(extentTokens, constraints);

        // Now unnecessary, as parser uses CoreLabels?
        ConvertToCoreLabels(tree);

        // Remember the extent has AddedWords extra words at the beginning.
        tree.IndexSpans(m.startIndex - AddedWords);
        Tree subtree = FindPartialSpan(tree, m.startIndex);

        // There was a possible problem that with a crazy parse, extentHead could be one of
        // the added words, not a real word! FindPartialSpan is expected to rule out anything
        // before the real start, and SafeHead anything past the right end (that is, just
        // that final period).
        Tree extentHead = SafeHead(subtree, endIdx);

        // Debug.Assert is compiled out of non-DEBUG builds; a null here would then surface
        // as a NullReferenceException on the next statement.
        System.Diagnostics.Debug.Assert(extentHead != null);

        // extentHead is a node in the local extent parse tree; we need the corresponding
        // node in the main tree. Because dashes were deleted, its index in the main tree
        // will be >= its index in the extent parse tree.
        CoreLabel headLabel = (CoreLabel)extentHead.Label();
        Tree realHead = FunkyFindLeafWithApproximateSpan(
            root,
            headLabel.Value(),
            headLabel.Get(typeof(CoreAnnotations.BeginIndexAnnotation)),
            approximateness);
        System.Diagnostics.Debug.Assert(realHead != null);
        return realHead;
    }

    // Strategy 3: reparsing wasn't allowed, so try to find a span in the tree which happens
    // to have the head.
    Tree wordMatch = FindTreeWithSmallestSpan(root, m.startIndex, endIdx);
    if (wordMatch != null)
    {
        Tree head = SafeHead(wordMatch, endIdx);
        if (head != null)
        {
            // IndexAnnotation is shifted by one relative to the token positions used here.
            int index = ((CoreLabel)head.Label()).Get(typeof(CoreAnnotations.IndexAnnotation)) - 1;
            if (index >= m.startIndex && index < endIdx)
            {
                return head;
            }
        }
    }

    // Strategy 4: if that didn't work, guess that it's the last word, preferring the last
    // token tagged "N*" that occurs before any token tagged "W*".
    // NOTE(review): this scan runs to m.endIndex, not the possessive-adjusted endIdx;
    // kept as in the original -- confirm that is intended.
    int lastNounIdx = endIdx - 1;
    for (int tokenIdx = m.startIndex; tokenIdx < m.endIndex; tokenIdx++)
    {
        // POS tags are symbolic identifiers, so compare ordinally rather than by culture.
        string tag = tokens[tokenIdx].Tag();
        if (tag.StartsWith("N", System.StringComparison.Ordinal))
        {
            lastNounIdx = tokenIdx;
        }
        else if (tag.StartsWith("W", System.StringComparison.Ordinal))
        {
            break;
        }
    }

    IList<Tree> leaves = root.GetLeaves();
    return leaves[lastNounIdx];
}