Exemple #1
0
        /// <summary>
        /// Picks the tree node to use as the syntactic head of a mention.
        /// </summary>
        /// <remarks>
        /// A trailing "'s" or "'" token is excluded from the mention span unless it is the
        /// mention's only token. The following strategies are then tried in order:
        /// <list type="number">
        /// <item><description>A subtree of <paramref name="root"/> found by <c>FindTreeWithSpan</c>
        /// for the (trimmed) span; its <c>SafeHead</c> is returned.</description></item>
        /// <item><description>If <c>allowReparsing</c> is set, the mention tokens (minus standalone
        /// "-" tokens) are re-parsed inside the carrier sentence "It was ... ." and the resulting
        /// head is mapped back onto a leaf of <paramref name="root"/>.</description></item>
        /// <item><description>Otherwise, the result of <c>FindTreeWithSmallestSpan</c> is used,
        /// provided its head's index lies inside the trimmed span.</description></item>
        /// <item><description>As a last resort, a leaf is chosen by POS tag: the last token whose
        /// tag starts with "N" before any token whose tag starts with "W", defaulting to the
        /// last token of the trimmed span.</description></item>
        /// </list>
        /// </remarks>
        /// <param name="m">Mention whose <c>startIndex</c>, <c>endIndex</c> and <c>originalSpan</c> are read.</param>
        /// <param name="root">Parse tree that is searched for the head.</param>
        /// <param name="tokens">Tokens indexed by the same positions as the mention's start/end indices.</param>
        /// <returns>
        /// The chosen head node. On the exact-match path this is whatever <c>SafeHead</c> returns,
        /// which is not checked for null here.
        /// </returns>
        protected internal virtual Tree FindSyntacticHead(Mention m, Tree root, IList <CoreLabel> tokens)
        {
            // mention ends with 's (or a bare apostrophe): drop that token from the span,
            // unless it is the whole mention
            int endIdx = m.endIndex;

            if (m.originalSpan.Count > 0)
            {
                string lastWord = m.originalSpan[m.originalSpan.Count - 1].Get(typeof(CoreAnnotations.TextAnnotation));
                if ((lastWord.Equals("'s") || lastWord.Equals("'")) && m.originalSpan.Count != 1)
                {
                    endIdx--;
                }
            }
            Tree exactMatch = FindTreeWithSpan(root, m.startIndex, endIdx);

            //
            // found an exact match
            //
            if (exactMatch != null)
            {
                return(SafeHead(exactMatch, endIdx));
            }
            // no exact match found
            // in this case, we parse the actual extent of the mention, embedded in a sentence
            // context, so as to make the parser work better :-)
            if (allowReparsing)
            {
                // Number of carrier words ("It", "was") placed in front of the mention tokens.
                const int AddedWords = 2;
                // Counts the dash tokens dropped below; passed on as the index tolerance when
                // mapping the head back onto the main tree.
                int approximateness            = 0;
                IList <CoreLabel> extentTokens = new List <CoreLabel>();
                extentTokens.Add(InitCoreLabel("It"));
                extentTokens.Add(InitCoreLabel("was"));
                for (int i = m.startIndex; i < endIdx; i++)
                {
                    // Add everything except separated dashes! The separated dashes mess with the parser too badly.
                    CoreLabel label = tokens[i];
                    if (!"-".Equals(label.Word()))
                    {
                        // necessary to copy tokens in case the parser does things like
                        // put new indices on the tokens
                        extentTokens.Add((CoreLabel)label.LabelFactory().NewLabel(label));
                    }
                    else
                    {
                        approximateness++;
                    }
                }
                extentTokens.Add(InitCoreLabel("."));
                // constrain the parse to the part we're interested in.
                // Starting from AddedWords comes from skipping "It was".
                // -1 to exclude the period.
                // We now let it be any kind of nominal constituent, since there
                // are VP and S ones
                ParserConstraint         constraint  = new ParserConstraint(AddedWords, extentTokens.Count - 1, Pattern.Compile(".*"));
                IList <ParserConstraint> constraints = Java.Util.Collections.SingletonList(constraint);
                Tree tree = Parse(extentTokens, constraints);
                ConvertToCoreLabels(tree);
                // now unnecessary, as parser uses CoreLabels?
                tree.IndexSpans(m.startIndex - AddedWords);
                // remember it has AddedWords extra words at the beginning
                Tree subtree = FindPartialSpan(tree, m.startIndex);
                // There was a possible problem that with a crazy parse, extentHead could be one of the added words, not a real word!
                // Now we make sure in FindPartialSpan that it can't be before the real start, and in SafeHead, we disallow something
                // past the right end (that is, just that final period).
                Tree extentHead = SafeHead(subtree, endIdx);
                System.Diagnostics.Debug.Assert((extentHead != null));
                // extentHead is a child in the local extent parse tree. we need to find the corresponding node in the main tree
                // Because we deleted dashes, its index will be >= the index in the extent parse tree
                CoreLabel l        = (CoreLabel)extentHead.Label();
                Tree      realHead = FunkyFindLeafWithApproximateSpan(root, l.Value(), l.Get(typeof(CoreAnnotations.BeginIndexAnnotation)), approximateness);
                System.Diagnostics.Debug.Assert((realHead != null));
                return(realHead);
            }
            // If reparsing wasn't allowed, try to find a span in the tree
            // which happens to have the head
            Tree wordMatch = FindTreeWithSmallestSpan(root, m.startIndex, endIdx);

            if (wordMatch != null)
            {
                Tree head = SafeHead(wordMatch, endIdx);
                if (head != null)
                {
                    // IndexAnnotation is shifted by one relative to the token positions used here
                    int index = ((CoreLabel)head.Label()).Get(typeof(CoreAnnotations.IndexAnnotation)) - 1;
                    if (index >= m.startIndex && index < endIdx)
                    {
                        return(head);
                    }
                }
            }
            // If that didn't work, guess that it's the last word
            int lastNounIdx = endIdx - 1;

            // NOTE: this scan runs to m.endIndex (the untrimmed end), not endIdx.
            for (int tokenIdx = m.startIndex; tokenIdx < m.endIndex; tokenIdx++)
            {
                // POS tags are machine identifiers, so compare them ordinally rather than
                // with the current culture's collation rules.
                string tag = tokens[tokenIdx].Tag();
                if (tag.StartsWith("N", System.StringComparison.Ordinal))
                {
                    lastNounIdx = tokenIdx;
                }
                else
                {
                    if (tag.StartsWith("W", System.StringComparison.Ordinal))
                    {
                        // stop at the first W-tagged token; keep the last N-tagged token seen so far
                        break;
                    }
                }
            }
            IList <Tree> leaves  = root.GetLeaves();
            Tree         endLeaf = leaves[lastNounIdx];

            return(endLeaf);
        }