Exemple #1
0
        /// <summary>Main method of mention detection.</summary>
        /// <remarks>
        /// Extracts all NP, PRP or NE candidates sentence by sentence, assigns a head to each
        /// candidate, and then filters the candidates with manually written patterns.
        /// </remarks>
        /// <param name="doc">
        /// Document whose sentences are read from <c>CoreAnnotations.SentencesAnnotation</c>.
        /// </param>
        /// <param name="dict">Dictionaries handed to the spurious-mention filters.</param>
        /// <param name="props">
        /// Properties queried through <c>CorefProperties</c> (liberal mention detection and
        /// nested-mention removal).
        /// </param>
        /// <returns>
        /// One mention list per sentence, in the same order as the sentences of <paramref name="doc"/>.
        /// </returns>
        public override IList<IList<Mention>> FindMentions(Annotation doc, Dictionaries dict, Properties props)
        {
            IList<IList<Mention>> predictedMentions = new List<IList<Mention>>();
            ICollection<string> neStrings = Generics.NewHashSet();
            IList<ICollection<IntPair>> mentionSpanSetList = Generics.NewArrayList();
            IList<ICoreMap> sentences = doc.Get(typeof(CoreAnnotations.SentencesAnnotation));

            // extract premarked mentions, NP/PRP, named entity, enumerations
            foreach (ICoreMap s in sentences)
            {
                IList<Mention> mentions = new List<Mention>();
                predictedMentions.Add(mentions);
                ICollection<IntPair> mentionSpanSet = Generics.NewHashSet();
                ICollection<IntPair> namedEntitySpanSet = Generics.NewHashSet();
                ExtractPremarkedEntityMentions(s, mentions, mentionSpanSet, namedEntitySpanSet);
                ExtractNamedEntityMentions(s, mentions, mentionSpanSet, namedEntitySpanSet);
                ExtractNPorPRP(s, mentions, mentionSpanSet, namedEntitySpanSet);
                ExtractEnumerations(s, mentions, mentionSpanSet, namedEntitySpanSet);
                AddNamedEntityStrings(s, neStrings, namedEntitySpanSet);
                mentionSpanSetList.Add(mentionSpanSet);
            }

            if (CorefProperties.LiberalMD(props))
            {
                ExtractNamedEntityModifiers(sentences, mentionSpanSetList, predictedMentions, neStrings);
            }

            // find head
            // predictedMentions received exactly one list per sentence above, so the sentence
            // count is the bound for both indexed collections. (The bound used to be an
            // identifier, "sz", that was never declared in this method.)
            for (int i = 0, sentenceCount = sentences.Count; i < sentenceCount; i++)
            {
                FindHead(sentences[i], predictedMentions[i]);
                SetBarePlural(predictedMentions[i]);
            }

            // mention selection based on document-wise info
            // NOTE(review): "lang" is not declared in this method; presumably a member of the
            // enclosing class - confirm.
            if (lang == Locale.English && !CorefProperties.LiberalMD(props))
            {
                RemoveSpuriousMentionsEn(doc, predictedMentions, dict);
            }
            else if (lang == Locale.Chinese)
            {
                if (CorefProperties.LiberalMD(props))
                {
                    RemoveSpuriousMentionsZhSimple(doc, predictedMentions, dict);
                }
                else
                {
                    RemoveSpuriousMentionsZh(doc, predictedMentions, dict, CorefProperties.RemoveNestedMentions(props));
                }
            }
            // Any other language/setting combination returns the mentions unfiltered.

            return predictedMentions;
        }