예제 #1
0
        /// <summary>
        /// Scans the tokens of a noun phrase from right to left, starting at the second-to-last token
        /// and skipping the head token, and records the first handled personal/possessive pronoun
        /// as a pronoun-modifier mention.
        /// </summary>
        /// <param name="nounPhrase">The noun phrase whose tokens are examined.</param>
        /// <param name="entities">The list that receives the new mention; at most one is added.</param>
        private void CollectPossessivePronouns(IParse nounPhrase, List <Mention> entities)
        {
            //TODO: Look at how training is done and examine whether this is needed or can be accommodated in a different way.
            // A disabled earlier (Java) variant added a pronoun-modifier extent for every
            // sub-noun-phrase when any existed, and only otherwise ran the token scan below.

            List <IParse> tokens = nounPhrase.Tokens;
            IParse        head   = mHeadFinder.GetHeadToken(nounPhrase);

            int position = tokens.Count - 2;
            while (position >= 0)
            {
                IParse candidate = tokens[position--];

                bool isPronounModifier = candidate != head
                                         && PartsOfSpeech.IsPersOrPossPronoun(candidate.SyntacticType)
                                         && IsHandledPronoun(candidate.ToString());
                if (isPronounModifier)
                {
                    // Only the right-most matching pronoun is recorded.
                    entities.Add(new Mention(candidate.Span, candidate.Span, candidate.EntityId, null, Linker.PronounModifier));
                    return;
                }
            }
        }
예제 #2
0
        /// <summary>
        /// Turns a list of noun phrases into a sorted array of mentions with duplicates removed.
        /// </summary>
        /// <param name="nounPhrases">The noun phrases to process, visited in list order.</param>
        /// <param name="headMap">Head mapping handed to <c>IsHeadOfExistingMention</c>.</param>
        /// <returns>The collected mentions after <c>Sort</c> and <c>RemoveDuplicates</c>.</returns>
        private Mention[] CollectMentions(List <IParse> nounPhrases, Dictionary <IParse, IParse> headMap)
        {
            List <Mention>    collected      = new List <Mention>(nounPhrases.Count);
            Util.Set <IParse> activeMentions = new Util.HashSet <IParse>();

            int index = 0;
            while (index < nounPhrases.Count)
            {
                IParse current = nounPhrases[index++];

                // A noun phrase that is the head of an already-collected mention gets no mention of its own.
                bool opensMention = !IsHeadOfExistingMention(current, headMap, activeMentions);
                if (opensMention)
                {
                    ClearMentions(activeMentions, current);
                }

                // Noun phrases that are part of a name are excluded as well.
                if (opensMention && !IsPartOfName(current))
                {
                    IParse  lastHead = mHeadFinder.GetLastHead(current);
                    Mention mention  = new Mention(current.Span, lastHead.Span, lastHead.EntityId, current, null);
                    collected.Add(mention);
                    activeMentions.Add(current);

                    // determine name-entity type
                    string nameType = GetEntityType(mHeadFinder.GetHeadToken(lastHead));
                    if (nameType != null)
                    {
                        mention.NameType = nameType;
                    }
                }

                if (!IsBasalNounPhrase(current))
                {
                    // Could use to get NP -> tokens CON structures for basal nps including NP -> NAC tokens
                    continue;
                }

                if (mPrenominalNamedEntitiesCollection)
                {
                    CollectPrenominalNamedEntities(current, collected);
                }
                if (mCoordinatedNounPhrasesCollection)
                {
                    CollectCoordinatedNounPhraseMentions(current, collected);
                }
                CollectPossessivePronouns(current, collected);
            }

            collected.Sort();
            RemoveDuplicates(collected);
            return collected.ToArray();
        }
예제 #3
0
        /// <summary>
        /// Adds "CNP" mentions for the conjunct segments of a coordinated noun phrase.
        /// </summary>
        /// <remarks>
        /// Tokens are visited right to left, beginning just before the index returned by
        /// <c>mHeadFinder.GetHeadIndex</c>. An "and"/"or" whose left neighbour has a syntactic type
        /// starting with "NN" closes the non-empty segment to its right; any other "and"/"or" with a
        /// non-empty segment stops the scan. Once a conjunction has been accepted, commas close
        /// further segments and reaching token 0 closes the left-most one.
        /// </remarks>
        /// <param name="nounPhrase">The noun phrase whose tokens are scanned.</param>
        /// <param name="entities">The list that receives the new mentions.</param>
        private void CollectCoordinatedNounPhraseMentions(IParse nounPhrase, List <Mention> entities)
        {
            List <IParse> tokens          = nounPhrase.Tokens;
            bool          conjunctionSeen = false;
            // Index of the last token of the segment currently being closed.
            int           segmentEnd      = mHeadFinder.GetHeadIndex(nounPhrase);

            for (int position = segmentEnd - 1; position >= 0; position--)
            {
                IParse current = tokens[position];
                string text    = current.ToString();

                if (text == "and" || text == "or")
                {
                    if (segmentEnd != position)
                    {
                        bool nounOnLeft = position >= 1 && tokens[position - 1].SyntacticType.StartsWith("NN");
                        if (!nounOnLeft)
                        {
                            break;
                        }
                        Util.Span conjunctSpan = new Util.Span(tokens[position + 1].Span.Start, tokens[segmentEnd].Span.End);
                        entities.Add(new Mention(conjunctSpan, conjunctSpan, current.EntityId, null, "CNP"));
                        conjunctionSeen = true;
                    }
                    segmentEnd = position - 1;
                    continue;
                }

                // Commas and the phrase start only matter after a conjunction was accepted.
                if (!conjunctionSeen)
                {
                    continue;
                }

                if (text.Equals(","))
                {
                    if (segmentEnd != position)
                    {
                        Util.Span conjunctSpan = new Util.Span(tokens[position + 1].Span.Start, tokens[segmentEnd].Span.End);
                        entities.Add(new Mention(conjunctSpan, conjunctSpan, current.EntityId, null, "CNP"));
                    }
                    segmentEnd = position - 1;
                }
                else if (position == 0 && segmentEnd >= 0)
                {
                    Util.Span conjunctSpan = new Util.Span(tokens[position].Span.Start, tokens[segmentEnd].Span.End);
                    entities.Add(new Mention(conjunctSpan, conjunctSpan, current.EntityId, null, "CNP"));
                }
            }
        }
예제 #4
0
        /// <summary>
        /// Produces the sorted, de-duplicated mentions for the given noun phrases.
        /// </summary>
        /// <param name="nounPhrases">The noun phrases to process, visited in list order.</param>
        /// <param name="headMap">Head mapping handed to <c>IsHeadOfExistingMention</c>.</param>
        /// <returns>The collected mentions after <c>Sort</c> and <c>RemoveDuplicates</c>.</returns>
        private Mention[] CollectMentions(List <IParse> nounPhrases, Dictionary <IParse, IParse> headMap)
        {
            var               result = new List <Mention>(nounPhrases.Count);
            Util.Set <IParse> recent = new Util.HashSet <IParse>();

            for (int i = 0; i < nounPhrases.Count; i++)
            {
                IParse phrase = nounPhrases[i];

                // Skip phrases that merely head a mention collected earlier.
                bool isNewMention = !IsHeadOfExistingMention(phrase, headMap, recent);
                if (isNewMention)
                {
                    ClearMentions(recent, phrase);
                }

                if (isNewMention && !IsPartOfName(phrase))
                {
                    IParse phraseHead = mHeadFinder.GetLastHead(phrase);
                    var    created    = new Mention(phrase.Span, phraseHead.Span, phraseHead.EntityId, phrase, null);
                    result.Add(created);
                    recent.Add(phrase);

                    // determine name-entity type
                    string detectedType = GetEntityType(mHeadFinder.GetHeadToken(phraseHead));
                    if (detectedType != null)
                    {
                        created.NameType = detectedType;
                    }
                }

                if (!IsBasalNounPhrase(phrase))
                {
                    // Could use to get NP -> tokens CON structures for basal nps including NP -> NAC tokens
                    continue;
                }

                if (mPrenominalNamedEntitiesCollection)
                {
                    CollectPrenominalNamedEntities(phrase, result);
                }
                if (mCoordinatedNounPhrasesCollection)
                {
                    CollectCoordinatedNounPhraseMentions(phrase, result);
                }
                CollectPossessivePronouns(phrase, result);
            }

            result.Sort();
            RemoveDuplicates(result);
            return result.ToArray();
        }
예제 #5
0
        /// <summary>
        /// Removes, in place, every mention whose span equals the span of the most recently kept mention.
        /// </summary>
        /// <remarks>
        /// Only consecutive entries are compared, so the list is expected to be sorted by the caller.
        /// The previous implementation called <c>extents.Remove</c> while enumerating the same list with
        /// <c>foreach</c>, which invalidates the enumerator and throws <c>InvalidOperationException</c>
        /// as soon as a duplicate is removed. It also searched for the element by equality, which could
        /// drop a different entry than the one being visited. This version compacts the list by index.
        /// </remarks>
        /// <param name="extents">The list to de-duplicate; it is modified in place.</param>
        private static void RemoveDuplicates(List <Mention> extents)
        {
            Mention lastExtent = null;
            int     keptCount  = 0;

            for (int readIndex = 0; readIndex < extents.Count; readIndex++)
            {
                Mention extent = extents[readIndex];
                if (lastExtent != null && extent.Span.Equals(lastExtent.Span))
                {
                    // Same span as the last kept mention: drop it by not copying it forward.
                    continue;
                }

                extents[keptCount] = extent;
                keptCount++;
                lastExtent = extent;
            }

            // Everything past keptCount is either a duplicate or a stale copy.
            extents.RemoveRange(keptCount, extents.Count - keptCount);
        }
예제 #6
0
        /// <summary>
        /// Adds "CNP" mentions for the conjunct segments of a coordinated noun phrase.
        /// </summary>
        /// <remarks>
        /// Tokens are visited right to left, beginning just before the index returned by
        /// <c>mHeadFinder.GetHeadIndex</c>. An "and"/"or" whose left neighbour satisfies
        /// <c>PartsOfSpeech.IsNoun</c> closes the non-empty segment to its right; any other "and"/"or"
        /// with a non-empty segment stops the scan. Once a conjunction has been accepted, tokens equal
        /// to <c>PartsOfSpeech.Comma</c> close further segments and reaching token 0 closes the
        /// left-most one.
        /// </remarks>
        /// <param name="nounPhrase">The noun phrase whose tokens are scanned.</param>
        /// <param name="entities">The list that receives the new mentions.</param>
        private void CollectCoordinatedNounPhraseMentions(IParse nounPhrase, List <Mention> entities)
        {
            List <IParse> tokens          = nounPhrase.Tokens;
            bool          conjunctionSeen = false;
            // Index of the last token of the segment currently being closed.
            int           segmentEnd      = mHeadFinder.GetHeadIndex(nounPhrase);

            for (int position = segmentEnd - 1; position >= 0; position--)
            {
                IParse current = tokens[position];
                string text    = current.ToString();

                if (text == "and" || text == "or")
                {
                    if (segmentEnd != position)
                    {
                        bool nounOnLeft = position >= 1 && PartsOfSpeech.IsNoun(tokens[position - 1].SyntacticType);
                        if (!nounOnLeft)
                        {
                            break;
                        }
                        var conjunctSpan = new Util.Span(tokens[position + 1].Span.Start, tokens[segmentEnd].Span.End);
                        entities.Add(new Mention(conjunctSpan, conjunctSpan, current.EntityId, null, "CNP"));
                        conjunctionSeen = true;
                    }
                    segmentEnd = position - 1;
                    continue;
                }

                // Commas and the phrase start only matter after a conjunction was accepted.
                if (!conjunctionSeen)
                {
                    continue;
                }

                if (text == PartsOfSpeech.Comma)
                {
                    if (segmentEnd != position)
                    {
                        var conjunctSpan = new Util.Span(tokens[position + 1].Span.Start, tokens[segmentEnd].Span.End);
                        entities.Add(new Mention(conjunctSpan, conjunctSpan, current.EntityId, null, "CNP"));
                    }
                    segmentEnd = position - 1;
                }
                else if (position == 0 && segmentEnd >= 0)
                {
                    var conjunctSpan = new Util.Span(tokens[position].Span.Start, tokens[segmentEnd].Span.End);
                    entities.Add(new Mention(conjunctSpan, conjunctSpan, current.EntityId, null, "CNP"));
                }
            }
        }
예제 #7
0
        /*/// <summary>
         * /// Adds a mention for the non-treebank-labeled possessive noun phrases.
         * /// </summary>
         * /// <param name="possessiveNounPhrase">
         * /// The possessive noun phrase which may require an additional mention.
         * /// </param>
         * /// <param name="mentions">
         * /// The list of mentions into which a new mention can be added.
         * /// </param>
         * private void AddPossessiveMentions(IParse possessiveNounPhrase, List<Mention> mentions)
         * {
         * List<IParse> kids = possessiveNounPhrase.SyntacticChildren;
         * if (kids.Count > 1)
         * {
         * IParse firstToken = kids[1];
         * if (firstToken.IsToken && firstToken.SyntacticType != "POS")
         * {
         *  IParse lastToken = kids[kids.Count - 1];
         *  if (lastToken.IsToken)
         *  {
         *      var extentSpan = new Util.Span(firstToken.Span.Start, lastToken.Span.End);
         *      var extent = new Mention(extentSpan, extentSpan, - 1, null, null);
         *      mentions.Add(extent);
         *  }
         *  else
         *  {
         *      Console.Error.WriteLine("AbstractMentionFinder.AddPossessiveMentions: odd parse structure: " + possessiveNounPhrase);
         *  }
         * }
         * }
         * }*/

        /// <summary>
        /// Adds a "NAME" mention for every named entity of the noun phrase whose span does not
        /// contain the span of the phrase's head token.
        /// </summary>
        /// <param name="nounPhrase">The noun phrase supplying the named entities and the head token.</param>
        /// <param name="extents">The list that receives the new mentions.</param>
        private void CollectPrenominalNamedEntities(IParse nounPhrase, List <Mention> extents)
        {
            IParse        head     = mHeadFinder.GetHeadToken(nounPhrase);
            List <IParse> entities = nounPhrase.NamedEntities;
            Util.Span     headSpan = head.Span;

            foreach (IParse entity in entities)
            {
                // Entities covering the head are skipped.
                if (entity.Span.Contains(headSpan))
                {
                    continue;
                }

                extents.Add(new Mention(entity.Span, entity.Span, entity.EntityId, null, "NAME")
                {
                    NameType = entity.EntityType
                });
            }
        }
예제 #8
0
        /// <summary>
        /// Scans the tokens of a noun phrase from right to left, starting at the second-to-last token
        /// and skipping the head token, and records the first handled token whose syntactic type
        /// starts with "PRP" as a pronoun-modifier mention.
        /// </summary>
        /// <param name="nounPhrase">The noun phrase whose tokens are examined.</param>
        /// <param name="entities">The list that receives the new mention; at most one is added.</param>
        private void CollectPossessivePronouns(IParse nounPhrase, List <Mention> entities)
        {
            //TODO: Look at how training is done and examine whether this is needed or can be accommodated in a different way.
            // A disabled earlier (Java) variant added a pronoun-modifier extent for every
            // sub-noun-phrase when any existed, and only otherwise ran the token scan below.

            List <IParse> tokens = nounPhrase.Tokens;
            IParse        head   = mHeadFinder.GetHeadToken(nounPhrase);

            for (int position = tokens.Count - 2; position >= 0; position--)
            {
                IParse candidate = tokens[position];

                bool isPronounModifier = candidate != head
                                         && candidate.SyntacticType.StartsWith("PRP")
                                         && IsHandledPronoun(candidate.ToString());
                if (!isPronounModifier)
                {
                    continue;
                }

                // Only the right-most matching pronoun is recorded.
                Mention pronounMention = new Mention(candidate.Span, candidate.Span, candidate.EntityId, null, OpenNLP.Tools.Coreference.Linker.PronounModifier);
                entities.Add(pronounMention);
                return;
            }
        }
예제 #9
0
		/// <summary>
		/// Scans the tokens of a noun phrase from right to left, starting at the second-to-last token
		/// and skipping the head token, and records the first handled personal/possessive pronoun
		/// as a pronoun-modifier mention.
		/// </summary>
		/// <param name="nounPhrase">The noun phrase whose tokens are examined.</param>
		/// <param name="entities">The list that receives the new mention; at most one is added.</param>
		private void CollectPossessivePronouns(IParse nounPhrase, List<Mention> entities)
		{
			//TODO: Look at how training is done and examine whether this is needed or can be accommodated in a different way.
			// A disabled earlier (Java) variant added a pronoun-modifier extent for every
			// sub-noun-phrase when any existed, and only otherwise ran the token scan below.

			List<IParse> tokens = nounPhrase.Tokens;
			IParse head = mHeadFinder.GetHeadToken(nounPhrase);

			int position = tokens.Count - 2;
			while (position >= 0)
			{
				IParse candidate = tokens[position--];
				if (candidate == head)
				{
					continue;
				}

				bool handledPronoun = PartsOfSpeech.IsPersOrPossPronoun(candidate.SyntacticType)
					&& IsHandledPronoun(candidate.ToString());
				if (handledPronoun)
				{
					// Only the right-most matching pronoun is recorded.
					entities.Add(new Mention(candidate.Span, candidate.Span, candidate.EntityId, null, Linker.PronounModifier));
					return;
				}
			}
		}
예제 #10
0
		/// <summary>
		/// Adds "CNP" mentions for the conjunct segments of a coordinated noun phrase.
		/// </summary>
		/// <remarks>
		/// Tokens are visited right to left, beginning just before the index returned by
		/// <c>mHeadFinder.GetHeadIndex</c>. An "and"/"or" whose left neighbour satisfies
		/// <c>PartsOfSpeech.IsNoun</c> closes the non-empty segment to its right; any other "and"/"or"
		/// with a non-empty segment stops the scan. Once a conjunction has been accepted, tokens equal
		/// to <c>PartsOfSpeech.Comma</c> close further segments and reaching token 0 closes the
		/// left-most one.
		/// </remarks>
		/// <param name="nounPhrase">The noun phrase whose tokens are scanned.</param>
		/// <param name="entities">The list that receives the new mentions.</param>
		private void CollectCoordinatedNounPhraseMentions(IParse nounPhrase, List<Mention> entities)
		{
			List<IParse> tokens = nounPhrase.Tokens;
			bool conjunctionSeen = false;
			// Index of the last token of the segment currently being closed.
			int segmentEnd = mHeadFinder.GetHeadIndex(nounPhrase);

			for (int position = segmentEnd - 1; position >= 0; position--)
			{
				IParse current = tokens[position];
				string text = current.ToString();

				if (text == "and" || text == "or")
				{
					if (segmentEnd != position)
					{
						bool nounOnLeft = position >= 1 && PartsOfSpeech.IsNoun(tokens[position - 1].SyntacticType);
						if (!nounOnLeft)
						{
							break;
						}
						var conjunctSpan = new Util.Span(tokens[position + 1].Span.Start, tokens[segmentEnd].Span.End);
						entities.Add(new Mention(conjunctSpan, conjunctSpan, current.EntityId, null, "CNP"));
						conjunctionSeen = true;
					}
					segmentEnd = position - 1;
					continue;
				}

				// Commas and the phrase start only matter after a conjunction was accepted.
				if (!conjunctionSeen)
				{
					continue;
				}

				if (text == PartsOfSpeech.Comma)
				{
					if (segmentEnd != position)
					{
						var conjunctSpan = new Util.Span(tokens[position + 1].Span.Start, tokens[segmentEnd].Span.End);
						entities.Add(new Mention(conjunctSpan, conjunctSpan, current.EntityId, null, "CNP"));
					}
					segmentEnd = position - 1;
				}
				else if (position == 0 && segmentEnd >= 0)
				{
					var conjunctSpan = new Util.Span(tokens[position].Span.Start, tokens[segmentEnd].Span.End);
					entities.Add(new Mention(conjunctSpan, conjunctSpan, current.EntityId, null, "CNP"));
				}
			}
		}
예제 #11
0
 /// <summary>
 /// Constructs context information for the specified mention by forwarding the mention's
 /// span, head span, id, parse, type and name type, together with the positional arguments,
 /// to the full constructor.
 /// </summary>
 /// <param name="mention">
 /// The mention object on which this object is based.
 /// </param>
 /// <param name="mentionIndexInSentence">
 /// The mention's position in the sentence.
 /// </param>
 /// <param name="mentionsInSentence">
 /// The number of mentions in the sentence.
 /// </param>
 /// <param name="mentionIndexInDocument">
 /// The index of this mention with respect to the document.
 /// </param>
 /// <param name="sentenceIndex">
 /// The index of the sentence which contains this mention.
 /// </param>
 /// <param name="headFinder">
 /// An object which provides head information.
 /// </param>
 public MentionContext(Mention mention, int mentionIndexInSentence, int mentionsInSentence, int mentionIndexInDocument, int sentenceIndex, IHeadFinder headFinder)
     : this(mention.Span, mention.HeadSpan, mention.Id, mention.Parse, mention.Type, mention.NameType, mentionIndexInSentence, mentionsInSentence, mentionIndexInDocument, sentenceIndex, headFinder)
 {
     // Nothing to do here: all initialization happens in the chained constructor.
 }
예제 #12
0
 /// <summary>
 /// Creates a mention from an existing one by passing its span, head span, id, parse,
 /// type and name type fields to the six-argument constructor.
 /// </summary>
 /// <param name="mention">
 /// The mention whose field values are reused.
 /// </param>
 public Mention(Mention mention) : this(mention.mSpan, mention.mHeadSpan, mention.mId, mention.mParse, mention.mType, mention.mNameType)
 {
     // Nothing to do here: all initialization happens in the chained constructor.
 }
예제 #13
0
        /// <summary>
        /// Builds one <c>MentionContext</c> per input mention, in the same order as the input.
        /// </summary>
        /// <remarks>
        /// Each context receives the mention's index within its sentence, the number of consecutive
        /// mentions sharing that sentence number, its index in the array and its sentence number.
        /// Unless <c>mMode</c> is <c>LinkerMode.Sim</c>, gender and number are computed and stored on
        /// the context as well.
        /// The previous implementation logged a mention without a parse and then dereferenced the null
        /// parse anyway; mentions are now validated up front so the failure is explicit.
        /// </remarks>
        /// <param name="mentions">The mentions to wrap; every mention must have a parse.</param>
        /// <returns>An array of contexts parallel to <paramref name="mentions"/>.</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="mentions"/> is null.</exception>
        /// <exception cref="System.ArgumentException">A mention has no parse.</exception>
        public virtual MentionContext[] ConstructMentionContexts(Mention.Mention[] mentions)
        {
            if (mentions == null)
            {
                throw new ArgumentNullException("mentions");
            }

            // Both loops below read Parse.SentenceNumber, so a missing parse anywhere in the
            // array would otherwise surface as a NullReferenceException.
            for (int checkIndex = 0; checkIndex < mentions.Length; checkIndex++)
            {
                if (mentions[checkIndex].Parse == null)
                {
                    Console.Error.WriteLine("no parse for " + mentions[checkIndex]);
                    throw new ArgumentException("Mention at index " + checkIndex + " has no parse.", "mentions");
                }
            }

            int mentionInSentenceIndex = -1;
            int mentionsInSentenceCount = -1;
            int previousSentenceIndex = -1;
            var contexts = new MentionContext[mentions.Length];
            for (int mentionIndex = 0, mentionCount = mentions.Length; mentionIndex < mentionCount; mentionIndex++)
            {
                IParse mentionParse = mentions[mentionIndex].Parse;
                int sentenceIndex = mentionParse.SentenceNumber;
                if (sentenceIndex != previousSentenceIndex)
                {
                    // New sentence: restart the in-sentence position and count the run of
                    // consecutive mentions that share this sentence number.
                    mentionInSentenceIndex = 0;
                    previousSentenceIndex = sentenceIndex;
                    mentionsInSentenceCount = 0;
                    for (int currentMentionInSentence = mentionIndex; currentMentionInSentence < mentions.Length; currentMentionInSentence++)
                    {
                        if (sentenceIndex != mentions[currentMentionInSentence].Parse.SentenceNumber)
                        {
                            break;
                        }
                        mentionsInSentenceCount++;
                    }
                }
                contexts[mentionIndex] = new MentionContext(mentions[mentionIndex], mentionInSentenceIndex, mentionsInSentenceCount, mentionIndex, sentenceIndex, HeadFinder);
                contexts[mentionIndex].Id = mentions[mentionIndex].Id;
                mentionInSentenceIndex++;
                if (mMode != LinkerMode.Sim)
                {
                    Gender gender = ComputeGender(contexts[mentionIndex]);
                    contexts[mentionIndex].SetGender(gender.Type, gender.Confidence);
                    Number number = ComputeNumber(contexts[mentionIndex]);
                    contexts[mentionIndex].SetNumber(number.Type, number.Confidence);
                }
            }
            return contexts;
        }
예제 #14
0
        /// <summary>
        /// Invokes <c>GetEntitiesFromMentions</c> on the supplied mentions; any value that call
        /// returns is not used here.
        /// </summary>
        /// <param name="mentions">The mentions handed unchanged to <c>GetEntitiesFromMentions</c>.</param>
        public virtual void SetEntitiesFromMentions(Mention.Mention[] mentions)
		{
			GetEntitiesFromMentions(mentions);
		}
 /// <summary>
 /// Scans the tokens of a noun phrase from right to left, starting at the second-to-last token
 /// and skipping the head token, and records the first handled token whose syntactic type
 /// starts with "PRP" as a pronoun-modifier mention.
 /// </summary>
 /// <param name="nounPhrase">The noun phrase whose tokens are examined.</param>
 /// <param name="entities">The list that receives the new mention; at most one is added.</param>
 private void CollectPossessivePronouns(IParse nounPhrase, List<Mention> entities)
 {
     //TODO: Look at how training is done and examine whether this is needed or can be accommodated in a different way.
     // A disabled earlier (Java) variant added a pronoun-modifier extent for every
     // sub-noun-phrase when any existed, and only otherwise ran the token scan below.

     List<IParse> tokens = nounPhrase.Tokens;
     IParse head = mHeadFinder.GetHeadToken(nounPhrase);

     int position = tokens.Count - 2;
     while (position >= 0)
     {
         IParse candidate = tokens[position--];

         bool isPronounModifier = candidate != head
                                  && candidate.SyntacticType.StartsWith("PRP")
                                  && IsHandledPronoun(candidate.ToString());
         if (isPronounModifier)
         {
             // Only the right-most matching pronoun is recorded.
             entities.Add(new Mention(candidate.Span, candidate.Span, candidate.EntityId, null, OpenNLP.Tools.Coreference.Linker.PronounModifier));
             return;
         }
     }
 }
 /// <summary>
 /// Adds "CNP" mentions for the conjunct segments of a coordinated noun phrase.
 /// </summary>
 /// <remarks>
 /// Tokens are visited right to left, beginning just before the index returned by
 /// <c>mHeadFinder.GetHeadIndex</c>. An "and"/"or" whose left neighbour has a syntactic type
 /// starting with "NN" closes the non-empty segment to its right; any other "and"/"or" with a
 /// non-empty segment stops the scan. Once a conjunction has been accepted, commas close
 /// further segments and reaching token 0 closes the left-most one.
 /// </remarks>
 /// <param name="nounPhrase">The noun phrase whose tokens are scanned.</param>
 /// <param name="entities">The list that receives the new mentions.</param>
 private void CollectCoordinatedNounPhraseMentions(IParse nounPhrase, List<Mention> entities)
 {
     List<IParse> tokens = nounPhrase.Tokens;
     bool conjunctionSeen = false;
     // Index of the last token of the segment currently being closed.
     int segmentEnd = mHeadFinder.GetHeadIndex(nounPhrase);

     for (int position = segmentEnd - 1; position >= 0; position--)
     {
         IParse current = tokens[position];
         string text = current.ToString();

         if (text == "and" || text == "or")
         {
             if (segmentEnd != position)
             {
                 bool nounOnLeft = position >= 1 && tokens[position - 1].SyntacticType.StartsWith("NN");
                 if (!nounOnLeft)
                 {
                     break;
                 }
                 Util.Span conjunctSpan = new Util.Span(tokens[position + 1].Span.Start, tokens[segmentEnd].Span.End);
                 entities.Add(new Mention(conjunctSpan, conjunctSpan, current.EntityId, null, "CNP"));
                 conjunctionSeen = true;
             }
             segmentEnd = position - 1;
             continue;
         }

         // Commas and the phrase start only matter after a conjunction was accepted.
         if (!conjunctionSeen)
         {
             continue;
         }

         if (text.Equals(","))
         {
             if (segmentEnd != position)
             {
                 Util.Span conjunctSpan = new Util.Span(tokens[position + 1].Span.Start, tokens[segmentEnd].Span.End);
                 entities.Add(new Mention(conjunctSpan, conjunctSpan, current.EntityId, null, "CNP"));
             }
             segmentEnd = position - 1;
         }
         else if (position == 0 && segmentEnd >= 0)
         {
             Util.Span conjunctSpan = new Util.Span(tokens[position].Span.Start, tokens[segmentEnd].Span.End);
             entities.Add(new Mention(conjunctSpan, conjunctSpan, current.EntityId, null, "CNP"));
         }
     }
 }
예제 #17
0
 /// <summary>
 /// Constructs context information for the specified mention by forwarding the mention's
 /// span, head span, id, parse, type and name type, together with the positional arguments,
 /// to the full constructor.
 /// </summary>
 /// <param name="mention">
 /// The mention object on which this object is based.
 /// </param>
 /// <param name="mentionIndexInSentence">
 /// The mention's position in the sentence.
 /// </param>
 /// <param name="mentionsInSentence">
 /// The number of mentions in the sentence.
 /// </param>
 /// <param name="mentionIndexInDocument">
 /// The index of this mention with respect to the document.
 /// </param>
 /// <param name="sentenceIndex">
 /// The index of the sentence which contains this mention.
 /// </param>
 /// <param name="headFinder">
 /// An object which provides head information.
 /// </param>
 public MentionContext(Mention mention, int mentionIndexInSentence, int mentionsInSentence, int mentionIndexInDocument, int sentenceIndex, IHeadFinder headFinder) :
     this(mention.Span, mention.HeadSpan, mention.Id, mention.Parse, mention.Type, mention.NameType, mentionIndexInSentence,
          mentionsInSentence, mentionIndexInDocument, sentenceIndex, headFinder)
 {
     // Nothing to do here: all initialization happens in the chained constructor.
 }
        /// <summary>
        /// Turns a list of noun phrases into a sorted array of mentions with duplicates removed.
        /// </summary>
        /// <param name="nounPhrases">The noun phrases to process, visited in list order.</param>
        /// <param name="headMap">Head mapping handed to <c>IsHeadOfExistingMention</c>.</param>
        /// <returns>The collected mentions after <c>Sort</c> and <c>RemoveDuplicates</c>.</returns>
        private Mention[] CollectMentions(List<IParse> nounPhrases, Dictionary<IParse, IParse> headMap)
        {
            List<Mention> collected = new List<Mention>(nounPhrases.Count);
            Util.Set<IParse> activeMentions = new Util.HashSet<IParse>();

            int index = 0;
            while (index < nounPhrases.Count)
            {
                IParse current = nounPhrases[index++];

                // A noun phrase that is the head of an already-collected mention gets no mention of its own.
                bool opensMention = !IsHeadOfExistingMention(current, headMap, activeMentions);
                if (opensMention)
                {
                    ClearMentions(activeMentions, current);
                }

                // Noun phrases that are part of a name are excluded as well.
                if (opensMention && !IsPartOfName(current))
                {
                    IParse lastHead = mHeadFinder.GetLastHead(current);
                    Mention mention = new Mention(current.Span, lastHead.Span, lastHead.EntityId, current, null);
                    collected.Add(mention);
                    activeMentions.Add(current);

                    // determine name-entity type
                    string nameType = GetEntityType(mHeadFinder.GetHeadToken(lastHead));
                    if (nameType != null)
                    {
                        mention.NameType = nameType;
                    }
                }

                if (!IsBasalNounPhrase(current))
                {
                    // Could use to get NP -> tokens CON structures for basal nps including NP -> NAC tokens
                    continue;
                }

                if (mPrenominalNamedEntitiesCollection)
                {
                    CollectPrenominalNamedEntities(current, collected);
                }
                if (mCoordinatedNounPhrasesCollection)
                {
                    CollectCoordinatedNounPhraseMentions(current, collected);
                }
                CollectPossessivePronouns(current, collected);
            }

            collected.Sort();
            RemoveDuplicates(collected);
            return collected.ToArray();
        }
예제 #19
0
        /// <summary>
        /// Orders this mention relative to another mention by comparing their spans.
        /// </summary>
        /// <remarks>
        /// A null argument previously caused a <c>NullReferenceException</c>. Following the
        /// <c>IComparable.CompareTo</c> contract, this instance now compares greater than null.
        /// </remarks>
        /// <param name="obj">The mention to compare with; may be null.</param>
        /// <returns>
        /// A positive value when <paramref name="obj"/> is null; otherwise the result of comparing
        /// this mention's span with the other mention's span.
        /// </returns>
        /// <exception cref="System.InvalidCastException">
        /// <paramref name="obj"/> is neither null nor a <c>Mention</c>.
        /// </exception>
        public virtual int CompareTo(object obj)
        {
            if (obj == null)
            {
                return 1;
            }

            Mention other = (Mention)obj;

            return(mSpan.CompareTo(other.Span));
        }
예제 #20
0
 /// <summary>
 /// Computes the number for a mention context by delegating to <c>mCompatibilityModel.ComputeNumber</c>.
 /// </summary>
 /// <param name="mention">The mention context passed through to the compatibility model.</param>
 /// <returns>The value returned by the compatibility model.</returns>
 protected internal override Number ComputeNumber(Mention.MentionContext mention)
 {
     return mCompatibilityModel.ComputeNumber(mention);
 }
예제 #21
0
        /// <summary>
        /// Walks the supplied noun phrases in index order and gathers the mentions they give rise to.
        /// </summary>
        /// <param name="nounPhrases">The noun phrases to inspect.</param>
        /// <param name="headMap">Forwarded to <c>IsHeadOfExistingMention</c> for each noun phrase.</param>
        /// <returns>
        /// The collected mentions as an array, after the list has been sorted and passed
        /// through <c>RemoveDuplicates</c>.
        /// </returns>
        private Mention[] CollectMentions(List<IParse> nounPhrases, Dictionary<IParse, IParse> headMap)
        {
            List<Mention> collected = new List<Mention>(nounPhrases.Count);
            Util.Set<IParse> recentlyAdded = new Util.HashSet<IParse>();

            for (int i = 0; i < nounPhrases.Count; i++)
            {
                IParse candidate = nounPhrases[i];

                bool headsExistingMention = IsHeadOfExistingMention(candidate, headMap, recentlyAdded);
                if (!headsExistingMention)
                {
                    ClearMentions(recentlyAdded, candidate);

                    bool partOfName = IsPartOfName(candidate);
                    if (!partOfName)
                    {
                        IParse lastHead = mHeadFinder.GetLastHead(candidate);
                        Mention mention = new Mention(candidate.Span, lastHead.Span, lastHead.EntityId, candidate, null);
                        collected.Add(mention);
                        recentlyAdded.Add(candidate);

                        // Attach a named-entity type when one can be determined for the head token.
                        IParse headToken = mHeadFinder.GetHeadToken(lastHead);
                        string nameType = GetEntityType(headToken);
                        if (nameType != null)
                        {
                            mention.NameType = nameType;
                        }
                    }
                }

                if (!IsBasalNounPhrase(candidate))
                {
                    // Non-basal noun phrases contribute nothing further here.
                    // Could use to get NP -> tokens CON structures for basal nps including NP -> NAC tokens
                    // (a collectComplexNounPhrases step was sketched for this but is not implemented).
                    continue;
                }

                // The two optional collectors are gated by their flags; possessive pronouns are always collected.
                if (mPrenominalNamedEntitiesCollection)
                {
                    CollectPrenominalNamedEntities(candidate, collected);
                }

                if (mCoordinatedNounPhrasesCollection)
                {
                    CollectCoordinatedNounPhraseMentions(candidate, collected);
                }

                CollectPossessivePronouns(candidate, collected);
            }

            collected.Sort();
            RemoveDuplicates(collected);

            return collected.ToArray();
        }
예제 #22
0
		/*/// <summary> 
        /// Adds a mention for the non-treebank-labeled possessive noun phrases.
        /// </summary>
		/// <param name="possessiveNounPhrase">
        /// The possessive noun phrase which may require an additional mention.
		/// </param>
		/// <param name="mentions">
        /// The list of mentions into which a new mention can be added. 
		/// </param>
        private void AddPossessiveMentions(IParse possessiveNounPhrase, List<Mention> mentions)
        {
            List<IParse> kids = possessiveNounPhrase.SyntacticChildren;
            if (kids.Count > 1)
            {
                IParse firstToken = kids[1];
                if (firstToken.IsToken && firstToken.SyntacticType != "POS")
                {
                    IParse lastToken = kids[kids.Count - 1];
                    if (lastToken.IsToken)
                    {
                        var extentSpan = new Util.Span(firstToken.Span.Start, lastToken.Span.End);
                        var extent = new Mention(extentSpan, extentSpan, - 1, null, null);
                        mentions.Add(extent);
                    }
                    else
                    {
                        Console.Error.WriteLine("AbstractMentionFinder.AddPossessiveMentions: odd parse structure: " + possessiveNounPhrase);
                    }
                }
            }
        }*/
		
		/// <summary>
		/// Adds a "NAME" mention for every named entity of the noun phrase whose span
		/// does not contain the span of the noun phrase's head token.
		/// </summary>
		/// <param name="nounPhrase">The noun phrase whose named entities are examined.</param>
		/// <param name="extents">The list that receives the newly created mentions.</param>
		private void CollectPrenominalNamedEntities(IParse nounPhrase, List<Mention> extents)
		{
			IParse head = mHeadFinder.GetHeadToken(nounPhrase);
			List<IParse> candidates = nounPhrase.NamedEntities;
			Util.Span headSpan = head.Span;

			foreach (IParse candidate in candidates)
			{
				// A named entity covering the head token is skipped here.
				if (candidate.Span.Contains(headSpan))
				{
					continue;
				}

				Mention nameMention = new Mention(candidate.Span, candidate.Span, candidate.EntityId, null, "NAME")
				{
					NameType = candidate.EntityType
				};
				extents.Add(nameMention);
			}
		}
예제 #23
0
        /// <summary>
        /// Resolves each of the given mentions against a fresh discourse model and
        /// returns the entities that model ends up holding.
        /// </summary>
        /// <param name="mentions">The mentions to turn into mention contexts and resolve.</param>
        /// <returns>The <c>Entities</c> of the discourse model after all contexts were resolved.</returns>
        public virtual DiscourseEntity[] GetEntitiesFromMentions(Mention.Mention[] mentions)
        {
            MentionContext[] contexts = ConstructMentionContexts(mentions);
            DiscourseModel model = new DiscourseModel();

            // Contexts are resolved one at a time, in array order, against the same model.
            foreach (MentionContext context in contexts)
            {
                Resolve(context, model);
            }

            return model.Entities;
        }
예제 #24
0
 /// <summary>
 /// Copy constructor: creates a mention from the fields of an existing one.
 /// </summary>
 /// <remarks>
 /// Forwards the source mention's span, head span, id, parse, type and name type
 /// to the six-argument constructor; no other work is done here.
 /// NOTE(review): the field values are handed over as-is, so whether referenced
 /// objects end up shared with the source depends on that constructor - confirm.
 /// </remarks>
 /// <param name="mention">
 /// The mention to copy. Its fields are read directly in the initializer, so it must not be null.
 /// </param>
 public Mention(Mention mention)
     : this(mention.mSpan, mention.mHeadSpan, mention.mId, mention.mParse, mention.mType, mention.mNameType)
 {
 }
예제 #25
0
 /// <summary>
 /// Builds mention contexts for the given mentions, resolves each one against a
 /// newly created discourse model, and returns that model's entities.
 /// </summary>
 /// <param name="mentions">The mentions to convert into contexts and resolve.</param>
 /// <returns>The <c>Entities</c> of the discourse model once every context has been resolved.</returns>
 public virtual DiscourseEntity[] GetEntitiesFromMentions(Mention.Mention[] mentions)
 {
     MentionContext[] contexts = ConstructMentionContexts(mentions);
     DiscourseModel model = new DiscourseModel();

     // Resolution happens in array order, all against the same model instance.
     foreach (MentionContext context in contexts)
     {
         Resolve(context, model);
     }

     return model.Entities;
 }