/// <summary>
        /// Gathers the mentions of every entity, and every singleton mention, that passes at least
        /// one of the compatibility checks against the entity identified by the specified key.
        /// </summary>
        /// <param name="entityKey">
        /// Key of the entity the set is built for; its own mentions are always included.
        /// </param>
        /// <param name="entities">
        /// Mapping from entity key to the mentions of that entity.
        /// </param>
        /// <param name="headSets">
        /// Mapping from entity key to the head set of that entity.
        /// </param>
        /// <param name="nameSets">
        /// Mapping from entity key to the name set of that entity.
        /// </param>
        /// <param name="singletons">
        /// Mentions belonging to entities that consist of a single mention.
        /// </param>
        /// <returns>
        /// The mentions of all entities and singletons that satisfied one of the checks.
        /// </returns>
        private Util.Set <Context> ConstructExclusionSet(int entityKey, Util.HashList <int, Context> entities, Dictionary <int, Util.Set <string> > headSets, Dictionary <int, Util.Set <string> > nameSets, List <Context> singletons)
        {
            Util.Set <Context> result         = new Util.HashSet <Context>();
            Util.Set <string>  targetHeads    = headSets[entityKey];
            Util.Set <string>  targetNames    = nameSets[entityKey];
            List <Context>     targetMentions = entities[entityKey];

            // Multi-mention entities: the checks run in order and stop at the first one that holds.
            foreach (int candidateKey in entities.Keys)
            {
                List <Context> candidateMentions = entities[candidateKey];

                bool include = candidateKey == entityKey
                               || nameSets[candidateKey].Count == 0
                               || HasSameHead(targetHeads, headSets[candidateKey])
                               || HasSameNameType(targetNames, nameSets[candidateKey])
                               || HasSuperClass(targetMentions, candidateMentions);
                if (include)
                {
                    result.AddAll(candidateMentions);
                }
            }

            // Singletons: one scratch list is reused to hand each mention to HasSuperClass as a list.
            List <Context> scratch = new List <Context>(1);

            foreach (Context singleton in singletons)
            {
                scratch.Clear();
                scratch.Add(singleton);

                bool include = targetHeads.Contains(singleton.HeadTokenText.ToLower())
                               || singleton.NameType == null
                               || targetNames.Contains(singleton.NameType)
                               || HasSuperClass(targetMentions, scratch);
                if (include)
                {
                    result.Add(singleton);
                }
            }
            return result;
        }
Beispiel #2
0
 /// <summary>
 /// Builds the set of sense keys for the lemmas of the specified context.
 /// </summary>
 /// <param name="context">
 /// The context whose lemmas (as returned by GetLemmas) are looked up.
 /// </param>
 /// <returns>
 /// For every lemma, the key returned by GetSenseKey plus all keys returned by
 /// GetParentSenseKeys, both queried with PartsOfSpeech.NounSingularOrMass and 0.
 /// </returns>
 private static Util.Set <string> GetSynsetSet(Context context)
 {
     Util.Set <string>   senseKeys  = new Util.HashSet <string>();
     string[]            lemmas     = GetLemmas(context);
     Mention.IDictionary dictionary = Mention.DictionaryFactory.GetDictionary();

     foreach (string lemma in lemmas)
     {
         // third argument 0: presumably the sense index - confirm against IDictionary
         string ownKey = dictionary.GetSenseKey(lemma, PartsOfSpeech.NounSingularOrMass, 0);
         senseKeys.Add(ownKey);

         string[] parentKeys = dictionary.GetParentSenseKeys(lemma, PartsOfSpeech.NounSingularOrMass, 0);
         foreach (string parentKey in parentKeys)
         {
             senseKeys.Add(parentKey);
         }
     }
     return senseKeys;
 }
        /// <summary>
        /// Creates the mentions for the specified noun phrases.
        /// </summary>
        /// <param name="nounPhrases">
        /// The noun phrases to examine; they are processed in list order.
        /// </param>
        /// <param name="headMap">
        /// Passed to IsHeadOfExistingMention together with the recently added noun phrases.
        /// </param>
        /// <returns>
        /// The collected mentions after sorting and duplicate removal.
        /// </returns>
        private Mention[] CollectMentions(List <IParse> nounPhrases, Dictionary <IParse, IParse> headMap)
        {
            var collected = new List <Mention>(nounPhrases.Count);
            Util.Set <IParse> recent = new Util.HashSet <IParse>();

            foreach (IParse phrase in nounPhrases)
            {
                // A phrase reported as the head of an existing mention does not start a new one.
                if (!IsHeadOfExistingMention(phrase, headMap, recent))
                {
                    ClearMentions(recent, phrase);

                    // Phrases that are part of a name are not turned into mentions.
                    if (!IsPartOfName(phrase))
                    {
                        IParse head    = mHeadFinder.GetLastHead(phrase);
                        var    mention = new Mention(phrase.Span, head.Span, head.EntityId, phrase, null);
                        collected.Add(mention);
                        recent.Add(phrase);

                        // The name type is only assigned when one could be determined.
                        string nameType = GetEntityType(mHeadFinder.GetHeadToken(head));
                        if (nameType != null)
                        {
                            mention.NameType = nameType;
                        }
                    }
                }

                // Only basal noun phrases contribute the additional mention kinds below.
                if (!IsBasalNounPhrase(phrase))
                {
                    continue;
                }
                if (mPrenominalNamedEntitiesCollection)
                {
                    CollectPrenominalNamedEntities(phrase, collected);
                }
                if (mCoordinatedNounPhrasesCollection)
                {
                    CollectCoordinatedNounPhraseMentions(phrase, collected);
                }
                CollectPossessivePronouns(phrase, collected);
            }

            collected.Sort();
            RemoveDuplicates(collected);
            return collected.ToArray();
        }
Beispiel #4
0
 /// <summary>
 /// Builds the set of sense keys for the lemmas of the specified context.
 /// </summary>
 /// <param name="context">
 /// The context whose lemmas (as returned by GetLemmas) are looked up.
 /// </param>
 /// <returns>
 /// For every lemma, the key returned by GetSenseKey plus all keys returned by
 /// GetParentSenseKeys, both queried with the tag "NN" and 0.
 /// </returns>
 private static Util.Set <string> GetSynsetSet(Context context)
 {
     Util.Set <string>   result     = new Util.HashSet <string>();
     string[]            lemmas     = GetLemmas(context);
     Mention.IDictionary dictionary = Mention.DictionaryFactory.GetDictionary();

     foreach (string lemma in lemmas)
     {
         // third argument 0: presumably the sense index - confirm against IDictionary
         result.Add(dictionary.GetSenseKey(lemma, "NN", 0));

         foreach (string parentKey in dictionary.GetParentSenseKeys(lemma, "NN", 0))
         {
             result.Add(parentKey);
         }
     }
     return result;
 }
Beispiel #5
0
 /// <summary>
 /// Collects the head words of the specified mentions into a set.
 /// </summary>
 /// <param name="mentions">
 /// The mentions whose head words are collected.
 /// </param>
 /// <returns>
 /// A set containing the lower-cased head token text of each specified mention.
 /// </returns>
 private Util.Set <string> ConstructHeadSet(IEnumerable <Context> mentions)
 {
     Util.Set <string> heads = new Util.HashSet <string>();

     foreach (Context mention in mentions)
     {
         string loweredHead = mention.HeadTokenText.ToLower();
         heads.Add(loweredHead);
     }
     return heads;
 }
Beispiel #6
0
 /// <summary>
 /// Collects the lower-cased text of every token that precedes the head token.
 /// </summary>
 /// <param name="tokens">
 /// The tokens to read from.
 /// </param>
 /// <param name="headIndex">
 /// Index of the head token; only tokens at indexes below it are collected.
 /// </param>
 /// <returns>
 /// A set with the lower-cased string form of tokens[0] .. tokens[headIndex - 1].
 /// </returns>
 private Util.Set <string> ConstructModifierSet(Mention.IParse[] tokens, int headIndex)
 {
     Util.Set <string> modifiers = new Util.HashSet <string>();

     for (int i = 0; i < headIndex; i++)
     {
         modifiers.Add(tokens[i].ToString().ToLower());
     }
     return modifiers;
 }
        /// <summary>
        /// Reads names from a text file, one name per line.
        /// </summary>
        /// <param name="nameFile">
        /// Path of the file to read; it is decoded with <see cref="System.Text.Encoding.Default"/>.
        /// </param>
        /// <returns>
        /// A set holding each line read from the file.
        /// </returns>
        private Util.Set <string> ReadNames(string nameFile)
        {
            Util.Set <string> names = new Util.HashSet <string>();

            // The reader owns the underlying file handle; dispose it deterministically so the
            // handle is released even when reading fails part-way through.
            using (System.IO.StreamReader nameReader = new System.IO.StreamReader(nameFile, System.Text.Encoding.Default))
            {
                string line;
                while ((line = nameReader.ReadLine()) != null)
                {
                    names.Add(line);
                }
            }
            return(names);
        }
Beispiel #8
0
 /// <summary>
 /// Collects the name types assigned to the specified mentions.
 /// </summary>
 /// <param name="mentions">
 /// The mentions to inspect.
 /// </param>
 /// <returns>
 /// A set of the non-null name types found on the specified mentions.
 /// </returns>
 private Util.Set <string> ConstructNameSet(IEnumerable <Context> mentions)
 {
     Util.Set <string> nameTypes = new Util.HashSet <string>();

     foreach (Context mention in mentions)
     {
         // Mentions without a name type contribute nothing.
         if (mention.NameType == null)
         {
             continue;
         }
         nameTypes.Add(mention.NameType);
     }
     return nameTypes;
 }
        /// <summary>
        /// Collects the vertices reachable from the work list, treating <paramref name="todo"/>
        /// as a first-in-first-out queue.
        /// </summary>
        /// <param name="todo">
        /// Vertices still to be expanded; this list is consumed until it is empty.
        /// </param>
        /// <param name="graph">
        /// The graph that supplies the neighbors of each vertex.
        /// </param>
        /// <param name="verticesLeft">
        /// Vertices not yet reached; every neighbor that gets added is removed from this list.
        /// </param>
        /// <returns>
        /// The set of expanded vertices together with the neighbors reached from them.
        /// </returns>
        private static /*<V, E>*/ Set <V> Bfs <V, E>(List <V> todo, IGraph <V, E> graph, List <V> verticesLeft)
        {
            Set <V> component = new Util.HashSet <V>();

            while (todo.Count != 0)
            {
                // Take the oldest pending vertex.
                V current = todo[0];
                todo.RemoveAt(0);
                component.Add(current);

                foreach (V adjacent in graph.GetNeighbors(current))
                {
                    // Only neighbors still marked as unvisited are queued.
                    if (!verticesLeft.Contains(adjacent))
                    {
                        continue;
                    }
                    component.Add(adjacent);
                    todo.Add(adjacent);
                    verticesLeft.Remove(adjacent);
                }
            }

            return component;
        }
        /// <summary>
        /// Reads names from a text file, one name per line.
        /// </summary>
        /// <param name="nameFile">
        /// Path of the file to read. With DNF defined it is opened directly by the reader using
        /// <c>Encoding.Default</c>; otherwise it is opened through a <c>FileStream</c> and decoded
        /// with <c>Encoding.GetEncoding(0)</c>.
        /// </param>
        /// <returns>
        /// A set holding each line read from the file.
        /// </returns>
        private Util.Set <string> ReadNames(string nameFile)
        {
            Util.Set <string> names = new Util.HashSet <string>();

            // Both the stream and the reader hold the file handle; the using blocks release it
            // deterministically, including when reading fails part-way through.
#if DNF
            using (var nameReader = new StreamReader(nameFile, System.Text.Encoding.Default))
#else
            // NOTE(review): FileMode.OpenOrCreate creates an empty file when nameFile does not
            // exist, whereas the DNF branch throws - confirm this difference is intended.
            using (var stream     = new FileStream(nameFile, FileMode.OpenOrCreate))
            using (var nameReader = new StreamReader(stream, System.Text.Encoding.GetEncoding(0)))
#endif
            {
                string line;
                while ((line = nameReader.ReadLine()) != null)
                {
                    names.Add(line);
                }
            }
            return(names);
        }
Beispiel #11
0
        /// <summary>
        /// Creates the mentions for the specified noun phrases.
        /// </summary>
        /// <param name="nounPhrases">
        /// The noun phrases to examine; they are processed in list order.
        /// </param>
        /// <param name="headMap">
        /// Passed to IsHeadOfExistingMention together with the recently added noun phrases.
        /// </param>
        /// <returns>
        /// The collected mentions after sorting and duplicate removal.
        /// </returns>
        private Mention[] CollectMentions(List <IParse> nounPhrases, Dictionary <IParse, IParse> headMap)
        {
            List <Mention>    mentions       = new List <Mention>(nounPhrases.Count);
            Util.Set <IParse> recentMentions = new Util.HashSet <IParse>();

            for (int i = 0; i < nounPhrases.Count; i++)
            {
                IParse np = nounPhrases[i];

                // A phrase reported as the head of an existing mention does not start a new one.
                bool startsNewMention = !IsHeadOfExistingMention(np, headMap, recentMentions);
                if (startsNewMention)
                {
                    ClearMentions(recentMentions, np);
                }

                // Phrases that are part of a name are not turned into mentions.
                if (startsNewMention && !IsPartOfName(np))
                {
                    IParse  head   = mHeadFinder.GetLastHead(np);
                    Mention extent = new Mention(np.Span, head.Span, head.EntityId, np, null);
                    mentions.Add(extent);
                    recentMentions.Add(np);

                    // The name type is only assigned when one could be determined.
                    string entityType = GetEntityType(mHeadFinder.GetHeadToken(head));
                    if (entityType != null)
                    {
                        extent.NameType = entityType;
                    }
                }

                // Only basal noun phrases contribute the additional mention kinds below.
                if (IsBasalNounPhrase(np))
                {
                    if (mPrenominalNamedEntitiesCollection)
                    {
                        CollectPrenominalNamedEntities(np, mentions);
                    }
                    if (mCoordinatedNounPhrasesCollection)
                    {
                        CollectCoordinatedNounPhraseMentions(np, mentions);
                    }
                    CollectPossessivePronouns(np, mentions);
                }
            }

            mentions.Sort();
            RemoveDuplicates(mentions);
            return mentions.ToArray();
        }
        /// <summary>
        /// Finds the nodes that the target patterns of this relation match at node
        /// <paramref name="t"/>, i.e. the nodes captured under the name "target".
        /// </summary>
        /// <param name="t">The node at which each pattern is matched.</param>
        /// <param name="root">The root of the tree each matcher is created on.</param>
        /// <param name="headFinder">The head finder handed to every matcher.</param>
        /// <returns>The set of "target" nodes captured by all matches.</returns>
        /// <exception cref="InvalidDataException">
        /// A pattern matched but captured no node named "target".
        /// </exception>
        public ICollection <TreeGraphNode> GetRelatedNodes(TreeGraphNode t, TreeGraphNode root, IHeadFinder headFinder)
        {
            Set <TreeGraphNode> related = new Util.HashSet <TreeGraphNode>();

            foreach (TregexPattern pattern in targetPatterns)
            {
                // Every matcher is created with the supplied head finder so that the same one
                // is used for all patterns.
                TregexMatcher matcher = pattern.Matcher(root, headFinder);

                // Keep matching at t until the pattern yields no further match.
                while (matcher.FindAt(t))
                {
                    var dependent = (TreeGraphNode)matcher.GetNode("target");
                    if (dependent == null)
                    {
                        throw new InvalidDataException("Expression has no target: " + pattern);
                    }
                    related.Add(dependent);
                }
            }
            return related;
        }
        /// <summary>
        /// Finds the deepest node that is an ancestor-or-self of both this node and the specified node.
        /// When both are the same node, the parent of this node is returned instead.
        /// When one node lies on the parent chain of the other, that node is returned.
        /// </summary>
        /// <param name="node">
        /// The node whose parent chain is compared with the parent chain of this node.
        /// </param>
        /// <returns>
        /// The deepest shared node, or null when the two parent chains have nothing in common.
        /// </returns>
        public virtual Parse GetCommonParent(Parse node)
        {
            if (this == node)
            {
                return this.Parent;
            }

            // Record this node and all of its ancestors.
            Util.HashSet <Parse> ownChain = new Util.HashSet <Parse>();
            for (Parse ancestor = this; ancestor != null; ancestor = ancestor.Parent)
            {
                ownChain.Add(ancestor);
            }

            // Walk upwards from the other node; the first recorded node met is the answer.
            for (Parse candidate = node; candidate != null; candidate = candidate.Parent)
            {
                if (ownChain.Contains(candidate))
                {
                    return candidate;
                }
            }
            return null;
        }
        /// <summary>
        /// Creates the mentions for the specified noun phrases.
        /// </summary>
        /// <param name="nounPhrases">
        /// The noun phrases to examine; they are processed in list order.
        /// </param>
        /// <param name="headMap">
        /// Passed to IsHeadOfExistingMention together with the recently added noun phrases.
        /// </param>
        /// <returns>
        /// The collected mentions after sorting and duplicate removal.
        /// </returns>
        private Mention[] CollectMentions(List<IParse> nounPhrases, Dictionary<IParse, IParse> headMap)
        {
            List<Mention> found = new List<Mention>(nounPhrases.Count);
            Util.Set<IParse> recentlyAdded = new Util.HashSet<IParse>();

            foreach (IParse candidate in nounPhrases)
            {
                // A phrase reported as the head of an existing mention does not start a new one.
                if (!IsHeadOfExistingMention(candidate, headMap, recentlyAdded))
                {
                    ClearMentions(recentlyAdded, candidate);

                    // Phrases that are part of a name are not turned into mentions.
                    if (!IsPartOfName(candidate))
                    {
                        IParse headParse = mHeadFinder.GetLastHead(candidate);
                        Mention created = new Mention(candidate.Span, headParse.Span, headParse.EntityId, candidate, null);
                        found.Add(created);
                        recentlyAdded.Add(candidate);

                        // The name type is only assigned when one could be determined.
                        string nameType = GetEntityType(mHeadFinder.GetHeadToken(headParse));
                        if (nameType != null)
                        {
                            created.NameType = nameType;
                        }
                    }
                }

                // Only basal noun phrases contribute the additional mention kinds below.
                if (!IsBasalNounPhrase(candidate))
                {
                    continue;
                }
                if (mPrenominalNamedEntitiesCollection)
                {
                    CollectPrenominalNamedEntities(candidate, found);
                }
                if (mCoordinatedNounPhrasesCollection)
                {
                    CollectCoordinatedNounPhraseMentions(candidate, found);
                }
                CollectPossessivePronouns(candidate, found);
            }

            found.Sort();
            RemoveDuplicates(found);
            return found.ToArray();
        }
        /// <summary>
        /// Creates the mentions for the specified noun phrases.
        /// </summary>
        /// <param name="nounPhrases">
        /// The noun phrases to examine; they are processed in list order.
        /// </param>
        /// <param name="headMap">
        /// Passed to IsHeadOfExistingMention together with the recently added noun phrases.
        /// </param>
        /// <returns>
        /// The collected mentions after sorting and duplicate removal.
        /// </returns>
        private Mention[] CollectMentions(List<IParse> nounPhrases, Dictionary<IParse, IParse> headMap)
        {
            var result = new List<Mention>(nounPhrases.Count);
            Util.Set<IParse> recent = new Util.HashSet<IParse>();

            for (int index = 0; index < nounPhrases.Count; index++)
            {
                IParse phrase = nounPhrases[index];

                // A phrase reported as the head of an existing mention does not start a new one.
                bool headOfExisting = IsHeadOfExistingMention(phrase, headMap, recent);
                if (!headOfExisting)
                {
                    ClearMentions(recent, phrase);
                }

                // Phrases that are part of a name are not turned into mentions.
                if (!headOfExisting && !IsPartOfName(phrase))
                {
                    IParse head = mHeadFinder.GetLastHead(phrase);
                    var mention = new Mention(phrase.Span, head.Span, head.EntityId, phrase, null);
                    result.Add(mention);
                    recent.Add(phrase);

                    // The name type is only assigned when one could be determined.
                    string nameType = GetEntityType(mHeadFinder.GetHeadToken(head));
                    if (nameType != null)
                    {
                        mention.NameType = nameType;
                    }
                }

                // Only basal noun phrases contribute the additional mention kinds below.
                if (IsBasalNounPhrase(phrase))
                {
                    if (mPrenominalNamedEntitiesCollection)
                    {
                        CollectPrenominalNamedEntities(phrase, result);
                    }
                    if (mCoordinatedNounPhrasesCollection)
                    {
                        CollectCoordinatedNounPhraseMentions(phrase, result);
                    }
                    CollectPossessivePronouns(phrase, result);
                }
            }

            result.Sort();
            RemoveDuplicates(result);
            return result.ToArray();
        }
        /// <summary>
        /// Builds the string-match features that compare the specified mention with every
        /// mention of the specified entity.
        /// </summary>
        /// <param name="mention">
        /// The mention being compared.
        /// </param>
        /// <param name="entity">
        /// The entity whose mentions are compared against <paramref name="mention"/>.
        /// </param>
        /// <returns>
        /// The features gathered per entity mention, followed by the summary features
        /// "sameHead" (with a modifier verdict) and "titleMatch" where they apply.
        /// </returns>
        protected internal virtual List<string> GetStringMatchFeatures(Mention.MentionContext mention, DiscourseEntity entity)
        {
            var features = new List<string>();
            Mention.IParse[] mentionTokens = mention.TokenParses;
            Util.Set<string> mentionModifiers = ConstructModifierSet(mentionTokens, mention.HeadTokenIndex);
            string mentionHead = mention.HeadTokenText.ToLower();
            Util.Set<string> collected = new Util.HashSet<string>();

            // Summary flags accumulated over all mentions of the entity.
            bool headMatched = false;
            bool allModifiersMatched = false;
            bool nonTheModifiersMatched = false;
            bool titleMatched = false;

            foreach (Mention.MentionContext entityMention in entity.Mentions)
            {
                // Whole-phrase comparison: exact match, else coordination mix, else substring.
                string exactMatch = GetExactMatchFeature(entityMention, mention);
                if (exactMatch != null)
                {
                    collected.Add(exactMatch);
                }
                else if (entityMention.Parse.IsCoordinatedNounPhrase && !mention.Parse.IsCoordinatedNounPhrase)
                {
                    collected.Add("cmix");
                }
                else
                {
                    string strippedMention = StripNounPhrase(mention);
                    string strippedEntityMention = StripNounPhrase(entityMention);
                    if (strippedMention != null && strippedEntityMention != null && IsSubstring(strippedMention, strippedEntityMention))
                    {
                        collected.Add("substring");
                    }
                }

                Mention.IParse[] entityTokens = entityMention.TokenParses;
                int entityHeadIndex = entityMention.HeadTokenIndex;
                // Head tags are deliberately not compared (the original note: want to match NN NNP).
                string entityHead = entityMention.HeadTokenText.ToLower();

                // Lexical similarity of the head words.
                if (mentionHead == entityHead)
                {
                    headMatched = true;
                    collected.Add("hds=" + mentionHead);

                    // Modifiers are only re-checked while no fully matching mention has been found.
                    if (!(allModifiersMatched && nonTheModifiersMatched))
                    {
                        allModifiersMatched = true;
                        nonTheModifiersMatched = true;
                        Util.Set<string> entityModifiers = ConstructModifierSet(entityTokens, entityHeadIndex);
                        foreach (string modifier in mentionModifiers)
                        {
                            if (entityModifiers.Contains(modifier))
                            {
                                continue;
                            }
                            allModifiersMatched = false;
                            // A missing "the" only breaks the strict match.
                            if (modifier == "the")
                            {
                                continue;
                            }
                            nonTheModifiersMatched = false;
                            collected.Add("mmw=" + modifier);
                        }
                    }
                }

                // The mention head appearing before the entity mention's NonDescriptorStart counts as a title match.
                Util.Set<string> descriptorModifiers = ConstructModifierSet(entityTokens, entityMention.NonDescriptorStart);
                if (descriptorModifiers.Contains(mentionHead))
                {
                    titleMatched = true;
                }
            }

            if (collected.Count != 0)
            {
                features.AddRange(collected);
            }
            if (headMatched)
            {
                features.Add("sameHead");
                features.Add(allModifiersMatched
                    ? "modsMatch"
                    : (nonTheModifiersMatched ? "nonTheModsMatch" : "modsMisMatch"));
            }
            if (titleMatched)
            {
                features.Add("titleMatch");
            }
            return features;
        }
 /// <summary>
 /// Collects the lower-cased text of every token that precedes the head token.
 /// </summary>
 /// <param name="tokens">
 /// The tokens to read from.
 /// </param>
 /// <param name="headIndex">
 /// Index of the head token; only tokens at indexes below it are collected.
 /// </param>
 /// <returns>
 /// A set with the lower-cased string form of tokens[0] .. tokens[headIndex - 1].
 /// </returns>
 private Util.Set<string> ConstructModifierSet(Mention.IParse[] tokens, int headIndex)
 {
     Util.Set<string> result = new Util.HashSet<string>();

     int position = 0;
     while (position < headIndex)
     {
         string loweredText = tokens[position].ToString().ToLower();
         result.Add(loweredText);
         position++;
     }
     return result;
 }
Beispiel #18
0
 /// <summary>
 /// Builds the set of sense keys for the lemmas of the specified context.
 /// </summary>
 /// <param name="context">
 /// The context whose lemmas (as returned by GetLemmas) are looked up.
 /// </param>
 /// <returns>
 /// For every lemma, the key returned by GetSenseKey plus all keys returned by
 /// GetParentSenseKeys, both queried with the tag "NN" and 0.
 /// </returns>
 private static Util.Set<string> GetSynsetSet(Context context)
 {
     Util.Set<string> keys = new Util.HashSet<string>();
     string[] lemmas = GetLemmas(context);
     Mention.IDictionary dictionary = Mention.DictionaryFactory.GetDictionary();

     for (int lemmaIndex = 0; lemmaIndex < lemmas.Length; lemmaIndex++)
     {
         string lemma = lemmas[lemmaIndex];

         // third argument 0: presumably the sense index - confirm against IDictionary
         keys.Add(dictionary.GetSenseKey(lemma, "NN", 0));

         string[] parentKeys = dictionary.GetParentSenseKeys(lemma, "NN", 0);
         foreach (string parentKey in parentKeys)
         {
             keys.Add(parentKey);
         }
     }
     return keys;
 }
Beispiel #19
0
		/// <summary>
		/// Builds the set of sense keys for the lemmas of the specified context.
		/// </summary>
		/// <param name="context">
		/// The context whose lemmas (as returned by GetLemmas) are looked up.
		/// </param>
		/// <returns>
		/// For every lemma, the key returned by GetSenseKey plus all keys returned by
		/// GetParentSenseKeys, both queried with PartsOfSpeech.NounSingularOrMass and 0.
		/// </returns>
		private static Util.Set<string> GetSynsetSet(Context context)
		{
			Util.Set<string> collectedKeys = new Util.HashSet<string>();
			string[] lemmas = GetLemmas(context);
			Mention.IDictionary dictionary = Mention.DictionaryFactory.GetDictionary();

			for (int lemmaIndex = 0; lemmaIndex < lemmas.Length; lemmaIndex++)
			{
				string lemma = lemmas[lemmaIndex];

				// third argument 0: presumably the sense index - confirm against IDictionary
				collectedKeys.Add(dictionary.GetSenseKey(lemma, PartsOfSpeech.NounSingularOrMass, 0));

				foreach (string parentKey in dictionary.GetParentSenseKeys(lemma, PartsOfSpeech.NounSingularOrMass, 0))
				{
					collectedKeys.Add(parentKey);
				}
			}
			return collectedKeys;
		}
Beispiel #20
0
        /// <summary>
        /// Builds the string-match features that compare the specified mention with every
        /// mention of the specified entity.
        /// </summary>
        /// <param name="mention">
        /// The mention being compared.
        /// </param>
        /// <param name="entity">
        /// The entity whose mentions are compared against <paramref name="mention"/>.
        /// </param>
        /// <returns>
        /// The features gathered per entity mention, followed by the summary features
        /// "sameHead" (with a modifier verdict) and "titleMatch" where they apply.
        /// </returns>
        protected internal virtual List <string> GetStringMatchFeatures(Mention.MentionContext mention, DiscourseEntity entity)
        {
            // Summary flags accumulated over all mentions of the entity.
            bool headsAgree          = false;
            bool modifiersAgree      = false;
            bool titleAgrees         = false;
            bool nonTheModifiersAgree = false;

            List <string> features   = new List <string>();
            var mentionTokens        = mention.TokenParses;
            var mentionModifiers     = ConstructModifierSet(mentionTokens, mention.HeadTokenIndex);
            var mentionHead          = mention.HeadTokenText.ToLower();

            Util.Set <string> gathered = new Util.HashSet <string>();

            foreach (var entityMention in entity.Mentions)
            {
                // Whole-phrase comparison: exact match, else coordination mix, else substring.
                var exactMatch = GetExactMatchFeature(entityMention, mention);
                if (exactMatch != null)
                {
                    gathered.Add(exactMatch);
                }
                else if (entityMention.Parse.IsCoordinatedNounPhrase && !mention.Parse.IsCoordinatedNounPhrase)
                {
                    gathered.Add("cmix");
                }
                else
                {
                    var strippedMention       = StripNounPhrase(mention);
                    var strippedEntityMention = StripNounPhrase(entityMention);
                    bool bothStripped = strippedMention != null && strippedEntityMention != null;
                    if (bothStripped && IsSubstring(strippedMention, strippedEntityMention))
                    {
                        gathered.Add("substring");
                    }
                }

                var entityTokens    = entityMention.TokenParses;
                var entityHeadIndex = entityMention.HeadTokenIndex;
                // Head tags are deliberately not compared (the original note: want to match NN NNP).
                var entityHead      = entityMention.HeadTokenText.ToLower();

                // Lexical similarity of the head words.
                if (mentionHead == entityHead)
                {
                    headsAgree = true;
                    gathered.Add("hds=" + mentionHead);

                    // Modifiers are only re-checked while no fully matching mention has been found.
                    bool modifiersSettled = modifiersAgree && nonTheModifiersAgree;
                    if (!modifiersSettled)
                    {
                        modifiersAgree       = true;
                        nonTheModifiersAgree = true;
                        var entityModifiers = ConstructModifierSet(entityTokens, entityHeadIndex);
                        foreach (var modifier in mentionModifiers)
                        {
                            if (entityModifiers.Contains(modifier))
                            {
                                continue;
                            }
                            modifiersAgree = false;
                            // A missing "the" only breaks the strict match.
                            if (modifier == "the")
                            {
                                continue;
                            }
                            nonTheModifiersAgree = false;
                            gathered.Add("mmw=" + modifier);
                        }
                    }
                }

                // The mention head appearing before the entity mention's NonDescriptorStart counts as a title match.
                var descriptorModifiers = ConstructModifierSet(entityTokens, entityMention.NonDescriptorStart);
                if (descriptorModifiers.Contains(mentionHead))
                {
                    titleAgrees = true;
                }
            }

            if (gathered.Count != 0)
            {
                features.AddRange(gathered);
            }
            if (headsAgree)
            {
                features.Add("sameHead");

                string modifierVerdict;
                if (modifiersAgree)
                {
                    modifierVerdict = "modsMatch";
                }
                else if (nonTheModifiersAgree)
                {
                    modifierVerdict = "nonTheModsMatch";
                }
                else
                {
                    modifierVerdict = "modsMisMatch";
                }
                features.Add(modifierVerdict);
            }
            if (titleAgrees)
            {
                features.Add("titleMatch");
            }
            return features;
        }
Beispiel #21
0
 /// <summary>
 /// Finds the lowest node that lies on both this node's and the specified node's
 /// chain of parents (each node counts as a member of its own chain).
 /// When both references denote the same node, that node's parent is returned instead.
 /// When one node is an ancestor of the other, the ancestor itself is returned.
 /// </summary>
 /// <param name="node">
 /// The node whose parent chain is compared against this node's parent chain.
 /// </param>
 /// <returns>
 /// The deepest node shared by both parent chains, or null when the chains never meet.
 /// </returns>
 public virtual Parse GetCommonParent(Parse node)
 {
     if (this == node)
     {
         return this.Parent;
     }

     // Record this node and every ancestor above it.
     Util.HashSet<Parse> ancestry = new Util.HashSet<Parse>();
     for (Parse current = this; current != null; current = current.Parent)
     {
         ancestry.Add(current);
     }

     // Climb from the other node; the first hit is the deepest shared one.
     for (Parse candidate = node; candidate != null; candidate = candidate.Parent)
     {
         if (ancestry.Contains(candidate))
         {
             return candidate;
         }
     }

     return null;
 }
        /// <summary>
        /// Searches the graph for a path between two nodes in which every edge counts as one step,
        /// and returns the nodes along the path that was found.
        /// </summary>
        /// <param name="graph">The graph to search.</param>
        /// <param name="node1">The node the path starts from.</param>
        /// <param name="node2">The node the path must reach.</param>
        /// <param name="directionSensitive">
        /// When true only <c>GetChildren</c> is followed from each node; otherwise <c>GetNeighbors</c> is used.
        /// </param>
        /// <returns>
        /// The nodes from <paramref name="node1"/> to <paramref name="node2"/> in order; a single-element
        /// list when the two nodes are equal; null when <paramref name="node2"/> was never reached.
        /// </returns>
        public static /*<V, E>*/ List <V> GetShortestPath <V, E>(IGraph <V, E> graph, V node1, V node2, bool directionSensitive)
        {
            if (node1.Equals(node2))
            {
                var trivialPath = new List<V>();
                trivialPath.Add(node2);
                return trivialPath;
            }

            Set<V> settled = new Util.HashSet<V>();
            var cameFrom = new Dictionary<V, V>();
            var frontier = new BinaryHeapPriorityQueue<V>();

            frontier.Add(node1, 0);

            while (frontier.Size() > 0)
            {
                // The head's priority has to be read before the head is taken off the queue.
                var headPriority = frontier.GetPriority();
                var current = frontier.RemoveFirst();
                settled.Add(current);

                if (current.Equals(node2))
                {
                    break;
                }

                frontier.Remove(current);

                // Each step away from the start lowers the priority by one
                // (presumably the queue serves the highest priority first -- confirm against BinaryHeapPriorityQueue).
                var steppedPriority = headPriority - 1;
                var successors = directionSensitive
                    ? graph.GetChildren(current)
                    : new ReadOnlyCollection<V>(graph.GetNeighbors(current));

                foreach (var successor in successors)
                {
                    // nodes not already present will have a priority of -inf
                    bool improves = steppedPriority > frontier.GetPriority(successor);
                    if (improves && !settled.Contains(successor))
                    {
                        frontier.RelaxPriority(successor, steppedPriority);
                        cameFrom[successor] = current;
                    }
                }
            }

            if (!cameFrom.ContainsKey(node2))
            {
                return null;
            }

            // Walk the predecessor links back from the target, then flip into start-to-end order.
            var path = new List<V>();
            path.Add(node2);

            V cursor = node2;
            V predecessor;
            while (cameFrom.TryGetValue(cursor, out predecessor))
            {
                path.Add(predecessor);
                cursor = predecessor;
            }

            path.Reverse();
            return path;
        }
        /// <summary>
        /// Collects the mentions of every entity, and every singleton mention, that could be
        /// semantically compatible with the entity identified by <paramref name="entityKey"/>.
        /// </summary>
        /// <param name="entityKey">
        /// Key of the entity the set is built for.
        /// </param>
        /// <param name="entities">
        /// Entity keys mapped to the mentions belonging to each entity.
        /// </param>
        /// <param name="headSets">
        /// Entity keys mapped to the set of head words of each entity.
        /// </param>
        /// <param name="nameSets">
        /// Entity keys mapped to the set of name types of each entity.
        /// </param>
        /// <param name="singletons">
        /// Mentions of the entities that consist of a single mention.
        /// </param>
        /// <returns>
        /// The mentions of the entity itself plus the mentions of every entity or singleton that has
        /// no name type, shares a head word or a name type with the entity, or for which
        /// <c>HasSuperClass</c> reports a match.
        /// </returns>
        private Util.Set<Context> ConstructExclusionSet(int entityKey, Util.HashList<int, Context> entities, Dictionary<int, Util.Set<string>> headSets, Dictionary<int, Util.Set<string>> nameSets, List<Context> singletons)
        {
            Util.Set<Context> compatible = new Util.HashSet<Context>();
            Util.Set<string> targetHeads = headSets[entityKey];
            Util.Set<string> targetNameTypes = nameSets[entityKey];
            List<Context> targetMentions = entities[entityKey];

            // Multi-mention entities: the checks run cheapest-first and stop at the first that holds.
            foreach (int key in entities.Keys)
            {
                List<Context> mentions = entities[key];

                bool include = key == entityKey
                               || nameSets[key].Count == 0
                               || HasSameHead(targetHeads, headSets[key])
                               || HasSameNameType(targetNameTypes, nameSets[key])
                               || HasSuperClass(targetMentions, mentions);

                if (include)
                {
                    compatible.AddAll(mentions);
                }
            }

            // Singletons: a reusable one-element list lets HasSuperClass be applied to a lone mention.
            List<Context> wrapper = new List<Context>(1);
            foreach (Context single in singletons)
            {
                wrapper.Clear();
                wrapper.Add(single);

                bool include = targetHeads.Contains(single.HeadTokenText.ToLower())
                               || single.NameType == null
                               || targetNameTypes.Contains(single.NameType)
                               || HasSuperClass(targetMentions, wrapper);

                if (include)
                {
                    compatible.Add(single);
                }
            }

            return compatible;
        }
 /// <summary>
 /// Builds the set of lower-cased head words of the given mentions.
 /// </summary>
 /// <param name="mentions">
 /// The mentions whose head token text is collected.
 /// </param>
 /// <returns>
 /// A set holding the lower-cased head token text of every specified mention.
 /// </returns>
 private Util.Set<string> ConstructHeadSet(List<Context> mentions)
 {
     Util.Set<string> heads = new Util.HashSet<string>();
     for (int index = 0; index < mentions.Count; index++)
     {
         string head = mentions[index].HeadTokenText.ToLower();
         heads.Add(head);
     }
     return heads;
 }
Beispiel #25
0
        /// <summary>
        /// Reads a file line by line and returns the distinct lines as a set.
        /// </summary>
        /// <param name="nameFile">
        /// Path of the file to read; it is decoded with <c>System.Text.Encoding.Default</c>.
        /// </param>
        /// <returns>
        /// A set containing every line of the file.
        /// </returns>
        private Util.Set<string> ReadNames(string nameFile)
        {
            Util.Set<string> names = new Util.HashSet<string>();

            // The reader owns the file handle: dispose it deterministically, even when a read throws.
            using (var nameReader = new StreamReader(nameFile, System.Text.Encoding.Default))
            {
                for (string line = nameReader.ReadLine(); line != null; line = nameReader.ReadLine())
                {
                    names.Add(line);
                }
            }

            return names;
        }
 /// <summary>
 /// Gathers the name types carried by the given mentions.
 /// </summary>
 /// <param name="mentions">
 /// The mentions to inspect.
 /// </param>
 /// <returns>
 /// A set of the non-null name types found on the specified mentions.
 /// </returns>
 private Util.Set<string> ConstructNameSet(List<Context> mentions)
 {
     Util.Set<string> nameTypes = new Util.HashSet<string>();
     foreach (Context mention in mentions)
     {
         // Mentions without a name type contribute nothing.
         if (mention.NameType == null)
         {
             continue;
         }
         nameTypes.Add(mention.NameType);
     }
     return nameTypes;
 }
Beispiel #27
0
        /// <summary>
        /// Computes the string-match features that relate a mention to the mentions of an entity.
        /// </summary>
        /// <remarks>
        /// Per entity mention the following may be collected (duplicates collapse in a set):
        /// the value of <c>GetExactMatchFeature</c>, or else "cmix", or else "substring";
        /// "hds=" plus the head word when the heads are equal; and "mmw=" plus each modifier of
        /// the mention, other than "the", that the entity mention lacks. After the loop the summary features
        /// "sameHead", one of "modsMatch" / "nonTheModsMatch" / "modsMisMatch", and "titleMatch"
        /// are appended as applicable.
        /// </remarks>
        /// <param name="mention">
        /// The mention being compared.
        /// </param>
        /// <param name="entity">
        /// The entity whose mentions it is compared against.
        /// </param>
        /// <returns>
        /// The list of string-match features for the specified mention and entity.
        /// </returns>
        protected internal virtual List <string> GetStringMatchFeatures(Mention.MentionContext mention, DiscourseEntity entity)
        {
            bool headsAgree = false;
            bool allModifiersMatch = false;
            bool nonTheModifiersMatch = false;
            bool descriptorHit = false;

            List<string> features = new List<string>();

            var mentionModifiers = ConstructModifierSet(mention.TokenParses, mention.HeadTokenIndex);
            string mentionHead = mention.HeadTokenText.ToLower();

            Util.Set<string> collected = new Util.HashSet<string>();

            foreach (Mention.MentionContext candidate in entity.Mentions)
            {
                // Surface comparison: exact match, else coordination mix, else substring.
                string exact = GetExactMatchFeature(candidate, mention);
                if (exact != null)
                {
                    collected.Add(exact);
                }
                else if (candidate.Parse.IsCoordinatedNounPhrase && !mention.Parse.IsCoordinatedNounPhrase)
                {
                    collected.Add("cmix");
                }
                else
                {
                    string strippedMention = StripNounPhrase(mention);
                    string strippedCandidate = StripNounPhrase(candidate);
                    if (strippedMention != null && strippedCandidate != null && IsSubstring(strippedMention, strippedCandidate))
                    {
                        collected.Add("substring");
                    }
                }

                Mention.IParse[] candidateTokens = candidate.TokenParses;
                int candidateHeadIndex = candidate.HeadTokenIndex;

                // Head tags are deliberately not compared, so that e.g. NN can match NNP.
                string candidateHead = candidate.HeadTokenText.ToLower();

                // model lexical similarity
                if (mentionHead == candidateHead)
                {
                    headsAgree = true;
                    collected.Add("hds=" + mentionHead);

                    // Once some entity mention matched on every modifier there is nothing left to learn.
                    if (!(allModifiersMatch && nonTheModifiersMatch))
                    {
                        allModifiersMatch = true;
                        nonTheModifiersMatch = true;

                        var candidateModifiers = ConstructModifierSet(candidateTokens, candidateHeadIndex);
                        foreach (string modifier in mentionModifiers)
                        {
                            if (candidateModifiers.Contains(modifier))
                            {
                                continue;
                            }

                            allModifiersMatch = false;
                            if (modifier != "the")
                            {
                                nonTheModifiersMatch = false;
                                collected.Add("mmw=" + modifier);
                            }
                        }
                    }
                }

                // The mention's head appearing among the candidate's descriptor tokens counts as a title match.
                var descriptorModifiers = ConstructModifierSet(candidateTokens, candidate.NonDescriptorStart);
                descriptorHit |= descriptorModifiers.Contains(mentionHead);
            }

            if (collected.Count != 0)
            {
                features.AddRange(collected);
            }

            if (headsAgree)
            {
                features.Add("sameHead");
                features.Add(allModifiersMatch
                    ? "modsMatch"
                    : (nonTheModifiersMatch ? "nonTheModsMatch" : "modsMisMatch"));
            }

            if (descriptorHit)
            {
                features.Add("titleMatch");
            }

            return features;
        }
        /// <summary>
        /// Propagates dependencies across conjunctions. For every conjunction relation conj(gov, dep)
        /// in the list, each relation in which gov is the dependent is copied onto dep, and the subject
        /// of gov is copied onto dep when dep is tagged as a verb or adjective and has no subject itself.
        /// </summary>
        /// <param name="list">
        /// The typed dependencies to process. The list is modified in place: on return it holds the
        /// original dependencies followed by the newly derived ones.
        /// </param>
        private static void TreatCc(List<TypedDependency> list)
        {
            // Construct a map from tree nodes to the set of typed
            // dependencies in which the node appears as dependent.
            var map = new Dictionary<IndexedWord, Set<TypedDependency>>();
            // Construct a map of tree nodes being governor of a subject grammatical
            // relation to that relation
            var subjectMap = new Dictionary<IndexedWord, TypedDependency>();
            // Construct a set of TreeGraphNodes with a passive auxiliary on them
            Set<IndexedWord> withPassiveAuxiliary = new Util.HashSet<IndexedWord>();
            // Construct a map of tree nodes being governor of an object grammatical
            // relation to that relation
            // Map<TreeGraphNode, TypedDependency> objectMap = new
            // HashMap<TreeGraphNode, TypedDependency>();

            var rcmodHeads = new List<IndexedWord>();
            var prepcDep = new List<IndexedWord>();

            foreach (TypedDependency typedDep in list)
            {
                Set<TypedDependency> relationsOfDep;
                if (!map.TryGetValue(typedDep.Dep, out relationsOfDep))
                {
                    // NB: Here and in other places below, we use a TreeSet (which extends
                    // SortedSet) to guarantee that results are deterministic)
                    relationsOfDep = new TreeSet<TypedDependency>();
                    map.Add(typedDep.Dep, relationsOfDep);
                }
                relationsOfDep.Add(typedDep);

                if (typedDep.Reln.Equals(EnglishGrammaticalRelations.AuxPassiveModifier))
                {
                    withPassiveAuxiliary.Add(typedDep.Gov);
                }

                // look for subjects
                if (typedDep.Reln.GetParent() == EnglishGrammaticalRelations.NominalSubject ||
                    typedDep.Reln.GetParent() == EnglishGrammaticalRelations.Subject ||
                    typedDep.Reln.GetParent() == EnglishGrammaticalRelations.ClausalSubject)
                {
                    // only the first subject relation seen for a governor is kept
                    if (!subjectMap.ContainsKey(typedDep.Gov))
                    {
                        subjectMap.Add(typedDep.Gov, typedDep);
                    }
                }

                // look for objects
                // this map was only required by the code commented out below, so comment
                // it out too
                // if (typedDep.reln() == DIRECT_OBJECT) {
                // if (!objectMap.containsKey(typedDep.gov())) {
                // objectMap.put(typedDep.gov(), typedDep);
                // }
                // }

                // look for rcmod relations
                if (typedDep.Reln == EnglishGrammaticalRelations.RelativeClauseModifier)
                {
                    rcmodHeads.Add(typedDep.Gov);
                }
                // look for prepc relations: put the dependent of such a relation in the
                // list
                // to avoid wrong propagation of dobj
                // (relation names are identifiers, so compare ordinally rather than by culture)
                if (typedDep.Reln.ToString().StartsWith("prepc", System.StringComparison.Ordinal))
                {
                    prepcDep.Add(typedDep.Dep);
                }
            }

            // create a new list of typed dependencies
            var newTypedDeps = new List<TypedDependency>(list);

            // find typed deps of form conj(gov,dep)
            foreach (TypedDependency td in list)
            {
                if (EnglishGrammaticalRelations.GetConjs().Contains(td.Reln))
                {
                    IndexedWord gov = td.Gov;
                    IndexedWord dep = td.Dep;

                    // look at the dep in the conjunct
                    // A governor that never occurs as a dependent has no entry in the map; the
                    // Dictionary indexer would throw KeyNotFoundException for it, so probe with
                    // TryGetValue and simply skip the propagation in that case.
                    Set<TypedDependency> govRelations;
                    if (map.TryGetValue(gov, out govRelations) && govRelations != null)
                    {
                        foreach (TypedDependency td1 in govRelations)
                        {
                            IndexedWord newGov = td1.Gov;
                            // in the case of errors in the basic dependencies, it
                            // is possible to have overlapping newGov & dep
                            if (newGov.Equals(dep))
                            {
                                continue;
                            }
                            GrammaticalRelation newRel = td1.Reln;
                            if (newRel != GrammaticalRelation.Root)
                            {
                                if (rcmodHeads.Contains(gov) && rcmodHeads.Contains(dep))
                                {
                                    // to prevent wrong propagation in the case of long dependencies in relative clauses
                                    if (newRel != EnglishGrammaticalRelations.DirectObject &&
                                        newRel != EnglishGrammaticalRelations.NominalSubject)
                                    {
                                        newTypedDeps.Add(new TypedDependency(newRel, newGov, dep));
                                    }
                                }
                                else
                                {
                                    newTypedDeps.Add(new TypedDependency(newRel, newGov, dep));
                                }
                            }
                        }
                    }

                    // propagate subjects
                    // look at the gov in the conjunct: if it is has a subject relation,
                    // the dep is a verb and the dep doesn't have a subject relation
                    // then we want to add a subject relation for the dep.
                    // (By testing for the dep to be a verb, we are going to miss subject of
                    // copular verbs! but
                    // is it safe to relax this assumption?? i.e., just test for the subject
                    // part)
                    // CDM 2008: I also added in JJ, since participial verbs are often
                    // tagged JJ
                    string tag = dep.Tag();
                    if (subjectMap.ContainsKey(gov) && (PartsOfSpeech.IsVerb(tag) || PartsOfSpeech.IsAdjective(tag)) &&
                        !subjectMap.ContainsKey(dep))
                    {
                        TypedDependency tdsubj = subjectMap[gov];
                        // check for wrong nsubjpass: if the new verb is VB or VBZ or VBP or JJ, then
                        // add nsubj (if it is tagged correctly, should do this for VBD too, but we don't)
                        GrammaticalRelation relation = tdsubj.Reln;
                        if (relation == EnglishGrammaticalRelations.NominalPassiveSubject)
                        {
                            if (IsDefinitelyActive(tag))
                            {
                                relation = EnglishGrammaticalRelations.NominalSubject;
                            }
                        }
                        else if (relation == EnglishGrammaticalRelations.ClausalPassiveSubject)
                        {
                            if (IsDefinitelyActive(tag))
                            {
                                relation = EnglishGrammaticalRelations.ClausalSubject;
                            }
                        }
                        else if (relation == EnglishGrammaticalRelations.NominalSubject)
                        {
                            if (withPassiveAuxiliary.Contains(dep))
                            {
                                relation = EnglishGrammaticalRelations.NominalPassiveSubject;
                            }
                        }
                        else if (relation == EnglishGrammaticalRelations.ClausalSubject)
                        {
                            if (withPassiveAuxiliary.Contains(dep))
                            {
                                relation = EnglishGrammaticalRelations.ClausalPassiveSubject;
                            }
                        }
                        newTypedDeps.Add(new TypedDependency(relation, dep, tdsubj.Dep));
                    }

                    // propagate objects
                    // cdm july 2010: This bit of code would copy a dobj from the first
                    // clause to a later conjoined clause if it didn't
                    // contain its own dobj or prepc. But this is too aggressive and wrong
                    // if the later clause is intransitive
                    // (including passivized cases) and so I think we have to not have this
                    // done always, and see no good "sometimes" heuristic.
                    // IF WE WERE TO REINSTATE, SHOULD ALSO NOT ADD OBJ IF THERE IS A ccomp
                    // (SBAR).
                    // if (objectMap.containsKey(gov) &&
                    // dep.tag().startsWith("VB") && ! objectMap.containsKey(dep)
                    // && ! prepcDep.contains(gov)) {
                    // TypedDependency tdobj = objectMap.get(gov);
                    // newTypedDeps.add(new TypedDependency(tdobj.reln(), dep,
                    // tdobj.dep()));
                    // }
                }
            }

            // Replace the caller's list contents with the augmented set of dependencies.
            list.Clear();
            list.AddRange(newTypedDeps);
        }