/// <summary>
/// Builds the set of mentions that may be semantically compatible with the entity
/// identified by <paramref name="entityKey"/> and must therefore be excluded.
/// </summary>
/// <param name="entityKey">Key of the entity for which the exclusion set is built.</param>
/// <param name="entities">Mapping between entity keys and their mentions.</param>
/// <param name="headSets">Mapping between entity keys and their head-word sets.</param>
/// <param name="nameSets">Mapping between entity keys and their name-type sets.</param>
/// <param name="singletons">All entities that consist of a single mention.</param>
/// <returns>
/// The mentions of every entity that might be semantically compatible with the
/// entity identified by <paramref name="entityKey"/>.
/// </returns>
private Util.Set<Context> ConstructExclusionSet(int entityKey, Util.HashList<int, Context> entities, Dictionary<int, Util.Set<string>> headSets, Dictionary<int, Util.Set<string>> nameSets, List<Context> singletons)
{
    Util.Set<Context> excluded = new Util.HashSet<Context>();
    Util.Set<string> targetHeads = headSets[entityKey];
    Util.Set<string> targetNames = nameSets[entityKey];
    List<Context> targetContexts = entities[entityKey];

    // Multi-mention entities: exclude the entity itself plus any entity whose
    // name types, head words, or hypernyms suggest possible compatibility.
    foreach (int candidateKey in entities.Keys)
    {
        List<Context> candidateContexts = entities[candidateKey];
        bool compatible =
            candidateKey == entityKey
            || nameSets[candidateKey].Count == 0
            || HasSameHead(targetHeads, headSets[candidateKey])
            || HasSameNameType(targetNames, nameSets[candidateKey])
            || HasSuperClass(targetContexts, candidateContexts);
        if (compatible)
        {
            excluded.AddAll(candidateContexts);
        }
    }

    // Singleton entities: the same checks, applied one mention at a time.
    // The single-element list is reused to avoid reallocating per iteration.
    var singletonWrapper = new List<Context>(1);
    foreach (Context singleton in singletons)
    {
        singletonWrapper.Clear();
        singletonWrapper.Add(singleton);
        if (targetHeads.Contains(singleton.HeadTokenText.ToLower())
            || singleton.NameType == null
            || targetNames.Contains(singleton.NameType)
            || HasSuperClass(targetContexts, singletonWrapper))
        {
            excluded.Add(singleton);
        }
    }
    return excluded;
}
/// <summary>
/// Collects a sorted, de-duplicated array of mentions for the given noun phrases.
/// </summary>
/// <param name="nounPhrases">The candidate noun-phrase parses.</param>
/// <param name="headMap">Mapping from each parse to its syntactic head.</param>
/// <returns>The mentions extracted from the noun phrases.</returns>
private Mention[] CollectMentions(List<IParse> nounPhrases, Dictionary<IParse, IParse> headMap)
{
    var mentions = new List<Mention>(nounPhrases.Count);
    Util.Set<IParse> recentMentions = new Util.HashSet<IParse>();
    foreach (IParse nounPhrase in nounPhrases)
    {
        // Skip phrases that merely repeat the head of a mention we already emitted,
        // and phrases that are themselves part of a larger name.
        if (!IsHeadOfExistingMention(nounPhrase, headMap, recentMentions))
        {
            ClearMentions(recentMentions, nounPhrase);
            if (!IsPartOfName(nounPhrase))
            {
                IParse head = mHeadFinder.GetLastHead(nounPhrase);
                var mention = new Mention(nounPhrase.Span, head.Span, head.EntityId, nounPhrase, null);
                mentions.Add(mention);
                recentMentions.Add(nounPhrase);
                // Attach the named-entity type when one can be determined.
                string entityType = GetEntityType(mHeadFinder.GetHeadToken(head));
                if (entityType != null)
                {
                    mention.NameType = entityType;
                }
            }
        }
        if (IsBasalNounPhrase(nounPhrase))
        {
            if (mPrenominalNamedEntitiesCollection)
            {
                CollectPrenominalNamedEntities(nounPhrase, mentions);
            }
            if (mCoordinatedNounPhrasesCollection)
            {
                CollectCoordinatedNounPhraseMentions(nounPhrase, mentions);
            }
            CollectPossessivePronouns(nounPhrase, mentions);
        }
        else
        {
            // Could use to get NP -> tokens CON structures for basal nps including NP -> NAC tokens
            //collectComplexNounPhrases(np,mentions);
        }
    }
    mentions.Sort();
    RemoveDuplicates(mentions);
    return mentions.ToArray();
}
/// <summary>
/// Loads the tab-separated acronym database at <paramref name="name"/> into
/// <c>_acroMap</c>, mapping in both directions (acronym -> full forms and
/// full form -> acronyms). A missing or unreadable database is logged and
/// leaves the map empty.
/// </summary>
/// <param name="name">Path of the acronym database file (one "acro\tfull" pair per line).</param>
private void initAcronyms(string name)
{
    _acroMap = new Hashtable(15000);
    try
    {
        // One reader, disposed deterministically. The original opened the file
        // twice (leaking one handle) and never disposed either reader.
#if DNF
        using (var reader = new StreamReader(name, Encoding.Default))
#else
        // FileMode.Open (not OpenOrCreate): a missing database must surface as an
        // IOException handled below instead of silently creating an empty file.
        using (var reader = new StreamReader(new FileStream(name, FileMode.Open, FileAccess.Read), Encoding.GetEncoding(0)))
#endif
        {
            string line;
            while ((line = reader.ReadLine()) != null)
            {
                var st = new Util.StringTokenizer(line, "\t");
                string acro = st.NextToken();
                string full = st.NextToken();
                AddAcronymMapping(acro, full);
                AddAcronymMapping(full, acro);
            }
        }
    }
    catch (IOException e)
    {
        Console.Error.WriteLine("ProperNounResolver.initAcronyms: Acronym Database not found: " + e);
    }
}

/// <summary>
/// Adds <paramref name="value"/> to the set of expansions stored under
/// <paramref name="key"/>, creating the set on first use.
/// </summary>
private void AddAcronymMapping(string key, string value)
{
    var expansions = (Util.Set<string>)_acroMap[key];
    if (expansions == null)
    {
        expansions = new Util.HashSet<string>();
        _acroMap[key] = expansions;
    }
    expansions.Add(value);
}
/// <summary>
/// Produces the set of lower-cased head words of the specified mentions.
/// </summary>
/// <param name="mentions">The mentions whose head words are collected.</param>
/// <returns>A set containing the head words of the specified mentions.</returns>
private Util.Set<string> ConstructHeadSet(IEnumerable<Context> mentions)
{
    Util.Set<string> heads = new Util.HashSet<string>();
    foreach (var mention in mentions)
    {
        heads.Add(mention.HeadTokenText.ToLower());
    }
    return heads;
}
/// <summary>
/// Collects the lower-cased text of every token that precedes the head token,
/// i.e. the pre-head modifiers of a mention.
/// </summary>
/// <param name="tokens">Token parses of the mention.</param>
/// <param name="headIndex">Index of the head token; tokens before it are modifiers.</param>
/// <returns>The set of lower-cased modifier strings.</returns>
private Util.Set<string> ConstructModifierSet(Mention.IParse[] tokens, int headIndex)
{
    Util.Set<string> modifiers = new Util.HashSet<string>();
    int index = 0;
    while (index < headIndex)
    {
        modifiers.Add(tokens[index].ToString().ToLower());
        index++;
    }
    return modifiers;
}
/// <summary>
/// Reads a name list from <paramref name="nameFile"/>, one name per line.
/// </summary>
/// <param name="nameFile">Path of the file to read.</param>
/// <returns>The set of names read from the file.</returns>
private Util.Set<string> ReadNames(string nameFile)
{
    Util.Set<string> names = new Util.HashSet<string>();
    // Dispose the reader deterministically; the original leaked the file handle.
    using (var nameReader = new System.IO.StreamReader(nameFile, System.Text.Encoding.Default))
    {
        string line;
        while ((line = nameReader.ReadLine()) != null)
        {
            names.Add(line);
        }
    }
    return names;
}
/// <summary>
/// Produces the set of name types assigned to the specified mentions.
/// Mentions without a name type are skipped.
/// </summary>
/// <param name="mentions">A list of mentions.</param>
/// <returns>The set of name types found on the mentions.</returns>
private Util.Set<string> ConstructNameSet(IEnumerable<Context> mentions)
{
    Util.Set<string> names = new Util.HashSet<string>();
    foreach (var mention in mentions)
    {
        string nameType = mention.NameType;
        if (nameType != null)
        {
            names.Add(nameType);
        }
    }
    return names;
}
/// <summary>
/// Reads a name list from <paramref name="nameFile"/>, one name per line.
/// </summary>
/// <param name="nameFile">Path of the file to read.</param>
/// <returns>The set of names read from the file.</returns>
private Util.Set<string> ReadNames(string nameFile)
{
    Util.Set<string> names = new Util.HashSet<string>();
    // 'using' disposes the reader/stream; the original leaked the handle.
#if DNF
    using (var nameReader = new StreamReader(nameFile, System.Text.Encoding.Default))
#else
    // FileMode.Open (not OpenOrCreate): a missing name file should throw rather
    // than be silently created empty; Open also works on read-only files.
    using (var nameReader = new StreamReader(new FileStream(nameFile, FileMode.Open, FileAccess.Read), System.Text.Encoding.GetEncoding(0)))
#endif
    {
        string line;
        while ((line = nameReader.ReadLine()) != null)
        {
            names.Add(line);
        }
    }
    return names;
}
/// <summary>
/// Collects a sorted, de-duplicated array of mentions for the given noun phrases.
/// </summary>
/// <param name="nounPhrases">The candidate noun-phrase parses.</param>
/// <param name="headMap">Mapping from each parse to its syntactic head.</param>
/// <returns>The mentions extracted from the noun phrases.</returns>
private Mention[] CollectMentions(List<IParse> nounPhrases, Dictionary<IParse, IParse> headMap)
{
    var collected = new List<Mention>(nounPhrases.Count);
    Util.Set<IParse> recentMentions = new Util.HashSet<IParse>();
    foreach (IParse nounPhrase in nounPhrases)
    {
        // Ignore phrases that only repeat the head of an already-emitted mention,
        // and phrases embedded in a larger name.
        if (!IsHeadOfExistingMention(nounPhrase, headMap, recentMentions))
        {
            ClearMentions(recentMentions, nounPhrase);
            if (!IsPartOfName(nounPhrase))
            {
                IParse head = mHeadFinder.GetLastHead(nounPhrase);
                var mention = new Mention(nounPhrase.Span, head.Span, head.EntityId, nounPhrase, null);
                collected.Add(mention);
                recentMentions.Add(nounPhrase);
                // Attach the named-entity type when one can be determined.
                string entityType = GetEntityType(mHeadFinder.GetHeadToken(head));
                if (entityType != null)
                {
                    mention.NameType = entityType;
                }
            }
        }
        if (IsBasalNounPhrase(nounPhrase))
        {
            if (mPrenominalNamedEntitiesCollection)
            {
                CollectPrenominalNamedEntities(nounPhrase, collected);
            }
            if (mCoordinatedNounPhrasesCollection)
            {
                CollectCoordinatedNounPhraseMentions(nounPhrase, collected);
            }
            CollectPossessivePronouns(nounPhrase, collected);
        }
        else
        {
            // Could use to get NP -> tokens CON structures for basal nps including NP -> NAC tokens
            //collectComplexNounPhrases(np,mentions);
        }
    }
    collected.Sort();
    RemoveDuplicates(collected);
    return collected.ToArray();
}
/// <summary>
/// Gathers the sense keys of the context's head lemmas and of their parent
/// senses from the dictionary.
/// </summary>
/// <param name="context">The context whose lemmas are looked up.</param>
/// <returns>The set of sense keys for the lemmas and their parents.</returns>
private static Util.Set<string> GetSynsetSet(Context context)
{
    Util.Set<string> synsets = new Util.HashSet<string>();
    Mention.IDictionary dictionary = Mention.DictionaryFactory.GetDictionary();
    foreach (string lemma in GetLemmas(context))
    {
        synsets.Add(dictionary.GetSenseKey(lemma, PartsOfSpeech.NounSingularOrMass, 0));
        foreach (string parentKey in dictionary.GetParentSenseKeys(lemma, PartsOfSpeech.NounSingularOrMass, 0))
        {
            synsets.Add(parentKey);
        }
    }
    return synsets;
}
/// <summary>
/// Gathers the sense keys of the context's head lemmas and of their parent
/// senses from the dictionary, using the "NN" (singular noun) part of speech.
/// </summary>
/// <param name="context">The context whose lemmas are looked up.</param>
/// <returns>The set of sense keys for the lemmas and their parents.</returns>
private static Util.Set<string> GetSynsetSet(Context context)
{
    Util.Set<string> synsets = new Util.HashSet<string>();
    Mention.IDictionary dictionary = Mention.DictionaryFactory.GetDictionary();
    foreach (string lemma in GetLemmas(context))
    {
        synsets.Add(dictionary.GetSenseKey(lemma, "NN", 0));
        foreach (string parentKey in dictionary.GetParentSenseKeys(lemma, "NN", 0))
        {
            synsets.Add(parentKey);
        }
    }
    return synsets;
}
/// <summary>
/// Initializes head-token state (index, tag, text, token array) from the last
/// head of this context's parse, and populates the synset set for common nouns.
/// </summary>
/// <param name="headFinder">Head finder used to locate the head constituent and token.</param>
private void Initialize(Mention.IHeadFinder headFinder)
{
    var lastHead = headFinder.GetLastHead(Parse);
    HeadTokenIndex = headFinder.GetHeadIndex(lastHead);
    _tokens = lastHead.Tokens.ToArray();
    var headToken = headFinder.GetHeadToken(lastHead);
    HeadTokenTag = headToken.SyntacticType;
    HeadTokenText = headToken.ToString();
    // Synsets are only meaningful for common nouns; proper nouns get an empty set.
    bool isCommonNoun = PartsOfSpeech.IsNoun(HeadTokenTag) && !PartsOfSpeech.IsProperNoun(HeadTokenTag);
    if (isCommonNoun)
    {
        Synsets = GetSynsetSet(this);
    }
    else
    {
        Synsets = new Util.HashSet<string>();
    }
}
/// <summary>
/// Breadth-first search: starting from the nodes in <paramref name="todo"/>,
/// collects the connected component reachable through the graph's neighbor
/// relation, restricted to vertices still present in <paramref name="verticesLeft"/>.
/// Both <paramref name="todo"/> and <paramref name="verticesLeft"/> are consumed.
/// </summary>
private static /*<V, E>*/ Set<V> Bfs<V, E>(List<V> todo, IGraph<V, E> graph, List<V> verticesLeft)
{
    Set<V> component = new Util.HashSet<V>();
    while (todo.Count > 0)
    {
        // Pop from the front: the list behaves as a FIFO queue.
        V current = todo[0];
        todo.RemoveAt(0);
        component.Add(current);
        foreach (V neighbor in graph.GetNeighbors(current))
        {
            if (verticesLeft.Contains(neighbor))
            {
                component.Add(neighbor);
                todo.Add(neighbor);
                // Removing the vertex here doubles as the "visited" marker.
                verticesLeft.Remove(neighbor);
            }
        }
    }
    return component;
}
/// <summary>
/// Given a tree node <paramref name="t"/>, returns the nodes to which
/// <paramref name="t"/> bears this grammatical relation, with
/// <paramref name="t"/> as the governor.
/// </summary>
/// <param name="t">Target for finding dependents related by this grammatical relation.</param>
/// <param name="root">The root of the tree.</param>
/// <param name="headFinder">Head finder threaded through every matcher so the same
/// head rules apply while building dependencies.</param>
/// <returns>A collection of dependent nodes to which <paramref name="t"/> bears this relation.</returns>
public ICollection<TreeGraphNode> GetRelatedNodes(TreeGraphNode t, TreeGraphNode root, IHeadFinder headFinder)
{
    Set<TreeGraphNode> related = new Util.HashSet<TreeGraphNode>();
    foreach (TregexPattern pattern in targetPatterns)
    {
        TregexMatcher matcher = pattern.Matcher(root, headFinder);
        while (matcher.FindAt(t))
        {
            var target = (TreeGraphNode)matcher.GetNode("target");
            if (target == null)
            {
                throw new InvalidDataException("Expression has no target: " + pattern);
            }
            related.Add(target);
        }
    }
    return related;
}
/// <summary>
/// Returns the deepest shared parent of this node and the specified node.
/// If the nodes are identical, their parent is returned; if one node is an
/// ancestor of the other, that ancestor is returned.
/// </summary>
/// <param name="node">The node whose ancestor chain is compared with this node's.</param>
/// <returns>The deepest shared parent, or null if the nodes share no ancestor.</returns>
public virtual Parse GetCommonParent(Parse node)
{
    if (this == node)
    {
        return Parent;
    }
    // Record every ancestor of this node (including itself), then walk the other
    // node's chain until we hit one of them.
    var ancestors = new Util.HashSet<Parse>();
    for (Parse ancestor = this; ancestor != null; ancestor = ancestor.Parent)
    {
        ancestors.Add(ancestor);
    }
    for (Parse candidate = node; candidate != null; candidate = candidate.Parent)
    {
        if (ancestors.Contains(candidate))
        {
            return candidate;
        }
    }
    return null;
}
/// <summary>
/// Loads the tab-separated acronym database at <paramref name="name"/> into
/// <c>acroMap</c>, mapping in both directions (acronym -> full forms and
/// full form -> acronyms). A missing or unreadable database is logged and
/// leaves the map empty.
/// </summary>
/// <param name="name">Path of the acronym database file (one "acro\tfull" pair per line).</param>
private void initAcronyms(string name)
{
    acroMap = new System.Collections.Hashtable(15000);
    try
    {
        // Single reader wrapped in 'using': the original opened the file twice
        // and never disposed either reader, leaking both file handles.
        using (var str = new System.IO.StreamReader(name, System.Text.Encoding.Default))
        {
            string line;
            while ((line = str.ReadLine()) != null)
            {
                Util.StringTokenizer st = new Util.StringTokenizer(line, "\t");
                string acro = st.NextToken();
                string full = st.NextToken();
                AddAcronymLink(acro, full);
                AddAcronymLink(full, acro);
            }
        }
    }
    catch (System.IO.IOException e)
    {
        System.Console.Error.WriteLine("ProperNounResolver.initAcronyms: Acronym Database not found: " + e);
    }
}

/// <summary>
/// Adds <paramref name="value"/> to the set stored under <paramref name="key"/>
/// in <c>acroMap</c>, creating the set on first use.
/// </summary>
private void AddAcronymLink(string key, string value)
{
    Util.Set<string> exSet = (Util.Set<string>)acroMap[key];
    if (exSet == null)
    {
        exSet = new Util.HashSet<string>();
        acroMap[key] = exSet;
    }
    exSet.Add(value);
}
/// <summary>
/// Collects the lower-cased text of every token preceding the head token,
/// i.e. the pre-head modifiers of a mention.
/// </summary>
/// <param name="tokens">Token parses of the mention.</param>
/// <param name="headIndex">Index of the head token; tokens before it are modifiers.</param>
/// <returns>The set of lower-cased modifier strings.</returns>
private Util.Set<string> ConstructModifierSet(Mention.IParse[] tokens, int headIndex)
{
    Util.Set<string> modifiers = new Util.HashSet<string>();
    for (int i = 0; i < headIndex; ++i)
    {
        string modifierText = tokens[i].ToString().ToLower();
        modifiers.Add(modifierText);
    }
    return modifiers;
}
/// <summary>
/// Returns string-match features for the specified mention and entity.
/// </summary>
/// <param name="mention">
/// The mention.
/// </param>
/// <param name="entity">
/// The entity.
/// </param>
/// <returns>
/// List of string-match features for the specified mention and entity.
/// </returns>
protected internal virtual List<string> GetStringMatchFeatures(Mention.MentionContext mention, DiscourseEntity entity)
{
    // Flags accumulated across all of the entity's mentions.
    bool sameHead = false;            // some entity mention shares the mention's head word
    bool modifersMatch = false;       // every pre-head modifier matched for a same-head entity mention
    bool titleMatch = false;          // mention's head appears in an entity mention's descriptor-modifier set
    bool noTheModifiersMatch = false; // modifiers matched, ignoring mismatches on "the"
    List<string> features = new List<string>();
    Mention.IParse[] mentionTokens = mention.TokenParses;
    OpenNLP.Tools.Util.Set<string> entityContextModifierSet = ConstructModifierSet(mentionTokens, mention.HeadTokenIndex);
    string mentionHeadString = mention.HeadTokenText.ToLower();
    // A set is used so duplicate features across entity mentions are emitted once.
    Util.Set<string> featureSet = new Util.HashSet<string>();
    foreach (Mention.MentionContext entityMention in entity.Mentions)
    {
        string exactMatchFeature = GetExactMatchFeature(entityMention, mention);
        if (exactMatchFeature != null)
        {
            featureSet.Add(exactMatchFeature);
        }
        else if (entityMention.Parse.IsCoordinatedNounPhrase && !mention.Parse.IsCoordinatedNounPhrase)
        {
            // Coordination mismatch between the entity mention and this mention.
            featureSet.Add("cmix");
        }
        else
        {
            string mentionStrip = StripNounPhrase(mention);
            string entityMentionStrip = StripNounPhrase(entityMention);
            if (mentionStrip != null && entityMentionStrip != null)
            {
                if (IsSubstring(mentionStrip, entityMentionStrip))
                {
                    featureSet.Add("substring");
                }
            }
        }
        Mention.IParse[] entityMentionTokens = entityMention.TokenParses;
        int headIndex = entityMention.HeadTokenIndex;
        //if (!mention.getHeadTokenTag().equals(entityMention.getHeadTokenTag())) {
        //  continue;
        //} want to match NN NNP
        string entityMentionHeadString = entityMention.HeadTokenText.ToLower();
        // model lexical similarity
        if (mentionHeadString == entityMentionHeadString)
        {
            sameHead = true;
            featureSet.Add("hds=" + mentionHeadString);
            if (!modifersMatch || !noTheModifiersMatch)
            {
                // only check if we haven't already found one which is the same
                modifersMatch = true;
                noTheModifiersMatch = true;
                Util.Set<string> entityMentionModifierSet = ConstructModifierSet(entityMentionTokens, headIndex);
                foreach (string modifierWord in entityContextModifierSet)
                {
                    if (!entityMentionModifierSet.Contains(modifierWord))
                    {
                        modifersMatch = false;
                        if (modifierWord != "the")
                        {
                            // Record each non-"the" modifier the entity mention lacks.
                            noTheModifiersMatch = false;
                            featureSet.Add("mmw=" + modifierWord);
                        }
                    }
                }
            }
        }
        // NOTE(review): presumably NonDescriptorStart bounds the descriptor-modifier
        // region of the entity mention — confirm against MentionContext.
        Util.Set<string> descriptorModifierSet = ConstructModifierSet(entityMentionTokens, entityMention.NonDescriptorStart);
        if (descriptorModifierSet.Contains(mentionHeadString))
        {
            titleMatch = true;
        }
    }
    if (!(featureSet.Count == 0))
    {
        features.AddRange(featureSet);
    }
    if (sameHead)
    {
        features.Add("sameHead");
        if (modifersMatch)
        {
            features.Add("modsMatch");
        }
        else if (noTheModifiersMatch)
        {
            features.Add("nonTheModsMatch");
        }
        else
        {
            features.Add("modsMisMatch");
        }
    }
    if (titleMatch)
    {
        features.Add("titleMatch");
    }
    return features;
}
/// <summary>
/// Gathers the sense keys of the context's head lemmas and of their parent
/// senses from the dictionary, using the "NN" (singular noun) part of speech.
/// </summary>
/// <param name="context">The context whose lemmas are looked up.</param>
/// <returns>The set of sense keys for the lemmas and their parents.</returns>
private static Util.Set<string> GetSynsetSet(Context context)
{
    Util.Set<string> senseKeys = new Util.HashSet<string>();
    Mention.IDictionary dictionary = Mention.DictionaryFactory.GetDictionary();
    foreach (string lemma in GetLemmas(context))
    {
        senseKeys.Add(dictionary.GetSenseKey(lemma, "NN", 0));
        foreach (string parentKey in dictionary.GetParentSenseKeys(lemma, "NN", 0))
        {
            senseKeys.Add(parentKey);
        }
    }
    return senseKeys;
}
/// <summary>
/// Gathers the sense keys of the context's head lemmas and of their parent
/// senses from the dictionary.
/// </summary>
/// <param name="context">The context whose lemmas are looked up.</param>
/// <returns>The set of sense keys for the lemmas and their parents.</returns>
private static Util.Set<string> GetSynsetSet(Context context)
{
    Util.Set<string> senseKeys = new Util.HashSet<string>();
    string[] lemmas = GetLemmas(context);
    Mention.IDictionary dictionary = Mention.DictionaryFactory.GetDictionary();
    for (int lemmaIndex = 0; lemmaIndex < lemmas.Length; lemmaIndex++)
    {
        string lemma = lemmas[lemmaIndex];
        senseKeys.Add(dictionary.GetSenseKey(lemma, PartsOfSpeech.NounSingularOrMass, 0));
        foreach (string parentKey in dictionary.GetParentSenseKeys(lemma, PartsOfSpeech.NounSingularOrMass, 0))
        {
            senseKeys.Add(parentKey);
        }
    }
    return senseKeys;
}
/// <summary>
/// Returns string-match features for the specified mention and entity.
/// </summary>
/// <param name="mention">
/// The mention.
/// </param>
/// <param name="entity">
/// The entity.
/// </param>
/// <returns>
/// List of string-match features for the specified mention and entity.
/// </returns>
protected internal virtual List<string> GetStringMatchFeatures(Mention.MentionContext mention, DiscourseEntity entity)
{
    // Flags accumulated across all of the entity's mentions.
    var sameHead = false;            // some entity mention shares the mention's head word
    var modifersMatch = false;       // every pre-head modifier matched for a same-head entity mention
    var titleMatch = false;          // mention's head appears in an entity mention's descriptor-modifier set
    var noTheModifiersMatch = false; // modifiers matched, ignoring mismatches on "the"
    var features = new List<string>();
    var mentionTokens = mention.TokenParses;
    var entityContextModifierSet = ConstructModifierSet(mentionTokens, mention.HeadTokenIndex);
    var mentionHeadString = mention.HeadTokenText.ToLower();
    // A set is used so duplicate features across entity mentions are emitted once.
    Util.Set<string> featureSet = new Util.HashSet<string>();
    foreach (var entityMention in entity.Mentions)
    {
        var exactMatchFeature = GetExactMatchFeature(entityMention, mention);
        if (exactMatchFeature != null)
        {
            featureSet.Add(exactMatchFeature);
        }
        else if (entityMention.Parse.IsCoordinatedNounPhrase && !mention.Parse.IsCoordinatedNounPhrase)
        {
            // Coordination mismatch between the entity mention and this mention.
            featureSet.Add("cmix");
        }
        else
        {
            var mentionStrip = StripNounPhrase(mention);
            var entityMentionStrip = StripNounPhrase(entityMention);
            if (mentionStrip != null && entityMentionStrip != null)
            {
                if (IsSubstring(mentionStrip, entityMentionStrip))
                {
                    featureSet.Add("substring");
                }
            }
        }
        var entityMentionTokens = entityMention.TokenParses;
        var headIndex = entityMention.HeadTokenIndex;
        //if (!mention.getHeadTokenTag().equals(entityMention.getHeadTokenTag())) {
        //  continue;
        //} want to match NN NNP
        var entityMentionHeadString = entityMention.HeadTokenText.ToLower();
        // model lexical similarity
        if (mentionHeadString == entityMentionHeadString)
        {
            sameHead = true;
            featureSet.Add("hds=" + mentionHeadString);
            if (!modifersMatch || !noTheModifiersMatch)
            {
                // only check if we haven't already found one which is the same
                modifersMatch = true;
                noTheModifiersMatch = true;
                var entityMentionModifierSet = ConstructModifierSet(entityMentionTokens, headIndex);
                foreach (var modifierWord in entityContextModifierSet)
                {
                    if (!entityMentionModifierSet.Contains(modifierWord))
                    {
                        modifersMatch = false;
                        if (modifierWord != "the")
                        {
                            // Record each non-"the" modifier the entity mention lacks.
                            noTheModifiersMatch = false;
                            featureSet.Add("mmw=" + modifierWord);
                        }
                    }
                }
            }
        }
        // NOTE(review): presumably NonDescriptorStart bounds the descriptor-modifier
        // region of the entity mention — confirm against MentionContext.
        var descriptorModifierSet = ConstructModifierSet(entityMentionTokens, entityMention.NonDescriptorStart);
        if (descriptorModifierSet.Contains(mentionHeadString))
        {
            titleMatch = true;
        }
    }
    if (featureSet.Count != 0)
    {
        features.AddRange(featureSet);
    }
    if (sameHead)
    {
        features.Add("sameHead");
        if (modifersMatch)
        {
            features.Add("modsMatch");
        }
        else if (noTheModifiersMatch)
        {
            features.Add("nonTheModsMatch");
        }
        else
        {
            features.Add("modsMisMatch");
        }
    }
    if (titleMatch)
    {
        features.Add("titleMatch");
    }
    return (features);
}
/// <summary>
/// Reads a name list from <paramref name="nameFile"/>, one name per line.
/// </summary>
/// <param name="nameFile">Path of the file to read.</param>
/// <returns>The set of names read from the file.</returns>
private Util.Set<string> ReadNames(string nameFile)
{
    Util.Set<string> names = new Util.HashSet<string>();
    // Dispose the reader deterministically; the original leaked the file handle.
    using (var nameReader = new StreamReader(nameFile, System.Text.Encoding.Default))
    {
        string line;
        while ((line = nameReader.ReadLine()) != null)
        {
            names.Add(line);
        }
    }
    return names;
}
/// <summary>
/// Returns the deepest shared parent of this node and the specified node.
/// If the nodes are identical, their parent is returned; if one node is an
/// ancestor of the other, that ancestor is returned.
/// </summary>
/// <param name="node">The node whose ancestor chain is compared with this node's.</param>
/// <returns>The deepest shared parent, or null if the nodes share no ancestor.</returns>
public virtual Parse GetCommonParent(Parse node)
{
    if (this == node)
    {
        return Parent;
    }
    // Collect this node's ancestor chain (including itself), then walk up from
    // the other node until an ancestor is found in that chain.
    var ownChain = new Util.HashSet<Parse>();
    for (Parse current = this; current != null; current = current.Parent)
    {
        ownChain.Add(current);
    }
    for (Parse current = node; current != null; current = current.Parent)
    {
        if (ownChain.Contains(current))
        {
            return current;
        }
    }
    return null;
}
/// <summary>
/// Collects a sorted, de-duplicated array of mentions for the given noun phrases.
/// </summary>
/// <param name="nounPhrases">The candidate noun-phrase parses.</param>
/// <param name="headMap">Mapping from each parse to its syntactic head.</param>
/// <returns>The mentions extracted from the noun phrases.</returns>
private Mention[] CollectMentions(List<IParse> nounPhrases, Dictionary<IParse, IParse> headMap)
{
    var result = new List<Mention>(nounPhrases.Count);
    Util.Set<IParse> recentMentions = new Util.HashSet<IParse>();
    foreach (IParse nounPhrase in nounPhrases)
    {
        // Skip phrases that only repeat the head of an already-emitted mention,
        // and phrases that are part of a larger name.
        if (!IsHeadOfExistingMention(nounPhrase, headMap, recentMentions))
        {
            ClearMentions(recentMentions, nounPhrase);
            if (!IsPartOfName(nounPhrase))
            {
                IParse head = mHeadFinder.GetLastHead(nounPhrase);
                var newMention = new Mention(nounPhrase.Span, head.Span, head.EntityId, nounPhrase, null);
                result.Add(newMention);
                recentMentions.Add(nounPhrase);
                // Attach the named-entity type when one can be determined.
                string entityType = GetEntityType(mHeadFinder.GetHeadToken(head));
                if (entityType != null)
                {
                    newMention.NameType = entityType;
                }
            }
        }
        if (IsBasalNounPhrase(nounPhrase))
        {
            if (mPrenominalNamedEntitiesCollection)
            {
                CollectPrenominalNamedEntities(nounPhrase, result);
            }
            if (mCoordinatedNounPhrasesCollection)
            {
                CollectCoordinatedNounPhraseMentions(nounPhrase, result);
            }
            CollectPossessivePronouns(nounPhrase, result);
        }
        else
        {
            // Could use to get NP -> tokens CON structures for basal nps including NP -> NAC tokens
            //collectComplexNounPhrases(np,mentions);
        }
    }
    result.Sort();
    RemoveDuplicates(result);
    return result.ToArray();
}
/// <summary>
/// Returns string-match features for the specified mention and entity.
/// </summary>
/// <param name="mention">
/// The mention.
/// </param>
/// <param name="entity">
/// The entity.
/// </param>
/// <returns>
/// List of string-match features for the specified mention and entity.
/// </returns>
protected internal virtual List<string> GetStringMatchFeatures(Mention.MentionContext mention, DiscourseEntity entity)
{
    // Flags accumulated across all of the entity's mentions.
    bool sameHead = false;            // some entity mention shares the mention's head word
    bool modifersMatch = false;       // every pre-head modifier matched for a same-head entity mention
    bool titleMatch = false;          // mention's head appears in an entity mention's descriptor-modifier set
    bool noTheModifiersMatch = false; // modifiers matched, ignoring mismatches on "the"
    List<string> features = new List<string>();
    Mention.IParse[] mentionTokens = mention.TokenParses;
    OpenNLP.Tools.Util.Set<string> entityContextModifierSet = ConstructModifierSet(mentionTokens, mention.HeadTokenIndex);
    string mentionHeadString = mention.HeadTokenText.ToLower();
    // A set is used so duplicate features across entity mentions are emitted once.
    Util.Set<string> featureSet = new Util.HashSet<string>();
    foreach (Mention.MentionContext entityMention in entity.Mentions)
    {
        string exactMatchFeature = GetExactMatchFeature(entityMention, mention);
        if (exactMatchFeature != null)
        {
            featureSet.Add(exactMatchFeature);
        }
        else if (entityMention.Parse.IsCoordinatedNounPhrase && !mention.Parse.IsCoordinatedNounPhrase)
        {
            // Coordination mismatch between the entity mention and this mention.
            featureSet.Add("cmix");
        }
        else
        {
            string mentionStrip = StripNounPhrase(mention);
            string entityMentionStrip = StripNounPhrase(entityMention);
            if (mentionStrip != null && entityMentionStrip != null)
            {
                if (IsSubstring(mentionStrip, entityMentionStrip))
                {
                    featureSet.Add("substring");
                }
            }
        }
        Mention.IParse[] entityMentionTokens = entityMention.TokenParses;
        int headIndex = entityMention.HeadTokenIndex;
        //if (!mention.getHeadTokenTag().equals(entityMention.getHeadTokenTag())) {
        //  continue;
        //} want to match NN NNP
        string entityMentionHeadString = entityMention.HeadTokenText.ToLower();
        // model lexical similarity
        if (mentionHeadString == entityMentionHeadString)
        {
            sameHead = true;
            featureSet.Add("hds=" + mentionHeadString);
            if (!modifersMatch || !noTheModifiersMatch)
            {
                // only check if we haven't already found one which is the same
                modifersMatch = true;
                noTheModifiersMatch = true;
                Util.Set<string> entityMentionModifierSet = ConstructModifierSet(entityMentionTokens, headIndex);
                foreach (string modifierWord in entityContextModifierSet)
                {
                    if (!entityMentionModifierSet.Contains(modifierWord))
                    {
                        modifersMatch = false;
                        if (modifierWord != "the")
                        {
                            // Record each non-"the" modifier the entity mention lacks.
                            noTheModifiersMatch = false;
                            featureSet.Add("mmw=" + modifierWord);
                        }
                    }
                }
            }
        }
        // NOTE(review): presumably NonDescriptorStart bounds the descriptor-modifier
        // region of the entity mention — confirm against MentionContext.
        Util.Set<string> descriptorModifierSet = ConstructModifierSet(entityMentionTokens, entityMention.NonDescriptorStart);
        if (descriptorModifierSet.Contains(mentionHeadString))
        {
            titleMatch = true;
        }
    }
    if (!(featureSet.Count == 0))
    {
        features.AddRange(featureSet);
    }
    if (sameHead)
    {
        features.Add("sameHead");
        if (modifersMatch)
        {
            features.Add("modsMatch");
        }
        else if (noTheModifiersMatch)
        {
            features.Add("nonTheModsMatch");
        }
        else
        {
            features.Add("modsMisMatch");
        }
    }
    if (titleMatch)
    {
        features.Add("titleMatch");
    }
    return (features);
}
/// <summary>
/// Removes duplicate relations that can arise when collapsing stranded
/// prepositions. E.g., for "What does CPR stand for?" we get dep(stand, what),
/// and after collapsing we also get prep_for(stand, what); the plain dep copy
/// must be dropped.
/// </summary>
/// <param name="list">A list of typed dependencies to filter in place</param>
private static void RemoveDep(List<TypedDependency> list)
{
    // All prepositional relations: collapsed preps plus their clausal variants.
    Set<GrammaticalRelation> prepositionRelations = new Util.HashSet<GrammaticalRelation>(EnglishGrammaticalRelations.GetPreps());
    prepositionRelations.AddAll(EnglishGrammaticalRelations.GetPrepsC());

    foreach (TypedDependency prepCandidate in list)
    {
        if (!prepositionRelations.Contains(prepCandidate.Reln))
        {
            continue; // only a prep_ relation can shadow a plain dep
        }
        IndexedWord governor = prepCandidate.Gov;
        IndexedWord dependent = prepCandidate.Dep;
        // Mark every generic dep(governor, dependent) duplicate for deletion.
        foreach (TypedDependency other in list)
        {
            if (other.Reln == GrammaticalRelation.Dependent && other.Gov.Equals(governor) && other.Dep.Equals(dependent))
            {
                other.Reln = GrammaticalRelation.Kill;
            }
        }
    }

    // Sweep: drop every typed dependency marked with the "kill" relation above.
    list.RemoveAll(dependency => dependency.Reln == GrammaticalRelation.Kill);
}
/// <summary>
/// Finds a shortest path (fewest edges) from <paramref name="node1"/> to
/// <paramref name="node2"/> using a uniform-cost search. Priorities are
/// negated hop counts (every edge costs 1), so the queue's best entry is the
/// node currently closest to the start.
/// </summary>
/// <param name="graph">The graph to search.</param>
/// <param name="node1">The start node.</param>
/// <param name="node2">The goal node.</param>
/// <param name="directionSensitive">
/// If true, only parent-to-child edges are followed; otherwise all neighbors
/// are expanded (undirected traversal).
/// </param>
/// <returns>
/// The node sequence from <paramref name="node1"/> to <paramref name="node2"/>
/// inclusive, or null if the goal is unreachable.
/// </returns>
public static /*<V, E>*/ List<V> GetShortestPath<V, E>(IGraph<V, E> graph, V node1, V node2, bool directionSensitive)
{
    if (node1.Equals(node2))
    {
        // Trivial case: start and goal coincide.
        //return Collections.singletonList(node2);
        return (new List<V>() { node2 });
    }
    Set<V> visited = new Util.HashSet<V>();
    // Maps each reached node to its predecessor on the best path found so far.
    var previous = new Dictionary<V, V>();
    var unsettledNodes = new BinaryHeapPriorityQueue<V>();
    unsettledNodes.Add(node1, 0);
    while (unsettledNodes.Size() > 0)
    {
        // Read the head's priority (the negated distance of u from node1)
        // before removing the head itself.
        var distance = unsettledNodes.GetPriority();
        var u = unsettledNodes.RemoveFirst();
        visited.Add(u);
        if (u.Equals(node2))
        {
            break; // goal settled; stop searching
        }
        unsettledNodes.Remove(u); // NOTE(review): u was just removed by RemoveFirst; presumably a harmless no-op carried over from the Java original — confirm queue semantics
        var candidates = ((directionSensitive) ? graph.GetChildren(u) : new ReadOnlyCollection<V>(graph.GetNeighbors(u)));
        foreach (var candidate in candidates)
        {
            // One more hop: priorities decrease as paths lengthen.
            var alt = distance - 1;
            // nodes not already present will have a priority of -inf
            if (alt > unsettledNodes.GetPriority(candidate) && !visited.Contains(candidate))
            {
                unsettledNodes.RelaxPriority(candidate, alt);
                previous[candidate] = u;
            }
        }
    }
    if (!previous.ContainsKey(node2))
    {
        // The goal was never reached.
        return (null);
    }
    // Walk the predecessor chain back from the goal, then reverse it.
    var path = new List<V> { node2 };
    var n = node2;
    while (previous.ContainsKey(n))
    {
        path.Add(previous[n]);
        n = previous[n];
    }
    path.Reverse();
    return (path);
}
/// <summary>
/// Produces the set of name types associated with each of the specified mentions.
/// </summary>
/// <param name="mentions">
/// A list of mentions.
/// </param>
/// <returns>
/// A set of the non-null name types assigned to the specified mentions.
/// </returns>
private Util.Set<string> ConstructNameSet(List<Context> mentions)
{
    Util.Set<string> names = new Util.HashSet<string>();
    foreach (Context mention in mentions)
    {
        // Mentions without an assigned name type contribute nothing.
        string nameType = mention.NameType;
        if (nameType != null)
        {
            names.Add(nameType);
        }
    }
    return names;
}
/// <summary>
/// Constructs a set of entities which may be semantically compatible with the
/// entity indicated by the specified entityKey.
/// </summary>
/// <param name="entityKey">
/// The key of the entity for which the set is being constructed.
/// </param>
/// <param name="entities">
/// A mapping between entity keys and their mentions.
/// </param>
/// <param name="headSets">
/// A mapping between entity keys and their head sets.
/// </param>
/// <param name="nameSets">
/// A mapping between entity keys and their name sets.
/// </param>
/// <param name="singletons">
/// A list of all entities which consist of a single mention.
/// </param>
/// <returns>
/// A set of mentions for all the entities which might be semantically
/// compatible with the entity indicated by the specified key.
/// </returns>
private Util.Set<Context> ConstructExclusionSet(int entityKey, Util.HashList<int, Context> entities, Dictionary<int, Util.Set<string>> headSets, Dictionary<int, Util.Set<string>> nameSets, List<Context> singletons)
{
    Util.Set<Context> exclusionSet = new Util.HashSet<Context>();
    Util.Set<string> targetHeads = headSets[entityKey];
    Util.Set<string> targetNames = nameSets[entityKey];
    List<Context> targetContexts = entities[entityKey];

    // Multi-mention entities: exclude the target entity itself, any entity with
    // no name types, and any entity sharing a head word, name type, or
    // superclass with the target. Short-circuit || preserves the original
    // check order, so the helper calls fire exactly as before.
    foreach (int candidateKey in entities.Keys)
    {
        List<Context> candidateContexts = entities[candidateKey];
        bool compatible = candidateKey == entityKey
            || nameSets[candidateKey].Count == 0
            || HasSameHead(targetHeads, headSets[candidateKey])
            || HasSameNameType(targetNames, nameSets[candidateKey])
            || HasSuperClass(targetContexts, candidateContexts);
        if (compatible)
        {
            exclusionSet.AddAll(candidateContexts);
        }
    }

    // Singleton mentions: the same compatibility checks, applied one mention at
    // a time via a reusable single-element list.
    List<Context> singletonHolder = new List<Context>(1);
    foreach (Context singleton in singletons)
    {
        singletonHolder.Clear();
        singletonHolder.Add(singleton);
        bool compatible = targetHeads.Contains(singleton.HeadTokenText.ToLower())
            || singleton.NameType == null
            || targetNames.Contains(singleton.NameType)
            || HasSuperClass(targetContexts, singletonHolder);
        if (compatible)
        {
            exclusionSet.Add(singleton);
        }
    }
    return exclusionSet;
}
/// <summary>
/// Builds the array of mentions for the given noun phrases: one mention per
/// noun phrase that is neither the head of an already-collected mention nor
/// part of a name, plus any prenominal named-entity, coordinated-NP, and
/// possessive-pronoun mentions collected from basal noun phrases. The result
/// is sorted and de-duplicated.
/// </summary>
/// <param name="nounPhrases">The noun phrase parses to collect mentions from.</param>
/// <param name="headMap">A mapping from noun phrases to their head parses.</param>
/// <returns>A sorted, duplicate-free array of mentions.</returns>
private Mention[] CollectMentions(List<IParse> nounPhrases, Dictionary<IParse, IParse> headMap)
{
    List<Mention> mentions = new List<Mention>(nounPhrases.Count);
    // Noun phrases already turned into mentions; used to suppress NPs that are
    // merely heads of an existing mention.
    Util.Set<IParse> recentMentions = new Util.HashSet<IParse>();
    //System.err.println("AbtractMentionFinder.collectMentions: "+headMap);
    for (int nounPhraseIndex = 0; nounPhraseIndex < nounPhrases.Count; nounPhraseIndex++)
    {
        IParse nounPhrase = nounPhrases[nounPhraseIndex];
        //System.err.println("AbstractMentionFinder: collectMentions: np[" + npi + "]=" + np + " head=" + headMap.get(np));
        if (!IsHeadOfExistingMention(nounPhrase, headMap, recentMentions))
        {
            ClearMentions(recentMentions, nounPhrase);
            if (!IsPartOfName(nounPhrase))
            {
                // Create a mention spanning the NP with its last head's span and entity id.
                IParse head = mHeadFinder.GetLastHead(nounPhrase);
                Mention extent = new Mention(nounPhrase.Span, head.Span, head.EntityId, nounPhrase, null);
                //System.err.println("adding "+np+" with head "+head);
                mentions.Add(extent);
                recentMentions.Add(nounPhrase);
                // determine name-entity type
                string entityType = GetEntityType(mHeadFinder.GetHeadToken(head));
                if (entityType != null)
                {
                    extent.NameType = entityType;
                }
            }
            else
            {
                //System.err.println("AbstractMentionFinder.collectMentions excluding np as part of name. np=" + np);
            }
        }
        else
        {
            //System.err.println("AbstractMentionFinder.collectMentions excluding np as head of previous mention. np=" + np);
        }
        // Basal NPs can additionally yield prenominal named-entity, coordinated
        // NP, and possessive-pronoun mentions (the first two are gated by
        // instance configuration flags).
        if (IsBasalNounPhrase(nounPhrase))
        {
            if (mPrenominalNamedEntitiesCollection)
            {
                CollectPrenominalNamedEntities(nounPhrase, mentions);
            }
            if (mCoordinatedNounPhrasesCollection)
            {
                CollectCoordinatedNounPhraseMentions(nounPhrase, mentions);
            }
            CollectPossessivePronouns(nounPhrase, mentions);
        }
        else
        {
            // Could use to get NP -> tokens CON structures for basal nps including NP -> NAC tokens
            //collectComplexNounPhrases(np,mentions);
        }
    }
    mentions.Sort();
    RemoveDuplicates(mentions);
    return mentions.ToArray();
}
/// <summary>
/// Produces a set of head words for the specified list of mentions.
/// </summary>
/// <param name="mentions">
/// The mentions whose head words are collected.
/// </param>
/// <returns>
/// A set containing the lowercased head words of the specified mentions.
/// </returns>
private Util.Set<string> ConstructHeadSet(List<Context> mentions)
{
    Util.Set<string> heads = new Util.HashSet<string>();
    for (int index = 0; index < mentions.Count; index++)
    {
        // Head words are lowercased so later comparisons are case-insensitive.
        heads.Add(mentions[index].HeadTokenText.ToLower());
    }
    return heads;
}
/// <summary>
/// Propagates grammatical relations across conjunctions: for each
/// conj(gov, dep), every relation in which gov appears as a dependent is
/// copied over to dep, and a subject relation governed by gov is propagated
/// to dep when dep is a verb or adjective without a subject of its own
/// (switching between active and passive subject types as required).
/// The list is updated in place.
/// </summary>
/// <param name="list">The typed dependencies to augment in place.</param>
private static void TreatCc(List<TypedDependency> list)
{
    // Map from tree nodes to the set of typed dependencies in which the node
    // appears as dependent.
    var map = new Dictionary<IndexedWord, Set<TypedDependency>>();
    // Map from tree nodes governing a subject grammatical relation to that relation.
    var subjectMap = new Dictionary<IndexedWord, TypedDependency>();
    // Tree nodes that carry a passive auxiliary.
    Set<IndexedWord> withPassiveAuxiliary = new Util.HashSet<IndexedWord>();
    // Map of tree nodes being governor of an object grammatical relation to that
    // relation — only required by the object propagation commented out below:
    // Map<TreeGraphNode, TypedDependency> objectMap = new
    // HashMap<TreeGraphNode, TypedDependency>();
    var rcmodHeads = new List<IndexedWord>();
    // Dependents of prepc relations; collected to guard the (currently
    // disabled) object propagation against wrong dobj propagation.
    var prepcDep = new List<IndexedWord>();
    foreach (TypedDependency typedDep in list)
    {
        if (!map.ContainsKey(typedDep.Dep))
        {
            // NB: Here and in other places below, we use a TreeSet (which extends
            // SortedSet) to guarantee that results are deterministic.
            map.Add(typedDep.Dep, new TreeSet<TypedDependency>());
        }
        map[typedDep.Dep].Add(typedDep);
        if (typedDep.Reln.Equals(EnglishGrammaticalRelations.AuxPassiveModifier))
        {
            withPassiveAuxiliary.Add(typedDep.Gov);
        }
        // look for subjects
        if (typedDep.Reln.GetParent() == EnglishGrammaticalRelations.NominalSubject || typedDep.Reln.GetParent() == EnglishGrammaticalRelations.Subject || typedDep.Reln.GetParent() == EnglishGrammaticalRelations.ClausalSubject)
        {
            if (!subjectMap.ContainsKey(typedDep.Gov))
            {
                subjectMap.Add(typedDep.Gov, typedDep);
            }
        }
        // look for objects — this map was only required by the code commented
        // out below, so it is commented out too:
        // if (typedDep.reln() == DIRECT_OBJECT) {
        //   if (!objectMap.containsKey(typedDep.gov())) {
        //     objectMap.put(typedDep.gov(), typedDep);
        //   }
        // }
        // look for rcmod relations
        if (typedDep.Reln == EnglishGrammaticalRelations.RelativeClauseModifier)
        {
            rcmodHeads.Add(typedDep.Gov);
        }
        // look for prepc relations: put the dependent of such a relation in the
        // list to avoid wrong propagation of dobj
        if (typedDep.Reln.ToString().StartsWith("prepc"))
        {
            prepcDep.Add(typedDep.Dep);
        }
    }
    // Start from a copy of the input; propagated dependencies are appended.
    var newTypedDeps = new List<TypedDependency>(list);
    // find typed deps of form conj(gov, dep)
    foreach (TypedDependency td in list)
    {
        if (EnglishGrammaticalRelations.GetConjs().Contains(td.Reln))
        {
            IndexedWord gov = td.Gov;
            IndexedWord dep = td.Dep;
            // look at the dep in the conjunct
            // BUGFIX: the Java original used map.get(gov), which yields null for
            // a missing key and is then null-checked; the C# Dictionary indexer
            // throws KeyNotFoundException instead, so when gov never occurred as
            // a dependent this method crashed. TryGetValue restores the original
            // null-tolerant behavior.
            Set<TypedDependency> govRelations;
            if (map.TryGetValue(gov, out govRelations))
            {
                foreach (TypedDependency td1 in govRelations)
                {
                    IndexedWord newGov = td1.Gov;
                    // in the case of errors in the basic dependencies, it is
                    // possible to have overlapping newGov & dep
                    if (newGov.Equals(dep))
                    {
                        continue;
                    }
                    GrammaticalRelation newRel = td1.Reln;
                    if (newRel != GrammaticalRelation.Root)
                    {
                        if (rcmodHeads.Contains(gov) && rcmodHeads.Contains(dep))
                        {
                            // to prevent wrong propagation in the case of long
                            // dependencies in relative clauses
                            if (newRel != EnglishGrammaticalRelations.DirectObject && newRel != EnglishGrammaticalRelations.NominalSubject)
                            {
                                newTypedDeps.Add(new TypedDependency(newRel, newGov, dep));
                            }
                        }
                        else
                        {
                            newTypedDeps.Add(new TypedDependency(newRel, newGov, dep));
                        }
                    }
                }
            }
            // propagate subjects: look at the gov in the conjunct — if it has a
            // subject relation, the dep is a verb and the dep doesn't have a
            // subject relation, then we want to add a subject relation for the
            // dep. (By testing for the dep to be a verb, we are going to miss
            // subjects of copular verbs! but is it safe to relax this
            // assumption?? i.e., just test for the subject part)
            // CDM 2008: I also added in JJ, since participial verbs are often
            // tagged JJ.
            string tag = dep.Tag();
            TypedDependency tdsubj;
            if (subjectMap.TryGetValue(gov, out tdsubj) && (PartsOfSpeech.IsVerb(tag) || PartsOfSpeech.IsAdjective(tag)) && !subjectMap.ContainsKey(dep))
            {
                // check for wrong nsubjpass: if the new verb is VB or VBZ or VBP
                // or JJ, then add nsubj (if it is tagged correctly, should do
                // this for VBD too, but we don't)
                GrammaticalRelation relation = tdsubj.Reln;
                if (relation == EnglishGrammaticalRelations.NominalPassiveSubject)
                {
                    if (IsDefinitelyActive(tag))
                    {
                        relation = EnglishGrammaticalRelations.NominalSubject;
                    }
                }
                else if (relation == EnglishGrammaticalRelations.ClausalPassiveSubject)
                {
                    if (IsDefinitelyActive(tag))
                    {
                        relation = EnglishGrammaticalRelations.ClausalSubject;
                    }
                }
                else if (relation == EnglishGrammaticalRelations.NominalSubject)
                {
                    if (withPassiveAuxiliary.Contains(dep))
                    {
                        relation = EnglishGrammaticalRelations.NominalPassiveSubject;
                    }
                }
                else if (relation == EnglishGrammaticalRelations.ClausalSubject)
                {
                    if (withPassiveAuxiliary.Contains(dep))
                    {
                        relation = EnglishGrammaticalRelations.ClausalPassiveSubject;
                    }
                }
                newTypedDeps.Add(new TypedDependency(relation, dep, tdsubj.Dep));
            }
            // propagate objects
            // cdm july 2010: This bit of code would copy a dobj from the first
            // clause to a later conjoined clause if it didn't contain its own
            // dobj or prepc. But this is too aggressive and wrong if the later
            // clause is intransitive (including passivized cases) and so I think
            // we have to not have this done always, and see no good "sometimes"
            // heuristic. IF WE WERE TO REINSTATE, SHOULD ALSO NOT ADD OBJ IF
            // THERE IS A ccomp (SBAR).
            // if (objectMap.containsKey(gov) &&
            //     dep.tag().startsWith("VB") && ! objectMap.containsKey(dep)
            //     && ! prepcDep.contains(gov)) {
            //   TypedDependency tdobj = objectMap.get(gov);
            //   newTypedDeps.add(new TypedDependency(tdobj.reln(), dep,
            //       tdobj.dep()));
            // }
        }
    }
    // Replace the contents of the input list with the augmented set.
    list.Clear();
    list.AddRange(newTypedDeps);
}