/// <summary>
/// Builds the distance features that relate a mention to the last extent of an entity.
/// </summary>
/// <param name="mention">
/// The mention whose distance to the entity is measured.
/// </param>
/// <param name="entity">
/// The entity; its <c>LastExtent</c> is the reference point for every distance.
/// </param>
/// <returns>
/// A list holding, in order, the "hd=" (Hobbs-style), "de=" (noun phrase) and "ds=" (sentence)
/// distance features.
/// </returns>
protected internal virtual List<string> GetDistanceFeatures(Mention.MentionContext mention, DiscourseEntity entity)
{
    Mention.MentionContext antecedent = entity.LastExtent;
    int nounPhraseGap = mention.NounPhraseDocumentIndex - antecedent.NounPhraseDocumentIndex;
    int sentenceGap = mention.SentenceNumber - antecedent.SentenceNumber;

    // Same sentence: use the antecedent's noun phrase index within its sentence.
    // Different sentence: nounPhraseGap - (max index - antecedent index) + antecedent index,
    // which simplifies to the expression below.
    int hobbsGap = (sentenceGap == 0)
        ? antecedent.NounPhraseSentenceIndex
        : nounPhraseGap + (2 * antecedent.NounPhraseSentenceIndex) - antecedent.MaxNounPhraseSentenceIndex;

    List<string> distanceFeatures = new List<string>();
    distanceFeatures.Add("hd=" + hobbsGap);
    distanceFeatures.Add("de=" + nounPhraseGap);
    distanceFeatures.Add("ds=" + sentenceGap);
    return distanceFeatures;
}
/// <summary>
/// Decides whether this resolver handles the specified mention.
/// </summary>
/// <param name="mention">
/// The mention to test.
/// </param>
/// <returns>
/// True when the mention's head token tag is "NN" and <c>IsDefiniteArticle</c> rejects the
/// lower-cased first token together with its syntactic type; false otherwise.
/// </returns>
public override bool CanResolve(Mention.MentionContext mention)
{
    // Lower-case with the invariant culture: the token is matched against fixed
    // article strings, so the result must not depend on the current culture's casing rules.
    string firstToken = mention.FirstTokenText.ToLowerInvariant();
    string firstTokenTag = mention.FirstToken.SyntacticType;
    return mention.HeadTokenTag == "NN" && !IsDefiniteArticle(firstToken, firstTokenTag);
}
/// <summary>
/// Assembles the features used to decide whether a mention refers to a discourse entity.
/// </summary>
/// <param name="mention">
/// The mention being considered as possibly referential.
/// </param>
/// <param name="entity">
/// The discourse entity the mention is being compared with.
/// </param>
/// <returns>
/// A list starting with the <c>Default</c> feature, followed by the compatibility features
/// for the mention/entity pair.
/// </returns>
protected internal virtual List<string> GetFeatures(Mention.MentionContext mention, DiscourseEntity entity)
{
    List<string> referenceFeatures = new List<string>();

    // The default feature always comes first.
    referenceFeatures.Add(Default);
    referenceFeatures.AddRange(GetCompatibilityFeatures(mention, entity));

    return referenceFeatures;
}
/// <summary>
/// Assembles the features describing a single mention for the non-referential model.
/// </summary>
/// <param name="mention">
/// The mention to describe.
/// </param>
/// <returns>
/// A list starting with <c>MaximumEntropyResolver.Default</c>, followed by the mention's
/// non-referential features.
/// </returns>
protected internal virtual List<string> GetFeatures(Mention.MentionContext mention)
{
    List<string> mentionFeatures = new List<string>();

    // The default feature always comes first.
    mentionFeatures.Add(MaximumEntropyResolver.Default);
    mentionFeatures.AddRange(GetNonReferentialFeatures(mention));

    return mentionFeatures;
}
/// <summary>
/// Initializes an entity from its first mention together with gender and number information.
/// </summary>
/// <param name="mention">
/// The first mention of this entity; forwarded to the base constructor.
/// </param>
/// <param name="gender">
/// The gender assigned to this entity.
/// </param>
/// <param name="genderProbability">
/// The probability that <paramref name="gender"/> is correct.
/// </param>
/// <param name="number">
/// The number assigned to this entity.
/// </param>
/// <param name="numberProbability">
/// The probability that <paramref name="number"/> is correct.
/// </param>
public DiscourseEntity(
    Mention.MentionContext mention,
    Similarity.GenderEnum gender,
    double genderProbability,
    Similarity.NumberEnum number,
    double numberProbability)
    : base(mention)
{
    // Gender and its confidence.
    this.Gender = gender;
    this.GenderProbability = genderProbability;

    // Number and its confidence.
    this.Number = number;
    this.NumberProbability = numberProbability;
}
/// <summary>
/// Returns features indicating whether the specified mention is compatible with the pronouns
/// of the specified entity.
/// </summary>
/// <param name="mention">
/// The mention. Only mentions whose head token tag starts with "PRP" produce features.
/// </param>
/// <param name="entity">
/// The entity whose pronoun mentions (head token tag starting with "PRP") are compared
/// with the mention.
/// </param>
/// <returns>
/// A list containing "compatiblePronoun" and/or "incompatiblePronoun", or an empty list when
/// neither condition was observed.
/// </returns>
protected internal virtual List<string> GetPronounMatchFeatures(Mention.MentionContext mention, DiscourseEntity entity)
{
    bool foundCompatiblePronoun = false;
    bool foundIncompatiblePronoun = false;

    // Tags are fixed identifiers, so compare them ordinally rather than by current culture.
    if (mention.HeadTokenTag.StartsWith("PRP", System.StringComparison.Ordinal))
    {
        Dictionary<string, string> pronounMap = GetPronounFeatureMap(mention.HeadTokenText);
        foreach (Mention.MentionContext candidateMention in entity.Mentions)
        {
            if (!candidateMention.HeadTokenTag.StartsWith("PRP", System.StringComparison.Ordinal))
            {
                continue;
            }

            // Case-insensitive match that does not depend on the current culture's casing rules.
            if (string.Equals(mention.HeadTokenText, candidateMention.HeadTokenText, System.StringComparison.OrdinalIgnoreCase))
            {
                // The same pronoun already occurs in the entity: no further evidence is needed.
                foundCompatiblePronoun = true;
                break;
            }

            Dictionary<string, string> candidatePronounMap = GetPronounFeatureMap(candidateMention.HeadTokenText);
            bool allKeysMatch = true;
            foreach (KeyValuePair<string, string> pronounFeature in pronounMap)
            {
                string candidateValue;
                if (!candidatePronounMap.TryGetValue(pronounFeature.Key, out candidateValue))
                {
                    // The candidate lacks this property: not a full match, but not a conflict.
                    allKeysMatch = false;
                }
                else if (pronounFeature.Value != candidateValue)
                {
                    // Both pronouns define the property and disagree on it.
                    foundIncompatiblePronoun = true;
                    allKeysMatch = false;
                }
            }

            if (allKeysMatch)
            {
                foundCompatiblePronoun = true;
            }
        }
    }

    List<string> pronounFeatures = new List<string>();
    if (foundCompatiblePronoun)
    {
        pronounFeatures.Add("compatiblePronoun");
    }
    if (foundIncompatiblePronoun)
    {
        pronounFeatures.Add("incompatiblePronoun");
    }
    return pronounFeatures;
}
/*
 * protected double getNonReferentialProbability(MentionContext ec) {
 * if (useFixedNonReferentialProbability) {
 * if (debugOn) {
 * System.err.println(this +".resolve: " + ec.toText() + " -> " + null +" " + fixedNonReferentialProbability);
 * System.err.println();
 * }
 * return fixedNonReferentialProbability;
 * }
 * List lfeatures = getFeatures(ec, null);
 * String[] features = (String[]) lfeatures.toArray(new String[lfeatures.size()]);
 *
 * if (features == null) {
 * System.err.println("features=null in " + this);
 * }
 * if (model == null) {
 * System.err.println("model=null in " + this);
 * }
 * double[] dist = nrModel.eval(features);
 *
 * if (dist == null) {
 * System.err.println("dist=null in " + this);
 * }
 * if (debugOn) {
 * System.err.println(this +".resolve: " + ec.toText() + " -> " + null +" " + dist[nrSameIndex] + " " + lfeatures);
 * System.err.println();
 * }
 * return (dist[nrSameIndex]);
 * }
 */

/// <summary>
/// Tells whether the specified entity qualifies as a default referent.
/// The criterion is used for sample selection on the training data and for choosing a single
/// non-referent entity; it is typically a heuristic for a likely referent.
/// </summary>
/// <param name="discourseEntity">
/// The discourse entity being considered for non-reference.
/// </param>
/// <returns>
/// True when the entity's last extent has a noun phrase sentence index of 0, false otherwise.
/// </returns>
protected internal virtual bool defaultReferent(DiscourseEntity discourseEntity)
{
    Mention.MentionContext lastExtent = discourseEntity.LastExtent;
    return lastExtent.NounPhraseSentenceIndex == 0;
}
/// <summary>
/// Decides whether this resolver handles the specified mention.
/// </summary>
/// <param name="mention">
/// The mention to test.
/// </param>
/// <returns>
/// True when the mention's head token tag equals
/// <c>PartsOfSpeechStrings.NounSingularOrMass</c> and <c>IsDefiniteArticle</c> rejects the
/// lower-cased first token together with its syntactic type; false otherwise.
/// </returns>
public override bool CanResolve(Mention.MentionContext mention)
{
    // Lower-case with the invariant culture: the token is matched against fixed
    // article strings, so the result must not depend on the current culture's casing rules.
    var firstToken = mention.FirstTokenText.ToLowerInvariant();
    var firstTokenTag = mention.FirstToken.SyntacticType;
    return mention.HeadTokenTag == PartsOfSpeechStrings.NounSingularOrMass
        && !IsDefiniteArticle(firstToken, firstTokenTag);
}
/// <summary>
/// Returns features indicating whether the specified mention is compatible with the pronouns
/// of the specified entity.
/// </summary>
/// <param name="mention">
/// The mention. Only mentions whose head token tag satisfies
/// <c>PartsOfSpeech.IsPersOrPossPronoun</c> produce features.
/// </param>
/// <param name="entity">
/// The entity whose pronoun mentions are compared with the mention.
/// </param>
/// <returns>
/// A list containing "compatiblePronoun" and/or "incompatiblePronoun", or an empty list when
/// neither condition was observed.
/// </returns>
protected internal virtual List<string> GetPronounMatchFeatures(Mention.MentionContext mention, DiscourseEntity entity)
{
    bool foundCompatiblePronoun = false;
    bool foundIncompatiblePronoun = false;

    if (PartsOfSpeech.IsPersOrPossPronoun(mention.HeadTokenTag))
    {
        Dictionary<string, string> pronounMap = GetPronounFeatureMap(mention.HeadTokenText);
        foreach (Mention.MentionContext candidateMention in entity.Mentions)
        {
            if (!PartsOfSpeech.IsPersOrPossPronoun(candidateMention.HeadTokenTag))
            {
                continue;
            }

            // Case-insensitive match that does not depend on the current culture's casing rules.
            if (string.Equals(mention.HeadTokenText, candidateMention.HeadTokenText, System.StringComparison.OrdinalIgnoreCase))
            {
                // The same pronoun already occurs in the entity: no further evidence is needed.
                foundCompatiblePronoun = true;
                break;
            }

            Dictionary<string, string> candidatePronounMap = GetPronounFeatureMap(candidateMention.HeadTokenText);
            bool allKeysMatch = true;
            foreach (KeyValuePair<string, string> pronounFeature in pronounMap)
            {
                string candidateValue;
                if (!candidatePronounMap.TryGetValue(pronounFeature.Key, out candidateValue))
                {
                    // The candidate lacks this property: not a full match, but not a conflict.
                    allKeysMatch = false;
                }
                else if (pronounFeature.Value != candidateValue)
                {
                    // Both pronouns define the property and disagree on it.
                    foundIncompatiblePronoun = true;
                    allKeysMatch = false;
                }
            }

            if (allKeysMatch)
            {
                foundCompatiblePronoun = true;
            }
        }
    }

    var pronounFeatures = new List<string>();
    if (foundCompatiblePronoun)
    {
        pronounFeatures.Add("compatiblePronoun");
    }
    if (foundIncompatiblePronoun)
    {
        pronounFeatures.Add("incompatiblePronoun");
    }
    return pronounFeatures;
}
/// <summary>
/// Extends the base feature set with context and string-match features.
/// </summary>
/// <param name="mention">
/// The mention being considered as possibly referential.
/// </param>
/// <param name="entity">
/// The candidate entity; when null, only the base features are returned.
/// </param>
/// <returns>
/// The base features, followed (for a non-null entity) by the mention's context features and
/// the string-match features for the mention/entity pair.
/// </returns>
protected internal override List<string> GetFeatures(Mention.MentionContext mention, DiscourseEntity entity)
{
    List<string> combinedFeatures = base.GetFeatures(mention, entity);

    // Without an entity there is nothing to compare the mention against.
    if (entity == null)
    {
        return combinedFeatures;
    }

    combinedFeatures.AddRange(GetContextFeatures(mention));
    combinedFeatures.AddRange(GetStringMatchFeatures(mention, entity));
    return combinedFeatures;
}
/// <summary>
/// Evaluates the model on the mention's features and returns the probability stored at the
/// non-referential outcome index.
/// </summary>
/// <param name="mention">
/// The mention to evaluate.
/// </param>
/// <returns>
/// The value at <c>mNonReferentialIndex</c> in the model's evaluation result.
/// </returns>
public virtual double GetNonReferentialProbability(Mention.MentionContext mention)
{
    var features = GetFeatures(mention);
    var outcomeProbabilities = mModel.Evaluate(features.ToArray());
    var probability = outcomeProbabilities[mNonReferentialIndex];

    if (!mDebugOn)
    {
        return probability;
    }

    // Debug trace: resolver, mention text, probability and the feature list.
    string trace = this + " " + mention.ToText() + " -> null " + probability + " " + string.Join(",", features.ToArray());
    Console.Error.WriteLine(trace);
    return probability;
}
/// <summary>
/// Returns the string of "_" delimited tokens for the specified mention.
/// </summary>
/// <param name="mention">
/// The mention whose tokens are joined.
/// </param>
/// <returns>
/// The tokens' string forms joined with "_"; an empty string when the mention has no tokens.
/// </returns>
protected internal virtual string GetFeatureString(Mention.MentionContext mention)
{
    object[] mentionTokens = mention.Tokens;

    // Convert every token up front so that string.Join handles the delimiters and an
    // empty token array yields an empty string instead of an index error.
    string[] tokenTexts = new string[mentionTokens.Length];
    for (int currentToken = 0; currentToken < mentionTokens.Length; currentToken++)
    {
        tokenTexts[currentToken] = mentionTokens[currentToken].ToString();
    }

    return string.Join("_", tokenTexts);
}
/// <summary>
/// Decides whether the specified entity is excluded as a candidate for the specified mention.
/// </summary>
/// <param name="entityContext">
/// The mention being resolved.
/// </param>
/// <param name="discourseEntity">
/// The candidate entity.
/// </param>
/// <returns>
/// True when the base resolver excludes the pair, or when <c>CanResolve</c> rejects the
/// entity's last extent; false otherwise.
/// </returns>
protected internal override bool IsExcluded(Mention.MentionContext entityContext, DiscourseEntity discourseEntity)
{
    if (base.IsExcluded(entityContext, discourseEntity))
    {
        return true;
    }

    // The entity's most recent extent must itself be something this resolver handles.
    if (!CanResolve(discourseEntity.LastExtent))
    {
        return true;
    }

    // NOTE(review): this repeats the base check made above; it looks redundant if
    // base.IsExcluded is side-effect free - confirm before removing.
    return base.IsExcluded(entityContext, discourseEntity);
}
/// <summary>
/// Records a training event for the specified mention.
/// </summary>
/// <param name="context">
/// The mention. An <c>Id</c> of -1 yields a <c>MaximumEntropyResolver.Same</c> event; any
/// other id yields a <c>MaximumEntropyResolver.Diff</c> event.
/// </param>
public virtual void AddEvent(Mention.MentionContext context)
{
    var features = GetFeatures(context);

    // The outcome label depends only on whether the mention carries the -1 id.
    var outcome = (context.Id == -1) ? MaximumEntropyResolver.Same : MaximumEntropyResolver.Diff;

    mEvents.Add(new SharpEntropy.TrainingEvent(outcome, features.ToArray()));
}
/// <summary>
/// Finds the index of the head word of the specified mention.
/// </summary>
/// <param name="mention">
/// The mention whose token parses are scanned.
/// </param>
/// <returns>
/// The index of the right-most token whose syntactic type is not "POS", "," or "."; when
/// every token has one of those types, the index of the last token.
/// </returns>
protected internal virtual int GetHeadIndex(Mention.MentionContext mention)
{
    Mention.IParse[] parses = mention.TokenParses;
    int lastIndex = parses.Length - 1;

    // Scan from the right, skipping possessive endings and trailing punctuation.
    for (int index = lastIndex; index >= 0; index--)
    {
        string tag = parses[index].SyntacticType;
        bool isSkipped = tag == "POS" || tag == "," || tag == ".";
        if (!isSkipped)
        {
            return index;
        }
    }

    return lastIndex;
}
/// <summary>
/// Returns the tokens of the specified mention as a single space-delimited string.
/// </summary>
/// <param name="entityContext">
/// The mention whose tokens are joined.
/// </param>
/// <returns>
/// The tokens' string forms joined with " "; an empty string when the mention has no tokens.
/// </returns>
private string MentionString(Mention.MentionContext entityContext)
{
    object[] mentionTokens = entityContext.Tokens;

    // Convert every token up front so that string.Join handles the delimiters and an
    // empty token array yields an empty string instead of an index error.
    string[] tokenTexts = new string[mentionTokens.Length];
    for (int tokenIndex = 0; tokenIndex < mentionTokens.Length; tokenIndex++)
    {
        tokenTexts[tokenIndex] = mentionTokens[tokenIndex].ToString();
    }

    return string.Join(" ", tokenTexts);
}
/// <summary>
/// Returns the tokens of the specified mention as a single space-delimited string.
/// </summary>
/// <param name="entityContext">
/// The mention whose tokens are joined.
/// </param>
/// <returns>
/// The tokens' string forms joined with " "; an empty string when the mention has no tokens.
/// </returns>
private string MentionString(Mention.MentionContext entityContext)
{
    object[] mentionTokens = entityContext.Tokens;

    // Convert every token up front so that string.Join handles the delimiters and an
    // empty token array yields an empty string instead of an index error.
    var tokenTexts = new string[mentionTokens.Length];
    for (int tokenIndex = 0; tokenIndex < mentionTokens.Length; tokenIndex++)
    {
        tokenTexts[tokenIndex] = mentionTokens[tokenIndex].ToString();
    }

    return string.Join(" ", tokenTexts);
}
/// <summary>
/// Finds the index of the head word of the specified mention.
/// </summary>
/// <param name="mention">
/// The mention whose token parses are scanned.
/// </param>
/// <returns>
/// The index of the right-most token whose syntactic type is none of
/// <c>PartsOfSpeech.PossessiveEnding</c>, <c>PartsOfSpeech.Comma</c> or
/// <c>PartsOfSpeech.SentenceFinalPunctuation</c>; when every token has one of those types,
/// the index of the last token.
/// </returns>
protected internal virtual int GetHeadIndex(Mention.MentionContext mention)
{
    Mention.IParse[] parses = mention.TokenParses;
    int lastIndex = parses.Length - 1;

    // Scan from the right, skipping possessive endings and trailing punctuation.
    for (int index = lastIndex; index >= 0; index--)
    {
        string tag = parses[index].SyntacticType;
        bool isSkipped = tag == PartsOfSpeech.PossessiveEnding
            || tag == PartsOfSpeech.Comma
            || tag == PartsOfSpeech.SentenceFinalPunctuation;
        if (!isSkipped)
        {
            return index;
        }
    }

    return lastIndex;
}
/// <summary>
/// Produces the feature describing how the mention's number relates to the entity's number.
/// </summary>
/// <param name="entityContext">
/// The mention whose number is read through <c>GetNumber()</c>.
/// </param>
/// <param name="discourseEntity">
/// The entity whose <c>Number</c> is compared with the mention's.
/// </param>
/// <returns>
/// <c>mNumberUnknown</c> when either number is unknown, <c>mNumberCompatible</c> when both
/// are equal, and <c>mNumberIncompatible</c> otherwise.
/// </returns>
private string GetNumberCompatibilityFeature(Mention.MentionContext entityContext, DiscourseEntity discourseEntity)
{
    Similarity.NumberEnum entityNumber = discourseEntity.Number;
    if (entityNumber == Similarity.NumberEnum.Unknown)
    {
        return mNumberUnknown;
    }

    Similarity.NumberEnum mentionNumber = entityContext.GetNumber();
    if (mentionNumber == Similarity.NumberEnum.Unknown)
    {
        return mNumberUnknown;
    }

    return (mentionNumber == entityNumber) ? mNumberCompatible : mNumberIncompatible;
}
/// <summary>
/// Produces the feature describing how the mention's gender relates to the entity's gender.
/// </summary>
/// <param name="entityContext">
/// The mention whose gender is read through <c>GetGender()</c>.
/// </param>
/// <param name="discourseEntity">
/// The entity whose <c>Gender</c> is compared with the mention's.
/// </param>
/// <returns>
/// <c>mGenderUnknown</c> when either gender is unknown, <c>mGenderCompatible</c> when both
/// are equal, and <c>mGenderIncompatible</c> otherwise.
/// </returns>
private string GetGenderCompatibilityFeature(Mention.MentionContext entityContext, DiscourseEntity discourseEntity)
{
    Similarity.GenderEnum entityGender = discourseEntity.Gender;
    if (entityGender == Similarity.GenderEnum.Unknown)
    {
        return mGenderUnknown;
    }

    Similarity.GenderEnum mentionGender = entityContext.GetGender();
    if (mentionGender == Similarity.GenderEnum.Unknown)
    {
        return mGenderUnknown;
    }

    return (mentionGender == entityGender) ? mGenderCompatible : mGenderIncompatible;
}
/// <summary>
/// Collects the features used to predict whether the specified mention is non-referential.
/// </summary>
/// <param name="mention">
/// The mention under consideration.
/// </param>
/// <returns>
/// The word features of every token up to and including the head token, each prefixed with
/// "nr", followed by the mention's context features.
/// </returns>
protected internal virtual List<string> GetNonReferentialFeatures(Mention.MentionContext mention)
{
    var nonReferentialFeatures = new List<string>();
    var parses = mention.TokenParses;

    // Only tokens from the start of the mention through its head token contribute.
    for (var position = 0; position <= mention.HeadTokenIndex; position++)
    {
        foreach (var wordFeature in MaximumEntropyResolver.GetWordFeatures(parses[position]))
        {
            nonReferentialFeatures.Add("nr" + wordFeature);
        }
    }

    nonReferentialFeatures.AddRange(MaximumEntropyResolver.GetContextFeatures(mention));
    return nonReferentialFeatures;
}
/// <summary>
/// Collects the features used to predict whether the specified mention is non-referential.
/// </summary>
/// <param name="mention">
/// The mention under consideration.
/// </param>
/// <returns>
/// The word features of every token up to and including the head token, each prefixed with
/// "nr", followed by the mention's context features.
/// </returns>
protected internal virtual List<string> GetNonReferentialFeatures(Mention.MentionContext mention)
{
    List<string> nonReferentialFeatures = new List<string>();
    Mention.IParse[] parses = mention.TokenParses;

    // Only tokens from the start of the mention through its head token contribute.
    int position = 0;
    while (position <= mention.HeadTokenIndex)
    {
        List<string> wordFeatures = MaximumEntropyResolver.GetWordFeatures(parses[position]);
        foreach (string wordFeature in wordFeatures)
        {
            nonReferentialFeatures.Add("nr" + wordFeature);
        }
        position++;
    }

    nonReferentialFeatures.AddRange(MaximumEntropyResolver.GetContextFeatures(mention));
    return nonReferentialFeatures;
}
/// <summary>
/// Looks up the entity in the discourse model whose last extent has the same id as the mention.
/// </summary>
/// <param name="mention">
/// The mention to look up. A mention with id -1 never matches.
/// </param>
/// <param name="discourseModel">
/// The discourse model whose entities are searched in index order.
/// </param>
/// <returns>
/// The first matching entity, after its index has been added to <c>Distances</c>; null when
/// the mention's id is -1 or no entity matches.
/// </returns>
public virtual DiscourseEntity Retain(Mention.MentionContext mention, DiscourseModel discourseModel)
{
    if (mention.Id == -1)
    {
        return null;
    }

    for (int position = 0; position < discourseModel.EntityCount; position++)
    {
        DiscourseEntity candidate = discourseModel.GetEntity(position);
        if (candidate.LastExtent.Id != mention.Id)
        {
            continue;
        }

        // Record how far into the model the referent was found.
        Distances.Add(position);
        return candidate;
    }

    return null;
}
/// <summary>
/// Determines the strongest exact-match feature that holds between two mentions.
/// </summary>
/// <param name="entityContext">
/// The first mention.
/// </param>
/// <param name="compareContext">
/// The mention it is compared with.
/// </param>
/// <returns>
/// "exactMatch" when the full mention strings are equal; otherwise "exactMatchNoHonor",
/// "exactMatchNoThe" or "exactMatchNoDT" for the first reduced form (honorifics, "the",
/// determiners removed) on which the mentions agree; null when none matches.
/// </returns>
private string GetExactMatchFeature(Mention.MentionContext entityContext, Mention.MentionContext compareContext)
{
    // Each comparison is only evaluated when all stricter ones have failed.
    if (MentionString(entityContext) == MentionString(compareContext))
    {
        return "exactMatch";
    }

    if (ExcludedHonorificMentionString(entityContext) == ExcludedHonorificMentionString(compareContext))
    {
        return "exactMatchNoHonor";
    }

    if (ExcludedTheMentionString(entityContext) == ExcludedTheMentionString(compareContext))
    {
        return "exactMatchNoThe";
    }

    if (ExcludedDeterminerMentionString(entityContext) == ExcludedDeterminerMentionString(compareContext))
    {
        return "exactMatchNoDT";
    }

    return null;
}
/// <summary>
/// Returns the mention's tokens as a space-delimited string, leaving out every token that is
/// matched in its entirety by <c>Linker.HonorificsPattern</c>.
/// </summary>
/// <param name="entityContext">
/// The mention whose tokens are filtered and joined.
/// </param>
/// <returns>
/// The remaining tokens joined with single spaces; an empty string when none remain.
/// </returns>
private string ExcludedHonorificMentionString(Mention.MentionContext entityContext)
{
    var builder = new StringBuilder();
    var separator = string.Empty;

    foreach (var rawToken in entityContext.Tokens)
    {
        var text = rawToken.ToString();

        // A token is dropped only when the pattern's match covers the whole token.
        var isWhollyHonorific = Linker.HonorificsPattern.Match(text).Value == text;
        if (isWhollyHonorific)
        {
            continue;
        }

        builder.Append(separator).Append(text);
        separator = " ";
    }

    return builder.ToString();
}
/// <summary>
/// Determines the strongest exact-match feature that holds between two mentions.
/// </summary>
/// <param name="entityContext">
/// The first mention.
/// </param>
/// <param name="compareContext">
/// The mention it is compared with.
/// </param>
/// <returns>
/// "exactMatch" when the full mention strings are equal; otherwise "exactMatchNoHonor",
/// "exactMatchNoThe" or "exactMatchNoDT" for the first reduced form (honorifics, "the",
/// determiners removed) on which the mentions agree; null when none matches.
/// </returns>
private string GetExactMatchFeature(Mention.MentionContext entityContext, Mention.MentionContext compareContext)
{
    // Each comparison is only evaluated when all stricter ones have failed.
    if (MentionString(entityContext) == MentionString(compareContext))
    {
        return "exactMatch";
    }

    if (ExcludedHonorificMentionString(entityContext) == ExcludedHonorificMentionString(compareContext))
    {
        return "exactMatchNoHonor";
    }

    if (ExcludedTheMentionString(entityContext) == ExcludedTheMentionString(compareContext))
    {
        return "exactMatchNoThe";
    }

    if (ExcludedDeterminerMentionString(entityContext) == ExcludedDeterminerMentionString(compareContext))
    {
        return "exactMatchNoDT";
    }

    return null;
}
/// <summary>
/// Returns the mention's tokens as a space-delimited string, leaving out every token that is
/// matched in its entirety by <c>Linker.HonorificsPattern</c>.
/// </summary>
/// <param name="entityContext">
/// The mention whose tokens are filtered and joined.
/// </param>
/// <returns>
/// The remaining tokens joined with single spaces; an empty string when none remain.
/// </returns>
private string ExcludedHonorificMentionString(Mention.MentionContext entityContext)
{
    System.Text.StringBuilder builder = new System.Text.StringBuilder();
    string separator = "";

    foreach (object rawToken in entityContext.Tokens)
    {
        string text = rawToken.ToString();

        // A token is dropped only when the pattern's match covers the whole token.
        if (Linker.HonorificsPattern.Match(text).Value == text)
        {
            continue;
        }

        builder.Append(separator).Append(text);
        separator = " ";
    }

    return builder.ToString();
}
/// <summary>
/// Returns the mention's tokens as a space-delimited string, leaving out the tokens
/// "the", "The" and "THE".
/// </summary>
/// <param name="entityContext">
/// The mention whose tokens are filtered and joined.
/// </param>
/// <returns>
/// The remaining tokens joined with single spaces; an empty string when none remain.
/// </returns>
private string ExcludedTheMentionString(Mention.MentionContext entityContext)
{
    System.Text.StringBuilder builder = new System.Text.StringBuilder();
    string separator = "";

    foreach (object rawToken in entityContext.Tokens)
    {
        string text = rawToken.ToString();

        // Only these three spellings are filtered; other casings are kept.
        bool isThe = text == "the" || text == "The" || text == "THE";
        if (isThe)
        {
            continue;
        }

        builder.Append(separator).Append(text);
        separator = " ";
    }

    return builder.ToString();
}
/// <summary>
/// Returns the mention's tokens as a space-delimited string, leaving out the tokens
/// "the", "The" and "THE".
/// </summary>
/// <param name="entityContext">
/// The mention whose tokens are filtered and joined.
/// </param>
/// <returns>
/// The remaining tokens joined with single spaces; an empty string when none remain.
/// </returns>
private string ExcludedTheMentionString(Mention.MentionContext entityContext)
{
    var builder = new StringBuilder();
    string separator = string.Empty;
    object[] tokens = entityContext.Tokens;

    for (int position = 0; position < tokens.Length; position++)
    {
        string text = tokens[position].ToString();

        // Only these three spellings are filtered; other casings are kept.
        bool isThe = text == "the" || text == "The" || text == "THE";
        if (isThe)
        {
            continue;
        }

        builder.Append(separator).Append(text);
        separator = " ";
    }

    return builder.ToString();
}
/// <summary>
/// Looks up the entity in the discourse model whose last extent has the same id as the mention.
/// </summary>
/// <param name="mention">
/// The mention to look up. A mention with id -1 never matches.
/// </param>
/// <param name="discourseModel">
/// The discourse model whose entities are searched in index order.
/// </param>
/// <returns>
/// The first matching entity, after its index has been added to <c>Distances</c>; null when
/// the mention's id is -1 or no entity matches.
/// </returns>
public virtual DiscourseEntity Retain(Mention.MentionContext mention, DiscourseModel discourseModel)
{
    if (mention.Id == -1)
    {
        return null;
    }

    int position = 0;
    while (position < discourseModel.EntityCount)
    {
        DiscourseEntity candidate = discourseModel.GetEntity(position);
        Mention.MentionContext lastExtent = candidate.LastExtent;
        if (lastExtent.Id == mention.Id)
        {
            // Record how far into the model the referent was found.
            Distances.Add(position);
            return candidate;
        }
        position++;
    }

    // No entity's last extent carries the mention's id.
    return null;
}