/// <summary>
/// Collects the lower-cased text of every token that precedes the head token.
/// </summary>
/// <param name="tokens">
/// The token parses to read from.
/// </param>
/// <param name="headIndex">
/// Index of the head token; only tokens at positions below this index are included.
/// </param>
/// <returns>
/// A set holding the lower-cased string form of each token before <paramref name="headIndex"/>.
/// </returns>
private Util.Set<string> ConstructModifierSet(Mention.IParse[] tokens, int headIndex)
{
    Util.Set<string> modifiers = new Util.HashSet<string>();

    int position = 0;
    while (position < headIndex)
    {
        modifiers.Add(tokens[position].ToString().ToLower());
        position++;
    }

    return modifiers;
}
/// <summary>
/// Finds the index of the head word of a mention by scanning its tokens from the end.
/// </summary>
/// <param name="mention">
/// The mention whose token parses are examined.
/// </param>
/// <returns>
/// The index of the last token whose syntactic type is not "POS", "," or ".";
/// if every token has one of those types, the index of the last token.
/// </returns>
protected internal virtual int GetHeadIndex(Mention.MentionContext mention)
{
    Mention.IParse[] parses = mention.TokenParses;
    int lastIndex = parses.Length - 1;

    int candidate = lastIndex;
    while (candidate >= 0)
    {
        string tag = parses[candidate].SyntacticType;
        bool isSkippedTag = tag == "POS" || tag == "," || tag == ".";
        if (!isSkippedTag)
        {
            return candidate;
        }
        candidate--;
    }

    // Nothing but skipped tags: fall back to the final token.
    return lastIndex;
}
/// <summary>
/// Finds the index of the head word of a mention by scanning its tokens from the end.
/// </summary>
/// <param name="mention">
/// The mention whose token parses are examined.
/// </param>
/// <returns>
/// The index of the last token whose syntactic type is none of
/// <c>PartsOfSpeech.PossessiveEnding</c>, <c>PartsOfSpeech.Comma</c> or
/// <c>PartsOfSpeech.SentenceFinalPunctuation</c>; if every token has one of
/// those types, the index of the last token.
/// </returns>
protected internal virtual int GetHeadIndex(Mention.MentionContext mention)
{
    Mention.IParse[] parses = mention.TokenParses;

    for (int index = parses.Length - 1; index >= 0; index--)
    {
        string tag = parses[index].SyntacticType;

        if (tag == PartsOfSpeech.PossessiveEnding)
        {
            continue;
        }
        if (tag == PartsOfSpeech.Comma)
        {
            continue;
        }
        if (tag == PartsOfSpeech.SentenceFinalPunctuation)
        {
            continue;
        }

        return index;
    }

    // Nothing but skipped tags: fall back to the final token.
    return parses.Length - 1;
}
/// <summary>
/// Returns the word features for the specified token.
/// </summary>
/// <param name="token">
/// The token for which features are to be computed.
/// </param>
/// <returns>
/// A two-element list: <c>w=&lt;word&gt;,t=&lt;tag&gt;</c> followed by <c>t=&lt;tag&gt;</c>,
/// where the word is the lower-cased token text and the tag is the token's syntactic type.
/// Both entries get the suffix <c>,endWithPeriod</c> when <c>mEndsWithPeriod</c> matches the word.
/// </returns>
public static List<string> GetWordFeatures(Mention.IParse token)
{
    // Lower-case with the invariant culture so the generated feature strings do not
    // change with the current thread culture (e.g. "I" becomes a dotless i under tr-TR
    // with the culture-sensitive ToLower()).
    string word = token.ToString().ToLowerInvariant();

    string periodSuffix = mEndsWithPeriod.IsMatch(word) ? @",endWithPeriod" : string.Empty;
    string tokenTag = token.SyntacticType;

    List<string> wordFeatures = new List<string>();
    wordFeatures.Add("w=" + word + ",t=" + tokenTag + periodSuffix);
    wordFeatures.Add("t=" + tokenTag + periodSuffix);
    return wordFeatures;
}
/// <summary>
/// Builds the features used to predict whether the specified mention is non-referential.
/// </summary>
/// <param name="mention">
/// The mention under consideration.
/// </param>
/// <returns>
/// The word features of every token up to and including the head token, each prefixed
/// with "nr", followed by the mention's context features.
/// </returns>
protected internal virtual List<string> GetNonReferentialFeatures(Mention.MentionContext mention)
{
    Mention.IParse[] parses = mention.TokenParses;
    var result = new List<string>();

    for (int position = 0; position <= mention.HeadTokenIndex; position++)
    {
        foreach (string wordFeature in MaximumEntropyResolver.GetWordFeatures(parses[position]))
        {
            result.Add("nr" + wordFeature);
        }
    }

    result.AddRange(MaximumEntropyResolver.GetContextFeatures(mention));
    return result;
}
/// <summary>
/// Populates the head-token state (index, tag, text), the token array and the synsets
/// of this instance using the supplied head finder.
/// </summary>
/// <param name="headFinder">
/// The head finder used to locate the last head of <c>Parse</c>, its head index and its head token.
/// </param>
private void Initialize(Mention.IHeadFinder headFinder)
{
    Mention.IParse lastHead = headFinder.GetLastHead(Parse);
    List<Mention.IParse> headTokens = lastHead.Tokens;

    this.HeadTokenIndex = headFinder.GetHeadIndex(lastHead);
    Mention.IParse headWord = headFinder.GetHeadToken(lastHead);
    _tokens = headTokens.ToArray();
    this.HeadTokenTag = headWord.SyntacticType;
    this.HeadTokenText = headWord.ToString();

    // Synsets are only looked up for noun tags that are not proper-noun tags;
    // everything else gets an empty set.
    bool isNonProperNoun = PartsOfSpeech.IsNoun(this.HeadTokenTag)
                           && !PartsOfSpeech.IsProperNoun(this.HeadTokenTag);
    if (!isNonProperNoun)
    {
        this.Synsets = new Util.HashSet<string>();
        return;
    }

    // Called last, after the head fields above have been assigned.
    this.Synsets = GetSynsetSet(this);
}
/// <summary>
/// Populates the head-token fields (index, tag, text), the token array and the synsets
/// of this instance using the supplied head finder.
/// </summary>
/// <param name="headFinder">
/// The head finder used to locate the last head of <c>Parse</c>, its head index and its head token.
/// </param>
private void Initialize(Mention.IHeadFinder headFinder)
{
    Mention.IParse lastHead = headFinder.GetLastHead(Parse);
    List<Mention.IParse> headTokens = lastHead.Tokens;

    mHeadTokenIndex = headFinder.GetHeadIndex(lastHead);
    Mention.IParse headWord = headFinder.GetHeadToken(lastHead);
    mTokens = headTokens.ToArray();
    mHeadTokenTag = headWord.SyntacticType;
    mHeadTokenText = headWord.ToString();

    // Synsets are only looked up when the head tag starts with "NN" but not with "NNP";
    // every other tag gets an empty set.
    if (!mHeadTokenTag.StartsWith("NN") || mHeadTokenTag.StartsWith("NNP"))
    {
        mSynsets = new Util.HashSet<string>();
    }
    else
    {
        // Called last, after the head fields above have been assigned.
        mSynsets = GetSynsetSet(this);
    }
}
/// <summary>
/// Builds the text of a mention with every token tagged "DT" left out.
/// </summary>
/// <param name="entityContext">
/// The mention whose token parses are concatenated.
/// </param>
/// <returns>
/// The string forms of all tokens whose syntactic type is not "DT", in order,
/// separated by single spaces.
/// </returns>
private string ExcludedDeterminerMentionString(Mention.MentionContext entityContext)
{
    System.Text.StringBuilder text = new System.Text.StringBuilder();

    // Empty before the first kept token, a single space before every later one.
    string separator = string.Empty;

    foreach (Mention.IParse parse in entityContext.TokenParses)
    {
        if (parse.SyntacticType == "DT")
        {
            continue;
        }

        text.Append(separator);
        text.Append(parse.ToString());
        separator = " ";
    }

    return text.ToString();
}
// Constructors --------------------

/// <summary>
/// Creates a context by forwarding the mention data to the base constructor and then
/// running <c>Initialize</c> with the supplied head finder.
/// </summary>
/// <param name="span">Passed through to the base constructor.</param>
/// <param name="headSpan">Passed through to the base constructor.</param>
/// <param name="entityId">Passed through to the base constructor.</param>
/// <param name="parse">Passed through to the base constructor.</param>
/// <param name="extentType">Passed through to the base constructor.</param>
/// <param name="nameType">Passed through to the base constructor.</param>
/// <param name="headFinder">The head finder handed to <c>Initialize</c>.</param>
public Context(
    Util.Span span,
    Util.Span headSpan,
    int entityId,
    Mention.IParse parse,
    string extentType,
    string nameType,
    Mention.IHeadFinder headFinder)
    : base(span, headSpan, entityId, parse, extentType, nameType)
{
    Initialize(headFinder);
}