예제 #1
0
        /// <summary>
        /// Builds the distance features that relate a mention to the last extent of an entity.
        /// </summary>
        /// <param name="mention">
        /// The mention being compared against the entity.
        /// </param>
        /// <param name="entity">
        /// The entity whose <c>LastExtent</c> serves as the reference point.
        /// </param>
        /// <returns>
        /// A list holding three features, in this order: "hd=" (the Hobbs-style entity distance),
        /// "de=" (the noun phrase distance within the document) and "ds=" (the sentence distance).
        /// </returns>
        protected internal virtual List<string> GetDistanceFeatures(Mention.MentionContext mention, DiscourseEntity entity)
        {
            Mention.MentionContext lastExtent = entity.LastExtent;
            int nounPhraseGap = mention.NounPhraseDocumentIndex - lastExtent.NounPhraseDocumentIndex;
            int sentenceGap = mention.SentenceNumber - lastExtent.SentenceNumber;

            // Same sentence: the extent's noun phrase index within its sentence is used directly.
            // Different sentence: nounPhraseGap - (max - index) + index, i.e. the gap minus the noun
            // phrases from the extent to the end of its sentence plus those from the start up to it.
            int hobbsGap = sentenceGap == 0
                ? lastExtent.NounPhraseSentenceIndex
                : nounPhraseGap + (2 * lastExtent.NounPhraseSentenceIndex) - lastExtent.MaxNounPhraseSentenceIndex;

            return new List<string>
            {
                "hd=" + hobbsGap,
                "de=" + nounPhraseGap,
                "ds=" + sentenceGap
            };
        }
예제 #2
0
        /// <summary>
        /// Determines whether this resolver handles the specified mention.
        /// </summary>
        /// <param name="mention">
        /// The mention to test.
        /// </param>
        /// <returns>
        /// True when the mention's head token tag is "NN" and <c>IsDefiniteArticle</c> rejects its
        /// first token; false otherwise.
        /// </returns>
        public override bool CanResolve(Mention.MentionContext mention)
        {
            // Lower-case with the invariant culture: the result feeds a token comparison, which
            // must not vary with the thread's current culture (e.g. Turkish dotless-i casing).
            string loweredFirstToken = mention.FirstTokenText.ToLowerInvariant();
            string firstTokenTag = mention.FirstToken.SyntacticType;

            return mention.HeadTokenTag == "NN" && !IsDefiniteArticle(loweredFirstToken, firstTokenTag);
        }
예제 #3
0
        /// <summary>
        /// Collects the features used to decide whether the specified mention refers to the
        /// specified discourse entity.
        /// </summary>
        /// <param name="mention">
        /// The mention being considered as possibly referential.
        /// </param>
        /// <param name="entity">
        /// The discourse entity with which the mention is being considered referential.
        /// </param>
        /// <returns>
        /// A list that starts with the <c>Default</c> feature, followed by the features returned by
        /// <c>GetCompatibilityFeatures</c> for the mention and entity.
        /// </returns>
        protected internal virtual List<string> GetFeatures(Mention.MentionContext mention, DiscourseEntity entity)
        {
            List<string> collected = new List<string> { Default };
            collected.AddRange(GetCompatibilityFeatures(mention, entity));
            return collected;
        }
        /// <summary>
        /// Collects the features describing the specified mention on its own.
        /// </summary>
        /// <param name="mention">
        /// The mention to describe.
        /// </param>
        /// <returns>
        /// A list that starts with <c>MaximumEntropyResolver.Default</c>, followed by the features
        /// returned by <c>GetNonReferentialFeatures</c> for the mention.
        /// </returns>
        protected internal virtual List<string> GetFeatures(Mention.MentionContext mention)
        {
            List<string> collected = new List<string> { MaximumEntropyResolver.Default };
            collected.AddRange(GetNonReferentialFeatures(mention));
            return collected;
        }
예제 #5
0
 /// <summary>
 /// Initializes an entity from its first mention together with gender and number information.
 /// </summary>
 /// <param name="mention">
 /// The first mention of this entity; forwarded to the base constructor.
 /// </param>
 /// <param name="gender">
 /// The gender assigned to this entity.
 /// </param>
 /// <param name="genderProbability">
 /// The probability that <paramref name="gender"/> is correct.
 /// </param>
 /// <param name="number">
 /// The number assigned to this entity.
 /// </param>
 /// <param name="numberProbability">
 /// The probability that <paramref name="number"/> is correct.
 /// </param>
 public DiscourseEntity(
     Mention.MentionContext mention,
     Similarity.GenderEnum gender,
     double genderProbability,
     Similarity.NumberEnum number,
     double numberProbability)
     : base(mention)
 {
     this.Gender = gender;
     this.GenderProbability = genderProbability;
     this.Number = number;
     this.NumberProbability = numberProbability;
 }
예제 #6
0
        /// <summary>
        /// Returns features indicating whether the specified mention is compatible with the pronouns
        /// of the specified entity.
        /// </summary>
        /// <param name="mention">
        /// The mention. Only inspected when its head token tag starts with "PRP".
        /// </param>
        /// <param name="entity">
        /// The entity whose mentions are scanned for pronouns.
        /// </param>
        /// <returns>
        /// A list containing "compatiblePronoun" and/or "incompatiblePronoun", in that order, or an
        /// empty list when neither condition was observed.
        /// </returns>
        protected internal virtual List<string> GetPronounMatchFeatures(Mention.MentionContext mention, DiscourseEntity entity)
        {
            bool foundCompatiblePronoun = false;
            bool foundIncompatiblePronoun = false;

            // Tags are fixed identifiers, so they are compared ordinally instead of by culture.
            if (mention.HeadTokenTag.StartsWith("PRP", System.StringComparison.Ordinal))
            {
                Dictionary<string, string> pronounMap = GetPronounFeatureMap(mention.HeadTokenText);
                foreach (Mention.MentionContext candidateMention in entity.Mentions)
                {
                    if (!candidateMention.HeadTokenTag.StartsWith("PRP", System.StringComparison.Ordinal))
                    {
                        continue;
                    }

                    // Case-insensitive ordinal comparison: no temporary upper-cased strings and no
                    // dependence on the current culture's casing rules.
                    if (string.Equals(mention.HeadTokenText, candidateMention.HeadTokenText, System.StringComparison.OrdinalIgnoreCase))
                    {
                        // The same pronoun settles compatibility, so the scan stops here.
                        foundCompatiblePronoun = true;
                        break;
                    }

                    Dictionary<string, string> candidatePronounMap = GetPronounFeatureMap(candidateMention.HeadTokenText);
                    bool allKeysMatch = true;
                    foreach (KeyValuePair<string, string> attribute in pronounMap)
                    {
                        string candidateValue;
                        if (!candidatePronounMap.TryGetValue(attribute.Key, out candidateValue))
                        {
                            // A key missing on the candidate prevents a full match but is not a conflict.
                            allKeysMatch = false;
                        }
                        else if (attribute.Value != candidateValue)
                        {
                            foundIncompatiblePronoun = true;
                            allKeysMatch = false;
                        }
                    }
                    if (allKeysMatch)
                    {
                        foundCompatiblePronoun = true;
                    }
                }
            }

            List<string> pronounFeatures = new List<string>();
            if (foundCompatiblePronoun)
            {
                pronounFeatures.Add("compatiblePronoun");
            }
            if (foundIncompatiblePronoun)
            {
                pronounFeatures.Add("incompatiblePronoun");
            }
            return pronounFeatures;
        }
예제 #7
0
        /*
         * protected double getNonReferentialProbability(MentionContext ec) {
         * if (useFixedNonReferentialProbability) {
         * if (debugOn) {
         * System.err.println(this +".resolve: " + ec.toText() + " -> " + null +" " + fixedNonReferentialProbability);
         * System.err.println();
         * }
         * return fixedNonReferentialProbability;
         * }
         * List lfeatures = getFeatures(ec, null);
         * String[] features = (String[]) lfeatures.toArray(new String[lfeatures.size()]);
         *
         * if (features == null) {
         * System.err.println("features=null in " + this);
         * }
         * if (model == null) {
         * System.err.println("model=null in " + this);
         * }
         * double[] dist = nrModel.eval(features);
         *
         * if (dist == null) {
         * System.err.println("dist=null in " + this);
         * }
         * if (debugOn) {
         * System.err.println(this +".resolve: " + ec.toText() + " -> " + null +" " + dist[nrSameIndex] + " " + lfeatures);
         * System.err.println();
         * }
         * return (dist[nrSameIndex]);
         * }
         */

        /// <summary>
        /// Reports whether the specified entity satisfies the criteria for being a default referent.
        /// The criteria are used to perform sample selection on the training data and to select a
        /// single non-referent entity. Typically the criteria are a heuristic for a likely referent.
        /// </summary>
        /// <param name="discourseEntity">
        /// The discourse entity being considered for non-reference.
        /// </param>
        /// <returns>
        /// True when the noun phrase sentence index of the entity's last extent is 0, false otherwise.
        /// </returns>
        protected internal virtual bool defaultReferent(DiscourseEntity discourseEntity)
        {
            Mention.MentionContext lastExtent = discourseEntity.LastExtent;
            return lastExtent.NounPhraseSentenceIndex == 0;
        }
예제 #8
0
        /// <summary>
        /// Determines whether this resolver handles the specified mention.
        /// </summary>
        /// <param name="mention">
        /// The mention to test.
        /// </param>
        /// <returns>
        /// True when the mention's head token tag equals
        /// <c>PartsOfSpeechStrings.NounSingularOrMass</c> and <c>IsDefiniteArticle</c> rejects its
        /// first token; false otherwise.
        /// </returns>
        public override bool CanResolve(Mention.MentionContext mention)
        {
            // Lower-case with the invariant culture: the result feeds a token comparison, which
            // must not vary with the thread's current culture (e.g. Turkish dotless-i casing).
            var loweredFirstToken = mention.FirstTokenText.ToLowerInvariant();
            var firstTokenTag = mention.FirstToken.SyntacticType;

            return mention.HeadTokenTag == PartsOfSpeechStrings.NounSingularOrMass
                   && !IsDefiniteArticle(loweredFirstToken, firstTokenTag);
        }
예제 #9
0
        /// <summary>
        /// Returns features indicating whether the specified mention is compatible with the pronouns
        /// of the specified entity.
        /// </summary>
        /// <param name="mention">
        /// The mention. Only inspected when <c>PartsOfSpeech.IsPersOrPossPronoun</c> accepts its
        /// head token tag.
        /// </param>
        /// <param name="entity">
        /// The entity whose mentions are scanned for pronouns.
        /// </param>
        /// <returns>
        /// A list containing "compatiblePronoun" and/or "incompatiblePronoun", in that order, or an
        /// empty list when neither condition was observed.
        /// </returns>
        protected internal virtual List<string> GetPronounMatchFeatures(Mention.MentionContext mention, DiscourseEntity entity)
        {
            bool foundCompatiblePronoun = false;
            bool foundIncompatiblePronoun = false;

            if (PartsOfSpeech.IsPersOrPossPronoun(mention.HeadTokenTag))
            {
                Dictionary<string, string> pronounMap = GetPronounFeatureMap(mention.HeadTokenText);
                foreach (Mention.MentionContext candidateMention in entity.Mentions)
                {
                    if (!PartsOfSpeech.IsPersOrPossPronoun(candidateMention.HeadTokenTag))
                    {
                        continue;
                    }

                    // Case-insensitive ordinal comparison: no temporary upper-cased strings and no
                    // dependence on the current culture's casing rules.
                    if (string.Equals(mention.HeadTokenText, candidateMention.HeadTokenText, System.StringComparison.OrdinalIgnoreCase))
                    {
                        // The same pronoun settles compatibility, so the scan stops here.
                        foundCompatiblePronoun = true;
                        break;
                    }

                    Dictionary<string, string> candidatePronounMap = GetPronounFeatureMap(candidateMention.HeadTokenText);
                    bool allKeysMatch = true;
                    foreach (KeyValuePair<string, string> attribute in pronounMap)
                    {
                        string candidateValue;
                        if (!candidatePronounMap.TryGetValue(attribute.Key, out candidateValue))
                        {
                            // A key missing on the candidate prevents a full match but is not a conflict.
                            allKeysMatch = false;
                        }
                        else if (attribute.Value != candidateValue)
                        {
                            foundIncompatiblePronoun = true;
                            allKeysMatch = false;
                        }
                    }
                    if (allKeysMatch)
                    {
                        foundCompatiblePronoun = true;
                    }
                }
            }

            var pronounFeatures = new List<string>();
            if (foundCompatiblePronoun)
            {
                pronounFeatures.Add("compatiblePronoun");
            }
            if (foundIncompatiblePronoun)
            {
                pronounFeatures.Add("incompatiblePronoun");
            }
            return pronounFeatures;
        }
예제 #10
0
        /// <summary>
        /// Extends the base feature list with context and string-match features.
        /// </summary>
        /// <param name="mention">
        /// The mention being considered.
        /// </param>
        /// <param name="entity">
        /// The candidate entity; may be null, in which case only the base features are returned.
        /// </param>
        /// <returns>
        /// The list produced by the base implementation, with the results of
        /// <c>GetContextFeatures</c> and <c>GetStringMatchFeatures</c> appended when
        /// <paramref name="entity"/> is not null.
        /// </returns>
        protected internal override List<string> GetFeatures(Mention.MentionContext mention, DiscourseEntity entity)
        {
            List<string> combined = base.GetFeatures(mention, entity);
            if (entity == null)
            {
                return combined;
            }

            combined.AddRange(GetContextFeatures(mention));
            combined.AddRange(GetStringMatchFeatures(mention, entity));
            return combined;
        }
        /// <summary>
        /// Evaluates the model on the mention's features and returns the outcome value stored at
        /// <c>mNonReferentialIndex</c>.
        /// </summary>
        /// <param name="mention">
        /// The mention to score.
        /// </param>
        /// <returns>
        /// The model output at <c>mNonReferentialIndex</c> for the mention's features.
        /// </returns>
        public virtual double GetNonReferentialProbability(Mention.MentionContext mention)
        {
            var mentionFeatures = GetFeatures(mention);
            var outcomes = mModel.Evaluate(mentionFeatures.ToArray());
            var nonReferential = outcomes[mNonReferentialIndex];

            if (!mDebugOn)
            {
                return nonReferential;
            }

            // Debug trace goes to standard error.
            Console.Error.WriteLine(this + " " + mention.ToText() + " ->  null " + nonReferential + " " + string.Join(",", mentionFeatures.ToArray()));
            return nonReferential;
        }
예제 #12
0
 /// <summary>
 /// Joins the tokens of the specified mention into one string, separated by "_".
 /// </summary>
 /// <param name="mention">
 /// The mention whose <c>Tokens</c> are joined. The first token is read unconditionally.
 /// </param>
 /// <returns>
 /// The string forms of the mention's tokens joined by "_".
 /// </returns>
 protected internal virtual string GetFeatureString(Mention.MentionContext mention)
 {
     System.Text.StringBuilder joined = new System.Text.StringBuilder();
     object[] tokens = mention.Tokens;

     // The first token carries no leading delimiter; every later token is prefixed with "_".
     joined.Append(tokens[0].ToString());
     int position = 1;
     while (position < tokens.Length)
     {
         joined.Append("_");
         joined.Append(tokens[position].ToString());
         position++;
     }
     return joined.ToString();
 }
예제 #13
0
 /// <summary>
 /// Decides whether the specified discourse entity is excluded as a candidate for the
 /// specified mention.
 /// </summary>
 /// <param name="entityContext">
 /// The mention being resolved.
 /// </param>
 /// <param name="discourseEntity">
 /// The candidate entity.
 /// </param>
 /// <returns>
 /// True when the base implementation excludes the pair, or when <c>CanResolve</c> rejects the
 /// entity's last extent; false otherwise.
 /// </returns>
 protected internal override bool IsExcluded(Mention.MentionContext entityContext, DiscourseEntity discourseEntity)
 {
     if (base.IsExcluded(entityContext, discourseEntity))
     {
         return true;
     }

     // The base check has already returned false on this path, so it is not evaluated a second
     // time. NOTE(review): assumes base.IsExcluded is deterministic and side-effect free.
     Mention.MentionContext lastExtent = discourseEntity.LastExtent;
     return !CanResolve(lastExtent);
 }
        /// <summary>
        /// Appends a training event for the specified mention to <c>mEvents</c>.
        /// </summary>
        /// <param name="context">
        /// The mention to record. An <c>Id</c> of -1 yields the
        /// <c>MaximumEntropyResolver.Same</c> outcome; any other value yields
        /// <c>MaximumEntropyResolver.Diff</c>.
        /// </param>
        public virtual void AddEvent(Mention.MentionContext context)
        {
            var contextFeatures = GetFeatures(context);
            var outcome = context.Id == -1
                ? MaximumEntropyResolver.Same
                : MaximumEntropyResolver.Diff;

            mEvents.Add(new SharpEntropy.TrainingEvent(outcome, contextFeatures.ToArray()));
        }
예제 #15
0
 /// <summary>
 /// Finds the index of the head word of the specified mention.
 /// </summary>
 /// <param name="mention">
 /// The mention whose <c>TokenParses</c> are scanned.
 /// </param>
 /// <returns>
 /// The index of the last token whose syntactic type is none of "POS", "," or "."; when every
 /// token has one of those types, the index of the last token.
 /// </returns>
 protected internal virtual int GetHeadIndex(Mention.MentionContext mention)
 {
     Mention.IParse[] parses = mention.TokenParses;
     int lastIndex = parses.Length - 1;

     // Walk backwards past trailing tokens typed "POS", "," or ".".
     int index = lastIndex;
     while (index >= 0)
     {
         string tag = parses[index].SyntacticType;
         bool skipped = tag == "POS" || tag == "," || tag == ".";
         if (!skipped)
         {
             return index;
         }
         index--;
     }
     return lastIndex;
 }
예제 #16
0
 /// <summary>
 /// Joins the tokens of the specified mention into one string, separated by single spaces.
 /// </summary>
 /// <param name="entityContext">
 /// The mention whose <c>Tokens</c> are joined. The first token is read unconditionally.
 /// </param>
 /// <returns>
 /// The string forms of the mention's tokens joined by " ".
 /// </returns>
 private string MentionString(Mention.MentionContext entityContext)
 {
     System.Text.StringBuilder joined = new System.Text.StringBuilder();
     object[] tokens = entityContext.Tokens;

     // The first token carries no leading space; every later token is prefixed with one.
     joined.Append(tokens[0].ToString());
     int position = 1;
     while (position < tokens.Length)
     {
         joined.Append(" ");
         joined.Append(tokens[position].ToString());
         position++;
     }
     return joined.ToString();
 }
예제 #17
0
        /// <summary>
        /// Joins the tokens of the specified mention into one string, separated by single spaces.
        /// </summary>
        /// <param name="entityContext">
        /// The mention whose <c>Tokens</c> are joined. The first token is read unconditionally.
        /// </param>
        /// <returns>
        /// The string forms of the mention's tokens joined by " ".
        /// </returns>
        private string MentionString(Mention.MentionContext entityContext)
        {
            var joined = new StringBuilder();
            object[] tokens = entityContext.Tokens;

            // The first token carries no leading space; every later token is prefixed with one.
            joined.Append(tokens[0].ToString());
            int position = 1;
            while (position < tokens.Length)
            {
                joined.Append(" ");
                joined.Append(tokens[position].ToString());
                position++;
            }
            return joined.ToString();
        }
예제 #18
0
 /// <summary>
 /// Finds the index of the head word of the specified mention.
 /// </summary>
 /// <param name="mention">
 /// The mention whose <c>TokenParses</c> are scanned.
 /// </param>
 /// <returns>
 /// The index of the last token whose syntactic type is none of
 /// <c>PartsOfSpeech.PossessiveEnding</c>, <c>PartsOfSpeech.Comma</c> or
 /// <c>PartsOfSpeech.SentenceFinalPunctuation</c>; when every token has one of those types,
 /// the index of the last token.
 /// </returns>
 protected internal virtual int GetHeadIndex(Mention.MentionContext mention)
 {
     Mention.IParse[] parses = mention.TokenParses;
     int lastIndex = parses.Length - 1;

     // Walk backwards past trailing possessive endings, commas and sentence-final punctuation.
     int index = lastIndex;
     while (index >= 0)
     {
         var tag = parses[index].SyntacticType;
         bool skipped = tag == PartsOfSpeech.PossessiveEnding
                        || tag == PartsOfSpeech.Comma
                        || tag == PartsOfSpeech.SentenceFinalPunctuation;
         if (!skipped)
         {
             return index;
         }
         index--;
     }
     return lastIndex;
 }
예제 #19
0
 /// <summary>
 /// Compares the number of a mention with the number of a discourse entity.
 /// </summary>
 /// <param name="entityContext">
 /// The mention whose number is read through <c>GetNumber()</c>.
 /// </param>
 /// <param name="discourseEntity">
 /// The entity whose <c>Number</c> is compared.
 /// </param>
 /// <returns>
 /// <c>mNumberUnknown</c> when either number is <c>Unknown</c>, <c>mNumberCompatible</c> when
 /// both are equal, and <c>mNumberIncompatible</c> otherwise.
 /// </returns>
 private string GetNumberCompatibilityFeature(Mention.MentionContext entityContext, DiscourseEntity discourseEntity)
 {
     Similarity.NumberEnum knownNumber = discourseEntity.Number;

     bool eitherUnknown = knownNumber == Similarity.NumberEnum.Unknown
                          || entityContext.GetNumber() == Similarity.NumberEnum.Unknown;
     if (eitherUnknown)
     {
         return mNumberUnknown;
     }

     if (entityContext.GetNumber() == knownNumber)
     {
         return mNumberCompatible;
     }
     return mNumberIncompatible;
 }
예제 #20
0
 /// <summary>
 /// Compares the gender of a mention with the gender of a discourse entity.
 /// </summary>
 /// <param name="entityContext">
 /// The mention whose gender is read through <c>GetGender()</c>.
 /// </param>
 /// <param name="discourseEntity">
 /// The entity whose <c>Gender</c> is compared.
 /// </param>
 /// <returns>
 /// <c>mGenderUnknown</c> when either gender is <c>Unknown</c>, <c>mGenderCompatible</c> when
 /// both are equal, and <c>mGenderIncompatible</c> otherwise.
 /// </returns>
 private string GetGenderCompatibilityFeature(Mention.MentionContext entityContext, DiscourseEntity discourseEntity)
 {
     Similarity.GenderEnum knownGender = discourseEntity.Gender;

     bool eitherUnknown = knownGender == Similarity.GenderEnum.Unknown
                          || entityContext.GetGender() == Similarity.GenderEnum.Unknown;
     if (eitherUnknown)
     {
         return mGenderUnknown;
     }

     if (entityContext.GetGender() == knownGender)
     {
         return mGenderCompatible;
     }
     return mGenderIncompatible;
 }
        /// <summary>
        /// Builds the features used to predict whether the specified mention is non-referential.
        /// </summary>
        /// <param name="mention">
        /// The mention under consideration.
        /// </param>
        /// <returns>
        /// The word features of every token up to and including the head token, each prefixed with
        /// "nr", followed by the features from <c>MaximumEntropyResolver.GetContextFeatures</c>.
        /// </returns>
        protected internal virtual List<string> GetNonReferentialFeatures(Mention.MentionContext mention)
        {
            var collected = new List<string>();
            var parses = mention.TokenParses;

            var position = 0;
            while (position <= mention.HeadTokenIndex)
            {
                foreach (var wordFeature in MaximumEntropyResolver.GetWordFeatures(parses[position]))
                {
                    collected.Add("nr" + wordFeature);
                }
                position++;
            }

            collected.AddRange(MaximumEntropyResolver.GetContextFeatures(mention));
            return collected;
        }
        /// <summary>
        /// Builds the features used to predict whether the specified mention is non-referential.
        /// </summary>
        /// <param name="mention">
        /// The mention under consideration.
        /// </param>
        /// <returns>
        /// The word features of every token up to and including the head token, each prefixed with
        /// "nr", followed by the features from <c>MaximumEntropyResolver.GetContextFeatures</c>.
        /// </returns>
        protected internal virtual List<string> GetNonReferentialFeatures(Mention.MentionContext mention)
        {
            List<string> collected = new List<string>();
            Mention.IParse[] parses = mention.TokenParses;

            int position = 0;
            while (position <= mention.HeadTokenIndex)
            {
                List<string> wordFeatures = MaximumEntropyResolver.GetWordFeatures(parses[position]);
                foreach (string wordFeature in wordFeatures)
                {
                    collected.Add("nr" + wordFeature);
                }
                position++;
            }

            collected.AddRange(MaximumEntropyResolver.GetContextFeatures(mention));
            return collected;
        }
예제 #23
0
        /// <summary>
        /// Looks up the entity in the discourse model whose last extent has the same id as the
        /// specified mention, and records the index at which it was found.
        /// </summary>
        /// <param name="mention">
        /// The mention to look up. A mention whose <c>Id</c> is -1 is never matched.
        /// </param>
        /// <param name="discourseModel">
        /// The discourse model whose entities are searched in index order.
        /// </param>
        /// <returns>
        /// The first matching entity, after its index has been added to <c>Distances</c>; null when
        /// the mention's <c>Id</c> is -1 or no entity matches.
        /// </returns>
        public virtual DiscourseEntity Retain(Mention.MentionContext mention, DiscourseModel discourseModel)
        {
            if (mention.Id == -1)
            {
                return null;
            }

            for (int index = 0; index < discourseModel.EntityCount; index++)
            {
                DiscourseEntity candidate = discourseModel.GetEntity(index);
                Mention.MentionContext lastExtent = candidate.LastExtent;
                if (lastExtent.Id != mention.Id)
                {
                    continue;
                }

                Distances.Add(index);
                return candidate;
            }
            return null;
        }
예제 #24
0
 /// <summary>
 /// Returns the name of the first exact-match test that the two mentions pass.
 /// </summary>
 /// <param name="entityContext">
 /// The first mention.
 /// </param>
 /// <param name="compareContext">
 /// The mention compared against it.
 /// </param>
 /// <returns>
 /// "exactMatch", "exactMatchNoHonor", "exactMatchNoThe" or "exactMatchNoDT", tested in that
 /// order; null when none of the string forms are equal.
 /// </returns>
 private string GetExactMatchFeature(Mention.MentionContext entityContext, Mention.MentionContext compareContext)
 {
     // Each test is only computed when all earlier ones have failed.
     string fullText = MentionString(entityContext);
     if (fullText.Equals(MentionString(compareContext)))
     {
         return "exactMatch";
     }

     string withoutHonorifics = ExcludedHonorificMentionString(entityContext);
     if (withoutHonorifics.Equals(ExcludedHonorificMentionString(compareContext)))
     {
         return "exactMatchNoHonor";
     }

     string withoutThe = ExcludedTheMentionString(entityContext);
     if (withoutThe.Equals(ExcludedTheMentionString(compareContext)))
     {
         return "exactMatchNoThe";
     }

     string withoutDeterminers = ExcludedDeterminerMentionString(entityContext);
     if (withoutDeterminers.Equals(ExcludedDeterminerMentionString(compareContext)))
     {
         return "exactMatchNoDT";
     }

     return null;
 }
예제 #25
0
        /// <summary>
        /// Joins the tokens of the specified mention with single spaces, leaving out honorifics.
        /// </summary>
        /// <param name="entityContext">
        /// The mention whose <c>Tokens</c> are filtered and joined.
        /// </param>
        /// <returns>
        /// The remaining tokens joined by " ". A token is left out when the value matched by
        /// <c>Linker.HonorificsPattern</c> equals the whole token.
        /// </returns>
        private string ExcludedHonorificMentionString(Mention.MentionContext entityContext)
        {
            var joined = new StringBuilder();
            var needsSeparator = false;

            foreach (var rawToken in entityContext.Tokens)
            {
                var token = rawToken.ToString();
                var isHonorific = Linker.HonorificsPattern.Match(token).Value == token;
                if (isHonorific)
                {
                    continue;
                }

                if (needsSeparator)
                {
                    joined.Append(" ");
                }
                joined.Append(token);
                needsSeparator = true;
            }
            return joined.ToString();
        }
예제 #26
0
 /// <summary>
 /// Returns the name of the first exact-match test that the two mentions pass.
 /// </summary>
 /// <param name="entityContext">
 /// The first mention.
 /// </param>
 /// <param name="compareContext">
 /// The mention compared against it.
 /// </param>
 /// <returns>
 /// "exactMatch", "exactMatchNoHonor", "exactMatchNoThe" or "exactMatchNoDT", tested in that
 /// order; null when none of the string forms are equal.
 /// </returns>
 private string GetExactMatchFeature(Mention.MentionContext entityContext, Mention.MentionContext compareContext)
 {
     // Each test is only computed when all earlier ones have failed.
     string fullText = MentionString(entityContext);
     if (fullText.Equals(MentionString(compareContext)))
     {
         return "exactMatch";
     }

     string withoutHonorifics = ExcludedHonorificMentionString(entityContext);
     if (withoutHonorifics.Equals(ExcludedHonorificMentionString(compareContext)))
     {
         return "exactMatchNoHonor";
     }

     string withoutThe = ExcludedTheMentionString(entityContext);
     if (withoutThe.Equals(ExcludedTheMentionString(compareContext)))
     {
         return "exactMatchNoThe";
     }

     string withoutDeterminers = ExcludedDeterminerMentionString(entityContext);
     if (withoutDeterminers.Equals(ExcludedDeterminerMentionString(compareContext)))
     {
         return "exactMatchNoDT";
     }

     return null;
 }
예제 #27
0
        /// <summary>
        /// Joins the tokens of the specified mention with single spaces, leaving out honorifics.
        /// </summary>
        /// <param name="entityContext">
        /// The mention whose <c>Tokens</c> are filtered and joined.
        /// </param>
        /// <returns>
        /// The remaining tokens joined by " ". A token is left out when the value matched by
        /// <c>Linker.HonorificsPattern</c> equals the whole token.
        /// </returns>
        private string ExcludedHonorificMentionString(Mention.MentionContext entityContext)
        {
            System.Text.StringBuilder joined = new System.Text.StringBuilder();
            bool needsSeparator = false;

            object[] tokens = entityContext.Tokens;
            foreach (object rawToken in tokens)
            {
                string token = rawToken.ToString();
                bool isHonorific = Linker.HonorificsPattern.Match(token).Value == token;
                if (isHonorific)
                {
                    continue;
                }

                if (needsSeparator)
                {
                    joined.Append(" ");
                }
                joined.Append(token);
                needsSeparator = true;
            }
            return joined.ToString();
        }
예제 #28
0
        /// <summary>
        /// Joins the tokens of the specified mention with single spaces, leaving out the word "the".
        /// </summary>
        /// <param name="entityContext">
        /// The mention whose <c>Tokens</c> are filtered and joined.
        /// </param>
        /// <returns>
        /// The remaining tokens joined by " ". Only the exact spellings "the", "The" and "THE"
        /// are left out.
        /// </returns>
        private string ExcludedTheMentionString(Mention.MentionContext entityContext)
        {
            System.Text.StringBuilder joined = new System.Text.StringBuilder();
            bool needsSeparator = false;

            object[] tokens = entityContext.Tokens;
            foreach (object rawToken in tokens)
            {
                string token = rawToken.ToString();
                bool isThe = token == "the" || token == "The" || token == "THE";
                if (isThe)
                {
                    continue;
                }

                if (needsSeparator)
                {
                    joined.Append(" ");
                }
                joined.Append(token);
                needsSeparator = true;
            }
            return joined.ToString();
        }
예제 #29
0
        /// <summary>
        /// Joins the tokens of the specified mention with single spaces, leaving out the word "the".
        /// </summary>
        /// <param name="entityContext">
        /// The mention whose <c>Tokens</c> are filtered and joined.
        /// </param>
        /// <returns>
        /// The remaining tokens joined by " ". Only the exact spellings "the", "The" and "THE"
        /// are left out.
        /// </returns>
        private string ExcludedTheMentionString(Mention.MentionContext entityContext)
        {
            var joined = new StringBuilder();
            bool needsSeparator = false;

            object[] tokens = entityContext.Tokens;
            for (int position = 0; position < tokens.Length; position++)
            {
                string token = tokens[position].ToString();
                bool isThe = token == "the" || token == "The" || token == "THE";
                if (isThe)
                {
                    continue;
                }

                if (needsSeparator)
                {
                    joined.Append(" ");
                }
                joined.Append(token);
                needsSeparator = true;
            }
            return joined.ToString();
        }
예제 #30
0
        /// <summary>
        /// Looks up the entity in the discourse model whose last extent has the same id as the
        /// specified mention, and records the index at which it was found.
        /// </summary>
        /// <param name="mention">
        /// The mention to look up. A mention whose <c>Id</c> is -1 is never matched.
        /// </param>
        /// <param name="discourseModel">
        /// The discourse model whose entities are searched in index order.
        /// </param>
        /// <returns>
        /// The first matching entity, after its index has been added to <c>Distances</c>; null when
        /// the mention's <c>Id</c> is -1 or no entity matches.
        /// </returns>
        public virtual DiscourseEntity Retain(Mention.MentionContext mention, DiscourseModel discourseModel)
        {
            if (mention.Id == -1)
            {
                return null;
            }

            for (int index = 0; index < discourseModel.EntityCount; index++)
            {
                DiscourseEntity candidate = discourseModel.GetEntity(index);
                Mention.MentionContext lastExtent = candidate.LastExtent;
                if (lastExtent.Id != mention.Id)
                {
                    continue;
                }

                Distances.Add(index);
                return candidate;
            }

            // No entity's last extent carries the mention's id.
            return null;
        }