/// <summary>
/// Builds the list of string features describing the specified noun phrase.
/// </summary>
/// <param name="nounPhrase">
/// The context whose tokens, head token, name type and synsets are turned into features.
/// </param>
/// <returns>
/// The features in the order: "default", modifier words, head word, name type,
/// gender markers (person names only) and synsets.
/// </returns>
private List <string> GetFeatures(Context nounPhrase)
        {
            // Every feature list starts with the constant "default" feature.
            var collected = new List <string> { "default" };

            // One "mw=" feature per token that precedes the head token.
            int modifierIndex = 0;
            while (modifierIndex < nounPhrase.HeadTokenIndex)
            {
                collected.Add("mw=" + nounPhrase.Tokens[modifierIndex].ToString());
                modifierIndex++;
            }

            collected.Add("hw=" + nounPhrase.HeadTokenText);
            collected.Add("n=" + nounPhrase.NameType);

            // Gender markers are only looked up for mentions whose name type is "person".
            if (nounPhrase.NameType == "person")
            {
                object[] words     = nounPhrase.Tokens;
                int      wordIndex = 0;

                // The first token is always inspected, even when the head index is 0;
                // after that only tokens before the head are considered.
                do
                {
                    string lowered = words[wordIndex].ToString().ToLower();
                    if (mFemaleNames.Contains(lowered))
                    {
                        collected.Add("fem");
                    }
                    if (mMaleNames.Contains(lowered))
                    {
                        collected.Add("mas");
                    }
                    wordIndex++;
                }
                while (wordIndex < nounPhrase.HeadTokenIndex);
            }

            foreach (string synset in nounPhrase.Synsets)
            {
                collected.Add("ss=" + synset);
            }

            return collected;
        }
        /// <summary>
        /// Constructs the set of mentions which may be semantically compatible with the entity
        /// indicated by the specified entityKey.
        /// </summary>
        /// <remarks>
        /// An entity contributes all of its mentions when it is the entity itself, has an empty name set,
        /// shares a head or a name type with the entity, or is matched by HasSuperClass. A singleton is
        /// added when its lower-cased head text is in the entity's head set, it has no name type, its
        /// name type is in the entity's name set, or it is matched by HasSuperClass.
        /// </remarks>
        /// <param name="entityKey">
        /// The key of the entity for which the set is being constructed.
        /// </param>
        /// <param name="entities">
        /// A mapping between entity keys and their mentions.
        /// </param>
        /// <param name="headSets">
        /// A mapping between entity keys and their head sets.
        /// </param>
        /// <param name="nameSets">
        /// A mapping between entity keys and their name sets.
        /// </param>
        /// <param name="singletons">
        /// A list of all entities which consist of a single mention.
        /// </param>
        /// <returns>
        /// A set of mentions for all the entities which might be semantically compatible
        /// with the entity indicated by the specified key.
        /// </returns>
        private Util.Set <Context> ConstructExclusionSet(int entityKey, Util.HashList <int, Context> entities, Dictionary <int, Util.Set <string> > headSets, Dictionary <int, Util.Set <string> > nameSets, List <Context> singletons)
        {
            Util.Set <Context> excluded    = new Util.HashSet <Context>();
            Util.Set <string>  ownHeads    = headSets[entityKey];
            Util.Set <string>  ownNames    = nameSets[entityKey];
            List <Context>     ownMentions = entities[entityKey];

            // Multi-mention entities: the tests short-circuit in the order listed.
            foreach (int key in entities.Keys)
            {
                List <Context> mentions = entities[key];

                bool related = key == entityKey
                               || nameSets[key].Count == 0
                               || HasSameHead(ownHeads, headSets[key])
                               || HasSameNameType(ownNames, nameSets[key])
                               || HasSuperClass(ownMentions, mentions);

                if (related)
                {
                    excluded.AddAll(mentions);
                }
            }

            // Singletons: a one-element list is reused so HasSuperClass can be given a list.
            List <Context> probe = new List <Context>(1);

            foreach (Context single in singletons)
            {
                probe.Clear();
                probe.Add(single);

                bool related = ownHeads.Contains(single.HeadTokenText.ToLower())
                               || single.NameType == null
                               || ownNames.Contains(single.NameType)
                               || HasSuperClass(ownMentions, probe);

                if (related)
                {
                    excluded.Add(single);
                }
            }

            return excluded;
        }
示例#3
0
 /// <summary>
 /// Checks whether xecStrip is contained in the set stored in acroMap under ecStrip.
 /// </summary>
 /// <param name="ecStrip">The key looked up in acroMap.</param>
 /// <param name="xecStrip">The string searched for in the set found for the key.</param>
 /// <returns>
 /// True when the lookup yields a non-null set which contains xecStrip; otherwise false.
 /// </returns>
 private bool isAcronym(string ecStrip, string xecStrip)
 {
     Util.Set <string> known = (Util.Set <string>)acroMap[ecStrip];

     return known != null && known.Contains(xecStrip);
 }
 /// <summary>
 /// Determines whether the two name sets have at least one element in common.
 /// </summary>
 /// <param name="entityNameSet">The set whose elements are enumerated.</param>
 /// <param name="candidateNameSet">The set each enumerated element is looked up in.</param>
 /// <returns>True when some element of entityNameSet is contained in candidateNameSet.</returns>
 private bool HasSameNameType(Util.Set <string> entityNameSet, Util.Set <string> candidateNameSet)
 {
     bool shared = false;

     foreach (string nameType in entityNameSet)
     {
         if (candidateNameSet.Contains(nameType))
         {
             // One common element is enough; stop scanning.
             shared = true;
             break;
         }
     }

     return shared;
 }
 /// <summary>
 /// Determines whether the two head sets have at least one element in common.
 /// </summary>
 /// <param name="entityHeadSet">The set whose elements are enumerated.</param>
 /// <param name="candidateHeadSet">The set each enumerated element is looked up in.</param>
 /// <returns>True when some element of entityHeadSet is contained in candidateHeadSet.</returns>
 private bool HasSameHead(Util.Set <string> entityHeadSet, Util.Set <string> candidateHeadSet)
 {
     foreach (string head in entityHeadSet)
     {
         if (!candidateHeadSet.Contains(head))
         {
             continue;
         }

         // First common head found.
         return true;
     }

     return false;
 }
示例#6
0
        /// <summary>
        /// Follows the head map upwards from the specified noun phrase and reports whether any
        /// parse reached this way is contained in the specified mentions.
        /// </summary>
        /// <param name="nounPhrase">The parse the traversal starts from (it is not itself tested).</param>
        /// <param name="headMap">Maps a parse to the next parse in the chain.</param>
        /// <param name="mentions">The set of parses to test the chain against.</param>
        /// <returns>True as soon as a parse on the chain is found in mentions; false when the chain ends.</returns>
        private static bool IsHeadOfExistingMention(IParse nounPhrase, Dictionary <IParse, IParse> headMap, Util.Set <IParse> mentions)
        {
            IParse current = nounPhrase;
            IParse next;

            // A single lookup both tests for and fetches the next link of the chain.
            while (headMap.TryGetValue(current, out next))
            {
                if (mentions.Contains(next))
                {
                    return true;
                }
                current = next;
            }

            return false;
        }
        /// <summary>
        /// Finds the index of the head token within the tokens of the specified parse.
        /// </summary>
        /// <param name="parse">The parse whose head token index is computed.</param>
        /// <returns>
        /// The index of the right-most token, ahead of any counted trailing tokens, whose syntactic
        /// type is not in the skip set. When no such token exists, the index the scan started from is
        /// returned, which is negative when there are no tokens left to scan.
        /// </returns>
        public int GetHeadIndex(IParse parse)
        {
            List <IParse> syntacticChildren = parse.SyntacticChildren;
            bool          countTokens       = false;
            int           tokenCount        = 0;

            // Check for NP -> NN S type structures and return last token before S as head.
            for (int childIndex = 0; childIndex < syntacticChildren.Count; childIndex++)
            {
                IParse syntacticChild = syntacticChildren[childIndex];

                // Syntactic type labels are identifiers, so the prefix test is ordinal rather than
                // dependent on the current culture. An S child in first position (NP -> S production)
                // is not counted: a right-most head is assumed in that case.
                if (syntacticChild.SyntacticType.StartsWith("S", System.StringComparison.Ordinal) && childIndex != 0)
                {
                    countTokens = true;
                }

                // Once an S child has been seen, its tokens and those of all following children
                // are excluded from the head search.
                if (countTokens)
                {
                    tokenCount += syntacticChild.Tokens.Count;
                }
            }

            List <IParse> tokens = parse.Tokens;

            if (tokens.Count == 0)
            {
                System.Console.Error.WriteLine("PTBHeadFinder.getHeadIndex(): empty tok list for parse " + parse);
            }

            // Scan right-to-left from the last token that was not counted above.
            int startIndex = tokens.Count - tokenCount - 1;

            for (int currentToken = startIndex; currentToken >= 0; currentToken--)
            {
                if (!mSkipSet.Contains(tokens[currentToken].SyntacticType))
                {
                    return currentToken;
                }
            }

            // Every candidate was in the skip set (or there was none): fall back to the start index.
            return startIndex;
        }
        /// <summary>
        /// Finds the index of the head token within the tokens of the specified parse.
        /// </summary>
        /// <param name="parse">The parse whose head token index is computed.</param>
        /// <returns>
        /// The index of the right-most token, ahead of any counted trailing tokens, whose syntactic
        /// type is not in the skip set. When no such token exists, the index the scan started from is
        /// returned, which is negative when there are no tokens left to scan.
        /// </returns>
        public int GetHeadIndex(IParse parse)
        {
            var syntacticChildren = parse.SyntacticChildren;
            var countTokens       = false;
            var tokenCount        = 0;

            // Check for NP -> NN S type structures and return last token before S as head.
            for (var childIndex = 0; childIndex < syntacticChildren.Count; childIndex++)
            {
                var syntacticChild = syntacticChildren[childIndex];

                // Syntactic type labels are identifiers, so the prefix test is ordinal rather than
                // dependent on the current culture. An S child in first position is not counted.
                if (syntacticChild.SyntacticType.StartsWith("S", StringComparison.Ordinal) && childIndex != 0)
                {
                    countTokens = true;
                }

                // Once an S child has been seen, its tokens and those of all following children
                // are excluded from the head search.
                if (countTokens)
                {
                    tokenCount += syntacticChild.Tokens.Count;
                }
            }

            var tokens = parse.Tokens;

            if (tokens.Count == 0)
            {
                Console.Error.WriteLine("PTBHeadFinder.getHeadIndex(): empty tok list for parse " + parse);
            }

            // Scan right-to-left from the last token that was not counted above.
            var startIndex = tokens.Count - tokenCount - 1;

            for (var currentToken = startIndex; currentToken >= 0; currentToken--)
            {
                if (!mSkipSet.Contains(tokens[currentToken].SyntacticType))
                {
                    return currentToken;
                }
            }

            // Every candidate was in the skip set (or there was none): fall back to the start index.
            return startIndex;
        }
示例#9
0
        /// <summary>
        /// Returns string-match features for the specified mention and entity.
        /// </summary>
        /// <param name="mention">
        /// The mention.
        /// </param>
        /// <param name="entity">
        /// The entity.
        /// </param>
        /// <returns>
        /// The list of string-match features for the specified mention and entity: first the
        /// de-duplicated per-mention features, then the summary features ("sameHead" with one of
        /// "modsMatch" / "nonTheModsMatch" / "modsMisMatch", and "titleMatch") where they apply.
        /// </returns>
        protected internal virtual List <string> GetStringMatchFeatures(Mention.MentionContext mention, DiscourseEntity entity)
        {
            // Summary flags; they accumulate over all mentions of the entity.
            bool headMatched          = false;
            bool allModifiersMatch    = false;
            bool nonTheModifiersMatch = false;
            bool titleMatched         = false;

            Mention.IParse[] mentionTokens = mention.TokenParses;
            OpenNLP.Tools.Util.Set <string> mentionModifiers = ConstructModifierSet(mentionTokens, mention.HeadTokenIndex);
            string mentionHead = mention.HeadTokenText.ToLower();

            // A set, so that a feature produced by several mentions is only reported once.
            Util.Set <string> collected = new Util.HashSet <string>();

            foreach (Mention.MentionContext candidate in entity.Mentions)
            {
                string exactMatch = GetExactMatchFeature(candidate, mention);
                if (exactMatch != null)
                {
                    collected.Add(exactMatch);
                }
                else if (candidate.Parse.IsCoordinatedNounPhrase && !mention.Parse.IsCoordinatedNounPhrase)
                {
                    collected.Add("cmix");
                }
                else
                {
                    string strippedMention   = StripNounPhrase(mention);
                    string strippedCandidate = StripNounPhrase(candidate);
                    if (strippedMention != null && strippedCandidate != null && IsSubstring(strippedMention, strippedCandidate))
                    {
                        collected.Add("substring");
                    }
                }

                Mention.IParse[] candidateTokens    = candidate.TokenParses;
                int              candidateHeadIndex = candidate.HeadTokenIndex;
                string           candidateHead      = candidate.HeadTokenText.ToLower();

                // Model lexical similarity: identical (lower-cased) head words.
                if (mentionHead == candidateHead)
                {
                    headMatched = true;
                    collected.Add("hds=" + mentionHead);

                    // Only compare modifiers while no mention with fully matching modifiers has been found.
                    if (!(allModifiersMatch && nonTheModifiersMatch))
                    {
                        allModifiersMatch    = true;
                        nonTheModifiersMatch = true;

                        Util.Set <string> candidateModifiers = ConstructModifierSet(candidateTokens, candidateHeadIndex);
                        foreach (string modifier in mentionModifiers)
                        {
                            if (candidateModifiers.Contains(modifier))
                            {
                                continue;
                            }

                            allModifiersMatch = false;
                            if (modifier != "the")
                            {
                                // A missing "the" is tolerated by the weaker flag; any other word is not.
                                nonTheModifiersMatch = false;
                                collected.Add("mmw=" + modifier);
                            }
                        }
                    }
                }

                Util.Set <string> descriptorModifiers = ConstructModifierSet(candidateTokens, candidate.NonDescriptorStart);
                if (descriptorModifiers.Contains(mentionHead))
                {
                    titleMatched = true;
                }
            }

            List <string> features = new List <string>();

            if (collected.Count != 0)
            {
                features.AddRange(collected);
            }
            if (headMatched)
            {
                features.Add("sameHead");
                features.Add(allModifiersMatch
                                 ? "modsMatch"
                                 : (nonTheModifiersMatch ? "nonTheModsMatch" : "modsMisMatch"));
            }
            if (titleMatched)
            {
                features.Add("titleMatch");
            }

            return features;
        }
        /// <summary>
        /// Builds up the list of features, anchored around a position within the
        /// StringBuilder.
        /// </summary>
        /// <param name="pair">
        /// A pair whose first value is the text buffer and whose second value is the character
        /// offset in that buffer around which the features are computed.
        /// </param>
        /// <returns>
        /// The features accumulated in mCollectFeatures for this position, copied to an array;
        /// mCollectFeatures is cleared before returning.
        /// </returns>
        public virtual string[] GetContext(Util.Pair <System.Text.StringBuilder, int> pair)
        {
            string prefix;                      //string preceding the eos character in the eos token.
            string previousToken;               //space delimited token preceding token containing eos character.
            string suffix;                      //string following the eos character in the eos token.
            string nextToken;                   //space delimited token following token containing eos character.

            System.Text.StringBuilder buffer = pair.FirstValue;
            int position = pair.SecondValue;             //character offset of eos character in the buffer

            // Disabled legacy branch (kept for reference): it handled an input given as a string[].
            //if (first is string[])
            //{
            //    string[] firstList = (string[])first;
            //    previousToken = firstList[0];
            //    string current = firstList[1];
            //    prefix = current.Substring(0, (position) - (0));
            //    suffix = current.Substring(position + 1);
            //    if (suffix.StartsWith(" "))
            //    {
            //        mCollectFeatures.Add("sn");
            //    }
            //    if (prefix.EndsWith(" "))
            //    {
            //        mCollectFeatures.Add("pn");
            //    }
            //    mCollectFeatures.Add("eos=" + current[position]);
            //    nextToken = firstList[2];
            //}
            //else
            //{
            //    //compute previous, next, prefix and suffix strings and space previous, space next features and eos features.
            //    System.Text.StringBuilder buffer = (System.Text.StringBuilder)((Util.Pair)input).FirstValue;
            int lastIndex = buffer.Length - 1;

            // compute space previousToken and space next features.
            if (position > 0 && buffer[position - 1] == ' ')
            {
                mCollectFeatures.Add("sp");
            }
            if (position < lastIndex && buffer[position + 1] == ' ')
            {
                mCollectFeatures.Add("sn");
            }
            // the character at the anchor position is itself a feature.
            mCollectFeatures.Add("eos=" + buffer[position]);

            int prefixStart = PreviousSpaceIndex(buffer, position);

            int currentPosition = position;

            //assign prefix, stop if you run into a period though otherwise stop at space
            while (--currentPosition > prefixStart)
            {
                for (int currentEndOfSentenceCharacter = 0, endOfSentenceCharactersLength = mEndOfSentenceCharacters.Length; currentEndOfSentenceCharacter < endOfSentenceCharactersLength; currentEndOfSentenceCharacter++)
                {
                    if (buffer[currentPosition] == mEndOfSentenceCharacters[currentEndOfSentenceCharacter])
                    {
                        prefixStart = currentPosition;
                        currentPosition++;                         // this gets us out of while loop.
                        break;
                    }
                }
            }

            prefix = buffer.ToString(prefixStart, position - prefixStart).Trim();

            int previousStart = PreviousSpaceIndex(buffer, prefixStart);

            previousToken = buffer.ToString(previousStart, prefixStart - previousStart).Trim();

            int suffixEnd = NextSpaceIndex(buffer, position, lastIndex);

            // assign suffix end: mirror of the prefix scan, walking forward from the anchor and
            // stopping early at an end-of-sentence character.
            currentPosition = position;
            while (++currentPosition < suffixEnd)
            {
                for (int currentEndOfSentenceCharacter = 0, endOfSentenceCharactersLength = mEndOfSentenceCharacters.Length; currentEndOfSentenceCharacter < endOfSentenceCharactersLength; currentEndOfSentenceCharacter++)
                {
                    if (buffer[currentPosition] == mEndOfSentenceCharacters[currentEndOfSentenceCharacter])
                    {
                        suffixEnd = currentPosition;
                        currentPosition--;                         // this gets us out of while loop.
                        break;
                    }
                }
            }

            int nextEnd = NextSpaceIndex(buffer, suffixEnd + 1, lastIndex + 1);

            // when the anchor is the last character of the buffer nothing follows it.
            if (position == lastIndex)
            {
                suffix    = "";
                nextToken = "";
            }
            else
            {
                suffix    = buffer.ToString(position + 1, suffixEnd - (position + 1)).Trim();
                nextToken = buffer.ToString(suffixEnd + 1, nextEnd - (suffixEnd + 1)).Trim();
            }

            // prefix features: "x=<prefix>", plus its length, capitalisation and abbreviation
            // markers when the prefix is not empty. mBuffer is reset after each use.
            mBuffer.Append("x=");
            mBuffer.Append(prefix);
            mCollectFeatures.Add(mBuffer.ToString());
            mBuffer.Length = 0;
            if (prefix.Length > 0)
            {
                mCollectFeatures.Add(System.Convert.ToString(prefix.Length, System.Globalization.CultureInfo.InvariantCulture));
                if (IsFirstUpper(prefix))
                {
                    mCollectFeatures.Add("xcap");
                }
                if (mInducedAbbreviations.Contains(prefix))
                {
                    mCollectFeatures.Add("xabbrev");
                }
            }

            // previous token features: "v=<previousToken>" plus capitalisation and abbreviation markers.
            mBuffer.Append("v=");
            mBuffer.Append(previousToken);
            mCollectFeatures.Add(mBuffer.ToString());
            mBuffer.Length = 0;
            if (previousToken.Length > 0)
            {
                if (IsFirstUpper(previousToken))
                {
                    mCollectFeatures.Add("vcap");
                }
                if (mInducedAbbreviations.Contains(previousToken))
                {
                    mCollectFeatures.Add("vabbrev");
                }
            }

            // suffix features: "s=<suffix>" plus capitalisation and abbreviation markers.
            mBuffer.Append("s=");
            mBuffer.Append(suffix);
            mCollectFeatures.Add(mBuffer.ToString());
            mBuffer.Length = 0;
            if (suffix.Length > 0)
            {
                if (IsFirstUpper(suffix))
                {
                    mCollectFeatures.Add("scap");
                }
                if (mInducedAbbreviations.Contains(suffix))
                {
                    mCollectFeatures.Add("sabbrev");
                }
            }

            // next token features: "n=<nextToken>" plus capitalisation and abbreviation markers.
            mBuffer.Append("n=");
            mBuffer.Append(nextToken);
            mCollectFeatures.Add(mBuffer.ToString());
            mBuffer.Length = 0;
            if (nextToken.Length > 0)
            {
                if (IsFirstUpper(nextToken))
                {
                    mCollectFeatures.Add("ncap");
                }
                if (mInducedAbbreviations.Contains(nextToken))
                {
                    mCollectFeatures.Add("nabbrev");
                }
            }

            // hand the features back as an array and reset the shared collector.
            string[] context = mCollectFeatures.ToArray();
            mCollectFeatures.Clear();
            return(context);
        }
示例#11
0
        /// <summary>
        /// Generates events from the specified extents.
        /// </summary>
        /// <remarks>
        /// Extents with an Id of -1 are treated as singletons; all others are grouped by Id into
        /// entities. Entities with an empty name set are skipped. For every pair of mentions of a
        /// remaining entity a positive event is added, followed by at most one negative event that
        /// pairs the first mention with the next extent (taken round-robin from all extents) that
        /// is not in the entity's exclusion set.
        /// </remarks>
        /// <param name="extents">The extents to generate events from.</param>
        public virtual void SetExtents(Context[] extents)
        {
            var entities = new Util.HashList <int, Context>();
            // Extents which are not in a coreference chain.
            var singletons = new List <Context>();
            var allExtents = new List <Context>();

            // Populate the data structures.
            foreach (Context currentExtent in extents)
            {
                if (currentExtent.Id == -1)
                {
                    singletons.Add(currentExtent);
                }
                else
                {
                    entities.Put(currentExtent.Id, currentExtent);
                }
                allExtents.Add(currentExtent);
            }

            // Round-robin cursor into allExtents; it is shared by all entities so that negative
            // examples are drawn from successive positions rather than always from the start.
            int allExtentsIndex = 0;
            Dictionary <int, Util.Set <string> > headSets = ConstructHeadSets(entities);
            Dictionary <int, Util.Set <string> > nameSets = ConstructNameSets(entities);

            foreach (int key in entities.Keys)
            {
                Util.Set <string> entityNameSet = nameSets[key];
                if (entityNameSet.Count == 0)
                {
                    continue;
                }

                List <Context>     entityContexts = entities[key];
                Util.Set <Context> exclusionSet   = ConstructExclusionSet(key, entities, headSets, nameSets, singletons);

                for (int firstEntityContextIndex = 0; firstEntityContextIndex < entityContexts.Count; firstEntityContextIndex++)
                {
                    Context firstEntityContext = entityContexts[firstEntityContextIndex];

                    for (int secondEntityContextIndex = firstEntityContextIndex + 1; secondEntityContextIndex < entityContexts.Count; secondEntityContextIndex++)
                    {
                        Context secondEntityContext = entityContexts[secondEntityContextIndex];

                        AddEvent(true, firstEntityContext, secondEntityContext);

                        // Look for one negative example, giving up after a full lap over allExtents.
                        int startIndex = allExtentsIndex;
                        do
                        {
                            Context compareEntityContext = allExtents[allExtentsIndex];
                            allExtentsIndex = (allExtentsIndex + 1) % allExtents.Count;
                            if (!exclusionSet.Contains(compareEntityContext))
                            {
                                if (DebugOn)
                                {
                                    // The compared extent may be a singleton (Id == -1), which is never
                                    // put into entities; look its name set up without throwing so that
                                    // enabling debug output cannot fail on a missing key.
                                    Util.Set <string> compareNameSet;
                                    nameSets.TryGetValue(compareEntityContext.Id, out compareNameSet);
                                    System.Console.Error.WriteLine(firstEntityContext.ToString() + " " + string.Join(",", entityNameSet.ToArray()) + " " + compareEntityContext.ToString() + " " + compareNameSet);
                                }
                                AddEvent(false, firstEntityContext, compareEntityContext);
                                break;
                            }
                        }while (allExtentsIndex != startIndex);
                    }
                }
            }
        }
示例#12
0
        /// <summary>
        /// Builds features describing the overlap between the synsets of two contexts.
        /// </summary>
        /// <param name="common1">The first context.</param>
        /// <param name="common2">The second context.</param>
        /// <returns>
        /// An empty list when either context has no synsets. Otherwise one "ss=" feature per synset
        /// found in both contexts, followed by "ncss" (none in common), "samess" (all synsets of both
        /// are in common), "2isa1" (all synsets of the first are in common) or "1isa2" (all synsets of
        /// the second are in common) where one of these applies.
        /// </returns>
        private IEnumerable <string> GetCommonCommonFeatures(Context common1, Context common2)
        {
            var result = new List <string>();

            Util.Set <string> firstSynsets  = common1.Synsets;
            Util.Set <string> secondSynsets = common2.Synsets;

            // Nothing can be compared when either side has no synsets.
            if (firstSynsets.Count == 0 || secondSynsets.Count == 0)
            {
                return result;
            }

            int shared = 0;

            foreach (string synset in firstSynsets)
            {
                if (!secondSynsets.Contains(synset))
                {
                    continue;
                }
                result.Add("ss=" + synset);
                shared++;
            }

            bool coversFirst  = shared == firstSynsets.Count;
            bool coversSecond = shared == secondSynsets.Count;

            if (shared == 0)
            {
                result.Add("ncss");
            }
            else if (coversFirst && coversSecond)
            {
                result.Add("samess");
            }
            else if (coversFirst)
            {
                result.Add("2isa1");
            }
            else if (coversSecond)
            {
                result.Add("1isa2");
            }

            return result;
        }
示例#13
0
    /// <summary>
    /// Extracts the outline edges (the outline may have holes) of a triangulated shape.
    /// </summary>
    /// <remarks>
    /// Input format: every two consecutive vectors, counted from the beginning, represent one segment.
    /// All triangles are considered filled and any non-triangle structure is considered empty.
    /// The points of each returned edge are arranged so that the filled area is on their left side.
    /// Note that the edges are not returned in any particular order, but the two points of each
    /// edge are ordered.
    /// </remarks>
    /// <param name="src">The segment list, two vectors per segment.</param>
    /// <returns>The outline edges, two vectors per edge.</returns>
    public static List <Vector2> ExtractEdge(this List <Vector2> src)
    {
        var neighbours = new Dictionary <Vector2, List <Vector2> >();

        // Register every segment in both directions.
        int index = 0;
        while (index < src.Count)
        {
            neighbours.GetOrDefault(src[index]).Add(src[index + 1]);
            neighbours.GetOrDefault(src[index + 1]).Add(src[index]);
            index += 2;
        }

        // Sort the neighbours of each vertex by the angle of the direction towards them.
        foreach (var entry in neighbours)
        {
            var origin = entry.Key;

            entry.Value.Sort((va, vb) =>
            {
                Vector2 da = origin.To(va);
                Vector2 db = origin.To(vb);
                float   angleA = Mathf.Atan2(da.y, da.x);
                float   angleB = Mathf.Atan2(db.y, db.x);

                if (angleA < angleB)
                {
                    return -1;
                }
                if (angleA > angleB)
                {
                    return 1;
                }
                return 0;
            });
        }

        // The output size should not exceed the input size.
        var pending = new Util.Set <Edge>(src.Count);

        foreach (var vert in src.Distinct().ToList())
        {
            var ring = neighbours[vert];

            for (int k = 0; k < ring.Count; k++)
            {
                var start = ring[k];
                var end   = ring[(k + 1).ModSys(ring.Count)];

                // Exclude the edge if the triangle edges are arranged clockwise.
                if (new Triangle(vert, start, end).area <= 0)
                {
                    continue;
                }

                // An edge is owned by either 1 or 2 triangles, so it shows up once or twice.
                // Toggling membership leaves exactly the edges seen once: the outlines,
                // including the inner ones.
                var edge = new Edge(start, end);

                // Takes about 200ms when src.Length == 60000.
                if (pending.Contains(edge))
                {
                    pending.Remove(edge);
                }
                else
                {
                    pending.Add(edge);
                }
            }
        }

        var outline = new List <Vector2>();

        pending.Foreach((edge) => { outline.Add(edge.a); outline.Add(edge.b); });
        return outline;
    }