Example #1
0
 /// <summary>
 /// Creates a context from a token array, taking the last token as the head.
 /// </summary>
 /// <param name="tokens">
 /// The tokens of the context; the final element's index is recorded as the head token index.
 /// </param>
 /// <param name="headToken">
 /// The text of the head token.
 /// </param>
 /// <param name="headTag">
 /// The tag of the head token.
 /// </param>
 /// <param name="neType">
 /// Forwarded unchanged to the base constructor.
 /// </param>
 public Context(object[] tokens, string headToken, string headTag, string neType)
     : base(null, null, 1, null, null, neType)
 {
     // The head index always points at the final token of the supplied array.
     int lastTokenIndex = tokens.Length - 1;

     mHeadTokenText = headToken;
     mHeadTokenTag = headTag;
     mTokens = tokens;
     mHeadTokenIndex = lastTokenIndex;

     // Done last: the instance is handed to GetSynsetSet, so the fields above are set first.
     mSynsets = GetSynsetSet(this);
 }
Example #2
0
 /// <summary>
 /// Creates a gender model, either prepared for collecting training events or
 /// loaded from a binary model file for testing.
 /// </summary>
 /// <param name="modelName">
 /// Base name used to locate the ".mal" and ".fem" name lists and, when not training,
 /// the model file (base name plus the model extension).
 /// </param>
 /// <param name="train">
 /// True to set up an empty training-event list; false to load the model from file.
 /// </param>
 private GenderModel(string modelName, bool train)
 {
     mModelName = modelName;
     mMaleNames = ReadNames(modelName + ".mal");
     mFemaleNames = ReadNames(modelName + ".fem");

     if (train)
     {
         // Training mode only needs somewhere to collect events; no model is loaded.
         mEvents = new List<SharpEntropy.TrainingEvent>();
         return;
     }

     var modelReader = new SharpEntropy.IO.BinaryGisModelReader(modelName + mModelExtension);
     mTestModel = new SharpEntropy.GisModel(modelReader);

     // Cache the outcome indices so they need not be looked up by name again.
     mMaleIndex = mTestModel.GetOutcomeIndex(GenderEnum.Male.ToString());
     mFemaleIndex = mTestModel.GetOutcomeIndex(GenderEnum.Female.ToString());
     mNeuterIndex = mTestModel.GetOutcomeIndex(GenderEnum.Neuter.ToString());
 }
Example #3
0
		/// <summary>
		/// Adds an entry to the lookup list in memory, ready for writing to file.
		/// </summary>
		/// <param name="word">
		/// The word for which an entry should be added.
		/// </param>
		/// <param name="tag">
		/// The tag that should be marked as valid for this word.
		/// </param>
		public virtual void AddEntry(string word, string tag)
		{
			// Single lookup: fetch the existing tag set, or create and register a new one.
			Util.Set<string> tags;
			if (!mDictionary.TryGetValue(word, out tags))
			{
				tags = new Util.Set<string>();
				mDictionary.Add(word, tags);
			}
			tags.Add(tag);

			// TryGetValue leaves the count at 0 for an unseen word, so the first entry stores 1.
			int seenCount;
			mWordCounts.TryGetValue(word, out seenCount);
			mWordCounts[word] = seenCount + 1;
		}
Example #4
0
        /// <summary>
        /// Returns string-match features for the specified mention and entity.
        /// </summary>
        /// <param name="mention">
        /// The mention.
        /// </param>
        /// <param name="entity">
        /// The entity.
        /// </param>
        /// <returns>
        /// List of string-match features for the specified mention and entity.
        /// </returns>
        protected internal virtual List <string> GetStringMatchFeatures(Mention.MentionContext mention, DiscourseEntity entity)
        {
            bool headsAgree           = false;
            bool allModifiersAgree    = false;
            bool nonTheModifiersAgree = false;
            bool titleSeen            = false;

            // Modifiers and lower-cased head of the mention under consideration.
            var    mentionModifiers = ConstructModifierSet(mention.TokenParses, mention.HeadTokenIndex);
            string mentionHead      = mention.HeadTokenText.ToLower();

            // Per-mention features are gathered in a set first (presumably so repeats are emitted once).
            Util.Set <string> collected = new Util.HashSet <string>();

            foreach (Mention.MentionContext candidate in entity.Mentions)
            {
                // Whole-phrase comparison: exact match, coordination mix, or substring.
                string exact = GetExactMatchFeature(candidate, mention);
                if (exact != null)
                {
                    collected.Add(exact);
                }
                else if (candidate.Parse.IsCoordinatedNounPhrase && !mention.Parse.IsCoordinatedNounPhrase)
                {
                    collected.Add("cmix");
                }
                else
                {
                    string strippedMention   = StripNounPhrase(mention);
                    string strippedCandidate = StripNounPhrase(candidate);
                    if (strippedMention != null && strippedCandidate != null && IsSubstring(strippedMention, strippedCandidate))
                    {
                        collected.Add("substring");
                    }
                }

                Mention.IParse[] candidateTokens    = candidate.TokenParses;
                int              candidateHeadIndex = candidate.HeadTokenIndex;
                string           candidateHead      = candidate.HeadTokenText.ToLower();

                // Model lexical similarity: heads are compared regardless of tag (so NN can match NNP).
                if (mentionHead == candidateHead)
                {
                    headsAgree = true;
                    collected.Add("hds=" + mentionHead);

                    // Only re-check modifiers while no fully matching mention has been found yet.
                    if (!(allModifiersAgree && nonTheModifiersAgree))
                    {
                        allModifiersAgree    = true;
                        nonTheModifiersAgree = true;
                        Util.Set <string> candidateModifiers = ConstructModifierSet(candidateTokens, candidateHeadIndex);
                        foreach (string word in mentionModifiers)
                        {
                            if (candidateModifiers.Contains(word))
                            {
                                continue;
                            }
                            allModifiersAgree = false;
                            if (word != "the")
                            {
                                nonTheModifiersAgree = false;
                                collected.Add("mmw=" + word);
                            }
                        }
                    }
                }

                // The mention's head appearing among the tokens before NonDescriptorStart counts as a title match.
                Util.Set <string> descriptorModifiers = ConstructModifierSet(candidateTokens, candidate.NonDescriptorStart);
                if (descriptorModifiers.Contains(mentionHead))
                {
                    titleSeen = true;
                }
            }

            // Per-mention features come first, followed by the entity-level summary features.
            var features = new List <string>();
            features.AddRange(collected);

            if (headsAgree)
            {
                features.Add("sameHead");
                features.Add(allModifiersAgree ? "modsMatch" : nonTheModifiersAgree ? "nonTheModsMatch" : "modsMisMatch");
            }
            if (titleSeen)
            {
                features.Add("titleMatch");
            }
            return features;
        }
Example #5
0
        /// <summary>
        /// Generates training events from the supplied extents: a positive event for every
        /// pair of extents sharing an entity id, each paired with one negative event against
        /// the next extent that is not in the entity's exclusion set.
        /// </summary>
        /// <param name="extents">
        /// The extents to process. An extent whose Id is -1 is treated as a singleton
        /// (not part of a coreference chain).
        /// </param>
        public virtual void SetExtents(Context[] extents)
        {
            var entities = new Util.HashList <int, Context>();
            // Extents which are not in a coreference chain.
            var singletons = new List <Context>();
            var allExtents = new List <Context>();

            // Populate data structures.
            foreach (Context currentExtent in extents)
            {
                if (currentExtent.Id == -1)
                {
                    singletons.Add(currentExtent);
                }
                else
                {
                    entities.Put(currentExtent.Id, currentExtent);
                }
                allExtents.Add(currentExtent);
            }

            // Rolling cursor into allExtents; it persists across entities so negative
            // examples are drawn from successive positions rather than always from the start.
            int allExtentsIndex = 0;
            Dictionary <int, Util.Set <string> > headSets = ConstructHeadSets(entities);
            Dictionary <int, Util.Set <string> > nameSets = ConstructNameSets(entities);

            foreach (int key in entities.Keys)
            {
                Util.Set <string> entityNameSet = nameSets[key];
                if (entityNameSet.Count == 0)
                {
                    continue;
                }

                List <Context>     entityContexts = entities[key];
                Util.Set <Context> exclusionSet   = ConstructExclusionSet(key, entities, headSets, nameSets, singletons);

                for (int firstEntityContextIndex = 0; firstEntityContextIndex < entityContexts.Count; firstEntityContextIndex++)
                {
                    Context firstEntityContext = entityContexts[firstEntityContextIndex];
                    for (int secondEntityContextIndex = firstEntityContextIndex + 1; secondEntityContextIndex < entityContexts.Count; secondEntityContextIndex++)
                    {
                        Context secondEntityContext = entityContexts[secondEntityContextIndex];
                        AddEvent(true, firstEntityContext, secondEntityContext);

                        // Scan at most one full lap of allExtents for a non-excluded extent.
                        int startIndex = allExtentsIndex;
                        do
                        {
                            Context compareEntityContext = allExtents[allExtentsIndex];
                            allExtentsIndex = (allExtentsIndex + 1) % allExtents.Count;
                            if (!exclusionSet.Contains(compareEntityContext))
                            {
                                if (DebugOn)
                                {
                                    // Singleton extents (Id == -1) have no entry in nameSets, so the
                                    // indexer would throw here; fall back to an empty name list.
                                    Util.Set <string> compareNameSet;
                                    string compareNames = nameSets.TryGetValue(compareEntityContext.Id, out compareNameSet)
                                        ? string.Join(",", compareNameSet.ToArray())
                                        : string.Empty;
                                    System.Console.Error.WriteLine(firstEntityContext.ToString() + " " + string.Join(",", entityNameSet.ToArray()) + " " + compareEntityContext.ToString() + " " + compareNames);
                                }
                                AddEvent(false, firstEntityContext, compareEntityContext);
                                break;
                            }
                        }while (allExtentsIndex != startIndex);
                    }
                }
            }
        }
Example #6
0
        /// <summary>
        /// Builds synset-overlap features for two contexts.
        /// </summary>
        /// <param name="common1">
        /// The first context, whose synsets are enumerated.
        /// </param>
        /// <param name="common2">
        /// The second context, whose synsets are tested for membership.
        /// </param>
        /// <returns>
        /// An empty list when either context has no synsets. Otherwise one "ss=" feature per
        /// shared synset, followed by at most one summary feature ("ncss", "samess", "2isa1" or "1isa2").
        /// </returns>
        private IEnumerable <string> GetCommonCommonFeatures(Context common1, Context common2)
        {
            var features = new List <string>();

            Util.Set <string> firstSynsets  = common1.Synsets;
            Util.Set <string> secondSynsets = common2.Synsets;

            // Nothing can be compared when either side has no synsets.
            if (firstSynsets.Count == 0 || secondSynsets.Count == 0)
            {
                return features;
            }

            // Emit a feature for every synset of the first context that the second also has.
            int sharedCount = 0;
            foreach (string candidate in firstSynsets)
            {
                if (!secondSynsets.Contains(candidate))
                {
                    continue;
                }
                features.Add("ss=" + candidate);
                sharedCount++;
            }

            // Summarise the overlap; the summary is added after the per-synset features.
            bool coversFirst  = sharedCount == firstSynsets.Count;
            bool coversSecond = sharedCount == secondSynsets.Count;

            if (sharedCount == 0)
            {
                features.Add("ncss");
            }
            else if (coversFirst && coversSecond)
            {
                features.Add("samess");
            }
            else if (coversFirst)
            {
                features.Add("2isa1");
            }
            else if (coversSecond)
            {
                features.Add("1isa2");
            }
            return features;
        }
Example #7
0
        /// <summary>
        /// Constructs the set of mentions which may be semantically compatible with the entity
        /// indicated by the specified entityKey.
        /// </summary>
        /// <param name="entityKey">
        /// The key of the entity for which the set is being constructed.
        /// </param>
        /// <param name="entities">
        /// A mapping between entity keys and their mentions.
        /// </param>
        /// <param name="headSets">
        /// A mapping between entity keys and their head sets.
        /// </param>
        /// <param name="nameSets">
        /// A mapping between entity keys and their name sets.
        /// </param>
        /// <param name="singletons">
        /// All entities which consist of a single mention.
        /// </param>
        /// <returns>
        /// A set of mentions for all the entities which might be semantically compatible
        /// with the entity indicated by the specified key.
        /// </returns>
        private Util.Set <Context> ConstructExclusionSet(int entityKey, Util.HashList <int, Context> entities,
                                                         Dictionary <int, Util.Set <string> > headSets, Dictionary <int, Util.Set <string> > nameSets, IEnumerable <Context> singletons)
        {
            Util.Set <Context> excluded    = new Util.HashSet <Context>();
            Util.Set <string>  ownHeads    = headSets[entityKey];
            Util.Set <string>  ownNames    = nameSets[entityKey];
            List <Context>     ownMentions = entities[entityKey];

            // Multi-mention entities: exclude the entity itself, unnamed entities, and any
            // entity sharing a head, a name type or a super class. The checks short-circuit
            // in this order.
            foreach (int candidateKey in entities.Keys)
            {
                List <Context> candidateMentions = entities[candidateKey];

                bool exclude = candidateKey == entityKey
                               || nameSets[candidateKey].Count == 0
                               || HasSameHead(ownHeads, headSets[candidateKey])
                               || HasSameNameType(ownNames, nameSets[candidateKey])
                               || HasSuperClass(ownMentions, candidateMentions);

                if (exclude)
                {
                    excluded.AddAll(candidateMentions);
                }
            }

            // Singletons: the same criteria applied to a single mention. One list is reused
            // as the wrapper that HasSuperClass expects.
            var wrapper = new List <Context>(1);

            foreach (Context single in singletons)
            {
                wrapper.Clear();
                wrapper.Add(single);

                bool exclude = ownHeads.Contains(single.HeadTokenText.ToLower())
                               || single.NameType == null
                               || ownNames.Contains(single.NameType)
                               || HasSuperClass(ownMentions, wrapper);

                if (exclude)
                {
                    excluded.Add(single);
                }
            }
            return excluded;
        }
Example #8
0
 /// <summary>
 /// Determines whether any name type of the entity also occurs in the candidate's name set.
 /// </summary>
 /// <param name="entityNameSet">
 /// The name types of the entity.
 /// </param>
 /// <param name="candidateNameSet">
 /// The name types of the candidate.
 /// </param>
 /// <returns>
 /// True if at least one element of entityNameSet is contained in candidateNameSet.
 /// </returns>
 private bool HasSameNameType(IEnumerable <string> entityNameSet, Util.Set <string> candidateNameSet)
 {
     bool sharesNameType = entityNameSet.Any(candidateNameSet.Contains);
     return sharesNameType;
 }
Example #9
0
 /// <summary>
 /// Determines whether any head of the entity also occurs in the candidate's head set.
 /// </summary>
 /// <param name="entityHeadSet">
 /// The heads of the entity.
 /// </param>
 /// <param name="candidateHeadSet">
 /// The heads of the candidate.
 /// </param>
 /// <returns>
 /// True if at least one element of entityHeadSet is contained in candidateHeadSet.
 /// </returns>
 private bool HasSameHead(IEnumerable <string> entityHeadSet, Util.Set <string> candidateHeadSet)
 {
     bool sharesHead = entityHeadSet.Any(candidateHeadSet.Contains);
     return sharesHead;
 }
Example #10
0
 /// <summary>
 /// Initializes a <code>SentenceDetectionContextGenerator</code> that works with the given
 /// induced abbreviations and end-of-sentence characters.
 /// </summary>
 /// <param name="inducedAbbreviations">
 /// A <code>Set</code> of strings representing induced abbreviations in the training data,
 /// for example "Mr.".
 /// </param>
 /// <param name="endOfSentenceCharacters">
 /// The characters that are treated as ending a sentence.
 /// </param>
 public SentenceDetectionContextGenerator(Util.Set <string> inducedAbbreviations, char[] endOfSentenceCharacters)
 {
     // Both arguments are stored as given; no copy is made.
     _endOfSentenceCharacters = endOfSentenceCharacters;
     _inducedAbbreviations    = inducedAbbreviations;
 }
Example #11
0
    /// <summary>
    /// Extracts the outline edges (which may include holes) of a triangulated shape.
    /// </summary>
    /// <remarks>
    /// Input format: every two consecutive vectors, counted from the beginning, represent one segment.
    /// All triangles are considered filled and any non-triangle structure is considered empty.
    /// The points of each returned edge are arranged so that their left side is filled.
    /// The returned edges are in no particular order, but the two points of each edge are ordered.
    /// </remarks>
    /// <param name="src">
    /// The segment list, two points per segment.
    /// </param>
    /// <returns>
    /// The outline as a flat list, two points per edge.
    /// </returns>
    public static List <Vector2> ExtractEdge(this List <Vector2> src)
    {
        // Adjacency: every segment is registered at both of its end points.
        var neighbours = new Dictionary <Vector2, List <Vector2> >();

        for (int k = 0; k < src.Count; k += 2)
        {
            Vector2 head = src[k];
            Vector2 tail = src[k + 1];
            neighbours.GetOrDefault(head).Add(tail);
            neighbours.GetOrDefault(tail).Add(head);
        }

        // Sort each vertex's neighbours by the angle of the direction towards them.
        foreach (var entry in neighbours)
        {
            Vector2 origin = entry.Key;
            entry.Value.Sort((first, second) =>
            {
                Vector2 toFirst     = origin.To(first);
                Vector2 toSecond    = origin.To(second);
                float   firstAngle  = Mathf.Atan2(toFirst.y, toFirst.x);
                float   secondAngle = Mathf.Atan2(toSecond.y, toSecond.x);

                if (firstAngle < secondAngle)
                {
                    return -1;
                }
                if (firstAngle > secondAngle)
                {
                    return 1;
                }
                return 0;
            });
        }

        // The output size should not exceed the input size.
        var outline = new Util.Set <Edge>(src.Count);

        foreach (var vertex in src.Distinct().ToList())
        {
            List <Vector2> ring = neighbours[vertex];
            for (int k = 0; k < ring.Count; k++)
            {
                Vector2 start = ring[k];
                Vector2 end   = ring[(k + 1).ModSys(ring.Count)];

                // Exclude the edge if the triangle's edges are arranged clockwise.
                if (new Triangle(vertex, start, end).area <= 0)
                {
                    continue;
                }

                // An edge is owned by either 1 or 2 triangles, so it appears once or twice.
                // Toggling membership leaves only the edges seen an odd number of times,
                // which yields the outlines, including the inner ones.
                var candidate = new Edge(start, end);

                // Takes about 200ms when src.Length == 60000.
                if (outline.Contains(candidate))
                {
                    outline.Remove(candidate);
                }
                else
                {
                    outline.Add(candidate);
                }
            }
        }

        // Flatten the remaining edges into the two-points-per-edge output format.
        var flattened = new List <Vector2>();

        outline.Foreach((e) => { flattened.Add(e.a); flattened.Add(e.b); });
        return flattened;
    }
Example #12
0
        /// <summary>
        /// Follows the head chain starting at the given noun phrase and reports whether any
        /// head along the chain is already a mention.
        /// </summary>
        /// <param name="nounPhrase">
        /// The parse at which the chain starts; it is not itself tested against the mentions.
        /// </param>
        /// <param name="headMap">
        /// Maps a parse to its head.
        /// </param>
        /// <param name="mentions">
        /// The existing mentions.
        /// </param>
        /// <returns>
        /// True if a head reachable from nounPhrase through headMap is contained in mentions;
        /// false once the chain ends without a hit.
        /// </returns>
        private static bool IsHeadOfExistingMention(IParse nounPhrase, Dictionary <IParse, IParse> headMap, Util.Set <IParse> mentions)
        {
            IParse current = nounPhrase;
            IParse next;

            // TryGetValue does the existence check and the fetch in one dictionary lookup.
            while (headMap.TryGetValue(current, out next))
            {
                if (mentions.Contains(next))
                {
                    return true;
                }
                current = next;
            }
            return false;
        }
Example #13
0
 /// <summary>
 /// Populates the token array and the head token's index, tag, text and synsets
 /// from this context's parse.
 /// </summary>
 /// <param name="headFinder">
 /// The head finder used to locate the last head, its head index and its head token.
 /// </param>
 private void Initialize(Mention.IHeadFinder headFinder)
 {
     Mention.IParse head = headFinder.GetLastHead(Parse);
     List<Mention.IParse> tokenList = head.Tokens;
     mHeadTokenIndex = headFinder.GetHeadIndex(head);
     Mention.IParse headToken = headFinder.GetHeadToken(head);
     mTokens = tokenList.ToArray();
     mHeadTokenTag = headToken.SyntacticType;
     mHeadTokenText = headToken.ToString();

     // Tags are fixed identifiers, so compare them ordinally rather than with the
     // current culture's linguistic rules.
     bool isNonProperNoun = mHeadTokenTag.StartsWith("NN", System.StringComparison.Ordinal)
                            && !mHeadTokenTag.StartsWith("NNP", System.StringComparison.Ordinal);

     if (isNonProperNoun)
     {
         // Synsets are looked up only for "NN*" tags that are not "NNP*".
         mSynsets = GetSynsetSet(this);
     }
     else
     {
         mSynsets = new Util.HashSet<string>();
     }
 }