/// <summary>
/// Constructs a context over a pre-tokenized mention.
/// </summary>
/// <param name="tokens">
/// The tokens of the mention.
/// </param>
/// <param name="headToken">
/// The text of the mention's head token.
/// </param>
/// <param name="headTag">
/// The part-of-speech tag of the head token.
/// </param>
/// <param name="neType">
/// The named-entity type of the mention (passed through to the base class).
/// </param>
public Context(object[] tokens, string headToken, string headTag, string neType) : base(null, null, 1, null, null, neType)
{
    mTokens = tokens;
    // The head is taken to be the last token of the mention.
    mHeadTokenIndex = tokens.Length - 1;
    mHeadTokenText = headToken;
    mHeadTokenTag = headTag;
    mSynsets = GetSynsetSet(this);
}
/// <summary>
/// Creates a gender model backed by the files at <paramref name="modelName"/>.
/// In training mode an empty event list is prepared; otherwise an existing
/// GIS model is loaded and the outcome indices for each gender are cached.
/// </summary>
/// <param name="modelName">
/// Base path of the model; companion name lists are read from
/// "{modelName}.mal" and "{modelName}.fem".
/// </param>
/// <param name="train">
/// True to prepare for training; false to load an existing model for testing.
/// </param>
private GenderModel(string modelName, bool train)
{
    mModelName = modelName;
    // Known male/female names, loaded via ReadNames (presumably one name per
    // line in the .mal/.fem files — confirm against ReadNames).
    mMaleNames = ReadNames(modelName + ".mal");
    mFemaleNames = ReadNames(modelName + ".fem");
    if (train)
    {
        mEvents = new List<SharpEntropy.TrainingEvent>();
    }
    else
    {
        mTestModel = new SharpEntropy.GisModel(new SharpEntropy.IO.BinaryGisModelReader(modelName + mModelExtension));
        // Cache the outcome indices so classification doesn't look them up per call.
        mMaleIndex = mTestModel.GetOutcomeIndex(GenderEnum.Male.ToString());
        mFemaleIndex = mTestModel.GetOutcomeIndex(GenderEnum.Female.ToString());
        mNeuterIndex = mTestModel.GetOutcomeIndex(GenderEnum.Neuter.ToString());
    }
}
/// <summary>
/// Adds an entry to the lookup list in memory, ready for writing to file.
/// </summary>
/// <param name="word">
/// The word for which an entry should be added.
/// </param>
/// <param name="tag">
/// The tag that should be marked as valid for this word.
/// </param>
public virtual void AddEntry(string word, string tag)
{
    // Single TryGetValue lookup instead of ContainsKey followed by the indexer.
    Util.Set<string> tags;
    if (!mDictionary.TryGetValue(word, out tags))
    {
        tags = new Util.Set<string>();
        mDictionary.Add(word, tags);
    }
    tags.Add(tag);
    // Track how many times this word has been added.
    int count;
    if (mWordCounts.TryGetValue(word, out count))
    {
        mWordCounts[word] = count + 1;
    }
    else
    {
        mWordCounts.Add(word, 1);
    }
}
/// <summary>
/// Returns string-match features for the specified mention and entity.
/// </summary>
/// <param name="mention">
/// The mention.
/// </param>
/// <param name="entity">
/// The entity.
/// </param>
/// <returns>
/// List of string-match features for the specified mention and entity.
/// </returns>
protected internal virtual List<string> GetStringMatchFeatures(Mention.MentionContext mention, DiscourseEntity entity)
{
    bool sameHead = false;
    bool modifersMatch = false;
    bool titleMatch = false;
    bool noTheModifiersMatch = false;
    var features = new List<string>();
    Mention.IParse[] mentionTokens = mention.TokenParses;
    // Modifier words of the current mention (tokens up to its head index).
    var entityContextModifierSet = ConstructModifierSet(mentionTokens, mention.HeadTokenIndex);
    string mentionHeadString = mention.HeadTokenText.ToLower();
    // A set is used so duplicate features produced across entity mentions collapse.
    Util.Set<string> featureSet = new Util.HashSet<string>();
    foreach (Mention.MentionContext entityMention in entity.Mentions)
    {
        string exactMatchFeature = GetExactMatchFeature(entityMention, mention);
        if (exactMatchFeature != null)
        {
            featureSet.Add(exactMatchFeature);
        }
        else if (entityMention.Parse.IsCoordinatedNounPhrase && !mention.Parse.IsCoordinatedNounPhrase)
        {
            // Coordination mismatch between the candidate mention and this mention.
            featureSet.Add("cmix");
        }
        else
        {
            // No exact match: fall back to a substring comparison of the
            // stripped noun phrases.
            string mentionStrip = StripNounPhrase(mention);
            string entityMentionStrip = StripNounPhrase(entityMention);
            if (mentionStrip != null && entityMentionStrip != null)
            {
                if (IsSubstring(mentionStrip, entityMentionStrip))
                {
                    featureSet.Add("substring");
                }
            }
        }
        Mention.IParse[] entityMentionTokens = entityMention.TokenParses;
        int headIndex = entityMention.HeadTokenIndex;
        //if (!mention.getHeadTokenTag().equals(entityMention.getHeadTokenTag())) {
        //  continue;
        //} want to match NN NNP
        string entityMentionHeadString = entityMention.HeadTokenText.ToLower();
        // Model lexical similarity between the two head words.
        if (mentionHeadString == entityMentionHeadString)
        {
            sameHead = true;
            featureSet.Add("hds=" + mentionHeadString);
            if (!modifersMatch || !noTheModifiersMatch)
            {
                // Only check if we haven't already found a mention whose
                // modifiers are the same.
                modifersMatch = true;
                noTheModifiersMatch = true;
                Util.Set<string> entityMentionModifierSet = ConstructModifierSet(entityMentionTokens, headIndex);
                foreach (string modifierWord in entityContextModifierSet)
                {
                    if (!entityMentionModifierSet.Contains(modifierWord))
                    {
                        modifersMatch = false;
                        // "the" is ignored when deciding whether the modifiers differ.
                        if (modifierWord != "the")
                        {
                            noTheModifiersMatch = false;
                            featureSet.Add("mmw=" + modifierWord);
                        }
                    }
                }
            }
        }
        // NOTE(review): presumably a mention head appearing among the candidate's
        // descriptor modifiers indicates a title-style match — confirm against
        // ConstructModifierSet/NonDescriptorStart semantics.
        Util.Set<string> descriptorModifierSet = ConstructModifierSet(entityMentionTokens, entityMention.NonDescriptorStart);
        if (descriptorModifierSet.Contains(mentionHeadString))
        {
            titleMatch = true;
        }
    }
    if (featureSet.Count != 0)
    {
        features.AddRange(featureSet);
    }
    // Summarize head/modifier agreement across all of the entity's mentions.
    if (sameHead)
    {
        features.Add("sameHead");
        if (modifersMatch)
        {
            features.Add("modsMatch");
        }
        else if (noTheModifiersMatch)
        {
            features.Add("nonTheModsMatch");
        }
        else
        {
            features.Add("modsMisMatch");
        }
    }
    if (titleMatch)
    {
        features.Add("titleMatch");
    }
    return (features);
}
/// <summary>
/// Generates events from the specified extents: a positive event (via AddEvent)
/// for each pair of mentions within the same coreference chain, and for each
/// such pair a negative event pairing the first mention with an extent outside
/// the chain's exclusion set.
/// </summary>
/// <param name="extents">
/// The extents to process; an Id of -1 marks an extent with no chain (singleton).
/// </param>
public virtual void SetExtents(Context[] extents)
{
    var entities = new Util.HashList<int, Context>();
    // Extents which are not in a coreference chain.
    var singletons = new List<Context>();
    var allExtents = new List<Context>();
    // Populate data structures.
    foreach (Context currentExtent in extents)
    {
        if (currentExtent.Id == -1)
        {
            singletons.Add(currentExtent);
        }
        else
        {
            entities.Put(currentExtent.Id, currentExtent);
        }
        allExtents.Add(currentExtent);
    }
    // Round-robin cursor over allExtents used to pick negative examples;
    // deliberately shared across all entities so negatives are spread out.
    int allExtentsIndex = 0;
    Dictionary<int, Util.Set<string>> headSets = ConstructHeadSets(entities);
    Dictionary<int, Util.Set<string>> nameSets = ConstructNameSets(entities);
    foreach (int key in entities.Keys)
    {
        Util.Set<string> entityNameSet = nameSets[key];
        // Entities with no name types are skipped entirely.
        if (entityNameSet.Count == 0)
        {
            continue;
        }
        List<Context> entityContexts = entities[key];
        // Extents that must not be used as negative examples for this entity.
        Util.Set<Context> exclusionSet = ConstructExclusionSet(key, entities, headSets, nameSets, singletons);
        //if (entityContexts.Count == 1) {
        //}
        for (int firstEntityContextIndex = 0; firstEntityContextIndex < entityContexts.Count; firstEntityContextIndex++)
        {
            Context firstEntityContext = entityContexts[firstEntityContextIndex];
            //if (isPronoun(ec1)) {
            //  continue;
            //}
            for (int secondEntityContextIndex = firstEntityContextIndex + 1; secondEntityContextIndex < entityContexts.Count; secondEntityContextIndex++)
            {
                Context secondEntityContext = entityContexts[secondEntityContextIndex];
                //if (isPronoun(ec2)) {
                //  continue;
                //}
                // Positive example: two mentions of the same entity.
                AddEvent(true, firstEntityContext, secondEntityContext);
                // Negative example: advance the cursor (wrapping) until an extent
                // outside the exclusion set is found, or we come full circle.
                int startIndex = allExtentsIndex;
                do
                {
                    Context compareEntityContext = allExtents[allExtentsIndex];
                    allExtentsIndex = (allExtentsIndex + 1) % allExtents.Count;
                    if (!exclusionSet.Contains(compareEntityContext))
                    {
                        if (DebugOn)
                        {
                            System.Console.Error.WriteLine(firstEntityContext.ToString() + " " + string.Join(",", entityNameSet.ToArray()) + " " + compareEntityContext.ToString() + " " + nameSets[compareEntityContext.Id]);
                        }
                        AddEvent(false, firstEntityContext, compareEntityContext);
                        break;
                    }
                }
                while (allExtentsIndex != startIndex);
            }
        }
    }
}
/// <summary>
/// Returns similarity features based on the overlap between the synset sets of
/// two common-noun contexts: one "ss=" feature per shared synset plus a single
/// summary feature describing the containment relationship.
/// </summary>
/// <param name="common1">
/// The first common-noun context.
/// </param>
/// <param name="common2">
/// The second common-noun context.
/// </param>
/// <returns>
/// The computed features; empty when either context has no synsets.
/// </returns>
private IEnumerable<string> GetCommonCommonFeatures(Context common1, Context common2)
{
    var features = new List<string>();
    Util.Set<string> synsets1 = common1.Synsets;
    Util.Set<string> synsets2 = common2.Synsets;
    // No similarity features can be produced if either side has no synsets.
    if (synsets1.Count == 0 || synsets2.Count == 0)
    {
        return (features);
    }
    // Emit one feature per shared synset and count the overlap.
    int commonSynsetCount = 0;
    foreach (string synset in synsets1)
    {
        if (synsets2.Contains(synset))
        {
            features.Add("ss=" + synset);
            commonSynsetCount++;
        }
    }
    // Summary feature: no overlap, identical sets, or one set contained in the other.
    if (commonSynsetCount == 0)
    {
        features.Add("ncss");
    }
    else if (commonSynsetCount == synsets1.Count && commonSynsetCount == synsets2.Count)
    {
        features.Add("samess");
    }
    else if (commonSynsetCount == synsets1.Count)
    {
        features.Add("2isa1");
    }
    else if (commonSynsetCount == synsets2.Count)
    {
        features.Add("1isa2");
    }
    return (features);
}
/// <summary>
/// Constructs a set of entities which may be semantically compatible with the
/// entity indicated by the specified entityKey.
/// </summary>
/// <param name="entityKey">
/// The key of the entity for which the set is being constructed.
/// </param>
/// <param name="entities">
/// A mapping between entity keys and their mentions.
/// </param>
/// <param name="headSets">
/// A mapping between entity keys and their head sets.
/// </param>
/// <param name="nameSets">
/// A mapping between entity keys and their name sets.
/// </param>
/// <param name="singletons">
/// A list of all entities which consist of a single mention.
/// </param>
/// <returns>
/// A set of mentions for all the entities which might be semantically compatible
/// with the entity indicated by the specified key.
/// </returns>
private Util.Set<Context> ConstructExclusionSet(int entityKey, Util.HashList<int, Context> entities, Dictionary<int, Util.Set<string>> headSets, Dictionary<int, Util.Set<string>> nameSets, IEnumerable<Context> singletons)
{
    Util.Set<Context> exclusionSet = new Util.HashSet<Context>();
    Util.Set<string> entityHeadSet = headSets[entityKey];
    Util.Set<string> entityNameSet = nameSets[entityKey];
    List<Context> entityContexts = entities[entityKey];
    // Exclude whole entities: the entity itself, entities with no name types,
    // and entities sharing a head word, a name type, or a superclass relation.
    foreach (int key in entities.Keys)
    {
        List<Context> candidateContexts = entities[key];
        if (key == entityKey)
        {
            exclusionSet.AddAll(candidateContexts);
        }
        else if (nameSets[key].Count == 0)
        {
            exclusionSet.AddAll(candidateContexts);
        }
        else if (HasSameHead(entityHeadSet, headSets[key]))
        {
            exclusionSet.AddAll(candidateContexts);
        }
        else if (HasSameNameType(entityNameSet, nameSets[key]))
        {
            exclusionSet.AddAll(candidateContexts);
        }
        else if (HasSuperClass(entityContexts, candidateContexts))
        {
            exclusionSet.AddAll(candidateContexts);
        }
    }
    // Exclude singleton mentions by the same criteria; the single-element list
    // is reused across iterations so HasSuperClass can take a list argument.
    var singles = new List<Context>(1);
    foreach (Context currentSingleton in singletons)
    {
        singles.Clear();
        singles.Add(currentSingleton);
        if (entityHeadSet.Contains(currentSingleton.HeadTokenText.ToLower()))
        {
            exclusionSet.Add(currentSingleton);
        }
        else if (currentSingleton.NameType == null)
        {
            exclusionSet.Add(currentSingleton);
        }
        else if (entityNameSet.Contains(currentSingleton.NameType))
        {
            exclusionSet.Add(currentSingleton);
        }
        else if (HasSuperClass(entityContexts, singles))
        {
            exclusionSet.Add(currentSingleton);
        }
    }
    return (exclusionSet);
}
/// <summary>
/// Determines whether the entity's name-type set shares at least one name type
/// with the candidate's name-type set.
/// </summary>
/// <param name="entityNameSet">
/// The name types of the entity under consideration.
/// </param>
/// <param name="candidateNameSet">
/// The name types of the candidate entity.
/// </param>
/// <returns>
/// True if any name type occurs in both sets; otherwise false.
/// </returns>
private bool HasSameNameType(IEnumerable<string> entityNameSet, Util.Set<string> candidateNameSet)
{
    foreach (string nameType in entityNameSet)
    {
        if (candidateNameSet.Contains(nameType))
        {
            return true;
        }
    }
    return false;
}
/// <summary>
/// Determines whether the entity's head-word set shares at least one head word
/// with the candidate's head-word set.
/// </summary>
/// <param name="entityHeadSet">
/// The head words of the entity under consideration.
/// </param>
/// <param name="candidateHeadSet">
/// The head words of the candidate entity.
/// </param>
/// <returns>
/// True if any head word occurs in both sets; otherwise false.
/// </returns>
private bool HasSameHead(IEnumerable<string> entityHeadSet, Util.Set<string> candidateHeadSet)
{
    // True as soon as any head word from the entity also appears in the candidate set.
    return entityHeadSet.Any(head => candidateHeadSet.Contains(head));
}
/// <summary>
/// Creates a new <code>SentenceDetectionContextGenerator</code> instance which
/// uses the given set of induced abbreviations.
/// </summary>
/// <param name="inducedAbbreviations">
/// A <code>Set</code> of strings representing induced abbreviations in the
/// training data. Example: Mr.
/// </param>
/// <param name="endOfSentenceCharacters">
/// Character array of end-of-sentence characters.
/// </param>
public SentenceDetectionContextGenerator(Util.Set<string> inducedAbbreviations, char[] endOfSentenceCharacters)
{
    this._inducedAbbreviations = inducedAbbreviations;
    this._endOfSentenceCharacters = endOfSentenceCharacters;
}
/// Input format: each consecutive pair of vectors represents one segment.
/// All triangles are considered filled and non-triangle structure is always empty.
/// Extracts the outlines (which may have holes) and returns them.
/// Points in each edge are arranged so that their left side is filled.
/// Note: edges are not in order, but the two points within each edge are ordered.
public static List<Vector2> ExtractEdge(this List<Vector2> src)
{
    // Build an adjacency map: each vertex -> the vertices it shares a segment with.
    // NOTE(review): assumes GetOrDefault inserts and returns a new empty list
    // for missing keys — confirm against the GetOrDefault helper.
    var adj = new Dictionary<Vector2, List<Vector2>>();
    for (int i = 0; i < src.Count; i += 2)
    {
        adj.GetOrDefault(src[i]).Add(src[i + 1]);
        adj.GetOrDefault(src[i + 1]).Add(src[i]);
    }
    // Sort the adjacent edges (ascending by atan2 polar angle around each vertex).
    foreach (var x in adj)
    {
        var curVert = x.Key;
        var adjList = x.Value;
        int Compare(Vector2 va, Vector2 vb)
        {
            Vector2 da = curVert.To(va);
            Vector2 db = curVert.To(vb);
            float aa = Mathf.Atan2(da.y, da.x);
            float ba = Mathf.Atan2(db.y, db.x);
            return (aa < ba ? -1 : aa > ba ? 1 : 0);
        }
        adjList.Sort(Compare);
    }
    // Output size should not exceed input size.
    var rest = new Util.Set<Edge>(src.Count);
    foreach (var vert in src.Distinct().ToList())
    {
        var adx = adj[vert];
        for (int i = 0; i < adx.Count; i++)
        {
            var from = adx[i];
            var to = adx[(i + 1).ModSys(adx.Count)];
            // Exclude the edge if the triangle's edges are arranged clockwise
            // (non-positive signed area).
            if (new Triangle(vert, from, to).area <= 0)
            {
                continue;
            }
            // Edges can appear either 1 or 2 times, because an edge can only be
            // owned by 1 or 2 triangles. Toggling membership leaves exactly the
            // edges seen once — the outlines, including interior outlines (holes).
            var edge = new Edge(from, to);
            // Takes up about 200ms when src.Length == 60000.
            if (rest.Contains(edge))
            {
                rest.Remove(edge);
            }
            else
            {
                rest.Add(edge);
            }
        }
    }
    // Flatten the surviving edges back into a point-pair list.
    var res = new List<Vector2>();
    rest.Foreach((i) =>
    {
        res.Add(i.a);
        res.Add(i.b);
    });
    return (res);
}
/// <summary>
/// Determines whether the specified noun phrase, followed up its head chain in
/// <paramref name="headMap"/>, reaches a parse that is already a mention.
/// </summary>
/// <param name="nounPhrase">
/// The noun phrase whose head chain is walked.
/// </param>
/// <param name="headMap">
/// A mapping from each parse to its head parse.
/// </param>
/// <param name="mentions">
/// The set of parses already identified as mentions.
/// </param>
/// <returns>
/// True if any head along the chain is an existing mention; otherwise false.
/// </returns>
private static bool IsHeadOfExistingMention(IParse nounPhrase, Dictionary<IParse, IParse> headMap, Util.Set<IParse> mentions)
{
    IParse head = nounPhrase;
    // TryGetValue replaces the ContainsKey + indexer pair, halving the lookups;
    // the key is read before the out parameter overwrites `head`.
    while (headMap.TryGetValue(head, out head))
    {
        if (mentions.Contains(head))
        {
            return (true);
        }
    }
    return (false);
}
/// <summary>
/// Initializes the token, head-token, and synset fields of this context from
/// its parse, using the supplied head finder.
/// </summary>
/// <param name="headFinder">
/// The head finder used to locate the head constituent and head token.
/// </param>
private void Initialize(Mention.IHeadFinder headFinder)
{
    Mention.IParse head = headFinder.GetLastHead(Parse);
    List<Mention.IParse> tokenList = head.Tokens;
    mHeadTokenIndex = headFinder.GetHeadIndex(head);
    Mention.IParse headToken = headFinder.GetHeadToken(head);
    mTokens = tokenList.ToArray();
    mHeadTokenTag = headToken.SyntacticType;
    mHeadTokenText = headToken.ToString();
    // Only look up synsets for common nouns: tags starting with "NN" but not
    // "NNP" (proper nouns). Ordinal comparison is used because POS tags are
    // machine identifiers — culture-sensitive StartsWith (CA1310) could
    // misbehave under locales such as Turkish.
    if (mHeadTokenTag.StartsWith("NN", System.StringComparison.Ordinal) && !mHeadTokenTag.StartsWith("NNP", System.StringComparison.Ordinal))
    {
        mSynsets = GetSynsetSet(this);
    }
    else
    {
        mSynsets = new Util.HashSet<string>();
    }
}