/// <summary>
/// Collects the feature strings for a noun-phrase context: a "default" feature,
/// one "mw=" feature per token preceding the head, the head word ("hw="), the
/// name type ("n="), gendered-name features for person mentions, and one
/// "ss=" feature per synset.
/// </summary>
private List<string> GetFeatures(Context nounPhrase)
{
    var features = new List<string> { "default" };

    // Words appearing before the head token.
    for (int index = 0; index < nounPhrase.HeadTokenIndex; index++)
    {
        features.Add("mw=" + nounPhrase.Tokens[index].ToString());
    }

    features.Add("hw=" + nounPhrase.HeadTokenText);
    features.Add("n=" + nounPhrase.NameType);

    if (nounPhrase.NameType != null && nounPhrase.NameType == "person")
    {
        object[] tokens = nounPhrase.Tokens;
        // Scan tokens up to the head; the "|| index == 0" clause guarantees the
        // first token is inspected even when the head is the first token.
        for (int index = 0; index < nounPhrase.HeadTokenIndex || index == 0; index++)
        {
            string candidateName = tokens[index].ToString().ToLower();
            if (mFemaleNames.Contains(candidateName))
            {
                features.Add("fem");
            }
            if (mMaleNames.Contains(candidateName))
            {
                features.Add("mas");
            }
        }
    }

    foreach (string synset in nounPhrase.Synsets)
    {
        features.Add("ss=" + synset);
    }

    return features;
}
/// <summary>
/// Builds the set of mentions belonging to entities that might be semantically
/// compatible with the entity identified by <paramref name="entityKey"/>.
/// </summary>
/// <param name="entityKey">
/// The key of the entity for which the set is being constructed.
/// </param>
/// <param name="entities">
/// A mapping between entity keys and their mentions.
/// </param>
/// <param name="headSets">
/// A mapping between entity keys and their head sets.
/// </param>
/// <param name="nameSets">
/// A mapping between entity keys and their name sets.
/// </param>
/// <param name="singletons">
/// A list of all entities which consist of a single mention.
/// </param>
/// <returns>
/// A set of mentions for all the entities which might be semantically
/// compatible with the entity indicated by the specified key.
/// </returns>
private Util.Set<Context> ConstructExclusionSet(int entityKey, Util.HashList<int, Context> entities, Dictionary<int, Util.Set<string>> headSets, Dictionary<int, Util.Set<string>> nameSets, List<Context> singletons)
{
    Util.Set<Context> exclusionSet = new Util.HashSet<Context>();
    Util.Set<string> entityHeadSet = headSets[entityKey];
    Util.Set<string> entityNameSet = nameSets[entityKey];
    List<Context> entityContexts = entities[entityKey];

    // Multi-mention entities: exclude the entity itself plus any entity that is
    // unnamed, shares a head word, shares a name type, or has a common superclass.
    foreach (int key in entities.Keys)
    {
        List<Context> candidateContexts = entities[key];
        bool excludeEntity =
            key == entityKey ||
            nameSets[key].Count == 0 ||
            HasSameHead(entityHeadSet, headSets[key]) ||
            HasSameNameType(entityNameSet, nameSets[key]) ||
            HasSuperClass(entityContexts, candidateContexts);
        if (excludeEntity)
        {
            exclusionSet.AddAll(candidateContexts);
        }
    }

    // Singleton mentions: apply the same compatibility screens, one mention at a
    // time; the single-element holder list is reused to call HasSuperClass.
    var singletonHolder = new List<Context>(1);
    foreach (Context singleton in singletons)
    {
        singletonHolder.Clear();
        singletonHolder.Add(singleton);
        bool excludeSingleton =
            entityHeadSet.Contains(singleton.HeadTokenText.ToLower()) ||
            singleton.NameType == null ||
            entityNameSet.Contains(singleton.NameType) ||
            HasSuperClass(entityContexts, singletonHolder);
        if (excludeSingleton)
        {
            exclusionSet.Add(singleton);
        }
    }

    return exclusionSet;
}
/// <summary>
/// Checks whether <paramref name="xecStrip"/> is a recorded acronym partner of
/// <paramref name="ecStrip"/> according to the acronym map.
/// </summary>
private bool isAcronym(string ecStrip, string xecStrip)
{
    // acroMap yields the set of known partners for this stripped string, if any.
    var partners = (Util.Set<string>)acroMap[ecStrip];
    return partners != null && partners.Contains(xecStrip);
}
/// <summary>
/// Reports whether the two name-type sets share at least one element.
/// </summary>
private bool HasSameNameType(Util.Set<string> entityNameSet, Util.Set<string> candidateNameSet)
{
    foreach (string candidateName in candidateNameSet)
    {
        if (entityNameSet.Contains(candidateName))
        {
            return true;
        }
    }
    return false;
}
/// <summary>
/// Reports whether the two head-word sets share at least one element.
/// </summary>
private bool HasSameHead(Util.Set<string> entityHeadSet, Util.Set<string> candidateHeadSet)
{
    foreach (string candidateHead in candidateHeadSet)
    {
        if (entityHeadSet.Contains(candidateHead))
        {
            return true;
        }
    }
    return false;
}
/// <summary>
/// Determines whether the given noun phrase, when followed upward through the
/// head map, reaches a parse that is already a known mention.
/// </summary>
/// <param name="nounPhrase">The noun phrase whose head chain is walked.</param>
/// <param name="headMap">Maps a parse to the parse it is the head of.</param>
/// <param name="mentions">The set of parses already treated as mentions.</param>
/// <returns>true if some parse reached through the head map is a mention.</returns>
private static bool IsHeadOfExistingMention(IParse nounPhrase, Dictionary<IParse, IParse> headMap, Util.Set<IParse> mentions)
{
    IParse head = nounPhrase;
    IParse next;
    // TryGetValue replaces the original ContainsKey + indexer pair, avoiding a
    // second dictionary lookup on every step of the walk.
    // NOTE(review): assumes headMap is acyclic; a cycle that never touches
    // `mentions` would loop forever — confirm the invariant upstream.
    while (headMap.TryGetValue(head, out next))
    {
        head = next;
        if (mentions.Contains(head))
        {
            return true;
        }
    }
    return false;
}
/// <summary>
/// Returns the token index of the head of the given parse, skipping tokens
/// whose syntactic type is in mSkipSet, and ignoring tokens covered by an
/// NP -> ... S style trailing clause.
/// </summary>
public int GetHeadIndex(IParse parse)
{
    List<IParse> children = parse.SyntacticChildren;
    bool countingTrailing = false;
    int trailingTokenCount = 0;
    // Check for NP -> NN S type structures: once a non-initial S child is seen,
    // every token from there on is counted so the head is chosen before it.
    for (int childIndex = 0; childIndex < children.Count; childIndex++)
    {
        IParse child = children[childIndex];
        if (child.SyntacticType.StartsWith("S") && childIndex != 0)
        {
            countingTrailing = true;
        }
        if (countingTrailing)
        {
            trailingTokenCount += child.Tokens.Count;
        }
    }
    List<IParse> tokens = parse.Tokens;
    if (tokens.Count == 0)
    {
        System.Console.Error.WriteLine("PTBHeadFinder.getHeadIndex(): empty tok list for parse " + parse);
    }
    // Walk right-to-left from the last non-trailing token, skipping skippable types.
    for (int tokenIndex = tokens.Count - trailingTokenCount - 1; tokenIndex >= 0; tokenIndex--)
    {
        if (!mSkipSet.Contains(tokens[tokenIndex].SyntacticType))
        {
            return tokenIndex;
        }
    }
    // NOTE(review): may be negative when every token is skippable — preserved
    // from the original behavior.
    return tokens.Count - trailingTokenCount - 1;
}
/// <summary>
/// Returns the token index of the head of the given parse, skipping tokens
/// whose syntactic type is in mSkipSet, and excluding tokens that belong to a
/// non-initial S child (NP -> NN S type structures).
/// </summary>
public int GetHeadIndex(IParse parse)
{
    var children = parse.SyntacticChildren;
    var sawSentenceClause = false;
    var excludedTokenCount = 0;
    var childIndex = 0;
    // From the first non-initial S child onward, tokens are excluded so the
    // head is picked before them.
    foreach (var child in children)
    {
        if (childIndex != 0 && child.SyntacticType.StartsWith("S"))
        {
            sawSentenceClause = true;
        }
        if (sawSentenceClause)
        {
            excludedTokenCount += child.Tokens.Count;
        }
        childIndex++;
    }
    var tokens = parse.Tokens;
    if (tokens.Count == 0)
    {
        Console.Error.WriteLine("PTBHeadFinder.getHeadIndex(): empty tok list for parse " + parse);
    }
    // Scan right-to-left from the last non-excluded token for a non-skippable type.
    for (var tokenIndex = tokens.Count - excludedTokenCount - 1; tokenIndex >= 0; tokenIndex--)
    {
        if (!mSkipSet.Contains(tokens[tokenIndex].SyntacticType))
        {
            return tokenIndex;
        }
    }
    // NOTE(review): may be negative when every token is skippable — preserved
    // from the original behavior.
    return tokens.Count - excludedTokenCount - 1;
}
/// <summary>
/// Returns string-match features for the specified mention and entity:
/// exact-match, coordination-mix ("cmix"), substring, shared head word
/// ("hds="), modifier agreement ("mmw=", "modsMatch"/"nonTheModsMatch"/
/// "modsMisMatch") and title match.
/// </summary>
/// <param name="mention">
/// The mention.
/// </param>
/// <param name="entity">
/// The entity.
/// </param>
/// <returns>
/// list of string-match features for the specified mention and entity.
/// </returns>
protected internal virtual List<string> GetStringMatchFeatures(Mention.MentionContext mention, DiscourseEntity entity)
{
    bool sameHead = false;
    bool modifersMatch = false; // (sic) spelling kept; true while every mention modifier also appears in some same-head entity mention
    bool titleMatch = false;
    bool noTheModifiersMatch = false; // like modifersMatch but ignoring missing "the"
    List<string> features = new List<string>();
    Mention.IParse[] mentionTokens = mention.TokenParses;
    OpenNLP.Tools.Util.Set<string> entityContextModifierSet = ConstructModifierSet(mentionTokens, mention.HeadTokenIndex);
    string mentionHeadString = mention.HeadTokenText.ToLower();
    // A set is used so duplicate features across entity mentions are emitted only once.
    Util.Set<string> featureSet = new Util.HashSet<string>();
    foreach (Mention.MentionContext entityMention in entity.Mentions)
    {
        string exactMatchFeature = GetExactMatchFeature(entityMention, mention);
        if (exactMatchFeature != null)
        {
            featureSet.Add(exactMatchFeature);
        }
        else if (entityMention.Parse.IsCoordinatedNounPhrase && !mention.Parse.IsCoordinatedNounPhrase)
        {
            // Coordination mismatch: entity mention is coordinated, this mention is not.
            featureSet.Add("cmix");
        }
        else
        {
            string mentionStrip = StripNounPhrase(mention);
            string entityMentionStrip = StripNounPhrase(entityMention);
            if (mentionStrip != null && entityMentionStrip != null)
            {
                if (IsSubstring(mentionStrip, entityMentionStrip))
                {
                    featureSet.Add("substring");
                }
            }
        }
        Mention.IParse[] entityMentionTokens = entityMention.TokenParses;
        int headIndex = entityMention.HeadTokenIndex;
        //if (!mention.getHeadTokenTag().equals(entityMention.getHeadTokenTag())) {
        //  continue;
        //} want to match NN NNP
        string entityMentionHeadString = entityMention.HeadTokenText.ToLower();
        // model lexical similarity
        if (mentionHeadString == entityMentionHeadString)
        {
            sameHead = true;
            featureSet.Add("hds=" + mentionHeadString);
            if (!modifersMatch || !noTheModifiersMatch)
            {
                //only check if we haven't already found one which is the same
                modifersMatch = true;
                noTheModifiersMatch = true;
                Util.Set<string> entityMentionModifierSet = ConstructModifierSet(entityMentionTokens, headIndex);
                foreach (string modifierWord in entityContextModifierSet)
                {
                    if (!entityMentionModifierSet.Contains(modifierWord))
                    {
                        modifersMatch = false;
                        if (modifierWord != "the")
                        {
                            // A non-"the" modifier of the mention is absent from this entity mention.
                            noTheModifiersMatch = false;
                            featureSet.Add("mmw=" + modifierWord);
                        }
                    }
                }
            }
        }
        Util.Set<string> descriptorModifierSet = ConstructModifierSet(entityMentionTokens, entityMention.NonDescriptorStart);
        if (descriptorModifierSet.Contains(mentionHeadString))
        {
            // Mention head appears among the entity mention's descriptor modifiers.
            titleMatch = true;
        }
    }
    if (!(featureSet.Count == 0))
    {
        features.AddRange(featureSet);
    }
    if (sameHead)
    {
        features.Add("sameHead");
        if (modifersMatch)
        {
            features.Add("modsMatch");
        }
        else if (noTheModifiersMatch)
        {
            features.Add("nonTheModsMatch");
        }
        else
        {
            features.Add("modsMisMatch");
        }
    }
    if (titleMatch)
    {
        features.Add("titleMatch");
    }
    return (features);
}
/// <summary>
/// Builds up the list of features, anchored around a position within the
/// StringBuilder: the candidate end-of-sentence (eos) character, the string
/// before/after it within its token ("x="/"s="), and the whitespace-delimited
/// tokens before/after that token ("v="/"n="), plus capitalization and
/// induced-abbreviation flags for each.
/// </summary>
/// <param name="pair">
/// FirstValue: the text buffer; SecondValue: the character offset of the eos
/// character within that buffer.
/// </param>
/// <returns>The feature strings collected for this candidate position.</returns>
public virtual string[] GetContext(Util.Pair<System.Text.StringBuilder, int> pair)
{
    string prefix;        // string preceding the eos character in the eos token.
    string previousToken; // space-delimited token preceding the token containing the eos character.
    string suffix;        // string following the eos character in the eos token.
    string nextToken;     // space-delimited token following the token containing the eos character.
    System.Text.StringBuilder buffer = pair.FirstValue;
    int position = pair.SecondValue; // character offset of the eos character in the buffer
    int lastIndex = buffer.Length - 1;
    // compute space-previous ("sp") and space-next ("sn") features.
    if (position > 0 && buffer[position - 1] == ' ')
    {
        mCollectFeatures.Add("sp");
    }
    if (position < lastIndex && buffer[position + 1] == ' ')
    {
        mCollectFeatures.Add("sn");
    }
    mCollectFeatures.Add("eos=" + buffer[position]);
    int prefixStart = PreviousSpaceIndex(buffer, position);
    int currentPosition = position;
    // assign prefix: scan left from the eos character; stop early if another
    // eos character is found, otherwise stop at the preceding space.
    while (--currentPosition > prefixStart)
    {
        for (int currentEndOfSentenceCharacter = 0, endOfSentenceCharactersLength = mEndOfSentenceCharacters.Length; currentEndOfSentenceCharacter < endOfSentenceCharactersLength; currentEndOfSentenceCharacter++)
        {
            if (buffer[currentPosition] == mEndOfSentenceCharacters[currentEndOfSentenceCharacter])
            {
                prefixStart = currentPosition;
                currentPosition++; // this gets us out of the while loop.
                break;
            }
        }
    }
    prefix = buffer.ToString(prefixStart, position - prefixStart).Trim();
    int previousStart = PreviousSpaceIndex(buffer, prefixStart);
    previousToken = buffer.ToString(previousStart, prefixStart - previousStart).Trim();
    int suffixEnd = NextSpaceIndex(buffer, position, lastIndex);
    currentPosition = position;
    // assign suffix: symmetric scan to the right, shrinking suffixEnd at the
    // first eos character encountered.
    while (++currentPosition < suffixEnd)
    {
        for (int currentEndOfSentenceCharacter = 0, endOfSentenceCharactersLength = mEndOfSentenceCharacters.Length; currentEndOfSentenceCharacter < endOfSentenceCharactersLength; currentEndOfSentenceCharacter++)
        {
            if (buffer[currentPosition] == mEndOfSentenceCharacters[currentEndOfSentenceCharacter])
            {
                suffixEnd = currentPosition;
                currentPosition--; // this gets us out of the while loop.
                break;
            }
        }
    }
    int nextEnd = NextSpaceIndex(buffer, suffixEnd + 1, lastIndex + 1);
    if (position == lastIndex)
    {
        // eos character is the last character in the buffer: nothing follows it.
        suffix = "";
        nextToken = "";
    }
    else
    {
        suffix = buffer.ToString(position + 1, suffixEnd - (position + 1)).Trim();
        nextToken = buffer.ToString(suffixEnd + 1, nextEnd - (suffixEnd + 1)).Trim();
    }
    // "x=" prefix feature, plus its length and capitalization/abbreviation flags.
    // mBuffer is a reused scratch StringBuilder; it is cleared after each feature.
    mBuffer.Append("x=");
    mBuffer.Append(prefix);
    mCollectFeatures.Add(mBuffer.ToString());
    mBuffer.Length = 0;
    if (prefix.Length > 0)
    {
        mCollectFeatures.Add(System.Convert.ToString(prefix.Length, System.Globalization.CultureInfo.InvariantCulture));
        if (IsFirstUpper(prefix))
        {
            mCollectFeatures.Add("xcap");
        }
        if (mInducedAbbreviations.Contains(prefix))
        {
            mCollectFeatures.Add("xabbrev");
        }
    }
    // "v=" previous-token feature with capitalization/abbreviation flags.
    mBuffer.Append("v=");
    mBuffer.Append(previousToken);
    mCollectFeatures.Add(mBuffer.ToString());
    mBuffer.Length = 0;
    if (previousToken.Length > 0)
    {
        if (IsFirstUpper(previousToken))
        {
            mCollectFeatures.Add("vcap");
        }
        if (mInducedAbbreviations.Contains(previousToken))
        {
            mCollectFeatures.Add("vabbrev");
        }
    }
    // "s=" suffix feature with capitalization/abbreviation flags.
    mBuffer.Append("s=");
    mBuffer.Append(suffix);
    mCollectFeatures.Add(mBuffer.ToString());
    mBuffer.Length = 0;
    if (suffix.Length > 0)
    {
        if (IsFirstUpper(suffix))
        {
            mCollectFeatures.Add("scap");
        }
        if (mInducedAbbreviations.Contains(suffix))
        {
            mCollectFeatures.Add("sabbrev");
        }
    }
    // "n=" next-token feature with capitalization/abbreviation flags.
    mBuffer.Append("n=");
    mBuffer.Append(nextToken);
    mCollectFeatures.Add(mBuffer.ToString());
    mBuffer.Length = 0;
    if (nextToken.Length > 0)
    {
        if (IsFirstUpper(nextToken))
        {
            mCollectFeatures.Add("ncap");
        }
        if (mInducedAbbreviations.Contains(nextToken))
        {
            mCollectFeatures.Add("nabbrev");
        }
    }
    string[] context = mCollectFeatures.ToArray();
    mCollectFeatures.Clear(); // mCollectFeatures is reused across calls.
    return (context);
}
/// <summary>
/// Processes the given extents: for each named multi-mention entity, emits a
/// positive event (AddEvent(true, ...)) for every mention pair within the
/// entity, and a negative event (AddEvent(false, ...)) pairing the first
/// mention with the next extent not in the entity's exclusion set.
/// </summary>
public virtual void SetExtents(Context[] extents)
{
    var entities = new Util.HashList<int, Context>();
    // Extents which are not in a coreference chain.
    var singletons = new List<Context>();
    var allExtents = new List<Context>();
    //populate data structures
    foreach (Context currentExtent in extents)
    {
        if (currentExtent.Id == -1)
        {
            singletons.Add(currentExtent);
        }
        else
        {
            entities.Put(currentExtent.Id, currentExtent);
        }
        allExtents.Add(currentExtent);
    }
    // Round-robin cursor into allExtents; it persists across all pairs so
    // negative examples are drawn evenly from the whole extent list.
    int allExtentsIndex = 0;
    Dictionary<int, Util.Set<string>> headSets = ConstructHeadSets(entities);
    Dictionary<int, Util.Set<string>> nameSets = ConstructNameSets(entities);
    foreach (int key in entities.Keys)
    {
        Util.Set<string> entityNameSet = nameSets[key];
        if (entityNameSet.Count == 0)
        {
            // Unnamed entities generate no events.
            continue;
        }
        List<Context> entityContexts = entities[key];
        // Mentions that must not be used as negative examples for this entity.
        Util.Set<Context> exclusionSet = ConstructExclusionSet(key, entities, headSets, nameSets, singletons);
        for (int firstEntityContextIndex = 0; firstEntityContextIndex < entityContexts.Count; firstEntityContextIndex++)
        {
            Context firstEntityContext = entityContexts[firstEntityContextIndex];
            for (int secondEntityContextIndex = firstEntityContextIndex + 1; secondEntityContextIndex < entityContexts.Count; secondEntityContextIndex++)
            {
                Context secondEntityContext = entityContexts[secondEntityContextIndex];
                // Positive example: two mentions of the same entity.
                AddEvent(true, firstEntityContext, secondEntityContext);
                // Negative example: advance the cursor until a non-excluded
                // extent is found, or we wrap all the way around with none.
                int startIndex = allExtentsIndex;
                do
                {
                    Context compareEntityContext = allExtents[allExtentsIndex];
                    allExtentsIndex = (allExtentsIndex + 1) % allExtents.Count;
                    if (!exclusionSet.Contains(compareEntityContext))
                    {
                        if (DebugOn)
                        {
                            System.Console.Error.WriteLine(firstEntityContext.ToString() + " " + string.Join(",", entityNameSet.ToArray()) + " " + compareEntityContext.ToString() + " " + nameSets[compareEntityContext.Id]);
                        }
                        AddEvent(false, firstEntityContext, compareEntityContext);
                        break;
                    }
                }
                while (allExtentsIndex != startIndex);
            }
        }
    }
}
/// <summary>
/// Builds WordNet-synset features for a pair of common-noun contexts: one
/// "ss=" feature per shared synset, plus a summary feature describing how the
/// synset sets relate ("ncss" none shared, "samess" identical, "2isa1"/"1isa2"
/// one set contained in the other).
/// </summary>
private IEnumerable<string> GetCommonCommonFeatures(Context common1, Context common2)
{
    var features = new List<string>();
    Util.Set<string> firstSynsets = common1.Synsets;
    Util.Set<string> secondSynsets = common2.Synsets;

    // No synset information for either head word: nothing to compare.
    if (firstSynsets.Count == 0 || secondSynsets.Count == 0)
    {
        return features;
    }

    int sharedSynsetCount = 0;
    foreach (string synset in firstSynsets)
    {
        if (secondSynsets.Contains(synset))
        {
            features.Add("ss=" + synset);
            sharedSynsetCount++;
        }
    }

    if (sharedSynsetCount == 0)
    {
        features.Add("ncss");
    }
    else if (sharedSynsetCount == firstSynsets.Count && sharedSynsetCount == secondSynsets.Count)
    {
        features.Add("samess");
    }
    else if (sharedSynsetCount == firstSynsets.Count)
    {
        features.Add("2isa1");
    }
    else if (sharedSynsetCount == secondSynsets.Count)
    {
        features.Add("1isa2");
    }

    return features;
}
/// Input format: every two consecutive vectors in src describe one segment.
/// All triangles are assumed filled and non-triangle structure is always empty.
/// Extracts the outlines (which may have holes) and returns them.
/// Points in each edge are arranged so that their left side is filled.
/// Edges are not returned in order, but the two points of each edge keep their order.
public static List<Vector2> ExtractEdge(this List<Vector2> src)
{
    // Adjacency map: vertex -> all vertices it shares a segment with.
    var adjacency = new Dictionary<Vector2, List<Vector2>>();
    for (int pairStart = 0; pairStart < src.Count; pairStart += 2)
    {
        adjacency.GetOrDefault(src[pairStart]).Add(src[pairStart + 1]);
        adjacency.GetOrDefault(src[pairStart + 1]).Add(src[pairStart]);
    }

    // Sort each adjacency list by the angle of the edge around its owning vertex.
    foreach (var entry in adjacency)
    {
        Vector2 origin = entry.Key;
        entry.Value.Sort((lhs, rhs) =>
        {
            Vector2 toLhs = origin.To(lhs);
            Vector2 toRhs = origin.To(rhs);
            float angleLhs = Mathf.Atan2(toLhs.y, toLhs.x);
            float angleRhs = Mathf.Atan2(toRhs.y, toRhs.x);
            if (angleLhs < angleRhs)
            {
                return -1;
            }
            return angleLhs > angleRhs ? 1 : 0;
        });
    }

    // Output size should not exceed input size.
    var unmatchedEdges = new Util.Set<Edge>(src.Count);
    foreach (var vertex in src.Distinct().ToList())
    {
        var neighbours = adjacency[vertex];
        for (int i = 0; i < neighbours.Count; i++)
        {
            var from = neighbours[i];
            var to = neighbours[(i + 1).ModSys(neighbours.Count)];
            // Skip the edge when the triangle's vertices are arranged clockwise.
            if (new Triangle(vertex, from, to).area <= 0)
            {
                continue;
            }
            // Each edge belongs to one or two triangles, so it appears once or
            // twice; edges seen exactly once survive and form the outlines
            // (including the inner ones).
            var candidate = new Edge(from, to);
            // This toggle takes up about 200ms when src.Length == 60000.
            if (unmatchedEdges.Contains(candidate))
            {
                unmatchedEdges.Remove(candidate);
            }
            else
            {
                unmatchedEdges.Add(candidate);
            }
        }
    }

    var outline = new List<Vector2>();
    unmatchedEdges.Foreach(edge =>
    {
        outline.Add(edge.a);
        outline.Add(edge.b);
    });
    return outline;
}