/// <summary>
/// Picks a gender for the specified mention from the gender model's probability distribution.
/// </summary>
/// <param name="context">
/// The mention whose gender is to be computed.
/// </param>
/// <returns>
/// The first of male, female and neuter (tested in that order) whose outcome index is non-negative and
/// whose probability exceeds the minimum gender probability; otherwise an unknown gender carrying the
/// minimum gender probability.
/// </returns>
public virtual Gender ComputeGender(Context context)
{
    double[] genderDistribution = mGenderModel.GenderDistribution(context);
    int maleIndex = mGenderModel.MaleIndex;
    int femaleIndex = mGenderModel.FemaleIndex;
    int neuterIndex = mGenderModel.NeuterIndex;

    if (mDebugOn)
    {
        // The decision logic below treats a negative index as "outcome not available", so the
        // trace must apply the same guard instead of indexing the array with it.
        string maleText = maleIndex >= 0 ? genderDistribution[maleIndex].ToString() : "n/a";
        string femaleText = femaleIndex >= 0 ? genderDistribution[femaleIndex].ToString() : "n/a";
        string neuterText = neuterIndex >= 0 ? genderDistribution[neuterIndex].ToString() : "n/a";
        Console.Error.WriteLine("MaxentCompatibilityModel.computeGender: " + context.ToString()
            + " m=" + maleText + " f=" + femaleText + " n=" + neuterText);
    }

    if (maleIndex >= 0 && genderDistribution[maleIndex] > mMinimumGenderProbability)
    {
        return new Gender(GenderEnum.Male, genderDistribution[maleIndex]);
    }
    if (femaleIndex >= 0 && genderDistribution[femaleIndex] > mMinimumGenderProbability)
    {
        return new Gender(GenderEnum.Female, genderDistribution[femaleIndex]);
    }
    if (neuterIndex >= 0 && genderDistribution[neuterIndex] > mMinimumGenderProbability)
    {
        return new Gender(GenderEnum.Neuter, genderDistribution[neuterIndex]);
    }

    // No outcome was confident enough.
    return new Gender(GenderEnum.Unknown, mMinimumGenderProbability);
}
/// <summary>
/// Creates number training events from the specified extents. Extents with an id other than -1 are
/// grouped by id and labelled as a group; extents with id -1 are labelled individually. Only
/// singular and plural labels produce events; all singular events are added before the plural ones.
/// </summary>
/// <param name="extents">
/// The mentions to generate training events from.
/// </param>
virtual public void SetExtents(Context[] extents)
{
    var chains = new Util.HashList<int, Context>();
    var unchained = new List<Context>();
    foreach (Context extent in extents)
    {
        if (extent.Id == -1)
        {
            unchained.Add(extent);
        }
        else
        {
            chains.Put(extent.Id, extent);
        }
    }

    var singularMentions = new List<Context>();
    var pluralMentions = new List<Context>();

    // Mentions that belong to a coreference entity share the entity's number.
    foreach (int chainId in chains.Keys)
    {
        List<Context> chain = chains[chainId];
        NumberEnum chainNumber = GetNumber(chain);
        List<Context> bucket = chainNumber == NumberEnum.Singular
            ? singularMentions
            : (chainNumber == NumberEnum.Plural ? pluralMentions : null);
        if (bucket != null)
        {
            bucket.AddRange(chain);
        }
    }

    // Mentions outside any coreference entity are judged one by one.
    foreach (Context mention in unchained)
    {
        NumberEnum mentionNumber = GetNumber(mention);
        List<Context> bucket = mentionNumber == NumberEnum.Singular
            ? singularMentions
            : (mentionNumber == NumberEnum.Plural ? pluralMentions : null);
        if (bucket != null)
        {
            bucket.Add(mention);
        }
    }

    string singularOutcome = NumberEnum.Singular.ToString();
    for (int index = 0; index < singularMentions.Count; index++)
    {
        AddEvent(singularOutcome, singularMentions[index]);
    }

    string pluralOutcome = NumberEnum.Plural.ToString();
    for (int index = 0; index < pluralMentions.Count; index++)
    {
        AddEvent(pluralOutcome, pluralMentions[index]);
    }
}
/// <summary>
/// Picks a number (singular or plural) for the specified mention from the number model's distribution.
/// </summary>
/// <param name="context">
/// The mention whose number is to be computed.
/// </param>
/// <returns>
/// Singular if its probability exceeds the minimum number probability, otherwise plural under the same
/// test, otherwise an unknown number carrying the minimum number probability.
/// </returns>
public virtual Number ComputeNumber(Context context)
{
    double[] probabilities = mNumberModel.NumberDistribution(context);

    double singularProbability = probabilities[mNumberModel.SingularIndex];
    if (singularProbability > mMinimumNumberProbability)
    {
        return new Number(NumberEnum.Singular, singularProbability);
    }

    double pluralProbability = probabilities[mNumberModel.PluralIndex];
    if (pluralProbability > mMinimumNumberProbability)
    {
        return new Number(NumberEnum.Plural, pluralProbability);
    }

    return new Number(NumberEnum.Unknown, mMinimumNumberProbability);
}
/// <summary>
/// Determines the number of the specified mention using the number model.
/// </summary>
/// <param name="context">
/// The mention to classify.
/// </param>
/// <returns>
/// A singular or plural number with its model probability when that probability is above the minimum
/// number probability (singular is tested first); otherwise unknown with the minimum number probability.
/// </returns>
public virtual Number ComputeNumber(Context context)
{
    double[] distribution = mNumberModel.NumberDistribution(context);

    double singular = distribution[mNumberModel.SingularIndex];
    if (singular > mMinimumNumberProbability)
    {
        return new Number(NumberEnum.Singular, singular);
    }

    double plural = distribution[mNumberModel.PluralIndex];
    return plural > mMinimumNumberProbability
        ? new Number(NumberEnum.Plural, plural)
        : new Number(NumberEnum.Unknown, mMinimumNumberProbability);
}
/// <summary>
/// Appends a training event that pairs the given outcome with the features of the noun phrase.
/// </summary>
/// <param name="outcome">
/// The outcome label of the event.
/// </param>
/// <param name="nounPhrase">
/// The mention whose features form the event's context.
/// </param>
private void AddEvent(string outcome, Context nounPhrase)
{
    string[] eventContext = GetFeatures(nounPhrase).ToArray();
    mEvents.Add(new SharpEntropy.TrainingEvent(outcome, eventContext));
}
/// <summary>
/// Creates gender training events from the specified extents. Extents with an id other than -1 are
/// grouped by id and labelled as a group; extents with id -1 are labelled individually. Only male,
/// female and neuter labels produce events, added in that order.
/// </summary>
/// <param name="extents">
/// The mentions to generate training events from.
/// </param>
virtual public void SetExtents(Context[] extents)
{
    var entities = new Util.HashList<int, Context>();
    var singletons = new List<Context>();
    foreach (Context extent in extents)
    {
        if (extent.Id != -1)
        {
            entities.Put(extent.Id, extent);
        }
        else
        {
            singletons.Add(extent);
        }
    }

    var males = new List<Context>();
    var females = new List<Context>();
    var neuters = new List<Context>();

    // Coreferent entities: every mention of the entity receives the entity's gender.
    // No null test is needed here; the equality checks already ignore any value that is not
    // one of the three genders.
    foreach (int key in entities.Keys)
    {
        List<Context> entityContexts = entities[key];
        GenderEnum gender = GetGender(entityContexts);
        if (gender == GenderEnum.Male)
        {
            males.AddRange(entityContexts);
        }
        else if (gender == GenderEnum.Female)
        {
            females.AddRange(entityContexts);
        }
        else if (gender == GenderEnum.Neuter)
        {
            neuters.AddRange(entityContexts);
        }
    }

    // Non-coreferent mentions are judged individually.
    foreach (Context entityContext in singletons)
    {
        GenderEnum gender = GetGender(entityContext);
        if (gender == GenderEnum.Male)
        {
            males.Add(entityContext);
        }
        else if (gender == GenderEnum.Female)
        {
            females.Add(entityContext);
        }
        else if (gender == GenderEnum.Neuter)
        {
            neuters.Add(entityContext);
        }
    }

    foreach (Context entityContext in males)
    {
        AddEvent(GenderEnum.Male.ToString(), entityContext);
    }
    foreach (Context entityContext in females)
    {
        AddEvent(GenderEnum.Female.ToString(), entityContext);
    }
    foreach (Context entityContext in neuters)
    {
        AddEvent(GenderEnum.Neuter.ToString(), entityContext);
    }
}
/// <summary>
/// Tells whether the head token of the noun phrase carries a tag starting with "PRP".
/// </summary>
/// <param name="nounPhrase">
/// The mention to inspect.
/// </param>
/// <returns>
/// true if the head token tag starts with "PRP"; otherwise false.
/// </returns>
private bool IsPronoun(Context nounPhrase)
{
    string headTag = nounPhrase.HeadTokenTag;
    return headTag.StartsWith("PRP");
}
/// <summary>
/// Tells whether the head token of the noun phrase carries a tag starting with "NNP".
/// </summary>
/// <param name="nounPhrase">
/// The mention to inspect.
/// </param>
/// <returns>
/// true if the head token tag starts with "NNP"; otherwise false.
/// </returns>
private bool IsName(Context nounPhrase)
{
    string headTag = nounPhrase.HeadTokenTag;
    return headTag.StartsWith("NNP");
}
/// <summary>
/// Tells whether the synsets of one mention are entirely contained in the synsets of the other.
/// </summary>
/// <param name="entityContext">
/// The first mention.
/// </param>
/// <param name="candidateEntityContext">
/// The second mention.
/// </param>
/// <returns>
/// false if either mention has no synsets; otherwise true when the number of synsets the two mentions
/// share equals the synset count of at least one of them.
/// </returns>
private bool InSuperClass(Context entityContext, Context candidateEntityContext)
{
    if (entityContext.Synsets.Count == 0 || candidateEntityContext.Synsets.Count == 0)
    {
        return false;
    }

    int sharedCount = 0;
    foreach (string synset in entityContext.Synsets)
    {
        if (candidateEntityContext.Synsets.Contains(synset))
        {
            sharedCount++;
        }
    }

    // Both sets are non-empty at this point, so matching either size implies at least one shared synset.
    return sharedCount == entityContext.Synsets.Count
        || sharedCount == candidateEntityContext.Synsets.Count;
}
/// <summary>
/// Builds the features that pair a number mention with a pronoun mention.
/// </summary>
/// <param name="number">
/// The number mention; its head token tag and name type are used.
/// </param>
/// <param name="pronoun">
/// The pronoun mention; its lower-cased head token text and the pronoun gender derived from it are used.
/// </param>
/// <returns>
/// Four features: "wt=" and "wn=" for the pronoun text, followed by "wt=" and "wn=" for the pronoun gender.
/// </returns>
private List<string> GetNumberPronounFeatures(Context number, Context pronoun)
{
    // Invariant lower-casing keeps the feature strings identical on every machine; the
    // culture-sensitive ToLower() maps "I" to a dotless i under Turkish cultures.
    string pronounText = pronoun.HeadTokenText.ToLowerInvariant();
    string genderText = Resolver.AbstractResolver.GetPronounGender(pronounText);

    List<string> features = new List<string>(4);
    features.Add("wt=" + pronounText + "," + number.HeadTokenTag);
    features.Add("wn=" + pronounText + "," + number.NameType);
    features.Add("wt=" + genderText + "," + number.HeadTokenTag);
    features.Add("wn=" + genderText + "," + number.NameType);
    return features;
}
/// <summary>
/// Builds the features that pair a name mention with a number mention.
/// </summary>
/// <param name="name">
/// The name mention; its name type is used.
/// </param>
/// <param name="number">
/// The number mention; its head token tag and name type are used.
/// </param>
/// <returns>
/// Two features, "nt=" followed by "nn=".
/// </returns>
private List<string> GetNameNumberFeatures(Context name, Context number)
{
    return new List<string>(2)
    {
        "nt=" + name.NameType + "," + number.HeadTokenTag,
        "nn=" + name.NameType + "," + number.NameType
    };
}
/// <summary>
/// Evaluates the test model on the features of the specified mention.
/// </summary>
/// <param name="context">
/// The mention to evaluate.
/// </param>
/// <returns>
/// The array returned by the test model for the mention's features.
/// </returns>
public virtual double[] NumberDistribution(Context context)
{
    string[] featureArray = GetFeatures(context).ToArray();
    return mTestModel.Evaluate(featureArray);
}
/// <summary>
/// Determines the number of a mention by matching its head token text against the linker's
/// singular and plural pronoun patterns.
/// </summary>
/// <param name="context">
/// The mention to classify.
/// </param>
/// <returns>
/// Singular if the singular pattern matches, otherwise plural if the plural pattern matches,
/// otherwise unknown.
/// </returns>
public virtual NumberEnum GetNumber(Context context)
{
    string headText = context.HeadTokenText;

    if (Linker.SingularPronounPattern.IsMatch(headText))
    {
        return NumberEnum.Singular;
    }
    if (Linker.PluralPronounPattern.IsMatch(headText))
    {
        return NumberEnum.Plural;
    }
    return NumberEnum.Unknown;
}
/// <summary>
/// Builds the feature list for a single noun phrase.
/// </summary>
/// <param name="nounPhrase">
/// The mention to describe.
/// </param>
/// <returns>
/// "default", one "mw=" feature for every token except the last, an "hw=" feature with the lower-cased
/// head token text and an "ht=" feature with the head token tag.
/// </returns>
private List<string> GetFeatures(Context nounPhrase)
{
    var features = new List<string> {"default"};

    object[] nounPhraseTokens = nounPhrase.Tokens;
    // The last token is deliberately left out of the "mw=" features.
    for (int tokenIndex = 0, tokenLength = nounPhraseTokens.Length - 1; tokenIndex < tokenLength; tokenIndex++)
    {
        features.Add("mw=" + nounPhraseTokens[tokenIndex].ToString());
    }

    // Invariant lower-casing keeps the feature identical regardless of the machine's culture
    // (the culture-sensitive ToLower() maps "I" to a dotless i under Turkish cultures).
    features.Add("hw=" + nounPhrase.HeadTokenText.ToLowerInvariant());
    features.Add("ht=" + nounPhrase.HeadTokenTag);
    return features;
}
/// <summary>
/// Returns a number between 0 and 1 which represents the model's belief that the specified mentions
/// are compatible. Values closer to 1 are more compatible, while values closer to 0 are less compatible.
/// </summary>
/// <param name="firstMention">
/// The first mention to be considered.
/// </param>
/// <param name="secondMention">
/// The second mention to be considered.
/// </param>
/// <returns>
/// The test model's value for the "same" outcome, evaluated on the features of the mention pair.
/// </returns>
public virtual double AreCompatible(Context firstMention, Context secondMention)
{
    string[] featureArray = GetFeatures(firstMention, secondMention).ToArray();

    if (mDebugOn)
    {
        System.Console.Error.WriteLine("SimilarityModel.compatible: feats=" + string.Join(",", featureArray));
    }

    double[] outcomes = mTestModel.Evaluate(featureArray);
    return outcomes[mSameIndex];
}
/// <summary>
/// Builds the features that pair two name mentions by their name types.
/// </summary>
/// <param name="name1">
/// The first name mention.
/// </param>
/// <param name="name2">
/// The second name mention.
/// </param>
/// <returns>
/// A single "nn=" feature holding both name types. When a type is missing it is written first (as an
/// empty string); otherwise the two types appear in ordinal order and "sameNameType" is added when
/// they are equal.
/// </returns>
private List<string> GetNameNameFeatures(Context name1, Context name2)
{
    string firstType = name1.NameType;
    string secondType = name2.NameType;
    List<string> features = new List<string>(1);

    if (firstType == null || secondType == null)
    {
        // At least one type is missing: it contributes an empty string in front of whichever
        // type remains (which may itself be missing).
        string remainingType = firstType ?? secondType;
        features.Add("nn=" + "," + remainingType);
        return features;
    }

    bool firstSortsLower = string.CompareOrdinal(firstType, secondType) < 0;
    string lowerType = firstSortsLower ? firstType : secondType;
    string higherType = firstSortsLower ? secondType : firstType;
    features.Add("nn=" + lowerType + "," + higherType);

    if (firstType == secondType)
    {
        features.Add("sameNameType");
    }
    return features;
}
/// <summary>
/// Creates similarity training events from the specified extents. For every pair of mentions inside
/// the same coreference entity a positive event is added, followed by at most one negative event that
/// pairs the first mention with the next extent not contained in the entity's exclusion set.
/// Entities whose name set is empty are skipped.
/// </summary>
/// <param name="extents">
/// The mentions to generate training events from; an id of -1 marks a mention outside any
/// coreference chain.
/// </param>
public virtual void SetExtents(Context[] extents)
{
    Util.HashList<int, Context> entities = new Util.HashList<int, Context>();
    // Extents which are not in a coreference chain.
    List<Context> singletons = new List<Context>();
    List<Context> allExtents = new List<Context>();

    // Populate data structures.
    for (int extentIndex = 0; extentIndex < extents.Length; extentIndex++)
    {
        Context currentExtent = extents[extentIndex];
        if (currentExtent.Id == -1)
        {
            singletons.Add(currentExtent);
        }
        else
        {
            entities.Put(currentExtent.Id, currentExtent);
        }
        allExtents.Add(currentExtent);
    }

    // Cursor into allExtents used to pick negative examples; it is shared by all entities and
    // wraps around, so successive searches continue where the previous one stopped.
    int allExtentsIndex = 0;
    Dictionary<int, Util.Set<string>> headSets = ConstructHeadSets(entities);
    Dictionary<int, Util.Set<string>> nameSets = ConstructNameSets(entities);

    foreach (int key in entities.Keys)
    {
        Util.Set<string> entityNameSet = nameSets[key];
        if (entityNameSet.Count == 0)
        {
            continue;
        }

        List<Context> entityContexts = entities[key];
        Util.Set<Context> exclusionSet = ConstructExclusionSet(key, entities, headSets, nameSets, singletons);

        for (int firstEntityContextIndex = 0; firstEntityContextIndex < entityContexts.Count; firstEntityContextIndex++)
        {
            Context firstEntityContext = entityContexts[firstEntityContextIndex];

            for (int secondEntityContextIndex = firstEntityContextIndex + 1; secondEntityContextIndex < entityContexts.Count; secondEntityContextIndex++)
            {
                Context secondEntityContext = entityContexts[secondEntityContextIndex];
                AddEvent(true, firstEntityContext, secondEntityContext);

                // Scan at most one full lap of allExtents for a mention outside the exclusion set.
                int startIndex = allExtentsIndex;
                do
                {
                    Context compareEntityContext = allExtents[allExtentsIndex];
                    allExtentsIndex = (allExtentsIndex + 1) % allExtents.Count;

                    if (!exclusionSet.Contains(compareEntityContext))
                    {
                        if (mDebugOn)
                        {
                            // The candidate may be a singleton (id -1); nameSets is built from the
                            // coreference entities, so presumably has no entry for it. TryGetValue
                            // keeps the trace from throwing on a missing key.
                            Util.Set<string> compareNameSet;
                            nameSets.TryGetValue(compareEntityContext.Id, out compareNameSet);
                            System.Console.Error.WriteLine(firstEntityContext.ToString() + " "
                                + string.Join(",", entityNameSet.ToArray()) + " "
                                + compareEntityContext.ToString() + " " + compareNameSet);
                        }
                        AddEvent(false, firstEntityContext, compareEntityContext);
                        break;
                    }
                }
                while (allExtentsIndex != startIndex);
            }
        }
    }
}
/// <summary>
/// Builds the features that pair a name mention with a pronoun mention.
/// </summary>
/// <param name="name">
/// The name mention; its name type is used.
/// </param>
/// <param name="pronoun">
/// The pronoun mention; its lower-cased head token text and the pronoun gender derived from it are used.
/// </param>
/// <returns>
/// Two features: "nw=" (name type and pronoun text) followed by "ng=" (name type and pronoun gender).
/// </returns>
private List<string> GetNamePronounFeatures(Context name, Context pronoun)
{
    // Lower-case once, culture-independently, so the feature text does not vary with the
    // machine's locale (the culture-sensitive ToLower() rewrites "I" under Turkish cultures).
    string pronounText = pronoun.HeadTokenText.ToLowerInvariant();

    List<string> features = new List<string>(2);
    features.Add("nw=" + name.NameType + "," + pronounText);
    features.Add("ng=" + name.NameType + "," + Resolver.AbstractResolver.GetPronounGender(pronounText));
    return features;
}
/// <summary>
/// Appends a training event for a pair of noun phrases, labelled with the "same" or the "different"
/// outcome.
/// </summary>
/// <param name="same">
/// true to label the pair with the "same" outcome; false to label it with the "different" outcome.
/// </param>
/// <param name="firstNounPhrase">
/// The first mention of the pair.
/// </param>
/// <param name="secondNounPhrase">
/// The second mention of the pair.
/// </param>
private void AddEvent(bool same, Context firstNounPhrase, Context secondNounPhrase)
{
    // The features do not depend on the label; only the outcome differs.
    string[] eventContext = GetFeatures(firstNounPhrase, secondNounPhrase).ToArray();
    var outcome = same ? mSame : mDifferent;
    mEvents.Add(new SharpEntropy.TrainingEvent(outcome, eventContext));
}
/// <summary>
/// Builds the feature that compares the genders of two pronoun mentions.
/// </summary>
/// <param name="pronoun1">
/// The first pronoun mention.
/// </param>
/// <param name="pronoun2">
/// The second pronoun mention.
/// </param>
/// <returns>
/// A single feature: "sameGender" when both head token texts yield equal pronoun genders,
/// otherwise "diffGender".
/// </returns>
private List<string> GetPronounPronounFeatures(Context pronoun1, Context pronoun2)
{
    string genderOfFirst = Resolver.AbstractResolver.GetPronounGender(pronoun1.HeadTokenText);
    string genderOfSecond = Resolver.AbstractResolver.GetPronounGender(pronoun2.HeadTokenText);

    List<string> features = new List<string>();
    features.Add(genderOfFirst == genderOfSecond ? "sameGender" : "diffGender");
    return features;
}
/// <summary>
/// Builds the features that compare the synsets of two common-noun mentions.
/// </summary>
/// <param name="common1">
/// The first common-noun mention.
/// </param>
/// <param name="common2">
/// The second common-noun mention.
/// </param>
/// <returns>
/// An empty list if either mention has no synsets. Otherwise one "ss=" feature per shared synset,
/// followed by "ncss" (nothing shared), "samess" (the shared count equals both set sizes),
/// "2isa1" (it equals the first set's size only) or "1isa2" (it equals the second set's size only);
/// no summary feature is added for a partial overlap.
/// </returns>
private List<string> GetCommonCommonFeatures(Context common1, Context common2)
{
    List<string> features = new List<string>();
    Util.Set<string> firstSynsets = common1.Synsets;
    Util.Set<string> secondSynsets = common2.Synsets;

    if (firstSynsets.Count == 0 || secondSynsets.Count == 0)
    {
        return features;
    }

    // The shared synsets are counted before the summary feature is chosen; an earlier version
    // (following the Java source) chose the summary first, which was flagged as looking wrong.
    int sharedCount = 0;
    foreach (string synset in firstSynsets)
    {
        if (secondSynsets.Contains(synset))
        {
            features.Add("ss=" + synset);
            sharedCount++;
        }
    }

    bool coversFirst = sharedCount == firstSynsets.Count;
    bool coversSecond = sharedCount == secondSynsets.Count;

    if (sharedCount == 0)
    {
        features.Add("ncss");
    }
    else if (coversFirst && coversSecond)
    {
        features.Add("samess");
    }
    else if (coversFirst)
    {
        features.Add("2isa1");
    }
    else if (coversSecond)
    {
        features.Add("1isa2");
    }
    return features;
}
/// <summary>
/// Tells whether the head token tag of the noun phrase starts with "NN" but not with "NNP".
/// </summary>
/// <param name="nounPhrase">
/// The mention to inspect.
/// </param>
/// <returns>
/// true if the head token tag starts with "NN" and does not start with "NNP"; otherwise false.
/// </returns>
private bool IsCommonNoun(Context nounPhrase)
{
    if (nounPhrase.HeadTokenTag.StartsWith("NNP"))
    {
        return false;
    }
    return nounPhrase.HeadTokenTag.StartsWith("NN");
}
/// <summary>
/// Builds the features that pair a common-noun mention with a number mention.
/// </summary>
/// <param name="common">
/// The common-noun mention; its synsets and name type are used.
/// </param>
/// <param name="number">
/// The number mention; its head token tag and name type are used.
/// </param>
/// <returns>
/// For every synset of the common noun a "ts=" and an "ns=" feature, followed by one "nn=" feature.
/// </returns>
private List<string> GetCommonNumberFeatures(Context common, Context number)
{
    List<string> features = new List<string>();
    string tagPrefix = "ts=" + number.HeadTokenTag + ",";
    string typePrefix = "ns=" + number.NameType + ",";

    foreach (string synset in common.Synsets)
    {
        features.Add(tagPrefix + synset);
        features.Add(typePrefix + synset);
    }

    features.Add("nn=" + number.NameType + "," + common.NameType);
    return features;
}
/// <summary>
/// Tells whether the head token tag of the noun phrase is exactly "CD".
/// </summary>
/// <param name="nounPhrase">
/// The mention to inspect.
/// </param>
/// <returns>
/// true if the head token tag equals "CD"; otherwise false.
/// </returns>
private bool IsNumber(Context nounPhrase)
{
    string headTag = nounPhrase.HeadTokenTag;
    return headTag == "CD";
}
/// <summary>
/// Builds the features that pair a common-noun mention with a pronoun mention.
/// </summary>
/// <param name="common">
/// The common-noun mention; its name type and synsets are used.
/// </param>
/// <param name="pronoun">
/// The pronoun mention; its lower-cased head token text and the pronoun gender derived from it are used.
/// </param>
/// <returns>
/// One "wn=" feature, followed by a "ws=" and a "gs=" feature for every synset of the common noun.
/// </returns>
private List<string> GetCommonPronounFeatures(Context common, Context pronoun)
{
    List<string> features = new List<string>();
    Util.Set<string> synsets = common.Synsets;

    // Invariant lower-casing keeps the feature strings identical on every machine; the
    // culture-sensitive ToLower() maps "I" to a dotless i under Turkish cultures.
    string pronounText = pronoun.HeadTokenText.ToLowerInvariant();
    string genderText = Resolver.AbstractResolver.GetPronounGender(pronounText);

    features.Add("wn=" + pronounText + "," + common.NameType);
    foreach (string synset in synsets)
    {
        features.Add("ws=" + pronounText + "," + synset);
        features.Add("gs=" + genderText + "," + synset);
    }
    return features;
}
/// <summary>
/// Evaluates the test model on the features of the specified noun phrase.
/// </summary>
/// <param name="nounPhrase">
/// The mention to evaluate.
/// </param>
/// <returns>
/// The array returned by the test model for the mention's features.
/// </returns>
public virtual double[] GenderDistribution(Context nounPhrase)
{
    string[] featureArray = GetFeatures(nounPhrase).ToArray();
    return mTestModel.Evaluate(featureArray);
}
/// <summary>
/// Builds the feature list for a pair of noun phrases. The list always starts with "default" and a
/// "ww=" feature holding both lower-cased head words in ordinal order, plus "sameHead" when they are
/// equal. Further features depend on the kind of each mention (name, common noun, pronoun or number).
/// </summary>
/// <param name="np1">
/// The first mention of the pair.
/// </param>
/// <param name="np2">
/// The second mention of the pair.
/// </param>
/// <returns>
/// The features describing the pair. Pairs of two numbers, and pairs in which a mention is none of
/// the four kinds, receive only the head-word features.
/// </returns>
private List<string> GetFeatures(Context np1, Context np2)
{
    List<string> features = new List<string>();
    features.Add("default");

    // Invariant lower-casing keeps the head-word features identical regardless of the machine's
    // culture (the culture-sensitive ToLower() maps "I" to a dotless i under Turkish cultures).
    string w1 = np1.HeadTokenText.ToLowerInvariant();
    string w2 = np2.HeadTokenText.ToLowerInvariant();

    // Order the two head words so the feature does not depend on argument order.
    if (string.CompareOrdinal(w1, w2) < 0)
    {
        features.Add("ww=" + w1 + "," + w2);
    }
    else
    {
        features.Add("ww=" + w2 + "," + w1);
    }
    if (w1 == w2)
    {
        features.Add("sameHead");
    }

    // Dispatch on the kind of each mention; the helper for a mixed pair always receives its
    // arguments in the order its name states, whichever mention came first.
    if (IsName(np1))
    {
        if (IsName(np2))
        {
            features.AddRange(GetNameNameFeatures(np1, np2));
        }
        else if (IsCommonNoun(np2))
        {
            features.AddRange(GetNameCommonFeatures(np1, np2));
        }
        else if (IsPronoun(np2))
        {
            features.AddRange(GetNamePronounFeatures(np1, np2));
        }
        else if (IsNumber(np2))
        {
            features.AddRange(GetNameNumberFeatures(np1, np2));
        }
    }
    else if (IsCommonNoun(np1))
    {
        if (IsName(np2))
        {
            features.AddRange(GetNameCommonFeatures(np2, np1));
        }
        else if (IsCommonNoun(np2))
        {
            features.AddRange(GetCommonCommonFeatures(np1, np2));
        }
        else if (IsPronoun(np2))
        {
            features.AddRange(GetCommonPronounFeatures(np1, np2));
        }
        else if (IsNumber(np2))
        {
            features.AddRange(GetCommonNumberFeatures(np1, np2));
        }
    }
    else if (IsPronoun(np1))
    {
        if (IsName(np2))
        {
            features.AddRange(GetNamePronounFeatures(np2, np1));
        }
        else if (IsCommonNoun(np2))
        {
            features.AddRange(GetCommonPronounFeatures(np2, np1));
        }
        else if (IsPronoun(np2))
        {
            features.AddRange(GetPronounPronounFeatures(np1, np2));
        }
        else if (IsNumber(np2))
        {
            features.AddRange(GetNumberPronounFeatures(np2, np1));
        }
    }
    else if (IsNumber(np1))
    {
        if (IsName(np2))
        {
            features.AddRange(GetNameNumberFeatures(np2, np1));
        }
        else if (IsCommonNoun(np2))
        {
            features.AddRange(GetCommonNumberFeatures(np2, np1));
        }
        else if (IsPronoun(np2))
        {
            features.AddRange(GetNumberPronounFeatures(np1, np2));
        }
        // A pair of two numbers contributes no additional features.
    }

    return features;
}
/// <summary>
/// Builds the feature list for a single noun phrase.
/// </summary>
/// <param name="nounPhrase">
/// The mention to describe.
/// </param>
/// <returns>
/// "default", one "mw=" feature for every token before the head token, an "hw=" feature with the head
/// token text, an "n=" feature with the name type, "fem"/"mas" markers for tokens of a "person"
/// mention found in the female/male name lists, and one "ss=" feature per synset.
/// </returns>
private List<string> GetFeatures(Context nounPhrase)
{
    var features = new List<string>();
    features.Add("default");

    for (int tokenIndex = 0; tokenIndex < nounPhrase.HeadTokenIndex; tokenIndex++)
    {
        features.Add("mw=" + nounPhrase.Tokens[tokenIndex].ToString());
    }
    features.Add("hw=" + nounPhrase.HeadTokenText);
    features.Add("n=" + nounPhrase.NameType);

    // String equality is already false for a null name type, so no separate null test is needed.
    if (nounPhrase.NameType == "person")
    {
        object[] tokens = nounPhrase.Tokens;
        // Examine the tokens before the head; the "|| tokenIndex == 0" clause makes sure the first
        // token is examined even when the head token index is 0.
        for (int tokenIndex = 0; tokenIndex < nounPhrase.HeadTokenIndex || tokenIndex == 0; tokenIndex++)
        {
            // Invariant lower-casing keeps the name lookup independent of the machine's culture
            // (the culture-sensitive ToLower() maps "I" to a dotless i under Turkish cultures).
            string name = tokens[tokenIndex].ToString().ToLowerInvariant();
            if (mFemaleNames.Contains(name))
            {
                features.Add("fem");
            }
            if (mMaleNames.Contains(name))
            {
                features.Add("mas");
            }
        }
    }

    foreach (string synset in nounPhrase.Synsets)
    {
        features.Add("ss=" + synset);
    }
    return features;
}
/// <summary>
/// Builds the features that pair a name mention with a common-noun mention.
/// </summary>
/// <param name="name">
/// The name mention; its name type is used.
/// </param>
/// <param name="common">
/// The common-noun mention; its name type, lower-cased head token text and synsets are used.
/// </param>
/// <returns>
/// An "nn=" feature, an "nw=" feature and one "ns=" feature per synset of the common noun.
/// </returns>
private List<string> GetNameCommonFeatures(Context name, Context common)
{
    Util.Set<string> synsets = common.Synsets;
    List<string> features = new List<string>(2 + synsets.Count);

    features.Add("nn=" + name.NameType + "," + common.NameType);
    // Invariant lower-casing keeps the feature identical regardless of the machine's culture
    // (the culture-sensitive ToLower() maps "I" to a dotless i under Turkish cultures).
    features.Add("nw=" + name.NameType + "," + common.HeadTokenText.ToLowerInvariant());

    foreach (string synset in synsets)
    {
        features.Add("ns=" + name.NameType + "," + synset);
    }
    return features;
}
/// <summary>
/// Heuristically determines the gender of a mention from pronoun patterns and honorifics.
/// </summary>
/// <param name="mention">
/// The mention whose gender is to be computed.
/// </param>
/// <returns>
/// The gender whose pronoun pattern matches the head token text (male, female, neuter, tested in that
/// order); failing that, the gender of the first honorific found among the tokens before the last
/// one ("Mr."/"Mr" for male, "Mrs."/"Mrs"/"Ms."/"Ms" for female); otherwise unknown.
/// </returns>
private GenderEnum GetGender(Context mention)
{
    if (Linker.MalePronounPattern.IsMatch(mention.HeadTokenText))
    {
        return GenderEnum.Male;
    }
    if (Linker.FemalePronounPattern.IsMatch(mention.HeadTokenText))
    {
        return GenderEnum.Female;
    }
    if (Linker.NeuterPronounPattern.IsMatch(mention.HeadTokenText))
    {
        return GenderEnum.Neuter;
    }

    // No pronoun matched: look for an honorific among every token but the last.
    object[] mentionTokens = mention.Tokens;
    int lastTokenIndex = mentionTokens.Length - 1;
    for (int tokenIndex = 0; tokenIndex < lastTokenIndex; tokenIndex++)
    {
        switch (mentionTokens[tokenIndex].ToString())
        {
            case "Mr.":
            case "Mr":
                return GenderEnum.Male;
            case "Mrs.":
            case "Mrs":
            case "Ms.":
            case "Ms":
                return GenderEnum.Female;
        }
    }

    return GenderEnum.Unknown;
}
/// <summary>
/// Looks up the noun lemmas of the mention's lower-cased head token text in the mention dictionary.
/// </summary>
/// <param name="context">
/// The mention whose head token is looked up.
/// </param>
/// <returns>
/// The lemmas the dictionary returns for the head word as a singular-or-mass noun.
/// </returns>
private static string[] GetLemmas(Context context)
{
    // Invariant lower-casing keeps the lookup key independent of the machine's culture
    // (the culture-sensitive ToLower() maps "I" to a dotless i under Turkish cultures).
    string word = context.HeadTokenText.ToLowerInvariant();
    return Mention.DictionaryFactory.GetDictionary().GetLemmas(word, PartsOfSpeech.NounSingularOrMass);
}