/// <summary>
/// Guesses a gender attribute for a phrase, based only on the first word the phrase generates.
/// </summary>
/// <param name="word">Phrase to examine; only the first entry of <c>word.Generate()</c> is used.</param>
/// <param name="informer">Word lookup handed on to <c>IsGivenName</c>.</param>
/// <returns>
/// Neuter when the given-name strength is below .25 with weight above 0.75;
/// Female when the female-name strength is above 0.5;
/// Male when it is below 0.5 with weight above 0.25;
/// otherwise Human, carrying the given-name strength unchanged.
/// </returns>
public GenderAttribute GuessGender(InformedPhrase word, IWordLookup informer)
{
    string firstWord = word.Generate()[0];
    ProbableStrength asGivenName = IsGivenName(informer, firstWord);

    GenderAttribute result;

    // Low strength with high weight: treat the word as not a name at all.
    if (asGivenName.strength < .25 && asGivenName.weight > 0.75)
    {
        result = new GenderAttribute(GenderAttribute.GenderOptions.Neuter);
        result.Strength = asGivenName.InverseProbability();
        return result;
    }

    ProbableStrength asFemale = IsFemaleName(firstWord);

    if (asFemale.strength > 0.5)
    {
        // Rescale the distance above 0.5 onto 0..1, then make it relative to the given-name strength.
        ProbableStrength rescaled = new ProbableStrength(2.0 * (asFemale.strength - 0.5), asFemale.weight);
        result = new GenderAttribute(GenderAttribute.GenderOptions.Female);
        result.Strength = rescaled.Relative(asGivenName);
        return result;
    }

    if (asFemale.strength < 0.5 && asFemale.weight > 0.25)
    {
        // Mirror image of the female case: distance below 0.5, rescaled onto 0..1.
        ProbableStrength rescaled = new ProbableStrength(2.0 * (0.5 - asFemale.strength), asFemale.weight);
        result = new GenderAttribute(GenderAttribute.GenderOptions.Male);
        result.Strength = rescaled.Relative(asGivenName);
        return result;
    }

    // No usable female/male signal: fall back to Human.
    result = new GenderAttribute(GenderAttribute.GenderOptions.Human);
    result.Strength = asGivenName;
    return result;
}
/// <summary>
/// Scores how much a phrase seems to be a referee, combining the summed
/// <c>informer</c> weights of its words with the inverse of <paramref name="anaphora"/>.
/// </summary>
/// <param name="phrase">Phrase whose generated words are weighed.</param>
/// <param name="informer">Lookup supplying a weight for each word.</param>
/// <param name="anaphora">Strength that the phrase is an anaphor; its inverse probability is used.</param>
/// <returns>
/// A strength of sqrt(score * inverse-anaphora strength), with a weight of
/// the inverse-anaphora weight moved halfway towards 1.
/// </returns>
public static ProbableStrength SeemsReferee(InformedPhrase phrase, IWordLookup informer, ProbableStrength anaphora)
{
    List<string> words = phrase.Generate();

    double weight = 0;
    foreach (string word in words)
    {
        weight += informer.GetWeight(word, false);
    }

    /* Base score, before the length adjustments applied below:
     *   words  weight  result
     *     1      0      0
     *     1      1      1
     *     2      0      0
     *     2      .5     .4
     *     2      1      2/3
     *     2      1.5    .86
     *     2      2      1
     *     3      .5     .3
     *     3      1      .5
     *     3      2      .8
     */
    // With no words both numerator and denominator are 0, which would give NaN;
    // score an empty phrase as 0 instead.
    double myweight = (words.Count == 0) ? 0.0 : 2.0 * weight / (words.Count + weight);

    if (words.Count == 1)
    {
        myweight /= 2.0;    // down-score 1-word answers
    }
    if (words.Count > 2)
    {
        myweight /= Math.Log(words.Count);  // down-score phrases of three or more words
    }

    ProbableStrength notanaphora = anaphora.InverseProbability();
    return new ProbableStrength(Math.Sqrt(myweight * notanaphora.strength), notanaphora.weight + .5 - notanaphora.weight * .5);
}
/// <summary>
/// Try to determine if this is any kind of given name.
/// </summary>
/// <param name="informer">Lookup used for the word's weight when the word is not found here.</param>
/// <param name="word">Word to test; it is lower-cased before any lookup.</param>
/// <returns>
/// <c>ProbableStrength.Full</c> when <c>TryGetValue</c> finds the lower-cased word;
/// otherwise a strength of the informer weight squared, with weight 0.5.
/// </returns>
public ProbableStrength IsGivenName(IWordLookup informer, string word)
{
    // Lower-case with the invariant culture: the result is a lookup key, and the
    // culture-sensitive ToLower() maps some letters differently under certain cultures
    // (for example 'I' under Turkish), which would make the lookup miss.
    word = word.ToLowerInvariant();

    // Try to look it up in me
    string nametype;
    if (TryGetValue(word, out nametype))
    {
        return ProbableStrength.Full;
    }

    // Not a known name: fall back to the informer's weight for the word.
    double weight = informer.GetWeight(word, false);
    return new ProbableStrength(weight * weight, 0.5);
}
/// <summary>
/// Inform everything that looks like a name: walks every sense of the phrase and
/// calls <c>GenderInform</c> on each one whose speech part is in <c>nounparts</c>.
/// </summary>
/// <param name="phrase">Phrase whose senses are visited.</param>
/// <param name="informer">Word lookup passed on to <c>GenderInform</c>.</param>
public void GenderInformAll(InformedPhrase phrase, IWordLookup informer)
{
    foreach (KeyValuePair<PhraseSense, double> weighted in phrase.Senses)
    {
        PhraseSense candidate = weighted.Key;

        // Could this be a person?  Have to check all noun phrases, because
        // DummyEnglishParser doesn't properly specify Nouns
        if (!nounparts.Contains(candidate.SpeechPart()))
        {
            // Not a noun part: drill down into the sub-phrases instead
            foreach (InformedPhrase child in candidate.Phrases)
            {
                GenderInformAll(child, informer);
            }
            continue;
        }

        GenderInform(candidate, informer);
    }
}
/// <summary>
/// Scores how much a phrase seems to be a referee, combining the summed
/// <c>informer</c> weights of its words with the inverse of <paramref name="anaphora"/>.
/// </summary>
/// <param name="phrase">Phrase whose generated words are weighed.</param>
/// <param name="informer">Lookup supplying a weight for each word.</param>
/// <param name="anaphora">Strength that the phrase is an anaphor; its inverse probability is used.</param>
/// <returns>
/// A strength of sqrt(score * inverse-anaphora strength), with a weight of
/// the inverse-anaphora weight moved halfway towards 1.
/// </returns>
public static ProbableStrength SeemsReferee(InformedPhrase phrase, IWordLookup informer, ProbableStrength anaphora)
{
    List<string> words = phrase.Generate();
    int wordCount = words.Count;

    double weight = 0;
    foreach (string word in words)
    {
        weight += informer.GetWeight(word, false);
    }

    /* Base score, before the length adjustments applied below:
     *   words  weight  result
     *     1      0      0
     *     1      1      1
     *     2      0      0
     *     2      .5     .4
     *     2      1      2/3
     *     2      1.5    .86
     *     2      2      1
     *     3      .5     .3
     *     3      1      .5
     *     3      2      .8
     */
    double myweight;
    if (wordCount == 0)
    {
        // No words means 0 / 0, i.e. NaN, in the formula below; score it as 0 instead.
        myweight = 0.0;
    }
    else
    {
        myweight = 2.0 * weight / (wordCount + weight);
    }

    if (wordCount == 1)
    {
        myweight /= 2.0;    // down-score 1-word answers
    }
    if (wordCount > 2)
    {
        myweight /= Math.Log(wordCount);    // down-score phrases of three or more words
    }

    ProbableStrength notanaphora = anaphora.InverseProbability();
    return new ProbableStrength(Math.Sqrt(myweight * notanaphora.strength), notanaphora.weight + .5 - notanaphora.weight * .5);
}
/// <summary>
/// Translates a Treebank tag into a list of phrase attributes, optionally using the
/// informer's knowledge of the word.
/// </summary>
/// <param name="part">Treebank tag, e.g. "NN", "PRP$", or a phrase tag starting with "NP", "VP" or "PP".</param>
/// <param name="informer">Word lookup; may be null, in which case no word information is used.</param>
/// <param name="word">The tagged word; may be null. Words containing a space are not looked up.</param>
/// <param name="simplified">When true, anything from the first '-' onward is dropped from a phrase tag.</param>
/// <returns>The attributes for the tag; for pronoun tags with a known word, the attributes of the chosen sense.</returns>
public static List<PhraseAttribute> TreebankToAttributes(string part, IWordLookup informer, string word, bool simplified)
{
    List<PhraseAttribute> attributes = new List<PhraseAttribute>();

    // Only single words can be looked up, and only when both informer and word are given.
    InformedPhrase informed = null;
    if (informer != null && word != null && !word.Contains(" "))
    {
        informed = informer.GetInformed(word, false);
    }

    switch (part)
    {
        case "NN":
        case "NNS":
        {
            bool singular = part == "NN";
            attributes.Add(new PartOSAttribute(Noun));
            attributes.Add(new NumberAttribute(singular
                ? NumberAttribute.NumberOptions.One
                : NumberAttribute.NumberOptions.Many));

            PhraseSense nounSense = null;
            if (informed != null)
            {
                nounSense = informed.FindSense(SpeechPart.Noun, singular);
            }
            return OverrideAttributes(nounSense, attributes);
        }

        case "NNP":
        case "NNPS":
        {
            attributes.Add(new PartOSAttribute(ProperNoun));
            attributes.Add(new NumberAttribute(part == "NNP"
                ? NumberAttribute.NumberOptions.One
                : NumberAttribute.NumberOptions.Many));
            return attributes;
        }

        case "PRP":
        case "PRP$":
        case "WDT":
        case "WP":
        case "WP$":
        case "WRB":
        {
            // informed is null whenever informer or word is null, so one check covers all three.
            if (informed == null)
            {
                switch (part)
                {
                    case "PRP":
                        attributes.Add(new PartOSAttribute(PersonalPronoun));
                        break;
                    case "PRP$":
                        attributes.Add(new PartOSAttribute(PossessivePronoun));
                        break;
                    case "WP":
                        attributes.Add(new PartOSAttribute(WhPronoun));
                        break;
                    case "WP$":
                        attributes.Add(new PartOSAttribute(PossiveWhPronoun));
                        break;
                    default:
                        attributes.Add(new PartOSAttribute(part));
                        break;
                }
                return attributes;
            }

            // Prefer a personal-pronoun sense, then an articulate-pronoun sense, then the first sense.
            PhraseSense pronounSense = informed.FindSense(PersonalPronoun, true);
            if (pronounSense == null)
            {
                pronounSense = informed.FindSense(ArticulatePronoun, true);
            }
            if (pronounSense == null)
            {
                pronounSense = informed.Senses[0].Key;
            }
            return pronounSense.Attributes;
        }
    }

    // Convert phrases: swap the two-letter phrase prefix for its "_P" form
    string phrasePrefix = null;
    if (part.StartsWith("NP"))
    {
        phrasePrefix = "NN_P";
    }
    else if (part.StartsWith("VP"))
    {
        phrasePrefix = "VB_P";
    }
    else if (part.StartsWith("PP"))
    {
        phrasePrefix = "IN_P";
    }
    if (phrasePrefix != null)
    {
        part = phrasePrefix + part.Substring(2);
    }

    if (simplified)
    {
        int dash = part.IndexOf('-');
        if (dash > 0)
        {
            part = part.Substring(0, dash);
        }
    }

    if (catalog.ContainsKey(part))
    {
        attributes.Add(new PartOSAttribute(part));
    }
    else
    {
        // Tag is not in the catalog: wrap it in a new SpeechPart
        attributes.Add(new PartOSAttribute(new SpeechPart(part)));
    }
    return attributes;
}
/// <summary>
/// Stores the supplied word lookup in <c>_wordLookup</c>.
/// </summary>
/// <param name="wordLookup">Word lookup to keep; must not be null.</param>
/// <exception cref="ArgumentNullException"><paramref name="wordLookup"/> is null.</exception>
public ScoreCalculator(IWordLookup wordLookup)
{
    if (wordLookup == null)
    {
        throw new ArgumentNullException(nameof(wordLookup));
    }

    _wordLookup = wordLookup;
}
/// <summary>
/// Given a noun phrase sense, works out a gender attribute for it and stores that
/// attribute on the sense and on the noun sub-sense the search ended at, if different.
/// </summary>
/// <param name="sense">Noun phrase sense to annotate.</param>
/// <param name="informer">Word lookup used to weigh the first word and to guess the gender.</param>
public void GenderInform(PhraseSense sense, IWordLookup informer)
{
    // Drill down to the first noun word, looking for a current attribute
    PhraseSense first = sense;
    GenderAttribute bypart = (GenderAttribute)first.FindAttribute(typeof(GenderAttribute));
    while (bypart == null)
    {
        // Find a noun sub-phrase
        PhraseSense next = null;
        foreach (InformedPhrase subfirst in first.Phrases)
        {
            foreach (KeyValuePair<PhraseSense, double> firstsense in subfirst.Senses)
            {
                if (nounparts.Contains(firstsense.Key.SpeechPart()))
                {
                    next = firstsense.Key;
                    break;
                }
            }
            if (next != null)
            {
                break;
            }
        }

        if (next == null)
        {
            break;
        }

        bypart = (GenderAttribute)next.FindAttribute(typeof(GenderAttribute));
        // A sense without sub-phrases ends the search; "first" stays on its parent.
        if (next.Phrases.Count == 0)
        {
            break;
        }

        first = next;
    }

    if (first.SpeechPart() != SpeechPart.ProperNoun)
    {
        if (bypart != null && bypart.Strength.weight > 0.5)
        {
            return; // we seem to know!
        }

        List<string> words;
        if (first.Phrases.Count > 0)
        {
            words = first.Phrases[0].Generate();
        }
        else
        {
            words = new List<string>();
            words.Add(first.Name());
        }

        if (informer.GetWeight(words[0], false) < 0.5)
        {
            return; // not worth the lookup
        }

        // Only ask for a guess when the sense has a sub-phrase to guess from.
        // A sense with no sub-phrases reaches this point (see the Name() fallback above),
        // and indexing sense.Phrases[0] unguarded would throw for it.
        if (bypart == null && sense.Phrases.Count > 0)
        {
            GenderAttribute dummy = new GenderAttribute();
            bypart = (GenderAttribute)dummy.Guess(sense.Phrases[0]);
        }
    }

    // Now guess using the given name
    InformedPhrase tocheck;
    if (first.Phrases.Count > 0)
    {
        tocheck = first.Phrases[0];
    }
    else
    {
        // No sub-phrases: wrap the sense itself in a one-sense phrase
        List<KeyValuePair<PhraseSense, double>> senses = new List<KeyValuePair<PhraseSense, double>>();
        senses.Add(new KeyValuePair<PhraseSense, double>(first, 1.0));
        tocheck = new InformedPhrase(sense.Name(), senses);
    }

    GenderAttribute gender = GuessGender(tocheck, informer);
    if (bypart != null)
    {
        gender = MergeGenderAttributes(gender, bypart);
    }

    // Update relevant gender attributes
    sense.AddOrUpdateAttribute(gender);
    if (sense != first)
    {
        first.AddOrUpdateAttribute(gender);
    }
}
/// <summary>
/// Translates a Treebank tag into a list of phrase attributes, optionally using the
/// informer's knowledge of the word.
/// </summary>
/// <param name="part">Treebank tag, e.g. "NN", "PRP$", or a phrase tag starting with "NP", "VP" or "PP".</param>
/// <param name="informer">Word lookup; may be null, in which case no word information is used.</param>
/// <param name="word">The tagged word; may be null. Words containing a space are not looked up.</param>
/// <param name="simplified">When true, anything from the first '-' onward is dropped from a phrase tag.</param>
/// <returns>The attributes for the tag; for pronoun tags with a known word, the attributes of the chosen sense.</returns>
public static List<PhraseAttribute> TreebankToAttributes(string part, IWordLookup informer, string word, bool simplified)
{
    List<PhraseAttribute> attributes = new List<PhraseAttribute>();

    // Only single words can be looked up, and only when both informer and word are given.
    bool canLookUp = informer != null && word != null && !word.Contains(" ");
    InformedPhrase informed = canLookUp ? informer.GetInformed(word, false) : null;

    // Common nouns: singular "NN" and plural "NNS"
    bool singularNoun = part == "NN";
    if (singularNoun || part == "NNS")
    {
        NumberAttribute.NumberOptions nounCount = singularNoun
            ? NumberAttribute.NumberOptions.One
            : NumberAttribute.NumberOptions.Many;
        attributes.Add(new PartOSAttribute(Noun));
        attributes.Add(new NumberAttribute(nounCount));

        PhraseSense nounSense = null;
        if (informed != null)
        {
            nounSense = informed.FindSense(SpeechPart.Noun, singularNoun);
        }
        return OverrideAttributes(nounSense, attributes);
    }

    // Proper nouns: singular "NNP" and plural "NNPS"
    bool singularProper = part == "NNP";
    if (singularProper || part == "NNPS")
    {
        NumberAttribute.NumberOptions properCount = singularProper
            ? NumberAttribute.NumberOptions.One
            : NumberAttribute.NumberOptions.Many;
        attributes.Add(new PartOSAttribute(ProperNoun));
        attributes.Add(new NumberAttribute(properCount));
        return attributes;
    }

    bool pronounTag = part == "PRP" || part == "PRP$" || part == "WDT"
        || part == "WP" || part == "WP$" || part == "WRB";
    if (pronounTag)
    {
        // informed is null whenever informer or word is null, so one check covers all three.
        if (informed == null)
        {
            switch (part)
            {
                case "PRP":
                    attributes.Add(new PartOSAttribute(PersonalPronoun));
                    break;
                case "PRP$":
                    attributes.Add(new PartOSAttribute(PossessivePronoun));
                    break;
                case "WP":
                    attributes.Add(new PartOSAttribute(WhPronoun));
                    break;
                case "WP$":
                    attributes.Add(new PartOSAttribute(PossiveWhPronoun));
                    break;
                default:
                    attributes.Add(new PartOSAttribute(part));
                    break;
            }
            return attributes;
        }

        // Prefer a personal-pronoun sense, then an articulate-pronoun sense, then the first sense.
        PhraseSense pronounSense = informed.FindSense(PersonalPronoun, true);
        if (pronounSense == null)
        {
            pronounSense = informed.FindSense(ArticulatePronoun, true);
        }
        if (pronounSense == null)
        {
            pronounSense = informed.Senses[0].Key;
        }
        return pronounSense.Attributes;
    }

    // Convert phrases: swap the two-letter phrase prefix for its "_P" form
    if (part.StartsWith("NP"))
    {
        part = "NN_P" + part.Substring(2);
    }
    else if (part.StartsWith("VP"))
    {
        part = "VB_P" + part.Substring(2);
    }
    else if (part.StartsWith("PP"))
    {
        part = "IN_P" + part.Substring(2);
    }

    if (simplified)
    {
        int dashAt = part.IndexOf('-');
        if (dashAt > 0)
        {
            part = part.Substring(0, dashAt);
        }
    }

    if (catalog.ContainsKey(part))
    {
        attributes.Add(new PartOSAttribute(part));
    }
    else
    {
        // Tag is not in the catalog: wrap it in a new SpeechPart
        attributes.Add(new PartOSAttribute(new SpeechPart(part)));
    }
    return attributes;
}