/// <summary>
/// DoWork handler: looks the queried term up through the WordNet interface and posts the
/// results to the UI via the dispatcher.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data; its Argument is cast to the query string.</param>
void QueryWNIAndPopulate(object sender, DoWorkEventArgs e)
{
    // query WordNet interface
    List<WordNetEngine.RelativeFamily> families;
    List<WordNetEngine.POS> found = wn.Lookup((string)e.Argument, out families);

    if (found.Count == 0)
    {
        // nothing came back: show one placeholder entry that carries the message
        WordNetEngine.POS placeholder = new WordNetEngine.POS();
        placeholder.posType = 4;
        placeholder.Senses = new List<WordNetEngine.Sense>();

        WordNetEngine.Sense notice = new WordNetEngine.Sense();
        notice.Definition = "Queried term not found! Please try something different.";
        placeholder.Senses.Add(notice);

        found = new List<WordNetEngine.POS>();
        found.Add(placeholder);
    }

    // hand the senses to the definitions list
    Dispatcher.BeginInvoke(() => { lstPOSs.ItemsSource = found; });

    // queue one UI update per family of relatives, if any
    foreach (WordNetEngine.RelativeFamily family in families)
    {
        Dispatcher.BeginInvoke(
            new PopulateRelativesDelegate(PopulateRelatives),
            family.RelativeFamilyName,
            family.Terms);
    }
}
/// <summary>
/// DoWork handler: runs the WordNet lookup for the query passed in the event argument and
/// schedules the UI updates (definitions list and relatives) on the dispatcher.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data; its Argument is cast to the query string.</param>
void QueryWNIAndPopulate(object sender, DoWorkEventArgs e)
{
    string term = (string)e.Argument;

    // query WordNet interface
    List<WordNetEngine.RelativeFamily> relativeFamilies;
    List<WordNetEngine.POS> results = wn.Lookup(term, out relativeFamilies);

    if (results.Count == 0)
    {
        // substitute a single dummy entry whose only sense is the "not found" message
        WordNetEngine.POS fallback = new WordNetEngine.POS();
        fallback.posType = 4;
        fallback.Senses = new List<WordNetEngine.Sense>();

        WordNetEngine.Sense message = new WordNetEngine.Sense();
        message.Definition = "Queried term not found! Please try something different.";
        fallback.Senses.Add(message);

        results = new List<WordNetEngine.POS>();
        results.Add(fallback);
    }

    // set senses to definitions list
    Dispatcher.BeginInvoke(() => { lstPOSs.ItemsSource = results; });

    // deal with relatives, if any
    foreach (WordNetEngine.RelativeFamily current in relativeFamilies)
    {
        Dispatcher.BeginInvoke(
            new PopulateRelativesDelegate(PopulateRelatives),
            current.RelativeFamilyName,
            current.Terms);
    }
}
/// <summary>
/// Validates an identifier: splits it into words, runs every word through the spell checker
/// and, depending on <paramref name="pOS"/>, requires a WordNet synset for it.
/// </summary>
/// <param name="text">Identifier to analyze; must be at least two characters long.</param>
/// <param name="pOS">
/// Expected part of speech. For Noun every word is looked up separately; for Verb the
/// non-empty pieces are joined with single spaces and looked up as one phrase.
/// </param>
/// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
/// <exception cref="ArgumentException">
/// The text is shorter than two characters, a word is rejected by the spell checker, or no
/// synset exists for the requested part of speech.
/// </exception>
private void AnalyzeName(string text, WordNetEngine.POS pOS)
{
    if (text == null)
    {
        throw new ArgumentNullException("text");
    }

    if (text.Length < 2)
    {
        throw new ArgumentException("Length is less than 2 char");
    }

    Spelling oSpell = new Spelling();

    // the capturing group makes Regex.Split keep the matched pieces in the result
    string[] words = Regex.Split(text, @"([A-Z _][a-z]+)");
    bool expectNoun = pOS == WordNetEngine.POS.Noun;

    foreach (string word in words)
    {
        // empty split residue and lone underscores are not words
        if (string.IsNullOrEmpty(word) || word == "_")
        {
            continue;
        }

        if (!oSpell.TestWord(word))
        {
            throw new ArgumentException("FieldName Is not a Valid Word");
        }

        if (expectNoun && WordNetEngine.GetMostCommonSynSet(word, pOS) == null)
        {
            throw new ArgumentException("FieldName Is Not A Valid noun");
        }
    }

    if (pOS == WordNetEngine.POS.Verb)
    {
        // verbs are checked as a whole phrase rather than word by word
        string phrase = string.Join(" ", words.Where<string>(c => c.Length > 0));
        if (WordNetEngine.GetMostCommonSynSet(phrase, pOS) == null)
        {
            throw new ArgumentException("MethodName Is Not A Valid Verb");
        }
    }
}
/// <summary>
/// Selects the terms whose part of speech fits the given WordNet part of speech.
/// </summary>
/// <param name="pos">WordNet part of speech to filter by.</param>
/// <returns>The matching terms.</returns>
public IEnumerable<Term> FindTermsByWordnetPOS(WordNetEngine.POS pos)
{
    return terms.Where(term => term.PoS.FitsWordnetPOS(pos));
}
/// <summary>
/// Appends the hypernyms of the given words to the hypernyms text box. Each word produces one
/// line of the form <c>word[(a,b)(c)]</c>, with one parenthesized group per hypernym synset
/// that contains at least one word other than the queried word.
/// </summary>
/// <param name="words">Words to look up.</param>
/// <param name="pos">Part of speech used for the synset lookup.</param>
/// <remarks>
/// At most <c>nupdLimit.Value</c> words are written. The limit is checked before a word is
/// processed so that exactly that many words are emitted.
/// </remarks>
private void OutputHypernyms(IEnumerable<string> words, WordNetEngine.POS pos)
{
    StringBuilder output = new StringBuilder();
    int processed = 0;

    foreach (string word in words)
    {
        // stop as soon as the configured number of words has been written
        if (processed >= nupdLimit.Value)
        {
            break;
        }

        processed++;
        output.Append(word).Append('[');

        foreach (SynSet directSynset in _wordNetEngine.GetSynSets(word, pos))
        {
            var hypernymSynSets = directSynset.GetRelatedSynSets(WordNetEngine.SynSetRelation.Hypernym, false);
            foreach (SynSet hypernymSynSet in hypernymSynSets)
            {
                // materialize once: the lazy query would otherwise be re-run for every Count() call
                string[] others = hypernymSynSet.Words.Where(synonym => synonym != word).ToArray();
                if (others.Length > 0)
                {
                    output.Append('(').Append(string.Join(",", others)).Append(')');
                }
            }
        }

        output.Append(']').Append(Environment.NewLine);
    }

    this.txtHypernyms.Text += output.ToString();
}
/// <summary>
/// Appends the synonyms of the given words to the synonyms text box. Each word produces one
/// line of the form <c>word[(a,b)(c)]</c>, with one parenthesized group per synset that
/// contains at least one word other than the queried word.
/// </summary>
/// <param name="words">Words to look up.</param>
/// <param name="pos">Part of speech used for the synset lookup.</param>
/// <remarks>
/// At most <c>nupdLimit.Value</c> words are written. The limit is checked before a word is
/// processed so that exactly that many words are emitted.
/// </remarks>
private void OutputSynonyms(IEnumerable<string> words, WordNetEngine.POS pos)
{
    StringBuilder synonymOutput = new StringBuilder();
    int processed = 0;

    foreach (string word in words)
    {
        // stop as soon as the configured number of words has been written
        if (processed >= nupdLimit.Value)
        {
            break;
        }

        processed++;
        synonymOutput.Append(word).Append('[');

        // words are synonyms when they share a synset, so list the other members of each synset
        foreach (SynSet synSet in _wordNetEngine.GetSynSets(word, pos))
        {
            // materialize once: the lazy query would otherwise be re-run for every Count() call
            string[] others = synSet.Words.Where(synonym => synonym != word).ToArray();
            if (others.Length > 0)
            {
                synonymOutput.Append('(').Append(string.Join(",", others)).Append(')');
            }
        }

        synonymOutput.Append(']').Append(Environment.NewLine);
    }

    this.txtSynonyms.Text += synonymOutput.ToString();
}
private List<string> _words; // words must be ordered in order to use lexical relation indexes
#endregion Fields

#region Constructors
/// <summary>
/// Builds an empty SynSet shell that carries only its POS, offset and (optionally) the engine
/// used to load it. Words, gloss and related SynSets become available after SynSet.Instantiate.
/// </summary>
/// <param name="pos">POS of SynSet</param>
/// <param name="offset">Byte location of SynSet definition within data file</param>
/// <param name="wordNetEngine">WordNet engine used to instantiate this synset. Must be null for
/// in-memory engines; pass it only when constructing synsets for disk-based engines.</param>
internal SynSet(WordNetEngine.POS pos, int offset, WordNetEngine wordNetEngine)
{
    // an in-memory engine must never be handed to a shell
    bool engineIsInMemory = wordNetEngine != null && wordNetEngine.InMemory;
    if (engineIsInMemory)
    {
        throw new Exception("Don't need to pass a non-null WordNetEngine when using in-memory storage");
    }

    _pos = pos;
    _offset = offset;
    _wordNetEngine = wordNetEngine;
    _instantiated = false;

    // the ID and its hash never change, so compute both once
    _id = pos + ":" + offset;
    _hashCode = _id.GetHashCode();
}
/// <summary>
/// Creates a SynSet shell holding just its identity (POS and offset). Call SynSet.Instantiate
/// to load the words, gloss and related SynSets.
/// </summary>
/// <param name="pos">POS of SynSet</param>
/// <param name="offset">Byte location of SynSet definition within data file</param>
/// <param name="wordNetEngine">WordNet engine used to instantiate this synset. Must be null for
/// in-memory engines; pass it only when constructing synsets for disk-based engines.</param>
internal SynSet(WordNetEngine.POS pos, int offset, WordNetEngine wordNetEngine)
{
    // reject the combination "engine supplied" + "engine keeps everything in memory"
    if (wordNetEngine != null && wordNetEngine.InMemory)
    {
        throw new Exception("Don't need to pass a non-null WordNetEngine when using in-memory storage");
    }

    _pos = pos;
    _offset = offset;
    _wordNetEngine = wordNetEngine;
    _instantiated = false;

    // precompute the ID and hash code for efficiency
    string id = pos + ":" + offset;
    _id = id;
    _hashCode = id.GetHashCode();
}
/// <summary>
/// Collects the words that share a synset with <paramref name="word"/>, then widens the result
/// once more with the synset members of each of those words. Duplicates are dropped and the
/// order of first appearance is kept.
/// </summary>
/// <param name="word">Word to start from.</param>
/// <param name="type">Part of speech used for every lookup.</param>
/// <returns>The distinct words found, or null when a synset lookup throws.</returns>
private static List<string> getSynSets(string word, WordNetEngine.POS type)
{
    var seen = new HashSet<string>();
    var collected = new List<string>();

    // first level: members of the synsets of the word itself
    Set<SynSet> lookup;
    try
    {
        lookup = _wordNetEngine.GetSynSets(word, type);
    }
    catch (Exception)
    {
        return null;
    }

    foreach (var synSet in lookup)
    {
        foreach (string member in synSet.Words)
        {
            if (seen.Add(member))
            {
                collected.Add(member);
            }
        }
    }

    // second level: iterate over a snapshot because the list grows while we expand it
    foreach (string seed in collected.ToArray())
    {
        try
        {
            lookup = _wordNetEngine.GetSynSets(seed, type);
        }
        catch (Exception)
        {
            return null;
        }

        foreach (var synSet in lookup)
        {
            foreach (string member in synSet.Words)
            {
                if (seen.Add(member))
                {
                    collected.Add(member);
                }
            }
        }
    }

    return collected;
}
/// <summary>
/// Picks the synset for a word: the glossary entry's synset when the glossary knows the word,
/// otherwise whatever the base implementation selected.
/// </summary>
/// <param name="word">Word to resolve.</param>
/// <param name="pos">Part of speech of the word.</param>
/// <returns>The selected synset (the base class result when the glossary has no entry).</returns>
protected override SynSet SelectSynset(string word, POS pos)
{
    SynSet fallback = base.SelectSynset(word, pos); // temporary

    WordNetEngine.POS wordnetPos = pos.ForWordnet();
    if (wordnetPos == WordNetEngine.POS.None)
    {
        return fallback;
    }

    IGlossaryEntry glossEntry = glossary.FindWord(word);
    if (glossEntry != null)
    {
        return glossEntry.Synset;
    }

    Set<SynSet> candidates = wordnet.GetSynSets(word, wordnetPos);
    foreach (SynSet candidate in candidates)
    {
        // placeholder: the selection algorithm over the candidates is still to be written
    }

    return fallback;
}
/// <summary>
/// Fills this synset from its data-file definition line: words, gloss, semantic relations and
/// lexical relations. A synset that is already instantiated is left untouched. In either case
/// the reference to the WordNet engine is dropped before returning.
/// </summary>
/// <param name="definition">Definition line of synset from data file</param>
/// <param name="idSynset">Lookup for related synsets, keyed by "POS:offset". If null, all related
/// synsets will be created as shells.</param>
internal void Instantiate(string definition, Dictionary<string, SynSet> idSynset)
{
    // don't re-instantiate; just let go of the engine
    if (_instantiated)
    {
        _wordNetEngine = null;
        return;
    }

    // field 3 holds the word count in hex; the word list starts right after it
    int cursor;
    int wordCount = int.Parse(GetField(definition, 3, out cursor), NumberStyles.HexNumber);
    cursor = definition.IndexOf(' ', cursor) + 1;

    _words = new List<string>(wordCount);
    for (int w = 0; w < wordCount; w++)
    {
        int spaceAfterWord = definition.IndexOf(' ', cursor + 1);
        _words.Add(definition.Substring(cursor, spaceAfterWord - cursor));

        // NOTE(review): this reads the two characters after the first space of the line on
        // every pass, independent of the current word - confirm this is the intended field
        lex_id = Convert.ToInt32(definition.Substring(definition.IndexOf(' ') + 1, 2));

        // jump over the lex_id column to the next word
        cursor = definition.IndexOf(' ', spaceAfterWord + 1) + 1;
    }

    // everything after the pipe is the gloss
    _gloss = definition.Substring(definition.IndexOf('|') + 1).Trim();

    // the relation count follows the word/lex_id pairs
    int relationCountField = 3 + (_words.Count * 2) + 1;
    int relationCursor;
    int relationCount = int.Parse(GetField(definition, relationCountField, out relationCursor));
    relationCursor = definition.IndexOf(' ', relationCursor) + 1;

    _relationSynSets = new Dictionary<WordNetEngine.SynSetRelation, List<SynSet>>();
    _lexicalRelations = new Dictionary<WordNetEngine.SynSetRelation, Dictionary<SynSet, Dictionary<int, List<int>>>>();

    for (int r = 0; r < relationCount; r++)
    {
        string symbol = null;
        int relatedOffset = -1;
        WordNetEngine.POS relatedPos = WordNetEngine.POS.None;
        int sourceWord = -1;
        int targetWord = -1;

        // each relation has four space-delimited columns
        for (int column = 0; column <= 3; column++)
        {
            int columnEnd = definition.IndexOf(' ', relationCursor + 1);
            string value = definition.Substring(relationCursor, columnEnd - relationCursor);

            switch (column)
            {
                case 0: // relation symbol
                    symbol = value;
                    break;
                case 1: // related synset offset
                    relatedOffset = int.Parse(value);
                    break;
                case 2: // related synset POS
                    relatedPos = GetPOS(value);
                    break;
                case 3: // source/target word for lexical relation (two hex digits, then the rest)
                    sourceWord = int.Parse(value.Substring(0, 2), NumberStyles.HexNumber);
                    targetWord = int.Parse(value.Substring(2), NumberStyles.HexNumber);
                    break;
                default:
                    throw new Exception();
            }

            relationCursor = columnEnd + 1;
        }

        // without a lookup the related synset is created as a shell
        SynSet related = idSynset == null
            ? new SynSet(relatedPos, relatedOffset, _wordNetEngine)
            : idSynset[relatedPos + ":" + relatedOffset];

        WordNetEngine.SynSetRelation relation = WordNetEngine.GetSynSetRelation(_pos, symbol);

        if (sourceWord == 0 && targetWord == 0)
        {
            // both word indexes zero: semantic relation between whole synsets
            _relationSynSets.EnsureContainsKey(relation, typeof(List<SynSet>));
            _relationSynSets[relation].Add(related);
        }
        else
        {
            // lexical relation: relation -> related synset -> source word -> target words
            _lexicalRelations.EnsureContainsKey(relation, typeof(Dictionary<SynSet, Dictionary<int, List<int>>>));
            var bySynSet = _lexicalRelations[relation];

            bySynSet.EnsureContainsKey(related, typeof(Dictionary<int, List<int>>));
            var bySourceWord = bySynSet[related];

            bySourceWord.EnsureContainsKey(sourceWord, typeof(List<int>));
            var targets = bySourceWord[sourceWord];

            if (!targets.Contains(targetWord))
            {
                targets.Add(targetWord);
            }
        }
    }

    _instantiated = true;

    // release the wordnet engine...don't need it anymore
    _wordNetEngine = null;
}
/// <summary>
/// Decides whether the given unifiable matches this pattern. First the inner text is used as a
/// regular expression against the word's stored attributes; if that fails, WordNet is asked for
/// a path from any synset of the input word to any synset of the pattern word over the
/// configured relations.
/// </summary>
/// <param name="with">Unifiable whose value is tested.</param>
/// <returns>AND_TRUE on a match, AND_FALSE otherwise (including when the WordNet engine is not
/// loaded or a synset lookup throws).</returns>
public override float CanUnify(Unifiable with)
{
    string re = ComputeInner();
    string wnPos = GetAttribValue("wnpos", "").ToLower();
    string wnRelation = GetAttribValue("wnrel", "").ToLower();

    // look up the attribute string of the word, trying the lower-cased key as a fallback
    string wordAttributes = "";
    string key = (string)with.ToValue(query).Trim();
    if (Proc.wordAttributeHash.Contains(key))
    {
        wordAttributes = (string)Proc.wordAttributeHash[key];
    }
    else if (Proc.wordAttributeHash.Contains(key.ToLower()))
    {
        key = key.ToLower();
        wordAttributes = (string)Proc.wordAttributeHash[key];
    }

    // a regex hit on the attributes settles it
    if (new Regex(re).IsMatch(wordAttributes))
    {
        return AND_TRUE;
    }

    // otherwise fall back to WordNet
    WordNetEngine engine = Proc.wordNetEngine;

    // POS defaults to Noun; when several names match the attribute, the last one wins
    WordNetEngine.POS ourPOS = WordNetEngine.POS.Noun;
    if (wnPos.Length > 0)
    {
        foreach (WordNetEngine.POS candidate in Enum.GetValues(typeof(WordNetEngine.POS)))
        {
            if (candidate == WordNetEngine.POS.None)
            {
                continue;
            }

            if (candidate.ToString().ToLower().Equals(wnPos) || candidate.ToString().ToLower().StartsWith(wnPos))
            {
                ourPOS = candidate;
            }
        }
    }

    if (engine == null)
    {
        writeDebugLine("Wordnet engine not loaded .. returning AND_FALSE");
        return AND_FALSE;
    }

    // synsets of the pattern word, retrying in lower case when nothing was found
    Set<SynSet> patternSynSets = null;
    try
    {
        patternSynSets = engine.GetSynSets(re, ourPOS);
    }
    catch (Exception)
    {
        return AND_FALSE;
    }

    if (patternSynSets.Count == 0)
    {
        try
        {
            patternSynSets = engine.GetSynSets(re.ToLower(), ourPOS);
        }
        catch (Exception)
        {
            return AND_FALSE;
        }
    }

    // synsets of the input word, same retry
    Set<SynSet> inputSynSets = null;
    try
    {
        inputSynSets = engine.GetSynSets(key, ourPOS);
    }
    catch (Exception)
    {
        return AND_FALSE;
    }

    if (inputSynSets.Count == 0)
    {
        try
        {
            inputSynSets = engine.GetSynSets(key.ToLower(), ourPOS);
        }
        catch (Exception)
        {
            return AND_FALSE;
        }
    }

    // relations to follow: hypernyms by default, else every relation whose name contains the attribute
    List<WordNetEngine.SynSetRelation> relations = new List<WordNetEngine.SynSetRelation>();
    if (wnRelation.Length == 0)
    {
        relations.Add(WordNetEngine.SynSetRelation.Hypernym);
        relations.Add(WordNetEngine.SynSetRelation.InstanceHypernym);
    }
    else
    {
        foreach (WordNetEngine.SynSetRelation candidate in Enum.GetValues(typeof(WordNetEngine.SynSetRelation)))
        {
            if (candidate != WordNetEngine.SynSetRelation.None && candidate.ToString().ToLower().Contains(wnRelation))
            {
                relations.Add(candidate);
            }
        }
    }

    if (inputSynSets.Count == 0 || patternSynSets.Count == 0)
    {
        return AND_FALSE;
    }

    // any non-empty path from an input synset to a pattern synset counts as a match
    foreach (SynSet inputSynSet in inputSynSets)
    {
        foreach (SynSet patternSynSet in patternSynSets)
        {
            List<SynSet> path = inputSynSet.GetShortestPathTo(patternSynSet, relations);
            if (path != null && path.Count > 0)
            {
                return AND_TRUE;
            }
        }
    }

    return AND_FALSE;
}
/// <summary>
/// Selects the most common WordNet synset for a word.
/// </summary>
/// <param name="word">Word to resolve.</param>
/// <param name="pos">Part of speech of the word.</param>
/// <returns>The most common synset, or null when the part of speech maps to
/// WordNetEngine.POS.None.</returns>
protected virtual SynSet SelectSynset(string word, POS pos)
{
    WordNetEngine.POS wordnetPOS = pos.ForWordnet();
    if (wordnetPOS == WordNetEngine.POS.None)
    {
        return null;
    }

    return wordnet.GetMostCommonSynSet(word, wordnetPOS);
}
/// <summary>
/// Fills this synset from its data-file definition line: lexicographer file name, words, gloss,
/// semantic relations and lexical relations. The reference to the WordNet engine is dropped
/// once parsing has finished.
/// </summary>
/// <param name="definition">Definition line of synset from data file</param>
/// <param name="idSynset">Lookup for related synsets, keyed by "POS:offset". If null, all related
/// synsets will be created as shells.</param>
/// <exception cref="Exception">The synset was already instantiated, or the definition line
/// fails one of the sanity checks.</exception>
internal void Instantiate(string definition, Dictionary<string, SynSet> idSynset)
{
    // don't re-instantiate
    if (_instantiated)
    {
        throw new Exception("Synset has already been instantiated");
    }

    // The enumeration matches the lexnames file except that it starts with None, so the value
    // from the definition line is shifted by one to get the right file name.
    int fileNumber = int.Parse(GetField(definition, 1)) + 1;
    if (fileNumber <= 0)
    {
        throw new Exception("Invalid lexicographer file name number. Should be >= 1.");
    }

    _lexicographerFileName = (WordNetEngine.LexicographerFileName)fileNumber;

    // field 3 holds the word count in hex; the word list starts right after it
    int cursor;
    int wordCount = int.Parse(GetField(definition, 3, out cursor), NumberStyles.HexNumber);
    cursor = definition.IndexOf(' ', cursor) + 1;

    _words = new List<string>(wordCount);
    for (int w = 0; w < wordCount; w++)
    {
        int spaceAfterWord = definition.IndexOf(' ', cursor + 1);
        string word = definition.Substring(cursor, spaceAfterWord - cursor);
        if (word.Contains(' '))
        {
            throw new Exception("Unexpected space in word: " + word);
        }

        _words.Add(word);

        // jump over the lex_id column to the next word
        cursor = definition.IndexOf(' ', spaceAfterWord + 1) + 1;
    }

    // everything after the pipe is the gloss
    _gloss = definition.Substring(definition.IndexOf('|') + 1).Trim();
    if (_gloss.Contains('|'))
    {
        throw new Exception("Unexpected pipe in gloss");
    }

    // the relation count follows the word/lex_id pairs
    int relationCountField = 3 + (_words.Count * 2) + 1;
    int relationCursor;
    int relationCount = int.Parse(GetField(definition, relationCountField, out relationCursor));
    relationCursor = definition.IndexOf(' ', relationCursor) + 1;

    _relationSynSets = new Dictionary<WordNetEngine.SynSetRelation, Set<SynSet>>();
    _lexicalRelations = new Dictionary<WordNetEngine.SynSetRelation, Dictionary<SynSet, Dictionary<int, Set<int>>>>();

    for (int r = 0; r < relationCount; r++)
    {
        string symbol = null;
        int relatedOffset = -1;
        WordNetEngine.POS relatedPos = WordNetEngine.POS.None;
        int sourceWord = -1;
        int targetWord = -1;

        // each relation has four space-delimited columns
        for (int column = 0; column <= 3; column++)
        {
            int columnEnd = definition.IndexOf(' ', relationCursor + 1);
            string value = definition.Substring(relationCursor, columnEnd - relationCursor);

            switch (column)
            {
                case 0: // relation symbol
                    symbol = value;
                    break;
                case 1: // related synset offset
                    relatedOffset = int.Parse(value);
                    break;
                case 2: // related synset POS
                    relatedPos = GetPOS(value);
                    break;
                case 3: // source/target word for lexical relation (two hex digits, then the rest)
                    sourceWord = int.Parse(value.Substring(0, 2), NumberStyles.HexNumber);
                    targetWord = int.Parse(value.Substring(2), NumberStyles.HexNumber);
                    break;
                default:
                    throw new Exception();
            }

            relationCursor = columnEnd + 1;
        }

        // without a lookup the related synset is created as a shell
        SynSet related = idSynset == null
            ? new SynSet(relatedPos, relatedOffset, _wordNetEngine)
            : idSynset[relatedPos + ":" + relatedOffset];

        WordNetEngine.SynSetRelation relation = WordNetEngine.GetSynSetRelation(_pos, symbol);

        if (sourceWord == 0 && targetWord == 0)
        {
            // both word indexes zero: semantic relation between whole synsets
            _relationSynSets.EnsureContainsKey(relation, typeof(Set<SynSet>));
            _relationSynSets[relation].Add(related);
        }
        else
        {
            // lexical relation: relation -> related synset -> source word -> target words
            _lexicalRelations.EnsureContainsKey(relation, typeof(Dictionary<SynSet, Dictionary<int, Set<int>>>));
            var bySynSet = _lexicalRelations[relation];

            bySynSet.EnsureContainsKey(related, typeof(Dictionary<int, Set<int>>));
            var bySourceWord = bySynSet[related];

            bySourceWord.EnsureContainsKey(sourceWord, typeof(Set<int>));
            var targets = bySourceWord[sourceWord];

            if (!targets.Contains(targetWord))
            {
                targets.Add(targetWord);
            }
        }
    }

    // release the wordnet engine...don't need it anymore
    _wordNetEngine = null;

    _instantiated = true;
}
/// <summary>
/// Tells whether this part of speech converts to the given WordNet part of speech.
/// </summary>
/// <param name="pos">WordNet part of speech to compare against.</param>
/// <returns>True when ForWordnet() yields <paramref name="pos"/>.</returns>
public bool FitsWordnetPOS(WordNetEngine.POS pos)
{
    WordNetEngine.POS converted = ForWordnet();
    return converted == pos;
}
/// <summary>
/// Expands a space-separated input by appending, for every word, the words of the synsets on
/// the shortest hyponym paths from the noun "entity" to that word. Per-word results are cached
/// in WNExpandCache. Optionally question words are appended for certain concepts, and finally
/// a fixed list of low-information substrings is removed from the text.
/// </summary>
/// <param name="inputString">Space-separated words to expand.</param>
/// <param name="queryhook">When true, question words ("who", "what", ...) are appended for
/// matching concepts.</param>
/// <returns>The expanded, trimmed text. If the "entity" lookup throws, the input followed by a
/// single space is returned unchanged.</returns>
public string WordNetExpand0(string inputString, bool queryhook)
{
    string[] words = inputString.Split(' ');
    string returnText = inputString + " ";

    if (userCachedPending == false)
    {
        WNUser2Cache();
    }

    // walk downwards from "entity" along hyponym links, nouns only
    WordNetEngine.POS ourPOS = WordNetEngine.POS.Noun;
    List<WordNetEngine.SynSetRelation> downwardRelations = new List<WordNetEngine.SynSetRelation>();
    downwardRelations.Add(WordNetEngine.SynSetRelation.Hyponym);
    downwardRelations.Add(WordNetEngine.SynSetRelation.InstanceHyponym);

    // retrieve the start synsets
    Set<SynSet> rootSynSets = null;
    try
    {
        rootSynSets = wordNetEngine.GetSynSets("entity", ourPOS);
    }
    catch (Exception)
    {
        writeToLog("Invalid Start SynSet ID");
        return returnText;
    }

    foreach (string focusWord in words)
    {
        string focusWordResults;

        if (WNExpandCache.Contains(focusWord))
        {
            focusWordResults = (string)WNExpandCache[focusWord];
        }
        else
        {
            Set<SynSet> wordSynSets = null;
            try
            {
                wordSynSets = wordNetEngine.GetSynSets(focusWord, ourPOS);
            }
            catch (Exception)
            {
                writeToLog("Invalid Dest SynSet ID");
                continue;
            }

            StringBuilder expansion = new StringBuilder();
            if (rootSynSets.Count > 0)
            {
                foreach (SynSet rootSynSet in rootSynSets)
                {
                    foreach (SynSet wordSynSet in wordSynSets)
                    {
                        List<SynSet> path = rootSynSet.GetShortestPathTo(wordSynSet, downwardRelations);
                        if (path == null || path.Count == 0)
                        {
                            continue;
                        }

                        // every synset on the path contributes " w1, w2, ... "
                        foreach (SynSet step in path)
                        {
                            expansion.Append(' ');
                            string separator = "";
                            foreach (string member in step.Words)
                            {
                                expansion.Append(separator).Append(member);
                                separator = ", ";
                            }

                            expansion.Append(' ');
                        }
                    }
                }
            }

            focusWordResults = expansion.ToString();

            // the cache stores the trimmed form; the untrimmed form is appended below
            WNExpandCache.Add(focusWord, focusWordResults.Trim());
        }

        returnText = returnText + " " + focusWordResults;
    }

    returnText = returnText.Trim();

    if (queryhook)
    {
        // concept found in the text -> question words appended (checked in this order)
        string[,] questionCues =
        {
            { "person", " who" },
            { "imaginary_being", " who" },
            { "causal_agent", " who" },
            { "object", " what" },
            { "location", " where" },
            { "time_period", " when" },
            { "amount", " how much how many" },
            { "measure", " how much how many" },
            { "quantity", " how much how many" }
        };

        for (int cue = 0; cue < questionCues.GetLength(0); cue++)
        {
            if (returnText.Contains(questionCues[cue, 0]))
            {
                returnText = returnText + questionCues[cue, 1];
            }
        }
    }

    // filter out "stop concepts" which have a > 70% occurrence and thus low info content
    string[] stopConcepts = { "entity", "abstraction", "abstract", "unit", "physical", "yes" };
    foreach (string stopConcept in stopConcepts)
    {
        returnText = returnText.Replace(stopConcept, "");
    }

    return returnText.Trim();
}
/// <summary>
/// Gets similarity of two strings. The average/maximum/minimum strategies score every pairing
/// of the synsets of the two strings; the most-common strategy scores only the most common
/// synset of each string.
/// </summary>
/// <param name="string1">First string</param>
/// <param name="pos1">First POS</param>
/// <param name="string2">Second string</param>
/// <param name="pos2">Second POS</param>
/// <param name="strategy">Similarity strategy to use</param>
/// <param name="relations">Relations to use when computing similarity</param>
/// <returns>Similarity in [0, 1]; zero when no synset pairing could be scored.</returns>
/// <exception cref="NotImplementedException">The strategy is not handled here.</exception>
/// <exception cref="Exception">The computed similarity lies outside [0, 1].</exception>
public float GetSimilarity(string string1, WordNetEngine.POS pos1, string string2, WordNetEngine.POS pos2, Strategy strategy, params WordNetEngine.SynSetRelation[] relations)
{
    float similarity = 0;

    bool useAverage = strategy == Strategy.WuPalmer1994Average;
    bool useMaximum = strategy == Strategy.WuPalmer1994Maximum;
    bool useMinimum = strategy == Strategy.WuPalmer1994Minimum;

    if (useAverage || useMaximum || useMinimum)
    {
        // look each string up exactly once instead of re-querying the second string's synsets
        // for every synset of the first string
        var synsets1 = _wordNetEngine.GetSynSets(string1, pos1);
        var synsets2 = _wordNetEngine.GetSynSets(string2, pos2);

        int numScores = 0;
        float total = 0;
        float highest = 0;
        float lowest = 0;

        foreach (SynSet synset1 in synsets1)
        {
            foreach (SynSet synset2 in synsets2)
            {
                float currSim = GetSimilarity(synset1, synset2, strategy, relations);

                total += currSim;
                if (currSim > highest)
                {
                    highest = currSim;
                }

                // the first score seeds the minimum
                if (numScores == 0 || currSim < lowest)
                {
                    lowest = currSim;
                }

                ++numScores;
            }
        }

        // with no pairings at all the similarity stays zero
        if (numScores > 0)
        {
            if (useAverage)
            {
                similarity = total / (float)numScores;
            }
            else if (useMaximum)
            {
                similarity = highest;
            }
            else
            {
                similarity = lowest;
            }
        }
    }
    else if (strategy == Strategy.WuPalmer1994MostCommon)
    {
        // use most common synsets
        SynSet synset1 = _wordNetEngine.GetMostCommonSynSet(string1, pos1);
        SynSet synset2 = _wordNetEngine.GetMostCommonSynSet(string2, pos2);

        if (synset1 != null && synset2 != null)
        {
            similarity = GetSimilarity(synset1, synset2, strategy, relations);
        }
    }
    else
    {
        throw new NotImplementedException("Unimplemented strategy: " + strategy);
    }

    if (similarity < 0 || similarity > 1)
    {
        throw new Exception("Invalid similarity: " + similarity);
    }

    return similarity;
}