/// <summary>Collects the synonyms for every sense referenced by the given index entry.</summary>
/// <param name="index">A WordNet index entry whose synset offsets will be resolved.</param>
/// <returns>
/// The lexemes found across all of the index's synsets, with underscores replaced by
/// spaces, excluding capitalized words (proper nouns) and the looked-up word itself.
/// </returns>
private static string[] lookupSynonyms(Wnlib.Index index)
{
    // OVERVIEW: For each sense, grab the synset associated with our index,
    //           then add the lexemes in that synset to the result list.
    var synonyms = new ArrayList(10);

    // for each sense...
    for (int s = 0; s < index.SynsetOffsets.Length; s++)
    {
        // read in the word and its pointers
        var synset = new Wnlib.SynSet(index.SynsetOffsets[s], index.PartOfSpeech, index.Wd, null, s);

        // build a string out of the words
        for (int i = 0; i < synset.words.Length; i++)
        {
            string word = synset.words[i].word.Replace("_", " ");
            if (word.Length == 0)
            {
                continue;
            }

            // A capitalized word marks a proper noun; we don't want those.
            // BUG FIX: the old test (word[0] <= 'Z') also rejected words starting
            // with digits or punctuation (any char code <= 90), so entries such as
            // "22-karat gold" were dropped. char.IsUpper tests only capital letters.
            if (char.IsUpper(word[0]))
            {
                continue;
            }

            // add it to the list only if it differs from the word that was looked up
            if (string.Compare(word, index.Wd, true) != 0)
            {
                synonyms.Add(word);
            }
        }
    }
    return (string[])synonyms.ToArray(typeof(string));
}
/// <summary>Determines the most probable part of speech for a single word.</summary>
/// <param name="word">The word to look up.</param>
/// <returns>
/// A WordInfo whose SenseCounts holds the sense count per part of speech and whose
/// partOfSpeech is the category with the highest count (Unknown when nothing matched).
/// </returns>
/// <remarks>
/// Word definitions are often context-based; a context-aware lookup (passing the whole
/// sentence) would be more accurate, but this deliberately keeps things simple and
/// scores each part of speech only by its standalone sense count.
/// </remarks>
private static WordInfo lookupWord(string word)
{
    var allPos = (Wnlib.PartsOfSpeech[])Enum.GetValues(typeof(Wnlib.PartsOfSpeech));

    var result = new WordInfo();
    result.partOfSpeech = Wnlib.PartsOfSpeech.Unknown;
    result.SenseCounts = new int[allPos.Length];

    int bestCount = 0;
    for (int i = 0; i < allPos.Length; i++)
    {
        // skip the placeholder category
        Wnlib.PartsOfSpeech pos = allPos[i];
        if (pos == Wnlib.PartsOfSpeech.Unknown)
        {
            continue;
        }

        // look the word up in this category; no entry means zero senses
        Wnlib.Index index = Wnlib.Index.lookup(word, Wnlib.PartOfSpeech.of(pos));
        if (index == null)
        {
            continue;
        }

        // record the sense count and keep the strongest category seen so far
        result.SenseCounts[i] = index.SenseCnt;
        if (result.SenseCounts[i] > bestCount)
        {
            bestCount = result.SenseCounts[i];
            result.partOfSpeech = pos;
        }
    }
    return result;
}
/// <summary>Returns a list of synonyms for a given word.</summary>
/// <param name="word">the word</param>
/// <param name="pos">The part of speech of the word</param>
/// <param name="includeMorphs">include morphology? (fuzzy matching)</param>
/// <returns>An array of strings containing the synonyms found, or null when none were.</returns>
/// <remarks>
/// Note that my usage of 'Synonyms' here is not the same as hypernyms as defined by
/// WordNet. Synonyms in this sense are merely words in the same SynSet as the given
/// word. Hypernyms are found by tracing the pointers in a given synset.
/// </remarks>
public static string[] FindSynonyms(string word, Wnlib.PartsOfSpeech pos, bool includeMorphs)
{
    // lookups are lower-case; resolve the category object once
    word = word.ToLower();
    var category = Wnlib.PartOfSpeech.of(pos);

    // try the exact word first
    Wnlib.Index index = Wnlib.Index.lookup(word, category);

    // fall back to morphological variants when asked to
    if (index == null && includeMorphs)
    {
        var morphs = new Wnlib.MorphStr(word, category);
        for (string morph = morphs.next(); morph != null; morph = morphs.next())
        {
            index = Wnlib.Index.lookup(morph, category);
            if (index != null)
            {
                break;
            }
        }
    }

    // no index means no synonyms at all
    return index == null ? null : lookupSynonyms(index);
}
/// <summary>Resolves a word via its morphological variants and picks the strongest match.</summary>
/// <param name="word">The word that failed an exact-match lookup.</param>
/// <returns>
/// The WordInfo with the highest total sense count across all parts of speech;
/// an empty WordInfo when no morph matched anything.
/// </returns>
/// <remarks>
/// Only called when the exact word was not found: every part of speech is enumerated,
/// each of the word's morphs in that category is looked up, and the per-morph
/// sense counts are accumulated before the strongest candidate is selected.
/// </remarks>
private static WordInfo LookupWordMorphs(string word)
{
    var candidates = new ArrayList();

    // phase 1: gather sense counts for every morph in every part of speech
    for (int i = 0; i < Enums.Length; i++)
    {
        Wnlib.PartsOfSpeech pos = Enums[i];
        if (pos == Wnlib.PartsOfSpeech.Unknown)
        {
            continue;
        }

        // walk this category's morph list for the word
        var morphs = new Wnlib.MorphStr(word, Wnlib.PartOfSpeech.of(pos));
        string morph;
        while ((morph = morphs.next()) != null)
        {
            Wnlib.Index index = Wnlib.Index.lookup(morph, Wnlib.PartOfSpeech.of(pos));
            if (index == null)
            {
                continue;
            }

            // record this morph's sense count under the current category
            WordInfo candidate = GetMorphInfo(candidates, morph);
            candidate.SenseCounts[i] = index.SenseCnt;
        }
    }

    // phase 2: score each candidate; strength is the sum of its sense counts,
    // and its partOfSpeech is set to its own highest-count category
    WordInfo best = new WordInfo();
    int bestStrength = 0;
    foreach (WordInfo candidate in candidates)
    {
        int topSenses = 0;
        int strength = 0;
        for (int i = 0; i < Enums.Length; i++)
        {
            if (Enums[i] == Wnlib.PartsOfSpeech.Unknown)
            {
                continue;
            }

            strength += candidate.SenseCounts[i];
            if (candidate.SenseCounts[i] > topSenses)
            {
                topSenses = candidate.SenseCounts[i];
                candidate.partOfSpeech = Enums[i];
            }
        }

        if (strength > bestStrength)
        {
            bestStrength = strength;
            best = candidate;
        }
    }
    return best;
}
/// <summary>Resolves a word via its morphological variants across all parts of speech.</summary>
/// <param name="word">The word that failed an exact-match lookup.</param>
/// <param name="tagged_only">
/// When true, only semantically tagged senses are counted and untagged entries are skipped.
/// </param>
/// <returns>The combined WordInfo produced by <c>WordInfo.Compine</c> over all morph matches.</returns>
/// <remarks>
/// Only called when the exact word was not found: every part of speech is enumerated,
/// each of the word's morphs in that category is looked up, and the per-morph
/// sense counts (tagged or total, per <paramref name="tagged_only"/>) are recorded.
/// </remarks>
private static WordInfo lookupWordMorphs(string word, bool tagged_only)
{
    ArrayList wordinfos = new ArrayList();

    // for each part of speech...
    for (int i = 0; i < enums.Length; i++)
    {
        // get a valid part of speech
        Wnlib.PartsOfSpeech pos = enums[i];
        if (pos == Wnlib.PartsOfSpeech.Unknown)
        {
            continue;
        }

        // generate the morph list for this category
        Wnlib.MorphStr morphs = new Wnlib.MorphStr(word, Wnlib.PartOfSpeech.of(pos));
        string morph;
        while ((morph = morphs.next()) != null)
        {
            // get an index to a synset collection; none found means no senses here
            Wnlib.Index index = Wnlib.Index.lookup(morph, Wnlib.PartOfSpeech.of(pos));
            if (index == null)
            {
                continue;
            }

            // when restricted to tagged senses, skip entries that have none
            if (tagged_only && index.tagsense_cnt == 0)
            {
                continue;
            }

            // save the appropriate sense count on this morph's accumulated info
            WordInfo wordinfo = getMorphInfo(wordinfos, morph);
            wordinfo.senseCounts[i] = tagged_only ? index.tagsense_cnt : index.sense_cnt;
        }
    }

    // NOTE(review): "Compine" looks like a typo for "Combine" on WordInfo — declared
    // elsewhere, so the name is left untouched here. The old hand-rolled best-match
    // scan that this call replaced had been left behind as commented-out code; it has
    // been deleted.
    return WordInfo.Compine(wordinfos);
}