/// <summary>
/// Looks up an exact word under every known part of speech and records how many
/// senses each part of speech reports for it.
/// </summary>
/// <param name="word">the word to look up, exactly as it should be matched</param>
/// <returns>
/// A word-info object whose <c>senseCounts</c> holds one slot per part-of-speech enum
/// value and whose <c>partOfSpeech</c> is the category with the highest sense count
/// (or <c>Unknown</c> when no category produced a match).
/// </returns>
/// <remarks>
/// LIMITATION: word meaning is often context dependent, so judging a single word in
/// isolation is only an approximation. A better approach would take the surrounding
/// sentence as an extra "context" argument, but that is hard to do well, so this
/// method deliberately stays simple for now.
/// </remarks>
private static WSDWordInfo lookupWord(string word)
{
    // One sense-count slot is reserved per enum value, including Unknown.
    Wnlib.PartsOfSpeech[] categories = (Wnlib.PartsOfSpeech[])Enum.GetValues(typeof(Wnlib.PartsOfSpeech));

    WSDWordInfo result = new WSDWordInfo();
    result.partOfSpeech = Wnlib.PartsOfSpeech.Unknown;
    result.senseCounts = new int[categories.Length];

    int highestCount = 0;
    int slot = -1;

    foreach (Wnlib.PartsOfSpeech category in categories)
    {
        // Keep the slot number in step with the position in the enum array.
        slot++;

        // Unknown is a placeholder, not a searchable category.
        if (category == Wnlib.PartsOfSpeech.Unknown)
        {
            continue;
        }

        // Ask for the index entry of this word within the current category.
        Wnlib.Index entry = Wnlib.Index.lookup(word, Wnlib.PartOfSpeech.of(category));
        if (entry == null)
        {
            // The word does not occur under this part of speech.
            continue;
        }

        result.senseCounts[slot] = entry.sense_cnt;

        // Strictly greater: on a tie the earlier category stays selected.
        if (result.senseCounts[slot] > highestCount)
        {
            highestCount = result.senseCounts[slot];
            result.partOfSpeech = category;
        }
    }

    return result;
}
/*--------
 * Methods
 *--------*/

/// <summary>Finds the part of speech for a given single word.</summary>
/// <param name="word">the word; it is lower-cased (culture-invariant) before lookup</param>
/// <param name="includeMorphs">include morphology? (fuzzy matching)</param>
/// <returns>a structure containing information about the word</returns>
/// <exception cref="ArgumentNullException"><paramref name="word"/> is null</exception>
/// <remarks>
/// This function is designed to determine the part of speech of a word. Since all
/// of the WordNet search functions require the part of speech, this function is useful
/// in cases where the part of speech of a word is not known. It is not 100% correct
/// because WordNet was most likely not intended to be used this way. However, it is
/// accurate enough for most applications.
/// </remarks>
public static WSDWordInfo FindWordInfo(string word, bool includeMorphs)
{
    if (word == null)
    {
        throw new ArgumentNullException("word");
    }

    // Use the invariant culture: the lookup key must not depend on the current
    // thread's culture (e.g. Turkish casing would turn "I" into a dotless "ı").
    word = word.ToLowerInvariant();

    WSDWordInfo wordinfo = lookupWord(word);

    // include morphology if nothing was found on the original word
    if (wordinfo.Strength == 0 && includeMorphs)
    {
        wordinfo = lookupWordMorphs(word);
    }

    return wordinfo;
}
/// <summary>
/// Returns the entry in <paramref name="morphinfos"/> whose text equals
/// <paramref name="morph"/>, creating and appending a new entry when none exists.
/// </summary>
/// <param name="morphinfos">list of word-info entries collected so far</param>
/// <param name="morph">the morph text to find or register</param>
/// <returns>the existing or newly added entry for the morph</returns>
private static WSDWordInfo getMorphInfo(ArrayList morphinfos, string morph)
{
    // A plain linear scan is sufficient: the list is expected to stay small.
    for (int k = 0; k < morphinfos.Count; k++)
    {
        WSDWordInfo candidate = (WSDWordInfo)morphinfos[k];
        if (candidate.text == morph)
        {
            return candidate;
        }
    }

    // Not registered yet: create an entry with one sense-count slot per
    // part-of-speech enum value and append it to the list.
    WSDWordInfo created = new WSDWordInfo();
    created.text = morph;
    created.senseCounts = new int[enums.Length];
    morphinfos.Add(created);

    return created;
}
/// <summary>
/// Looks up the morphological variants of a word and returns the strongest match.
/// </summary>
/// <param name="word">the word whose morphs should be searched</param>
/// <returns>
/// The word-info entry with the highest total sense count across all parts of speech.
/// When no morph is found, an entry with part of speech <c>Unknown</c> and all-zero
/// sense counts is returned.
/// </returns>
private static WSDWordInfo lookupWordMorphs(string word)
{
    // OVERVIEW: This function only gets called when the word was not found with
    //           an exact match. So, enumerate all the parts of speech, then enumerate
    //           all of the word's morphs in each category. Perform a lookup on each
    //           morph and save the morph/strength/part-of-speech data sets. Finally,
    //           loop over all the data sets and pick the strongest one.
    ArrayList wordinfos = new ArrayList();

    // for each part of speech...
    for (int i = 0; i < enums.Length; i++)
    {
        // get a valid part of speech
        Wnlib.PartsOfSpeech pos = enums[i];
        if (pos == Wnlib.PartsOfSpeech.Unknown)
        {
            continue;
        }

        // generate morph list
        Wnlib.MorphStr morphs = new Wnlib.MorphStr(word, Wnlib.PartOfSpeech.of(pos));
        string morph;
        while ((morph = morphs.next()) != null)
        {
            // get an index to a synset collection
            Wnlib.Index index = Wnlib.Index.lookup(morph, Wnlib.PartOfSpeech.of(pos));

            // none found?
            if (index == null)
            {
                continue;
            }

            // save the sense count in this morph's entry; the same morph may be
            // found under several parts of speech and shares a single entry
            WSDWordInfo wordinfo = getMorphInfo(wordinfos, morph);
            wordinfo.senseCounts[i] = index.sense_cnt;
        }
    }

    // Fallback returned when no morph matched. It is initialised the same way
    // lookupWord initialises its result, so callers always receive a populated
    // senseCounts array and an explicit Unknown part of speech.
    WSDWordInfo bestWordInfo = new WSDWordInfo();
    bestWordInfo.partOfSpeech = Wnlib.PartsOfSpeech.Unknown;
    bestWordInfo.senseCounts = new int[enums.Length];

    // search the wordinfo list for the best match
    int maxStrength = 0;
    foreach (WSDWordInfo wordinfo in wordinfos)
    {
        // for each part of speech...
        int maxSenseCount = 0;
        int strength = 0;
        for (int i = 0; i < enums.Length; i++)
        {
            // get a valid part of speech
            Wnlib.PartsOfSpeech pos = enums[i];
            if (pos == Wnlib.PartsOfSpeech.Unknown)
            {
                continue;
            }

            // strength is the total over all categories; the part of speech is
            // the category with the largest individual sense count
            strength += wordinfo.senseCounts[i];
            if (wordinfo.senseCounts[i] > maxSenseCount)
            {
                maxSenseCount = wordinfo.senseCounts[i];
                wordinfo.partOfSpeech = pos;
            }
        }

        // best match? (strictly greater: the first of several equal entries wins)
        if (strength > maxStrength)
        {
            maxStrength = strength;
            bestWordInfo = wordinfo;
        }
    }

    return bestWordInfo;
}