Esempio n. 1
0
        /// <summary>
        /// Collects the lexemes that share a synset with the word behind <paramref name="index"/>.
        /// </summary>
        /// <param name="index">Index entry whose synset offsets are walked, one synset per sense.</param>
        /// <returns>
        /// The lexemes found, with underscores replaced by spaces, excluding entries that start
        /// with an upper-case letter and excluding the looked-up word itself. Never null, but
        /// possibly empty. A lexeme that occurs in several synsets is returned once per occurrence.
        /// </returns>
        private static string[] lookupSynonyms(Wnlib.Index index)
        {
            var synonyms = new ArrayList(10);

            // Candidates below have '_' replaced by ' '. Apply the same normalization to the
            // looked-up word so a multi-word entry is not reported as its own synonym.
            string self = index.Wd == null ? null : index.Wd.Replace("_", " ");

            // for each sense...
            for (int s = 0; s < index.SynsetOffsets.Length; s++)
            {
                // read in the synset for this sense
                var synset = new Wnlib.SynSet(index.SynsetOffsets[s], index.PartOfSpeech, index.Wd, null, s);

                for (int i = 0; i < synset.words.Length; i++)
                {
                    string word = synset.words[i].word.Replace("_", " ");

                    // Nothing useful to report for an empty lexeme (and word[0] would throw).
                    if (word.Length == 0)
                    {
                        continue;
                    }

                    // A capitalized lexeme is treated as a proper noun and skipped. char.IsUpper
                    // is used rather than a "<= 'Z'" comparison, which would also reject lexemes
                    // starting with a digit or punctuation and miss non-ASCII capitals.
                    if (char.IsUpper(word[0]))
                    {
                        continue;
                    }

                    // Keep it only if it differs from the looked-up word. The comparison is
                    // ordinal/case-insensitive so the result does not depend on the current culture.
                    if (!string.Equals(word, self, System.StringComparison.OrdinalIgnoreCase))
                    {
                        synonyms.Add(word);
                    }
                }
            }

            return (string[])synonyms.ToArray(typeof(string));
        }
Esempio n. 2
0
        /// <summary>
        /// Looks <paramref name="word"/> up under every part of speech and selects the part of
        /// speech whose index entry reports the most senses.
        /// </summary>
        /// <param name="word">The word to look up; passed unchanged to <c>Wnlib.Index.lookup</c>.</param>
        /// <returns>
        /// A <c>WordInfo</c> whose <c>SenseCounts</c> array has one slot per
        /// <c>Wnlib.PartsOfSpeech</c> value (in <c>Enum.GetValues</c> order) and whose
        /// <c>partOfSpeech</c> is the value with the strictly highest sense count, or
        /// <c>Unknown</c> when no lookup produced a positive count.
        /// </returns>
        /// <remarks>
        /// The word is judged in isolation. Word meaning is often context-dependent, so passing
        /// the surrounding sentence as context would give a better answer; that is deliberately
        /// left out here to keep the lookup simple.
        /// </remarks>
        private static WordInfo lookupWord(string word)
        {
            WordInfo result = new WordInfo();
            result.partOfSpeech = Wnlib.PartsOfSpeech.Unknown;

            var allPartsOfSpeech = (Wnlib.PartsOfSpeech[])Enum.GetValues(typeof(Wnlib.PartsOfSpeech));
            result.SenseCounts = new int[allPartsOfSpeech.Length];

            int bestCount = 0;

            for (int slot = 0; slot < allPartsOfSpeech.Length; slot++)
            {
                Wnlib.PartsOfSpeech candidate = allPartsOfSpeech[slot];

                // Unknown is a placeholder, not something that can be looked up.
                if (candidate != Wnlib.PartsOfSpeech.Unknown)
                {
                    Wnlib.Index entry = Wnlib.Index.lookup(word, Wnlib.PartOfSpeech.of(candidate));

                    // A null entry means the word is not listed under this part of speech;
                    // its slot keeps the array's default of 0.
                    if (entry != null)
                    {
                        int senses = entry.SenseCnt;
                        result.SenseCounts[slot] = senses;

                        // Strict comparison: on a tie the earlier part of speech wins.
                        if (senses > bestCount)
                        {
                            bestCount = senses;
                            result.partOfSpeech = candidate;
                        }
                    }
                }
            }

            return result;
        }
Esempio n. 3
0
        /// <summary>Returns the synonyms of a word for a given part of speech.</summary>
        /// <param name="word">The word to look up. It is lower-cased (invariant culture) before the lookup.</param>
        /// <param name="pos">The part of speech under which the word is looked up.</param>
        /// <param name="includeMorphs">
        /// When true and the exact word is not found, the word's morphs are tried in turn and
        /// the first one that has an index entry is used (fuzzy matching).
        /// </param>
        /// <returns>
        /// An array of strings containing the synonyms found, or <c>null</c> when neither the
        /// word nor (if requested) any of its morphs has an index entry.
        /// </returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="word"/> is null.</exception>
        /// <remarks>
        /// "Synonyms" here does not mean hypernyms as defined by WordNet. Synonyms in this sense
        /// are merely the words in the same synset as the given word. Hypernyms are found by
        /// tracing the pointers in a given synset.
        /// </remarks>
        public static string[] FindSynonyms(string word, Wnlib.PartsOfSpeech pos, bool includeMorphs)
        {
            if (word == null)
            {
                throw new System.ArgumentNullException("word");
            }

            // Lower-case with the invariant culture: the lookup key must not depend on the
            // current culture (e.g. under tr-TR, ToLower() maps 'I' to a dotless 'ı').
            word = word.ToLowerInvariant();

            // get an index to a synset collection
            Wnlib.Index index = Wnlib.Index.lookup(word, Wnlib.PartOfSpeech.of(pos));

            // No exact match: optionally fall back to the first morph that has an entry.
            if (index == null && includeMorphs)
            {
                var morphs = new Wnlib.MorphStr(word, Wnlib.PartOfSpeech.of(pos));
                string morph;
                while ((morph = morphs.next()) != null)
                {
                    index = Wnlib.Index.lookup(morph, Wnlib.PartOfSpeech.of(pos));
                    if (index != null)
                    {
                        break;
                    }
                }
            }

            // still none found?
            if (index == null)
            {
                return null;
            }

            // at this point we always have a valid index
            return lookupSynonyms(index);
        }
Esempio n. 4
0
        /// <summary>
        /// Fallback lookup used when a word has no exact match: tries the morphs of
        /// <paramref name="word"/> under every part of speech and returns the strongest one.
        /// </summary>
        /// <param name="word">The word whose morphs are generated and looked up.</param>
        /// <returns>
        /// The candidate <c>WordInfo</c> with the highest total sense count, with its
        /// <c>partOfSpeech</c> set to the part of speech holding its largest count. When no
        /// candidate has a positive total, a freshly constructed <c>WordInfo</c> is returned.
        /// </returns>
        private static WordInfo LookupWordMorphs(string word)
        {
            ArrayList candidates = new ArrayList();

            // Pass 1: for every part of speech, record the sense count of each morph that
            // has an index entry.
            for (int slot = 0; slot < Enums.Length; slot++)
            {
                Wnlib.PartsOfSpeech category = Enums[slot];
                if (category != Wnlib.PartsOfSpeech.Unknown)
                {
                    Wnlib.MorphStr generator = new Wnlib.MorphStr(word, Wnlib.PartOfSpeech.of(category));
                    for (string form = generator.next(); form != null; form = generator.next())
                    {
                        Wnlib.Index entry = Wnlib.Index.lookup(form, Wnlib.PartOfSpeech.of(category));
                        if (entry != null)
                        {
                            // GetMorphInfo is defined elsewhere; presumably it returns the list's
                            // record for this morph, adding one if needed — confirm.
                            WordInfo record = GetMorphInfo(candidates, form);
                            record.SenseCounts[slot] = entry.SenseCnt;
                        }
                    }
                }
            }

            // Pass 2: score every candidate and keep the strongest.
            WordInfo winner = new WordInfo();
            int winnerStrength = 0;

            for (int c = 0; c < candidates.Count; c++)
            {
                WordInfo candidate = (WordInfo)candidates[c];
                int total = 0;
                int topCount = 0;

                for (int slot = 0; slot < Enums.Length; slot++)
                {
                    Wnlib.PartsOfSpeech category = Enums[slot];
                    if (category == Wnlib.PartsOfSpeech.Unknown)
                    {
                        continue;
                    }

                    int count = candidate.SenseCounts[slot];
                    total += count;

                    // The candidate's part of speech is the one with its largest count;
                    // strict comparison means the earliest wins a tie.
                    if (count > topCount)
                    {
                        topCount = count;
                        candidate.partOfSpeech = category;
                    }
                }

                // Strength is the sum over all parts of speech; the first strongest wins.
                if (total > winnerStrength)
                {
                    winnerStrength = total;
                    winner = candidate;
                }
            }

            return winner;
        }
Esempio n. 5
0
        /// <summary>
        /// Fallback lookup used when a word has no exact match: gathers per-part-of-speech
        /// sense counts for every morph of <paramref name="word"/> and combines them.
        /// </summary>
        /// <param name="word">The word whose morphs are generated and looked up.</param>
        /// <param name="tagged_only">
        /// When true, morphs whose index entry has a <c>tagsense_cnt</c> of 0 are ignored and
        /// <c>tagsense_cnt</c> is recorded; otherwise <c>sense_cnt</c> is recorded.
        /// </param>
        /// <returns>The result of <c>WordInfo.Compine</c> applied to the collected records.</returns>
        private static WordInfo lookupWordMorphs(string word, bool tagged_only)
        {
            ArrayList collected = new ArrayList();

            for (int slot = 0; slot < enums.Length; slot++)
            {
                Wnlib.PartsOfSpeech category = enums[slot];
                if (category != Wnlib.PartsOfSpeech.Unknown)
                {
                    Wnlib.MorphStr generator = new Wnlib.MorphStr(word, Wnlib.PartOfSpeech.of(category));
                    for (string form = generator.next(); form != null; form = generator.next())
                    {
                        Wnlib.Index entry = Wnlib.Index.lookup(form, Wnlib.PartOfSpeech.of(category));

                        // Skip morphs with no entry, and (in tagged-only mode) entries
                        // that have no tagged senses.
                        if (entry == null || (tagged_only && entry.tagsense_cnt == 0))
                        {
                            continue;
                        }

                        // getMorphInfo is defined elsewhere; presumably it returns the list's
                        // record for this morph, adding one if needed — confirm.
                        WordInfo record = getMorphInfo(collected, form);
                        record.senseCounts[slot] = tagged_only ? entry.tagsense_cnt : entry.sense_cnt;
                    }
                }
            }

            // Picking the final result is delegated to WordInfo.Compine (it stands in for an
            // earlier inline "strongest candidate" search that used to live here).
            return WordInfo.Compine(collected);
        }