Beispiel #1
0
        /// <summary>Looks up an exact word under every known part of speech.</summary>
        /// <param name="word">the word to look up, passed unchanged to Wnlib.Index.lookup</param>
        /// <returns>
        /// a WSDWordInfo whose senseCounts array has one slot per Wnlib.PartsOfSpeech value
        /// and whose partOfSpeech is the category with the highest sense count
        /// (Unknown when no category produced an index entry)
        /// </returns>
        private static WSDWordInfo lookupWord(string word)
        {
            // APPROACH:   Query the index once per part of speech and treat the category
            //             with the largest sense count as the most probable one.
            //
            // LIMITATION: Word meaning usually depends on context, so judging a word in
            //             isolation is only an approximation. A better design would pass
            //             the surrounding sentence along as context for the part-of-speech
            //             decision. That is hard to do, so this stays deliberately simple.

            WSDWordInfo result = new WSDWordInfo();
            result.partOfSpeech = Wnlib.PartsOfSpeech.Unknown;

            // one sense-count slot per enum value, indexed by the value's position
            Wnlib.PartsOfSpeech[] categories = (Wnlib.PartsOfSpeech[])Enum.GetValues(typeof(Wnlib.PartsOfSpeech));
            result.senseCounts = new int[categories.Length];

            int bestCount = 0;
            for (int slot = 0; slot < categories.Length; slot++)
            {
                Wnlib.PartsOfSpeech category = categories[slot];

                // Unknown is a placeholder, not a searchable category
                if (category != Wnlib.PartsOfSpeech.Unknown)
                {
                    Wnlib.Index entry = Wnlib.Index.lookup(word, Wnlib.PartOfSpeech.of(category));

                    // a null entry means the word is not listed under this category
                    if (entry != null)
                    {
                        int count = entry.sense_cnt;
                        result.senseCounts[slot] = count;

                        // strictly greater: on a tie the earlier category is kept
                        if (count > bestCount)
                        {
                            bestCount = count;
                            result.partOfSpeech = category;
                        }
                    }
                }
            }

            return result;
        }
Beispiel #2
0
        /*--------
         * Methods
         *--------*/

        /// <summary>Finds the part of speech for a given single word</summary>
        /// <param name="word">the word; it is lower-cased with invariant-culture rules before the lookup</param>
        /// <param name="includeMorphs">include morphology? (fuzzy matching, used only when the exact word yields nothing)</param>
        /// <returns>a structure containing information about the word</returns>
        /// <exception cref="ArgumentNullException"><paramref name="word"/> is null</exception>
        /// <remarks>
        /// This function is designed to determine the part of speech of a word. Since all
        /// of the WordNet search functions require the part of speech, this function will be useful
        /// in cases when the part of speech of a word is not known. It is not 100% correct
        /// because WordNet was most likely not intended to be used this way. However, it is
        /// accurate enough for most applications.
        /// </remarks>
        public static WSDWordInfo FindWordInfo(string word, bool includeMorphs)
        {
            // fail with a descriptive exception instead of a NullReferenceException below
            if (word == null)
            {
                throw new ArgumentNullException("word");
            }

            // Lower-case with invariant rules so the lookup key does not depend on the
            // current thread culture (e.g. under Turkish rules "I" would become a dotless "ı").
            word = word.ToLowerInvariant();
            WSDWordInfo wordinfo = lookupWord(word);

            // include morphology if nothing was found on the original word
            if (wordinfo.Strength == 0 && includeMorphs)
            {
                wordinfo = lookupWordMorphs(word);
            }

            return wordinfo;
        }
Beispiel #3
0
        /// <summary>Fetches the entry for a morph string from the list, creating it on first use.</summary>
        /// <param name="morphinfos">list of WSDWordInfo entries collected so far; a new entry is appended when no match exists</param>
        /// <param name="morph">the morph text to search for</param>
        /// <returns>the list entry whose text equals <paramref name="morph"/></returns>
        private static WSDWordInfo getMorphInfo(ArrayList morphinfos, string morph)
        {
            // A plain linear scan is enough here: the list is expected to stay small.
            for (int k = 0; k < morphinfos.Count; k++)
            {
                WSDWordInfo existing = (WSDWordInfo)morphinfos[k];
                if (existing.text == morph)
                {
                    return existing;
                }
            }

            // No match: build a fresh entry with one sense-count slot per entry of the
            // enums array (declared outside this method).
            WSDWordInfo created = new WSDWordInfo();
            created.text        = morph;
            created.senseCounts = new int[enums.Length];

            // Add returns the index at which the entry was stored; hand back that element.
            int storedAt = morphinfos.Add(created);
            return (WSDWordInfo)morphinfos[storedAt];
        }
Beispiel #4
0
        /// <summary>Looks up the morphological variants of a word and returns the strongest match.</summary>
        /// <param name="word">the word whose morphs are generated and looked up</param>
        /// <returns>
        /// the collected entry with the largest total sense count, with its partOfSpeech set to
        /// the category holding the most senses; a freshly constructed WSDWordInfo when no morph
        /// was found in the index
        /// </returns>
        private static WSDWordInfo lookupWordMorphs(string word)
        {
            // Intended as the fallback when an exact-match lookup found nothing.
            // Pass 1 walks every part of speech, generates the word's morphs for that
            // category, and records the sense count of each morph that exists in the index.
            // Pass 2 scores the recorded entries and keeps the strongest one.

            ArrayList candidates = new ArrayList();

            // ---- pass 1: collect sense counts per morph and per part of speech ----
            for (int slot = 0; slot < enums.Length; slot++)
            {
                Wnlib.PartsOfSpeech category = enums[slot];

                // Unknown is a placeholder, not a searchable category
                if (category != Wnlib.PartsOfSpeech.Unknown)
                {
                    Wnlib.MorphStr generator = new Wnlib.MorphStr(word, Wnlib.PartOfSpeech.of(category));

                    // next() yields morphs one at a time and signals the end with null
                    for (string form = generator.next(); form != null; form = generator.next())
                    {
                        Wnlib.Index entry = Wnlib.Index.lookup(form, Wnlib.PartOfSpeech.of(category));

                        // only morphs that exist in the index are recorded
                        if (entry != null)
                        {
                            WSDWordInfo info = getMorphInfo(candidates, form);
                            info.senseCounts[slot] = entry.sense_cnt;
                        }
                    }
                }
            }

            // ---- pass 2: pick the entry with the greatest total sense count ----
            WSDWordInfo winner          = new WSDWordInfo();
            int         winningStrength = 0;

            foreach (WSDWordInfo candidate in candidates)
            {
                int total         = 0;
                int dominantCount = 0;

                for (int slot = 0; slot < enums.Length; slot++)
                {
                    Wnlib.PartsOfSpeech category = enums[slot];
                    if (category == Wnlib.PartsOfSpeech.Unknown)
                    {
                        continue;
                    }

                    int count = candidate.senseCounts[slot];

                    // the total is the entry's strength; the largest single count decides
                    // which part of speech the entry is labelled with
                    total += count;
                    if (count > dominantCount)
                    {
                        dominantCount          = count;
                        candidate.partOfSpeech = category;
                    }
                }

                // strictly greater: on a tie the earlier entry is kept
                if (total > winningStrength)
                {
                    winningStrength = total;
                    winner          = candidate;
                }
            }

            return winner;
        }