Beispiel #1
0
        /// <summary>
        /// Merges a list of morph entries into a single <c>WordInfo</c>.
        /// </summary>
        /// <param name="morphinfos">List whose elements are <c>WordInfo</c> instances.</param>
        /// <returns>
        /// A new <c>WordInfo</c> with a five-slot sense-count array in which slots 1..4 hold
        /// the sums over all entries, and whose texts hold the distinct first text of each
        /// entry in encounter order.
        /// </returns>
        public static WordInfo Compine(ArrayList morphinfos)
        {
            WordInfo combined = new WordInfo();

            combined.texts       = new ArrayList();
            combined.senseCounts = new int[5];

            foreach (WordInfo current in morphinfos)
            {
                // Slot 0 is skipped; only slots 1..4 are accumulated.
                int slot = 1;
                while (slot < 5)
                {
                    combined.senseCounts[slot] += current.senseCounts[slot];
                    slot++;
                }

                // Only the first text of each entry is carried over, without duplicates.
                object firstText = current.texts[0];
                if (combined.texts.Contains(firstText))
                {
                    continue;
                }
                combined.texts.Add(firstText);
            }

            return combined;
        }
Beispiel #2
0
        /// <summary>
        /// Returns the entry of <paramref name="morphinfos"/> whose text equals
        /// <paramref name="morph"/>, appending a fresh entry when none exists yet.
        /// </summary>
        /// <param name="morphinfos">List of <c>WordInfo</c> entries; may be extended by this call.</param>
        /// <param name="morph">Morph string to look for.</param>
        /// <returns>The existing entry, or the newly appended one.</returns>
        private static WordInfo GetMorphInfo(ArrayList morphinfos, string morph)
        {
            // The list is expected to stay small, so a plain linear scan is enough.
            for (int position = 0; position < morphinfos.Count; position++)
            {
                WordInfo candidate = (WordInfo)morphinfos[position];
                if (candidate.text == morph)
                {
                    return candidate;
                }
            }

            // No entry yet: create one with an empty count slot per Enums element.
            WordInfo created = new WordInfo();

            created.SenseCounts = new int[Enums.Length];
            created.text        = morph;

            int addedAt = morphinfos.Add(created);
            return (WordInfo)morphinfos[addedAt];
        }
Beispiel #3
0
        /// <summary>
        /// Combines the lookup result of a word with the lookup result of one of its morphs.
        /// </summary>
        /// <param name="word">Lookup result for the word itself.</param>
        /// <param name="morph">Lookup result for a morph of the word.</param>
        /// <returns>
        /// <paramref name="morph"/> when <paramref name="word"/> has zero strength,
        /// <paramref name="word"/> when <paramref name="morph"/> has zero strength, otherwise a
        /// new <c>WordInfo</c> holding the morph's texts plus the word's first text, the
        /// element-wise sum of both sense-count arrays, and the part of speech with the
        /// largest summed count (<c>Unknown</c> when every count is zero).
        /// </returns>
        public static WordInfo Compine(WordInfo word, WordInfo morph)
        {
            if (word.Strength == 0)
            {
                return(morph);
            }
            if (morph.Strength == 0)
            {
                return(word);
            }
            WordInfo result = new WordInfo();

            result.texts = new ArrayList();
            result.texts.AddRange(morph.texts);
            if (!result.texts.Contains(word.texts[0]))
            {
                result.texts.Add(word.texts[0]);
            }

            int wordLen  = word.senseCounts.Length;
            int morphLen = morph.senseCounts.Length;
            int maxLen   = Math.Max(wordLen, morphLen);

            result.senseCounts = new int[maxLen];
            for (int i = 0; i < maxLen; i++)
            {
                // The arrays may differ in length; a missing slot contributes zero
                // instead of raising IndexOutOfRangeException.
                int fromWord  = i < wordLen ? word.senseCounts[i] : 0;
                int fromMorph = i < morphLen ? morph.senseCounts[i] : 0;
                result.senseCounts[i] = fromWord + fromMorph;
            }

            Wnlib.PartsOfSpeech[] enums =
                (Wnlib.PartsOfSpeech[])Enum.GetValues(typeof(Wnlib.PartsOfSpeech));
            int maxCount = 0;

            result.partOfSpeech = Wnlib.PartsOfSpeech.Unknown;

            // Never index past the end of the summed array, even if it is shorter
            // than the enumeration.
            int limit = Math.Min(enums.Length, maxLen);
            for (int i = 0; i < limit; i++)
            {
                // Strict '>' keeps the earliest part of speech on ties.
                if (result.senseCounts[i] > maxCount)
                {
                    maxCount            = result.senseCounts[i];
                    result.partOfSpeech = enums[i];
                }
            }
            return(result);
        }
		/// <summary>
		/// Finds the entry for <paramref name="morph"/> in <paramref name="morphinfos"/>,
		/// creating and appending a new one when the morph has not been seen before.
		/// </summary>
		/// <param name="morphinfos">List of <c>WordInfo</c> entries; may be extended by this call.</param>
		/// <param name="morph">Morph string to look for.</param>
		/// <returns>The matching entry, or the newly appended one.</returns>
		private static WordInfo getMorphInfo(ArrayList morphinfos, string morph)
		{
			// Sequential search; the list is not expected to grow large.
			foreach (WordInfo existing in morphinfos)
			{
				if (existing.text != morph)
				{
					continue;
				}
				return existing;
			}

			// Not present: build a blank entry with one count slot per enums element.
			WordInfo created = new WordInfo();
			created.senseCounts = new int[enums.Length];
			created.text = morph;

			int addedAt = morphinfos.Add(created);
			return (WordInfo)morphinfos[addedAt];
		}
		/// <summary>
		/// Looks up every morph of <paramref name="word"/> under every part of speech and
		/// returns the morph whose sense counts add up to the largest total. Intended for
		/// words that had no exact match.
		/// </summary>
		/// <param name="word">Word to generate morphs for.</param>
		/// <returns>
		/// The strongest morph entry, or a freshly constructed <c>WordInfo</c> when no morph
		/// has a positive total.
		/// </returns>
		private static WordInfo lookupWordMorphs(string word)
		{
			ArrayList candidates = new ArrayList();

			// Pass 1: record a sense count per morph and per part-of-speech slot.
			for (int slot = 0; slot < enums.Length; slot++)
			{
				Wnlib.PartsOfSpeech pos = enums[slot];
				if (pos != Wnlib.PartsOfSpeech.Unknown)
				{
					Wnlib.MorphStr morphs = new Wnlib.MorphStr(word, Wnlib.PartOfSpeech.of(pos));
					for (string morph = morphs.next(); morph != null; morph = morphs.next())
					{
						Wnlib.Index index = Wnlib.Index.lookup(morph, Wnlib.PartOfSpeech.of(pos));
						if (index != null)
						{
							// One entry per distinct morph; getMorphInfo creates it on first use.
							getMorphInfo(candidates, morph).senseCounts[slot] = index.sense_cnt;
						}
					}
				}
			}

			// Pass 2: total each candidate, tag its dominant part of speech, keep the strongest.
			WordInfo strongest = new WordInfo();
			int strongestTotal = 0;
			foreach (WordInfo candidate in candidates)
			{
				int total = 0;
				int topCount = 0;
				for (int slot = 0; slot < enums.Length; slot++)
				{
					Wnlib.PartsOfSpeech pos = enums[slot];
					if (pos != Wnlib.PartsOfSpeech.Unknown)
					{
						int count = candidate.senseCounts[slot];
						total += count;
						if (count > topCount)
						{
							topCount = count;
							candidate.partOfSpeech = pos;
						}
					}
				}

				// Strict '>' means the first candidate wins a tie.
				if (total > strongestTotal)
				{
					strongestTotal = total;
					strongest = candidate;
				}
			}

			return strongest;
		}
		/// <summary>
		/// Looks up <paramref name="word"/> under each part of speech and picks the part of
		/// speech with the highest sense count as the most probable one.
		/// </summary>
		/// <remarks>
		/// The word is judged in isolation. Taking the surrounding sentence into account
		/// would give better results but is not attempted here.
		/// </remarks>
		/// <param name="word">Word to look up.</param>
		/// <returns>
		/// A new <c>WordInfo</c> with one sense count per part-of-speech slot; its
		/// partOfSpeech stays <c>Unknown</c> when no lookup yields a positive count.
		/// </returns>
		private static WordInfo lookupWord(string word)
		{
			Wnlib.PartsOfSpeech[] partsOfSpeech = (Wnlib.PartsOfSpeech[])Enum.GetValues(typeof(Wnlib.PartsOfSpeech));

			WordInfo result = new WordInfo();
			result.partOfSpeech = Wnlib.PartsOfSpeech.Unknown;
			result.senseCounts = new int[partsOfSpeech.Length];

			int highest = 0;
			for (int slot = 0; slot < partsOfSpeech.Length; slot++)
			{
				Wnlib.PartsOfSpeech pos = partsOfSpeech[slot];
				if (pos != Wnlib.PartsOfSpeech.Unknown)
				{
					Wnlib.Index index = Wnlib.Index.lookup(word, Wnlib.PartOfSpeech.of(pos));

					// A null index leaves the slot at zero.
					if (index != null)
					{
						int count = index.sense_cnt;
						result.senseCounts[slot] = count;

						// Strict '>' keeps the earliest part of speech on ties.
						if (count > highest)
						{
							highest = count;
							result.partOfSpeech = pos;
						}
					}
				}
			}

			return result;
		}
Beispiel #7
0
        /// <summary>
        /// Fallback lookup for a word without an exact match: collects sense counts for every
        /// morph of <paramref name="word"/> under every part of speech and returns the morph
        /// with the largest summed count.
        /// </summary>
        /// <param name="word">Word to generate morphs for.</param>
        /// <returns>
        /// The strongest morph entry, or a freshly constructed <c>WordInfo</c> when no morph
        /// has a positive total.
        /// </returns>
        private static WordInfo LookupWordMorphs(string word)
        {
            ArrayList gathered = new ArrayList();

            // Step 1: one entry per distinct morph, one count slot per part of speech.
            for (int slot = 0; slot < Enums.Length; slot++)
            {
                Wnlib.PartsOfSpeech pos = Enums[slot];
                if (pos != Wnlib.PartsOfSpeech.Unknown)
                {
                    Wnlib.MorphStr morphs = new Wnlib.MorphStr(word, Wnlib.PartOfSpeech.of(pos));
                    for (string morph = morphs.next(); morph != null; morph = morphs.next())
                    {
                        Wnlib.Index index = Wnlib.Index.lookup(morph, Wnlib.PartOfSpeech.of(pos));
                        if (index == null)
                        {
                            // Nothing indexed for this morph under this part of speech.
                            continue;
                        }

                        WordInfo entry = GetMorphInfo(gathered, morph);
                        entry.SenseCounts[slot] = index.SenseCnt;
                    }
                }
            }

            // Step 2: rank the entries by total sense count.
            WordInfo winner      = new WordInfo();
            int      winnerTotal = 0;

            foreach (WordInfo entry in gathered)
            {
                int total    = 0;
                int topCount = 0;
                for (int slot = 0; slot < Enums.Length; slot++)
                {
                    Wnlib.PartsOfSpeech pos = Enums[slot];
                    if (pos != Wnlib.PartsOfSpeech.Unknown)
                    {
                        int count = entry.SenseCounts[slot];
                        total += count;

                        // The slot with the largest count decides the entry's part of speech.
                        if (count > topCount)
                        {
                            topCount           = count;
                            entry.partOfSpeech = pos;
                        }
                    }
                }

                // Strict '>' means the first entry wins a tie.
                if (total > winnerTotal)
                {
                    winnerTotal = total;
                    winner      = entry;
                }
            }

            return winner;
        }
Beispiel #8
0
        /// <summary>
        /// Classifies <paramref name="word"/> as an adjective form relative to the candidate
        /// base texts in <paramref name="texts"/>.
        /// </summary>
        /// <param name="word">Word to classify; compared against upper-cased texts.</param>
        /// <param name="texts">List of candidate base-form strings.</param>
        /// <returns>
        /// "PADJ", "CADJ" or "SADJ" for the first text that yields a classification, or
        /// <c>null</c> when none does.
        /// </returns>
        public static string findadjinfo(string word, ArrayList texts)
        {
            foreach (string s in texts)
            {
                string upper = s.ToUpper();

                if (Adj.Contains(upper))
                {
                    // Adj maps a base form to a list of comma-separated form strings;
                    // the list position of the matching string selects the tag.
                    ArrayList Adjects = (ArrayList)Adj[upper];
                    if (Adjects.Count == 1)
                    {
                        return("PADJ");
                    }
                    for (int i = 0; i < Adjects.Count; i++)
                    {
                        foreach (string r in ((string)Adjects[i]).Split(','))
                        {
                            if (word == r)
                            {
                                switch (i)
                                {
                                case 0:
                                    return("PADJ");

                                case 1:
                                    return("CADJ");

                                case 2:
                                    return("SADJ");
                                }
                            }
                        }
                    }

                    // A listed adjective without a matching form never reaches the suffix rules.
                    continue;
                }

                if (word == upper + "ER")
                {
                    return("CADJ");
                }
                if (word == upper + "EST")
                {
                    return("SADJ");
                }

                // Base forms ending in "E" drop it before the suffix. Both suffix tests sit
                // under the EndsWith guard: the original '&&'/'||' mix let the "EST" test
                // run for any text and call Substring(0, -1) on an empty one.
                if (upper.EndsWith("E"))
                {
                    string stem = upper.Substring(0, upper.Length - 1);
                    if (word == stem + "ER")
                    {
                        return("CADJ");
                    }
                    if (word == stem + "EST")
                    {
                        return("SADJ");
                    }
                }

                // No suffix rule applied: fall back to a lookup and test count slot 3.
                WordInfo wi = FindWordInfo(word, tagged_only, false);
                if (wi.senseCounts[3] > 0)
                {
                    return("PADJ");
                }
            }
            return(null);
        }
Beispiel #9
0
        /// <summary>
        /// Collects the verb-form tags ("VINF", "V", "VPSP", "VING") that apply to
        /// <paramref name="word"/> relative to the candidate base texts in
        /// <paramref name="texts"/>.
        /// </summary>
        /// <param name="word">Word to classify; compared against upper-cased texts.</param>
        /// <param name="texts">List of candidate base-form strings.</param>
        /// <returns>
        /// A list of distinct tag strings. "V" is appended at the end whenever "VINF" or
        /// "VPSP" is present without it.
        /// </returns>
        private static ArrayList findverbinfo(string word, ArrayList texts)
        {
            // Tag added when the word matches the i-th form list of a Verbs entry.
            string[] formTags = { "VINF", "V", "VPSP", "VING", "V" };

            ArrayList verbinfo = new ArrayList();

            foreach (string s in texts)
            {
                string upper     = s.ToUpper();
                bool   endsWithE = upper.EndsWith("E");
                bool   endsWithY = upper.EndsWith("Y");

                // Base form without its final letter; only needed (and only valid) for E/Y endings.
                string stem = (endsWithE || endsWithY) ? upper.Substring(0, upper.Length - 1) : null;

                if (Verbs.Contains(upper))
                {
                    // Verbs maps a base form to a list of comma-separated form strings.
                    ArrayList verbs = (ArrayList)Verbs[upper];
                    for (int i = 0; i < verbs.Count; i++)
                    {
                        foreach (string r in ((string)verbs[i]).Split(','))
                        {
                            if (word == r && i < formTags.Length)
                            {
                                AddVerbTagOnce(verbinfo, formTags[i]);
                            }
                        }
                    }

                    if (word == upper + "ING")
                    {
                        AddVerbTagOnce(verbinfo, "VING");
                    }
                    if (endsWithE)
                    {
                        if (word == stem + "ING")
                        {
                            AddVerbTagOnce(verbinfo, "VING");
                        }
                        // This branch used to hang off the inner Contains("VING") test and
                        // could never run; it belongs to the "ING" test.
                        else if (word == stem + "ES")
                        {
                            AddVerbTagOnce(verbinfo, "V");
                        }
                    }
                    if (word == upper + "S" || word == upper + "ES")
                    {
                        AddVerbTagOnce(verbinfo, "V");
                    }
                    if (endsWithY && word == stem + "IES")
                    {
                        AddVerbTagOnce(verbinfo, "V");
                    }
                }

                // Suffix rules applied to every text, whether or not it is listed in Verbs.
                if (word == upper + "ED")
                {
                    AddVerbTagOnce(verbinfo, "VPSP");
                }
                else if (word == upper + "ING")
                {
                    AddVerbTagOnce(verbinfo, "VING");
                }
                else if (word == upper + "S" || word == upper + "ES")
                {
                    AddVerbTagOnce(verbinfo, "V");
                }
                else if (endsWithE && (word == stem + "ING" || word == stem + "ES" || word == stem + "ED"))
                {
                    if (word == stem + "ING")
                    {
                        AddVerbTagOnce(verbinfo, "VING");
                    }
                    else if (word == stem + "ES")
                    {
                        AddVerbTagOnce(verbinfo, "V");
                    }
                    else
                    {
                        AddVerbTagOnce(verbinfo, "VPSP");
                    }
                }
                else if (endsWithY && word == stem + "IES")
                {
                    AddVerbTagOnce(verbinfo, "V");
                }
                else
                {
                    // No suffix rule applied: fall back to a lookup and test count slot 2.
                    WordInfo wi = FindWordInfo(word, tagged_only, false);
                    if (wi.senseCounts[2] > 0)
                    {
                        AddVerbTagOnce(verbinfo, "VINF");
                    }
                }
            }

            if (!verbinfo.Contains("V") &&
                (verbinfo.Contains("VINF") || verbinfo.Contains("VPSP")))
            {
                verbinfo.Add("V");
            }

            return(verbinfo);
        }

        /// <summary>
        /// Appends <paramref name="tag"/> to <paramref name="tags"/> unless it is already present.
        /// </summary>
        private static void AddVerbTagOnce(ArrayList tags, string tag)
        {
            if (!tags.Contains(tag))
            {
                tags.Add(tag);
            }
        }
Beispiel #10
0
        /// <summary>
        /// Decides whether <paramref name="Word"/> is a plural noun.
        /// </summary>
        /// <param name="Word">Word to test; compared against upper-cased keys and suffixes.</param>
        /// <returns>
        /// <c>true</c> when the word equals an upper-cased key of <c>Noun</c>, or when removing
        /// a plural suffix yields a word whose lookup has a positive count in slot 1;
        /// otherwise <c>false</c>.
        /// </returns>
        public static bool IsPlural(string Word)
        {
            foreach (string s in Noun.Keys)
            {
                if (s.ToUpper() == Word)
                {
                    return(true);
                }
            }

            // Plural suffixes and the endings that replace them, most specific first:
            //   "ches", "shes", "ses", "xes", "zes", "men", "ies", "s"
            //   "ch",   "sh",   "s",   "x",   "z",   "man", "y",   ""
            string[] suffixes = { "CHES", "SHES", "SES", "XES", "ZES", "MEN", "IES", "S" };
            string[] endings  = { "CH",   "SH",   "S",   "X",   "Z",   "MAN", "Y",   ""  };

            // Try every rule that matches, not just the first one: "HOUSES" fails the
            // "SES" rule ("HOUS") but succeeds with the plain "S" rule ("HOUSE").
            for (int i = 0; i < suffixes.Length; i++)
            {
                if (!Word.EndsWith(suffixes[i]))
                {
                    continue;
                }

                string singular = Word.Remove(Word.Length - suffixes[i].Length, suffixes[i].Length) + endings[i];
                WordInfo wi = FindWordInfo(singular, tagged_only, false);

                if (wi.senseCounts[1] > 0)
                {
                    return(true);
                }
            }

            return(false);
        }
Beispiel #11
0
        /// <summary>
        /// Fallback lookup for a word without an exact match: collects sense counts for every
        /// morph of <paramref name="word"/> under every part of speech and merges all morph
        /// entries into one result via <c>WordInfo.Compine</c>.
        /// </summary>
        /// <param name="word">Word to generate morphs for.</param>
        /// <param name="tagged_only">
        /// When <c>true</c>, morphs whose index has a zero tagged-sense count are ignored and
        /// the tagged-sense count is recorded instead of the plain sense count.
        /// </param>
        /// <returns>The merged entry built from all collected morph entries.</returns>
        private static WordInfo lookupWordMorphs(string word, bool tagged_only)
        {
            ArrayList gathered = new ArrayList();

            for (int slot = 0; slot < enums.Length; slot++)
            {
                Wnlib.PartsOfSpeech pos = enums[slot];
                if (pos != Wnlib.PartsOfSpeech.Unknown)
                {
                    Wnlib.MorphStr morphs = new Wnlib.MorphStr(word, Wnlib.PartOfSpeech.of(pos));
                    for (string morph = morphs.next(); morph != null; morph = morphs.next())
                    {
                        Wnlib.Index index = Wnlib.Index.lookup(morph, Wnlib.PartOfSpeech.of(pos));

                        // Skip morphs without an index entry and, in tagged-only mode,
                        // morphs without any tagged sense.
                        bool usable = index != null && !(tagged_only && index.tagsense_cnt == 0);
                        if (!usable)
                        {
                            continue;
                        }

                        // One entry per distinct morph, one count slot per part of speech.
                        WordInfo entry = getMorphInfo(gathered, morph);
                        entry.senseCounts[slot] = tagged_only ? index.tagsense_cnt : index.sense_cnt;
                    }
                }
            }

            // All morph entries are merged rather than picking only the strongest one.
            return WordInfo.Compine(gathered);
        }
Beispiel #12
0
        /// <summary>
        /// Looks up <paramref name="word"/> under each part of speech and picks the part of
        /// speech with the highest count as the most probable one.
        /// </summary>
        /// <remarks>
        /// The word is judged in isolation. Taking the surrounding sentence into account
        /// would give better results but is not attempted here.
        /// </remarks>
        /// <param name="word">Word to look up; also stored as the only text of the result.</param>
        /// <param name="tagged_only">
        /// When <c>true</c>, tagged-sense counts are used and parts of speech with a zero
        /// tagged-sense count are ignored; otherwise plain sense counts are used.
        /// </param>
        /// <returns>
        /// A new <c>WordInfo</c> with one count per part-of-speech slot; its partOfSpeech
        /// stays <c>Unknown</c> when no lookup yields a positive count.
        /// </returns>
        private static WordInfo lookupWord(string word, bool tagged_only)
        {
            WordInfo found = new WordInfo();

            found.texts = new ArrayList();
            found.texts.Add(word);
            found.partOfSpeech = Wnlib.PartsOfSpeech.Unknown;

            Wnlib.PartsOfSpeech[] partsOfSpeech = (Wnlib.PartsOfSpeech[])Enum.GetValues(typeof(Wnlib.PartsOfSpeech));
            found.senseCounts = new int[partsOfSpeech.Length];

            int highest = 0;
            for (int slot = 0; slot < partsOfSpeech.Length; slot++)
            {
                Wnlib.PartsOfSpeech pos = partsOfSpeech[slot];
                if (pos == Wnlib.PartsOfSpeech.Unknown)
                {
                    continue;
                }

                Wnlib.Index index = Wnlib.Index.lookup(word, Wnlib.PartOfSpeech.of(pos));
                if (index == null)
                {
                    continue;
                }

                int count = tagged_only ? index.tagsense_cnt : index.sense_cnt;
                if (tagged_only && count == 0)
                {
                    // Nothing tagged for this part of speech; the slot stays at zero.
                    continue;
                }

                found.senseCounts[slot] = count;

                // Strict '>' keeps the earliest part of speech on ties.
                if (count > highest)
                {
                    highest            = count;
                    found.partOfSpeech = pos;
                }
            }

            return found;
        }