Beispiel #1
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Records <paramref name="inflectedForm"/> as an alternate (inflected) form of this
		/// word in the shared table of inflected words, so that it will be considered
		/// equivalent to this word.
		/// </summary>
		/// <param name="inflectedForm">The alternate form to record for this word.</param>
		/// ------------------------------------------------------------------------------------
		public void AddAlternateForm(Word inflectedForm)
		{
			HashSet<Word> knownForms;
			bool alreadyTracked = s_inflectedWords.TryGetValue(this, out knownForms);
			if (!alreadyTracked)
			{
				// First alternate form recorded for this word: start a fresh set for it.
				knownForms = new HashSet<Word>();
				s_inflectedWords[this] = knownForms;
			}

			knownForms.Add(inflectedForm);
		}
Beispiel #2
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Reports whether <paramref name="otherWord"/> is equivalent to this word: either the
		/// two compare equal, or the other word has been recorded as an inflected form of this
		/// one.
		/// </summary>
		/// <param name="otherWord">The word to compare against this word.</param>
		/// <returns><c>true</c> if the words are equivalent; otherwise <c>false</c>.</returns>
		/// ------------------------------------------------------------------------------------
		public bool IsEquivalent(Word otherWord)
		{
			if (this == otherWord)
				return true;

			// No inflected forms recorded for this word means nothing else can be equivalent.
			HashSet<Word> knownForms;
			if (!s_inflectedWords.TryGetValue(this, out knownForms))
				return false;

			return knownForms.Contains(otherWord);
		}
Beispiel #3
0
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Adds the given word to the m_words collection.
		/// </summary>
		/// <param name="word">The word to add; must not be null.</param>
		/// <exception cref="ArgumentNullException"><paramref name="word"/> is null.</exception>
		/// ------------------------------------------------------------------------------------
		public void AddWord(Word word)
		{
			if (word == null)
			{
				throw new ArgumentNullException("word");
			}

			m_words.Add(word);
		}
		/// ------------------------------------------------------------------------------------
		/// <summary>
		/// Gets all the possible surface forms represented by this metaword (which could have
		/// an optional part, indicated by parentheses). If this is a completely optional word,
		/// this will include a null. If it is part of an optional phrase, it will return an
		/// empty list until it gets to the last word in the phrase, at which point it returns
		/// a list representing the whole phrase.
		/// </summary>
		/// <param name="metaWord">The word to expand; its Text may contain parentheses marking
		/// an optional part.</param>
		/// <param name="firstWordOfPhrase">When true and the word is "to", a null entry is
		/// added ahead of the word itself.</param>
		/// <returns>
		/// The alternative forms of a single word (possibly including null), the accumulated
		/// words of a just-completed optional phrase, or an empty list.
		/// </returns>
		/// <remarks>
		/// This method keeps state between calls: it sets m_fInOptionalPhrase and creates
		/// m_optionalPhraseWords when an optional phrase starts, appends to
		/// m_optionalPhraseWords while inside the phrase, and sets m_optionalPhraseWords to
		/// null when the phrase ends.
		/// </remarks>
		/// ------------------------------------------------------------------------------------
		private List<Word> AllWords(Word metaWord, bool firstWordOfPhrase)
		{
			List<Word> list = new List<Word>();
			// Inside an optional phrase the opening parenthesis was consumed on an earlier word,
			// so index 0 is used as the starting point for finding the closing parenthesis.
			int iOpenParen = (m_fInOptionalPhrase) ? 0 : metaWord.Text.IndexOf('(');
			if (iOpenParen >= 0)
			{
				int iCloseParen = metaWord.Text.IndexOf(')', iOpenParen);
				if (iCloseParen > iOpenParen)
				{
					if (m_fInOptionalPhrase)
					{
						// Last word of the optional phrase: hand back the accumulated words plus
						// this word truncated at the closing parenthesis (assuming Text is a
						// string, Remove(int) also drops anything following the parenthesis).
						// NOTE(review): m_fInOptionalPhrase is not reset here; presumably the
						// caller clears it after consuming the phrase - confirm.
						list = m_optionalPhraseWords;
						list.Add(metaWord.Text.Remove(iCloseParen));
						m_optionalPhraseWords = null;
					}
					else
					{
						// Optional part contained within this single word. First the form with
						// the parenthesized part removed (null if nothing is left), then the
						// form that keeps the parenthesized text but drops the parentheses.
						string opt = metaWord.Text.Remove(iOpenParen, iCloseParen - iOpenParen + 1);
						list.Add(opt == string.Empty ? null : opt);
						list.Add(metaWord.Text.Remove(iCloseParen, 1).Remove(iOpenParen, 1));
					}
				}
				else if (m_fInOptionalPhrase)
				{
					// Interior word of an optional phrase: accumulate it and return an empty list.
					m_optionalPhraseWords.Add(metaWord);
				}
				else if (iOpenParen == 0)
				{
					// Word starts with an opening parenthesis that is not closed within the
					// word: begin an optional phrase, storing the word without the parenthesis.
					m_optionalPhraseWords = new List<Word>();
					m_optionalPhraseWords.Add(metaWord.Text.Remove(0, 1));
					m_fInOptionalPhrase = true;
				}
				else
				{
					// An unclosed opening parenthesis in the middle of a word is not handled;
					// an empty list is returned after the debug assertion.
					Debug.Fail("Found opening parenthesis with no closer");
				}
			}
			else
			{
				// A phrase-initial "to" gets a null alternative ahead of the word itself
				// (presumably so the English infinitive marker is treated as optional - confirm).
				if (firstWordOfPhrase && metaWord == "to")
					list.Add(null);
				list.Add(metaWord);
			}

			return list;
		}
Beispiel #5
0
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// Looks for the best key term matching the words from m_iStartMatch up to and
        /// including m_iNextWord. Each call narrows the list of candidate matches (m_matches)
        /// by dropping those that no longer agree with the words considered so far.
        /// </summary>
        /// <returns>
        /// The selected key term match, or null when no match has been settled on yet. When no
        /// key term can start at m_iStartMatch, m_iStartMatch is advanced before returning
        /// null.
        /// </returns>
        /// ------------------------------------------------------------------------------------
        private KeyTermMatch FindBestKeyTerm()
        {
            Word currentWord = m_words[m_iNextWord];

            if (m_iStartMatch == m_iNextWord)
            {
                // Beginning a new match: look the word up directly, falling back to its stem.
                List<KeyTermMatch> candidates;
                if (!m_keyTermsTable.TryGetValue(currentWord, out candidates))
                {
                    Word stem = s_stemmer.stemTerm(currentWord);
                    if (!m_keyTermsTable.TryGetValue(stem, out candidates))
                    {
                        m_iStartMatch++;
                        return null;
                    }

                    // The stem is a known key term word, so record this word as one of its
                    // alternate forms.
                    stem.AddAlternateForm(currentWord);
                }

                m_matches = candidates.Where(m => m.AppliesTo(m_phrase.StartRef, m_phrase.EndRef)).ToList();

                // Fast path for the very common case of a single one-word key term with no
                // competing key terms starting with the same word. The general code below
                // would give the same answer; if any multi-word key term starts with this
                // word, we have to keep looking.
                if (m_matches.Count == 1 && m_matches[0].Words.Count() == 1)
                {
                    return m_matches[0];
                }
            }

            int wordsMatchedSoFar = m_iNextWord - m_iStartMatch + 1;
            int bestLength = 0;
            KeyTermMatch best = null;

            // Discard every candidate that does not match the words seen so far, tracking the
            // longest survivor (the first one wins when lengths tie).
            int iTerm = 0;
            while (iTerm < m_matches.Count)
            {
                KeyTermMatch candidate = m_matches[iTerm];
                bool stillViable = PhraseEqualsKeyTermSoFar(candidate, wordsMatchedSoFar) &&
                    !(AtEndOfPhrase && candidate.m_words.Count > wordsMatchedSoFar);

                if (!stillViable)
                {
                    // Removal shifts the next candidate into this slot, so do not advance.
                    m_matches.RemoveAt(iTerm);
                    continue;
                }

                if (candidate.m_words.Count > bestLength)
                {
                    bestLength = candidate.m_words.Count;
                    best = candidate;
                }

                iTerm++;
            }

            if (m_matches.Count == 0)
            {
                // Only multi-word candidates were left, and the current word ruled all of them
                // out, so there is no key term starting at m_iStartMatch.
                m_iNextWord = m_iStartMatch;                 // The for loop in Parse will increment this.
                m_iStartMatch++;
                return null;
            }

            bool soleShorterMatch = m_matches.Count == 1 && bestLength < wordsMatchedSoFar;
            bool exactLengthMatch = bestLength == wordsMatchedSoFar;
            if (soleShorterMatch || exactLengthMatch)
            {
                return best;
            }

            return null;
        }