/// ------------------------------------------------------------------------------------
/// <summary>
/// Registers the given word as an alternate (inflected) form of this word, so that it
/// will subsequently be reported as equivalent to this word by IsEquivalent.
/// </summary>
/// <param name="inflectedForm">The alternate form to record for this word.</param>
/// ------------------------------------------------------------------------------------
public void AddAlternateForm(Word inflectedForm)
{
    HashSet<Word> knownForms;
    bool alreadyTracked = s_inflectedWords.TryGetValue(this, out knownForms);
    if (!alreadyTracked)
    {
        // First alternate form recorded for this word: create its set and store it
        // in the lookup table before adding to it.
        knownForms = new HashSet<Word>();
        s_inflectedWords[this] = knownForms;
    }

    knownForms.Add(inflectedForm);
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Tells whether the given word should be treated as the same word as this one: either
/// it compares equal to this word, or it was previously recorded as an alternate form
/// of this word (see AddAlternateForm). Only the alternate forms recorded for this
/// word are consulted, not those recorded for <paramref name="otherWord"/>.
/// </summary>
/// <param name="otherWord">The word to compare against this word.</param>
/// <returns><c>true</c> if the words are equivalent; otherwise <c>false</c>.</returns>
/// ------------------------------------------------------------------------------------
public bool IsEquivalent(Word otherWord)
{
    if (this == otherWord)
    {
        return true;
    }

    HashSet<Word> knownForms;
    if (!s_inflectedWords.TryGetValue(this, out knownForms))
    {
        // No alternate forms have been recorded for this word.
        return false;
    }

    return knownForms.Contains(otherWord);
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Appends the given word to this object's collection of words (m_words).
/// </summary>
/// <param name="word">The word to add.</param>
/// <exception cref="ArgumentNullException"><paramref name="word"/> is null.</exception>
/// ------------------------------------------------------------------------------------
public void AddWord(Word word)
{
    if (word == null)
    {
        throw new ArgumentNullException("word");
    }

    m_words.Add(word);
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Expands a single metaword of a key term into the surface forms it can stand for.
/// Parentheses in the metaword mark optional material:
/// <list type="bullet">
/// <item>No parenthesis: the result is the word itself (preceded by a null entry when
/// it is the first word of the phrase and equals "to").</item>
/// <item>Opening and closing parenthesis in the same word: the result holds the word
/// with the parenthesized part removed (null if nothing is left) followed by the word
/// with only the two parentheses removed.</item>
/// <item>Word starting with an opening parenthesis and having no closer: an optional
/// phrase begins; the word (minus the parenthesis) is remembered and an empty list is
/// returned.</item>
/// <item>While inside an optional phrase: words are accumulated and an empty list is
/// returned, until a word containing the closing parenthesis is seen, at which point
/// the accumulated words plus that word (truncated at the parenthesis) are returned.
/// </item>
/// </list>
/// NOTE(review): this method never sets m_fInOptionalPhrase back to false when the
/// optional phrase is closed - presumably the caller does so; confirm.
/// </summary>
/// <param name="metaWord">The word from the key term, possibly containing parentheses.
/// </param>
/// <param name="firstWordOfPhrase">Whether this is the first word of the phrase.</param>
/// <returns>The list of forms as described above; may be empty.</returns>
/// ------------------------------------------------------------------------------------
private List<Word> AllWords(Word metaWord, bool firstWordOfPhrase)
{
    List<Word> surfaceForms = new List<Word>();

    // Inside an optional phrase the opening parenthesis was consumed on an earlier
    // word, so searching for the closer starts from the beginning of this word.
    int openAt = m_fInOptionalPhrase ? 0 : metaWord.Text.IndexOf('(');

    if (openAt < 0)
    {
        // Plain word with no optional part.
        if (firstWordOfPhrase && metaWord == "to")
        {
            surfaceForms.Add(null);
        }
        surfaceForms.Add(metaWord);
        return surfaceForms;
    }

    int closeAt = metaWord.Text.IndexOf(')', openAt);

    if (closeAt <= openAt)
    {
        // No usable closing parenthesis in this word.
        if (m_fInOptionalPhrase)
        {
            // Middle of an optional phrase: keep collecting.
            m_optionalPhraseWords.Add(metaWord);
        }
        else if (openAt == 0)
        {
            // Start of a new optional phrase: remember the word without its "(".
            m_optionalPhraseWords = new List<Word>();
            m_optionalPhraseWords.Add(metaWord.Text.Remove(0, 1));
            m_fInOptionalPhrase = true;
        }
        else
        {
            Debug.Fail("Found opening parenthesis with no closer");
        }
        return surfaceForms;
    }

    if (m_fInOptionalPhrase)
    {
        // Last word of the optional phrase: hand back everything collected so far plus
        // this word cut off at the closing parenthesis.
        m_optionalPhraseWords.Add(metaWord.Text.Remove(closeAt));
        surfaceForms = m_optionalPhraseWords;
        m_optionalPhraseWords = null;
        return surfaceForms;
    }

    // Optional part contained within this single word: offer the form without the
    // optional part (null when the whole word was optional) and the form with it.
    string opt = metaWord.Text.Remove(openAt, closeAt - openAt + 1);
    surfaceForms.Add(opt == string.Empty ? null : opt);
    surfaceForms.Add(metaWord.Text.Remove(closeAt, 1).Remove(openAt, 1));
    return surfaceForms;
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Looks for the best key term covering the words from m_iStartMatch through
/// m_iNextWord. When a new match attempt begins (the two indexes are equal), the
/// candidate list m_matches is seeded from the key terms table, using the word itself
/// or, failing that, its stem. On every call the candidates that no longer fit the
/// words considered so far are discarded.
/// </summary>
/// <returns>
/// The matching key term once one can be settled on; otherwise null. Null is returned
/// both when more words are needed to decide and when no key term starts at
/// m_iStartMatch (in which case m_iStartMatch is advanced, and m_iNextWord may be
/// reset so that scanning resumes after the previous start).
/// </returns>
/// ------------------------------------------------------------------------------------
private KeyTermMatch FindBestKeyTerm()
{
    Word currentWord = m_words[m_iNextWord];

    if (m_iStartMatch == m_iNextWord)
    {
        // Starting a fresh match: look up candidates keyed by this word.
        List<KeyTermMatch> candidates;
        if (!m_keyTermsTable.TryGetValue(currentWord, out candidates))
        {
            // Not found as-is; retry with the stemmed form.
            Word stem = s_stemmer.stemTerm(currentWord);
            if (!m_keyTermsTable.TryGetValue(stem, out candidates))
            {
                m_iStartMatch++;
                return null;
            }

            // Remember that the word actually seen is an inflected form of the stem.
            stem.AddAlternateForm(currentWord);
        }

        m_matches = candidates
            .Where(candidate => candidate.AppliesTo(m_phrase.StartRef, m_phrase.EndRef))
            .ToList();

        // Fast path for the very common case of a single candidate that is itself a
        // one-word term. The general logic below would reach the same answer. When
        // multi-word terms also start with this word, we must keep looking instead.
        if (m_matches.Count == 1 && m_matches[0].Words.Count() == 1)
        {
            return m_matches[0];
        }
    }

    int wordsConsidered = m_iNextWord - m_iStartMatch + 1;
    int lengthOfBestMatch = 0;
    KeyTermMatch longestMatch = null;

    // Prune candidates that do not fit the words seen so far, tracking the longest
    // survivor as we go.
    int iTerm = 0;
    while (iTerm < m_matches.Count)
    {
        KeyTermMatch term = m_matches[iTerm];

        bool stillMatches = PhraseEqualsKeyTermSoFar(term, wordsConsidered);
        // A term needing more words than the phrase has left can never match.
        bool needsMoreWordsThanRemain =
            stillMatches && AtEndOfPhrase && term.m_words.Count > wordsConsidered;

        if (!stillMatches || needsMoreWordsThanRemain)
        {
            // The following element slides into this slot, so do not advance.
            m_matches.RemoveAt(iTerm);
            continue;
        }

        if (term.m_words.Count > lengthOfBestMatch)
        {
            lengthOfBestMatch = term.m_words.Count;
            longestMatch = term;
        }
        iTerm++;
    }

    if (m_matches.Count == 0)
    {
        // Only multi-word candidates remained and the current word ruled all of them
        // out, so no key term begins at m_iStartMatch.
        m_iNextWord = m_iStartMatch; // The for loop in Parse will increment this.
        m_iStartMatch++;
        return null;
    }

    bool soleShorterCandidate = m_matches.Count == 1 && lengthOfBestMatch < wordsConsidered;
    bool exactLengthCandidate = lengthOfBestMatch == wordsConsidered;
    if (soleShorterCandidate || exactLengthCandidate)
    {
        return longestMatch;
    }

    // Longer candidates are still possible; wait for more words.
    return null;
}