/// ------------------------------------------------------------------------------------
/// <summary>
/// Verifies that IsEquivalent reports false when the word "monkey" is compared with
/// "frog".
/// </summary>
/// ------------------------------------------------------------------------------------
public void IsEquivalent_DifferentWord_ReturnsFalse()
{
    Word monkey = "monkey";

    bool equivalent = monkey.IsEquivalent("frog");

    Assert.IsFalse(equivalent);
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Looks for the best key term covering the words from m_iStartMatch through
/// m_iNextWord. When a new match attempt begins (both indexes are equal), the candidate
/// list (m_matches) is rebuilt from the key terms table, falling back on successive
/// stems of the word if needed. On every call, candidates that no longer agree with the
/// words consumed so far are discarded.
/// </summary>
/// <returns>The matching key term, or null if no key term has been settled on yet (or
/// none can start at the current position, in which case m_iStartMatch is advanced).
/// </returns>
/// ------------------------------------------------------------------------------------
private KeyTermMatch FindBestKeyTerm()
{
    if (m_keyTermsTable == null)
    {
        return null;
    }

    Word currentWord = m_words[m_iNextWord];

    if (m_iStartMatch == m_iNextWord)
    {
        // Starting a fresh match attempt: rebuild the candidate list from scratch.
        List<KeyTermMatch> candidates;
        m_matches = null;
        if (m_keyTermsTable.TryGetValue(currentWord, out candidates))
        {
            m_matches = candidates.Where(m => m.AppliesTo(m_phrase.StartRef, m_phrase.EndRef)).ToList();
        }

        // With no candidates, or only multi-word ones, also try progressively shorter
        // stems of the word until stemming no longer changes the text.
        bool needStemLookup = m_matches == null || !m_matches.Any(m => m.WordCount <= 1);
        if (needStemLookup)
        {
            var unstemmed = currentWord.Text;
            for (Word stemmed = s_stemmer.stemTerm(unstemmed);
                stemmed.Text != unstemmed;
                unstemmed = stemmed.Text, stemmed = s_stemmer.stemTerm(unstemmed))
            {
                if (!m_keyTermsTable.TryGetValue(stemmed, out candidates))
                {
                    continue;
                }

                stemmed.AddAlternateForm(currentWord);
                candidates = candidates.Where(m => m.AppliesTo(m_phrase.StartRef, m_phrase.EndRef)).ToList();
                if (m_matches != null)
                {
                    m_matches.AddRange(candidates);
                }
                else
                {
                    m_matches = candidates;
                }
            }

            if (m_matches == null || m_matches.Count == 0)
            {
                // Nothing can start at this word; move the start position along.
                m_iStartMatch++;
                return null;
            }
        }

        // Fast path for the common case: a lone single-word candidate is the answer.
        // The general logic below would reach the same result. If multi-word key terms
        // also begin with this word, we have to keep looking instead.
        if (m_matches.Count == 1)
        {
            KeyTermMatch onlyCandidate = m_matches[0];
            if (onlyCandidate.WordCount == 1)
            {
                return onlyCandidate;
            }
        }
    }

    int wordsMatchedSoFar = m_iNextWord - m_iStartMatch + 1;
    int bestLength = 0;
    KeyTermMatch best = null;

    // Drop every candidate that disagrees with the words seen so far (or that needs more
    // words than the phrase has left), remembering the first of the longest survivors.
    int index = 0;
    while (index < m_matches.Count)
    {
        KeyTermMatch candidate = m_matches[index];
        bool keep = PhraseEqualsKeyTermSoFar(candidate, wordsMatchedSoFar) &&
            (!AtEndOfPhrase || candidate.WordCount <= wordsMatchedSoFar);
        if (!keep)
        {
            // Removal shifts the next candidate into this slot, so do not advance.
            m_matches.RemoveAt(index);
            continue;
        }

        if (candidate.WordCount > bestLength)
        {
            bestLength = candidate.WordCount;
            best = candidate;
        }
        index++;
    }

    if (m_matches.Count == 0)
    {
        // Only multi-word candidates were left and the current word ruled all of them
        // out, so no key term starts at m_iStartMatch. Rewind the next-word index (the
        // loop in Parse is expected to increment it) and advance the start position.
        m_iNextWord = m_iStartMatch;
        m_iStartMatch++;
        return null;
    }

    return ((m_matches.Count == 1 && bestLength < wordsMatchedSoFar) ||
        (bestLength == wordsMatchedSoFar))
        ? best
        : null;
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Verifies that FirstWord returns the word "tom" for a string that starts with
/// whitespace before that word.
/// </summary>
/// ------------------------------------------------------------------------------------
public void FirstWord_StringWithLeadingSpaces_ReturnsCorrectWord()
{
    Word expected = "tom";

    var actual = Word.FirstWord(" tom is a dude");

    Assert.AreEqual(expected, actual);
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Verifies that IsEquivalent reports true when a word is compared with itself.
/// </summary>
/// ------------------------------------------------------------------------------------
public void IsEquivalent_SameWord_ReturnsTrue()
{
    Word monkey = "monkey";

    bool equivalent = monkey.IsEquivalent(monkey);

    Assert.IsTrue(equivalent);
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Verifies that FirstWord returns null for a string consisting only of whitespace.
/// </summary>
/// ------------------------------------------------------------------------------------
public void FirstWord_StringWithOnlySpaces_ReturnsNull()
{
    var firstWord = Word.FirstWord(" ");

    Assert.IsNull(firstWord);
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Verifies that FirstWord returns null for an empty string.
/// </summary>
/// ------------------------------------------------------------------------------------
public void FirstWord_EmptyString_ReturnsNull()
{
    var firstWord = Word.FirstWord("");

    Assert.IsNull(firstWord);
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Verifies that FirstWord returns null when it is passed null.
/// </summary>
/// ------------------------------------------------------------------------------------
public void FirstWord_NullString_ReturnsNull()
{
    var firstWord = Word.FirstWord(null);

    Assert.IsNull(firstWord);
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Verifies that FirstWord returns unequal results for two strings whose first words
/// differ ("tim" versus "tom").
/// </summary>
/// ------------------------------------------------------------------------------------
public void FirstWord_StringsWithDifferentFirstWords_ReturnsDifferentWords()
{
    var firstOfTim = Word.FirstWord("tim is nice");
    var firstOfTom = Word.FirstWord("tom has a frog");

    Assert.AreNotEqual(firstOfTim, firstOfTom);
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Verifies that FirstWord returns equal results for two different strings that both
/// begin with the word "tom".
/// </summary>
/// ------------------------------------------------------------------------------------
public void FirstWord_DifferentStringsWithSameFirstWord_ReturnsSameWord()
{
    var firstOfNice = Word.FirstWord("tom is nice");
    var firstOfFrog = Word.FirstWord("tom has a frog");

    Assert.AreEqual(firstOfNice, firstOfFrog);
}