/// ------------------------------------------------------------------------------------
/// <summary>
/// Verifies that a match created with matchForRefOnly = true reports that it does not
/// apply to a reference range that lies between the two references given to the mocked
/// key term.
/// </summary>
/// ------------------------------------------------------------------------------------
public void AppliesTo_TermDoesNotOccurInRange_ReturnsFalse()
{
    // The mocked term is given 002003001 and 002003003 (presumably the references at
    // which the term occurs -- confirm against AddMockedKeyTerm); 002003002 is left out.
    IKeyTerm keyTerm = KeyTermMatchBuilderTests.AddMockedKeyTerm("tom", 002003001, 002003003);
    var words = new List<Word> { "tom" };
    var refOnlyMatch = new KeyTermMatch(words, keyTerm, true);

    bool applies = refOnlyMatch.AppliesTo(002003002, 002003002);

    Assert.IsFalse(applies);
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Verifies that a match created with matchForRefOnly = false reports that it applies
/// even when asked about the reference range (-1, -1).
/// </summary>
/// ------------------------------------------------------------------------------------
public void AppliesTo_TermAllowedToMatchAnywhere_ReturnsTrue()
{
    // No references are passed for the mocked term in this test.
    IKeyTerm keyTerm = KeyTermMatchBuilderTests.AddMockedKeyTerm("tom");
    var words = new List<Word> { "tom" };
    var unrestrictedMatch = new KeyTermMatch(words, keyTerm, false);

    bool applies = unrestrictedMatch.AppliesTo(-1, -1);

    Assert.IsTrue(applies);
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Copy constructor (only valid for an object having a single term). The new match is
/// built from the base match's words, its single term and its MatchForRefOnly setting.
/// </summary>
/// <param name="matchBase">The base match from which this match will be created.</param>
/// <exception cref="ArgumentNullException"><paramref name="matchBase"/> is null.
/// </exception>
/// <exception cref="ArgumentException"><paramref name="matchBase"/> does not have
/// exactly one underlying key term.</exception>
/// ------------------------------------------------------------------------------------
internal KeyTermMatch(KeyTermMatch matchBase) :
    this(RequireSingleTerm(matchBase).m_words, matchBase.m_terms[0], matchBase.MatchForRefOnly)
{
    // All validation happens in RequireSingleTerm, which is evaluated as the first
    // constructor-initializer argument, i.e. before m_terms[0] is dereferenced.
}

/// ------------------------------------------------------------------------------------
/// <summary>
/// Validates that the given match can be used as the source of the copy constructor and
/// returns it unchanged so the check can run inside the constructor initializer.
/// </summary>
/// <param name="matchBase">The match that is about to be copied.</param>
/// <returns>The same <paramref name="matchBase"/> instance.</returns>
/// ------------------------------------------------------------------------------------
private static KeyTermMatch RequireSingleTerm(KeyTermMatch matchBase)
{
    if (matchBase == null)
    {
        throw new ArgumentNullException("matchBase");
    }
    if (matchBase.m_terms.Count != 1)
    {
        throw new ArgumentException("KeyTermMatch copy constructor only valid for making" +
            " copies of a new in-progress match with a single underlying key term.");
    }
    return matchBase;
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Checks whether the words of the phrase, starting at m_iStartMatch, are equivalent to
/// the leading words of the given key term match.
/// </summary>
/// <param name="term">The candidate key term match to compare against.</param>
/// <param name="cMatchingWordsInTermSoFar">The number of phrase words currently under
/// consideration; the comparison covers this many words or the term's word count,
/// whichever is smaller.</param>
/// <returns><c>true</c> if every compared word is equivalent (also when there is
/// nothing to compare); otherwise <c>false</c>.</returns>
/// ------------------------------------------------------------------------------------
private bool PhraseEqualsKeyTermSoFar(KeyTermMatch term, int cMatchingWordsInTermSoFar)
{
    int wordsToCompare = Math.Min(term.WordCount, cMatchingWordsInTermSoFar);

    // offset is the position within the term; the corresponding phrase word sits the
    // same distance past m_iStartMatch.
    for (int offset = 0; offset < wordsToCompare; offset++)
    {
        if (!term[offset].IsEquivalent(m_words[m_iStartMatch + offset]))
        {
            return false;
        }
    }

    return true;
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Verifies that a MatchForRefOnly rule for the term "ask" produces a single match that
/// applies to the range (34, 34) but not to the ranges (30, 33) or (35, 39).
/// </summary>
/// ------------------------------------------------------------------------------------
public void RuleToLimitMatchToTermRefs()
{
    var refOnlyRule = new KeyTermRule
    {
        id = "ask",
        Rule = KeyTermRule.RuleType.MatchForRefOnly
    };
    var rulesById = new Dictionary<string, KeyTermRule>();
    rulesById[refOnlyRule.id] = refOnlyRule;

    // The mocked key term is created with the single value 34 (presumably the only
    // reference at which the term occurs -- confirm against AddMockedKeyTerm).
    var builder = new KeyTermMatchBuilder(
        AddMockedKeyTerm(refOnlyRule.id, 34),
        new ReadonlyDictionary<string, KeyTermRule>(rulesById),
        null);

    Assert.AreEqual(1, builder.Matches.Count());
    KeyTermMatch match = VerifyKeyTermMatch(builder, 0, false, "ask");
    Assert.IsFalse(match.AppliesTo(30, 33));
    Assert.IsTrue(match.AppliesTo(34, 34));
    Assert.IsFalse(match.AppliesTo(35, 39));
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Verifies the key term match at the given position in the builder's matches: its word
/// count, the text of each word, and whether it is restricted to the term's references.
/// </summary>
/// <param name="bldr">The builder whose matches are inspected.</param>
/// <param name="iMatch">Zero-based index of the match to verify.</param>
/// <param name="matchAnywhere">Expected to be the opposite of the match's
/// MatchForRefOnly value. When <c>true</c>, the match is additionally probed with a
/// random pair of references and must apply to it.</param>
/// <param name="words">The expected text of each word of the match, in order.</param>
/// <returns>The verified match, so callers can make further assertions.</returns>
/// ------------------------------------------------------------------------------------
private static KeyTermMatch VerifyKeyTermMatch(KeyTermMatchBuilder bldr, int iMatch,
    bool matchAnywhere, params string[] words)
{
    KeyTermMatch ktm = bldr.Matches.ElementAt(iMatch);
    Assert.AreEqual(words.Length, ktm.WordCount);
    for (int i = 0; i < words.Length; i++)
    {
        Assert.AreEqual(words[i], ktm[i].Text);
    }
    Assert.AreEqual(!matchAnywhere, ktm.MatchForRefOnly);

    // The following is really a test of the KeyTermMatch.AppliesTo method:
    if (matchAnywhere)
    {
        // Keep the seed and the generated references so that a failure reports exactly
        // which inputs triggered it and can be reproduced.
        int seed = DateTime.Now.Millisecond;
        Random r = new Random(seed);
        int startRef = r.Next();
        int endRef = r.Next();
        Assert.IsTrue(ktm.AppliesTo(startRef, endRef),
            string.Format("Match was expected to apply anywhere but did not apply to {0}-{1} (random seed {2}).",
                startRef, endRef, seed));
    }
    return ktm;
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Fills m_keyTermsTable from the given key terms. Every non-empty match produced for a
/// key term is filed under its first word; when an equal match is already filed there,
/// the key term is added to that existing match instead of filing a duplicate.
/// </summary>
/// <param name="keyTerms">The key terms for which matches are built.</param>
/// <param name="rules">Optional rules handed to the match builder (may be null, in which
/// case the builder receives null for both the rules dictionary and the regex rules).
/// </param>
/// ------------------------------------------------------------------------------------
private void PopulateKeyTermsTable(IEnumerable<IKeyTerm> keyTerms, KeyTermRules rules)
{
    foreach (IKeyTerm keyTerm in keyTerms)
    {
        var builder = new KeyTermMatchBuilder(keyTerm,
            rules == null ? null : rules.RulesDictionary,
            rules == null ? null : rules.RegexRules);

        foreach (KeyTermMatch candidate in builder.Matches)
        {
            // A match without words has no first word to file it under.
            if (candidate.WordCount == 0)
            {
                continue;
            }

            Word tableKey = candidate[0];
            List<KeyTermMatch> bucket;
            if (!m_keyTermsTable.TryGetValue(tableKey, out bucket))
            {
                bucket = new List<KeyTermMatch>();
                m_keyTermsTable[tableKey] = bucket;
            }

            KeyTermMatch alreadyFiled = bucket.FirstOrDefault(m => m.Equals(candidate));
            if (alreadyFiled == null)
            {
                bucket.Add(candidate);
            }
            else
            {
                alreadyFiled.AddTerm(keyTerm);
            }
        }
    }

#if DEBUG
    // Debug builds only: report any rules whose Used flag is still false after all
    // matches have been built.
    if (rules != null)
    {
        string unusedRuleList = rules.RulesDictionary.Values.Where(r => !r.Used).ToString(Environment.NewLine);
        if (unusedRuleList.Length > 0)
        {
            MessageBox.Show("Unused KeyTerm Rules: \n" + unusedRuleList, "Transcelerator");
        }
    }
#endif
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Looks for the best key term in the words from m_iStartMatch up to and including
/// m_iNextWord. Each call takes one more word into account and drops from m_matches the
/// candidates that no longer fit, until a candidate can be returned or none are left.
/// </summary>
/// <returns>The key term match to use, or <c>null</c> if no decision can be made yet or
/// no key term starts at m_iStartMatch. When no key term starts there, m_iStartMatch is
/// advanced (and, if several words had been consumed, m_iNextWord is moved back to the
/// old start position).</returns>
/// ------------------------------------------------------------------------------------
private KeyTermMatch FindBestKeyTerm()
{
    if (m_keyTermsTable == null)
    {
        return null;
    }

    Word currentWord = m_words[m_iNextWord];
    bool startingNewMatch = (m_iStartMatch == m_iNextWord);
    if (startingNewMatch)
    {
        // Build a fresh candidate list from the table entries for the current word,
        // keeping only those that apply to the phrase's reference range.
        List<KeyTermMatch> tableEntries;
        m_matches = null;
        if (m_keyTermsTable.TryGetValue(currentWord, out tableEntries))
        {
            m_matches = tableEntries.Where(m => m.AppliesTo(m_phrase.StartRef, m_phrase.EndRef)).ToList();
        }

        if (m_matches == null || m_matches.All(m => m.WordCount > 1))
        {
            // No single-word candidate so far: repeatedly stem the word and add the
            // candidates filed under each stem, until stemming no longer changes it.
            var form = currentWord.Text;
            for (Word stem = s_stemmer.stemTerm(form); stem.Text != form; stem = s_stemmer.stemTerm(form))
            {
                if (m_keyTermsTable.TryGetValue(stem, out tableEntries))
                {
                    stem.AddAlternateForm(currentWord);
                    List<KeyTermMatch> applicable =
                        tableEntries.Where(m => m.AppliesTo(m_phrase.StartRef, m_phrase.EndRef)).ToList();
                    if (m_matches == null)
                    {
                        m_matches = applicable;
                    }
                    else
                    {
                        m_matches.AddRange(applicable);
                    }
                }
                form = stem.Text;
            }

            if (m_matches == null || m_matches.Count == 0)
            {
                m_iStartMatch++;
                return null;
            }
        }

        // Fast path for the very common case of exactly one candidate consisting of a
        // single word. The general code below would reach the same result. If any
        // multi-word candidate starts with this word, we have to keep looking.
        if (m_matches.Count == 1 && m_matches[0].WordCount == 1)
        {
            return m_matches[0];
        }
    }

    int wordsConsidered = m_iNextWord - m_iStartMatch + 1;
    int longestLength = 0;
    KeyTermMatch longestCandidate = null;

    // Drop every candidate that does not fit the words seen so far, or that needs more
    // words than the phrase has left; remember the longest survivor (first one wins on
    // equal length).
    int index = 0;
    while (index < m_matches.Count)
    {
        KeyTermMatch candidate = m_matches[index];
        bool stillViable = PhraseEqualsKeyTermSoFar(candidate, wordsConsidered) &&
            !(AtEndOfPhrase && candidate.WordCount > wordsConsidered);
        if (!stillViable)
        {
            // The next candidate slides into this index, so do not advance.
            m_matches.RemoveAt(index);
            continue;
        }

        if (candidate.WordCount > longestLength)
        {
            longestLength = candidate.WordCount;
            longestCandidate = candidate;
        }
        index++;
    }

    if (m_matches.Count == 0)
    {
        // Only multi-word candidates were left, and the current word ruled all of them
        // out, so no key term starts at m_iStartMatch.
        m_iNextWord = m_iStartMatch; // The loop in Parse will increment this.
        m_iStartMatch++;
        return null;
    }

    bool soleShorterSurvivor = m_matches.Count == 1 && longestLength < wordsConsidered;
    bool exactLengthReached = longestLength == wordsConsidered;
    return (soleShorterSurvivor || exactLengthReached) ? longestCandidate : null;
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Breaks the words of the phrase (m_words) into a sequence of parts: an optional
/// leading question phrase, key term matches, numbers, and the runs of remaining words
/// between them (returned as translatable parts).
/// </summary>
/// <remarks>
/// NOTE(review): earlier documentation of this method also mentioned cleaning up the
/// phrase (lowercasing, removing punctuation and extraneous whitespace). Nothing in this
/// method does that; presumably it happens where m_words is built -- confirm.
/// </remarks>
/// <returns>Collection of phrase parts, lazily produced in phrase order.</returns>
/// ------------------------------------------------------------------------------------
internal IEnumerable<ParsedPart> Parse()
{
    KeyTermMatch bestKeyTerm;
    int minUnhandled = m_iStartMatch = m_iNextWord = 0;

    if (m_questionWords != null)
    {
        // Try the known question phrases, longest first, against the start of the
        // phrase. Only the first (longest) hit may be used: continuing with shorter
        // lengths after a hit would compare at the already advanced position, yield the
        // wrong words (Take always starts at the beginning) and move the indexes back.
        bool questionPhraseFound = false;
        foreach (int count in m_questionWords.Keys.OrderByDescending(k => k))
        {
            foreach (List<Word> questionPhrase in m_questionWords[count])
            {
                bool match = true;
                for (int i = 0; i < count; i++)
                {
                    if (m_iNextWord + i >= m_words.Count || questionPhrase[i] != m_words[m_iNextWord + i])
                    {
                        match = false;
                        break;
                    }
                }
                if (match)
                {
                    yield return YieldTranslatablePart(m_words.Take(count), m_phrase);
                    m_iStartMatch = m_iNextWord = minUnhandled = count;
                    questionPhraseFound = true;
                    break;
                }
            }
            if (questionPhraseFound)
            {
                break;
            }
        }
    }

    while (m_iNextWord < m_words.Count)
    {
        bestKeyTerm = FindBestKeyTerm();
        if (bestKeyTerm == null)
        {
            if (m_words[m_iNextWord].IsNumber)
            {
                // Flush the words collected before the number, then emit the number as
                // a part of its own and restart matching after it.
                if (m_iNextWord > minUnhandled)
                {
                    yield return YieldTranslatablePart(
                        m_words.Skip(minUnhandled).Take(m_iNextWord - minUnhandled), m_phrase);
                }
                yield return new ParsedPart(Int32.Parse(m_words[m_iNextWord].Text));
                m_iStartMatch = minUnhandled = ++m_iNextWord;
            }
            else
            {
                m_iNextWord++;
            }
        }
        else
        {
            // We've found the best key term we're going to find.
            int keyTermWordCount = bestKeyTerm.WordCount;
            if (m_iStartMatch > minUnhandled)
            {
                // Flush the words that precede the key term.
                yield return YieldTranslatablePart(
                    m_words.Skip(minUnhandled).Take(m_iStartMatch - minUnhandled), m_phrase);
            }
            m_keyTermsUsedForPhrase.Add(bestKeyTerm);
            bestKeyTerm.InUse = true;
            yield return new ParsedPart(bestKeyTerm);
            m_iStartMatch = m_iNextWord = minUnhandled = m_iStartMatch + keyTermWordCount;
        }
    }

    // Whatever is left after the last key term or number forms the final part.
    if (minUnhandled < m_words.Count)
    {
        yield return YieldTranslatablePart(m_words.Skip(minUnhandled), m_phrase);
    }
}