public void AppliesTo_TermDoesNotOccurInRange_ReturnsFalse()
        {
            // The mocked term is registered with two reference values; the reference queried
            // below lies strictly between them and is not one of them.
            const int firstTermRef  = 002003001;
            const int secondTermRef = 002003003;
            const int queriedRef    = 002003002;

            IKeyTerm mockedTerm = KeyTermMatchBuilderTests.AddMockedKeyTerm("tom", firstTermRef, secondTermRef);
            var      termWords  = new List <Word> { "tom" };

            // Third constructor argument "true": the match is restricted to the term's references.
            var refOnlyMatch = new KeyTermMatch(termWords, mockedTerm, true);

            bool applies = refOnlyMatch.AppliesTo(queriedRef, queriedRef);

            Assert.IsFalse(applies);
        }
        public void AppliesTo_TermAllowedToMatchAnywhere_ReturnsTrue()
        {
            // No reference values are supplied for the mocked term in this test.
            IKeyTerm mockedTerm = KeyTermMatchBuilderTests.AddMockedKeyTerm("tom");
            var      termWords  = new List <Word> { "tom" };

            // Third constructor argument "false": the match is not restricted to the term's
            // references, so even a meaningless (-1, -1) range must be accepted.
            var unrestrictedMatch = new KeyTermMatch(termWords, mockedTerm, false);

            bool applies = unrestrictedMatch.AppliesTo(-1, -1);

            Assert.IsTrue(applies);
        }
 /// ------------------------------------------------------------------------------------
 /// <summary>
 /// Copy constructor (only valid for an object having a single term).
 /// </summary>
 /// <param name="matchBase">The base match from which this match will be created.</param>
 /// <exception cref="ArgumentException"><paramref name="matchBase"/> does not have exactly
 /// one underlying key term.</exception>
 /// ------------------------------------------------------------------------------------
 internal KeyTermMatch(KeyTermMatch matchBase) :
     this(matchBase.m_words, GetSingleTerm(matchBase), matchBase.MatchForRefOnly)
 {
     // Validation happens in GetSingleTerm, i.e. while the arguments of the chained
     // constructor are evaluated, so an invalid source is rejected before any copy is built.
 }

 /// ------------------------------------------------------------------------------------
 /// <summary>
 /// Returns the one and only key term of the given match, or throws if the match does not
 /// have exactly one term. Used by the copy constructor so that the check runs before the
 /// chained constructor instead of after it.
 /// </summary>
 /// <param name="matchBase">The match whose single term is wanted.</param>
 /// <returns>The single underlying key term of <paramref name="matchBase"/>.</returns>
 /// <exception cref="ArgumentException"><paramref name="matchBase"/> does not have exactly
 /// one underlying key term.</exception>
 /// ------------------------------------------------------------------------------------
 private static IKeyTerm GetSingleTerm(KeyTermMatch matchBase)
 {
     if (matchBase.m_terms.Count != 1)
     {
         throw new ArgumentException("KeyTermMatch copy constructor only valid for making" +
                                     " copies of a new in-progress match with a single underlying key term.");
     }
     return matchBase.m_terms[0];
 }
Example #4
0
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// Checks whether the phrase words starting at m_iStartMatch are equivalent, position by
        /// position, to the leading words of the given key term match.
        /// </summary>
        /// <param name="term">The candidate key term match to compare against.</param>
        /// <param name="cMatchingWordsInTermSoFar">Number of phrase words under consideration;
        /// the comparison never goes past the number of words in the term.</param>
        /// <returns><c>true</c> if every compared pair of words is equivalent (also when there
        /// is nothing to compare); <c>false</c> at the first pair that is not.</returns>
        /// ------------------------------------------------------------------------------------
        private bool PhraseEqualsKeyTermSoFar(KeyTermMatch term, int cMatchingWordsInTermSoFar)
        {
            // Only the overlap between the term and the words seen so far can be compared.
            int wordsToCheck = Math.Min(term.WordCount, cMatchingWordsInTermSoFar);

            for (int offset = 0; offset < wordsToCheck; offset++)
            {
                // offset indexes the term; the phrase is read at the same distance from the
                // start of the current match attempt.
                bool sameWord = term[offset].IsEquivalent(m_words[m_iStartMatch + offset]);
                if (!sameWord)
                {
                    return false;
                }
            }

            return true;
        }
        public void RuleToLimitMatchToTermRefs()
        {
            // A single rule, keyed by the term id "ask", that limits matching to the term's
            // own references.
            var refOnlyRule = new KeyTermRule
            {
                id   = "ask",
                Rule = KeyTermRule.RuleType.MatchForRefOnly
            };
            var rulesById = new Dictionary <string, KeyTermRule> { { refOnlyRule.id, refOnlyRule } };

            // The mocked term carries the single reference value 34.
            IKeyTerm mockedTerm = AddMockedKeyTerm(refOnlyRule.id, 34);
            var      builder    = new KeyTermMatchBuilder(mockedTerm,
                                                          new ReadonlyDictionary <string, KeyTermRule>(rulesById), null);

            Assert.AreEqual(1, builder.Matches.Count());
            KeyTermMatch refOnlyMatch = VerifyKeyTermMatch(builder, 0, false, "ask");

            // Ranges entirely before or after 34 must be rejected; the range 34-34 is accepted.
            Assert.IsFalse(refOnlyMatch.AppliesTo(30, 33));
            Assert.IsTrue(refOnlyMatch.AppliesTo(34, 34));
            Assert.IsFalse(refOnlyMatch.AppliesTo(35, 39));
        }
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// Verifies that the match at the given position in the builder's results consists of
        /// exactly the expected words and has the expected "match anywhere" setting.
        /// </summary>
        /// <param name="bldr">The builder whose matches are inspected.</param>
        /// <param name="iMatch">Zero-based position of the match within bldr.Matches.</param>
        /// <param name="matchAnywhere">Expected setting: <c>true</c> if the match is expected
        /// NOT to be limited to the term's references (MatchForRefOnly is false).</param>
        /// <param name="words">The expected text of each word of the match, in order.</param>
        /// <returns>The verified match, so callers can make further assertions on it.</returns>
        /// ------------------------------------------------------------------------------------
        private static KeyTermMatch VerifyKeyTermMatch(KeyTermMatchBuilder bldr, int iMatch,
                                                       bool matchAnywhere, params string[] words)
        {
            KeyTermMatch ktm = bldr.Matches.ElementAt(iMatch);

            Assert.AreEqual(words.Length, ktm.WordCount);
            for (int i = 0; i < words.Length; i++)
            {
                Assert.AreEqual(words[i], ktm[i].Text);
            }

            // AreEqual (rather than IsTrue on a comparison) reports expected vs. actual on failure.
            Assert.AreEqual(!matchAnywhere, ktm.MatchForRefOnly);

            // The following is really a test of the KeyTermMatch.AppliesTo method:
            if (matchAnywhere)
            {
                // A fixed seed keeps the "arbitrary" references identical on every run, so a
                // failure here can be reproduced (a clock-based seed could not be replayed).
                const int arbitraryRefSeed = 0;
                Random r = new Random(arbitraryRefSeed);
                Assert.IsTrue(ktm.AppliesTo(r.Next(), r.Next()));
            }
            return ktm;
        }
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// Fills m_keyTermsTable with the matches built for each of the given key terms. Matches
        /// are grouped under their first word; a match equal to one already stored under that
        /// word is not added again - the key term is attached to the stored match instead.
        /// </summary>
        /// <param name="keyTerms">The key terms for which matches are to be built.</param>
        /// <param name="rules">Optional rules passed on to the match builder; may be null, in
        /// which case the builder receives null for both the rules dictionary and the regular
        /// expression rules.</param>
        /// ------------------------------------------------------------------------------------
        private void PopulateKeyTermsTable(IEnumerable <IKeyTerm> keyTerms, KeyTermRules rules)
        {
            foreach (IKeyTerm keyTerm in keyTerms)
            {
                var builder = new KeyTermMatchBuilder(keyTerm,
                                                      rules == null ? null : rules.RulesDictionary,
                                                      rules == null ? null : rules.RegexRules);

                foreach (KeyTermMatch candidate in builder.Matches)
                {
                    // A match without words has no first word to file it under.
                    if (candidate.WordCount == 0)
                    {
                        continue;
                    }

                    Word leadingWord = candidate[0];
                    List <KeyTermMatch> bucket;
                    if (!m_keyTermsTable.TryGetValue(leadingWord, out bucket))
                    {
                        bucket = new List <KeyTermMatch>();
                        m_keyTermsTable[leadingWord] = bucket;
                    }

                    KeyTermMatch alreadyStored = bucket.FirstOrDefault(m => m.Equals(candidate));
                    if (alreadyStored == null)
                    {
                        bucket.Add(candidate);
                    }
                    else
                    {
                        // Same match produced again: just record the additional key term.
                        alreadyStored.AddTerm(keyTerm);
                    }
                }
            }

#if DEBUG
            // Debug builds only: report rules whose Used flag is still false after all terms
            // have been processed.
            if (rules != null)
            {
                string unusedRuleList = rules.RulesDictionary.Values.Where(rule => !rule.Used).ToString(Environment.NewLine);
                if (unusedRuleList.Length > 0)
                {
                    MessageBox.Show("Unused KeyTerm Rules: \n" + unusedRuleList, "Transcelerator");
                }
            }
#endif
        }
Example #8
0
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// Finds the best key term in the list of words starting at m_iStartMatch and including
        /// up to m_iNextWord. As new words are considered, the list of possible matches
        /// (m_matches) is reduced by any that no longer match until there is exactly one match
        /// that exactly equals the words in the key term or the list is empty.
        /// </summary>
        /// <remarks>
        /// This method mutates parser state: it rebuilds and prunes m_matches, advances
        /// m_iStartMatch when no key term starts at the current position, and may reset
        /// m_iNextWord back to the start position so the caller re-scans from there.
        /// </remarks>
        /// <returns>The selected key term match, or <c>null</c> when there is no key terms
        /// table, no key term starts at the current position, or more words must be considered
        /// before a decision can be made.</returns>
        /// ------------------------------------------------------------------------------------
        private KeyTermMatch FindBestKeyTerm()
        {
            if (m_keyTermsTable == null)
            {
                return(null);
            }

            Word nextWord = m_words[m_iNextWord];

            // Beginning a new match attempt: (re)build the list of candidate matches from
            // scratch, based on the word at the start position.
            if (m_iStartMatch == m_iNextWord)
            {
                List <KeyTermMatch> matches;
                m_matches = null;
                if (m_keyTermsTable.TryGetValue(nextWord, out matches))
                {
                    // Keep only the candidates that apply to the reference range of the phrase.
                    m_matches = new List <KeyTermMatch>(matches.Where(m => m.AppliesTo(m_phrase.StartRef, m_phrase.EndRef)));
                }
                // No candidates, or no single-word candidate (note that All is also true for an
                // empty list): look for additional candidates under stemmed forms of the word.
                if (m_matches == null || m_matches.All(m => m.WordCount > 1))
                {
                    var  baseWord = nextWord.Text;
                    Word stem     = s_stemmer.stemTerm(baseWord);

                    // Keep stemming until the stemmer returns its input unchanged.
                    while (stem.Text != baseWord)
                    {
                        if (m_keyTermsTable.TryGetValue(stem, out matches))
                        {
                            // Register the word actually found in the phrase as an alternate
                            // form of the stem.
                            stem.AddAlternateForm(nextWord);
                            matches = new List <KeyTermMatch>(matches.Where(m => m.AppliesTo(m_phrase.StartRef, m_phrase.EndRef)));
                            if (m_matches == null)
                            {
                                m_matches = matches;
                            }
                            else
                            {
                                m_matches.AddRange(matches);
                            }
                        }
                        baseWord = stem.Text;
                        stem     = s_stemmer.stemTerm(baseWord);
                    }
                    // Nothing found for the word or any of its stems: no key term starts
                    // here, so the next attempt starts one word further on.
                    if (m_matches == null || m_matches.Count == 0)
                    {
                        m_iStartMatch++;
                        return(null);
                    }
                }

                // If we found a one-word exact match and there are no other key terms that start
                // with that word, then we return it. The code below would handle this, but it's such
                // a common case, we want it to be fast. If there are one or more multi-word key
                // terms that start with this word, we need to keep looking.
                if (m_matches.Count == 1 && m_matches[0].WordCount == 1)
                {
                    return(m_matches[0]);
                }
            }

            // Number of phrase words in the current attempt, including the word at m_iNextWord.
            int          cMatchingWordsInTermSoFar = m_iNextWord - m_iStartMatch + 1;
            int          lengthOfBestMatch         = 0;
            KeyTermMatch longestMatch = null;

            // Remove from the possible matches any that don't match so far
            for (int iTerm = 0; iTerm < m_matches.Count; iTerm++)
            {
                KeyTermMatch term = m_matches[iTerm];
                // Also drop candidates that have more words than have been considered so far
                // when AtEndOfPhrase is set (presumably: no further phrase words are available
                // to complete them - AtEndOfPhrase is defined outside this method).
                if (!PhraseEqualsKeyTermSoFar(term, cMatchingWordsInTermSoFar) ||
                    (AtEndOfPhrase && term.WordCount > cMatchingWordsInTermSoFar))
                {
                    // Step the index back so the element that slides into this slot is not skipped.
                    m_matches.RemoveAt(iTerm--);
                }
                else if (term.WordCount > lengthOfBestMatch)
                {
                    // Track the surviving candidate with the most words (first one wins on ties).
                    lengthOfBestMatch = term.WordCount;
                    longestMatch      = term;
                }
            }

            if (m_matches.Count == 0)
            {
                // The only matches we had were multi-word matches, and the addition of the current
                // word made it so that none of them matched. Therefore, we don't have a key term
                // starting at iStartMatch.
                m_iNextWord = m_iStartMatch;                 // The for loop in Parse will increment this.
                m_iStartMatch++;
                return(null);
            }

            // Decide now if either (a) a single candidate remains and it is shorter than the
            // number of words considered, or (b) the longest surviving candidate has exactly as
            // many words as have been considered. Otherwise a longer candidate might still
            // match, so the caller has to supply another word first.
            if ((m_matches.Count == 1 && lengthOfBestMatch < cMatchingWordsInTermSoFar) || (lengthOfBestMatch == cMatchingWordsInTermSoFar))
            {
                return(longestMatch);
            }

            return(null);
        }
Example #9
0
        /// ------------------------------------------------------------------------------------
        /// <summary>
        /// Cleans up the given source-language phrase (makes it all lowercase and removes
        /// punctuation and extraneous whitespace) and returns a collection of phrase parts,
        /// broken up by key terms.
        /// </summary>
        /// <remarks>
        /// This method itself works on the already-split word list m_words; the clean-up
        /// described above is not performed here (NOTE(review): presumably done where m_words
        /// is built - confirm). The parts are produced lazily, in phrase order:
        /// an optional leading run of question words, then translatable runs of words,
        /// numbers and key term matches. The parser position fields (m_iStartMatch,
        /// m_iNextWord) are reset at the start and updated while the result is enumerated, and
        /// each key term match found is added to m_keyTermsUsedForPhrase and flagged as InUse.
        /// </remarks>
        /// <returns>Collection of phrase parts</returns>
        /// ------------------------------------------------------------------------------------
        internal IEnumerable <ParsedPart> Parse()
        {
            // Index of the first word not yet covered by any part returned so far.
            int minUnhandled = m_iStartMatch = m_iNextWord = 0;

            if (m_questionWords != null)
            {
                // Look for a question phrase at the very start of the phrase, trying the
                // longest candidates first. Only ONE leading question phrase is consumed:
                // once a match is found the search stops completely. (Continuing with the
                // shorter counts would compare against the already-advanced position, return
                // the wrong words via Take(count) and move the position backwards.)
                bool questionWordsFound = false;
                foreach (int count in m_questionWords.Keys.OrderByDescending(k => k))
                {
                    foreach (List <Word> questionPhrase in m_questionWords[count])
                    {
                        bool match = true;
                        for (int i = 0; i < count; i++)
                        {
                            if (m_iNextWord + i >= m_words.Count || questionPhrase[i] != m_words[m_iNextWord + i])
                            {
                                match = false;
                                break;
                            }
                        }
                        if (match)
                        {
                            yield return(YieldTranslatablePart(m_words.Take(count), m_phrase));

                            m_iStartMatch      = m_iNextWord = minUnhandled = count;
                            questionWordsFound = true;
                            break;
                        }
                    }
                    if (questionWordsFound)
                    {
                        break;
                    }
                }
            }

            // FindBestKeyTerm may itself move m_iStartMatch/m_iNextWord, which is why the
            // position is advanced explicitly in each branch rather than by a loop increment.
            while (m_iNextWord < m_words.Count)
            {
                KeyTermMatch bestKeyTerm = FindBestKeyTerm();
                if (bestKeyTerm == null)
                {
                    if (m_words[m_iNextWord].IsNumber)
                    {
                        // Flush the ordinary words that precede the number, then return the
                        // number as a part of its own.
                        if (m_iNextWord > minUnhandled)
                        {
                            yield return(YieldTranslatablePart(m_words.Skip(minUnhandled).Take(m_iNextWord - minUnhandled), m_phrase));
                        }
                        yield return(new ParsedPart(Int32.Parse(m_words[m_iNextWord].Text)));

                        m_iStartMatch = minUnhandled = ++m_iNextWord;
                    }
                    else
                    {
                        m_iNextWord++;
                    }
                }
                else
                {
                    // We've found the best key term we're going to find.
                    int keyTermWordCount = bestKeyTerm.WordCount;
                    if (m_iStartMatch > minUnhandled)
                    {
                        // Flush the ordinary words between the last part and the key term.
                        yield return(YieldTranslatablePart(m_words.Skip(minUnhandled).Take(m_iStartMatch - minUnhandled), m_phrase));
                    }
                    m_keyTermsUsedForPhrase.Add(bestKeyTerm);
                    bestKeyTerm.InUse = true;
                    yield return(new ParsedPart(bestKeyTerm));

                    // Continue right after the words covered by the key term.
                    m_iStartMatch = m_iNextWord = minUnhandled = m_iStartMatch + keyTermWordCount;
                }
            }

            // Whatever is left after the last key term/number forms a final translatable part.
            if (minUnhandled < m_words.Count)
            {
                yield return(YieldTranslatablePart(m_words.Skip(minUnhandled), m_phrase));
            }
        }