/// ------------------------------------------------------------------------------------
/// <summary>
/// Initializes a new instance of the <see cref="KeyTermMatchBuilder"/> class, building
/// the matches for the given key term and applying any rule registered for it.
/// </summary>
/// <param name="keyTerm">The key term whose term text is parsed into phrases.</param>
/// <param name="rules">Optional dictionary of (English) key terms to rules indicating
/// special handling needed. Looked up using the lowercased, FormD-normalized term text.
/// May be null.</param>
/// ------------------------------------------------------------------------------------
public KeyTermMatchBuilder(IKeyTerm keyTerm, Dictionary<string, KeyTermRule> rules)
{
    string lookupKey = keyTerm.Term.ToLowerInvariant().Normalize(NormalizationForm.FormD);

    KeyTermRule matchingRule;
    if (rules != null && rules.TryGetValue(lookupKey, out matchingRule))
    {
        var ruleKind = matchingRule.Rule;
        bool skipMainTerm = ruleKind == KeyTermRule.RuleType.Exclude;
        if (ruleKind == KeyTermRule.RuleType.MatchForRefOnly)
        {
            // Must be set before the alternates are processed below.
            m_fMatchForRefOnly = true;
        }

        if (matchingRule.Alternates != null)
        {
            foreach (var alternate in matchingRule.Alternates)
            {
                ProcessKeyTermPhrase(keyTerm, alternate.Name);
            }
        }

        // An excluded term contributes only its alternates (if any).
        if (skipMainTerm)
        {
            return;
        }
    }

    string[] mainPhrases = lookupKey.Split(new[] { ", or ", ",", "=" },
        StringSplitOptions.RemoveEmptyEntries);
    foreach (string mainPhrase in mainPhrases)
    {
        ProcessKeyTermPhrase(keyTerm, mainPhrase);
    }
}
public void GetNormalRenderings()
{
    // Arrange: a mocked key term that reports two renderings.
    IKeyTerm term = KeyTermMatchBuilderTests.AddMockedKeyTerm("diversion");
    term.Stub(kt => kt.Renderings).Return(new [] { "abc", "xyz" });

    // Act: build the matches and take the first one.
    KeyTermMatchBuilder builder = new KeyTermMatchBuilder(term);
    KeyTermMatch firstMatch = builder.Matches.First();

    // Assert: the match exposes the term's renderings in the same order.
    bool sameRenderings = firstMatch.Renderings.SequenceEqual(term.Renderings);
    Assert.IsTrue(sameRenderings);
}
public void CanRenderingBeDeleted_NonExistentRendering()
{
    // Arrange: a mocked key term whose only rendering is "abc".
    IKeyTerm term = KeyTermMatchBuilderTests.AddMockedKeyTerm("having a blast");
    term.Stub(kt => kt.Renderings).Return(new[] { "abc" });

    // Act
    KeyTermMatchBuilder builder = new KeyTermMatchBuilder(term);
    KeyTermMatch firstMatch = builder.Matches.First();
    bool deletable = firstMatch.CanRenderingBeDeleted("xyz");

    // Assert: a rendering that was never added is reported as not deletable.
    Assert.IsFalse(deletable);
}
public void AddRenderingFailsToAddDuplicate()
{
    // Arrange: the mocked key term already reports "abc" as one of its renderings.
    IKeyTerm term = KeyTermMatchBuilderTests.AddMockedKeyTerm("good times");
    term.Stub(kt => kt.Renderings).Return(new[] { "abc", "xyz" });
    KeyTermMatchBuilder builder = new KeyTermMatchBuilder(term);
    KeyTermMatch firstMatch = builder.Matches.First();

    // Act + Assert: adding "abc" again is expected to throw ArgumentException.
    Assert.Throws(typeof(ArgumentException), () => firstMatch.AddRendering("abc"));
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Initializes a new instance of the <see cref="KeyTermMatch"/> class for a single key
/// term. If the stored rendering info contains an entry whose TermId equals this
/// match's Term, that entry's preferred rendering becomes the best translation.
/// </summary>
/// <param name="words">The words that make up this match (copied into a new list).</param>
/// <param name="term">The first key term associated with this match.</param>
/// <param name="matchForRefOnly">if set to <c>true</c> [match for ref only].</param>
/// ------------------------------------------------------------------------------------
internal KeyTermMatch(IEnumerable<Word> words, IKeyTerm term, bool matchForRefOnly)
{
    m_matchForRefOnly = matchForRefOnly;
    m_words = words.ToList();
    m_terms = new List<IKeyTerm> { term };

    // The term list is populated before Term is read in the lookup below.
    KeyTermRenderingInfo savedInfo =
        m_keyTermRenderingInfo.FirstOrDefault(i => i.TermId == Term);
    if (savedInfo == null)
    {
        return;
    }

    m_bestTranslation = savedInfo.PreferredRendering;
}
public void CanRenderingBeDeleted_DefaultRendering()
{
    const string addedRendering = "bestest";

    // Arrange: build a match, add a rendering and make it the best rendering.
    IKeyTerm term = KeyTermMatchBuilderTests.AddMockedKeyTerm("time of my life");
    term.Stub(kt => kt.Renderings).Return(new[] { "abc" });
    KeyTermMatchBuilder builder = new KeyTermMatchBuilder(term);
    KeyTermMatch firstMatch = builder.Matches.First();
    firstMatch.AddRendering(addedRendering);
    firstMatch.BestRendering = addedRendering;

    // Act
    bool deletable = firstMatch.CanRenderingBeDeleted(addedRendering);

    // Assert: the rendering currently set as best is reported as not deletable.
    Assert.IsFalse(deletable);
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Initializes a new instance of the <see cref="KeyTermMatch"/> class. After storing
/// the words and the term, searches the stored rendering info for the first entry
/// whose TermId equals this match's Term and, when one exists, takes its preferred
/// rendering as the best translation.
/// </summary>
/// <param name="words">The words for this match; a copy of the sequence is kept.</param>
/// <param name="term">The key term that this match initially represents.</param>
/// <param name="matchForRefOnly">if set to <c>true</c> [match for ref only].</param>
/// ------------------------------------------------------------------------------------
internal KeyTermMatch(IEnumerable<Word> words, IKeyTerm term, bool matchForRefOnly)
{
    m_matchForRefOnly = matchForRefOnly;
    m_words = words.ToList();

    List<IKeyTerm> terms = new List<IKeyTerm>();
    terms.Add(term);
    m_terms = terms;

    KeyTermRenderingInfo matchingInfo = m_keyTermRenderingInfo
        .Where(i => i.TermId == Term)
        .FirstOrDefault();
    if (matchingInfo != null)
    {
        m_bestTranslation = matchingInfo.PreferredRendering;
    }
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Processes a (potentially multi-word) phrase for a key term. A new, initially empty
/// KeyTermMatch is always appended; then each word of the phrase (split on spaces and
/// non-breaking spaces, with surrounding apostrophes trimmed) is expanded via AllWords
/// and appended to the matches belonging to this phrase.
/// </summary>
/// <param name="keyTerm">The key term the phrase belongs to.</param>
/// <param name="phrase">The phrase to break into words.</param>
/// <param name="fMatchForRefOnly">Passed to the newly created KeyTermMatch.</param>
/// <param name="createExtraMatchIfPhraseStartsWithTo">Passed to AllWords for the first
/// word of the phrase only; all later words get <c>false</c>.</param>
/// <param name="startOfListForPhrase">The index in m_list at which the matches for
/// this phrase start.</param>
/// ------------------------------------------------------------------------------------
private void AddMatchesForPhrase(IKeyTerm keyTerm, string phrase, bool fMatchForRefOnly,
    bool createExtraMatchIfPhraseStartsWithTo, int startOfListForPhrase)
{
    // Start with one match that has no words yet.
    KeyTermMatch emptyMatch = new KeyTermMatch(new Word[0], keyTerm, fMatchForRefOnly);
    m_list.Add(emptyMatch);

    string[] rawTokens = phrase.Split(new[] { ' ', '\u00a0' },
        StringSplitOptions.RemoveEmptyEntries);
    IEnumerable<string> trimmedTokens = rawTokens.Select(s => s.Trim('\''));

    bool extraMatchFlag = createExtraMatchIfPhraseStartsWithTo;
    foreach (Word metaWord in trimmedTokens)
    {
        List<Word> expansions = AllWords(metaWord, extraMatchFlag);
        // Only the first token of the phrase may carry the flag.
        extraMatchFlag = false;

        if (expansions.Count == 0)
        {
            continue;
        }

        AddWordsToMatches(expansions, startOfListForPhrase);
    }
}
public void AddAndRemoveRenderings()
{
    const string addedRendering = "wunkyboo";

    // Arrange: a mocked key term that reports two renderings of its own.
    IKeyTerm term = KeyTermMatchBuilderTests.AddMockedKeyTerm("fun");
    term.Stub(kt => kt.Renderings).Return(new [] { "abc", "xyz" });
    KeyTermMatchBuilder builder = new KeyTermMatchBuilder(term);
    KeyTermMatch firstMatch = builder.Matches.First();

    // Add a rendering: it shows up alongside the term's two renderings.
    firstMatch.AddRendering(addedRendering);
    Assert.AreEqual(3, firstMatch.Renderings.Count());
    Assert.IsTrue(firstMatch.Renderings.Contains(addedRendering));

    // The added rendering is deletable; the term's own "abc" is not.
    Assert.IsTrue(firstMatch.CanRenderingBeDeleted(addedRendering));
    Assert.IsFalse(firstMatch.CanRenderingBeDeleted("abc"));

    // Delete it again: it no longer appears among the renderings.
    firstMatch.DeleteRendering(addedRendering);
    Assert.IsFalse(firstMatch.Renderings.Contains(addedRendering));
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Processes a single alternative word or phrase for a key term. If the phrase
/// contains " or ", it is split at the first occurrence into two alternative phrases
/// which are each processed recursively. Otherwise an empty match is added and each
/// space-separated word (with surrounding apostrophes trimmed) is appended to the
/// matches for this phrase.
/// </summary>
/// <param name="keyTerm">The key term the phrase belongs to.</param>
/// <param name="phrase">The phrase to process.</param>
/// ------------------------------------------------------------------------------------
private void ProcessKeyTermPhrase(IKeyTerm keyTerm, string phrase)
{
    int firstMatchIndex = m_list.Count;

    string[] halves = phrase.Split(new [] { " or " }, 2, StringSplitOptions.RemoveEmptyEntries);
    if (halves.Length == 2)
    {
        string beforeOr = halves[0];
        string afterOr = halves[1];

        // Walk outward from the " or ", one word at a time on each side, until either
        // side has no further space to step to.
        int preCut = beforeOr.Length;
        int postCut = 0;
        int spaceBefore, spaceAfter;
        do
        {
            spaceBefore = beforeOr.LastIndexOf(' ', preCut - 1);
            spaceAfter = afterOr.IndexOf(' ', postCut + 1);

            if (spaceBefore >= 0)
                preCut = spaceBefore;
            else
                preCut = 0;

            if (spaceAfter >= 0)
                postCut = spaceAfter;
            else
                postCut = afterOr.Length;
        }
        while (preCut > 0 && spaceAfter >= 0);

        // Keep the space that separates the shared leading text from the alternative.
        if (preCut > 0)
        {
            preCut++;
        }

        // First alternative: everything before the "or" plus the shared trailing text.
        string firstAlternative = beforeOr + afterOr.Substring(postCut);
        ProcessKeyTermPhrase(keyTerm, firstAlternative);

        // Second alternative: the shared leading text plus everything after the "or".
        string secondAlternative = beforeOr.Substring(0, preCut) + afterOr;
        ProcessKeyTermPhrase(keyTerm, secondAlternative);
        return;
    }

    // Start with one match that has no words yet.
    m_list.Add(new KeyTermMatch(new Word[0], keyTerm, m_fMatchForRefOnly));

    string[] rawTokens = phrase.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
    IEnumerable<string> trimmedTokens = rawTokens.Select(s => s.Trim('\''));

    bool isFirstWord = true;
    foreach (Word metaWord in trimmedTokens)
    {
        List<Word> expansions = AllWords(metaWord, isFirstWord);
        isFirstWord = false;

        if (expansions.Count > 0)
        {
            AddWordsToMatches(keyTerm, expansions, firstMatchIndex);
        }
    }

    // An optional phrase still open at the end of the phrase is added now.
    if (m_fInOptionalPhrase)
    {
        AddWordsToMatches(keyTerm, m_optionalPhraseWords, firstMatchIndex);
    }
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Processes a single alternative word or phrase for a key term. Most key terms have a
/// simple "source" (actually English) rendering that consists of a single word or
/// phrase. But some have multiple alternative words or phrases; hence, this method.
/// A phrase containing " or " is split at the first occurrence into two alternatives,
/// each of which is processed by a recursive call.
/// </summary>
/// <param name="keyTerm">The key term the phrase belongs to.</param>
/// <param name="phrase">The phrase to process.</param>
/// ------------------------------------------------------------------------------------
private void ProcessKeyTermPhrase(IKeyTerm keyTerm, string phrase)
{
    // Matches created for this phrase start at this index in m_list.
    int startOfListForPhrase = m_list.Count;
    // Split at the first " or " only (at most two parts).
    string[] orParts = phrase.Split(new [] {" or "}, 2, StringSplitOptions.RemoveEmptyEntries);
    if (orParts.Length == 2)
    {
        int ichEndOfPreOrPhrase = orParts[0].Length;
        int ichStartOfPostOrPhrase = 0;
        int ichPre, ichPost;
        // Step one space backward in the part before the "or" and one space forward in
        // the part after it, repeating until either side has no further space.
        do
        {
            ichPre = orParts[0].LastIndexOf(' ', ichEndOfPreOrPhrase - 1);
            ichPost = orParts[1].IndexOf(' ', ichStartOfPostOrPhrase + 1);
            ichEndOfPreOrPhrase = (ichPre >= 0) ? ichPre : 0;
            ichStartOfPostOrPhrase = (ichPost >= 0) ? ichPost : orParts[1].Length;
        } while (ichEndOfPreOrPhrase > 0 && ichPost >= 0);

        // Include the space itself in the leading text kept for the second alternative.
        if (ichEndOfPreOrPhrase > 0)
            ichEndOfPreOrPhrase++;
        // First alternative: the whole pre-"or" part plus the tail of the post-"or" part.
        ProcessKeyTermPhrase(keyTerm, orParts[0] + orParts[1].Substring(ichStartOfPostOrPhrase));
        // Second alternative: the head of the pre-"or" part plus the whole post-"or" part.
        ProcessKeyTermPhrase(keyTerm, orParts[0].Substring(0, ichEndOfPreOrPhrase) + orParts[1]);
        return;
    }

    // Initially, we add one empty list
    m_list.Add(new KeyTermMatch(new Word[0], keyTerm, m_fMatchForRefOnly));
    bool firstWordOfPhrase = true;
    // Each space-separated token, with surrounding apostrophes trimmed, is converted to
    // a Word by the foreach and expanded via AllWords.
    foreach (Word metaWord in phrase.Split(new[]{' '}, StringSplitOptions.RemoveEmptyEntries).Select(s => s.Trim('\'')))
    {
        List<Word> allWords = AllWords(metaWord, firstWordOfPhrase);
        if (allWords.Count > 0)
            AddWordsToMatches(keyTerm, allWords, startOfListForPhrase);
        firstWordOfPhrase = false;
    }
    // If an optional phrase is still flagged as open after the last token, add its
    // accumulated words now.
    if (m_fInOptionalPhrase)
        AddWordsToMatches(keyTerm, m_optionalPhraseWords, startOfListForPhrase);
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Initializes a new instance of the <see cref="KeyTermMatchBuilder"/> class without
/// any rules: delegates to the two-argument constructor, passing null as the second
/// argument.
/// </summary>
/// <param name="keyTerm">The key term.</param>
/// ------------------------------------------------------------------------------------
public KeyTermMatchBuilder(IKeyTerm keyTerm) : this(keyTerm, null)
{
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Initializes a new instance of the <see cref="DummyKeyTermRenderingInfo"/> class,
/// storing the given key term and end offset.
/// </summary>
/// <param name="keyTerm">The key term, stored in Term.</param>
/// <param name="endOffsetOfPrev">The value stored in
/// EndOffsetOfRenderingOfPreviousOccurrenceOfThisTerm.</param>
/// ------------------------------------------------------------------------------------
public DummyKeyTermRenderingInfo(IKeyTerm keyTerm, int endOffsetOfPrev)
{
    Term = keyTerm;
    EndOffsetOfRenderingOfPreviousOccurrenceOfThisTerm = endOffsetOfPrev;
}
private KeyTermMatchSurrogate m_surrogate; // Cached for efficiency
#endregion

#region Constructors
/// ------------------------------------------------------------------------------------
/// <summary>
/// Initializes a new instance of the <see cref="KeyTermMatch"/> class that doesn't
/// (yet) match on any words. Delegates to the three-argument constructor with an
/// empty array of words.
/// </summary>
/// <param name="term">The term.</param>
/// <param name="matchForRefOnly">if set to <c>true</c> [match for ref only].</param>
/// ------------------------------------------------------------------------------------
internal KeyTermMatch(IKeyTerm term, bool matchForRefOnly) : this(new Word[0], term, matchForRefOnly)
{
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Appends another key term to the list of terms held by this object.
/// </summary>
/// <param name="keyTerm">The key term to add; must not be null.</param>
/// <exception cref="ArgumentNullException"><paramref name="keyTerm"/> is null.</exception>
/// ------------------------------------------------------------------------------------
public void AddTerm(IKeyTerm keyTerm)
{
    if (keyTerm == null)
    {
        throw new ArgumentNullException("keyTerm");
    }

    m_terms.Add(keyTerm);
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Appends words to the matches of the current phrase. When more than one word is
/// supplied, every match of the current phrase is first duplicated, which doubles the
/// number of matches for the phrase. Outside an optional phrase, the pre-existing
/// matches then receive the first word (unless it is null) and the duplicates receive
/// the second word. Inside an optional phrase, only the duplicates are extended, and
/// they receive all of the words. The optional-phrase flag is always cleared on exit.
/// </summary>
/// <param name="keyTerm">The key term used for the duplicated matches.</param>
/// <param name="words">The words to append to the matches' word lists.</param>
/// <param name="startOfListForPhrase">The index of the position in m_list that
/// corresponds to the start of the matches relevant to the current phrase.</param>
/// ------------------------------------------------------------------------------------
private void AddWordsToMatches(IKeyTerm keyTerm, List<Word> words, int startOfListForPhrase)
{
    int countBeforeSpawning = m_list.Count;

    if (words.Count > 1)
    {
        // Materialize the copies before appending: the query reads from the very list
        // that is about to grow.
        List<KeyTermMatch> spawned = m_list
            .Skip(startOfListForPhrase)
            .Select(k => new KeyTermMatch(k.Words, keyTerm, m_fMatchForRefOnly))
            .ToList();
        m_list.AddRange(spawned);
    }

    Word firstWord = words[0];

    if (m_fInOptionalPhrase)
    {
        // Only the freshly spawned copies get the optional phrase's words.
        for (int i = countBeforeSpawning; i < m_list.Count; i++)
        {
            m_list[i].AddWords(words);
        }
    }
    else
    {
        // A null first word leaves the pre-existing matches untouched.
        int firstIndex = (firstWord == null) ? countBeforeSpawning : startOfListForPhrase;
        for (int i = firstIndex; i < m_list.Count; i++)
        {
            // Pre-existing matches take the first word; spawned copies take the second.
            Word wordToAdd = (i >= countBeforeSpawning) ? words[1] : firstWord;
            m_list[i].AddWord(wordToAdd);
        }
    }

    m_fInOptionalPhrase = false;
}
/// ------------------------------------------------------------------------------------
/// <summary>
/// Initializes a new instance of the <see cref="KeyTermMatchBuilder"/> class. An
/// explicit rule for the term (if any) takes precedence; regular-expression rules are
/// consulted only when no explicit rule is found. If neither produces a result that
/// ends processing, the term text itself is split into phrases and processed.
/// </summary>
/// <param name="keyTerm">The key term.</param>
/// <param name="rules">Optional dictionary of (English) key terms to rules indicating
/// special handling needed. Looked up using the lowercased, FormC-normalized term
/// text.</param>
/// <param name="regexRules">regular-expression-based rules. If any term matches a
/// regular expression in this collection, terms will be extracted using the variable
/// "term". If the term variable is not present in the regular expression, this term
/// will be excluded.</param>
/// ------------------------------------------------------------------------------------
public KeyTermMatchBuilder(IKeyTerm keyTerm, IReadonlyDictionary<string, KeyTermRule> rules,
    IEnumerable<Regex> regexRules)
{
    string lookupKey = keyTerm.Term.ToLowerInvariant().Normalize(NormalizationForm.FormC);
    bool matchForRefOnly = false;

    KeyTermRule explicitRule;
    if (rules != null && rules.TryGetValue(lookupKey, out explicitRule))
    {
        explicitRule.Used = true;

        var ruleKind = explicitRule.Rule;
        bool skipMainTerm = ruleKind == KeyTermRule.RuleType.Exclude;
        if (ruleKind == KeyTermRule.RuleType.MatchForRefOnly)
        {
            matchForRefOnly = true;
        }

        if (explicitRule.Alternates != null)
        {
            foreach (KeyTermRulesKeyTermRuleAlternate alternate in explicitRule.Alternates)
            {
                bool alternateRefOnly = matchForRefOnly || alternate.MatchForRefOnly;
                AddMatchesForPhrase(keyTerm, alternate.Name, alternateRefOnly, false, m_list.Count);
            }
        }

        // An excluded term contributes only its alternates (if any).
        if (skipMainTerm)
        {
            return;
        }
    }
    else if (regexRules != null)
    {
        foreach (Regex pattern in regexRules)
        {
            for (Match hit = pattern.Match(lookupKey); hit.Success; hit = hit.NextMatch())
            {
                string extracted = hit.Result("${term}");
                if (extracted == "${term}")
                {
                    // No "term" variable found, so this rule excludes the term
                    return;
                }

                string[] extractedPhrases = extracted.Split(new[] { ", or ", "," },
                    StringSplitOptions.RemoveEmptyEntries);
                foreach (string extractedPhrase in extractedPhrases)
                {
                    // for now, at least reg-ex based rules can't be reference-dependent
                    ProcessKeyTermPhrase(keyTerm, extractedPhrase, false);
                }
            }

            // Stop at the first regular expression that yields any match.
            if (m_list.Count > 0)
            {
                return;
            }
        }
    }

    string[] mainPhrases = lookupKey.Split(new[] { ", or ", ",", ";", "=" },
        StringSplitOptions.RemoveEmptyEntries);
    foreach (string mainPhrase in mainPhrases)
    {
        ProcessKeyTermPhrase(keyTerm, mainPhrase, matchForRefOnly);
    }
}