/// <summary>
/// Verifies that Icu.Split breaks English text into the expected segments for the
/// word, sentence, character and line break iterator types.
/// </summary>
public void Split()
{
	const string locale = "en";

	// Word breaking: a single word comes back unchanged.
	string[] singleWord = { "word" };
	Assert.That(Icu.Split(Icu.UBreakIteratorType.UBRK_WORD, locale, "word"), Is.EqualTo(singleWord));

	// Word breaking: whitespace and punctuation are returned as their own segments.
	string[] wordSegments =
	{
		"This", " ", "is", " ", "some", " ", "text", ",", " ",
		"and", " ", "some", " ", "more", " ", "text", "."
	};
	Assert.That(
		Icu.Split(Icu.UBreakIteratorType.UBRK_WORD, locale, "This is some text, and some more text."),
		Is.EqualTo(wordSegments));

	// Sentence breaking: the trailing space stays attached to the preceding sentence.
	string[] sentenceSegments = { "Sentence one. ", "Sentence two." };
	Assert.That(
		Icu.Split(Icu.UBreakIteratorType.UBRK_SENTENCE, locale, "Sentence one. Sentence two."),
		Is.EqualTo(sentenceSegments));

	// Character breaking: one segment per character for this ASCII input.
	string[] characterSegments = { "w", "o", "r", "d" };
	Assert.That(Icu.Split(Icu.UBreakIteratorType.UBRK_CHARACTER, locale, "word"), Is.EqualTo(characterSegments));

	// Line breaking: segments end after a space and after the hyphen.
	string[] lineSegments = { "This ", "is ", "some ", "hyphenated-", "text." };
	Assert.That(
		Icu.Split(Icu.UBreakIteratorType.UBRK_LINE, locale, "This is some hyphenated-text."),
		Is.EqualTo(lineSegments));
}
/// <summary>
/// Initializes a new instance of the <see cref="StringSearcher{T}"/> class.
/// </summary>
/// <param name="type">The search type to use.</param>
/// <param name="wsManager">
/// The writing system manager. It is captured by the sort-key and tokenizer delegates, which
/// look up the writing system on every call.
/// </param>
/// <exception cref="ArgumentNullException"><paramref name="wsManager"/> is null.</exception>
public StringSearcher(SearchType type, WritingSystemManager wsManager)
{
	if (wsManager == null)
		throw new ArgumentNullException("wsManager");

	m_type = type;

	// Sort keys come from the collator of the writing system's default collation.
	m_sortKeySelector = (wsHandle, str) =>
		wsManager.Get(wsHandle).DefaultCollation.Collator.GetSortKey(str).KeyData;

	// Tokens are produced by the ICU word break iterator for the writing system's ICU locale.
	m_tokenizer = (wsHandle, str) =>
		Icu.Split(Icu.UBreakIteratorType.UBRK_WORD, wsManager.Get(wsHandle).IcuLocale, str);
}
/// <summary>
/// Adds an item to the index selected by <paramref name="indexId"/> and <paramref name="wsId"/>,
/// keyed by collation sort keys derived from <paramref name="text"/>.
/// </summary>
/// <param name="indexId">The index ID.</param>
/// <param name="wsId">The writing system handle; selects the index, collator and ICU locale.</param>
/// <param name="text">The text to derive sort keys from.</param>
/// <param name="item">The item to store under each derived key.</param>
private void Add(int indexId, int wsId, string text, T item)
{
	SortKeyIndex targetIndex = GetIndex(indexId, wsId);
	IWritingSystem writingSystem = m_wsManager.Get(wsId);
	ICollator keyGenerator = writingSystem.Collator;

	if (m_type == SearchType.FullText)
	{
		// Full-text: the item is stored once per segment returned by the word break iterator.
		foreach (string word in Icu.Split(Icu.UBreakIteratorType.UBRK_WORD, writingSystem.IcuLocale, text))
		{
			targetIndex.Add(keyGenerator.GetSortKey(word).KeyData, item);
		}
	}
	else if (m_type == SearchType.Exact || m_type == SearchType.Prefix)
	{
		// Exact and prefix: the item is stored once, under the key of the whole text.
		targetIndex.Add(keyGenerator.GetSortKey(text).KeyData, item);
	}
	// Any other search type adds nothing.
}
/// <summary>
/// Searches an index for the specified string.
/// </summary>
/// <remarks>
/// The string is processed one (writing system, text) pair at a time, as produced by
/// GetWsStrings. Each lookup's matches are intersected with the results so far, so an
/// item is returned only if it matches every lookup.
/// </remarks>
/// <param name="indexId">The index ID.</param>
/// <param name="tss">The string.</param>
/// <returns>
/// The search results. Empty when <paramref name="tss"/> is null, has no text, or
/// yields no lookups.
/// </returns>
public IEnumerable<T> Search(int indexId, ITsString tss)
{
	if (tss == null || string.IsNullOrEmpty(tss.Text))
	{
		return Enumerable.Empty<T>();
	}

	HashSet<T> results = null;
	foreach (Tuple<int, string> wsStr in GetWsStrings(tss))
	{
		SortKeyIndex index = GetIndex(indexId, wsStr.Item1);
		IWritingSystem ws = m_wsManager.Get(wsStr.Item1);
		ICollator collator = ws.Collator;
		switch (m_type)
		{
			case SearchType.Exact:
			case SearchType.Prefix:
			{
				// Exact uses the plain upper bound; Prefix uses the "long" upper bound, which
				// per the ICU ucol_getBound documentation also covers longer strings that
				// start with the searched text.
				Icu.UColBoundMode upperMode = m_type == SearchType.Exact
					? Icu.UColBoundMode.UCOL_BOUND_UPPER
					: Icu.UColBoundMode.UCOL_BOUND_UPPER_LONG;
				results = IntersectSearchResults(results,
					GetItemsInSortKeyRange(index, collator, wsStr.Item2, upperMode));
				break;
			}

			case SearchType.FullText:
			{
				// Tokenize with the writing system's ICU locale, the same value Add passes to
				// Icu.Split when building the index, so query and index tokens agree.
				string[] tokens = Icu.Split(Icu.UBreakIteratorType.UBRK_WORD, ws.IcuLocale, wsStr.Item2).ToArray();
				for (int i = 0; i < tokens.Length; i++)
				{
					// Only the final token gets the "long" upper bound; all earlier tokens
					// use the plain upper bound.
					Icu.UColBoundMode upperMode = i < tokens.Length - 1
						? Icu.UColBoundMode.UCOL_BOUND_UPPER
						: Icu.UColBoundMode.UCOL_BOUND_UPPER_LONG;
					results = IntersectSearchResults(results,
						GetItemsInSortKeyRange(index, collator, tokens[i], upperMode));
				}
				break;
			}
		}
	}

	return results ?? Enumerable.Empty<T>();
}

/// <summary>
/// Gets the items of <paramref name="index"/> whose keys fall between the lower and upper
/// sort-key bounds computed for <paramref name="text"/>.
/// </summary>
/// <param name="index">The index to query.</param>
/// <param name="collator">The collator used to produce the sort key of the text.</param>
/// <param name="text">The text to look up.</param>
/// <param name="upperMode">The bound mode used for the upper end of the range.</param>
/// <returns>The items returned by the index for the computed key range.</returns>
private IEnumerable<T> GetItemsInSortKeyRange(SortKeyIndex index, ICollator collator, string text,
	Icu.UColBoundMode upperMode)
{
	byte[] sortKey = collator.GetSortKey(text).KeyData;

	// Buffers start at text length * SortKeyFactor and are passed by ref.
	// NOTE(review): presumably GetSortKeyBound replaces the buffer when it is too small - confirm.
	var lower = new byte[text.Length * SortKeyFactor];
	Icu.GetSortKeyBound(sortKey, Icu.UColBoundMode.UCOL_BOUND_LOWER, ref lower);
	var upper = new byte[text.Length * SortKeyFactor];
	Icu.GetSortKeyBound(sortKey, upperMode, ref upper);

	return index.GetItems(lower, upper);
}

/// <summary>
/// Folds one lookup's matches into the accumulated result set: the first lookup seeds the
/// set, and every later lookup narrows it by intersection.
/// </summary>
/// <param name="results">The results accumulated so far, or null if there are none yet.</param>
/// <param name="items">The matches of the current lookup.</param>
/// <returns>The updated result set (never null).</returns>
private static HashSet<T> IntersectSearchResults(HashSet<T> results, IEnumerable<T> items)
{
	if (results == null)
	{
		return new HashSet<T>(items);
	}

	results.IntersectWith(items);
	return results;
}