Esempio n. 1
0
 public void Split()
 {
     // Word breaking: a single word comes back as a single token.
     string[] singleWord = { "word" };
     Assert.That(Icu.Split(Icu.UBreakIteratorType.UBRK_WORD, "en", "word"), Is.EqualTo(singleWord));

     // Word breaking: whitespace and punctuation are returned as tokens of their own.
     string[] wordTokens =
     {
         "This", " ", "is", " ", "some", " ", "text", ",", " ",
         "and", " ", "some", " ", "more", " ", "text", "."
     };
     Assert.That(Icu.Split(Icu.UBreakIteratorType.UBRK_WORD, "en", "This is some text, and some more text."),
                 Is.EqualTo(wordTokens));

     // Sentence breaking: the space after the first sentence stays attached to it.
     string[] sentences = { "Sentence one. ", "Sentence two." };
     Assert.That(Icu.Split(Icu.UBreakIteratorType.UBRK_SENTENCE, "en", "Sentence one. Sentence two."),
                 Is.EqualTo(sentences));

     // Character breaking: one token per character of the input.
     string[] characters = { "w", "o", "r", "d" };
     Assert.That(Icu.Split(Icu.UBreakIteratorType.UBRK_CHARACTER, "en", "word"), Is.EqualTo(characters));

     // Line breaking: segments end after a space or after the hyphen.
     string[] lineSegments = { "This ", "is ", "some ", "hyphenated-", "text." };
     Assert.That(Icu.Split(Icu.UBreakIteratorType.UBRK_LINE, "en", "This is some hyphenated-text."),
                 Is.EqualTo(lineSegments));
 }
Esempio n. 2
0
        /// <summary>
        /// Initializes a new instance of the <see cref="StringSearcher&lt;T&gt;"/> class whose
        /// sort-key and tokenizer delegates look up writing systems through the given manager.
        /// </summary>
        /// <param name="type">The search type, stored for later use by this instance.</param>
        /// <param name="wsManager">The writing system manager captured by the sort-key and tokenizer delegates.</param>
        /// <exception cref="ArgumentNullException"><paramref name="wsManager"/> is <c>null</c>.</exception>
        public StringSearcher(SearchType type, WritingSystemManager wsManager)
        {
            if (wsManager == null)
            {
                throw new ArgumentNullException("wsManager");
            }

            // Splits text into word tokens using the ICU locale of the requested writing system.
            m_tokenizer = (ws, text) =>
            {
                return Icu.Split(Icu.UBreakIteratorType.UBRK_WORD, wsManager.Get(ws).IcuLocale, text);
            };

            // Produces the raw sort key bytes using the default collation of the requested writing system.
            m_sortKeySelector = (ws, text) =>
            {
                return wsManager.Get(ws).DefaultCollation.Collator.GetSortKey(text).KeyData;
            };

            m_type = type;
        }
        /// <summary>
        /// Adds <paramref name="item"/> to the index obtained for the given index ID and writing
        /// system, keyed by the collation sort key of the text (exact and prefix searches) or of
        /// each of its word tokens (full-text searches).
        /// </summary>
        /// <param name="indexId">The index ID passed to <c>GetIndex</c>.</param>
        /// <param name="wsId">The writing system handle used to look up the collator and ICU locale.</param>
        /// <param name="text">The text to generate sort keys from.</param>
        /// <param name="item">The item stored under each generated sort key.</param>
        private void Add(int indexId, int wsId, string text, T item)
        {
            SortKeyIndex   targetIndex   = GetIndex(indexId, wsId);
            IWritingSystem writingSystem = m_wsManager.Get(wsId);
            ICollator      keyMaker      = writingSystem.Collator;

            if (m_type == SearchType.FullText)
            {
                // One index entry per word-break token of the text.
                foreach (string word in Icu.Split(Icu.UBreakIteratorType.UBRK_WORD, writingSystem.IcuLocale, text))
                {
                    targetIndex.Add(keyMaker.GetSortKey(word).KeyData, item);
                }
                return;
            }

            if (m_type == SearchType.Exact || m_type == SearchType.Prefix)
            {
                // A single index entry for the whole text.
                targetIndex.Add(keyMaker.GetSortKey(text).KeyData, item);
            }
        }
        /// <summary>
        /// Searches an index for the specified string.
        /// </summary>
        /// <remarks>
        /// For every (writing system, text) pair returned by <c>GetWsStrings</c>, the text (or, for
        /// full-text searches, each of its word tokens) is converted into a lower/upper sort key
        /// range. The result is the intersection of the items found for all of those ranges.
        /// </remarks>
        /// <param name="indexId">The index ID.</param>
        /// <param name="tss">The string.</param>
        /// <returns>
        /// The search results; an empty sequence when <paramref name="tss"/> is <c>null</c>, has no
        /// text, or no range was searched.
        /// </returns>
        public IEnumerable<T> Search(int indexId, ITsString tss)
        {
            if (tss == null || string.IsNullOrEmpty(tss.Text))
            {
                return Enumerable.Empty<T>();
            }

            HashSet<T> results = null;

            foreach (Tuple<int, string> wsStr in GetWsStrings(tss))
            {
                SortKeyIndex   index    = GetIndex(indexId, wsStr.Item1);
                IWritingSystem ws       = m_wsManager.Get(wsStr.Item1);
                ICollator      collator = ws.Collator;

                switch (m_type)
                {
                case SearchType.Exact:
                case SearchType.Prefix:
                    // Exact searches use the plain upper bound; prefix searches use the "long"
                    // upper bound so that longer strings starting with the text also fall in range.
                    results = IntersectWithKeyRange(results, index, collator, wsStr.Item2,
                                                    m_type == SearchType.Exact
                                                        ? Icu.UColBoundMode.UCOL_BOUND_UPPER
                                                        : Icu.UColBoundMode.UCOL_BOUND_UPPER_LONG);
                    break;

                case SearchType.FullText:
                    // Tokenize with the same ICU locale that Add uses when indexing, so that the
                    // query is broken into words exactly like the indexed text was.
                    string[] tokens = Icu.Split(Icu.UBreakIteratorType.UBRK_WORD, ws.IcuLocale, wsStr.Item2).ToArray();
                    for (int i = 0; i < tokens.Length; i++)
                    {
                        // Only the last token is searched with the "long" upper bound; all
                        // earlier tokens use the plain upper bound.
                        results = IntersectWithKeyRange(results, index, collator, tokens[i],
                                                        i < tokens.Length - 1
                                                            ? Icu.UColBoundMode.UCOL_BOUND_UPPER
                                                            : Icu.UColBoundMode.UCOL_BOUND_UPPER_LONG);
                    }
                    break;
                }
            }

            return results ?? Enumerable.Empty<T>();
        }

        /// <summary>
        /// Looks up the items whose sort keys lie between the lower and upper bounds derived from
        /// the sort key of <paramref name="text"/>, and intersects them with the results so far.
        /// </summary>
        /// <param name="results">The results accumulated so far, or <c>null</c> if nothing has been searched yet.</param>
        /// <param name="index">The index to query.</param>
        /// <param name="collator">The collator used to generate the sort key for the text.</param>
        /// <param name="text">The text to search for.</param>
        /// <param name="upperMode">The bound mode used to compute the upper end of the range.</param>
        /// <returns>
        /// A new set holding the found items when <paramref name="results"/> is <c>null</c>;
        /// otherwise <paramref name="results"/> itself, reduced to the items also found here.
        /// </returns>
        private HashSet<T> IntersectWithKeyRange(HashSet<T> results, SortKeyIndex index, ICollator collator,
                                                 string text, Icu.UColBoundMode upperMode)
        {
            byte[] sortKey = collator.GetSortKey(text).KeyData;

            // The bound buffers are passed by reference to Icu.GetSortKeyBound, which fills them in.
            var lower = new byte[text.Length * SortKeyFactor];
            Icu.GetSortKeyBound(sortKey, Icu.UColBoundMode.UCOL_BOUND_LOWER, ref lower);
            var upper = new byte[text.Length * SortKeyFactor];
            Icu.GetSortKeyBound(sortKey, upperMode, ref upper);

            IEnumerable<T> items = index.GetItems(lower, upper);
            if (results == null)
            {
                return new HashSet<T>(items);
            }

            results.IntersectWith(items);
            return results;
        }