IsSplitChar() public static method

Determines whether a char is a split char.
public static bool IsSplitChar(char current)
Parameter: current (char) - The current char.
Returns: bool
Exemplo n.º 1
0
        /// <summary>
        /// Tokenizes a string.
        /// </summary>
        /// <param name="text">The text to tokenize.</param>
        /// <param name="location">The location of the words that are extracted.</param>
        /// <returns>The tokens.</returns>
        /// <remarks>
        /// Scanning stops once the character index reaches 65500, so words that start at or
        /// beyond that offset are not extracted.
        /// </remarks>
        /// <exception cref="ArgumentNullException">If <paramref name="text"/> is <c>null</c>.</exception>
        public static WordInfo[] Tokenize(string text, WordLocation location)
        {
            if (text == null)
            {
                throw new ArgumentNullException("text");
            }

            // Upper bound for the start index of a word.
            const int MaxStartIndex = 65500;

            List<WordInfo> words = new List<WordInfo>(text.Length / 5);           // Average 5 chars/word

            // Indices are tracked as int: a ushort counter silently wraps to 0 past 65535
            // (unchecked arithmetic), which on long texts made the scan restart or never end.

            // Skip all leading split chars
            int currentIndex = SkipSplitChars(0, text);
            int currentWordStart = currentIndex;

            while (currentIndex < text.Length && currentIndex < MaxStartIndex)
            {
                // Advance to the end of the current word (next split char or end of text)
                while (currentIndex < text.Length && !Tools.IsSplitChar(text[currentIndex]))
                {
                    currentIndex++;
                }

                string w = text.Substring(currentWordStart, currentIndex - currentWordStart);
                w = Tools.RemoveDiacriticsAndPunctuation(w, true);
                if (!string.IsNullOrEmpty(w))
                {
                    // currentWordStart < MaxStartIndex here, so the narrowing cast cannot lose data
                    words.Add(new WordInfo(w, (ushort)currentWordStart, (ushort)words.Count, location));
                }

                int resumeAt = currentIndex + 1;
                if (resumeAt > ushort.MaxValue)
                {
                    // The cast below would wrap around to the start of the text
                    break;
                }

                int nextWordStart = SkipSplitChars((ushort)resumeAt, text);
                if (nextWordStart <= currentIndex)
                {
                    // No forward progress; presumably the helper's own index wrapped. Stop scanning.
                    break;
                }

                currentIndex = nextWordStart;
                currentWordStart = nextWordStart;
            }

            return words.ToArray();
        }
Exemplo n.º 2
0
        /// <summary>
        ///     Normalizes a raw query so that it can be used for searching.
        /// </summary>
        /// <param name="query">The query.</param>
        /// <returns>The prepared query.</returns>
        private static string PrepareQuery(string query)
        {
            var buffer = new StringBuilder(query.Length);

            // Every split char becomes a single space and all other chars are copied as-is;
            // this step is slightly different from RemoveDiacriticsAndPunctuation.
            for (int i = 0; i < query.Length; i++)
            {
                char current = query[i];
                buffer.Append(Tools.IsSplitChar(current) ? ' ' : current);
            }

            return Tools.RemoveDiacriticsAndPunctuation(buffer.ToString(), false);
        }