public static IsSplitChar ( char current ) : bool | ||
current | char | The current char. |
Returns | bool |
/// <summary>
/// Tokenizes a string.
/// </summary>
/// <param name="text">The text to tokenize.</param>
/// <param name="location">The location of the words that are extracted.</param>
/// <returns>The tokens.</returns>
/// <exception cref="ArgumentNullException">If <paramref name="text"/> is <c>null</c>.</exception>
/// <remarks>
/// Word start positions are tracked as <c>ushort</c> values, so only words that start
/// before character index 65500 are extracted; the remainder of the text is ignored.
/// A word that starts before that limit is extracted in full, even if it extends past it.
/// </remarks>
public static WordInfo[] Tokenize(string text, WordLocation location)
{
    if (text == null)
    {
        throw new ArgumentNullException("text");
    }

    // Words starting at or beyond this index are never extracted.
    const int MaxStartIndex = 65500;
    int length = text.Length;

    // Average 5 chars/word; only the first MaxStartIndex chars can contribute word starts,
    // so cap the initial capacity instead of sizing it on the whole text.
    List<WordInfo> words = new List<WordInfo>(Math.Min(length, MaxStartIndex) / 5);

    // Skip all leading split chars
    ushort currentWordStart = SkipSplitChars(0, text);

    while (currentWordStart < length && currentWordStart < MaxStartIndex)
    {
        // Scan for the end of the word with an int: a ushort counter would silently wrap
        // to 0 past 65535 (unchecked arithmetic) and the scan would never terminate.
        int wordEnd = currentWordStart;
        while (wordEnd < length && !Tools.IsSplitChar(text[wordEnd]))
        {
            wordEnd++;
        }

        string w = text.Substring(currentWordStart, wordEnd - currentWordStart);
        w = Tools.RemoveDiacriticsAndPunctuation(w, true);
        if (!string.IsNullOrEmpty(w))
        {
            words.Add(new WordInfo(w, currentWordStart, (ushort)words.Count, location));
        }

        // Stop when the text is exhausted or the next word could only start at/after the
        // limit. This also guarantees the ushort cast below cannot wrap around.
        // NOTE(review): assumes SkipSplitChars never returns an index lower than the one
        // it is given - confirm against its implementation.
        if (wordEnd >= length || wordEnd + 1 >= MaxStartIndex)
        {
            break;
        }

        currentWordStart = SkipSplitChars((ushort)(wordEnd + 1), text);
    }

    return words.ToArray();
}
/// <summary>
/// Prepares a query for searching.
/// </summary>
/// <param name="query">The query.</param>
/// <returns>The prepared query.</returns>
private static string PrepareQuery(string query)
{
    var buffer = new StringBuilder(query.Length);

    // Note: this is slightly different from RemoveDiacriticsAndPunctuation -
    // every split char is replaced with a single space before normalization.
    for (int i = 0; i < query.Length; i++)
    {
        char ch = query[i];
        buffer.Append(Tools.IsSplitChar(ch) ? ' ' : ch);
    }

    return Tools.RemoveDiacriticsAndPunctuation(buffer.ToString(), false);
}