Ejemplo n.º 1
0
        /// <summary>
        /// Splits the string into an array of individual words
        /// </summary>
        /// <remarks>
        /// Letters, decimal digits and symbols are accumulated into words; whitespace and
        /// punctuation end the word currently being built. A full stop that sits directly
        /// between two decimal digits is kept inside the word, so "1.50" is returned as a
        /// single word rather than "1" and "50". Characters that are neither word characters
        /// nor separators are skipped without ending the current word.
        /// </remarks>
        /// <param name="value">The value to split into words</param>
        /// <returns>An array of extracted words; empty when the value is null or empty</returns>
        public static string[] SplitIntoWords
        (
            this string value
        )
        {
            if (String.IsNullOrEmpty(value))
            {
                return(new string[] { });
            }

            // No trimming or space collapsing is required up front: a separator only
            // flushes a non-empty builder, so empty words can never be emitted.
            var matchingWords = new List<string>();
            var wordBuilder   = new StringBuilder();

            for (var currentIndex = 0; currentIndex < value.Length; currentIndex++)
            {
                var currentChar = value[currentIndex];

                if (Char.IsLetterOrDigit(currentChar) || Char.IsSymbol(currentChar))
                {
                    wordBuilder.Append(currentChar);
                    continue;
                }

                if (!Char.IsWhiteSpace(currentChar) && !Char.IsPunctuation(currentChar))
                {
                    // Neither a word character nor a separator: ignore it
                    continue;
                }

                if (wordBuilder.Length == 0)
                {
                    // Separator with no word in progress: nothing to flush
                    continue;
                }

                // A full stop with a decimal digit on both sides is a decimal point and
                // stays part of the word (e.g. "1.50"). Char.IsDigit is used on purpose:
                // it matches the digits that Char.IsLetterOrDigit accepts into a word,
                // whereas Char.IsNumber also accepts characters such as fractions that
                // are never appended, which would leave a stray '.' in the word.
                var isDecimalPoint = currentChar == '.'
                                     && currentIndex > 0
                                     && Char.IsDigit(value[currentIndex - 1])
                                     && (currentIndex + 1) < value.Length
                                     && Char.IsDigit(value[currentIndex + 1]);

                if (isDecimalPoint)
                {
                    wordBuilder.Append(currentChar);
                    continue;
                }

                // Flush the word into the matching words collection
                matchingWords.Add(wordBuilder.ToString());
                wordBuilder.Clear();
            }

            // Add anything not yet flushed to the words list
            if (wordBuilder.Length > 0)
            {
                matchingWords.Add(wordBuilder.ToString());
            }

            return(matchingWords.ToArray());
        }