/// <summary>
/// Splits the string into an array of individual words
/// </summary>
/// <remarks>
/// Letters, decimal digits and symbols accumulate into the current word; white space and
/// punctuation end it. A full stop that sits directly between two decimal digits is kept
/// inside the word, so "1.50" is returned as one word rather than "1" and "50".
/// Any other character (for example a control character or a combining mark) is skipped
/// without ending the current word.
/// </remarks>
/// <param name="value">The value to split into words</param>
/// <returns>An array of extracted words; an empty array when the value is null or empty</returns>
public static string[] SplitIntoWords(this string value)
{
    if (String.IsNullOrEmpty(value))
    {
        return new string[0];
    }

    // No trimming or space-collapsing is needed up front: separators are only acted on
    // while a word is in progress, so leading, trailing and repeated separators can never
    // produce an empty word.
    var matchingWords = new List<string>();
    var wordBuilder = new StringBuilder();

    for (var index = 0; index < value.Length; index++)
    {
        var currentChar = value[index];

        if (Char.IsLetterOrDigit(currentChar) || Char.IsSymbol(currentChar))
        {
            wordBuilder.Append(currentChar);
            continue;
        }

        var isSeparator = Char.IsWhiteSpace(currentChar) || Char.IsPunctuation(currentChar);

        if (!isSeparator || wordBuilder.Length == 0)
        {
            // Either a character that is neither word content nor a separator (skipped),
            // or a separator with no word in progress (nothing to flush).
            continue;
        }

        // A full stop between two decimal digits is a decimal point, not a word boundary.
        // Char.IsDigit is used (rather than Char.IsNumber) so that both neighbours are
        // characters the word itself accepts; otherwise the '.' could be kept while the
        // following "number" (e.g. a superscript or fraction) is dropped, leaving "1.".
        // value[index - 1] is safe here: a non-empty word implies index >= 1.
        var isDecimalPoint = currentChar == '.'
            && Char.IsDigit(value[index - 1])
            && (index + 1) < value.Length
            && Char.IsDigit(value[index + 1]);

        if (isDecimalPoint)
        {
            wordBuilder.Append(currentChar);
            continue;
        }

        // Flush the completed word into the matching words collection
        matchingWords.Add(wordBuilder.ToString());
        wordBuilder.Clear();
    }

    // Add anything not yet flushed to the words list
    if (wordBuilder.Length > 0)
    {
        matchingWords.Add(wordBuilder.ToString());
    }

    return matchingWords.ToArray();
}