/// <summary>
/// Splits <paramref name="word"/> into tokens at the entries of <c>Delimiters</c>.
/// </summary>
/// <param name="word">Text to tokenize. <c>null</c> yields an empty collection.</param>
/// <returns>
/// The tokens, in input order, for which <c>StopWordHandler.IsWord</c> returned <c>false</c>.
/// Empty tokens can be produced (for example between two adjacent delimiters or after a
/// trailing one); they are kept unless <c>StopWordHandler.IsWord</c> rejects them.
/// </returns>
public Collection<string> Tokenize(string word)
{
    Collection<string> tokens = new Collection<string>();
    if (word == null)
    {
        return tokens;
    }

    int position = 0;
    while (position < word.Length)
    {
        // A single whitespace character at the scan position is skipped, never emitted.
        int tokenStart = position;
        if (char.IsWhiteSpace(word[tokenStart]))
        {
            tokenStart++;
        }

        int tokenEnd = IndexOfNextDelimiter(word, tokenStart);
        if (tokenEnd == position)
        {
            // A non-whitespace delimiter sits exactly at the scan position. Without this
            // step the token would be empty and the scan position would never advance,
            // looping forever. Skip one character, exactly as is done for whitespace.
            // NOTE(review): assumes each Delimiters entry is a single character - confirm.
            tokenStart = position + 1;
            tokenEnd = IndexOfNextDelimiter(word, tokenStart);
        }

        string termToTest = word.Substring(tokenStart, tokenEnd - tokenStart);
        if (!StopWordHandler.IsWord(termToTest))
        {
            tokens.Add(termToTest);
        }

        // Resume at the delimiter that ended this token (or at the end of the input).
        position = tokenEnd;
    }

    return tokens;
}

/// <summary>
/// Returns the index of the earliest occurrence of any <c>Delimiters</c> entry in
/// <paramref name="text"/> at or after <paramref name="startIndex"/>, or
/// <c>text.Length</c> when none occurs.
/// </summary>
private int IndexOfNextDelimiter(string text, int startIndex)
{
    int nearest = text.Length;
    for (int j = 0; j < Delimiters.Length; j++)
    {
        int index = text.IndexOf(Delimiters[j], startIndex);
        if (index != -1 && index < nearest)
        {
            nearest = index;
        }
    }

    return nearest;
}
/// <summary>
/// Breaks <paramref name="word"/> into overlapping substrings of
/// <paramref name="tokenLength"/> characters, optionally padding both ends first.
/// </summary>
/// <param name="word">Text to tokenize. It is also stored in <c>SuppliedWord</c>.</param>
/// <param name="extended">
/// When <c>true</c>, <c>tokenLength - 1</c> copies of <c>DefaultStartPadCharacter</c> are
/// placed before the word and the same number of <c>DefaultEndPadCharacter</c> after it.
/// </param>
/// <param name="tokenLength">Number of characters in each token.</param>
/// <param name="characterCombinationIndexValue">
/// Any non-zero value adds a second pass of "skip" tokens: the first
/// <c>tokenLength - 1</c> characters at a position joined with the single character
/// that follows the regular token at that position. Only zero / non-zero is examined.
/// </param>
/// <returns>
/// <c>null</c> when <paramref name="word"/> is null or empty; otherwise the tokens for
/// which <c>StopWordHandler.IsWord</c> returned <c>false</c>. Regular tokens may repeat;
/// a skip token is added only if the collection does not already contain it.
/// </returns>
public Collection<string> Tokenize(string word, bool extended, int tokenLength, int characterCombinationIndexValue)
{
    if (string.IsNullOrEmpty(word))
    {
        return null;
    }

    SuppliedWord = word;

    Collection<string> tokens = new Collection<string>();
    int wordLength = word.Length;

    // Padding per side; also the prefix length of the skip tokens built further down.
    int padCount = tokenLength > 0 ? tokenLength - 1 : 0;

    StringBuilder padded = new StringBuilder(wordLength + 2 * padCount);
    if (extended)
    {
        padded.Insert(0, DefaultStartPadCharacter, padCount);
        padded.Append(word);
        padded.Insert(padded.Length, DefaultEndPadCharacter, padCount);
    }
    else
    {
        padded.Append(word);
    }

    string text = padded.ToString();

    // Number of start positions that leave room for a full token.
    int positions = extended ? wordLength + padCount : wordLength - tokenLength + 1;

    for (int start = 0; start < positions; start++)
    {
        string gram = text.Substring(start, tokenLength);
        if (!StopWordHandler.IsWord(gram))
        {
            tokens.Add(gram);
        }
    }

    if (characterCombinationIndexValue == 0)
    {
        return tokens;
    }

    // Each skip token reads one character past the regular token, so the last start
    // position is dropped.
    int skipPositions = positions - 1;
    for (int start = 0; start < skipPositions; start++)
    {
        string prefix = text.Substring(start, padCount);
        string following = text.Substring(start + tokenLength, 1);
        string skipGram = prefix + following;

        if (StopWordHandler.IsWord(skipGram))
        {
            continue;
        }

        if (!tokens.Contains(skipGram))
        {
            tokens.Add(skipGram);
        }
    }

    return tokens;
}