/// <summary>
/// Asserts that a <see cref="WordAndPunct"/> carries the expected word,
/// trailing punctuation and offset. The three values are checked in that
/// order, so the first mismatch is the one reported.
/// </summary>
/// <param name="wordAndPunct">The entry under test.</param>
/// <param name="word">Expected value of <c>Word</c>.</param>
/// <param name="punct">Expected value of <c>Punct</c>.</param>
/// <param name="offset">Expected value of <c>Offset</c>.</param>
private void CheckWordAndPunct(
    WordAndPunct wordAndPunct,
    string word,
    string punct,
    int offset)
{
    Assert.AreEqual(
        word,
        wordAndPunct.Word,
        "The word is not correct");

    Assert.AreEqual(
        punct,
        wordAndPunct.Punct,
        "The punctuation is not correct");

    Assert.AreEqual(
        offset,
        wordAndPunct.Offset,
        "The offset is not correct");
}
//public string PunctuationCharacters { get { return punctuationCharacters; } }

/// <summary>
/// Splits <paramref name="text"/> into consecutive entries, each made of a
/// word, the run of non-word characters that follows it, and the index in
/// <paramref name="text"/> at which the word starts.
/// </summary>
/// <remarks>
/// <para>
/// Digits are always accepted inside a word. A character for which
/// <c>IsSingleCharacterWord</c> is true forms a word on its own when it is
/// the first character of the word, and otherwise terminates the word in
/// progress without being consumed by it. A character for which
/// <c>IsWordMedialPunctuation</c> is true is kept in the word only when it is
/// not the first character and is immediately followed by a word-forming
/// character.
/// </para>
/// <para>
/// An entry's <c>Word</c> is empty when scanning starts on a character that
/// cannot begin a word (for example leading punctuation).
/// </para>
/// </remarks>
/// <param name="text">The text to split.</param>
/// <returns>The entries in the order they occur in <paramref name="text"/>.</returns>
public virtual List<WordAndPunct> WordAndPuncts(string text)
{
    List<WordAndPunct> entries = new List<WordAndPunct>();
    int length = text.Length;
    int pos = 0;

    while (pos < length)
    {
        // Step over separator characters that precede the word.
        while (pos < length && char.IsSeparator(text[pos]))
        {
            pos++;
        }

        int wordStart = pos;

        // --- Word part ---------------------------------------------------
        // "pos == wordStart" means no character has been accepted into the
        // word yet, i.e. we are looking at its first character.
        while (pos < length)
        {
            char current = text[pos];

            if (IsSingleCharacterWord(current))
            {
                // At the start it is a complete word by itself and is
                // consumed; anywhere else it ends the current word and is
                // left for the following scan.
                if (pos == wordStart)
                {
                    pos++;
                }

                break;
            }

            if (IsWordMedialPunctuation(current))
            {
                // Medial punctuation needs a word character on both sides:
                // it may not open the word, and the very next character
                // must be word-forming.
                // Open question carried over from the original comments:
                // can a word contain several medial punctuation marks?
                bool nextExists = pos + 1 < length;
                if (pos == wordStart || !nextExists || !IsWordFormingCharacter(text[pos + 1]))
                {
                    break;
                }
            }
            else if (!char.IsDigit(current) && !IsWordFormingCharacter(current))
            {
                // Neither a digit nor a word-forming character: word ends.
                break;
            }

            pos++;
        }

        int punctStart = pos;

        // --- Punctuation part ---------------------------------------------
        // Everything up to the next word-forming character or digit.
        while (pos < length
               && !IsWordFormingCharacter(text[pos])
               && !char.IsDigit(text[pos]))
        {
            pos++;
        }

        entries.Add(new WordAndPunct
        {
            Offset = wordStart,
            Word = text.Substring(wordStart, punctStart - wordStart),
            Punct = text.Substring(punctStart, pos - punctStart)
        });
    }

    return entries;
}