/// <summary>
/// Returns the codepoint found at a zero-based position within the sequence
/// produced by <c>str.Codepoints(relativeToCharacterIndex)</c>.
/// </summary>
/// <param name="str">The string whose codepoints are enumerated.</param>
/// <param name="codepointIndex">
/// Zero-based position, counted in codepoints from the start of the enumeration.
/// A negative value never matches, so the result is null.
/// </param>
/// <param name="relativeToCharacterIndex">
/// Forwarded unchanged to <c>Codepoints</c>; presumably the character offset
/// at which enumeration begins (confirm against that method).
/// </param>
/// <returns>
/// The codepoint at the requested position, or null when the enumeration ends
/// before that position is reached.
/// </returns>
public static uint? NthCodepoint(AbstractString str, int codepointIndex, int relativeToCharacterIndex = 0) {
    var remaining = codepointIndex;

    foreach (var entry in str.Codepoints(relativeToCharacterIndex)) {
        // Compare first, then count down: the entry seen when the counter is
        // exactly zero is the one the caller asked for.
        if (remaining-- == 0) {
            return entry.Codepoint;
        }
    }

    return null;
}
/// <summary>
/// Finds the contiguous run of word characters or whitespace characters that
/// contains the requested starting position.
/// </summary>
/// <remarks>
/// The string is scanned from its beginning. Each codepoint is classified with
/// <c>IsWhiteSpace</c>, and the start/end of the most recent whitespace run and
/// the most recent word run are tracked separately. Scanning stops at the first
/// whitespace/word transition that occurs strictly after the starting position.
/// If the starting position is never encountered, the most recent word run is
/// returned. For an empty string the result is (-1, 0).
/// </remarks>
/// <param name="str">The string to scan.</param>
/// <param name="searchFromCodepointIndex">
/// Starting position expressed as a codepoint index, or null.
/// </param>
/// <param name="searchFromCharacterIndex">
/// Starting position expressed as a character index, or null.
/// </param>
/// <returns>
/// A pair of character indices: the first character of the run and one past
/// the last character of the run. When the run is a word whose final codepoint
/// begins with a high surrogate, the end is extended so the pair is not split.
/// </returns>
/// <exception cref="ArgumentException">
/// Both <paramref name="searchFromCodepointIndex"/> and
/// <paramref name="searchFromCharacterIndex"/> are null.
/// </exception>
public static Pair<int> FindWordBoundary(AbstractString str, int? searchFromCodepointIndex = null, int? searchFromCharacterIndex = null) {
    if ((searchFromCharacterIndex == null) && (searchFromCodepointIndex == null)) {
        throw new ArgumentException("Either a starting codepoint index or character index must be provided");
    }

    // -1 means "no character of this kind has been seen yet".
    int firstWhitespaceCharacter = -1, lastWhitespaceCharacter = -1,
        firstWordCharacter = -1, lastWordCharacter = -1;
    bool searchStartedInWhiteSpace = false, inWord = false;

    foreach (var cp in str.Codepoints()) {
        bool transitioned = false;
        var isWhiteSpace = IsWhiteSpace(cp.Codepoint);

        // Remember which kind of run the starting position falls into; that
        // decides which of the two tracked ranges is returned.
        if (
            (cp.CodepointIndex == searchFromCodepointIndex) ||
            (cp.CharacterIndex == searchFromCharacterIndex)
        ) {
            searchStartedInWhiteSpace = isWhiteSpace;
        }

        if (isWhiteSpace) {
            // A new whitespace run begins when leaving a word, or at the very
            // first whitespace seen. Only the former counts as a transition.
            if (inWord || firstWhitespaceCharacter < 0) {
                transitioned = inWord;
                inWord = false;
                firstWhitespaceCharacter = cp.CharacterIndex;
            }
            lastWhitespaceCharacter = cp.CharacterIndex;
        } else {
            // A new word run begins whenever we were not already in a word.
            if (!inWord || firstWordCharacter < 0) {
                transitioned = !inWord;
                inWord = true;
                firstWordCharacter = cp.CharacterIndex;
            }
            lastWordCharacter = cp.CharacterIndex;
        }

        // The transition just recorded only touched the *other* kind's range,
        // so the range containing the starting position is complete: stop.
        if (
            transitioned && (
                (searchFromCodepointIndex.HasValue && (cp.CodepointIndex > searchFromCodepointIndex)) ||
                (searchFromCharacterIndex.HasValue && (cp.CharacterIndex > searchFromCharacterIndex))
            )
        ) {
            break;
        }
    }

    if (searchStartedInWhiteSpace) {
        return new Pair<int>(firstWhitespaceCharacter, lastWhitespaceCharacter + 1);
    }

    // lastWordCharacter is the index of the first UTF-16 unit of the final
    // codepoint. If that unit is a high surrogate, step over it so the end of
    // the range lands after the low surrogate. The guard must admit index 0
    // (a word consisting of one surrogate pair at the start of the string);
    // it only needs to exclude the -1 "nothing seen" sentinel.
    if ((lastWordCharacter >= 0) && char.IsHighSurrogate(str[lastWordCharacter])) {
        lastWordCharacter++;
    }

    return new Pair<int>(firstWordCharacter, lastWordCharacter + 1);
}