Пример #1
0
        /// <summary>
        /// Walks the codepoints produced by <c>str.Codepoints(relativeToCharacterIndex)</c> and
        /// returns the one at position <paramref name="codepointIndex"/> (zero-based) in that sequence.
        /// </summary>
        /// <param name="str">The text whose codepoints are enumerated.</param>
        /// <param name="codepointIndex">Zero-based position of the wanted codepoint within the enumeration.</param>
        /// <param name="relativeToCharacterIndex">Forwarded unchanged to <c>str.Codepoints</c>; presumably the character offset at which enumeration starts.</param>
        /// <returns>The matching codepoint, or <c>null</c> if the enumeration ends before the position is reached.</returns>
        public static uint? NthCodepoint(AbstractString str, int codepointIndex, int relativeToCharacterIndex = 0)
        {
            // Number of codepoints still to be skipped before the wanted one.
            int remaining = codepointIndex;

            foreach (var entry in str.Codepoints(relativeToCharacterIndex))
            {
                // Compare first, then count down: the entry seen when the counter hits zero is the answer.
                if (remaining-- == 0)
                {
                    return entry.Codepoint;
                }
            }

            return null;
        }
Пример #2
0
        /// <summary>
        /// Scans the codepoints of <paramref name="str"/> from the beginning and finds the run of
        /// whitespace or non-whitespace ("word") characters that the search position falls in.
        /// </summary>
        /// <param name="str">The text to scan.</param>
        /// <param name="searchFromCodepointIndex">Search position, compared against each entry's <c>CodepointIndex</c>.</param>
        /// <param name="searchFromCharacterIndex">Search position, compared against each entry's <c>CharacterIndex</c>.</param>
        /// <returns>
        /// A pair built from the run's first character index and its last character index plus one.
        /// The whitespace run is returned when the codepoint at the search position is whitespace,
        /// otherwise the word run. If the search position is never matched during the scan, the most
        /// recently tracked word run is returned.
        /// </returns>
        /// <exception cref="ArgumentException">Both search positions are <c>null</c>.</exception>
        public static Pair<int> FindWordBoundary(AbstractString str, int? searchFromCodepointIndex = null, int? searchFromCharacterIndex = null)
        {
            // -1 means "no character of this kind has been seen yet".
            int firstWhitespaceCharacter = -1,
                lastWhitespaceCharacter  = -1,
                firstWordCharacter       = -1,
                lastWordCharacter        = -1;

            if ((searchFromCharacterIndex == null) && (searchFromCodepointIndex == null))
            {
                throw new ArgumentException("Either a starting codepoint index or character index must be provided");
            }

            bool searchStartedInWhiteSpace = false, inWord = false;

            foreach (var cp in str.Codepoints())
            {
                bool transitioned = false;
                var  isWhiteSpace = IsWhiteSpace(cp.Codepoint);

                // Remember which kind of run the search position sits in. Comparisons against a
                // null search index are always false, so only the supplied index can match.
                if (
                    (cp.CodepointIndex == searchFromCodepointIndex) ||
                    (cp.CharacterIndex == searchFromCharacterIndex)
                    )
                {
                    searchStartedInWhiteSpace = isWhiteSpace;
                }

                if (isWhiteSpace)
                {
                    // Start a new whitespace run when leaving a word or on the very first whitespace.
                    if (inWord || firstWhitespaceCharacter < 0)
                    {
                        transitioned             = inWord;
                        inWord                   = false;
                        firstWhitespaceCharacter = cp.CharacterIndex;
                    }
                    lastWhitespaceCharacter = cp.CharacterIndex;
                }
                else
                {
                    // Start a new word run when leaving whitespace or on the very first word character.
                    if (!inWord || firstWordCharacter < 0)
                    {
                        transitioned       = !inWord;
                        inWord             = true;
                        firstWordCharacter = cp.CharacterIndex;
                    }
                    lastWordCharacter = cp.CharacterIndex;
                }

                // The first run change strictly after the search position ends the run of interest.
                // The bounds of the other kind of run were just overwritten, but those are not returned.
                if (transitioned &&
                    (
                        (searchFromCodepointIndex.HasValue && (cp.CodepointIndex > searchFromCodepointIndex)) ||
                        (searchFromCharacterIndex.HasValue && (cp.CharacterIndex > searchFromCharacterIndex))
                    )
                    )
                {
                    break;
                }
            }

            if (searchStartedInWhiteSpace)
            {
                return new Pair<int>(firstWhitespaceCharacter, lastWhitespaceCharacter + 1);
            }
            else
            {
                // If the last word codepoint begins with a high surrogate, its low surrogate occupies
                // the following character, so extend the end to keep the pair intact. The check must
                // include index 0 (>= 0, not > 0): otherwise a word whose final codepoint is a
                // surrogate pair at the start of the string is cut in half. -1 still skips the
                // indexer when no word character was seen.
                if ((lastWordCharacter >= 0) && char.IsHighSurrogate(str[lastWordCharacter]))
                {
                    lastWordCharacter++;
                }
                return new Pair<int>(firstWordCharacter, lastWordCharacter + 1);
            }
        }