示例#1
0
        /// <summary>
        /// Lazily yields the limit (exclusive end) index of every normalization "segment" of this
        /// string's <c>Text</c>, as determined by the supplied ICU normalizer.
        /// </summary>
        /// <remarks>
        /// <para>
        /// A "segment" is a run of characters that interact with one another under the given
        /// normalization, so that splitting the run and normalizing the pieces separately would give
        /// a different result than normalizing the run as a whole. Example under NFC: when
        /// LATIN SMALL LETTER C (U+0063) is followed by COMBINING CEDILLA (U+0327) and then by
        /// LATIN SMALL LETTER D (U+0064), the c and the cedilla belong to one segment, and the index
        /// of the d (the first index outside that segment) is one of the values yielded.
        /// </para>
        /// <para>
        /// Index 0 is never yielded, and the final value yielded is always the length of the text;
        /// null or empty text yields nothing at all. A caller can therefore keep a previousIndex that
        /// starts at 0 and call GetChars(previousIndex, thisIndex) for each yielded index in a foreach
        /// loop to obtain the "current" segment.
        /// </para>
        /// </remarks>
        /// <param name="icuNormalizer">IntPtr to the ICU normalizer to use (get this from Icu.GetIcuNormalizer)</param>
        /// <returns>The indexes into "this" TsString that fall on normalization "segment" boundaries, in increasing order, suitable for passing into GetChars(prevIdx, thisIdx)</returns>
        private IEnumerable<int> EnumerateSegmentLimits(IntPtr icuNormalizer)
        {
            if (!String.IsNullOrEmpty(Text))
            {
                for (int offset = 0; offset < Text.Length;)
                {
                    // Decode a whole code point: a surrogate pair at this offset becomes a single
                    // value above 0xFFFF. (Char.ConvertToUtf32 throws on an unpaired surrogate.)
                    int scalar = Char.ConvertToUtf32(Text, offset);

                    // A boundary before this code point closes the previous segment here. The very
                    // start of the text is skipped so that 0 is never reported as a limit.
                    bool boundaryBefore = Icu.HasNormalizationBoundaryBefore(icuNormalizer, scalar);
                    if (boundaryBefore && offset > 0)
                    {
                        yield return offset;
                    }

                    // Supplementary-plane code points occupy two UTF-16 code units, all others one.
                    if (scalar > 0xffff)
                    {
                        offset += 2;
                    }
                    else
                    {
                        offset += 1;
                    }
                }

                // The end of the text always closes the last segment.
                yield return Text.Length;
            }
        }