/// <summary>
/// Symmetrical with <see cref="Span(ICharSequence, int, SpanCondition, out int)"/>.
/// Scans backwards from <paramref name="limit"/> and spans the trailing substring for which
/// each code point c has <paramref name="spanCondition"/>==Contains(c).
/// </summary>
/// <param name="s">The text to scan.</param>
/// <param name="limit">
/// The exclusive end index of the text to scan. It must be greater than 0 and no greater than
/// <paramref name="s"/>.Length: the first code unit read is <c>s[limit - 1]</c>, and that read
/// is not guarded.
/// </param>
/// <param name="spanCondition">
/// <see cref="SpanCondition.NotContained"/> spans code points that are not in the set;
/// every other value spans code points that are in the set.
/// </param>
/// <returns>The string index which starts the span (i.e. inclusive).</returns>
/// <remarks>
/// The containment test is inlined here, as in
/// <see cref="Span(ICharSequence, int, SpanCondition, out int)"/>. A trail surrogate that is
/// directly preceded by a lead surrogate is looked up as one supplementary code point; every
/// other code unit, including an unpaired surrogate, is looked up as a BMP code point by itself.
/// </remarks>
public int SpanBack(ICharSequence s, int limit, SpanCondition spanCondition)
{
    // true: span while code points are in the set; false: span while they are not.
    bool spanContained = SpanCondition.NotContained != spanCondition;

    for (; ; )
    {
        char c = s[--limit];
        char c2;
        bool isPair = false;
        bool inSet;

        if (c <= 0xff)
        {
            inSet = latin1Contains[c];
        }
        else if (c <= 0x7ff)
        {
            inSet = (table7FF[c & 0x3f] & (1 << (c >> 6))) != 0;
        }
        else if (char.IsLowSurrogate(c) && 0 != limit && char.IsHighSurrogate(c2 = s[limit - 1]))
        {
            // Surrogate pair: c is the trail unit (U+DC00..U+DFFF) and c2 the lead unit.
            // c must really be a trail surrogate here; a code unit in U+E000..U+FFFF that
            // merely follows a lead surrogate is a BMP code point and is handled below.
            int supplementary = Character.ToCodePoint(c2, c);
            inSet = ContainsSlow(supplementary, list4kStarts[0x10], list4kStarts[0x11]);
            isPair = true;
        }
        else
        {
            // BMP code point above U+07FF (unpaired surrogates included).
            int lead = c >> 12;
            int twoBits = (bmpBlockBits[(c >> 6) & 0x3f] >> lead) & 0x10001;
            if (twoBits <= 1)
            {
                // All 64 code points with the same bits 15..6
                // are either in the set or not.
                inSet = twoBits != 0;
            }
            else
            {
                // Look up the code point in its 4k block of code points.
                inSet = ContainsSlow(c, list4kStarts[lead], list4kStarts[lead + 1]);
            }
        }

        if (inSet != spanContained)
        {
            // limit currently indexes the last code unit of the code point that ended the
            // span, so the span starts right after it.
            return limit + 1;
        }

        if (isPair)
        {
            // Also step over the lead surrogate of the accepted pair.
            --limit;
        }

        if (0 == limit)
        {
            return 0;
        }
    }
}
/// <summary>
/// Span the initial substring, beginning at <paramref name="start"/>, for which each code point c
/// has <paramref name="spanCondition"/>==Contains(c).
/// </summary>
/// <param name="s">The text to scan; it is read up to <paramref name="s"/>.Length.</param>
/// <param name="start">The start index.</param>
/// <param name="spanCondition">
/// <see cref="SpanCondition.NotContained"/> spans code points that are not in the set;
/// every other value spans code points that are in the set.
/// </param>
/// <param name="outCount">
/// Receives the number of code points in the span (a surrogate pair counts as one).
/// </param>
/// <returns>The limit (exclusive end) of the span.</returns>
/// <remarks>
/// NOTE: the containment test is written out inline here instead of calling Contains(c) per
/// code point. A lead surrogate is only combined with the following code unit when one exists
/// and is a trail surrogate; single surrogates are handled as surrogate code points, as usual
/// in ICU.
/// </remarks>
public int Span(ICharSequence s, int start, SpanCondition spanCondition, out int outCount)
{
    int end = s.Length;
    int pos = start;
    int pairCount = 0;

    // true: span while code points are in the set; false: span while they are not.
    bool wantContained = spanCondition != SpanCondition.NotContained;

    while (pos < end)
    {
        char unit = s[pos];
        char trail;
        bool isPair = false;
        bool inSet;

        if (unit <= 0xff)
        {
            inSet = latin1Contains[unit];
        }
        else if (unit <= 0x7ff)
        {
            inSet = (table7FF[unit & 0x3f] & (1 << (unit >> 6))) != 0;
        }
        else if (char.IsHighSurrogate(unit) && pos + 1 != end && char.IsLowSurrogate(trail = s[pos + 1]))
        {
            // Well-formed surrogate pair: test the supplementary code point it encodes.
            int supplementary = Character.ToCodePoint(unit, trail);
            inSet = ContainsSlow(supplementary, list4kStarts[0x10], list4kStarts[0x11]);
            isPair = true;
        }
        else
        {
            // BMP code point above U+07FF (unpaired surrogates included).
            int block = unit >> 12;
            int twoBits = (bmpBlockBits[(unit >> 6) & 0x3f] >> block) & 0x10001;

            // twoBits of 0 or 1 means all 64 code points sharing bits 15..6 are uniformly
            // out of / in the set; otherwise search the code point's 4k block.
            inSet = twoBits <= 1
                ? twoBits != 0
                : ContainsSlow(unit, list4kStarts[block], list4kStarts[block + 1]);
        }

        if (inSet != wantContained)
        {
            break;
        }

        if (isPair)
        {
            // The pair occupies two code units but is a single code point.
            ++pairCount;
            ++pos;
        }
        ++pos;
    }

    // Code units spanned minus one per surrogate pair gives the number of code points.
    outCount = (pos - start) - pairCount;
    return pos;
}