/// <summary>
/// Builds the options word used by the fast Latin compare functions and fills
/// <paramref name="primaries"/> with the precomputed mini primary weights.
/// </summary>
/// <param name="data">Collation data providing the fast Latin table and its header.</param>
/// <param name="settings">Settings whose options, max-variable value and reordering are consulted.</param>
/// <param name="primaries">Output array; its length must be exactly <see cref="LatinLimit"/>.</param>
/// <returns>
/// The mini variable top shifted left by 16 bits and OR-ed with the settings options,
/// or -1 if the Latin fastpath is not supported for the data and settings.
/// </returns>
public static int GetOptions(CollationData data, CollationSettings settings, char[] primaries)
{
    char[] tableHeader = data.fastLatinTableHeader;
    if (tableHeader == null)
    {
        return -1;
    }
    Debug.Assert((tableHeader[0] >> 8) == Version);
    if (primaries.Length != LatinLimit)
    {
        // Caller error: the output buffer has the wrong capacity.
        Debug.Assert(false);
        return -1;
    }

    int miniVarTop;
    if ((settings.Options & CollationSettings.AlternateMask) != 0)
    {
        // The header stores one mini variable top per max-variable setting.
        int headerLength = tableHeader[0] & 0xff;
        int headerIndex = 1 + settings.MaxVariable;
        if (headerIndex >= headerLength)
        {
            return -1; // variableTop >= digits, should not occur
        }
        miniVarTop = tableHeader[headerIndex];
    }
    else
    {
        // No mini primaries are variable: put the variable top just below
        // the lowest long mini primary.
        miniVarTop = MIN_LONG - 1;
    }

    bool digitsAreReordered = false;
    if (settings.HasReordering)
    {
        long previousStart = 0;
        long startBeforeDigits = 0;
        long digitsStart = 0;
        long startAfterDigits = 0;
        for (int group = ReorderCodes.First;
            group < ReorderCodes.First + CollationData.MAX_NUM_SPECIAL_REORDER_CODES;
            ++group)
        {
            long groupStart = data.GetFirstPrimaryForGroup(group);
            groupStart = settings.Reorder(groupStart);
            if (group == ReorderCodes.Digit)
            {
                startBeforeDigits = previousStart;
                digitsStart = groupStart;
                continue;
            }
            if (groupStart == 0)
            {
                continue;
            }
            if (groupStart < previousStart)
            {
                // The permutation affects the groups up to Latin.
                return -1;
            }
            // In the future, there might be a special group between digits & Latin.
            if (digitsStart != 0 && startAfterDigits == 0 && previousStart == startBeforeDigits)
            {
                startAfterDigits = groupStart;
            }
            previousStart = groupStart;
        }

        long latinStart = data.GetFirstPrimaryForGroup(UScript.Latin);
        latinStart = settings.Reorder(latinStart);
        if (latinStart < previousStart)
        {
            return -1;
        }
        if (startAfterDigits == 0)
        {
            startAfterDigits = latinStart;
        }
        // Digits count as reordered unless they still sit strictly between their neighbors.
        digitsAreReordered = startBeforeDigits >= digitsStart || digitsStart >= startAfterDigits;
    }

    char[] table = data.FastLatinTable; // skip the header
    for (int c = 0; c < LatinLimit; ++c)
    {
        int weight = table[c];
        if (weight >= MIN_SHORT)
        {
            weight &= SHORT_PRIMARY_MASK;
        }
        else if (weight <= miniVarTop)
        {
            weight = 0;
        }
        else
        {
            weight &= LONG_PRIMARY_MASK;
        }
        primaries[c] = (char)weight;
    }

    if (digitsAreReordered || (settings.Options & CollationSettings.Numeric) != 0)
    {
        // Bail out for digits: zero the entries for U+0030..U+0039.
        for (int digit = 0x30; digit <= 0x39; ++digit)
        {
            primaries[digit] = (char)0;
        }
    }

    // Shift the miniVarTop above other options.
    return (miniVarTop << 16) | settings.Options;
}
/// <summary>
/// Writes the sort key bytes for <paramref name="minLevel"/> up to the strength found in the
/// options of <paramref name="settings"/>, optionally including the case level.
/// Stops writing levels as soon as <c>callback.NeedToWrite(level)</c> returns false.
/// Levels are separated with the LEVEL_SEPARATOR_BYTE; no TERMINATOR_BYTE is written.
/// </summary>
/// <param name="iter">Source of the collation elements (CEs) to encode.</param>
/// <param name="compressibleBytes">Indexed by un-reordered primary lead byte; true if that lead byte is compressible.</param>
/// <param name="settings">Supplies the options, variable top and primary reordering.</param>
/// <param name="sink">Receives the primary bytes immediately and the other levels after the CE loop.</param>
/// <param name="minLevel">Lowest level to write; lower levels are masked out.</param>
/// <param name="callback">Asked before each beyond-primary level is appended to the sink.</param>
/// <param name="preflight">When false, the method returns early once the sink reports overflow after a primary weight.</param>
public static void WriteSortKeyUpToQuaternary(CollationIterator iter, bool[] compressibleBytes,
    CollationSettings settings, SortKeyByteSink sink, int minLevel, LevelCallback callback,
    bool preflight)
{
    int options = settings.Options;

    // Levels implied by the strength, plus the case level if enabled,
    // minus the levels below minLevel.
    int levels = levelMasks[(int)CollationSettings.GetStrength(options)];
    if ((options & CollationSettings.CASE_LEVEL) != 0)
    {
        levels |= Collation.CASE_LEVEL_FLAG;
    }
    levels &= ~((1 << minLevel) - 1);
    if (levels == 0)
    {
        return;
    }

    // Neither levels nor options change below, so name the tests once.
    bool wantPrimary = (levels & Collation.PRIMARY_LEVEL_FLAG) != 0;
    bool wantSecondary = (levels & Collation.SECONDARY_LEVEL_FLAG) != 0;
    bool wantCase = (levels & Collation.CASE_LEVEL_FLAG) != 0;
    bool wantTertiary = (levels & Collation.TERTIARY_LEVEL_FLAG) != 0;
    bool wantQuaternary = (levels & Collation.QUATERNARY_LEVEL_FLAG) != 0;
    bool alternateShifted = (options & CollationSettings.ALTERNATE_MASK) != 0;
    bool backwardSecondary = (options & CollationSettings.BACKWARD_SECONDARY) != 0;
    bool upperFirst = (options & CollationSettings.UPPER_FIRST) != 0;

    long variableTop;
    if (alternateShifted)
    {
        // +1 so that we can use "<" and primary ignorables test out early.
        variableTop = settings.VariableTop + 1;
    }
    else
    {
        variableTop = 0;
    }

    int tertiaryMask = CollationSettings.GetTertiaryMask(options);

    byte[] p234 = new byte[3];
    SortKeyLevel cases = GetSortKeyLevel(levels, Collation.CASE_LEVEL_FLAG);
    SortKeyLevel secondaries = GetSortKeyLevel(levels, Collation.SECONDARY_LEVEL_FLAG);
    SortKeyLevel tertiaries = GetSortKeyLevel(levels, Collation.TERTIARY_LEVEL_FLAG);
    SortKeyLevel quaternaries = GetSortKeyLevel(levels, Collation.QUATERNARY_LEVEL_FLAG);

    long prevReorderedPrimary = 0; // 0==no compression
    int commonCases = 0;
    int commonSecondaries = 0;
    int commonTertiaries = 0;
    int commonQuaternaries = 0;
    int prevSecondary = 0;
    int secSegmentStart = 0;

    for (; ; )
    {
        // No need to keep all CEs in the buffer when we write a sort key.
        iter.ClearCEsIfNoneRemaining();
        long ce = iter.NextCE();
        long primary = ce.TripleShift(32);

        if (primary < variableTop && primary > Collation.MERGE_SEPARATOR_PRIMARY)
        {
            // Variable CE: shift it to the quaternary level, ignore all following
            // primary ignorables, and shift further variable CEs as well.
            if (commonQuaternaries != 0)
            {
                int leftover = DrainCommonWeightRuns(
                    quaternaries, commonQuaternaries, QUAT_COMMON_MAX_COUNT, QUAT_COMMON_MIDDLE);
                // Shifted primary weights are lower than the common weight.
                quaternaries.AppendByte(QUAT_COMMON_LOW + leftover);
                commonQuaternaries = 0;
            }
            do
            {
                if (wantQuaternary)
                {
                    if (settings.HasReordering)
                    {
                        primary = settings.Reorder(primary);
                    }
                    if (((int)primary.TripleShift(24)) >= QUAT_SHIFTED_LIMIT_BYTE)
                    {
                        // Prevent shifted primary lead bytes from
                        // overlapping with the common compression range.
                        quaternaries.AppendByte(QUAT_SHIFTED_LIMIT_BYTE);
                    }
                    quaternaries.AppendWeight32(primary);
                }
                do
                {
                    ce = iter.NextCE();
                    primary = ce.TripleShift(32);
                } while (primary == 0);
            } while (primary < variableTop && primary > Collation.MERGE_SEPARATOR_PRIMARY);
        }

        // ce could be primary ignorable, or NO_CE, or the merge separator,
        // or a regular primary CE, but it is not variable.
        // If ce==NO_CE, then write nothing for the primary level but
        // terminate compression on all levels and then exit the loop.
        if (primary > Collation.NO_CE_PRIMARY && wantPrimary)
        {
            // Test the un-reordered primary for compressibility.
            bool compressible = compressibleBytes[(int)primary.TripleShift(24)];
            if (settings.HasReordering)
            {
                primary = settings.Reorder(primary);
            }
            int leadByte = (int)primary.TripleShift(24);
            if (!compressible || leadByte != ((int)prevReorderedPrimary.TripleShift(24)))
            {
                if (prevReorderedPrimary != 0)
                {
                    if (primary >= prevReorderedPrimary)
                    {
                        sink.Append(Collation.PRIMARY_COMPRESSION_HIGH_BYTE);
                    }
                    else if (leadByte > Collation.MERGE_SEPARATOR_BYTE)
                    {
                        // No primary compression terminator
                        // at the end of the level or merged segment.
                        sink.Append(Collation.PRIMARY_COMPRESSION_LOW_BYTE);
                    }
                }
                sink.Append(leadByte);
                prevReorderedPrimary = compressible ? primary : 0;
            }

            byte secondByte = (byte)(primary.TripleShift(16));
            if (secondByte != 0)
            {
                // Write the remaining primary bytes without trailing zero bytes.
                p234[0] = secondByte;
                p234[1] = (byte)(primary.TripleShift(8));
                p234[2] = (byte)primary;
                int byteCount = (p234[1] == 0) ? 1 : (p234[2] == 0) ? 2 : 3;
                sink.Append(p234, byteCount);
            }

            // Optimization for internalNextSortKeyPart():
            // When the primary level overflows we can stop because we need not
            // calculate (preflight) the whole sort key length.
            if (!preflight && sink.Overflowed)
            {
                return;
            }
        }

        int lower32 = (int)ce;
        if (lower32 == 0)
        {
            // Completely ignorable, no secondary/case/tertiary/quaternary.
            continue;
        }

        if (wantSecondary)
        {
            int secondary = lower32.TripleShift(16); // 16 bits
            if (secondary != 0) // otherwise: secondary ignorable
            {
                if (secondary == Collation.COMMON_WEIGHT16 &&
                    (!backwardSecondary || primary != Collation.MERGE_SEPARATOR_PRIMARY))
                {
                    // Common secondary weight, and backwards-secondary is off
                    // or the ce is not the merge separator.
                    ++commonSecondaries;
                }
                else if (!backwardSecondary)
                {
                    if (commonSecondaries != 0)
                    {
                        int leftover = DrainCommonWeightRuns(
                            secondaries, commonSecondaries, SEC_COMMON_MAX_COUNT, SEC_COMMON_MIDDLE);
                        secondaries.AppendByte(secondary < Collation.COMMON_WEIGHT16
                            ? SEC_COMMON_LOW + leftover
                            : SEC_COMMON_HIGH - leftover);
                        commonSecondaries = 0;
                    }
                    secondaries.AppendWeight16(secondary);
                }
                else
                {
                    if (commonSecondaries != 0)
                    {
                        --commonSecondaries;
                        // Append reverse weights. The level will be re-reversed later.
                        int remainder = commonSecondaries % SEC_COMMON_MAX_COUNT;
                        secondaries.AppendByte(prevSecondary < Collation.COMMON_WEIGHT16
                            ? SEC_COMMON_LOW + remainder
                            : SEC_COMMON_HIGH - remainder);
                        commonSecondaries -= remainder;
                        // commonSecondaries is now a multiple of SEC_COMMON_MAX_COUNT.
                        while (commonSecondaries > 0) // same as >= SEC_COMMON_MAX_COUNT
                        {
                            secondaries.AppendByte(SEC_COMMON_MIDDLE);
                            commonSecondaries -= SEC_COMMON_MAX_COUNT;
                        }
                        // commonSecondaries == 0
                    }

                    if (0 < primary && primary <= Collation.MERGE_SEPARATOR_PRIMARY)
                    {
                        // The backwards secondary level compares secondary weights backwards
                        // within segments separated by the merge separator (U+FFFE):
                        // reverse the bytes of the segment that just ended, in place.
                        byte[] secs = secondaries.Data();
                        int high = secondaries.Length - 1;
                        while (secSegmentStart < high)
                        {
                            byte swapped = secs[secSegmentStart];
                            secs[secSegmentStart] = secs[high];
                            secs[high] = swapped;
                            ++secSegmentStart;
                            --high;
                        }
                        secondaries.AppendByte(primary == Collation.NO_CE_PRIMARY
                            ? Collation.LEVEL_SEPARATOR_BYTE
                            : Collation.MERGE_SEPARATOR_BYTE);
                        prevSecondary = 0;
                        secSegmentStart = secondaries.Length;
                    }
                    else
                    {
                        secondaries.AppendReverseWeight16(secondary);
                        prevSecondary = secondary;
                    }
                }
            }
        }

        if (wantCase)
        {
            // Primary+caseLevel: Ignore case level weights of primary ignorables.
            // Otherwise: Ignore case level weights of secondary ignorables.
            // For details see the comments in the CollationCompare class.
            bool skipCaseWeight =
                (CollationSettings.GetStrength(options) == (int)CollationStrength.Primary)
                    ? primary == 0
                    : (lower32.TripleShift(16)) == 0;
            if (!skipCaseWeight)
            {
                int c = (lower32.TripleShift(8)) & 0xff; // case bits & tertiary lead byte
                Debug.Assert((c & 0xc0) != 0xc0);
                if ((c & 0xc0) == 0 && c > Collation.LEVEL_SEPARATOR_BYTE)
                {
                    ++commonCases;
                }
                else
                {
                    if (!upperFirst)
                    {
                        // lowerFirst: Compress common weights to nibbles 1..7..13,
                        // mixed=14, upper=15.
                        // If there are only common (=lowest) weights in the whole level,
                        // then we need not write anything.
                        // Level length differences are handled already on the next-higher level.
                        if (commonCases != 0 &&
                            (c > Collation.LEVEL_SEPARATOR_BYTE || !cases.IsEmpty))
                        {
                            int leftover = DrainCommonWeightRuns(
                                cases, commonCases, CASE_LOWER_FIRST_COMMON_MAX_COUNT,
                                CASE_LOWER_FIRST_COMMON_MIDDLE << 4);
                            int nibble = (c <= Collation.LEVEL_SEPARATOR_BYTE)
                                ? CASE_LOWER_FIRST_COMMON_LOW + leftover
                                : CASE_LOWER_FIRST_COMMON_HIGH - leftover;
                            cases.AppendByte(nibble << 4);
                            commonCases = 0;
                        }
                        if (c > Collation.LEVEL_SEPARATOR_BYTE)
                        {
                            c = (CASE_LOWER_FIRST_COMMON_HIGH + (c.TripleShift(6))) << 4; // 14 or 15
                        }
                    }
                    else
                    {
                        // upperFirst: Compress common weights to nibbles 3..15,
                        // mixed=2, upper=1.
                        // The compressed common case weights only go up from the "low" value
                        // because with upperFirst the common weight is the highest one.
                        if (commonCases != 0)
                        {
                            int leftover = DrainCommonWeightRuns(
                                cases, commonCases, CASE_UPPER_FIRST_COMMON_MAX_COUNT,
                                CASE_UPPER_FIRST_COMMON_LOW << 4);
                            cases.AppendByte((CASE_UPPER_FIRST_COMMON_LOW + leftover) << 4);
                            commonCases = 0;
                        }
                        if (c > Collation.LEVEL_SEPARATOR_BYTE)
                        {
                            c = (CASE_UPPER_FIRST_COMMON_LOW - (c.TripleShift(6))) << 4; // 2 or 1
                        }
                    }
                    // c is a separator byte 01,
                    // or a left-shifted nibble 0x10, 0x20, ... 0xf0.
                    cases.AppendByte(c);
                }
            }
        }

        if (wantTertiary)
        {
            int t = lower32 & tertiaryMask;
            Debug.Assert((lower32 & 0xc000) != 0xc000);
            if (t == Collation.COMMON_WEIGHT16)
            {
                ++commonTertiaries;
            }
            else if ((tertiaryMask & 0x8000) == 0)
            {
                // Tertiary weights without case bits.
                // Move lead bytes 06..3F to C6..FF for a large common-weight range.
                if (commonTertiaries != 0)
                {
                    int leftover = DrainCommonWeightRuns(
                        tertiaries, commonTertiaries, TER_ONLY_COMMON_MAX_COUNT, TER_ONLY_COMMON_MIDDLE);
                    tertiaries.AppendByte(t < Collation.COMMON_WEIGHT16
                        ? TER_ONLY_COMMON_LOW + leftover
                        : TER_ONLY_COMMON_HIGH - leftover);
                    commonTertiaries = 0;
                }
                if (t > Collation.COMMON_WEIGHT16)
                {
                    t += 0xc000;
                }
                tertiaries.AppendWeight16(t);
            }
            else if (!upperFirst)
            {
                // Tertiary weights with caseFirst=lowerFirst.
                // Move lead bytes 06..BF to 46..FF for the common-weight range.
                if (commonTertiaries != 0)
                {
                    int leftover = DrainCommonWeightRuns(
                        tertiaries, commonTertiaries, TER_LOWER_FIRST_COMMON_MAX_COUNT,
                        TER_LOWER_FIRST_COMMON_MIDDLE);
                    tertiaries.AppendByte(t < Collation.COMMON_WEIGHT16
                        ? TER_LOWER_FIRST_COMMON_LOW + leftover
                        : TER_LOWER_FIRST_COMMON_HIGH - leftover);
                    commonTertiaries = 0;
                }
                if (t > Collation.COMMON_WEIGHT16)
                {
                    t += 0x4000;
                }
                tertiaries.AppendWeight16(t);
            }
            else
            {
                // Tertiary weights with caseFirst=upperFirst.
                // Do not change the artificial uppercase weight of a tertiary CE (0.0.ut),
                // to keep tertiary CEs well-formed.
                // Their case+tertiary weights must be greater than those of
                // primary and secondary CEs.
                //
                // Separator         01 -> 01      (unchanged)
                // Lowercase     02..04 -> 82..84  (includes uncased)
                // Common weight     05 -> 85..C5  (common-weight compression range)
                // Lowercase     06..3F -> C6..FF
                // Mixed case    42..7F -> 42..7F
                // Uppercase     82..BF -> 02..3F
                // Tertiary CE   86..BF -> C6..FF
                if (t > Collation.NO_CE_WEIGHT16) // separators stay unchanged
                {
                    if ((lower32.TripleShift(16)) != 0)
                    {
                        // Invert case bits of primary & secondary CEs.
                        t ^= 0xc000;
                        if (t < (TER_UPPER_FIRST_COMMON_HIGH << 8))
                        {
                            t -= 0x4000;
                        }
                    }
                    else
                    {
                        // Keep uppercase bits of tertiary CEs.
                        Debug.Assert(0x8600 <= t && t <= 0xbfff);
                        t += 0x4000;
                    }
                }
                if (commonTertiaries != 0)
                {
                    int leftover = DrainCommonWeightRuns(
                        tertiaries, commonTertiaries, TER_UPPER_FIRST_COMMON_MAX_COUNT,
                        TER_UPPER_FIRST_COMMON_MIDDLE);
                    tertiaries.AppendByte(t < (TER_UPPER_FIRST_COMMON_LOW << 8)
                        ? TER_UPPER_FIRST_COMMON_LOW + leftover
                        : TER_UPPER_FIRST_COMMON_HIGH - leftover);
                    commonTertiaries = 0;
                }
                tertiaries.AppendWeight16(t);
            }
        }

        if (wantQuaternary)
        {
            int q = lower32 & 0xffff;
            if ((q & 0xc0) == 0 && q > Collation.NO_CE_WEIGHT16)
            {
                ++commonQuaternaries;
            }
            else if (q == Collation.NO_CE_WEIGHT16 && !alternateShifted && quaternaries.IsEmpty)
            {
                // If alternate=non-ignorable and there are only common quaternary weights,
                // then we need not write anything.
                // The only weights greater than the merge separator and less than the
                // common weight are shifted primary weights, which are not generated for
                // alternate=non-ignorable.
                // There are also exactly as many quaternary weights as tertiary weights,
                // so level length differences are handled already on tertiary level.
                // Any above-common quaternary weight will compare greater regardless.
                quaternaries.AppendByte(Collation.LEVEL_SEPARATOR_BYTE);
            }
            else
            {
                if (q == Collation.NO_CE_WEIGHT16)
                {
                    q = Collation.LEVEL_SEPARATOR_BYTE;
                }
                else
                {
                    q = 0xfc + ((q.TripleShift(6)) & 3);
                }
                if (commonQuaternaries != 0)
                {
                    int leftover = DrainCommonWeightRuns(
                        quaternaries, commonQuaternaries, QUAT_COMMON_MAX_COUNT, QUAT_COMMON_MIDDLE);
                    quaternaries.AppendByte(q < QUAT_COMMON_LOW
                        ? QUAT_COMMON_LOW + leftover
                        : QUAT_COMMON_HIGH - leftover);
                    commonQuaternaries = 0;
                }
                quaternaries.AppendByte(q);
            }
        }

        if ((lower32.TripleShift(24)) == Collation.LEVEL_SEPARATOR_BYTE)
        {
            break; // ce == NO_CE
        }
    }

    // Append the beyond-primary levels, each preceded by a level separator.
    // (The C implementation also checks allocation status here; this port has no such check.)
    if (wantSecondary)
    {
        if (!callback.NeedToWrite(Collation.SECONDARY_LEVEL))
        {
            return;
        }
        sink.Append(Collation.LEVEL_SEPARATOR_BYTE);
        secondaries.AppendTo(sink);
    }

    if (wantCase)
    {
        if (!callback.NeedToWrite(Collation.CASE_LEVEL))
        {
            return;
        }
        sink.Append(Collation.LEVEL_SEPARATOR_BYTE);
        // Write pairs of nibbles as bytes, except separator bytes as themselves.
        int nibbleCount = cases.Length - 1; // Ignore the trailing NO_CE.
        byte pendingHigh = 0;
        for (int i = 0; i < nibbleCount; ++i)
        {
            byte c = cases.GetAt(i);
            Debug.Assert((c & 0xf) == 0 && c != 0);
            if (pendingHigh == 0)
            {
                pendingHigh = c;
            }
            else
            {
                sink.Append(pendingHigh | ((c >> 4) & 0xf));
                pendingHigh = 0;
            }
        }
        if (pendingHigh != 0)
        {
            sink.Append(pendingHigh);
        }
    }

    if (wantTertiary)
    {
        if (!callback.NeedToWrite(Collation.TERTIARY_LEVEL))
        {
            return;
        }
        sink.Append(Collation.LEVEL_SEPARATOR_BYTE);
        tertiaries.AppendTo(sink);
    }

    if (wantQuaternary)
    {
        if (!callback.NeedToWrite(Collation.QUATERNARY_LEVEL))
        {
            return;
        }
        sink.Append(Collation.LEVEL_SEPARATOR_BYTE);
        quaternaries.AppendTo(sink);
    }
}

/// <summary>
/// First step of flushing a pending run of common weights: discounts one weight from
/// <paramref name="pendingCount"/>, appends <paramref name="middleByte"/> to
/// <paramref name="level"/> once for every full group of <paramref name="maxCount"/>
/// that remains, and returns the leftover count for the caller to encode in the
/// final low/high compression byte.
/// </summary>
private static int DrainCommonWeightRuns(SortKeyLevel level, int pendingCount, int maxCount, int middleByte)
{
    int leftover = pendingCount - 1;
    while (leftover >= maxCount)
    {
        level.AppendByte(middleByte);
        leftover -= maxCount;
    }
    return leftover;
}
/// <summary>
/// Compares the collation elements of two iterators level by level: primary weights first,
/// then (depending on the strength and options in <paramref name="settings"/>) the secondary,
/// case, tertiary and quaternary weights of the CEs the iterators have buffered.
/// </summary>
/// <param name="left">Iterator over the left string's CEs.</param>
/// <param name="right">Iterator over the right string's CEs.</param>
/// <param name="settings">Supplies the options, variable top and primary reordering.</param>
/// <returns>Collation.LESS, Collation.EQUAL or Collation.GREATER.</returns>
public static int CompareUpToQuaternary(CollationIterator left, CollationIterator right,
    CollationSettings settings)
{
    int options = settings.Options;
    CollationStrength strength = CollationSettings.GetStrength(options);

    long variableTop;
    if ((options & CollationSettings.ALTERNATE_MASK) != 0)
    {
        // +1 so that we can use "<" and primary ignorables test out early.
        variableTop = settings.VariableTop + 1;
    }
    else
    {
        variableTop = 0;
    }
    bool anyVariable = false;

    // Fetch CEs, compare primaries, store secondary & tertiary weights.
    for (; ; )
    {
        // Fetch from each side until a non-ignorable primary or the end is reached.
        long leftPrimary = FetchNonIgnorablePrimary(left, variableTop, ref anyVariable);
        long rightPrimary = FetchNonIgnorablePrimary(right, variableTop, ref anyVariable);

        if (leftPrimary != rightPrimary)
        {
            // Return the primary difference, with script reordering.
            if (settings.HasReordering)
            {
                leftPrimary = settings.Reorder(leftPrimary);
                rightPrimary = settings.Reorder(rightPrimary);
            }
            return (leftPrimary < rightPrimary) ? Collation.LESS : Collation.GREATER;
        }
        if (leftPrimary == Collation.NO_CE_PRIMARY)
        {
            break;
        }
    }

    // Compare the buffered secondary & tertiary weights.
    // We might skip the secondary level but continue with the case level
    // which is turned on separately.
    if (strength >= CollationStrength.Secondary)
    {
        if ((options & CollationSettings.BACKWARD_SECONDARY) == 0)
        {
            int leftPos = 0;
            int rightPos = 0;
            for (; ; )
            {
                int leftSecondary;
                do
                {
                    leftSecondary = ((int)left.GetCE(leftPos++)).TripleShift(16);
                } while (leftSecondary == 0);

                int rightSecondary;
                do
                {
                    rightSecondary = ((int)right.GetCE(rightPos++)).TripleShift(16);
                } while (rightSecondary == 0);

                if (leftSecondary != rightSecondary)
                {
                    return (leftSecondary < rightSecondary) ? Collation.LESS : Collation.GREATER;
                }
                if (leftSecondary == Collation.NO_CE_WEIGHT16)
                {
                    break;
                }
            }
        }
        else
        {
            // The backwards secondary level compares secondary weights backwards
            // within segments separated by the merge separator (U+FFFE, weight 02).
            int leftStart = 0;
            int rightStart = 0;
            for (; ; )
            {
                // Find the merge separator or the NO_CE terminator on each side.
                long limitPrimary;
                int leftLimit = leftStart;
                while ((limitPrimary = left.GetCE(leftLimit).TripleShift(32)) > Collation.MERGE_SEPARATOR_PRIMARY
                    || limitPrimary == 0)
                {
                    ++leftLimit;
                }
                int rightLimit = rightStart;
                while ((limitPrimary = right.GetCE(rightLimit).TripleShift(32)) > Collation.MERGE_SEPARATOR_PRIMARY
                    || limitPrimary == 0)
                {
                    ++rightLimit;
                }

                // Compare the segments, walking from each limit back to its start.
                int leftPos = leftLimit;
                int rightPos = rightLimit;
                for (; ; )
                {
                    int leftSecondary = 0;
                    while (leftSecondary == 0 && leftPos > leftStart)
                    {
                        leftSecondary = ((int)left.GetCE(--leftPos)).TripleShift(16);
                    }

                    int rightSecondary = 0;
                    while (rightSecondary == 0 && rightPos > rightStart)
                    {
                        rightSecondary = ((int)right.GetCE(--rightPos)).TripleShift(16);
                    }

                    if (leftSecondary != rightSecondary)
                    {
                        return (leftSecondary < rightSecondary) ? Collation.LESS : Collation.GREATER;
                    }
                    if (leftSecondary == 0)
                    {
                        break;
                    }
                }

                // Did we reach the end of either string?
                // Both strings have the same number of merge separators,
                // or else there would have been a primary-level difference.
                Debug.Assert(left.GetCE(leftLimit) == right.GetCE(rightLimit));
                if (limitPrimary == Collation.NO_CE_PRIMARY)
                {
                    break;
                }
                // Skip both merge separators and continue.
                leftStart = leftLimit + 1;
                rightStart = rightLimit + 1;
            }
        }
    }

    if ((options & CollationSettings.CASE_LEVEL) != 0)
    {
        bool upperFirst = (options & CollationSettings.UPPER_FIRST) != 0;
        int leftPos = 0;
        int rightPos = 0;
        for (; ; )
        {
            int leftLower32;
            int rightLower32;
            if (strength == CollationStrength.Primary)
            {
                // Primary+caseLevel: Ignore case level weights of primary ignorables.
                // Otherwise we would get a-umlaut > a
                // which is not desirable for accent-insensitive sorting.
                // Check for (lower 32 bits) == 0 as well because variable CEs are stored
                // with only primary weights.
                long ce;
                do
                {
                    ce = left.GetCE(leftPos++);
                    leftLower32 = (int)ce;
                } while ((ce.TripleShift(32)) == 0 || leftLower32 == 0);

                do
                {
                    ce = right.GetCE(rightPos++);
                    rightLower32 = (int)ce;
                } while ((ce.TripleShift(32)) == 0 || rightLower32 == 0);
            }
            else
            {
                // Secondary+caseLevel: By analogy with the above,
                // ignore case level weights of secondary ignorables.
                //
                // Note: A tertiary CE has uppercase case bits (0.0.ut)
                // to keep tertiary+caseFirst well-formed.
                //
                // Tertiary+caseLevel: Also ignore case level weights of secondary ignorables.
                // Otherwise a tertiary CE's uppercase would be no greater than
                // a primary/secondary CE's uppercase.
                // (See UCA well-formedness condition 2.)
                // We could construct a special case weight higher than uppercase,
                // but it's simpler to always ignore case weights of secondary ignorables,
                // turning 0.0.ut into 0.0.0.t.
                // (See LDML Collation, Case Parameters.)
                do
                {
                    leftLower32 = (int)left.GetCE(leftPos++);
                } while ((leftLower32 & 0xffff0000) == 0);

                do
                {
                    rightLower32 = (int)right.GetCE(rightPos++);
                } while ((rightLower32 & 0xffff0000) == 0);
            }

            int leftCase = leftLower32 & 0xc000;
            int rightCase = rightLower32 & 0xc000;

            // No need to handle NO_CE and MERGE_SEPARATOR specially:
            // There is one case weight for each previous-level weight,
            // so level length differences were handled there.
            if (leftCase != rightCase)
            {
                // With upperFirst the result of the numeric comparison is inverted.
                bool leftIsSmaller = leftCase < rightCase;
                return (leftIsSmaller != upperFirst) ? Collation.LESS : Collation.GREATER;
            }
            if ((leftLower32.TripleShift(16)) == Collation.NO_CE_WEIGHT16)
            {
                break;
            }
        }
    }

    if (strength <= CollationStrength.Secondary)
    {
        return Collation.EQUAL;
    }

    int tertiaryMask = CollationSettings.GetTertiaryMask(options);

    int leftIndex = 0;
    int rightIndex = 0;
    int anyQuaternaries = 0; // OR of every lower-32 value seen; bits 7..6 are tested below
    for (; ; )
    {
        int leftLower32;
        int leftTertiary;
        do
        {
            leftLower32 = (int)left.GetCE(leftIndex++);
            anyQuaternaries |= leftLower32;
            Debug.Assert((leftLower32 & Collation.ONLY_TERTIARY_MASK) != 0 || (leftLower32 & 0xc0c0) == 0);
            leftTertiary = leftLower32 & tertiaryMask;
        } while (leftTertiary == 0);

        int rightLower32;
        int rightTertiary;
        do
        {
            rightLower32 = (int)right.GetCE(rightIndex++);
            anyQuaternaries |= rightLower32;
            Debug.Assert((rightLower32 & Collation.ONLY_TERTIARY_MASK) != 0 || (rightLower32 & 0xc0c0) == 0);
            rightTertiary = rightLower32 & tertiaryMask;
        } while (rightTertiary == 0);

        if (leftTertiary != rightTertiary)
        {
            if (CollationSettings.SortsTertiaryUpperCaseFirst(options))
            {
                // Pass through NO_CE and keep real tertiary weights larger than that.
                // Do not change the artificial uppercase weight of a tertiary CE (0.0.ut),
                // to keep tertiary CEs well-formed.
                // Their case+tertiary weights must be greater than those of
                // primary and secondary CEs.
                if (leftTertiary > Collation.NO_CE_WEIGHT16)
                {
                    if ((leftLower32 & 0xffff0000) != 0)
                    {
                        leftTertiary ^= 0xc000;
                    }
                    else
                    {
                        leftTertiary += 0x4000;
                    }
                }
                if (rightTertiary > Collation.NO_CE_WEIGHT16)
                {
                    if ((rightLower32 & 0xffff0000) != 0)
                    {
                        rightTertiary ^= 0xc000;
                    }
                    else
                    {
                        rightTertiary += 0x4000;
                    }
                }
            }
            return (leftTertiary < rightTertiary) ? Collation.LESS : Collation.GREATER;
        }
        if (leftTertiary == Collation.NO_CE_WEIGHT16)
        {
            break;
        }
    }

    if (strength <= CollationStrength.Tertiary)
    {
        return Collation.EQUAL;
    }

    if (!anyVariable && (anyQuaternaries & 0xc0) == 0)
    {
        // If there are no "variable" CEs and no non-zero quaternary weights,
        // then there are no quaternary differences.
        return Collation.EQUAL;
    }

    leftIndex = 0;
    rightIndex = 0;
    for (; ; )
    {
        long leftQuaternary = NextQuaternaryWeight(left, ref leftIndex);
        long rightQuaternary = NextQuaternaryWeight(right, ref rightIndex);

        if (leftQuaternary != rightQuaternary)
        {
            // Return the difference, with script reordering.
            if (settings.HasReordering)
            {
                leftQuaternary = settings.Reorder(leftQuaternary);
                rightQuaternary = settings.Reorder(rightQuaternary);
            }
            return (leftQuaternary < rightQuaternary) ? Collation.LESS : Collation.GREATER;
        }
        if (leftQuaternary == Collation.NO_CE_PRIMARY)
        {
            break;
        }
    }
    return Collation.EQUAL;
}

/// <summary>
/// Advances <paramref name="iter"/> until it yields a CE whose primary weight is non-zero
/// and not variable, and returns that primary weight.
/// Variable CEs (primary below <paramref name="variableTop"/> and above the merge separator
/// primary) are rewritten in the iterator to keep only their primary, primary ignorables
/// following them are rewritten to 0, and <paramref name="anyVariable"/> is set to true.
/// </summary>
private static long FetchNonIgnorablePrimary(CollationIterator iter, long variableTop, ref bool anyVariable)
{
    long primary;
    do
    {
        long ce = iter.NextCE();
        primary = ce.TripleShift(32);
        if (primary < variableTop && primary > Collation.MERGE_SEPARATOR_PRIMARY)
        {
            // Variable CE, shift it to quaternary level.
            // Ignore all following primary ignorables, and shift further variable CEs.
            anyVariable = true;
            do
            {
                // Store only the primary of the variable CE.
                iter.SetCurrentCE(ce & unchecked((long)0xffffffff00000000L));
                ce = iter.NextCE();
                while ((primary = ce.TripleShift(32)) == 0)
                {
                    iter.SetCurrentCE(0);
                    ce = iter.NextCE();
                }
            } while (primary < variableTop && primary > Collation.MERGE_SEPARATOR_PRIMARY);
        }
    } while (primary == 0);
    return primary;
}

/// <summary>
/// Reads buffered CEs from <paramref name="iter"/> starting at <paramref name="index"/>
/// (advancing it) and returns the first non-zero quaternary comparison value.
/// For a CE whose low 16 bits are at most NO_CE_WEIGHT16 the value is the CE's upper 32 bits;
/// otherwise it is the low 16 bits OR-ed with 0xffffff3f, which preserves bits 7..6.
/// </summary>
private static long NextQuaternaryWeight(CollationIterator iter, ref int index)
{
    long quaternary;
    do
    {
        long ce = iter.GetCE(index++);
        quaternary = ce & 0xffff;
        if (quaternary <= Collation.NO_CE_WEIGHT16)
        {
            // Variable primary or completely ignorable or NO_CE.
            quaternary = ce.TripleShift(32);
        }
        else
        {
            // Regular CE, not tertiary ignorable.
            // Preserve the quaternary weight in bits 7..6.
            quaternary |= 0xffffff3fL;
        }
    } while (quaternary == 0);
    return quaternary;
}