/// <summary>
/// Compares two UTF-16 character sequences level by level (primary, then optionally
/// secondary, case, tertiary and quaternary) using the fast-Latin mini-CE tables.
/// </summary>
/// <remarks>
/// Only the primary pass can detect unsupported input and bail out; the later passes
/// re-read both sequences from <paramref name="startIndex"/> without buffering.
/// </remarks>
/// <param name="table">Mini-CE table. Indexed directly by <c>c</c> when <c>c &lt;= LatinMax</c>,
/// by <c>c - PUNCT_START + LatinLimit</c> for the punctuation range (primary and secondary
/// passes only), and through <c>Lookup(table, c)</c> otherwise.</param>
/// <param name="primaries">Per-character values consulted first in the primary pass for
/// <c>c &lt;= LatinMax</c>; a zero entry means <paramref name="table"/> must be consulted.</param>
/// <param name="options">Packed value: <c>options &gt;&gt; 16</c> is used as the variable top,
/// the low 16 bits are the collation settings options.</param>
/// <param name="left">Left character sequence.</param>
/// <param name="right">Right character sequence.</param>
/// <param name="startIndex">Index at which reading starts in both sequences.</param>
/// <returns>
/// <c>Collation.Less</c>, <c>Collation.Greater</c> or <c>Collation.Equal</c>; or
/// <c>BailOutResult</c> when (a) a digit (0x30..0x39) with a zero <paramref name="primaries"/>
/// entry is seen while the <c>Numeric</c> option is set, (b) <c>NextPair</c> yields
/// <c>BAIL_OUT</c> in the primary pass, or (c) a secondary difference is found while
/// <c>BackwardSecondary</c> is set.
/// </returns>
public static int CompareUTF16(char[] table, char[] primaries, int options, ICharSequence left, ICharSequence right, int startIndex)
{
    // This is a modified copy of CollationCompare.compareUpToQuaternary(),
    // optimized for common Latin text.
    // Keep them in sync!

    int variableTop = options >> 16; // see getOptions()
    options &= 0xffff; // needed for CollationSettings.getStrength() to work

    // ---- Primary level ----
    // Check for supported characters, fetch mini CEs, and compare primaries.
    int leftIndex = startIndex, rightIndex = startIndex;
    // Single mini CE or a pair.
    // The current mini CE is in the lower 16 bits, the next one is in the upper 16 bits.
    // If there is only one, then it is in the lower bits, and the upper bits are 0.
    int leftPair = 0, rightPair = 0;
    for (; ;)
    {
        // We fetch CEs until we get a non-ignorable primary or reach the end.
        while (leftPair == 0)
        {
            if (leftIndex == left.Length)
            {
                leftPair = EOS;
                break;
            }
            int c = left[leftIndex++];
            if (c <= LatinMax)
            {
                // Try the dedicated primaries array first; non-zero means we are done.
                leftPair = primaries[c];
                if (leftPair != 0)
                {
                    break;
                }
                // ASCII digit with numeric collation requested: not handled here.
                if (c <= 0x39 && c >= 0x30 && (options & CollationSettings.Numeric) != 0)
                {
                    return(BailOutResult);
                }
                leftPair = table[c];
            }
            else if (PUNCT_START <= c && c < PUNCT_LIMIT)
            {
                // The punctuation range is stored in the table right after the Latin range.
                leftPair = table[c - PUNCT_START + LatinLimit];
            }
            else
            {
                leftPair = Lookup(table, c);
            }
            if (leftPair >= MIN_SHORT)
            {
                leftPair &= SHORT_PRIMARY_MASK;
                break;
            }
            else if (leftPair > variableTop)
            {
                leftPair &= LONG_PRIMARY_MASK;
                break;
            }
            else
            {
                long pairAndInc = NextPair(table, c, leftPair, left, leftIndex);
                // A negative result is a bit-inverted value that also asks us
                // to advance the index by one.
                if (pairAndInc < 0)
                {
                    ++leftIndex;
                    pairAndInc = ~pairAndInc;
                }
                leftPair = (int)pairAndInc;
                if (leftPair == BAIL_OUT)
                {
                    return(BailOutResult);
                }
                // May yield 0, in which case the while loop fetches the next character.
                leftPair = GetPrimaries(variableTop, leftPair);
            }
        }

        // Same fetch logic for the right-hand sequence.
        while (rightPair == 0)
        {
            if (rightIndex == right.Length)
            {
                rightPair = EOS;
                break;
            }
            int c = right[rightIndex++];
            if (c <= LatinMax)
            {
                rightPair = primaries[c];
                if (rightPair != 0)
                {
                    break;
                }
                if (c <= 0x39 && c >= 0x30 && (options & CollationSettings.Numeric) != 0)
                {
                    return(BailOutResult);
                }
                rightPair = table[c];
            }
            else if (PUNCT_START <= c && c < PUNCT_LIMIT)
            {
                rightPair = table[c - PUNCT_START + LatinLimit];
            }
            else
            {
                rightPair = Lookup(table, c);
            }
            if (rightPair >= MIN_SHORT)
            {
                rightPair &= SHORT_PRIMARY_MASK;
                break;
            }
            else if (rightPair > variableTop)
            {
                rightPair &= LONG_PRIMARY_MASK;
                break;
            }
            else
            {
                long pairAndInc = NextPair(table, c, rightPair, right, rightIndex);
                if (pairAndInc < 0)
                {
                    ++rightIndex;
                    pairAndInc = ~pairAndInc;
                }
                rightPair = (int)pairAndInc;
                if (rightPair == BAIL_OUT)
                {
                    return(BailOutResult);
                }
                rightPair = GetPrimaries(variableTop, rightPair);
            }
        }

        if (leftPair == rightPair)
        {
            // Whole pairs are identical: stop at the end marker, otherwise fetch fresh pairs.
            if (leftPair == EOS)
            {
                break;
            }
            leftPair = rightPair = 0;
            continue;
        }
        // Compare only the current (lower 16 bits) mini CE of each pair.
        int leftPrimary = leftPair & 0xffff;
        int rightPrimary = rightPair & 0xffff;
        if (leftPrimary != rightPrimary)
        {
            // Return the primary difference.
            return((leftPrimary < rightPrimary) ? Collation.Less : Collation.Greater);
        }
        if (leftPair == EOS)
        {
            break;
        }
        // Move the next mini CE (upper 16 bits) into the current position.
        // TripleShift replaces the original `leftPair >>>= 16; rightPair >>>= 16;`.
        leftPair = leftPair.TripleShift(16);
        rightPair = rightPair.TripleShift(16);
    }

    // In the following, we need to re-fetch each character because we did not buffer the CEs,
    // but we know that the string is well-formed and
    // only contains supported characters and mappings.

    // ---- Secondary level ----
    // We might skip the secondary level but continue with the case level
    // which is turned on separately.
    if (CollationSettings.GetStrength(options) >= CollationStrength.Secondary)
    {
        leftIndex = rightIndex = startIndex;
        leftPair = rightPair = 0;
        for (; ;)
        {
            while (leftPair == 0)
            {
                if (leftIndex == left.Length)
                {
                    leftPair = EOS;
                    break;
                }
                int c = left[leftIndex++];
                if (c <= LatinMax)
                {
                    leftPair = table[c];
                }
                else if (PUNCT_START <= c && c < PUNCT_LIMIT)
                {
                    leftPair = table[c - PUNCT_START + LatinLimit];
                }
                else
                {
                    leftPair = Lookup(table, c);
                }
                if (leftPair >= MIN_SHORT)
                {
                    leftPair = GetSecondariesFromOneShortCE(leftPair);
                    break;
                }
                else if (leftPair > variableTop)
                {
                    leftPair = COMMON_SEC_PLUS_OFFSET;
                    break;
                }
                else
                {
                    long pairAndInc = NextPair(table, c, leftPair, left, leftIndex);
                    if (pairAndInc < 0)
                    {
                        ++leftIndex;
                        pairAndInc = ~pairAndInc;
                    }
                    // No BAIL_OUT check here, unlike the primary pass.
                    leftPair = GetSecondaries(variableTop, (int)pairAndInc);
                }
            }

            while (rightPair == 0)
            {
                if (rightIndex == right.Length)
                {
                    rightPair = EOS;
                    break;
                }
                int c = right[rightIndex++];
                if (c <= LatinMax)
                {
                    rightPair = table[c];
                }
                else if (PUNCT_START <= c && c < PUNCT_LIMIT)
                {
                    rightPair = table[c - PUNCT_START + LatinLimit];
                }
                else
                {
                    rightPair = Lookup(table, c);
                }
                if (rightPair >= MIN_SHORT)
                {
                    rightPair = GetSecondariesFromOneShortCE(rightPair);
                    break;
                }
                else if (rightPair > variableTop)
                {
                    rightPair = COMMON_SEC_PLUS_OFFSET;
                    break;
                }
                else
                {
                    long pairAndInc = NextPair(table, c, rightPair, right, rightIndex);
                    if (pairAndInc < 0)
                    {
                        ++rightIndex;
                        pairAndInc = ~pairAndInc;
                    }
                    rightPair = GetSecondaries(variableTop, (int)pairAndInc);
                }
            }

            if (leftPair == rightPair)
            {
                if (leftPair == EOS)
                {
                    break;
                }
                leftPair = rightPair = 0;
                continue;
            }
            int leftSecondary = leftPair & 0xffff;
            int rightSecondary = rightPair & 0xffff;
            if (leftSecondary != rightSecondary)
            {
                if ((options & CollationSettings.BackwardSecondary) != 0)
                {
                    // Full support for backwards secondary requires backwards contraction matching
                    // and moving backwards between merge separators.
                    return(BailOutResult);
                }
                return((leftSecondary < rightSecondary) ? Collation.Less : Collation.Greater);
            }
            if (leftPair == EOS)
            {
                break;
            }
            // Replaces the original `leftPair >>>= 16; rightPair >>>= 16;`.
            leftPair = leftPair.TripleShift(16);
            rightPair = rightPair.TripleShift(16);
        }
    }

    // ---- Case level (enabled independently of the strength) ----
    if ((options & CollationSettings.CaseLevel) != 0)
    {
        bool strengthIsPrimary = CollationSettings.GetStrength(options) == CollationStrength.Primary;
        leftIndex = rightIndex = startIndex;
        leftPair = rightPair = 0;
        for (; ;)
        {
            while (leftPair == 0)
            {
                if (leftIndex == left.Length)
                {
                    leftPair = EOS;
                    break;
                }
                int c = left[leftIndex++];
                // From this level on, the punctuation range is not special-cased;
                // everything above LatinMax goes through Lookup.
                leftPair = (c <= LatinMax) ? table[c] : Lookup(table, c);
                if (leftPair < MIN_LONG)
                {
                    long pairAndInc = NextPair(table, c, leftPair, left, leftIndex);
                    if (pairAndInc < 0)
                    {
                        ++leftIndex;
                        pairAndInc = ~pairAndInc;
                    }
                    leftPair = (int)pairAndInc;
                }
                leftPair = GetCases(variableTop, strengthIsPrimary, leftPair);
            }

            while (rightPair == 0)
            {
                if (rightIndex == right.Length)
                {
                    rightPair = EOS;
                    break;
                }
                int c = right[rightIndex++];
                rightPair = (c <= LatinMax) ? table[c] : Lookup(table, c);
                if (rightPair < MIN_LONG)
                {
                    long pairAndInc = NextPair(table, c, rightPair, right, rightIndex);
                    if (pairAndInc < 0)
                    {
                        ++rightIndex;
                        pairAndInc = ~pairAndInc;
                    }
                    rightPair = (int)pairAndInc;
                }
                rightPair = GetCases(variableTop, strengthIsPrimary, rightPair);
            }

            if (leftPair == rightPair)
            {
                if (leftPair == EOS)
                {
                    break;
                }
                leftPair = rightPair = 0;
                continue;
            }
            int leftCase = leftPair & 0xffff;
            int rightCase = rightPair & 0xffff;
            if (leftCase != rightCase)
            {
                // UpperFirst inverts the result of the case comparison.
                if ((options & CollationSettings.UpperFirst) == 0)
                {
                    return((leftCase < rightCase) ? Collation.Less : Collation.Greater);
                }
                else
                {
                    return((leftCase < rightCase) ? Collation.Greater : Collation.Less);
                }
            }
            if (leftPair == EOS)
            {
                break;
            }
            // Replaces the original `leftPair >>>= 16; rightPair >>>= 16;`.
            leftPair = leftPair.TripleShift(16);
            rightPair = rightPair.TripleShift(16);
        }
    }
    if (CollationSettings.GetStrength(options) <= CollationStrength.Secondary)
    {
        return(Collation.Equal);
    }

    // ---- Tertiary level ----
    // Remove the case bits from the tertiary weight when caseLevel is on or caseFirst is off.
    bool withCaseBits = CollationSettings.IsTertiaryWithCaseBits(options);

    leftIndex = rightIndex = startIndex;
    leftPair = rightPair = 0;
    for (; ;)
    {
        while (leftPair == 0)
        {
            if (leftIndex == left.Length)
            {
                leftPair = EOS;
                break;
            }
            int c = left[leftIndex++];
            leftPair = (c <= LatinMax) ? table[c] : Lookup(table, c);
            if (leftPair < MIN_LONG)
            {
                long pairAndInc = NextPair(table, c, leftPair, left, leftIndex);
                if (pairAndInc < 0)
                {
                    ++leftIndex;
                    pairAndInc = ~pairAndInc;
                }
                leftPair = (int)pairAndInc;
            }
            leftPair = GetTertiaries(variableTop, withCaseBits, leftPair);
        }

        while (rightPair == 0)
        {
            if (rightIndex == right.Length)
            {
                rightPair = EOS;
                break;
            }
            int c = right[rightIndex++];
            rightPair = (c <= LatinMax) ? table[c] : Lookup(table, c);
            if (rightPair < MIN_LONG)
            {
                long pairAndInc = NextPair(table, c, rightPair, right, rightIndex);
                if (pairAndInc < 0)
                {
                    ++rightIndex;
                    pairAndInc = ~pairAndInc;
                }
                rightPair = (int)pairAndInc;
            }
            rightPair = GetTertiaries(variableTop, withCaseBits, rightPair);
        }

        if (leftPair == rightPair)
        {
            if (leftPair == EOS)
            {
                break;
            }
            leftPair = rightPair = 0;
            continue;
        }
        int leftTertiary = leftPair & 0xffff;
        int rightTertiary = rightPair & 0xffff;
        if (leftTertiary != rightTertiary)
        {
            if (CollationSettings.SortsTertiaryUpperCaseFirst(options))
            {
                // Pass through EOS and MERGE_WEIGHT
                // and keep real tertiary weights larger than the MERGE_WEIGHT.
                // Tertiary CEs (secondary ignorables) are not supported in fast Latin.
                if (leftTertiary > MERGE_WEIGHT)
                {
                    leftTertiary ^= CASE_MASK;
                }
                if (rightTertiary > MERGE_WEIGHT)
                {
                    rightTertiary ^= CASE_MASK;
                }
            }
            return((leftTertiary < rightTertiary) ? Collation.Less : Collation.Greater);
        }
        if (leftPair == EOS)
        {
            break;
        }
        // Replaces the original `leftPair >>>= 16; rightPair >>>= 16;`.
        leftPair = leftPair.TripleShift(16);
        rightPair = rightPair.TripleShift(16);
    }
    if (CollationSettings.GetStrength(options) <= CollationStrength.Tertiary)
    {
        return(Collation.Equal);
    }

    // ---- Quaternary level ----
    leftIndex = rightIndex = startIndex;
    leftPair = rightPair = 0;
    for (; ;)
    {
        while (leftPair == 0)
        {
            if (leftIndex == left.Length)
            {
                leftPair = EOS;
                break;
            }
            int c = left[leftIndex++];
            leftPair = (c <= LatinMax) ? table[c] : Lookup(table, c);
            if (leftPair < MIN_LONG)
            {
                long pairAndInc = NextPair(table, c, leftPair, left, leftIndex);
                if (pairAndInc < 0)
                {
                    ++leftIndex;
                    pairAndInc = ~pairAndInc;
                }
                leftPair = (int)pairAndInc;
            }
            leftPair = GetQuaternaries(variableTop, leftPair);
        }

        while (rightPair == 0)
        {
            if (rightIndex == right.Length)
            {
                rightPair = EOS;
                break;
            }
            int c = right[rightIndex++];
            rightPair = (c <= LatinMax) ? table[c] : Lookup(table, c);
            if (rightPair < MIN_LONG)
            {
                long pairAndInc = NextPair(table, c, rightPair, right, rightIndex);
                if (pairAndInc < 0)
                {
                    ++rightIndex;
                    pairAndInc = ~pairAndInc;
                }
                rightPair = (int)pairAndInc;
            }
            rightPair = GetQuaternaries(variableTop, rightPair);
        }

        if (leftPair == rightPair)
        {
            if (leftPair == EOS)
            {
                break;
            }
            leftPair = rightPair = 0;
            continue;
        }
        int leftQuaternary = leftPair & 0xffff;
        int rightQuaternary = rightPair & 0xffff;
        if (leftQuaternary != rightQuaternary)
        {
            return((leftQuaternary < rightQuaternary) ? Collation.Less : Collation.Greater);
        }
        if (leftPair == EOS)
        {
            break;
        }
        // Replaces the original `leftPair >>>= 16; rightPair >>>= 16;`.
        leftPair = leftPair.TripleShift(16);
        rightPair = rightPair.TripleShift(16);
    }
    return(Collation.Equal);
}
/// <summary>
/// Compares the collation elements (CEs) produced by two iterators, level by level
/// (primary, then optionally secondary, case, tertiary and quaternary), as selected
/// by <paramref name="settings"/>.
/// </summary>
/// <remarks>
/// The primary pass pulls CEs with <c>NextCE()</c> from both iterators until a primary
/// difference or the <c>NO_CE_PRIMARY</c> terminator is found. When the alternate option is
/// set, it rewrites variable CEs (and the primary-ignorable CEs following them) in place
/// through <c>SetCurrentCE</c>. All later passes re-read CEs by index via <c>GetCE(i)</c>.
/// </remarks>
/// <param name="left">Iterator over the left text's CEs; advanced and modified by this call.</param>
/// <param name="right">Iterator over the right text's CEs; advanced and modified by this call.</param>
/// <param name="settings">Supplies the options bit set, the variable top and the optional
/// script reordering.</param>
/// <returns><c>Collation.LESS</c>, <c>Collation.GREATER</c> or <c>Collation.EQUAL</c>.</returns>
public static int CompareUpToQuaternary(CollationIterator left, CollationIterator right, CollationSettings settings)
{
    int options = settings.Options;
    long variableTop;
    if ((options & CollationSettings.ALTERNATE_MASK) == 0)
    {
        // With 0, the "primary < variableTop" test below can never succeed.
        variableTop = 0;
    }
    else
    {
        // +1 so that we can use "<" and primary ignorables test out early.
        variableTop = settings.VariableTop + 1;
    }
    bool anyVariable = false;

    // ---- Primary level ----
    // Fetch CEs, compare primaries, store secondary & tertiary weights.
    for (; ;)
    {
        // We fetch CEs until we get a non-ignorable primary or reach the end.
        long leftPrimary;
        do
        {
            long ce = left.NextCE();
            // The primary weight is the upper 32 bits of the CE.
            leftPrimary = ce.TripleShift(32);
            if (leftPrimary < variableTop && leftPrimary > Collation.MERGE_SEPARATOR_PRIMARY)
            {
                // Variable CE, shift it to quaternary level.
                // Ignore all following primary ignorables, and shift further variable CEs.
                anyVariable = true;
                do
                {
                    // Store only the primary of the variable CE.
                    left.SetCurrentCE(ce & unchecked ((long)0xffffffff00000000L));
                    for (; ;)
                    {
                        ce = left.NextCE();
                        leftPrimary = ce.TripleShift(32);
                        if (leftPrimary == 0)
                        {
                            // Primary ignorable following a variable CE: zero it out.
                            left.SetCurrentCE(0);
                        }
                        else
                        {
                            break;
                        }
                    }
                } while (leftPrimary < variableTop && leftPrimary > Collation.MERGE_SEPARATOR_PRIMARY);
            }
        } while (leftPrimary == 0);

        // Same fetch logic for the right-hand iterator.
        long rightPrimary;
        do
        {
            long ce = right.NextCE();
            rightPrimary = ce.TripleShift(32);
            if (rightPrimary < variableTop && rightPrimary > Collation.MERGE_SEPARATOR_PRIMARY)
            {
                // Variable CE, shift it to quaternary level.
                // Ignore all following primary ignorables, and shift further variable CEs.
                anyVariable = true;
                do
                {
                    // Store only the primary of the variable CE.
                    right.SetCurrentCE(ce & unchecked ((long)0xffffffff00000000L));
                    for (; ;)
                    {
                        ce = right.NextCE();
                        rightPrimary = ce.TripleShift(32);
                        if (rightPrimary == 0)
                        {
                            right.SetCurrentCE(0);
                        }
                        else
                        {
                            break;
                        }
                    }
                } while (rightPrimary < variableTop && rightPrimary > Collation.MERGE_SEPARATOR_PRIMARY);
            }
        } while (rightPrimary == 0);

        if (leftPrimary != rightPrimary)
        {
            // Return the primary difference, with script reordering.
            if (settings.HasReordering)
            {
                leftPrimary = settings.Reorder(leftPrimary);
                rightPrimary = settings.Reorder(rightPrimary);
            }
            return((leftPrimary < rightPrimary) ? Collation.LESS : Collation.GREATER);
        }
        // Equal primaries: both sides reached the terminator at the same time.
        if (leftPrimary == Collation.NO_CE_PRIMARY)
        {
            break;
        }
    }

    // ---- Secondary level ----
    // Compare the buffered secondary & tertiary weights.
    // We might skip the secondary level but continue with the case level
    // which is turned on separately.
    if (CollationSettings.GetStrength(options) >= CollationStrength.Secondary)
    {
        if ((options & CollationSettings.BACKWARD_SECONDARY) == 0)
        {
            // Forward secondary comparison over the buffered CEs.
            int leftIndex2 = 0;
            int rightIndex2 = 0;
            for (; ;)
            {
                // The secondary weight is bits 31..16 of the CE; skip zero secondaries.
                int leftSecondary;
                do
                {
                    leftSecondary = ((int)left.GetCE(leftIndex2++)).TripleShift(16);
                } while (leftSecondary == 0);

                int rightSecondary;
                do
                {
                    rightSecondary = ((int)right.GetCE(rightIndex2++)).TripleShift(16);
                } while (rightSecondary == 0);

                if (leftSecondary != rightSecondary)
                {
                    return((leftSecondary < rightSecondary) ? Collation.LESS : Collation.GREATER);
                }
                if (leftSecondary == Collation.NO_CE_WEIGHT16)
                {
                    break;
                }
            }
        }
        else
        {
            // The backwards secondary level compares secondary weights backwards
            // within segments separated by the merge separator (U+FFFE, weight 02).
            int leftStart = 0;
            int rightStart = 0;
            for (; ;)
            {
                // Find the merge separator or the NO_CE terminator.
                long p;
                int leftLimit = leftStart;
                while ((p = left.GetCE(leftLimit).TripleShift(32)) > Collation.MERGE_SEPARATOR_PRIMARY || p == 0)
                {
                    ++leftLimit;
                }
                int rightLimit = rightStart;
                while ((p = right.GetCE(rightLimit).TripleShift(32)) > Collation.MERGE_SEPARATOR_PRIMARY || p == 0)
                {
                    ++rightLimit;
                }

                // Compare the segments.
                int leftIndex3 = leftLimit;
                int rightIndex3 = rightLimit;
                for (; ;)
                {
                    // Walk backwards to the previous non-zero secondary;
                    // 0 means the start of the segment was reached.
                    int leftSecondary = 0;
                    while (leftSecondary == 0 && leftIndex3 > leftStart)
                    {
                        leftSecondary = ((int)left.GetCE(--leftIndex3)).TripleShift(16);
                    }

                    int rightSecondary = 0;
                    while (rightSecondary == 0 && rightIndex3 > rightStart)
                    {
                        rightSecondary = ((int)right.GetCE(--rightIndex3)).TripleShift(16);
                    }

                    if (leftSecondary != rightSecondary)
                    {
                        return((leftSecondary < rightSecondary) ? Collation.LESS : Collation.GREATER);
                    }
                    if (leftSecondary == 0)
                    {
                        break;
                    }
                }

                // Did we reach the end of either string?
                // Both strings have the same number of merge separators,
                // or else there would have been a primary-level difference.
                Debug.Assert(left.GetCE(leftLimit) == right.GetCE(rightLimit));
                // p holds the primary found at rightLimit (the last value assigned above).
                if (p == Collation.NO_CE_PRIMARY)
                {
                    break;
                }
                // Skip both merge separators and continue.
                leftStart = leftLimit + 1;
                rightStart = rightLimit + 1;
            }
        }
    }

    // ---- Case level (enabled independently of the strength) ----
    if ((options & CollationSettings.CASE_LEVEL) != 0)
    {
        CollationStrength strength = CollationSettings.GetStrength(options);
        int leftIndex4 = 0;
        int rightIndex4 = 0;
        for (; ;)
        {
            int leftCase, leftLower32, rightCase;
            if (strength == CollationStrength.Primary)
            {
                // Primary+caseLevel: Ignore case level weights of primary ignorables.
                // Otherwise we would get a-umlaut > a
                // which is not desirable for accent-insensitive sorting.
                // Check for (lower 32 bits) == 0 as well because variable CEs are stored
                // with only primary weights.
                long ce;
                do
                {
                    ce = left.GetCE(leftIndex4++);
                    leftCase = (int)ce;
                } while ((ce.TripleShift(32)) == 0 || leftCase == 0);
                // Keep the full lower 32 bits for the terminator check below.
                leftLower32 = leftCase;
                leftCase &= 0xc000;

                do
                {
                    ce = right.GetCE(rightIndex4++);
                    rightCase = (int)ce;
                } while ((ce.TripleShift(32)) == 0 || rightCase == 0);
                rightCase &= 0xc000;
            }
            else
            {
                // Secondary+caseLevel: By analogy with the above,
                // ignore case level weights of secondary ignorables.
                //
                // Note: A tertiary CE has uppercase case bits (0.0.ut)
                // to keep tertiary+caseFirst well-formed.
                //
                // Tertiary+caseLevel: Also ignore case level weights of secondary ignorables.
                // Otherwise a tertiary CE's uppercase would be no greater than
                // a primary/secondary CE's uppercase.
                // (See UCA well-formedness condition 2.)
                // We could construct a special case weight higher than uppercase,
                // but it's simpler to always ignore case weights of secondary ignorables,
                // turning 0.0.ut into 0.0.0.t.
                // (See LDML Collation, Case Parameters.)
                do
                {
                    leftCase = (int)left.GetCE(leftIndex4++);
                } while ((leftCase & 0xffff0000) == 0);
                leftLower32 = leftCase;
                leftCase &= 0xc000;

                do
                {
                    rightCase = (int)right.GetCE(rightIndex4++);
                } while ((rightCase & 0xffff0000) == 0);
                rightCase &= 0xc000;
            }

            // No need to handle NO_CE and MERGE_SEPARATOR specially:
            // There is one case weight for each previous-level weight,
            // so level length differences were handled there.
            if (leftCase != rightCase)
            {
                // UPPER_FIRST inverts the result of the case comparison.
                if ((options & CollationSettings.UPPER_FIRST) == 0)
                {
                    return((leftCase < rightCase) ? Collation.LESS : Collation.GREATER);
                }
                else
                {
                    return((leftCase < rightCase) ? Collation.GREATER : Collation.LESS);
                }
            }
            if ((leftLower32.TripleShift(16)) == Collation.NO_CE_WEIGHT16)
            {
                break;
            }
        }
    }
    if (CollationSettings.GetStrength(options) <= CollationStrength.Secondary)
    {
        return(Collation.EQUAL);
    }

    // ---- Tertiary level ----
    int tertiaryMask = CollationSettings.GetTertiaryMask(options);

    int leftIndex = 0;
    int rightIndex = 0;
    // OR of the lower 32 bits of every CE visited; bits 7..6 are tested before the
    // quaternary pass to see whether any non-zero quaternary weight exists.
    int anyQuaternaries = 0;
    for (; ;)
    {
        int leftLower32, leftTertiary;
        do
        {
            leftLower32 = (int)left.GetCE(leftIndex++);
            anyQuaternaries |= leftLower32;
            Debug.Assert((leftLower32 & Collation.ONLY_TERTIARY_MASK) != 0 || (leftLower32 & 0xc0c0) == 0);
            leftTertiary = leftLower32 & tertiaryMask;
        } while (leftTertiary == 0);

        int rightLower32, rightTertiary;
        do
        {
            rightLower32 = (int)right.GetCE(rightIndex++);
            anyQuaternaries |= rightLower32;
            Debug.Assert((rightLower32 & Collation.ONLY_TERTIARY_MASK) != 0 || (rightLower32 & 0xc0c0) == 0);
            rightTertiary = rightLower32 & tertiaryMask;
        } while (rightTertiary == 0);

        if (leftTertiary != rightTertiary)
        {
            if (CollationSettings.SortsTertiaryUpperCaseFirst(options))
            {
                // Pass through NO_CE and keep real tertiary weights larger than that.
                // Do not change the artificial uppercase weight of a tertiary CE (0.0.ut),
                // to keep tertiary CEs well-formed.
                // Their case+tertiary weights must be greater than those of
                // primary and secondary CEs.
                if (leftTertiary > Collation.NO_CE_WEIGHT16)
                {
                    if ((leftLower32 & 0xffff0000) != 0)
                    {
                        leftTertiary ^= 0xc000;
                    }
                    else
                    {
                        leftTertiary += 0x4000;
                    }
                }
                if (rightTertiary > Collation.NO_CE_WEIGHT16)
                {
                    if ((rightLower32 & 0xffff0000) != 0)
                    {
                        rightTertiary ^= 0xc000;
                    }
                    else
                    {
                        rightTertiary += 0x4000;
                    }
                }
            }
            return((leftTertiary < rightTertiary) ? Collation.LESS : Collation.GREATER);
        }
        if (leftTertiary == Collation.NO_CE_WEIGHT16)
        {
            break;
        }
    }
    if (CollationSettings.GetStrength(options) <= CollationStrength.Tertiary)
    {
        return(Collation.EQUAL);
    }

    // ---- Quaternary level ----
    if (!anyVariable && (anyQuaternaries & 0xc0) == 0)
    {
        // If there are no "variable" CEs and no non-zero quaternary weights,
        // then there are no quaternary differences.
        return(Collation.EQUAL);
    }

    leftIndex = 0;
    rightIndex = 0;
    for (; ;)
    {
        long leftQuaternary;
        do
        {
            long ce = left.GetCE(leftIndex++);
            leftQuaternary = ce & 0xffff;
            if (leftQuaternary <= Collation.NO_CE_WEIGHT16)
            {
                // Variable primary or completely ignorable or NO_CE.
                leftQuaternary = ce.TripleShift(32);
            }
            else
            {
                // Regular CE, not tertiary ignorable.
                // Preserve the quaternary weight in bits 7..6.
                leftQuaternary |= 0xffffff3fL;
            }
        } while (leftQuaternary == 0);

        long rightQuaternary;
        do
        {
            long ce = right.GetCE(rightIndex++);
            rightQuaternary = ce & 0xffff;
            if (rightQuaternary <= Collation.NO_CE_WEIGHT16)
            {
                // Variable primary or completely ignorable or NO_CE.
                rightQuaternary = ce.TripleShift(32);
            }
            else
            {
                // Regular CE, not tertiary ignorable.
                // Preserve the quaternary weight in bits 7..6.
                rightQuaternary |= 0xffffff3fL;
            }
        } while (rightQuaternary == 0);

        if (leftQuaternary != rightQuaternary)
        {
            // Return the difference, with script reordering.
            if (settings.HasReordering)
            {
                leftQuaternary = settings.Reorder(leftQuaternary);
                rightQuaternary = settings.Reorder(rightQuaternary);
            }
            return((leftQuaternary < rightQuaternary) ? Collation.LESS : Collation.GREATER);
        }
        if (leftQuaternary == Collation.NO_CE_PRIMARY)
        {
            break;
        }
    }
    return(Collation.EQUAL);
}