/// <summary>
/// Builds the collation key for <paramref name="source"/>: a string of characters that
/// orders the same way as the source text does under this collator's rules.
/// Overrides the base collator implementation and may itself be overridden in a subclass.
/// </summary>
/// <param name="source">The text to build a key for; may be null.</param>
/// <returns>
/// A <c>RuleBasedCollationKey</c> wrapping <paramref name="source"/> and its key string,
/// or null when <paramref name="source"/> is null.
/// </returns>
public override CollationKey GetCollationKey(String source)
{
    lock (this)
    {
        //
        // Every collation element carries three weights: primary (A vs B),
        // secondary (A vs A-acute) and tertiary (A vs a). A primary difference
        // anywhere in the string outranks a secondary or tertiary difference
        // that occurs earlier, so the weights cannot simply be interleaved.
        //
        // The key is therefore laid out as: all primary weights, a null, all
        // secondary weights, a null, all tertiary weights.
        //
        // Hypothetical example, one digit per weight:
        //
        //   String:              A     a     B    \u00e9 <--(e-acute)
        //   Collation Elements: 101   100   201  510
        //
        //   Collation Key:      1125<null>0001<null>1010
        //
        // With French secondary ordering, accents are compared starting from the
        // *end* of the string, yet the accents attached to one base character are
        // still compared front to back. To get that, the run of secondaries
        // belonging to each base character is reversed as it is completed, and
        // the whole secondary section is reversed once more at the end. For the
        // example above a French collator would produce:
        //
        //   Collation Key:      1125<null>1000<null>1010
        //
        if (source == null)
        {
            return null;
        }

        // Reuse the three scratch buffers across calls; create them on first use.
        // NOTE(review): this method uses both `Length = 0` and `Length()` on
        // StringBuffer - confirm the type really exposes both forms.
        if (PrimResult != null)
        {
            PrimResult.Length = 0;
            SecResult.Length = 0;
            TerResult.Length = 0;
        }
        else
        {
            PrimResult = new StringBuffer();
            SecResult = new StringBuffer();
            TerResult = new StringBuffer();
        }

        bool wantSecondary = (Strength >= Collator.SECONDARY);
        bool wantTertiary = (Strength >= Collator.TERTIARY);

        // Index in SecResult just past the secondary weight of the most recent
        // base (non-ignorable) element; anything after it is a trailing accent run.
        int accentRunStart = 0;

        if (SourceCursor != null)
        {
            SourceCursor.Text = source;
        }
        else
        {
            SourceCursor = GetCollationElementIterator(source);
        }

        // Walk every collation element of the source text.
        while (true)
        {
            int element = SourceCursor.Next();
            if (element == CollationElementIterator.NULLORDER)
            {
                break;
            }

            int secondary = CollationElementIterator.SecondaryOrder(element);
            int tertiary = CollationElementIterator.TertiaryOrder(element);

            if (CollationElementIterator.IsIgnorable(element))
            {
                // Ignorable element: it contributes no primary weight, and its
                // non-zero secondary/tertiary weights are shifted above the
                // maximum regular weight before being recorded.
                if (wantSecondary && secondary != 0)
                {
                    SecResult.Append((char)(secondary + Tables_Renamed.MaxSecOrder + COLLATIONKEYOFFSET));
                }
                if (wantTertiary && tertiary != 0)
                {
                    TerResult.Append((char)(tertiary + Tables_Renamed.MaxTerOrder + COLLATIONKEYOFFSET));
                }
                continue;
            }

            // Base element.
            PrimResult.Append((char)(CollationElementIterator.PrimaryOrder(element) + COLLATIONKEYOFFSET));

            if (wantSecondary)
            {
                if (Tables_Renamed.FrenchSec)
                {
                    if (accentRunStart < SecResult.Length())
                    {
                        // A new base character closes the accent run of the
                        // previous one; flip that run (see block comment above).
                        RBCollationTables.Reverse(SecResult, accentRunStart, SecResult.Length());
                    }
                }

                SecResult.Append((char)(secondary + COLLATIONKEYOFFSET));

                // Remember where the next accent run starts, in case it has to
                // be reversed later.
                accentRunStart = SecResult.Length();
            }

            if (wantTertiary)
            {
                TerResult.Append((char)(tertiary + COLLATIONKEYOFFSET));
            }
        }

        if (Tables_Renamed.FrenchSec)
        {
            // Flip any accents collected after the last base character ...
            if (accentRunStart < SecResult.Length())
            {
                RBCollationTables.Reverse(SecResult, accentRunStart, SecResult.Length());
            }

            // ... then flip the whole secondary section to get French ordering.
            RBCollationTables.Reverse(SecResult, 0, SecResult.Length());
        }

        // Assemble: primaries <null> secondaries <null> tertiaries.
        PrimResult.Append((char)0);
        SecResult.Append((char)0);
        SecResult.Append(TerResult.ToString());
        PrimResult.Append(SecResult.ToString());

        if (Strength == IDENTICAL)
        {
            // At IDENTICAL strength the (possibly decomposed) text itself is
            // appended after another null separator.
            PrimResult.Append((char)0);

            int decomposition = Decomposition;
            if (decomposition == CANONICAL_DECOMPOSITION)
            {
                PrimResult.Append(Normalizer.Normalize(source, Normalizer.Form.NFD));
            }
            else if (decomposition == FULL_DECOMPOSITION)
            {
                PrimResult.Append(Normalizer.Normalize(source, Normalizer.Form.NFKD));
            }
            else
            {
                PrimResult.Append(source);
            }
        }

        return new RuleBasedCollationKey(source, PrimResult.ToString());
    }
}
/// <summary>
/// Compares two strings according to this collator's rules and reports whether
/// <paramref name="source"/> sorts before, the same as, or after <paramref name="target"/>.
/// This can be overridden in a subclass.
/// </summary>
/// <param name="source">The left-hand string of the comparison.</param>
/// <param name="target">The right-hand string of the comparison.</param>
/// <returns>
/// <c>Collator.LESS</c>, <c>Collator.EQUAL</c> or <c>Collator.GREATER</c> when the collation
/// elements decide the order. At IDENTICAL strength, when the collation elements compare
/// equal, the result of an ordinal comparison of the (optionally decomposed) strings:
/// negative, zero or positive.
/// </returns>
/// <exception cref="NullPointerException"> if <code>source</code> or <code>target</code> is null. </exception>
public override int Compare(String source, String target)
{
    lock (this)
    {
        if (source == null || target == null)
        {
            throw new NullPointerException();
        }

        // The basic algorithm: step through both strings with
        // CollationElementIterators and compare each collation element in the
        // source against the corresponding one in the target.
        //
        // When a difference is found, <result> is set to LESS or GREATER.
        //
        // It is not quite that simple, because a weak difference early in the
        // string (e.g. tertiary, 'A' vs. 'a') is overridden by a stronger one
        // later on (e.g. primary, "A" vs. "B"): "AA" < "aB" even though 'A' > 'a'.
        //
        // So a primary difference returns immediately, while secondary and
        // tertiary differences are only recorded in <result>. The flags
        // checkSecTer / checkTertiary are cleared once a difference of that
        // strength has been recorded, so that later, equally weak differences
        // do not overwrite it.
        int result = Collator.EQUAL;

        // Reuse the cached iterators when they exist; otherwise create them.
        if (SourceCursor == null)
        {
            SourceCursor = GetCollationElementIterator(source);
        }
        else
        {
            SourceCursor.Text = source;
        }
        if (TargetCursor == null)
        {
            TargetCursor = GetCollationElementIterator(target);
        }
        else
        {
            TargetCursor.Text = target;
        }

        int sOrder = 0, tOrder = 0;

        bool initialCheckSecTer = Strength >= Collator.SECONDARY;
        bool checkSecTer = initialCheckSecTer;
        bool checkTertiary = Strength >= Collator.TERTIARY;

        // When false, the corresponding cursor is NOT advanced on the next pass
        // (its current element is compared again); the flag then resets to true.
        bool gets = true, gett = true;

        while (true)
        {
            // Get the next collation element in each of the strings, unless
            // we've been requested to skip it.
            if (gets)
            {
                sOrder = SourceCursor.Next();
            }
            else
            {
                gets = true;
            }
            if (gett)
            {
                tOrder = TargetCursor.Next();
            }
            else
            {
                gett = true;
            }

            // If we've hit the end of one of the strings, jump out of the loop.
            if ((sOrder == CollationElementIterator.NULLORDER) || (tOrder == CollationElementIterator.NULLORDER))
            {
                break;
            }

            int pSOrder = CollationElementIterator.PrimaryOrder(sOrder);
            int pTOrder = CollationElementIterator.PrimaryOrder(tOrder);

            // If there's no difference at this position, we can skip it.
            if (sOrder == tOrder)
            {
                if (Tables_Renamed.FrenchSec && pSOrder != 0)
                {
                    if (!checkSecTer)
                    {
                        // In French, a secondary difference more to the right is stronger,
                        // so accents have to be checked with each base element ...
                        checkSecTer = initialCheckSecTer;
                        // ... but tertiary differences are less important than the first
                        // secondary difference, so checking tertiary remains disabled.
                        checkTertiary = false;
                    }
                }
                continue;
            }

            // Compare primary differences first.
            if (pSOrder != pTOrder)
            {
                if (sOrder == 0)
                {
                    // The entire source element is ignorable.
                    // Skip to the next source element, but don't fetch another target element.
                    gett = false;
                    continue;
                }
                if (tOrder == 0)
                {
                    // The entire target element is ignorable.
                    // Skip to the next target element, but don't fetch another source element.
                    gets = false;
                    continue;
                }

                // Neither element is completely ignorable, but the primary
                // component of one of them may still be ignorable.
                if (pSOrder == 0) // primary order in source is ignorable
                {
                    // The source's primary is ignorable, but the target's isn't. Ignorables
                    // are treated as a secondary difference, so remember that we found one.
                    if (checkSecTer)
                    {
                        result = Collator.GREATER; // (strength is SECONDARY)
                        checkSecTer = false;
                    }
                    // Skip to the next source element, but don't fetch another target element.
                    gett = false;
                }
                else if (pTOrder == 0) // primary order in target is ignorable
                {
                    // Mirror image of the case above.
                    if (checkSecTer)
                    {
                        result = Collator.LESS; // (strength is SECONDARY)
                        checkSecTer = false;
                    }
                    // Skip to the next target element, but don't fetch another source element.
                    gets = false;
                }
                else
                {
                    // Neither primary is ignorable and they differ: this is a
                    // primary difference, which decides the comparison outright.
                    if (pSOrder < pTOrder)
                    {
                        return Collator.LESS; // (strength is PRIMARY)
                    }
                    else
                    {
                        return Collator.GREATER; // (strength is PRIMARY)
                    }
                }
            }
            else
            {
                // Primary order is the same, but the complete order is different, so
                // the difference must lie in the secondary or tertiary weights.
                if (checkSecTer)
                {
                    // A secondary or tertiary difference may still matter.
                    short secSOrder = CollationElementIterator.SecondaryOrder(sOrder);
                    short secTOrder = CollationElementIterator.SecondaryOrder(tOrder);
                    if (secSOrder != secTOrder)
                    {
                        // There is a secondary difference.
                        result = (secSOrder < secTOrder) ? Collator.LESS : Collator.GREATER; // (strength is SECONDARY)
                        // (Even in French, only the first secondary difference within
                        // a base character matters.)
                        checkSecTer = false;
                    }
                    else
                    {
                        if (checkTertiary)
                        {
                            // A tertiary difference may still matter.
                            short terSOrder = CollationElementIterator.TertiaryOrder(sOrder);
                            short terTOrder = CollationElementIterator.TertiaryOrder(tOrder);
                            if (terSOrder != terTOrder)
                            {
                                // There is a tertiary difference.
                                result = (terSOrder < terTOrder) ? Collator.LESS : Collator.GREATER; // (strength is TERTIARY)
                                checkTertiary = false;
                            }
                        }
                    }
                } // if (checkSecTer)
            } // if (pSOrder != pTOrder)
        } // while

        if (sOrder != CollationElementIterator.NULLORDER)
        {
            // (tOrder must be NULLORDER, since this point is only reached when
            // sOrder or tOrder is NULLORDER.)
            // The source string has more elements, but the target string hasn't.
            do
            {
                if (CollationElementIterator.PrimaryOrder(sOrder) != 0)
                {
                    // An additional non-ignorable base character in the source:
                    // a primary difference, so the source is greater.
                    return Collator.GREATER; // (strength is PRIMARY)
                }
                else if (CollationElementIterator.SecondaryOrder(sOrder) != 0)
                {
                    // Additional secondary elements mean the source string is greater.
                    if (checkSecTer)
                    {
                        result = Collator.GREATER; // (strength is SECONDARY)
                        checkSecTer = false;
                    }
                }
            } while ((sOrder = SourceCursor.Next()) != CollationElementIterator.NULLORDER);
        }
        else if (tOrder != CollationElementIterator.NULLORDER)
        {
            // The target string has more elements, but the source string hasn't.
            do
            {
                if (CollationElementIterator.PrimaryOrder(tOrder) != 0)
                {
                    // An additional non-ignorable base character in the target:
                    // a primary difference, so the source is less.
                    return Collator.LESS; // (strength is PRIMARY)
                }
                else if (CollationElementIterator.SecondaryOrder(tOrder) != 0)
                {
                    // Additional secondary elements in the target mean the source string is less.
                    if (checkSecTer)
                    {
                        result = Collator.LESS; // (strength is SECONDARY)
                        checkSecTer = false;
                    }
                }
            } while ((tOrder = TargetCursor.Next()) != CollationElementIterator.NULLORDER);
        }

        // For IDENTICAL comparisons, a bitwise character comparison is the
        // tiebreaker if all else is equal. String.CompareOrdinal is used rather
        // than String.CompareTo because CompareTo is culture-sensitive and can
        // report 0 for strings whose UTF-16 code units differ.
        if (result == 0 && Strength == IDENTICAL)
        {
            int mode = Decomposition;
            Normalizer.Form form;
            if (mode == CANONICAL_DECOMPOSITION)
            {
                form = Normalizer.Form.NFD;
            }
            else if (mode == FULL_DECOMPOSITION)
            {
                form = Normalizer.Form.NFKD;
            }
            else
            {
                // No decomposition requested: compare the raw text.
                return String.CompareOrdinal(source, target);
            }

            String sourceDecomposition = Normalizer.Normalize(source, form);
            String targetDecomposition = Normalizer.Normalize(target, form);
            return String.CompareOrdinal(sourceDecomposition, targetDecomposition);
        }

        return result;
    }
}