예제 #1
0
 public int AlternateHandlingSetting(CollationStrength collationStrength,
                                     AlternateHandling alternateHandling, string string1, string string2)
 {
     /* Compares string1 with string2 using a collator built from an empty rule
      * string at the requested strength, after applying the requested Alternate
      * setting, and returns the collator's comparison result.
      *
      * Background: the Alternate attribute governs the "variable" characters of
      * the UCA (whitespace, punctuation and symbols).
      *   - Non-Ignorable (N): differences among these characters weigh as much
      *     as differences among letters.
      *   - Shifted (S): these characters are of only minor importance. Shifted is
      *     commonly paired with Quaternary strength, in which case they are
      *     considered only when base letters, accents and case are all identical.
      * Unless Alternate is Shifted, Strength 3 and Strength 4 behave the same.
      *
      * Example:
      *        S=3, A=N di Silva < Di Silva < diSilva < U.S.A. < USA
      *        S=3, A=S di Silva = diSilva < Di Silva  < U.S.A. = USA
      *        S=4, A=S di Silva < diSilva < Di Silva < U.S.A. < USA
      */
     int comparison;
     using (var collator = new RuleBasedCollator(string.Empty, collationStrength))
     {
         collator.AlternateHandling = alternateHandling;
         comparison = collator.Compare(string1, string2);
     }
     return comparison;
 }
예제 #2
0
        /// <summary>
        /// RuleBasedCollator constructor.
        /// This takes the table rules and builds a collation table out of them.
        /// </summary>
        /// <param name="rules">the collation rules to build the collation table from; must not be null
        /// (an empty string is accepted)</param>
        /// <param name="normalizationMode">the normalization mode to use</param>
        /// <param name="collationStrength">the collation strength to use</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="rules"/> is null.</exception>
        public RuleBasedCollator(string rules,
                                 NormalizationMode normalizationMode,
                                 CollationStrength collationStrength)
        {
            // Fail fast with a descriptive exception instead of the NullReferenceException
            // that dereferencing rules.Length below would otherwise raise.
            if (rules == null)
            {
                throw new System.ArgumentNullException(nameof(rules));
            }

            ErrorCode status;
            var       parseError = new ParseError();

            _collatorHandle = NativeMethods.ucol_openRules(rules,
                                                           rules.Length,
                                                           normalizationMode,
                                                           collationStrength,
                                                           ref parseError,
                                                           out status);
            try
            {
                ExceptionFromErrorCode.ThrowIfError(status, parseError.ToString(rules));
            }
            catch
            {
                // The status check failed: release any handle that was handed back and
                // reset the field before letting the exception propagate.
                if (_collatorHandle != default(SafeRuleBasedCollatorHandle))
                {
                    _collatorHandle.Dispose();
                }
                _collatorHandle = default(SafeRuleBasedCollatorHandle);
                throw;
            }
        }
예제 #3
0
        /// <summary>
        /// Parses one rule chain: a reset (with its position) followed by a sequence of
        /// relations, dispatching each relation to the plain-string or starred-character parser.
        /// </summary>
        /// <remarks>
        /// The loop ends when no relation operator is found at the current position.
        /// A '#' at that position starts a comment, which is skipped before trying again.
        /// For a reset-before chain (reset strength below Identical) the first relation must
        /// have exactly the reset strength, and later relations must not be stronger.
        /// </remarks>
        private void ParseRuleChain()
        {
            CollationStrength anchorStrength = (CollationStrength)ParseResetAndPosition();
            bool awaitingFirstRelation       = true;

            while (true)
            {
                int opInfo = ParseRelationOperator();
                if (opInfo < 0)
                {
                    // Not a relation operator: either a comment to skip, or the end of this chain.
                    bool atComment = ruleIndex < rules.Length && rules[ruleIndex] == '#';
                    if (atComment)
                    {
                        ruleIndex = SkipComment(ruleIndex + 1);
                        continue;
                    }
                    if (awaitingFirstRelation)
                    {
                        SetParseError("reset not followed by a relation");
                    }
                    return;
                }

                CollationStrength relationStrength = (CollationStrength)(opInfo & STRENGTH_MASK);
                if (anchorStrength < CollationStrength.Identical)
                {
                    // Reset-before chain: validate the relation strength against the reset strength.
                    if (awaitingFirstRelation && relationStrength != anchorStrength)
                    {
                        SetParseError("reset-before strength differs from its first relation");
                        return;
                    }
                    if (!awaitingFirstRelation && relationStrength < anchorStrength)
                    {
                        SetParseError("reset-before strength followed by a stronger relation");
                        return;
                    }
                }

                // The upper bits of the operator result hold the operator's length in the rule string.
                int operandStart = ruleIndex + (opInfo >> OFFSET_SHIFT);
                bool isStarred   = (opInfo & STARRED_FLAG) != 0;
                if (isStarred)
                {
                    ParseStarredCharacters(relationStrength, operandStart);
                }
                else
                {
                    ParseRelationStrings(relationStrength, operandStart);
                }
                awaitingFirstRelation = false;
            }
        }
 /// <summary>
 /// Builds a collator from the given tailoring rules by calling
 /// <c>NativeMethods.ucol_openRules</c>, then throws if the returned status is an error.
 /// </summary>
 /// <param name="rules">the collation rules to build the collation table from; must not be null
 /// (an empty string is accepted)</param>
 /// <param name="normalizationMode">the normalization mode to use</param>
 /// <param name="collationStrength">the collation strength to use</param>
 /// <exception cref="System.ArgumentNullException"><paramref name="rules"/> is null.</exception>
 public RuleBasedCollator(string rules, NormalizationMode normalizationMode, CollationStrength collationStrength)
 {
     // Fail fast with a descriptive exception instead of the NullReferenceException
     // that dereferencing rules.Length below would otherwise raise.
     if (rules == null)
     {
         throw new System.ArgumentNullException(nameof(rules));
     }

     ErrorCode status;
     _handle = NativeMethods.ucol_openRules(rules,
                                            rules.Length,
                                            normalizationMode,
                                            collationStrength,
                                            ref _parseError,
                                            out status);
     // NOTE(review): if this throws, _handle keeps whatever the native call returned;
     // confirm the handle type releases itself (e.g. via finalization) in that case.
     status.ThrowIfError(_parseError.ToString(rules));
 }
예제 #5
0
        /// <summary>
        /// RuleBasedCollator constructor.
        /// This takes the table rules and builds a collation table out of them.
        /// </summary>
        /// <param name="rules">the collation rules to build the collation table from; must not be null
        /// (an empty string is accepted)</param>
        /// <param name="normalizationMode">the normalization mode to use</param>
        /// <param name="collationStrength">the collation strength to use</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="rules"/> is null.</exception>
        public RuleBasedCollator(string rules,
                                 NormalizationMode normalizationMode,
                                 CollationStrength collationStrength)
        {
            // Fail fast with a descriptive exception instead of the NullReferenceException
            // that dereferencing rules.Length below would otherwise raise.
            if (rules == null)
            {
                throw new System.ArgumentNullException(nameof(rules));
            }

            ErrorCode status;

            collatorHandle = NativeMethods.ucol_openRules(rules,
                                                          rules.Length,
                                                          normalizationMode,
                                                          collationStrength,
                                                          ref parseError,
                                                          out status);
            // NOTE(review): if this throws, collatorHandle keeps whatever the native call
            // returned; confirm that the handle is released elsewhere in that case.
            ExceptionFromErrorCode.ThrowIfError(status, parseError.ToString(rules));
        }
예제 #6
0
 /// <summary>
 /// Produces a Collator instance according to the rules supplied.
 /// The rules change the default ordering defined in the UCA, a process called
 /// tailoring. The resulting Collator pointer can be used in the same way as
 /// the one obtained by ucol_strcoll.
 /// </summary>
 /// <remarks>
 /// The native entry point is resolved on first use and cached in
 /// <c>CollatorMethods.ucol_openRules</c>.
 /// </remarks>
 /// <param name="rules">A string describing the collation rules. For the syntax
 ///    of the rules please see the users guide.</param>
 /// <param name="rulesLength">The length of rules, or -1 if null-terminated.</param>
 /// <param name="normalizationMode">The normalization mode</param>
 /// <param name="strength">The default collation strength; can also be set in the rules</param>
 /// <param name="parseError">A ParseError structure that receives information about errors
 /// that occurred during parsing. Please provide a real structure.</param>
 /// <param name="status">Receives the ErrorCode reported by the native call</param>
 /// <returns>A handle to a UCollator. It is not guaranteed that NULL is returned in case
 ///         of error - please use the status argument to check for errors.</returns>
 public static RuleBasedCollator.SafeRuleBasedCollatorHandle ucol_openRules(
     [MarshalAs(UnmanagedType.LPWStr)] string rules,
     int rulesLength,
     NormalizationMode normalizationMode,
     CollationStrength strength,
     ref ParseError parseError,
     out ErrorCode status)
 {
     // Give the out parameter a defined value before anything else runs.
     status = ErrorCode.NoErrors;

     var nativeOpenRules = CollatorMethods.ucol_openRules;
     if (nativeOpenRules == null)
     {
         // First call: bind the delegate to the native export and cache it.
         nativeOpenRules = GetMethod <CollatorMethodsContainer.ucol_openRulesDelegate>(IcuI18NLibHandle, "ucol_openRules");
         CollatorMethods.ucol_openRules = nativeOpenRules;
     }

     return nativeOpenRules(rules, rulesLength, normalizationMode, strength, ref parseError, out status);
 }
예제 #7
0
파일: Collation.cs 프로젝트: LJM74520/nice
        /// <summary>
        /// Maps a <see cref="CollationStrength"/> value to its numeric level:
        /// Primary = 1, Secondary = 2, Tertiary = 3, Quaternary = 4, Identical = 5.
        /// </summary>
        /// <param name="strength">The strength to convert.</param>
        /// <returns>The level number, from 1 to 5.</returns>
        /// <exception cref="ArgumentException"><paramref name="strength"/> is not one of the five values above.</exception>
        internal static int ToInt32(CollationStrength strength)
        {
            if (strength == CollationStrength.Primary)
            {
                return 1;
            }
            if (strength == CollationStrength.Secondary)
            {
                return 2;
            }
            if (strength == CollationStrength.Tertiary)
            {
                return 3;
            }
            if (strength == CollationStrength.Quaternary)
            {
                return 4;
            }
            if (strength == CollationStrength.Identical)
            {
                return 5;
            }

            throw new ArgumentException($"Invalid strength: {strength}.", nameof(strength));
        }
예제 #8
0
        /// <summary>
        /// Parses the operand of a relation, of the form <c>prefix | str / extension</c>
        /// (prefix and extension are optional), and passes the pieces to the sink.
        /// </summary>
        /// <param name="strength">Strength of the relation being parsed.</param>
        /// <param name="i">Index into the rules where the operand starts.</param>
        /// <remarks>
        /// <c>ruleIndex</c> is advanced past the operand only when the relation was added
        /// without a parse error being reported.
        /// </remarks>
        private void ParseRelationStrings(CollationStrength strength, int i)
        {
            StringCharSequence contextPrefix = new StringCharSequence("");
            string             expansion     = "";

            int  cursor    = ParseTailoringString(i, rawBuilder.Value);
            char delimiter = (cursor < rules.Length) ? rules[cursor] : (char)0;

            if (delimiter == '|')
            {
                // What was just parsed is the context prefix; the actual string follows the '|'.
                contextPrefix = new StringCharSequence(rawBuilder.ToString());
                cursor        = ParseTailoringString(cursor + 1, rawBuilder.Value);
                delimiter     = (cursor < rules.Length) ? rules[cursor] : (char)0;
            }

            // From here on rawBuilder holds str and must not be modified again in this method.
            if (delimiter == '/')
            {
                // The extension follows the '/'.
                StringBuilder expansionBuilder = new StringBuilder();
                cursor    = ParseTailoringString(cursor + 1, expansionBuilder);
                expansion = expansionBuilder.ToString();
            }

            if (contextPrefix.Length != 0)
            {
                int prefixLead = contextPrefix.Value.CodePointAt(0);
                int strLead    = rawBuilder.Value.CodePointAt(0);
                bool bothStartAtBoundary = nfc.HasBoundaryBefore(prefixLead) && nfc.HasBoundaryBefore(strLead);
                if (!bothStartAtBoundary)
                {
                    SetParseError("in 'prefix|str', prefix and str must each start with an NFC boundary");
                    return;
                }
            }

            try
            {
                sink.AddRelation(strength, contextPrefix, rawBuilder, expansion);
            }
            catch (Exception e)
            {
                SetParseError("adding relation failed", e);
                return;
            }

            ruleIndex = cursor;
        }
예제 #9
0
 public int HiraganaQuarternarySetting(CollationStrength collationStrength, string string1,
                                       string string2)
 {
     /* Compares string1 with string2 using the collator from CreateJaCollator()
      * at the requested strength and returns the comparison result.
      *
      * Background: compatibility with JIS x 4061 requires an additional level
      * that distinguishes Hiragana from Katakana characters. When that
      * compatibility is needed, the attribute should be On and the strength
      * Quaternary. This affects sort key length and string comparison
      * performance.
      *      Example:
      *      H=X, S=4 きゅう = キュウ < きゆう = キユウ
      *      H=O, S=4 きゅう < キュウ < きゆう < キユウ
      */
     int comparison;
     using (var collator = CreateJaCollator())
     {
         // In ICU54 the HiraganaQuaternary special feature is deprecated in favor of
         // supporting quaternary sorting as a regular feature, so only the strength is set.
         collator.Strength = collationStrength;
         comparison = collator.Compare(string1, string2);
     }
     return comparison;
 }
예제 #10
0
 public int StrengthSetting(CollationStrength collationStrength, string string1,
                            string string2)
 {
     /* Compares string1 with string2 using a collator built from an empty rule
      * string at the requested strength; for Quaternary strength, Alternate is
      * additionally set to Shifted. Asserts that the collator reports the
      * requested strength and returns the comparison result.
      *
      * Background: the Strength attribute determines whether accents or case
      * are taken into account when collating or matching text (in writing
      * systems without case or accents it controls similarly important
      * features). The default rarely needs changing for sorting, but often does
      * for matching (e.g. SELECT). Possible values include Default (D),
      * Primary (1), Secondary (2), Tertiary (3), Quaternary (4) and Identical (I).
      *
      * For example, people may choose to ignore accents, or accents and case,
      * when searching for text.
      *
      * Almost all characters are distinguished by the first three levels, so the
      * default in most locales is Tertiary. With Alternate set to Shifted, the
      * Quaternary strength (4) can break ties among whitespace, punctuation and
      * symbols that would otherwise be ignored. For very fine distinctions the
      * Identical strength (I) can be used (it distinguishes, for example, the
      * Mathematical Bold Small A from the Mathematical Italic Small A; for more
      * examples see the cells with white backgrounds in the collation charts).
      * Levels above Tertiary, however, result in significantly longer sort keys
      * and slower string comparison for equal strings.
      *      Example:
      *      S=1 role = Role = rôle
      *      S=2 role = Role < rôle
      *      S=3 role < Role < rôle
      *      A=S  S=4 ab < a c < a-c < ac
      */
     using (var collator = new RuleBasedCollator(string.Empty, collationStrength))
     {
         bool isQuaternary = collationStrength == CollationStrength.Quaternary;
         if (isQuaternary)
         {
             collator.AlternateHandling = AlternateHandling.Shifted;
         }

         Assert.That(collator.Strength, Is.EqualTo(collationStrength));

         int comparison = collator.Compare(string1, string2);
         return comparison;
     }
 }
 /// <summary>
 /// Creates a <c>SystemCollator</c> for the given locale.
 /// </summary>
 /// <param name="locale">Culture passed to the <c>SystemCollator</c> constructor.</param>
 /// <param name="strength">Not used by this implementation.</param>
 /// <param name="decomposition">Not used by this implementation.</param>
 /// <returns>A new <c>SystemCollator</c> instance.</returns>
 public ICollator CreateCollator(CultureInfo locale, CollationStrength strength, CollationDecomposition decomposition)
 {
     ICollator collator = new SystemCollator(locale);
     return collator;
 }
예제 #12
0
        /// <summary>
        /// Compares the collation elements (CEs) produced by two iterators, level by level:
        /// primary weights first, then (depending on the strength and option bits in
        /// <paramref name="settings"/>) secondary, case, tertiary and quaternary weights.
        /// </summary>
        /// <remarks>
        /// The primary pass pulls CEs from both iterators with <c>NextCE()</c>; all later passes
        /// re-read the same CEs by index via <c>GetCE()</c>. Each pass returns as soon as a
        /// difference is found.
        /// </remarks>
        /// <param name="left">Iterator over the CEs of the left string.</param>
        /// <param name="right">Iterator over the CEs of the right string.</param>
        /// <param name="settings">Supplies the option bits, the variable top and optional primary reordering.</param>
        /// <returns>
        /// <c>Collation.LESS</c>, <c>Collation.GREATER</c>, or <c>Collation.EQUAL</c> when no
        /// difference is found on any of the levels that were compared.
        /// </returns>
        public static int CompareUpToQuaternary(CollationIterator left, CollationIterator right,
                                                CollationSettings settings)
        {
            int  options = settings.Options;
            long variableTop;

            // With no alternate-handling bits set, variableTop stays 0, so the
            // "primary < variableTop" tests below cannot match a non-negative primary.
            if ((options & CollationSettings.ALTERNATE_MASK) == 0)
            {
                variableTop = 0;
            }
            else
            {
                // +1 so that we can use "<" and primary ignorables test out early.
                variableTop = settings.VariableTop + 1;
            }
            // Set once any variable CE has been shifted; consulted before the quaternary pass.
            bool anyVariable = false;

            // Fetch CEs, compare primaries, store secondary & tertiary weights.
            for (; ;)
            {
                // We fetch CEs until we get a non-ignorable primary or reach the end.
                long leftPrimary;
                do
                {
                    long ce = left.NextCE();
                    // The primary weight is taken from the upper 32 bits of the CE.
                    leftPrimary = ce.TripleShift(32);
                    if (leftPrimary < variableTop && leftPrimary > Collation.MERGE_SEPARATOR_PRIMARY)
                    {
                        // Variable CE, shift it to quaternary level.
                        // Ignore all following primary ignorables, and shift further variable CEs.
                        anyVariable = true;
                        do
                        {
                            // Store only the primary of the variable CE.
                            left.SetCurrentCE(ce & unchecked ((long)0xffffffff00000000L));
                            for (; ;)
                            {
                                ce          = left.NextCE();
                                leftPrimary = ce.TripleShift(32);
                                if (leftPrimary == 0)
                                {
                                    left.SetCurrentCE(0);
                                }
                                else
                                {
                                    break;
                                }
                            }
                        } while (leftPrimary < variableTop && leftPrimary > Collation.MERGE_SEPARATOR_PRIMARY);
                    }
                } while (leftPrimary == 0);

                // Same procedure for the right iterator.
                long rightPrimary;
                do
                {
                    long ce = right.NextCE();
                    rightPrimary = ce.TripleShift(32);
                    if (rightPrimary < variableTop && rightPrimary > Collation.MERGE_SEPARATOR_PRIMARY)
                    {
                        // Variable CE, shift it to quaternary level.
                        // Ignore all following primary ignorables, and shift further variable CEs.
                        anyVariable = true;
                        do
                        {
                            // Store only the primary of the variable CE.
                            right.SetCurrentCE(ce & unchecked ((long)0xffffffff00000000L));
                            for (; ;)
                            {
                                ce           = right.NextCE();
                                rightPrimary = ce.TripleShift(32);
                                if (rightPrimary == 0)
                                {
                                    right.SetCurrentCE(0);
                                }
                                else
                                {
                                    break;
                                }
                            }
                        } while (rightPrimary < variableTop && rightPrimary > Collation.MERGE_SEPARATOR_PRIMARY);
                    }
                } while (rightPrimary == 0);

                if (leftPrimary != rightPrimary)
                {
                    // Return the primary difference, with script reordering.
                    if (settings.HasReordering)
                    {
                        leftPrimary  = settings.Reorder(leftPrimary);
                        rightPrimary = settings.Reorder(rightPrimary);
                    }
                    return((leftPrimary < rightPrimary) ? Collation.LESS : Collation.GREATER);
                }
                // Equal primaries that are both the NO_CE terminator: both inputs are exhausted.
                if (leftPrimary == Collation.NO_CE_PRIMARY)
                {
                    break;
                }
            }

            // Compare the buffered secondary & tertiary weights.
            // We might skip the secondary level but continue with the case level
            // which is turned on separately.
            if (CollationSettings.GetStrength(options) >= CollationStrength.Secondary)
            {
                if ((options & CollationSettings.BACKWARD_SECONDARY) == 0)
                {
                    // Forward secondary comparison over the buffered CEs.
                    int leftIndex2  = 0;
                    int rightIndex2 = 0;
                    for (; ;)
                    {
                        // The secondary weight is bits 31..16 of the CE's lower 32 bits; zero weights are skipped.
                        int leftSecondary;
                        do
                        {
                            leftSecondary = ((int)left.GetCE(leftIndex2++)).TripleShift(16);
                        } while (leftSecondary == 0);

                        int rightSecondary;
                        do
                        {
                            rightSecondary = ((int)right.GetCE(rightIndex2++)).TripleShift(16);
                        } while (rightSecondary == 0);

                        if (leftSecondary != rightSecondary)
                        {
                            return((leftSecondary < rightSecondary) ? Collation.LESS : Collation.GREATER);
                        }
                        if (leftSecondary == Collation.NO_CE_WEIGHT16)
                        {
                            break;
                        }
                    }
                }
                else
                {
                    // The backwards secondary level compares secondary weights backwards
                    // within segments separated by the merge separator (U+FFFE, weight 02).
                    int leftStart  = 0;
                    int rightStart = 0;
                    for (; ;)
                    {
                        // Find the merge separator or the NO_CE terminator.
                        long p;
                        int  leftLimit = leftStart;
                        while ((p = left.GetCE(leftLimit).TripleShift(32)) > Collation.MERGE_SEPARATOR_PRIMARY ||
                               p == 0)
                        {
                            ++leftLimit;
                        }
                        int rightLimit = rightStart;
                        while ((p = right.GetCE(rightLimit).TripleShift(32)) > Collation.MERGE_SEPARATOR_PRIMARY ||
                               p == 0)
                        {
                            ++rightLimit;
                        }

                        // Compare the segments.
                        // Both indexes walk from the segment limit back toward the segment start.
                        int leftIndex3  = leftLimit;
                        int rightIndex3 = rightLimit;
                        for (; ;)
                        {
                            int leftSecondary = 0;
                            while (leftSecondary == 0 && leftIndex3 > leftStart)
                            {
                                leftSecondary = ((int)left.GetCE(--leftIndex3)).TripleShift(16);
                            }

                            int rightSecondary = 0;
                            while (rightSecondary == 0 && rightIndex3 > rightStart)
                            {
                                rightSecondary = ((int)right.GetCE(--rightIndex3)).TripleShift(16);
                            }

                            if (leftSecondary != rightSecondary)
                            {
                                return((leftSecondary < rightSecondary) ? Collation.LESS : Collation.GREATER);
                            }
                            // Both sides ran out of non-zero secondaries in this segment.
                            if (leftSecondary == 0)
                            {
                                break;
                            }
                        }

                        // Did we reach the end of either string?
                        // Both strings have the same number of merge separators,
                        // or else there would have been a primary-level difference.
                        Debug.Assert(left.GetCE(leftLimit) == right.GetCE(rightLimit));
                        // p still holds the primary at rightLimit from the scan above.
                        if (p == Collation.NO_CE_PRIMARY)
                        {
                            break;
                        }
                        // Skip both merge separators and continue.
                        leftStart  = leftLimit + 1;
                        rightStart = rightLimit + 1;
                    }
                }
            }

            // The case level is compared only when the CASE_LEVEL option bit is set.
            if ((options & CollationSettings.CASE_LEVEL) != 0)
            {
                CollationStrength strength = CollationSettings.GetStrength(options);
                int leftIndex4             = 0;
                int rightIndex4            = 0;
                for (; ;)
                {
                    int leftCase, leftLower32, rightCase;
                    if (strength == CollationStrength.Primary)
                    {
                        // Primary+caseLevel: Ignore case level weights of primary ignorables.
                        // Otherwise we would get a-umlaut > a
                        // which is not desirable for accent-insensitive sorting.
                        // Check for (lower 32 bits) == 0 as well because variable CEs are stored
                        // with only primary weights.
                        long ce;
                        do
                        {
                            ce       = left.GetCE(leftIndex4++);
                            leftCase = (int)ce;
                        } while ((ce.TripleShift(32)) == 0 || leftCase == 0);
                        // Keep the full lower 32 bits for the end-of-input test, then mask down to the case bits.
                        leftLower32 = leftCase;
                        leftCase   &= 0xc000;

                        do
                        {
                            ce        = right.GetCE(rightIndex4++);
                            rightCase = (int)ce;
                        } while ((ce.TripleShift(32)) == 0 || rightCase == 0);
                        rightCase &= 0xc000;
                    }
                    else
                    {
                        // Secondary+caseLevel: By analogy with the above,
                        // ignore case level weights of secondary ignorables.
                        //
                        // Note: A tertiary CE has uppercase case bits (0.0.ut)
                        // to keep tertiary+caseFirst well-formed.
                        //
                        // Tertiary+caseLevel: Also ignore case level weights of secondary ignorables.
                        // Otherwise a tertiary CE's uppercase would be no greater than
                        // a primary/secondary CE's uppercase.
                        // (See UCA well-formedness condition 2.)
                        // We could construct a special case weight higher than uppercase,
                        // but it's simpler to always ignore case weights of secondary ignorables,
                        // turning 0.0.ut into 0.0.0.t.
                        // (See LDML Collation, Case Parameters.)
                        do
                        {
                            leftCase = (int)left.GetCE(leftIndex4++);
                        } while ((leftCase & 0xffff0000) == 0);
                        leftLower32 = leftCase;
                        leftCase   &= 0xc000;

                        do
                        {
                            rightCase = (int)right.GetCE(rightIndex4++);
                        } while ((rightCase & 0xffff0000) == 0);
                        rightCase &= 0xc000;
                    }

                    // No need to handle NO_CE and MERGE_SEPARATOR specially:
                    // There is one case weight for each previous-level weight,
                    // so level length differences were handled there.
                    if (leftCase != rightCase)
                    {
                        // UPPER_FIRST inverts the result of the case-bit comparison.
                        if ((options & CollationSettings.UPPER_FIRST) == 0)
                        {
                            return((leftCase < rightCase) ? Collation.LESS : Collation.GREATER);
                        }
                        else
                        {
                            return((leftCase < rightCase) ? Collation.GREATER : Collation.LESS);
                        }
                    }
                    if ((leftLower32.TripleShift(16)) == Collation.NO_CE_WEIGHT16)
                    {
                        break;
                    }
                }
            }
            // Strengths up to Secondary are done once the levels above compare equal.
            if (CollationSettings.GetStrength(options) <= CollationStrength.Secondary)
            {
                return(Collation.EQUAL);
            }

            int tertiaryMask = CollationSettings.GetTertiaryMask(options);

            int leftIndex       = 0;
            int rightIndex      = 0;
            // Accumulates (by OR) the lower 32 bits of every CE visited in the tertiary pass,
            // so that the 0xc0 bits can be tested once before the quaternary pass.
            int anyQuaternaries = 0;

            for (; ;)
            {
                int leftLower32, leftTertiary;
                do
                {
                    leftLower32      = (int)left.GetCE(leftIndex++);
                    anyQuaternaries |= leftLower32;
                    Debug.Assert((leftLower32 & Collation.ONLY_TERTIARY_MASK) != 0 || (leftLower32 & 0xc0c0) == 0);
                    leftTertiary = leftLower32 & tertiaryMask;
                } while (leftTertiary == 0);

                int rightLower32, rightTertiary;
                do
                {
                    rightLower32     = (int)right.GetCE(rightIndex++);
                    anyQuaternaries |= rightLower32;
                    Debug.Assert((rightLower32 & Collation.ONLY_TERTIARY_MASK) != 0 || (rightLower32 & 0xc0c0) == 0);
                    rightTertiary = rightLower32 & tertiaryMask;
                } while (rightTertiary == 0);

                if (leftTertiary != rightTertiary)
                {
                    if (CollationSettings.SortsTertiaryUpperCaseFirst(options))
                    {
                        // Pass through NO_CE and keep real tertiary weights larger than that.
                        // Do not change the artificial uppercase weight of a tertiary CE (0.0.ut),
                        // to keep tertiary CEs well-formed.
                        // Their case+tertiary weights must be greater than those of
                        // primary and secondary CEs.
                        if (leftTertiary > Collation.NO_CE_WEIGHT16)
                        {
                            if ((leftLower32 & 0xffff0000) != 0)
                            {
                                leftTertiary ^= 0xc000;
                            }
                            else
                            {
                                leftTertiary += 0x4000;
                            }
                        }
                        if (rightTertiary > Collation.NO_CE_WEIGHT16)
                        {
                            if ((rightLower32 & 0xffff0000) != 0)
                            {
                                rightTertiary ^= 0xc000;
                            }
                            else
                            {
                                rightTertiary += 0x4000;
                            }
                        }
                    }
                    return((leftTertiary < rightTertiary) ? Collation.LESS : Collation.GREATER);
                }
                if (leftTertiary == Collation.NO_CE_WEIGHT16)
                {
                    break;
                }
            }
            // Strengths up to Tertiary are done once the tertiary weights compare equal.
            if (CollationSettings.GetStrength(options) <= CollationStrength.Tertiary)
            {
                return(Collation.EQUAL);
            }

            if (!anyVariable && (anyQuaternaries & 0xc0) == 0)
            {
                // If there are no "variable" CEs and no non-zero quaternary weights,
                // then there are no quaternary differences.
                return(Collation.EQUAL);
            }

            // Quaternary pass: restart from the first buffered CE on both sides.
            leftIndex  = 0;
            rightIndex = 0;
            for (; ;)
            {
                long leftQuaternary;
                do
                {
                    long ce = left.GetCE(leftIndex++);
                    leftQuaternary = ce & 0xffff;
                    if (leftQuaternary <= Collation.NO_CE_WEIGHT16)
                    {
                        // Variable primary or completely ignorable or NO_CE.
                        leftQuaternary = ce.TripleShift(32);
                    }
                    else
                    {
                        // Regular CE, not tertiary ignorable.
                        // Preserve the quaternary weight in bits 7..6.
                        leftQuaternary |= 0xffffff3fL;
                    }
                } while (leftQuaternary == 0);

                long rightQuaternary;
                do
                {
                    long ce = right.GetCE(rightIndex++);
                    rightQuaternary = ce & 0xffff;
                    if (rightQuaternary <= Collation.NO_CE_WEIGHT16)
                    {
                        // Variable primary or completely ignorable or NO_CE.
                        rightQuaternary = ce.TripleShift(32);
                    }
                    else
                    {
                        // Regular CE, not tertiary ignorable.
                        // Preserve the quaternary weight in bits 7..6.
                        rightQuaternary |= 0xffffff3fL;
                    }
                } while (rightQuaternary == 0);

                if (leftQuaternary != rightQuaternary)
                {
                    // Return the difference, with script reordering.
                    if (settings.HasReordering)
                    {
                        leftQuaternary  = settings.Reorder(leftQuaternary);
                        rightQuaternary = settings.Reorder(rightQuaternary);
                    }
                    return((leftQuaternary < rightQuaternary) ? Collation.LESS : Collation.GREATER);
                }
                if (leftQuaternary == Collation.NO_CE_PRIMARY)
                {
                    break;
                }
            }
            return(Collation.EQUAL);
        }
예제 #13
0
        /// <summary>
        /// Produces a bound for the sort order of a given collation key and a
        /// strength level. This API does not attempt to find a bound for the
        /// <see cref="CollationKey"/> string representation, hence null will be returned in its
        /// place.
        /// </summary>
        /// <remarks>
        /// Resulting bounds can be used to produce a range of strings that are
        /// between upper and lower bounds. For example, if bounds are produced
        /// for a sortkey of string "smith", strings between upper and lower
        /// bounds with primary strength would include "Smith", "SMITH", "sMiTh".
        /// <para/>
        /// There are two upper bounds that can be produced. If a <see cref="CollationKeyBoundMode.Upper"/>
        /// bound is produced, strings matched would be as above. However, if a bound
        /// is produced using <see cref="CollationKeyBoundMode.UpperLong"/>, the above example will
        /// also match "Smithsonian" and similar.
        /// <para/>
        /// For more on usage, see example in test procedure
        /// <a href="http://source.icu-project.org/repos/icu/icu4j/trunk/src/com/ibm/icu/dev/test/collator/CollationAPITest.java">
        /// src/com/ibm/icu/dev/test/collator/CollationAPITest/TestBounds.
        /// </a>
        /// <para/>
        /// Collation keys produced may be compared using the <see cref="CollationKey.CompareTo(CollationKey)"/> API.
        /// </remarks>
        /// <param name="boundType">
        /// Mode of bound required. It can be <see cref="CollationKeyBoundMode.Lower"/>, which
        /// produces a lower inclusive bound, <see cref="CollationKeyBoundMode.Upper"/>, that
        /// produces upper bound that matches strings of the same
        /// length or <see cref="CollationKeyBoundMode.UpperLong"/> that matches strings that
        /// have the same starting substring as the source string.
        /// </param>
        /// <param name="noOfLevels">
        /// Strength levels required in the resulting bound
        /// (for most uses, the recommended value is <see cref="CollationStrength.Primary"/>). This
        /// strength should be less than the maximum strength of
        /// this <see cref="CollationKey"/>.
        /// See users guide for explanation on the strength levels a
        /// collation key can have.
        /// </param>
        /// <returns>
        /// The result bounded <see cref="CollationKey"/> with a valid sort order but
        /// a null string representation.
        /// </returns>
        /// <exception cref="ArgumentException">
        /// Thrown when the strength level
        /// requested is higher than or equal to the strength in this
        /// <see cref="CollationKey"/>.
        /// In the case of an Exception, information
        /// about the maximum strength to use will be returned in the
        /// Exception. The user can then call <see cref="GetBound(CollationKeyBoundMode, CollationStrength)"/> again with the
        /// appropriate strength.
        /// Also thrown when <paramref name="boundType"/> is not one of the
        /// <see cref="CollationKeyBoundMode"/> values handled by this method.
        /// </exception>
        /// <seealso cref="CollationKey"/>
        /// <seealso cref="CollationKeyBoundMode"/>
        /// <seealso cref="CollationStrength.Primary"/>
        /// <seealso cref="CollationStrength.Secondary"/>
        /// <seealso cref="CollationStrength.Tertiary"/>
        /// <seealso cref="CollationStrength.Quaternary"/>
        /// <seealso cref="CollationStrength.Identical"/>
        /// <stable>ICU 2.6</stable>
        // ICU4N TODO: Update documentation to point to .NET CollationAPITest class
        public CollationKey GetBound(CollationKeyBoundMode boundType, CollationStrength noOfLevels)
        {
            // Scan the key until we have skipped enough levels OR reached the
            // end of the key (a 0 byte or the end of the array).
            int offset = 0;
            CollationStrength keystrength = CollationStrength.Primary;

            if (noOfLevels > CollationStrength.Primary)
            {
                while (offset < m_key_.Length && m_key_[offset] != 0)
                {
                    if (m_key_[offset++] == Collation.LevelSeparatorByte)
                    {
                        keystrength++;
                        noOfLevels--;
                        if (noOfLevels == CollationStrength.Primary ||
                            offset == m_key_.Length || m_key_[offset] == 0)
                        {
                            // Step back onto the separator so that only the bytes
                            // before it are copied into the result.
                            offset--;
                            break;
                        }
                    }
                }
            }

            if (noOfLevels > 0)
            {
                throw new ArgumentException(
                          "Source collation key has only "
                          + keystrength
                          + " strength level. Call GetBound() again "
                          + " with noOfLevels < " + keystrength);
            }

            // Validate the bound mode and resolve how many extra bytes it needs
            // BEFORE allocating. Sizing the buffer from the raw enum value would
            // let an undefined mode fail inside the array allocation (negative
            // size) or allocate an arbitrarily large buffer before being rejected.
            int extraBytes;
            switch (boundType)
            {
                case CollationKeyBoundMode.Lower:
                    // Lower bound just gets terminated. No extra bytes
                    extraBytes = 0;
                    break;

                case CollationKeyBoundMode.Upper:
                    // Upper bound needs one extra byte
                    extraBytes = 1;
                    break;

                case CollationKeyBoundMode.UpperLong:
                    // Upper long bound needs two extra bytes
                    extraBytes = 2;
                    break;

                default:
                    throw new ArgumentException(
                              "Illegal boundType argument");
            }

            // Copied prefix + bound bytes + one terminating 0 byte.
            byte[] resultkey = new byte[offset + extraBytes + 1];
            System.Array.Copy(m_key_, 0, resultkey, 0, offset);
            if (extraBytes == 1)
            {
                resultkey[offset++] = 2;
            }
            else if (extraBytes == 2)
            {
                resultkey[offset++] = (byte)0xFF;
                resultkey[offset++] = (byte)0xFF;
            }
            resultkey[offset] = 0;
            return new CollationKey(null, resultkey, offset);
        }
예제 #14
0
        // Verifies that Collation.ToInt32 converts the supplied strength to expectedResult.
        // NOTE(review): the method name mentions "maxVariable" but the argument is a
        // CollationStrength - confirm whether the name is a copy/paste leftover.
        public void ToInt32_with_maxVariable_should_return_expected_result(CollationStrength strength, int expectedResult)
        {
            Collation.ToInt32(strength).Should().Be(expectedResult);
        }
예제 #15
0
        // Verifies that Collation.ToCollationStrength converts the supplied integer
        // to the expected CollationStrength.
        public void ToCollationStrength_should_return_expected_result(int value, CollationStrength expectedResult)
        {
            Collation.ToCollationStrength(value).Should().Be(expectedResult);
        }
        // Asserts that the integer produced by Collation.ToInt32 for the given
        // strength equals expectedResult.
        public void ToInt32_with_maxVariable_should_return_expected_result(CollationStrength strength, int expectedResult)
        {
            Collation.ToInt32(strength).Should().Be(expectedResult);
        }
        // Asserts that the CollationStrength produced by Collation.ToCollationStrength
        // for the given integer equals expectedResult.
        public void ToCollationStrength_should_return_expected_result(int value, CollationStrength expectedResult)
        {
            Collation.ToCollationStrength(value).Should().Be(expectedResult);
        }
예제 #18
0
 /// <summary>
 /// Initializes a new <see cref="RuleBasedCollator"/> from the given rules and strength.
 /// This is a convenience overload: it forwards to the constructor that also takes a
 /// <see cref="NormalizationMode"/>, passing <see cref="NormalizationMode.Default"/>.
 /// </summary>
 /// <param name="rules">The collation rules to build the collation table from.</param>
 /// <param name="collationStrength">The collation strength to use.</param>
 public RuleBasedCollator(string rules, CollationStrength collationStrength)
     : this(rules, NormalizationMode.Default, collationStrength)
 {
 }
예제 #19
0
 // Creates a collator for the given locale by wrapping it in a SystemCollator.
 // The strength and decomposition arguments are accepted but not used here.
 public ICollator CreateCollator(CultureInfo locale, CollationStrength strength, CollationDecomposition decomposition)
 {
     var collator = new SystemCollator(locale);
     return collator;
 }
예제 #20
0
 /**
  * Adds a relation with strength and prefix | str / extension.
  *
  * Abstract: this declaration has no body here; the behavior is supplied
  * by the implementing subclass.
  *
  * strength  - the strength of the relation being added.
  * prefix    - the prefix part of the relation (the text before '|').
  * str       - the string the relation applies to.
  * extension - the extension part of the relation (the text after '/').
  */
 internal abstract void AddRelation(CollationStrength strength, ICharSequence prefix,
                                    ICharSequence str, string extension); // ICU4N specific - changed extension from ICharSequence to string
예제 #21
0
 /**
  * Adds a reset.
  * strength=UCOL_IDENTICAL for &str.
  * strength=UCOL_PRIMARY/UCOL_SECONDARY/UCOL_TERTIARY for &[before n]str where n=1/2/3.
  *
  * Abstract: this declaration has no body here; the behavior is supplied
  * by the implementing subclass.
  *
  * strength - the reset strength, as described above.
  * str      - the string the reset refers to.
  */
 internal abstract void AddReset(CollationStrength strength, ICharSequence str);
 /// <summary>
 /// Initializes a new <see cref="RuleBasedCollator"/> from the given rules.
 /// Forwards to the constructor that also takes a <see cref="NormalizationMode"/>,
 /// passing <see cref="NormalizationMode.Default"/>.
 /// </summary>
 /// <param name="rules">The collation rules passed on to the forwarding target.</param>
 /// <param name="collationStrength">
 /// The collation strength to use; <see cref="CollationStrength.Default"/> when omitted.
 /// </param>
 public RuleBasedCollator(string rules, CollationStrength collationStrength = CollationStrength.Default)
     : this(rules, NormalizationMode.Default, collationStrength)
 {
 }
예제 #23
0
 // Externally implemented (extern) entry point that returns a RuleBasedCollator.Handle
 // for the given rule string.
 // NOTE(review): presumably bound to ICU's native ucol_openRules through a DllImport
 // attribute that is not visible in this snippet - confirm.
 //
 //   rules             - rule text, marshalled as a null-terminated UTF-16 string (LPWStr).
 //   rulesLength       - length value passed alongside the rules (assumed to be the
 //                       length of the rule text - TODO confirm the unit).
 //   normalizationMode - normalization mode passed to the native side.
 //   strength          - collation strength passed to the native side.
 //   parseError        - passed by reference; presumably filled in by the callee when
 //                       the rules fail to parse - verify against the native API.
 //   status            - out parameter receiving the resulting ErrorCode.
 public static extern RuleBasedCollator.Handle ucol_openRules([MarshalAs(UnmanagedType.LPWStr)] string rules,
                                                              int rulesLength,
                                                              NormalizationMode normalizationMode,
                                                              CollationStrength strength,
                                                              ref ParseError parseError,
                                                              out ErrorCode status);
예제 #24
0
        /// <summary>
        /// Parses a starred relation starting at rule offset <paramref name="i"/>.
        /// Every code point of the parsed string is added to the sink as its own
        /// relation (with an empty prefix and an empty extension). A '-' (0x2d) in the
        /// rules directly after a parsed string introduces a range that runs from the
        /// last added code point up to the first code point of the next parsed string.
        /// On success, <c>ruleIndex</c> is moved past any trailing white space.
        /// </summary>
        /// <param name="strength">The strength passed to every added relation.</param>
        /// <param name="i">The offset in the rules at which parsing starts.</param>
        private void ParseStarredCharacters(CollationStrength strength, int i)
        {
            StringCharSequence emptySeq = new StringCharSequence("");

            int cursor = ParseString(SkipWhiteSpace(i), rawBuilder.Value);
            if (rawBuilder.Length == 0)
            {
                SetParseError("missing starred-relation string");
                return;
            }

            // Last code point added from the current string; -1 while no range start is available.
            int lastAdded = -1;
            // Index in rawBuilder of the next code point still to be added.
            int pos = 0;

            while (true)
            {
                // Add each remaining code point of the current string individually.
                while (pos < rawBuilder.Length)
                {
                    int codePoint = rawBuilder.Value.CodePointAt(pos);
                    if (!nfd.IsInert(codePoint))
                    {
                        SetParseError("starred-relation string is not all NFD-inert");
                        return;
                    }
                    try
                    {
                        sink.AddRelation(strength, emptySeq, UTF16.ValueOf(codePoint).AsCharSequence(), emptySeq.Value);
                    }
                    catch (Exception e)
                    {
                        SetParseError("adding relation failed", e);
                        return;
                    }
                    pos      += Character.CharCount(codePoint);
                    lastAdded = codePoint;
                }

                // Only continue when the next rule character is the range marker '-'.
                bool atRangeMarker = cursor < rules.Length && rules[cursor] == 0x2d;
                if (!atRangeMarker)
                {
                    break;
                }
                if (lastAdded < 0)
                {
                    SetParseError("range without start in starred-relation string");
                    return;
                }

                cursor = ParseString(cursor + 1, rawBuilder.Value);
                if (rawBuilder.Length == 0)
                {
                    SetParseError("range without end in starred-relation string");
                    return;
                }
                int rangeEnd = rawBuilder.Value.CodePointAt(0);
                if (rangeEnd < lastAdded)
                {
                    SetParseError("range start greater than end in starred-relation string");
                    return;
                }

                // Add the code points after the range start, up to and including the range end.
                for (int member = lastAdded + 1; member <= rangeEnd; member++)
                {
                    if (!nfd.IsInert(member))
                    {
                        SetParseError("starred-relation string range is not all NFD-inert");
                        return;
                    }
                    if (IsSurrogate(member))
                    {
                        SetParseError("starred-relation string range contains a surrogate");
                        return;
                    }
                    if (member >= 0xfffd && member <= 0xffff)
                    {
                        SetParseError("starred-relation string range contains U+FFFD, U+FFFE or U+FFFF");
                        return;
                    }
                    try
                    {
                        sink.AddRelation(strength, emptySeq, UTF16.ValueOf(member).AsCharSequence(), emptySeq.Value);
                    }
                    catch (Exception e)
                    {
                        SetParseError("adding relation failed", e);
                        return;
                    }
                }

                // The range end has already been added; resume after it in the new string.
                lastAdded = -1;
                pos       = Character.CharCount(rangeEnd);
            }

            ruleIndex = SkipWhiteSpace(cursor);
        }