Ejemplo n.º 1
0
        /// <summary>
        /// Builds the collation key for <paramref name="source"/>: a string of characters
        /// encoding the primary, secondary and tertiary orders of every collation element,
        /// wrapped in a <see cref="RuleBasedCollationKey"/>. Subclasses may override it.
        /// </summary>
        /// <param name="source">The string to build a key for; may be <c>null</c>.</param>
        /// <returns>
        /// A new <see cref="RuleBasedCollationKey"/> for <paramref name="source"/>, or
        /// <c>null</c> when <paramref name="source"/> is <c>null</c>.
        /// </returns>
        public override CollationKey GetCollationKey(String source)
        {
            lock (this)
            {
                //
                // Key layout
                // ----------
                // Every collation element carries three components: primary (A vs B),
                // secondary (A vs A-acute) and tertiary (A vs a).  A primary difference
                // late in the string must outrank a secondary or tertiary difference
                // earlier in the string, so the key is not built element by element.
                // Instead all primary orders come first, then all secondary orders, then
                // all tertiary orders, with a null character between the groups.
                //
                // Hypothetical example, writing each collation element as three digits
                // (primary, secondary, tertiary):
                //
                // String:              A     a     B   \u00e9 <--(e-acute)
                // Collation Elements: 101   100   201  510
                //
                // Collation Key:      1125<null>0001<null>1010
                //
                // French secondary ordering
                // -------------------------
                // With French secondary ordering, accent differences are compared starting
                // from the *end* of the string, yet the accents hanging off one base
                // character are still compared front to back.  This is achieved by
                // reversing the run of accents belonging to each base character and then
                // reversing the whole secondary section once at the end.  For the example
                // above a French collator might produce:
                //
                // Collation Key:      1125<null>1000<null>1010
                //
                if (source == null)
                {
                    return null;
                }

                // Reuse the three scratch buffers between calls; create them on first use.
                if (PrimResult != null)
                {
                    PrimResult.Length = 0;
                    SecResult.Length  = 0;
                    TerResult.Length  = 0;
                }
                else
                {
                    PrimResult = new StringBuffer();
                    SecResult  = new StringBuffer();
                    TerResult  = new StringBuffer();
                }

                bool wantSecondary = Strength >= Collator.SECONDARY;
                bool wantTertiary  = Strength >= Collator.TERTIARY;

                // Index in SecResult just past the entry written for the most recent
                // base (non-ignorable) element; anything after it is a pending accent run.
                int accentRunStart = 0;

                // Point the shared element iterator at the new text, creating it if needed.
                if (SourceCursor != null)
                {
                    SourceCursor.Text = source;
                }
                else
                {
                    SourceCursor = GetCollationElementIterator(source);
                }

                // Consume collation elements until the iterator reports the end.
                while (true)
                {
                    int element = SourceCursor.Next();
                    if (element == CollationElementIterator.NULLORDER)
                    {
                        break;
                    }

                    int secondary = CollationElementIterator.SecondaryOrder(element);
                    int tertiary  = CollationElementIterator.TertiaryOrder(element);

                    if (CollationElementIterator.IsIgnorable(element))
                    {
                        // Ignorable element: contributes nothing to the primary section and
                        // only non-zero secondary/tertiary orders, shifted by the table maxima.
                        if (wantSecondary && secondary != 0)
                        {
                            SecResult.Append((char)(secondary + Tables_Renamed.MaxSecOrder + COLLATIONKEYOFFSET));
                        }
                        if (wantTertiary && tertiary != 0)
                        {
                            TerResult.Append((char)(tertiary + Tables_Renamed.MaxTerOrder + COLLATIONKEYOFFSET));
                        }
                        continue;
                    }

                    // Base element: always contributes its primary order.
                    PrimResult.Append((char)(CollationElementIterator.PrimaryOrder(element) + COLLATIONKEYOFFSET));

                    if (wantSecondary)
                    {
                        if (Tables_Renamed.FrenchSec && accentRunStart < SecResult.Length())
                        {
                            // A new base element closes the accent run of the previous one;
                            // flip that run now (see "French secondary ordering" above).
                            RBCollationTables.Reverse(SecResult, accentRunStart, SecResult.Length());
                        }
                        SecResult.Append((char)(secondary + COLLATIONKEYOFFSET));

                        // Mark where the next accent run would begin so it can be reversed later.
                        accentRunStart = SecResult.Length();
                    }

                    if (wantTertiary)
                    {
                        TerResult.Append((char)(tertiary + COLLATIONKEYOFFSET));
                    }
                }

                if (Tables_Renamed.FrenchSec)
                {
                    // Flip any accents collected after the final base element ...
                    if (accentRunStart < SecResult.Length())
                    {
                        RBCollationTables.Reverse(SecResult, accentRunStart, SecResult.Length());
                    }
                    // ... then flip the whole secondary section to get French ordering.
                    RBCollationTables.Reverse(SecResult, 0, SecResult.Length());
                }

                // Assemble: primary <null> secondary <null> tertiary.
                PrimResult.Append((char)0);
                SecResult.Append((char)0);
                SecResult.Append(TerResult.ToString());
                PrimResult.Append(SecResult.ToString());

                if (Strength == IDENTICAL)
                {
                    // At IDENTICAL strength the source text itself, decomposed according to
                    // the current decomposition mode, is appended after another null.
                    PrimResult.Append((char)0);
                    int decompositionMode = Decomposition;
                    if (decompositionMode == CANONICAL_DECOMPOSITION)
                    {
                        PrimResult.Append(Normalizer.Normalize(source, Normalizer.Form.NFD));
                    }
                    else if (decompositionMode == FULL_DECOMPOSITION)
                    {
                        PrimResult.Append(Normalizer.Normalize(source, Normalizer.Form.NFKD));
                    }
                    else
                    {
                        PrimResult.Append(source);
                    }
                }

                return new RuleBasedCollationKey(source, PrimResult.ToString());
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Creates a rule-based collator from the given rule text. The strength is set to
        /// <c>Collator.TERTIARY</c>, the decomposition mode to <paramref name="decomp"/>, and
        /// a new <c>RBCollationTables</c> is built from <paramref name="rules"/> and
        /// <paramref name="decomp"/>. See the RuleBasedCollator class description for
        /// details on the collation rule syntax.
        /// </summary>
        /// <param name="rules">The collation rules to build the collation table from.</param>
        /// <param name="decomp">The decomposition strength used to build the collation
        /// table and to perform comparisons.</param>
        /// <exception cref="ParseException">A format exception is thrown if building the
        /// table from the rules fails. For example, the rule "a &lt; ? &lt; d" makes the
        /// constructor throw because the '?' is not quoted.</exception>
        /// <remarks>
        /// Converted from Java, where the constructor was declared as
        /// <c>RuleBasedCollator(String rules, int decomp) throws ParseException</c>;
        /// .NET has no counterpart to a <c>throws</c> clause. The Java documentation
        /// also cross-referenced <c>java.util.Locale</c>.
        /// </remarks>
        internal RuleBasedCollator(String rules, int decomp)
        {
            this.Strength       = Collator.TERTIARY;
            this.Decomposition  = decomp;
            this.Tables_Renamed = new RBCollationTables(rules, decomp);
        }