Beispiel #1
0
        /// <summary>
        /// Compares this entry with another object using the character
        /// sequence only.
        /// WARNING: this is used for searching in a Vector.
        /// Because Vector.indexOf doesn't take a comparator,
        /// this method is ill-defined and ignores strength.
        /// </summary>
        /// <param name="obj"> the object to compare against; may be null </param>
        /// <returns> true when <paramref name="obj"/> is a PatternEntry whose
        /// characters equal this entry's characters; false for null or for an
        /// object of any other type </returns>
        public override bool Equals(Object obj)
        {
            // 'as' yields null for a null argument as well as for a foreign
            // type, so this method never throws InvalidCastException.
            PatternEntry other = obj as PatternEntry;

            if (other == null)
            {
                return(false);
            }
            return(Chars_Renamed.Equals(other.Chars_Renamed));
        }
Beispiel #2
0
 /// <summary>
 /// Walks backwards through Patterns, starting just below index
 /// <paramref name="i"/>, and returns the first entry found whose
 /// extension is empty.
 /// </summary>
 /// <param name="i"> exclusive upper bound of the search </param>
 /// <returns> the matching entry, or null when no entry below
 /// <paramref name="i"/> has an empty extension </returns>
 private PatternEntry FindLastWithNoExtension(int i)
 {
     int position = i - 1;

     while (position >= 0)
     {
         PatternEntry candidate = Patterns[position];
         bool         isPlain   = candidate.Extension_Renamed.length() == 0;
         if (isPlain)
         {
             return(candidate);
         }
         --position;
     }
     return(null);
 }
Beispiel #3
0
        /// <summary>
        /// Appends <paramref name="chars"/> to <paramref name="toAddTo"/>,
        /// wrapping the whole string in single quotes when its first character
        /// is a space character, a special character according to
        /// PatternEntry.IsSpecialChar, or one of U+0010, '\f', '\r', '\t',
        /// '\n', '@' or a single quote. Only the first character is inspected.
        /// </summary>
        /// <param name="chars"> the text to append; its first character is read
        /// unconditionally, so callers are expected to pass a non-empty string </param>
        /// <param name="toAddTo"> the buffer that receives the output </param>
        internal static void AppendQuoted(String chars, StringBuffer toAddTo)
        {
            char ch = chars.CharAt(0);

            // Same test order as before: space check, special-char check,
            // then the explicit character list.
            bool needsQuote = Character.IsSpaceChar(ch) || PatternEntry.IsSpecialChar(ch);

            if (!needsQuote)
            {
                switch (ch)
                {
                // Written as a char literal: an int constant such as 0x0010 has
                // no implicit conversion to char in a C# case label.
                case '\u0010':
                case '\f':
                case '\r':
                case '\t':
                case '\n':
                case '@':
                case '\'':
                    needsQuote = true;
                    break;
                }
            }

            if (needsQuote)
            {
                toAddTo.Append('\'');
            }
            toAddTo.Append(chars);
            if (needsQuote)
            {
                toAddTo.Append('\'');
            }
        }
Beispiel #4
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: private final int findLastEntry(PatternEntry entry, StringBuffer excessChars) throws ParseException
        /// <summary>
        /// Locates <paramref name="entry"/> in Patterns and returns the index
        /// immediately after it.
        /// For a RESET entry the list is scanned from the end for an entry whose
        /// characters are a prefix of <paramref name="entry"/>'s characters; the
        /// part of <paramref name="entry"/>'s characters beyond that prefix is
        /// appended to <paramref name="excessChars"/>.
        /// For any other strength the entry itself is looked up with LastIndexOf;
        /// for single-character entries the lookup is skipped when the
        /// StatusArray bit for that character is clear.
        /// </summary>
        /// <param name="entry"> the entry to locate; null yields 0 </param>
        /// <param name="excessChars"> receives the unmatched tail (RESET case only) </param>
        /// <returns> index of the located entry plus one, or 0 for a null entry </returns>
        /// <exception cref="ParseException"> when no matching entry is found </exception>
        private int FindLastEntry(PatternEntry entry, StringBuffer excessChars)
        {
            if (entry == null)
            {
                return(0);
            }

            if (entry.Strength_Renamed == PatternEntry.RESET)
            {
                // Scan from the newest entry to the oldest for a prefix match.
                int position = Patterns.Count - 1;
                while (position >= 0)
                {
                    PatternEntry candidate    = Patterns[position];
                    int          prefixLength = candidate.Chars_Renamed.length();
                    if (candidate.Chars_Renamed.regionMatches(0, entry.Chars_Renamed, 0, prefixLength))
                    {
                        excessChars.Append(StringHelperClass.SubstringSpecial(entry.Chars_Renamed, prefixLength, entry.Chars_Renamed.length()));
                        break;
                    }
                    --position;
                }
                if (position == -1)
                {
                    throw new ParseException("couldn't find: " + entry, position);
                }
                return(position + 1);
            }

            // Search backwards for string that contains this one;
            // most likely entry is last one
            bool worthSearching = true;
            if (entry.Chars_Renamed.length() == 1)
            {
                // A clear status bit means the lookup is not attempted at all.
                int index = entry.Chars_Renamed.charAt(0) >> BYTEPOWER;
                worthSearching = (StatusArray[index] & (BITARRAYMASK << (entry.Chars_Renamed.charAt(0) & BYTEMASK))) != 0;
            }

            int oldIndex = worthSearching ? Patterns.LastIndexOf(entry) : -1;
            if (oldIndex == -1)
            {
                throw new ParseException("couldn't find last entry: " + entry, oldIndex);
            }
            return(oldIndex + 1);
        }
Beispiel #5
0
        /// <summary>
        /// Emits the pattern for the collation builder. Every non-null entry is
        /// written with its extension shown. </summary>
        /// <param name="withWhiteSpace"> puts spacing around the entries, and \n
        /// before &amp; and &lt; </param>
        /// <returns> the string in the format understandable to the collation
        /// builder. </returns>
        public String EmitPattern(bool withWhiteSpace)
        {
            StringBuffer buffer = new StringBuffer();
            int          index  = 0;

            while (index < Patterns.Count)
            {
                PatternEntry current = Patterns[index];
                ++index;
                if (current == null)
                {
                    continue;
                }
                current.AddToBuffer(buffer, true, withWhiteSpace, null);
            }
            return(buffer.ToString());
        }
Beispiel #6
0
        /// <summary>
        /// Adds a pattern to the current one: the pattern is parsed entry by
        /// entry and each entry is merged in through FixEntry. A null pattern
        /// is ignored. </summary>
        /// <param name="pattern"> the new pattern to be added </param>
        /// <exception cref="ParseException"> raised by the parser or by FixEntry
        /// (the Java original declared <c>throws ParseException</c>) </exception>
        public void AddPattern(String pattern)
        {
            if (pattern != null)
            {
                PatternEntry.Parser parser = new PatternEntry.Parser(pattern);

                // Next() returns null once the pattern is exhausted.
                for (PatternEntry parsed = parser.Next(); parsed != null; parsed = parser.Next())
                {
                    FixEntry(parsed);
                }
            }
        }
Beispiel #7
0
        /// <summary>
        /// Recovers the current pattern. Entries that carry an extension are
        /// held back until the next entry without one (or the end of the list)
        /// and are then written in reverse order, each against the most recent
        /// extension-free entry found by FindLastWithNoExtension. </summary>
        /// <param name="withWhiteSpace"> puts spacing around the entries, and \n
        /// before &amp; and &lt; </param>
        /// <returns> the pattern text </returns>
        public String GetPattern(bool withWhiteSpace)
        {
            StringBuffer       result  = new StringBuffer();
            List<PatternEntry> extList = null;
            int i;

            for (i = 0; i < Patterns.Count; ++i)
            {
                PatternEntry entry = Patterns[i];
                if (entry.Extension_Renamed.length() != 0)
                {
                    if (extList == null)
                    {
                        // C# has no diamond operator; the type argument must be spelled out.
                        extList = new List<PatternEntry>();
                    }
                    extList.Add(entry);
                    continue;
                }

                if (extList != null)
                {
                    AppendPendingExtensions(extList, i, result, withWhiteSpace);
                    extList = null;
                }
                entry.AddToBuffer(result, false, withWhiteSpace, null);
            }

            // Extension entries at the very end of the list have no plain
            // entry after them to trigger the flush above.
            if (extList != null)
            {
                AppendPendingExtensions(extList, i, result, withWhiteSpace);
            }
            return(result.ToString());
        }

        /// <summary>
        /// Writes the held-back extension entries to <paramref name="result"/>
        /// in reverse order, each relative to the entry returned by
        /// FindLastWithNoExtension(<paramref name="i"/> - 1).
        /// </summary>
        private void AppendPendingExtensions(List<PatternEntry> extList, int i, StringBuffer result, bool withWhiteSpace)
        {
            PatternEntry last = FindLastWithNoExtension(i - 1);

            for (int j = extList.Count - 1; j >= 0; j--)
            {
                extList[j].AddToBuffer(result, false, withWhiteSpace, last);
            }
        }
Beispiel #8
0
        /// <summary>
        /// Merges <paramref name="newEntry"/> into the pattern list.
        /// If the strength is RESET, the entry only becomes the new LastEntry;
        /// whether it can be located in Patterns is checked later, when
        /// FindLastEntry is called for a following entry.
        /// Otherwise any earlier occurrence of the entry is removed and the
        /// entry is inserted after LastEntry (which is usually at the end).
        /// </summary>
        /// <param name="newEntry"> the freshly parsed entry </param>
        /// <exception cref="ParseException"> when the entry repeats the previous
        /// entry with a strength other than IDENTICAL or RESET, or when
        /// FindLastEntry cannot locate LastEntry </exception>
        private void FixEntry(PatternEntry newEntry)
        {
            // check to see whether the new entry has the same characters as the previous
            // entry did (this can happen when a pattern declaring a difference between two
            // strings that are canonically equivalent is normalized).  If so, and the strength
            // is anything other than IDENTICAL or RESET, throw an exception (you can't
            // declare a string to be unequal to itself).       --rtg 5/24/99
            bool repeatsLastEntry = LastEntry != null
                                    && newEntry.Chars_Renamed.Equals(LastEntry.Chars_Renamed)
                                    && newEntry.Extension_Renamed.Equals(LastEntry.Extension_Renamed);
            if (repeatsLastEntry)
            {
                bool harmless = newEntry.Strength_Renamed == Collator.IDENTICAL
                                || newEntry.Strength_Renamed == PatternEntry.RESET;
                if (harmless)
                {
                    // just skip this entry and behave as though you never saw it
                    return;
                }
                throw new ParseException("The entries " + LastEntry + " and " + newEntry + " are adjacent in the rules, but have conflicting " + "strengths: A character can't be unequal to itself.", -1);
            }

            // A reset touches nothing but the LastEntry marker.
            if (newEntry.Strength_Renamed == PatternEntry.RESET)
            {
                LastEntry = newEntry;
                return;
            }

            int previousIndex = -1;
            if (newEntry.Chars_Renamed.length() != 1)
            {
                previousIndex = Patterns.LastIndexOf(newEntry);
            }
            else
            {
                char  first = newEntry.Chars_Renamed.charAt(0);
                int   slot  = first >> BYTEPOWER;
                sbyte clump = StatusArray[slot];
                sbyte bit   = (sbyte)(BITARRAYMASK << (first & BYTEMASK));

                if (clump != 0 && (clump & bit) != 0)
                {
                    previousIndex = Patterns.LastIndexOf(newEntry);
                }
                else
                {
                    // We're going to add an element that starts with this
                    // character, so go ahead and set its bit.
                    StatusArray[slot] = (sbyte)(clump | bit);
                }
            }
            if (previousIndex != -1)
            {
                Patterns.RemoveAt(previousIndex);
            }

            Excess.Length = 0;
            int  insertAt      = FindLastEntry(LastEntry, Excess);
            bool appendAtEnd   = insertAt == Patterns.Count;
            bool keepLastEntry = false;

            if (Excess.Length() != 0)
            {
                newEntry.Extension_Renamed = Excess + newEntry.Extension_Renamed;
                if (!appendAtEnd)
                {
                    LastEntry     = SaveEntry;
                    keepLastEntry = true;
                }
            }

            if (appendAtEnd)
            {
                Patterns.Add(newEntry);
                SaveEntry = newEntry;
            }
            else
            {
                Patterns.Insert(insertAt, newEntry);
            }

            if (!keepLastEntry)
            {
                LastEntry = newEntry;
            }
        }
Beispiel #9
0
//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
//ORIGINAL LINE: public PatternEntry next() throws ParseException
            /// <summary>
            /// Parses the next entry out of Pattern, starting at the current
            /// position <c>i</c>. An entry starts with one relation character
            /// (=,;&lt;&amp;) followed by its characters and, after a '/', its
            /// extension. Parsing stops at the next relation character, which is
            /// left unconsumed for the following call, or at the end of the pattern.
            /// </summary>
            /// <returns> the parsed entry, or null when no relation character was
            /// found before the end of the pattern </returns>
            /// <exception cref="ParseException"> when text appears before any
            /// relation character, when an unquoted special character is found,
            /// or when an entry has no characters </exception>
            public virtual PatternEntry Next()
            {
                int newStrength = UNSET;

                NewChars.Length     = 0;
                NewExtension.Length = 0;

                bool inChars = true;
                bool inQuote = false;

                while (i < Pattern.Length())
                {
                    char ch = Pattern.CharAt(i);
                    if (inQuote)
                    {
                        if (ch == '\'')
                        {
                            inQuote = false;
                        }
                        else
                        {
                            if (NewChars.Length() == 0)
                            {
                                NewChars.Append(ch);
                            }
                            else if (inChars)
                            {
                                NewChars.Append(ch);
                            }
                            else
                            {
                                NewExtension.Append(ch);
                            }
                        }
                    }
                    else
                    {
                        switch (ch)
                        {
                        // For every relation character: a second one ends the
                        // current entry without advancing i.
                        case '=':
                            if (newStrength != UNSET)
                            {
                                goto mainLoopBreak;
                            }
                            newStrength = Collator.IDENTICAL;
                            break;

                        case ',':
                            if (newStrength != UNSET)
                            {
                                goto mainLoopBreak;
                            }
                            newStrength = Collator.TERTIARY;
                            break;

                        case ';':
                            if (newStrength != UNSET)
                            {
                                goto mainLoopBreak;
                            }
                            newStrength = Collator.SECONDARY;
                            break;

                        case '<':
                            if (newStrength != UNSET)
                            {
                                goto mainLoopBreak;
                            }
                            newStrength = Collator.PRIMARY;
                            break;

                        case '&':
                            if (newStrength != UNSET)
                            {
                                goto mainLoopBreak;
                            }
                            newStrength = RESET;
                            break;

                        case '\t':
                        case '\n':
                        case '\f':
                        case '\r':
                        case ' ':                 // skip whitespace TODO use Character
                            break;

                        case '/':
                            // Everything after '/' belongs to the extension.
                            inChars = false;
                            break;

                        case '\'':
                            // The character right after the opening quote is taken
                            // literally, even if it is itself a quote.
                            // NOTE(review): no bounds check here - a quote as the
                            // last pattern character reads past the end; confirm
                            // how CharAt reacts.
                            inQuote = true;
                            ch      = Pattern.CharAt(++i);
                            if (NewChars.Length() == 0)
                            {
                                NewChars.Append(ch);
                            }
                            else if (inChars)
                            {
                                NewChars.Append(ch);
                            }
                            else
                            {
                                NewExtension.Append(ch);
                            }
                            break;

                        default:
                            if (newStrength == UNSET)
                            {
                                // Quote at most 10 characters of context; the bound
                                // must be measured from i, not from 0.
                                throw new ParseException("missing char (=,;<&) : " + Pattern.Substring(i, (i + 10 < Pattern.Length()) ? i + 10 : Pattern.Length()), i);
                            }
                            if (PatternEntry.IsSpecialChar(ch) && (inQuote == false))
                            {
                                throw new ParseException("Unquoted punctuation character : " + Convert.ToString(ch, 16), i);
                            }
                            if (inChars)
                            {
                                NewChars.Append(ch);
                            }
                            else
                            {
                                NewExtension.Append(ch);
                            }
                            break;
                        }
                    }
                    i++;
                }
                mainLoopBreak :
                if (newStrength == UNSET)
                {
                    return(null);
                }
                if (NewChars.Length() == 0)
                {
                    // Same context window as above, clamped to the pattern end.
                    throw new ParseException("missing chars (=,;<&): " + Pattern.Substring(i, (i + 10 < Pattern.Length()) ? i + 10 : Pattern.Length()), i);
                }

                return(new PatternEntry(newStrength, NewChars, NewExtension));
            }
Beispiel #10
0
        // ===== privates =====

        /// <summary>
        /// Writes this entry to <paramref name="toAddTo"/>: an optional
        /// separator, an optional reset clause built from
        /// <paramref name="lastEntry"/>, the relation character for this
        /// entry's strength, the quoted characters and, if requested and
        /// non-empty, '/' followed by the quoted extension.
        /// </summary>
        /// <param name="toAddTo"> the buffer to append to </param>
        /// <param name="showExtension"> whether to write the extension after a '/' </param>
        /// <param name="showWhiteSpace"> whether to write separating spaces and newlines </param>
        /// <param name="lastEntry"> when non-null, a reset clause using this
        /// entry's quoted characters and this entry's quoted extension is
        /// written first </param>
        internal virtual void AddToBuffer(StringBuffer toAddTo, bool showExtension, bool showWhiteSpace, PatternEntry lastEntry)
        {
            bool hasReset = lastEntry != null;

            // Separator from whatever is already in the buffer.
            if (showWhiteSpace && toAddTo.Length() > 0)
            {
                bool startNewLine = Strength_Renamed == Collator.PRIMARY || hasReset;
                toAddTo.Append(startNewLine ? '\n' : ' ');
            }

            if (hasReset)
            {
                toAddTo.Append('&');
                if (showWhiteSpace)
                {
                    toAddTo.Append(' ');
                }
                lastEntry.AppendQuotedChars(toAddTo);
                AppendQuotedExtension(toAddTo);
                if (showWhiteSpace)
                {
                    toAddTo.Append(' ');
                }
            }

            // Relation character; an unrecognised strength writes nothing.
            if (Strength_Renamed == Collator.IDENTICAL)
            {
                toAddTo.Append('=');
            }
            else if (Strength_Renamed == Collator.TERTIARY)
            {
                toAddTo.Append(',');
            }
            else if (Strength_Renamed == Collator.SECONDARY)
            {
                toAddTo.Append(';');
            }
            else if (Strength_Renamed == Collator.PRIMARY)
            {
                toAddTo.Append('<');
            }
            else if (Strength_Renamed == RESET)
            {
                toAddTo.Append('&');
            }
            else if (Strength_Renamed == UNSET)
            {
                toAddTo.Append('?');
            }

            if (showWhiteSpace)
            {
                toAddTo.Append(' ');
            }
            AppendQuoted(Chars_Renamed, toAddTo);

            bool writeExtension = showExtension && Extension_Renamed.Length() != 0;
            if (writeExtension)
            {
                toAddTo.Append('/');
                AppendQuoted(Extension_Renamed, toAddTo);
            }
        }
Beispiel #11
0
        /// <summary>
        /// Create a table-based collation object with the given rules.
        /// This is the main function that actually builds the tables and
        /// stores them back in the RBCollationTables object.  It is called
        /// ONLY by the RBCollationTables constructor. </summary>
        /// <param name="pattern"> the collation rules; must not be empty </param>
        /// <param name="decmp"> decomposition mode; not read by this method
        /// (the normalization calls that would use it are disabled) </param>
        /// <seealso cref="RuleBasedCollator"/>
        /// <exception cref="ParseException"> If the rules format is incorrect. </exception>
        public void Build(String pattern, int decmp)
        {
            if (pattern.Length() == 0)
            {
                throw new ParseException("Build rules empty.", 0);
            }

            // This array maps Unicode characters to their collation ordering
            Mapping = new UCompactIntArray(RBCollationTables.UNMAPPED);

            // Normalize the build rules.  Find occurrences of all decomposed characters
            // and normalize the rules before feeding into the builder.  By "normalize",
            // we mean that all precomposed Unicode characters must be converted into
            // a base character and one or more combining characters (such as accents).
            // When there are multiple combining characters attached to a base character,
            // the combining characters must be in their canonical order.
            //
            // sherman/Note:
            // (1) decmp will be NO_DECOMPOSITION only in ko locale to prevent decomposing
            //     hangul syllables to jamos, so we can actually just call decompose with
            //     normalizer's IGNORE_HANGUL option turned on
            // (2) just call the "special version" in NormalizerImpl directly
            pattern = NormalizerImpl.canonicalDecomposeWithSingleQuotation(pattern);

            // Build the merged collation entries.
            // Since rules can be specified in any order in the string
            // (e.g. "c , C < d , D < e , E .... C < CH")
            // this splits all of the rules in the string out into separate
            // objects and then sorts them.  In the above example, it merges the
            // "C < CH" rule in just before the "C < D" rule.
            MPattern = new MergeCollation(pattern);

            int order = 0;

            // Now walk through each entry and add it to my own tables
            for (int i = 0; i < MPattern.Count; ++i)
            {
                PatternEntry entry = MPattern.GetItemAt(i);
                if (entry == null)
                {
                    continue;
                }

                String groupChars = entry.Chars;
                if (groupChars.Length() > 1)
                {
                    // A trailing '@' or '!' is a flag, not part of the characters:
                    // record it and strip it off.
                    switch (groupChars.CharAt(groupChars.Length() - 1))
                    {
                    case '@':
                        FrenchSec  = true;
                        groupChars = groupChars.Substring(0, groupChars.Length() - 1);
                        break;

                    case '!':
                        SeAsianSwapping = true;
                        groupChars      = groupChars.Substring(0, groupChars.Length() - 1);
                        break;
                    }
                }

                order = Increment(entry.Strength, order);
                String expChars = entry.Extension;

                if (expChars.Length() != 0)
                {
                    AddExpandOrder(groupChars, expChars, order);
                }
                else if (groupChars.Length() > 1)
                {
                    char ch = groupChars.CharAt(0);
                    if (char.IsHighSurrogate(ch) && groupChars.Length() == 2)
                    {
                        // Two chars starting with a high surrogate are added as
                        // one code point rather than as a contraction.
                        AddOrder(Character.ToCodePoint(ch, groupChars.CharAt(1)), order);
                    }
                    else
                    {
                        AddContractOrder(groupChars, order);
                    }
                }
                else
                {
                    char ch = groupChars.CharAt(0);
                    AddOrder(ch, order);
                }
            }
            AddComposedChars();

            Commit();
            Mapping.compact();

            Tables.FillInTables(FrenchSec, SeAsianSwapping, Mapping, ContractTable, ExpandTable, ContractFlags, MaxSecOrder, MaxTerOrder);
        }