Example #1
0
 /// <summary>
 /// Scans forward in <c>rules</c> over characters that
 /// <c>PatternProps.IsWhiteSpace</c> reports as white space.
 /// </summary>
 /// <param name="i">Index in <c>rules</c> at which scanning starts.</param>
 /// <returns>
 /// The index of the first character at or after <paramref name="i"/> that is
 /// not white space. If only white space remains, the result is
 /// <c>rules.Length</c>; if <paramref name="i"/> is already at or past the end,
 /// it is returned unchanged.
 /// </returns>
 private int SkipWhiteSpace(int i)
 {
     int index = i;
     for (; index < rules.Length; ++index)
     {
         if (!PatternProps.IsWhiteSpace(rules[index]))
         {
             // Found the first non-white-space character.
             break;
         }
     }
     return index;
 }
Example #2
0
        /// <summary>
        /// Parses a reset position, optionally preceded by a
        /// <c>[before n]</c> prefix (n = 1, 2 or 3), and reports it to <c>sink</c>.
        /// </summary>
        /// <remarks>
        /// Scanning starts one character past <c>ruleIndex</c>. On success,
        /// <c>ruleIndex</c> is moved to the index returned by the position parser.
        /// On failure, <c>SetParseError</c> is called and <c>ruleIndex</c> is left untouched.
        /// </remarks>
        /// <returns>
        /// The reset strength: <c>Primary + (n - 1)</c> when a <c>[before n]</c>
        /// prefix was recognized, otherwise <c>Identical</c>. If an error is
        /// reported and control returns here, <c>UCOL_DEFAULT</c> cast to
        /// <see cref="CollationStrength"/> is returned.
        /// </returns>
        private CollationStrength ParseResetAndPosition()
        {
            int pos = SkipWhiteSpace(ruleIndex + 1);
            CollationStrength strength = CollationStrength.Identical;

            // Recognize the BEFORE keyword, white space, a digit 1..3, then ']'.
            // Each step is only attempted when the previous one succeeded.
            if (rules.RegionMatches(pos, BEFORE, 0, BEFORE.Length, StringComparison.Ordinal))
            {
                int afterKeyword = pos + BEFORE.Length;
                if (afterKeyword < rules.Length && PatternProps.IsWhiteSpace(rules[afterKeyword]))
                {
                    int digitIndex = SkipWhiteSpace(afterKeyword + 1);
                    // Both the digit and the closing bracket must be inside the string.
                    if (digitIndex + 1 < rules.Length)
                    {
                        char digit = rules[digitIndex];
                        if ('1' <= digit && digit <= '3' && rules[digitIndex + 1] == ']')
                        {
                            // &[before n] with n=1 or 2 or 3
                            strength = CollationStrength.Primary + (digit - '1');
                            pos = SkipWhiteSpace(digitIndex + 2);
                        }
                    }
                }
            }

            if (pos >= rules.Length)
            {
                SetParseError("reset without position");
                return (CollationStrength)UCOL_DEFAULT;
            }

            // A '[' introduces a special position; anything else is a tailoring string.
            pos = rules[pos] == '['
                ? ParseSpecialPosition(pos, rawBuilder.Value)
                : ParseTailoringString(pos, rawBuilder.Value);

            try
            {
                sink.AddReset(strength, rawBuilder);
            }
            catch (Exception e)
            {
                SetParseError("adding reset failed", e);
                return (CollationStrength)UCOL_DEFAULT;
            }

            ruleIndex = pos;
            return strength;
        }
Example #3
0
        /// <summary>
        /// Top-level parse loop: stores <paramref name="ruleString"/> in <c>rules</c>,
        /// resets <c>ruleIndex</c> to 0, and dispatches on each non-white-space
        /// character until the end of the string is reached.
        /// </summary>
        /// <param name="ruleString">The rule text to parse.</param>
        private void Parse(string ruleString)
        {
            rules     = ruleString;
            ruleIndex = 0;

            while (ruleIndex < rules.Length)
            {
                char current = rules[ruleIndex];

                if (PatternProps.IsWhiteSpace(current))
                {
                    // White space between rules carries no meaning.
                    ++ruleIndex;
                }
                else if (current == '&')
                {
                    ParseRuleChain();
                }
                else if (current == '[')
                {
                    ParseSetting();
                }
                else if (current == '#')
                {
                    // starts a comment, until the end of the line
                    ruleIndex = SkipComment(ruleIndex + 1);
                }
                else if (current == '@')
                {
                    // is equivalent to [backwards 2]
                    settings.SetFlag(CollationSettings.BackwardSecondary, true);
                    ++ruleIndex;
                }
                else if (current == '!')
                {
                    // '!' used to turn on Thai/Lao character reversal.
                    // Accept but ignore. The root collator has contractions
                    // that are equivalent to the character reversal, where appropriate.
                    ++ruleIndex;
                }
                else
                {
                    // ruleIndex is not advanced on this path.
                    SetParseError("expected a reset or setting or comment");
                }
            }
        }
Example #4
0
 /// <summary>
 /// Collects a run of words from <c>rules</c> into <paramref name="raw"/>,
 /// collapsing each run of white space into a single space and dropping
 /// leading and trailing white space.
 /// </summary>
 /// <param name="i">Index in <c>rules</c> at which reading starts.</param>
 /// <param name="raw">Receives the words; it is cleared first.</param>
 /// <returns>
 /// The index of the terminating syntax character (any character accepted by
 /// <c>IsSyntaxChar</c> other than '-' and '_'), or 0 if the end of
 /// <c>rules</c> is reached before such a character is found.
 /// </returns>
 private int ReadWords(int i, StringBuilder raw)
 {
     raw.Length = 0;
     int pos = SkipWhiteSpace(i);

     while (pos < rules.Length)
     {
         char ch = rules[pos];

         // syntax except -_
         bool endsWords = IsSyntaxChar(ch) && ch != '-' && ch != '_';
         if (endsWords)
         {
             int last = raw.Length - 1;
             if (last >= 0 && raw[last] == ' ')
             {
                 // remove trailing space
                 raw.Length = last;
             }
             return pos;
         }

         if (PatternProps.IsWhiteSpace(ch))
         {
             // A whole run of white space contributes exactly one space.
             raw.Append(' ');
             pos = SkipWhiteSpace(pos + 1);
         }
         else
         {
             raw.Append(ch);
             ++pos;
         }
     }

     // Ran off the end without finding a terminating syntax character.
     return 0;
 }
Example #5
0
        /// <summary>
        /// Implements <see cref="Transliterator.HandleTransliterate(IReplaceable, Position, bool)"/>.
        /// </summary>
        /// <remarks>
        /// Scans <paramref name="text"/> from <c>offsets.Start</c> up to <c>offsets.Limit</c>
        /// with a two-state machine. In mode 0 it looks for the open delimiter pattern;
        /// in mode 1 it accumulates a candidate character name until the close delimiter
        /// is seen. When <c>UCharacter.GetCharFromExtendedName</c> resolves the accumulated
        /// name, the span from the open delimiter through the close delimiter is replaced
        /// in place by that code point.
        /// </remarks>
        /// <param name="text">The text to scan; matched spans are replaced in place.</param>
        /// <param name="offsets">
        /// Supplies the start and limit of the range to process. On return,
        /// <c>Limit</c> and <c>ContextLimit</c> are adjusted by the net change in text
        /// length and <c>Start</c> is set to where processing stopped.
        /// </param>
        /// <param name="isIncremental">
        /// When true and an open-delimiter candidate position is still recorded at the end,
        /// <c>offsets.Start</c> is set to that candidate position instead of the cursor.
        /// </param>
        protected override void HandleTransliterate(IReplaceable text,
                                                    Position offsets, bool isIncremental)
        {
            int maxLen = UCharacterName.Instance.MaxCharNameLength + 1; // allow for temporary trailing space

            // Accumulates the candidate name between the delimiters.
            StringBuffer name = new StringBuffer(maxLen);

            // Get the legal character set
            UnicodeSet legal = new UnicodeSet();

            UCharacterName.Instance.GetCharNameCharacters(legal);

            // Local copies; limit changes as replacements alter the text length.
            int cursor = offsets.Start;
            int limit  = offsets.Limit;

            // Modes:
            // 0 - looking for open delimiter
            // 1 - after open delimiter
            int mode    = 0;
            int openPos = -1; // open delim candidate pos

            int c;

            while (cursor < limit)
            {
                c = text.Char32At(cursor);

                switch (mode)
                {
                case 0:   // looking for open delimiter
                    if (c == OPEN_DELIM)
                    {     // quick check first
                        openPos = cursor;
                        // A result in [0, limit) is treated as a match ending at index i.
                        int i = Utility.ParsePattern(OPEN_PAT, text, cursor, limit);
                        if (i >= 0 && i < limit)
                        {
                            mode        = 1;
                            name.Length = 0;
                            cursor      = i;
                            continue;     // *** reprocess char32At(cursor)
                        }
                    }
                    break;

                case 1:     // after open delimiter
                            // Look for legal chars.  If \s+ is found, convert it
                            // to a single space.  If closeDelimiter is found, exit
                            // the loop.  If any other character is found, exit the
                            // loop.  If the limit is reached, exit the loop.

                    // Convert \s+ => SPACE.  This assumes there are no
                    // runs of >1 space characters in names.
                    if (PatternProps.IsWhiteSpace(c))
                    {
                        // Ignore leading whitespace
                        if (name.Length > 0 &&
                            name[name.Length - 1] != SPACE)
                        {
                            name.Append(SPACE);
                            // If we are too long then abort.  maxLen includes
                            // temporary trailing space, so use '>'.
                            if (name.Length > maxLen)
                            {
                                mode = 0;
                            }
                        }
                        break;
                    }

                    if (c == CLOSE_DELIM)
                    {
                        int len = name.Length;

                        // Delete trailing space, if any
                        if (len > 0 &&
                            name[len - 1] == SPACE)
                        {
                            name.Length = --len;
                        }

                        // c is reused here to hold the looked-up code point; -1 means the lookup failed.
                        c = UCharacter.GetCharFromExtendedName(name.ToString());
                        if (c != -1)
                        {
                            // Lookup succeeded

                            // assert(UTF16.getCharCount(CLOSE_DELIM) == 1);
                            cursor++;     // advance over CLOSE_DELIM

                            // Replace everything from the open delimiter through the close delimiter.
                            string str = UTF16.ValueOf(c);
                            text.Replace(openPos, cursor, str);

                            // Adjust indices for the change in the length of
                            // the string.  Do not assume that str.length() ==
                            // 1, in case of surrogates.
                            int delta = cursor - openPos - str.Length;
                            cursor -= delta;
                            limit  -= delta;
                            // assert(cursor == openPos + str.length());
                        }
                        // If the lookup failed, we leave things as-is and
                        // still switch to mode 0 and continue.
                        mode    = 0;
                        openPos = -1; // close off candidate
                        continue;     // *** reprocess char32At(cursor)
                    }

                    if (legal.Contains(c))
                    {
                        UTF16.Append(name, c);
                        // If we go past the longest possible name then abort.
                        // maxLen includes temporary trailing space, so use '>='.
                        if (name.Length >= maxLen)
                        {
                            mode = 0;
                        }
                    }

                    // Invalid character
                    else
                    {
                        // The advance after the switch still runs, so this decrement
                        // offsets one code unit of it.
                        --cursor;     // Backup and reprocess this character
                        mode = 0;
                    }

                    break;
                }

                // Advance by the number of UTF-16 code units of c (skipped by the 'continue' paths above).
                cursor += UTF16.GetCharCount(c);
            }

            // Publish the net change in text length back to the caller's offsets.
            offsets.ContextLimit += limit - offsets.Limit;
            offsets.Limit         = limit;
            // In incremental mode, only advance the cursor up to the last
            // open delimiter candidate.
            offsets.Start = (isIncremental && openPos >= 0) ? openPos : cursor;
        }
Example #6
0
 /// <summary>
 /// Parses one string from <c>rules</c> starting at <paramref name="i"/>,
 /// resolving apostrophe quoting and backslash escapes, and stores the
 /// result in <paramref name="raw"/>.
 /// </summary>
 /// <remarks>
 /// The string ends at the first unquoted white space or at any syntax
 /// character other than an apostrophe or backslash; that terminating
 /// character is not consumed. The collected text is then checked and
 /// <c>SetParseError</c> is called if it contains an unpaired surrogate
 /// or a code point in the range U+FFFD..U+FFFF.
 /// </remarks>
 /// <param name="i">Index in <c>rules</c> at which parsing starts.</param>
 /// <param name="raw">Receives the parsed string; it is cleared first.</param>
 /// <returns>The index in <c>rules</c> at which parsing stopped.</returns>
 private int ParseString(int i, StringBuilder raw)
 {
     raw.Length = 0;

     while (i < rules.Length)
     {
         char ch = rules[i++];

         if (!IsSyntaxChar(ch))
         {
             if (PatternProps.IsWhiteSpace(ch))
             {
                 // Unquoted white space terminates a string.
                 --i;
                 break;
             }
             raw.Append(ch);
             continue;
         }

         if (ch == '\'')
         {
             if (i < rules.Length && rules[i] == '\'')
             {
                 // Double apostrophe, encodes a single one.
                 raw.Append('\'');
                 ++i;
                 continue;
             }

             // Quote literal text until the next single apostrophe.
             while (true)
             {
                 if (i == rules.Length)
                 {
                     SetParseError("quoted literal text missing terminating apostrophe");
                     return i;
                 }
                 ch = rules[i++];
                 if (ch == '\'')
                 {
                     bool doubled = i < rules.Length && rules[i] == '\'';
                     if (!doubled)
                     {
                         // Closing apostrophe: end of the quoted section.
                         break;
                     }
                     // Double apostrophe inside quoted literal text,
                     // still encodes a single apostrophe.
                     ++i;
                 }
                 raw.Append(ch);
             }
             continue;
         }

         if (ch == '\\')
         {
             if (i == rules.Length)
             {
                 SetParseError("backslash escape at the end of the rule string");
                 return i;
             }
             // The escaped code point is taken literally, whatever it is.
             int escaped = rules.CodePointAt(i);
             raw.AppendCodePoint(escaped);
             i += Character.CharCount(escaped);
             continue;
         }

         // Any other syntax character terminates a string.
         --i;
         break;
     }

     // Validate the collected text code point by code point.
     int scan = 0;
     while (scan < raw.Length)
     {
         int codePoint = raw.CodePointAt(scan);
         if (IsSurrogate(codePoint))
         {
             SetParseError("string contains an unpaired surrogate");
             return i;
         }
         if (0xfffd <= codePoint && codePoint <= 0xffff)
         {
             SetParseError("string contains U+FFFD, U+FFFE or U+FFFF");
             return i;
         }
         scan += Character.CharCount(codePoint);
     }

     return i;
 }