Example #1
0
 /// <summary>
 /// Scans forward in <c>rules</c> past a run of pattern white space.
 /// </summary>
 /// <param name="i">Index in <c>rules</c> at which scanning starts.</param>
 /// <returns>
 /// The index of the first character at or after <paramref name="i"/> that is not
 /// pattern white space. If <paramref name="i"/> is already at or beyond the end of
 /// <c>rules</c>, it is returned unchanged.
 /// </returns>
 private int SkipWhiteSpace(int i)
 {
     int pos = i;
     for (; pos < rules.Length; ++pos)
     {
         if (!PatternProps.IsWhiteSpace(rules[pos]))
         {
             break;
         }
     }
     return pos;
 }
Example #2
0
        /// <summary>
        /// Parses the reset that starts a rule chain: an optional <c>[before n]</c>
        /// prefix (n = 1, 2 or 3) followed by the reset position, and passes the
        /// result to <c>sink.AddReset</c>.
        /// </summary>
        /// <remarks>
        /// Scanning starts one character after <c>ruleIndex</c>. <c>ruleIndex</c> is
        /// only advanced when the reset was added successfully.
        /// </remarks>
        /// <returns>
        /// <c>CollationStrength.Primary</c> offset by <c>n - 1</c> when a
        /// <c>[before n]</c> prefix was recognized, <c>CollationStrength.Identical</c>
        /// when there was none, or <c>UCOL_DEFAULT</c> cast to
        /// <see cref="CollationStrength"/> after an error has been reported through
        /// <c>SetParseError</c>.
        /// </returns>
        private CollationStrength ParseResetAndPosition()
        {
            // Without a recognized [before n] prefix the reset is at identical strength.
            CollationStrength strength = CollationStrength.Identical;
            int pos = SkipWhiteSpace(ruleIndex + 1);

            if (rules.RegionMatches(pos, BEFORE, 0, BEFORE.Length, StringComparison.Ordinal))
            {
                int afterKeyword = pos + BEFORE.Length;
                if (afterKeyword < rules.Length && PatternProps.IsWhiteSpace(rules[afterKeyword]))
                {
                    int digitIndex = SkipWhiteSpace(afterKeyword + 1);

                    // There must be room for both the digit and the closing ']'.
                    if (digitIndex + 1 < rules.Length)
                    {
                        char digit = rules[digitIndex];
                        if ('1' <= digit && digit <= '3' && rules[digitIndex + 1] == ']')
                        {
                            // &[before n] with n=1 or 2 or 3
                            strength = CollationStrength.Primary + (digit - '1');
                            pos      = SkipWhiteSpace(digitIndex + 2);
                        }
                    }
                }
            }

            if (pos >= rules.Length)
            {
                SetParseError("reset without position");
                return (CollationStrength)UCOL_DEFAULT;
            }

            // A '[' introduces a special position; anything else is a tailoring string.
            pos = rules[pos] == '['
                ? ParseSpecialPosition(pos, rawBuilder.Value)
                : ParseTailoringString(pos, rawBuilder.Value);

            try
            {
                sink.AddReset(strength, rawBuilder);
            }
            catch (Exception e)
            {
                SetParseError("adding reset failed", e);
                return (CollationStrength)UCOL_DEFAULT;
            }

            ruleIndex = pos;
            return strength;
        }
Example #3
0
        /// <summary>
        /// Top-level rule parser: stores <paramref name="ruleString"/> in <c>rules</c>,
        /// resets <c>ruleIndex</c> to 0 and dispatches on each non-white-space
        /// character until the end of the string.
        /// </summary>
        /// <remarks>
        /// Recognized lead characters are <c>&amp;</c> (rule chain), <c>[</c> (setting),
        /// <c>#</c> (comment), <c>@</c> (backward secondary) and <c>!</c> (ignored).
        /// Any other character is reported through <c>SetParseError</c> without
        /// advancing <c>ruleIndex</c>.
        /// NOTE(review): the loop therefore presumably relies on <c>SetParseError</c>
        /// throwing to terminate — confirm against its definition.
        /// </remarks>
        /// <param name="ruleString">The collation rule string to parse.</param>
        private void Parse(string ruleString)
        {
            rules     = ruleString;
            ruleIndex = 0;

            while (ruleIndex < rules.Length)
            {
                char current = rules[ruleIndex];

                if (PatternProps.IsWhiteSpace(current))
                {
                    // White space between rules is skipped.
                    ++ruleIndex;
                }
                else if (current == '&')
                {
                    ParseRuleChain();
                }
                else if (current == '[')
                {
                    ParseSetting();
                }
                else if (current == '#')
                {
                    // starts a comment, until the end of the line
                    ruleIndex = SkipComment(ruleIndex + 1);
                }
                else if (current == '@')
                {
                    // is equivalent to [backwards 2]
                    settings.SetFlag(CollationSettings.BackwardSecondary, true);
                    ++ruleIndex;
                }
                else if (current == '!')
                {
                    // '!' used to turn on Thai/Lao character reversal.
                    // Accept but ignore. The root collator has contractions
                    // that are equivalent to the character reversal, where appropriate.
                    ++ruleIndex;
                }
                else
                {
                    SetParseError("expected a reset or setting or comment");
                }
            }
        }
Example #4
0
 /// <summary>
 /// Reads a sequence of words from <c>rules</c> into <paramref name="raw"/>,
 /// stopping at the first syntax character other than '-' and '_'.
 /// </summary>
 /// <remarks>
 /// <paramref name="raw"/> is cleared first. Leading white space is skipped, each
 /// later run of white space is stored as a single ' ', and a trailing ' ' is
 /// removed before returning.
 /// </remarks>
 /// <param name="i">Index in <c>rules</c> at which to start reading.</param>
 /// <param name="raw">Receives the words that were read.</param>
 /// <returns>
 /// The index of the terminating syntax character, or 0 if the end of
 /// <c>rules</c> was reached before one was found.
 /// </returns>
 private int ReadWords(int i, StringBuilder raw)
 {
     raw.Length = 0;
     int pos = SkipWhiteSpace(i);

     while (pos < rules.Length)
     {
         char ch = rules[pos];

         // syntax except -_
         bool isTerminator = IsSyntaxChar(ch) && ch != '-' && ch != '_';
         if (isTerminator)
         {
             int last = raw.Length - 1;
             if (last >= 0 && raw[last] == ' ')
             {
                 // remove trailing space
                 raw.Length = last;
             }
             return pos;
         }

         if (PatternProps.IsWhiteSpace(ch))
         {
             // Collapse the whole white-space run into one space.
             raw.Append(' ');
             pos = SkipWhiteSpace(pos + 1);
             continue;
         }

         raw.Append(ch);
         ++pos;
     }

     // Ran off the end of the rules without seeing a terminating syntax character.
     return 0;
 }
Example #5
0
        //----------------------------------------------------------------
        // Private implementation
        //----------------------------------------------------------------

        /**
         * Parse an ID into component pieces.  Take IDs of the form T,
         * T/V, S-T, S-T/V, or S/V-T.  If the source is missing, return a
         * source of ANY.
         * @param id the id string, in any of several forms
         * @param pos INPUT-OUTPUT parameter.  On input, pos[0] is the
         * offset of the first character to parse in id.  On output,
         * pos[0] is the offset after the last parsed character.  If
         * null is returned, pos[0] is restored to its input value.
         * @param allowFilter if true, a UnicodeSet pattern is allowed
         * at any location between specs or delimiters; at most one is
         * parsed, and its pattern text is passed to the returned Specs
         * object.
         * @return a Specs object, or null if the parse failed.  If
         * neither source nor target was seen in the parsed id, then the
         * parse fails.  If allowFilter is true, then the parsed filter
         * pattern is returned in the Specs object, otherwise the returned
         * filter reference is null.
         * NOTE(review): the UnicodeSet constructor call below is not
         * wrapped in a try/catch, whereas ParseGlobalFilter catches
         * ArgumentException around the same constructor -- confirm that
         * callers expect a malformed filter to throw here rather than
         * yield null.
         */
        private static Specs ParseFilterID(string id, int[] pos,
                                           bool allowFilter)
        {
            string first     = null;    // spec seen with no preceding delimiter
            string source    = null;
            string target    = null;
            string variant   = null;
            string filter    = null;
            char   delimiter = (char)0; // pending delimiter; 0 means none
            int    specCount = 0;
            int    start     = pos[0];  // remembered so a failed parse can rewind

            // This loop parses one of the following things with each
            // pass: a filter, a delimiter character (either '-' or '/'),
            // or a spec (source, target, or variant).
            for (; ;)
            {
                pos[0] = PatternProps.SkipWhiteSpace(id, pos[0]);
                if (pos[0] == id.Length)
                {
                    break;
                }

                // Parse filters
                if (allowFilter && filter == null &&
                    UnicodeSet.ResemblesPattern(id, pos[0]))
                {
                    ParsePosition ppos = new ParsePosition(pos[0]);
                    // Parse the set to get the position.  The set itself is
                    // discarded; only the end index in ppos is used.
                    new UnicodeSet(id, ppos, null);
                    filter = id.Substring(pos[0], ppos.Index - pos[0]); // ICU4N: Corrected 2nd parameter
                    pos[0] = ppos.Index;
                    continue;
                }

                // Only look for a delimiter when none is pending, and only
                // accept one whose spec has not been filled in yet.
                if (delimiter == 0)
                {
                    char c = id[pos[0]];
                    if ((c == TARGET_SEP && target == null) ||
                        (c == VARIANT_SEP && variant == null))
                    {
                        delimiter = c;
                        ++pos[0];
                        continue;
                    }
                }

                // We are about to try to parse a spec with no delimiter
                // when we can no longer do so (we can only do so at the
                // start); break.
                if (delimiter == 0 && specCount > 0)
                {
                    break;
                }

                string spec = Utility.ParseUnicodeIdentifier(id, pos);
                if (spec == null)
                {
                    // Note that if there was a trailing delimiter, we
                    // consume it.  So Foo-, Foo/, Foo-Bar/, and Foo/Bar-
                    // are legal.
                    break;
                }

                // File the spec according to the delimiter that preceded it.
                switch (delimiter)
                {
                case (char)0:
                    first = spec;
                    break;

                case TARGET_SEP:
                    target = spec;
                    break;

                case VARIANT_SEP:
                    variant = spec;
                    break;
                }
                ++specCount;
                delimiter = (char)0;
            }

            // A spec with no prior character is either source or target,
            // depending on whether an explicit "-target" was seen.
            if (first != null)
            {
                if (target == null)
                {
                    target = first;
                }
                else
                {
                    source = first;
                }
            }

            // Must have either source or target
            if (source == null && target == null)
            {
                pos[0] = start;
                return(null);
            }

            // Empty source or target defaults to ANY
            bool sawSource = true;

            if (source == null)
            {
                source    = ANY;
                sawSource = false;
            }
            if (target == null)
            {
                target = ANY;
            }

            return(new Specs(source, target, variant, sawSource, filter));
        }
Example #6
0
        /// <summary>
        /// Parses a compound ID: an optional leading global filter, one or more
        /// single IDs separated by <c>ID_DELIM</c>, and an optional trailing global
        /// filter enclosed in parentheses. The global filters are UnicodeSet patterns.
        /// </summary>
        /// <param name="id">The pattern to parse.</param>
        /// <param name="dir">The direction.</param>
        /// <param name="canonID">OUTPUT parameter. Previous contents are discarded. It
        /// receives whatever <see cref="ParseGlobalFilter"/> adds for the global filters
        /// plus the <c>CanonID</c> of every parsed element, separated by
        /// <c>ID_DELIM</c>.</param>
        /// <param name="list">OUTPUT parameter. Previous contents are discarded. It
        /// receives the parsed <c>SingleID</c> objects, in parse order when
        /// <paramref name="dir"/> is <c>Forward</c> and in reverse parse order
        /// otherwise.</param>
        /// <param name="globalFilter">OUTPUT parameter. Element 0 is set to the leading
        /// filter when <paramref name="dir"/> is <c>Forward</c>, to the trailing filter
        /// when it is <c>Reverse</c>, and to null if no such filter was parsed.</param>
        /// <returns>true if at least one single ID was parsed and nothing but white
        /// space remains unparsed in <paramref name="id"/>; otherwise false.</returns>
        public static bool ParseCompoundID(string id, TransliterationDirection dir,
                                           StringBuffer canonID,
                                           IList <SingleID> list,
                                           UnicodeSet[] globalFilter)
        {
            list.Clear();
            globalFilter[0] = null;
            canonID.Length  = 0;

            int[] cursor    = { 0 };
            int[] parenMode = { 0 }; // leading filter: parens disallowed

            // Parse leading global filter, if any
            UnicodeSet leading = ParseGlobalFilter(id, cursor, dir, parenMode, canonID);
            if (leading != null)
            {
                if (!Utility.ParseChar(id, cursor, ID_DELIM))
                {
                    // Not a global filter; back up and resume from the start
                    canonID.Length = 0;
                    cursor[0]      = 0;
                }
                if (dir == Forward)
                {
                    globalFilter[0] = leading;
                }
            }

            // Stays true only if the last thing consumed by the loop was a delimiter
            // (or the loop consumed nothing at all).
            bool endedOnDelimiter = true;

            SingleID parsed;
            while ((parsed = ParseSingleID(id, cursor, dir)) != null)
            {
                if (dir == Forward)
                {
                    list.Add(parsed);
                }
                else
                {
                    list.Insert(0, parsed);
                }

                if (!Utility.ParseChar(id, cursor, ID_DELIM))
                {
                    endedOnDelimiter = false;
                    break;
                }
            }

            if (list.Count == 0)
            {
                return false;
            }

            // Construct canonical ID
            int lastIndex = list.Count - 1;
            for (int k = 0; k <= lastIndex; ++k)
            {
                canonID.Append(list[k].CanonID);
                if (k != lastIndex)
                {
                    canonID.Append(ID_DELIM);
                }
            }

            // Parse trailing global filter, if any, and only if we saw
            // a trailing delimiter after the IDs.
            if (endedOnDelimiter)
            {
                parenMode[0] = 1; // trailing filter: parens required
                UnicodeSet trailing = ParseGlobalFilter(id, cursor, dir, parenMode, canonID);
                if (trailing != null)
                {
                    // Don't require trailing ';', but parse it if present
                    Utility.ParseChar(id, cursor, ID_DELIM);

                    if (dir == Reverse)
                    {
                        globalFilter[0] = trailing;
                    }
                }
            }

            // Trailing unparsed text is a syntax error
            return PatternProps.SkipWhiteSpace(id, cursor[0]) == id.Length;
        }
Example #7
0
        /// <summary>
        /// Parses a global filter of the form "[f]" or "([f])", depending on
        /// <paramref name="withParens"/>.
        /// </summary>
        /// <param name="id">The pattern to parse.</param>
        /// <param name="pos">INPUT-OUTPUT parameter. On input, <c>pos[0]</c> is the
        /// position of the first character to parse. After a successful parse it is
        /// the position after the last character parsed. It is restored to its input
        /// value when a required open paren is missing, when the set fails to parse,
        /// or when a required close paren is missing. If the text simply does not
        /// resemble a set pattern, null is returned and <c>pos[0]</c> is left after
        /// any open paren and white space already consumed.</param>
        /// <param name="dir">The direction.</param>
        /// <param name="withParens">INPUT-OUTPUT parameter. On entry, 0 means parens
        /// are disallowed, 1 means they are required, and -1 means they are optional;
        /// in the optional case element 0 is overwritten with 1 or 0 according to
        /// whether an open paren was found.</param>
        /// <param name="canonID">OUTPUT parameter, may be null. The filter pattern
        /// followed by <c>ID_DELIM</c> is appended when <paramref name="dir"/> is
        /// <c>Forward</c> and inserted at index 0 otherwise. In the forward direction
        /// the pattern is wrapped in parens when parens were parsed; in the other
        /// direction it is wrapped when they were not.</param>
        /// <returns>The parsed <c>UnicodeSet</c>, or null. A non-null result indicates
        /// a successful parse, regardless of whether the filter applies to the given
        /// direction.</returns>
        public static UnicodeSet ParseGlobalFilter(string id, int[] pos, TransliterationDirection dir,
                                                   int[] withParens,
                                                   StringBuffer canonID)
        {
            int origin = pos[0];

            switch (withParens[0])
            {
            case -1:
                // Parens optional: record whether one was present.
                withParens[0] = Utility.ParseChar(id, pos, OPEN_REV) ? 1 : 0;
                break;

            case 1:
                // Parens required: fail if the opening one is missing.
                if (!Utility.ParseChar(id, pos, OPEN_REV))
                {
                    pos[0] = origin;
                    return null;
                }
                break;
            }

            pos[0] = PatternProps.SkipWhiteSpace(id, pos[0]);

            if (!UnicodeSet.ResemblesPattern(id, pos[0]))
            {
                return null;
            }

            ParsePosition setPos = new ParsePosition(pos[0]);
            UnicodeSet    parsedSet;
            try
            {
                parsedSet = new UnicodeSet(id, setPos, null);
            }
            catch (ArgumentException)
            {
                pos[0] = origin;
                return null;
            }

            int    setStart = pos[0];
            string pattern  = id.Substring(setStart, setPos.Index - setStart);
            pos[0] = setPos.Index;

            if (withParens[0] == 1 && !Utility.ParseChar(id, pos, CLOSE_REV))
            {
                pos[0] = origin;
                return null;
            }

            // In the forward direction, append the pattern to the
            // canonID.  In the reverse, insert it at zero, and invert
            // the presence of parens ("A" <-> "(A)").
            if (canonID != null)
            {
                bool forward   = dir == Forward;
                bool addParens = forward ? withParens[0] == 1 : withParens[0] == 0;
                if (addParens)
                {
                    pattern = OPEN_REV + pattern + CLOSE_REV;
                }

                if (forward)
                {
                    canonID.Append(pattern + ID_DELIM);
                }
                else
                {
                    canonID.Insert(0, pattern + ID_DELIM);
                }
            }

            return parsedSet;
        }
Example #8
0
        /// <summary>
        /// Implements <see cref="Transliterator.HandleTransliterate(IReplaceable, Position, bool)"/>.
        /// Scans <paramref name="text"/> between <c>offsets.Start</c> and <c>offsets.Limit</c>
        /// for an open pattern, a character name and a close delimiter, and replaces each
        /// such span whose name lookup succeeds with the string for the looked-up code point.
        /// </summary>
        /// <param name="text">The text to scan and modify in place.</param>
        /// <param name="offsets">Supplies the start and limit of the range to process. On return
        /// <c>ContextLimit</c> and <c>Limit</c> are adjusted by the net change in text length and
        /// <c>Start</c> is set to the position reached (see <paramref name="isIncremental"/>).</param>
        /// <param name="isIncremental">If true and an open-delimiter candidate is still pending
        /// when scanning stops, <c>offsets.Start</c> is set to that candidate's position instead
        /// of the cursor.</param>
        protected override void HandleTransliterate(IReplaceable text,
                                                    Position offsets, bool isIncremental)
        {
            int maxLen = UCharacterName.Instance.MaxCharNameLength + 1; // allow for temporary trailing space

            // Accumulates the candidate character name between the delimiters.
            StringBuffer name = new StringBuffer(maxLen);

            // Get the legal character set
            UnicodeSet legal = new UnicodeSet();

            UCharacterName.Instance.GetCharNameCharacters(legal);

            int cursor = offsets.Start;
            int limit  = offsets.Limit;

            // Modes:
            // 0 - looking for open delimiter
            // 1 - after open delimiter
            int mode    = 0;
            int openPos = -1; // open delim candidate pos

            int c;

            while (cursor < limit)
            {
                c = text.Char32At(cursor);

                switch (mode)
                {
                case 0:   // looking for open delimiter
                    if (c == OPEN_DELIM)
                    {     // quick check first
                        openPos = cursor;
                        // presumably returns the index just past the matched open
                        // pattern, or a negative value on mismatch -- TODO confirm
                        int i = Utility.ParsePattern(OPEN_PAT, text, cursor, limit);
                        if (i >= 0 && i < limit)
                        {
                            mode        = 1;
                            name.Length = 0;
                            cursor      = i;
                            continue;     // *** reprocess char32At(cursor)
                        }
                    }
                    break;

                case 1:     // after open delimiter
                            // Look for legal chars.  If \s+ is found, convert it
                            // to a single space.  If closeDelimiter is found, exit
                            // the loop.  If any other character is found, exit the
                            // loop.  If the limit is reached, exit the loop.

                    // Convert \s+ => SPACE.  This assumes there are no
                    // runs of >1 space characters in names.
                    if (PatternProps.IsWhiteSpace(c))
                    {
                        // Ignore leading whitespace
                        if (name.Length > 0 &&
                            name[name.Length - 1] != SPACE)
                        {
                            name.Append(SPACE);
                            // If we are too long then abort.  maxLen includes
                            // temporary trailing space, so use '>'.
                            if (name.Length > maxLen)
                            {
                                mode = 0;
                            }
                        }
                        break;
                    }

                    if (c == CLOSE_DELIM)
                    {
                        int len = name.Length;

                        // Delete trailing space, if any
                        if (len > 0 &&
                            name[len - 1] == SPACE)
                        {
                            name.Length = --len;
                        }

                        // c is reused to hold the looked-up code point; -1 is
                        // treated as "lookup failed" below.
                        c = UCharacter.GetCharFromExtendedName(name.ToString());
                        if (c != -1)
                        {
                            // Lookup succeeded

                            // assert(UTF16.getCharCount(CLOSE_DELIM) == 1);
                            cursor++;     // advance over CLOSE_DELIM

                            string str = UTF16.ValueOf(c);
                            text.Replace(openPos, cursor, str);

                            // Adjust indices for the change in the length of
                            // the string.  Do not assume that str.length() ==
                            // 1, in case of surrogates.
                            int delta = cursor - openPos - str.Length;
                            cursor -= delta;
                            limit  -= delta;
                            // assert(cursor == openPos + str.length());
                        }
                        // If the lookup failed, we leave things as-is and
                        // still switch to mode 0 and continue.
                        mode    = 0;
                        openPos = -1; // close off candidate
                        continue;     // *** reprocess char32At(cursor)
                    }

                    if (legal.Contains(c))
                    {
                        UTF16.Append(name, c);
                        // If we go past the longest possible name then abort.
                        // maxLen includes temporary trailing space, so use '>='.
                        if (name.Length >= maxLen)
                        {
                            mode = 0;
                        }
                    }

                    // Invalid character
                    else
                    {
                        // The decrement here is offset by the advance after the
                        // switch.
                        --cursor;     // Backup and reprocess this character
                        mode = 0;
                    }

                    break;
                }

                // Step past the code point just examined (paths that 'continue'
                // above skip this advance on purpose).
                cursor += UTF16.GetCharCount(c);
            }

            // Propagate the net change in text length to the caller's offsets.
            offsets.ContextLimit += limit - offsets.Limit;
            offsets.Limit         = limit;
            // In incremental mode, only advance the cursor up to the last
            // open delimiter candidate.
            offsets.Start = (isIncremental && openPos >= 0) ? openPos : cursor;
        }
Example #9
0
        /// <summary>
        /// Selects the phrase for the given <paramref name="keyword"/>.
        /// </summary>
        /// <param name="keyword">A phrase selection keyword.</param>
        /// <returns>The string containing the formatted select message.</returns>
        /// <exception cref="ArgumentException">When the given keyword is not a "pattern identifier".</exception>
        /// <exception cref="InvalidOperationException">When no pattern has been applied
        /// (the message pattern is null or has no parts).</exception>
        /// <stable>ICU 4.4</stable>
        public string Format(string keyword)
        {
            //Check for the validity of the keyword
            if (!PatternProps.IsIdentifier(keyword))
            {
                throw new ArgumentException("Invalid formatting argument.");
            }
            // If no pattern was applied, throw an exception
            if (msgPattern == null || msgPattern.CountParts() == 0)
            {
                throw new InvalidOperationException("Invalid format error.");
            }

            // Get the appropriate sub-message.
            int msgStart = FindSubMessage(msgPattern, 0, keyword);

            if (!msgPattern.JdkAposMode)
            {
                int msgLimit   = msgPattern.GetLimitPartIndex(msgStart);
                int startIndex = msgPattern.GetPart(msgStart).Limit;
                int endIndex   = msgPattern.GetPatternIndex(msgLimit);
                // ICU4N: .NET Substring takes (startIndex, length), not (start, end) as in Java.
                return msgPattern.PatternString.Substring(startIndex, endIndex - startIndex);
            }

            // JDK compatibility mode: Remove SKIP_SYNTAX.
            // The builder is created lazily so that a sub-message without any
            // SkipSyntax/ArgStart parts is returned as a single substring.
            StringBuilder result    = null;
            int           prevIndex = msgPattern.GetPart(msgStart).Limit;

            for (int i = msgStart; ;)
            {
                MessagePatternPart     part = msgPattern.GetPart(++i);
                MessagePatternPartType type = part.Type;
                int index = part.Index;
                if (type == MessagePatternPartType.MsgLimit)
                {
                    if (result == null)
                    {
                        return pattern.Substring(prevIndex, index - prevIndex); // ICU4N: Corrected 2nd arg
                    }
                    // ICU4N: StringBuilder.Append(string, int, int) takes (startIndex, count),
                    // so the end index must be converted to a count.
                    return result.Append(pattern, prevIndex, index - prevIndex).ToString();
                }
                else if (type == MessagePatternPartType.SkipSyntax)
                {
                    if (result == null)
                    {
                        result = new StringBuilder();
                    }
                    // Copy the literal text before the skipped syntax, then resume after it.
                    result.Append(pattern, prevIndex, index - prevIndex); // ICU4N: Corrected 3rd arg
                    prevIndex = part.Limit;
                }
                else if (type == MessagePatternPartType.ArgStart)
                {
                    if (result == null)
                    {
                        result = new StringBuilder();
                    }
                    result.Append(pattern, prevIndex, index - prevIndex); // ICU4N: Corrected 3rd arg
                    prevIndex = index;
                    // Jump to the part that closes this argument and copy the whole
                    // argument text through AppendReducedApostrophes.
                    i         = msgPattern.GetLimitPartIndex(i);
                    index     = msgPattern.GetPart(i).Limit;
                    MessagePattern.AppendReducedApostrophes(pattern, prevIndex, index, result);
                    prevIndex = index;
                }
            }
        }
Example #10
0
 /// <summary>
 /// Parses one string from <c>rules</c> starting at index <paramref name="i"/>,
 /// resolving apostrophe quoting and backslash escapes, and stores the resulting
 /// text in <paramref name="raw"/>.
 /// </summary>
 /// <remarks>
 /// The string ends at the first unquoted white space or at the first syntax
 /// character other than an apostrophe or backslash. After scanning, the collected
 /// text is checked for unpaired surrogates and for U+FFFD..U+FFFF; any problem is
 /// reported through <c>SetParseError</c>.
 /// </remarks>
 /// <param name="i">Index in <c>rules</c> of the first character of the string.</param>
 /// <param name="raw">Cleared on entry; receives the parsed characters.</param>
 /// <returns>The index in <c>rules</c> at which scanning stopped.</returns>
 private int ParseString(int i, StringBuilder raw)
 {
     raw.Length = 0;
     while (i < rules.Length)
     {
         // i is advanced past c here; branches that must not consume c undo this with --i.
         char c = rules[i++];
         if (IsSyntaxChar(c))
         {
             if (c == 0x27)
             {  // apostrophe
                 if (i < rules.Length && rules[i] == 0x27)
                 {
                     // Double apostrophe, encodes a single one.
                     raw.Append((char)0x27);
                     ++i;
                     continue;
                 }
                 // Quote literal text until the next single apostrophe.
                 for (; ;)
                 {
                     if (i == rules.Length)
                     {
                         SetParseError("quoted literal text missing terminating apostrophe");
                         return(i);
                     }
                     c = rules[i++];
                     if (c == 0x27)
                     {
                         if (i < rules.Length && rules[i] == 0x27)
                         {
                             // Double apostrophe inside quoted literal text,
                             // still encodes a single apostrophe.
                             // Skip the second one; the first is appended below.
                             ++i;
                         }
                         else
                         {
                             // Closing apostrophe: leave the quote without appending it.
                             break;
                         }
                     }
                     raw.Append(c);
                 }
             }
             else if (c == 0x5c)
             {  // backslash
                 if (i == rules.Length)
                 {
                     SetParseError("backslash escape at the end of the rule string");
                     return(i);
                 }
                 // The code point after the backslash is taken literally.
                 int cp = rules.CodePointAt(i);
                 raw.AppendCodePoint(cp);
                 i += Character.CharCount(cp);
             }
             else
             {
                 // Any other syntax character terminates a string.
                 --i;
                 break;
             }
         }
         else if (PatternProps.IsWhiteSpace(c))
         {
             // Unquoted white space terminates a string.
             --i;
             break;
         }
         else
         {
             raw.Append(c);
         }
     }
     // Validate the collected text one code point at a time.
     for (int j = 0; j < raw.Length;)
     {
         int c = raw.CodePointAt(j);
         if (IsSurrogate(c))
         {
             SetParseError("string contains an unpaired surrogate");
             return(i);
         }
         if (0xfffd <= c && c <= 0xffff)
         {
             SetParseError("string contains U+FFFD, U+FFFE or U+FFFF");
             return(i);
         }
         j += Character.CharCount(c);
     }
     return(i);
 }