Exemplo n.º 1
0
        //----------------------------------------------------------------
        // Private implementation
        //----------------------------------------------------------------

        /**
         * Parse an ID into component pieces.  Take IDs of the form T,
         * T/V, S-T, S-T/V, or S/V-T.  If the source is missing, return a
         * source of ANY.
         * @param id the id string, in any of several forms
         * @param pos INPUT-OUTPUT parameter.  On input, pos[0] is the
         * offset of the first character to parse in id.  On output,
         * pos[0] is the offset after the last parsed character.  If the
         * parse failed, pos[0] will be unchanged.
         * @param allowFilter if true, a UnicodeSet pattern is allowed
         * at any location between specs or delimiters, and is returned
         * as the filter of the resulting Specs object.
         * @return a Specs object, or null if the parse failed.  If
         * neither source nor target was seen in the parsed id, then the
         * parse fails.  If allowFilter is true, then the parsed filter
         * pattern is returned in the Specs object, otherwise the returned
         * filter reference is null.  If the parse fails for any reason
         * null is returned.
         */
        private static Specs ParseFilterID(string id, int[] pos,
                                           bool allowFilter)
        {
            // Remember the entry offset so a failed parse can rewind pos[0].
            int rewindOffset = pos[0];

            string leadingSpec   = null;  // spec that had no delimiter in front of it
            string targetSpec    = null;  // spec that followed TARGET_SEP
            string variantSpec   = null;  // spec that followed VARIANT_SEP
            string filterPattern = null;  // raw text of the set pattern, if one was seen
            char   pendingSep    = '\0';  // delimiter consumed but not yet paired with a spec
            int    specsParsed   = 0;

            // Every pass consumes exactly one item: a filter pattern, a
            // delimiter ('-' or '/'), or a spec (source, target, or variant).
            while (true)
            {
                int offset = PatternProps.SkipWhiteSpace(id, pos[0]);
                pos[0] = offset;
                if (offset == id.Length)
                {
                    break;
                }

                // At most one filter is accepted, and only when the caller allows it.
                if (allowFilter && filterPattern == null &&
                    UnicodeSet.ResemblesPattern(id, offset))
                {
                    ParsePosition setPos = new ParsePosition(offset);
                    // The set itself is discarded; it is built only to learn
                    // where the pattern ends.
                    new UnicodeSet(id, setPos, null);
                    int setEnd = setPos.Index;
                    filterPattern = id.Substring(offset, setEnd - offset);
                    pos[0] = setEnd;
                    continue;
                }

                if (pendingSep == '\0')
                {
                    char ch = id[offset];
                    bool opensTarget  = ch == TARGET_SEP && targetSpec == null;
                    bool opensVariant = ch == VARIANT_SEP && variantSpec == null;
                    if (opensTarget || opensVariant)
                    {
                        pendingSep = ch;
                        pos[0] = offset + 1;
                        continue;
                    }

                    // A spec without a preceding delimiter is only legal as
                    // the very first spec; anything later ends the ID.
                    if (specsParsed > 0)
                    {
                        break;
                    }
                }

                string spec = Utility.ParseUnicodeIdentifier(id, pos);
                if (spec == null)
                {
                    // A trailing delimiter stays consumed, so Foo-, Foo/,
                    // Foo-Bar/, and Foo/Bar- are all legal.
                    break;
                }

                // File the spec according to the delimiter that introduced it.
                if (pendingSep == TARGET_SEP)
                {
                    targetSpec = spec;
                }
                else if (pendingSep == VARIANT_SEP)
                {
                    variantSpec = spec;
                }
                else
                {
                    leadingSpec = spec;
                }

                specsParsed++;
                pendingSep = '\0';
            }

            // The undelimited spec is the source when an explicit "-target"
            // was seen; otherwise it is the target itself.
            string sourceSpec = null;
            if (leadingSpec != null)
            {
                if (targetSpec != null)
                {
                    sourceSpec = leadingSpec;
                }
                else
                {
                    targetSpec = leadingSpec;
                }
            }

            // Neither source nor target: the parse fails and pos[0] is restored.
            if (sourceSpec == null && targetSpec == null)
            {
                pos[0] = rewindOffset;
                return null;
            }

            // A missing source or target defaults to ANY; sawSource records
            // whether the source was given explicitly.
            bool sawSource = sourceSpec != null;

            return new Specs(sourceSpec ?? ANY, targetSpec ?? ANY,
                             variantSpec, sawSource, filterPattern);
        }
Exemplo n.º 2
0
        /**
         * Parse a global filter of the form "[f]" or "([f])", depending
         * on 'withParens'.
         * @param id the pattern to parse
         * @param pos INPUT-OUTPUT parameter.  On input, the position of
         * the first character to parse.  On output, the position after
         * the last character parsed.
         * @param dir the direction.
         * @param withParens INPUT-OUTPUT parameter.  On entry, if
         * withParens[0] is 0, then parens are disallowed.  If it is 1,
         * then parens are required.  If it is -1, then parens are
         * optional, and the return result will be set to 0 or 1.
         * @param canonID OUTPUT parameter.  The pattern for the filter
         * added to the canonID, either at the end, if dir is FORWARD, or
         * at the start, if dir is REVERSE.  The pattern will be enclosed
         * in parentheses if appropriate, and will be suffixed with an
         * ID_DELIM character.  May be null.
         * @return a UnicodeSet object or null.  A non-null result
         * indicates a successful parse, regardless of whether the filter
         * applies to the given direction.  The caller should discard it
         * if withParens != (dir == REVERSE).
         */
        public static UnicodeSet ParseGlobalFilter(string id, int[] pos, TransliterationDirection dir,
                                                   int[] withParens,
                                                   StringBuffer canonID)
        {
            // Remember the entry offset so a failed parse can rewind pos[0].
            int rewindOffset = pos[0];

            switch (withParens[0])
            {
                case -1:
                    // Parens optional: report back whether one was actually present.
                    withParens[0] = Utility.ParseChar(id, pos, OPEN_REV) ? 1 : 0;
                    break;

                case 1:
                    // Parens required: without the opening one there is no filter here.
                    if (!Utility.ParseChar(id, pos, OPEN_REV))
                    {
                        pos[0] = rewindOffset;
                        return null;
                    }
                    break;
            }

            int setStart = PatternProps.SkipWhiteSpace(id, pos[0]);
            pos[0] = setStart;

            if (!UnicodeSet.ResemblesPattern(id, setStart))
            {
                // Nothing that looks like a set follows; pos[0] stays past
                // whatever was consumed above.
                return null;
            }

            ParsePosition setPos = new ParsePosition(setStart);
            UnicodeSet parsedSet;
            try
            {
                parsedSet = new UnicodeSet(id, setPos, null);
            }
            catch (ArgumentException)
            {
                // The set constructor rejected the pattern: report a failed parse.
                pos[0] = rewindOffset;
                return null;
            }

            int setEnd = setPos.Index;
            string pattern = id.Substring(setStart, setEnd - setStart);
            pos[0] = setEnd;

            // An opening paren must be matched by a closing one.
            if (withParens[0] == 1 && !Utility.ParseChar(id, pos, CLOSE_REV))
            {
                pos[0] = rewindOffset;
                return null;
            }

            if (canonID != null)
            {
                // Forward: append the pattern, keeping parens if it had them.
                // Reverse: insert at the front and invert the presence of
                // parens ("A" <-> "(A)").
                bool isForward = dir == Forward;
                bool wrapInParens = isForward ? withParens[0] == 1
                                              : withParens[0] == 0;
                if (wrapInParens)
                {
                    pattern = OPEN_REV + pattern + CLOSE_REV;
                }

                string entry = pattern + ID_DELIM;
                if (isForward)
                {
                    canonID.Append(entry);
                }
                else
                {
                    canonID.Insert(0, entry);
                }
            }

            return parsedSet;
        }