예제 #1
0
        /**
         * Parse a compound ID, consisting of an optional forward global
         * filter, a separator, one or more single IDs delimited by
         * separators, and an optional reverse global filter.  The
         * separator is ID_DELIM.  The global filters are UnicodeSet
         * patterns.  The reverse global filter must be enclosed in
         * parentheses.
         *
         * A leading set pattern only counts as the forward global filter
         * when it is immediately followed by the separator.  Otherwise
         * parsing restarts at offset 0, the text is handed to
         * ParseSingleID(), and no global filter is reported for it.
         *
         * @param id the pattern to parse
         * @param dir the direction.
         * @param canonID OUTPUT parameter that receives the canonical ID,
         * consisting of canonical IDs for all elements, as returned by
         * ParseSingleID(), separated by ID_DELIM.  Previous contents
         * are discarded.
         * @param list OUTPUT parameter that receives a list of SingleID
         * objects representing the parsed IDs.  Previous contents are
         * discarded.  In the reverse direction the IDs are stored in
         * reverse order of appearance.
         * @param globalFilter OUTPUT parameter; element 0 receives the
         * global filter for this ID in this direction, or null if there
         * is none.
         * @return true if the parse succeeds, that is, if at least one
         * single ID was parsed and the entire id is consumed without
         * syntax error.
         */
        public static bool ParseCompoundID(string id, TransliterationDirection dir,
                                           StringBuffer canonID,
                                           IList<SingleID> list,
                                           UnicodeSet[] globalFilter)
        {
            int[] pos = new int[] { 0 };
            int[] withParens = new int[1];
            UnicodeSet filter;

            // Reset all output parameters before parsing.
            list.Clear();
            globalFilter[0] = null;
            canonID.Length = 0;

            // Parse leading global filter, if any
            withParens[0] = 0; // parens disallowed
            filter = ParseGlobalFilter(id, pos, dir, withParens, canonID);
            if (filter != null)
            {
                if (!Utility.ParseChar(id, pos, ID_DELIM))
                {
                    // Not a global filter: the set is not followed by a
                    // separator.  Discard what ParseGlobalFilter wrote to
                    // canonID and re-parse from the start as a single ID.
                    // The set must NOT be reported as the global filter.
                    canonID.Length = 0;
                    pos[0] = 0;
                }
                else if (dir == Forward)
                {
                    // A leading (paren-less) filter applies only in the
                    // forward direction.
                    globalFilter[0] = filter;
                }
            }

            // Stays true only if the last single ID was followed by a
            // separator (or if no single ID was parsed at all).
            bool sawDelimiter = true;

            for (; ;)
            {
                SingleID single = ParseSingleID(id, pos, dir);
                if (single == null)
                {
                    break;
                }
                if (dir == Forward)
                {
                    list.Add(single);
                }
                else
                {
                    // Reverse direction: the IDs run in the opposite order.
                    list.Insert(0, single);
                }
                if (!Utility.ParseChar(id, pos, ID_DELIM))
                {
                    sawDelimiter = false;
                    break;
                }
            }

            if (list.Count == 0)
            {
                return false;
            }

            // Construct canonical ID
            for (int i = 0; i < list.Count; ++i)
            {
                SingleID single = list[i];
                canonID.Append(single.CanonID);
                if (i != (list.Count - 1))
                {
                    canonID.Append(ID_DELIM);
                }
            }

            // Parse trailing global filter, if any, and only if we saw
            // a trailing delimiter after the IDs.
            if (sawDelimiter)
            {
                withParens[0] = 1; // parens required
                filter = ParseGlobalFilter(id, pos, dir, withParens, canonID);
                if (filter != null)
                {
                    // Don't require trailing ';', but parse it if present
                    Utility.ParseChar(id, pos, ID_DELIM);

                    // A trailing (parenthesized) filter applies only in
                    // the reverse direction.
                    if (dir == Reverse)
                    {
                        globalFilter[0] = filter;
                    }
                }
            }

            // Trailing unparsed text (other than white space) is a syntax error
            pos[0] = PatternProps.SkipWhiteSpace(id, pos[0]);
            return pos[0] == id.Length;
        }
예제 #2
0
        //----------------------------------------------------------------
        // Private implementation
        //----------------------------------------------------------------

        /**
         * Parse an ID into component pieces.  Take IDs of the form T,
         * T/V, S-T, S-T/V, or S/V-T.  If the source is missing, return a
         * source of ANY.
         * @param id the id string, in any of several forms
         * @param pos INPUT-OUTPUT parameter.  On input, pos[0] is the
         * offset of the first character to parse in id.  On output,
         * pos[0] is the offset after the last parsed character.  If the
         * parse failed, pos[0] will be unchanged.
         * @param allowFilter if true, a single UnicodeSet pattern is
         * allowed at any location between specs or delimiters, and is
         * returned as the filter of the resulting Specs object.
         * @return a Specs object, or null if the parse failed.  If
         * neither source nor target was seen in the parsed id, then the
         * parse fails.  If allowFilter is true, then the parsed filter
         * pattern is returned in the Specs object, otherwise the returned
         * filter reference is null.  If the parse fails for any reason,
         * including a malformed UnicodeSet pattern, null is returned.
         */
        private static Specs ParseFilterID(string id, int[] pos,
                                           bool allowFilter)
        {
            string first = null;
            string source = null;
            string target = null;
            string variant = null;
            string filter = null;
            char delimiter = (char)0;   // pending '-' or '/' seen before the next spec, or 0
            int specCount = 0;
            int start = pos[0];         // restored into pos[0] on failure

            // This loop parses one of the following things with each
            // pass: a filter, a delimiter character (either '-' or '/'),
            // or a spec (source, target, or variant).
            for (; ;)
            {
                pos[0] = PatternProps.SkipWhiteSpace(id, pos[0]);
                if (pos[0] == id.Length)
                {
                    break;
                }

                // Parse filters (at most one per ID)
                if (allowFilter && filter == null &&
                    UnicodeSet.ResemblesPattern(id, pos[0]))
                {
                    ParsePosition ppos = new ParsePosition(pos[0]);
                    try
                    {
                        // Parse the set only to find where the pattern
                        // ends; the set object itself is discarded.
                        new UnicodeSet(id, ppos, null);
                    }
                    catch (ArgumentException)
                    {
                        // Malformed set pattern: honor the documented
                        // contract (null result, pos[0] unchanged) rather
                        // than letting the exception escape.
                        pos[0] = start;
                        return null;
                    }
                    filter = id.Substring(pos[0], ppos.Index - pos[0]); // ICU4N: Corrected 2nd parameter
                    pos[0] = ppos.Index;
                    continue;
                }

                if (delimiter == 0)
                {
                    char c = id[pos[0]];
                    // Each delimiter is accepted only while the spec it
                    // introduces has not been seen yet.
                    if ((c == TARGET_SEP && target == null) ||
                        (c == VARIANT_SEP && variant == null))
                    {
                        delimiter = c;
                        ++pos[0];
                        continue;
                    }
                }

                // We are about to try to parse a spec with no delimiter
                // when we can no longer do so (we can only do so at the
                // start); break.
                if (delimiter == 0 && specCount > 0)
                {
                    break;
                }

                string spec = Utility.ParseUnicodeIdentifier(id, pos);
                if (spec == null)
                {
                    // Note that if there was a trailing delimiter, we
                    // consume it.  So Foo-, Foo/, Foo-Bar/, and Foo/Bar-
                    // are legal.
                    break;
                }

                // File the spec according to the delimiter that preceded it.
                switch (delimiter)
                {
                case (char)0:
                    first = spec;
                    break;

                case TARGET_SEP:
                    target = spec;
                    break;

                case VARIANT_SEP:
                    variant = spec;
                    break;
                }
                ++specCount;
                delimiter = (char)0;
            }

            // A spec with no prior character is either source or target,
            // depending on whether an explicit "-target" was seen.
            if (first != null)
            {
                if (target == null)
                {
                    target = first;
                }
                else
                {
                    source = first;
                }
            }

            // Must have either source or target
            if (source == null && target == null)
            {
                pos[0] = start;
                return null;
            }

            // Empty source or target defaults to ANY
            bool sawSource = true;

            if (source == null)
            {
                source = ANY;
                sawSource = false;
            }
            if (target == null)
            {
                target = ANY;
            }

            return new Specs(source, target, variant, sawSource, filter);
        }
예제 #3
0
        /**
         * Parse a global filter of the form "[f]" or "([f])", depending
         * on 'withParens'.
         * @param id the pattern to parse
         * @param pos INPUT-OUTPUT parameter.  On input, the position of
         * the first character to parse.  On output, the position after
         * the last character parsed.  If null is returned, pos[0] is
         * left unchanged.
         * @param dir the direction.
         * @param withParens INPUT-OUTPUT parameter.  On entry, if
         * withParens[0] is 0, then parens are disallowed.  If it is 1,
         * then parens are required.  If it is -1, then parens are
         * optional, and withParens[0] will be set to 0 or 1 according to
         * whether an opening paren was found (this happens even if the
         * parse subsequently fails).
         * @param canonID OUTPUT parameter.  The pattern for the filter
         * added to the canonID, either at the end, if dir is FORWARD, or
         * at the start, if dir is REVERSE.  The pattern will be enclosed
         * in parentheses if appropriate, and will be suffixed with an
         * ID_DELIM character.  May be null.
         * @return a UnicodeSet object or null.  A non-null result
         * indicates a successful parse, regardless of whether the filter
         * applies to the given direction.  The caller should discard it
         * if withParens != (dir == REVERSE).
         */
        public static UnicodeSet ParseGlobalFilter(string id, int[] pos, TransliterationDirection dir,
                                                   int[] withParens,
                                                   StringBuffer canonID)
        {
            int start = pos[0]; // restored into pos[0] on every failure path

            if (withParens[0] == -1)
            {
                // Parens optional: record whether one was present.
                withParens[0] = Utility.ParseChar(id, pos, OPEN_REV) ? 1 : 0;
            }
            else if (withParens[0] == 1)
            {
                // Parens required: fail if the opening paren is missing.
                if (!Utility.ParseChar(id, pos, OPEN_REV))
                {
                    pos[0] = start;
                    return null;
                }
            }

            pos[0] = PatternProps.SkipWhiteSpace(id, pos[0]);

            if (!UnicodeSet.ResemblesPattern(id, pos[0]))
            {
                // No set pattern here.  Back up so that an opening paren
                // (or white space) consumed above is not silently eaten;
                // otherwise a caller could treat a dangling "(" as parsed.
                pos[0] = start;
                return null;
            }

            UnicodeSet filter;
            ParsePosition ppos = new ParsePosition(pos[0]);
            try
            {
                filter = new UnicodeSet(id, ppos, null);
            }
            catch (ArgumentException)
            {
                // Malformed set pattern.
                pos[0] = start;
                return null;
            }

            string pattern = id.Substring(pos[0], ppos.Index - pos[0]); // ICU4N: Corrected 2nd parameter
            pos[0] = ppos.Index;

            // An opening paren must be matched by a closing one.
            if (withParens[0] == 1 && !Utility.ParseChar(id, pos, CLOSE_REV))
            {
                pos[0] = start;
                return null;
            }

            // In the forward direction, append the pattern to the
            // canonID.  In the reverse, insert it at zero, and invert
            // the presence of parens ("A" <-> "(A)").
            if (canonID != null)
            {
                if (dir == Forward)
                {
                    if (withParens[0] == 1)
                    {
                        pattern = OPEN_REV + pattern + CLOSE_REV;
                    }
                    canonID.Append(pattern + ID_DELIM);
                }
                else
                {
                    if (withParens[0] == 0)
                    {
                        pattern = OPEN_REV + pattern + CLOSE_REV;
                    }
                    canonID.Insert(0, pattern + ID_DELIM);
                }
            }

            return filter;
        }