/**
 * Parse a compound ID, consisting of an optional forward global
 * filter, a separator, one or more single IDs delimited by
 * separators, and an optional reverse global filter.  The
 * separator is a semicolon.  The global filters are UnicodeSet
 * patterns.  The reverse global filter must be enclosed in
 * parentheses.
 * @param id the pattern to parse
 * @param dir the direction.
 * @param canonID OUTPUT parameter that receives the canonical ID,
 * consisting of canonical IDs for all elements, as returned by
 * parseSingleID(), separated by semicolons.  Previous contents
 * are discarded.
 * @param list OUTPUT parameter that receives a list of SingleID
 * objects representing the parsed IDs.  Previous contents are
 * discarded.  In the reverse direction the elements are stored in
 * reverse order of appearance.
 * @param globalFilter OUTPUT parameter that receives a pointer to
 * a newly created global filter for this ID in this direction, or
 * null if there is none.  A leading set pattern that is not
 * followed by a separator is NOT a global filter (it belongs to the
 * first single ID) and is never reported here.
 * @return true if the parse succeeds, that is, if the entire
 * id is consumed without syntax error.
 */
public static bool ParseCompoundID(string id, TransliterationDirection dir,
    StringBuffer canonID, IList<SingleID> list, UnicodeSet[] globalFilter)
{
    int[] pos = new int[] { 0 };
    int[] withParens = new int[1];
    list.Clear();
    UnicodeSet filter;
    globalFilter[0] = null;
    canonID.Length = 0;

    // Parse leading global filter, if any
    withParens[0] = 0; // parens disallowed
    filter = ParseGlobalFilter(id, pos, dir, withParens, canonID);
    if (filter != null)
    {
        if (Utility.ParseChar(id, pos, ID_DELIM))
        {
            // A set followed by the delimiter is a genuine global
            // filter; it only applies in the forward direction.
            if (dir == Forward)
            {
                globalFilter[0] = filter;
            }
        }
        else
        {
            // Not a global filter; backup and resume.  The set is
            // re-parsed below as part of the first single ID, so it
            // must not be published as the global filter.
            canonID.Length = 0;
            pos[0] = 0;
        }
    }

    bool sawDelimiter = true;
    for (; ; )
    {
        SingleID single = ParseSingleID(id, pos, dir);
        if (single == null)
        {
            break;
        }
        if (dir == Forward)
        {
            list.Add(single);
        }
        else
        {
            list.Insert(0, single);
        }
        if (!Utility.ParseChar(id, pos, ID_DELIM))
        {
            sawDelimiter = false;
            break;
        }
    }

    if (list.Count == 0)
    {
        return false;
    }

    // Construct canonical ID
    for (int i = 0; i < list.Count; ++i)
    {
        SingleID single = list[i];
        canonID.Append(single.CanonID);
        if (i != (list.Count - 1))
        {
            canonID.Append(ID_DELIM);
        }
    }

    // Parse trailing global filter, if any, and only if we saw
    // a trailing delimiter after the IDs.
    if (sawDelimiter)
    {
        withParens[0] = 1; // parens required
        filter = ParseGlobalFilter(id, pos, dir, withParens, canonID);
        if (filter != null)
        {
            // Don't require trailing ';', but parse it if present
            Utility.ParseChar(id, pos, ID_DELIM);

            if (dir == Reverse)
            {
                globalFilter[0] = filter;
            }
        }
    }

    // Trailing unparsed text is a syntax error
    pos[0] = PatternProps.SkipWhiteSpace(id, pos[0]);
    return pos[0] == id.Length;
}
//----------------------------------------------------------------
// Private implementation
//----------------------------------------------------------------

/**
 * Split an ID into its component pieces.  Accepted forms are T,
 * T/V, S-T, S-T/V, and S/V-T.  A missing source or target is
 * replaced by ANY.
 * @param id the id string, in any of several forms
 * @param pos INPUT-OUTPUT parameter.  On input, pos[0] is the
 * offset of the first character to parse in id.  On output,
 * pos[0] is the offset after the last parsed character.  If
 * neither a source nor a target was found, pos[0] is restored to
 * its input value.
 * @param allowFilter if true, one UnicodeSet pattern is accepted
 * at any location between specs or delimiters, and its text is
 * carried in the returned Specs object.
 * @return a Specs object, or null if neither source nor target was
 * seen in the parsed id.  When allowFilter is false (or no set
 * pattern was present) the filter passed to Specs is null.
 */
private static Specs ParseFilterID(string id, int[] pos, bool allowFilter)
{
    int origin = pos[0];

    string leadingSpec = null;
    string sourceSpec = null;
    string targetSpec = null;
    string variantSpec = null;
    string filterPattern = null;

    char pendingSep = '\0';
    int parsedSpecs = 0;

    // Each iteration consumes exactly one of: a filter, a delimiter
    // character ('-' or '/'), or a spec (source, target, or variant).
    while (true)
    {
        pos[0] = PatternProps.SkipWhiteSpace(id, pos[0]);
        if (pos[0] == id.Length)
        {
            break;
        }

        // Filter: only the first set pattern is taken.
        if (allowFilter && filterPattern == null && UnicodeSet.ResemblesPattern(id, pos[0]))
        {
            ParsePosition cursor = new ParsePosition(pos[0]);
            // The set is built only to learn where the pattern ends.
            new UnicodeSet(id, cursor, null);
            int patternEnd = cursor.Index;
            filterPattern = id.Substring(pos[0], patternEnd - pos[0]);
            pos[0] = patternEnd;
            continue;
        }

        // Delimiter: accepted only when none is pending and the piece
        // it introduces has not been seen yet.
        if (pendingSep == '\0')
        {
            char ch = id[pos[0]];
            bool opensTarget = ch == TARGET_SEP && targetSpec == null;
            bool opensVariant = ch == VARIANT_SEP && variantSpec == null;
            if (opensTarget || opensVariant)
            {
                pendingSep = ch;
                ++pos[0];
                continue;
            }
        }

        // A spec without a preceding delimiter is legal only as the
        // very first spec; otherwise we are done.
        if (pendingSep == '\0' && parsedSpecs > 0)
        {
            break;
        }

        string spec = Utility.ParseUnicodeIdentifier(id, pos);
        if (spec == null)
        {
            // A trailing delimiter has already been consumed at this
            // point, so Foo-, Foo/, Foo-Bar/, and Foo/Bar- are legal.
            break;
        }

        if (pendingSep == '\0')
        {
            leadingSpec = spec;
        }
        else if (pendingSep == TARGET_SEP)
        {
            targetSpec = spec;
        }
        else if (pendingSep == VARIANT_SEP)
        {
            variantSpec = spec;
        }

        ++parsedSpecs;
        pendingSep = '\0';
    }

    // The undelimited leading spec is the target unless an explicit
    // "-target" was seen, in which case it is the source.
    if (leadingSpec != null)
    {
        if (targetSpec == null)
        {
            targetSpec = leadingSpec;
        }
        else
        {
            sourceSpec = leadingSpec;
        }
    }

    // Must have either source or target
    if (sourceSpec == null && targetSpec == null)
    {
        pos[0] = origin;
        return null;
    }

    // Empty source or target defaults to ANY
    bool sawSource = sourceSpec != null;
    return new Specs(sourceSpec ?? ANY, targetSpec ?? ANY, variantSpec, sawSource, filterPattern);
}
/**
 * Parse a global filter of the form "[f]" or "([f])", depending
 * on 'withParens'.
 * @param id the pattern to parse
 * @param pos INPUT-OUTPUT parameter.  On input, the position of
 * the first character to parse.  On output, the position after
 * the last character parsed.  Whenever null is returned, pos[0]
 * is restored to its value on entry.
 * @param dir the direction.
 * @param withParens INPUT-OUTPUT parameter.  On entry, if
 * withParens[0] is 0, then parens are disallowed.  If it is 1,
 * then parens are required.  If it is -1, then parens are
 * optional, and the return result will be set to 0 or 1.
 * @param canonID OUTPUT parameter.  The pattern for the filter
 * added to the canonID, either at the end, if dir is FORWARD, or
 * at the start, if dir is REVERSE.  The pattern will be enclosed
 * in parentheses if appropriate, and will be suffixed with an
 * ID_DELIM character.  May be null.
 * @return a UnicodeSet object or null.  A non-null result
 * indicates a successful parse, regardless of whether the filter
 * applies to the given direction.  The caller should discard it
 * if withParens != (dir == REVERSE).
 */
public static UnicodeSet ParseGlobalFilter(string id, int[] pos, TransliterationDirection dir,
    int[] withParens, StringBuffer canonID)
{
    int start = pos[0];

    if (withParens[0] == -1)
    {
        withParens[0] = Utility.ParseChar(id, pos, OPEN_REV) ? 1 : 0;
    }
    else if (withParens[0] == 1)
    {
        if (!Utility.ParseChar(id, pos, OPEN_REV))
        {
            pos[0] = start;
            return null;
        }
    }

    pos[0] = PatternProps.SkipWhiteSpace(id, pos[0]);

    if (!UnicodeSet.ResemblesPattern(id, pos[0]))
    {
        // No set pattern here.  Give back anything consumed so far
        // (an opening paren and/or white space) so that a failed
        // parse never leaves the caller's position advanced.
        pos[0] = start;
        return null;
    }

    UnicodeSet filter;
    ParsePosition ppos = new ParsePosition(pos[0]);
    try
    {
        filter = new UnicodeSet(id, ppos, null);
    }
    catch (ArgumentException)
    {
        // Looked like a set but is malformed: report "no filter".
        pos[0] = start;
        return null;
    }

    string pattern = id.Substring(pos[0], ppos.Index - pos[0]); // ICU4N: Corrected 2nd parameter
    pos[0] = ppos.Index;

    if (withParens[0] == 1 && !Utility.ParseChar(id, pos, CLOSE_REV))
    {
        // Opening paren without its matching close.
        pos[0] = start;
        return null;
    }

    // In the forward direction, append the pattern to the
    // canonID.  In the reverse, insert it at zero, and invert
    // the presence of parens ("A" <-> "(A)").
    if (canonID != null)
    {
        if (dir == Forward)
        {
            if (withParens[0] == 1)
            {
                pattern = OPEN_REV + pattern + CLOSE_REV;
            }
            canonID.Append(pattern + ID_DELIM);
        }
        else
        {
            if (withParens[0] == 0)
            {
                pattern = OPEN_REV + pattern + CLOSE_REV;
            }
            canonID.Insert(0, pattern + ID_DELIM);
        }
    }

    return filter;
}