/// <summary>
/// Scans forward in <c>rules</c> over characters for which
/// <c>PatternProps.IsWhiteSpace</c> returns true.
/// </summary>
/// <param name="i">Index in <c>rules</c> at which the scan starts.</param>
/// <returns>
/// The index of the first character at or after <paramref name="i"/> that is not
/// white space, or <c>rules.Length</c> when only white space remains. When
/// <paramref name="i"/> is already at or beyond <c>rules.Length</c> it is returned unchanged.
/// </returns>
private int SkipWhiteSpace(int i)
{
    for (; i < rules.Length; ++i)
    {
        if (!PatternProps.IsWhiteSpace(rules[i]))
        {
            break;
        }
    }
    return i;
}
/// <summary>
/// Parses the reset that starts at <c>ruleIndex</c>: an optional "before" prefix
/// with a strength digit, followed by either a special position or a tailoring
/// string, and forwards the result to <c>sink.AddReset</c>.
/// </summary>
/// <returns>
/// The reset strength that was passed to <c>sink.AddReset</c>. If an error is
/// reported through <c>SetParseError</c> and that call returns,
/// <c>(CollationStrength)UCOL_DEFAULT</c> is returned and <c>ruleIndex</c> is left untouched.
/// </returns>
private CollationStrength ParseResetAndPosition()
{
    // Skip the character at ruleIndex itself and any white space after it.
    int pos = SkipWhiteSpace(ruleIndex + 1);

    // Without a well-formed before-prefix the reset strength is Identical.
    CollationStrength resetStrength = CollationStrength.Identical;

    if (rules.RegionMatches(pos, BEFORE, 0, BEFORE.Length, StringComparison.Ordinal))
    {
        int digitPos = pos + BEFORE.Length;
        // The BEFORE keyword must be followed by white space ...
        if (digitPos < rules.Length && PatternProps.IsWhiteSpace(rules[digitPos]))
        {
            digitPos = SkipWhiteSpace(digitPos + 1);
            // ... and then by two more characters: the digit and the closing bracket.
            if (digitPos + 1 < rules.Length)
            {
                char digit = rules[digitPos];
                bool isStrengthDigit = 0x31 <= digit && digit <= 0x33; // '1'..'3'
                if (isStrengthDigit && rules[digitPos + 1] == 0x5d)    // ']'
                {
                    // &[before n] with n=1 or 2 or 3
                    resetStrength = CollationStrength.Primary + (digit - 0x31);
                    pos = SkipWhiteSpace(digitPos + 2);
                }
            }
        }
    }

    if (pos >= rules.Length)
    {
        SetParseError("reset without position");
        return (CollationStrength)UCOL_DEFAULT;
    }

    // A '[' introduces a special position; anything else is a tailoring string.
    pos = rules[pos] == 0x5b
        ? ParseSpecialPosition(pos, rawBuilder.Value)
        : ParseTailoringString(pos, rawBuilder.Value);

    try
    {
        sink.AddReset(resetStrength, rawBuilder);
    }
    catch (Exception e)
    {
        SetParseError("adding reset failed", e);
        return (CollationStrength)UCOL_DEFAULT;
    }

    // Commit the new position only after the sink accepted the reset.
    ruleIndex = pos;
    return resetStrength;
}
/// <summary>
/// Stores <paramref name="ruleString"/> in <c>rules</c>, resets <c>ruleIndex</c> to 0
/// and dispatches on each top-level character until the end of the string is reached.
/// </summary>
/// <param name="ruleString">The rule text to parse.</param>
/// <remarks>
/// The unexpected-character branch does not advance <c>ruleIndex</c> itself.
/// NOTE(review): this presumably relies on <c>SetParseError</c> not returning normally - confirm.
/// </remarks>
private void Parse(string ruleString)
{
    rules = ruleString;
    ruleIndex = 0;

    while (ruleIndex < rules.Length)
    {
        char current = rules[ruleIndex];

        if (PatternProps.IsWhiteSpace(current))
        {
            // White space between rules is skipped.
            ++ruleIndex;
        }
        else if (current == '&')
        {
            ParseRuleChain();
        }
        else if (current == '[')
        {
            ParseSetting();
        }
        else if (current == '#')
        {
            // starts a comment, until the end of the line
            ruleIndex = SkipComment(ruleIndex + 1);
        }
        else if (current == '@')
        {
            // is equivalent to [backwards 2]
            settings.SetFlag(CollationSettings.BackwardSecondary, true);
            ++ruleIndex;
        }
        else if (current == '!')
        {
            // '!' used to turn on Thai/Lao character reversal.
            // Accept but ignore. The root collator has contractions
            // that are equivalent to the character reversal, where appropriate.
            ++ruleIndex;
        }
        else
        {
            SetParseError("expected a reset or setting or comment");
        }
    }
}
/// <summary>
/// Reads a run of words from <c>rules</c> into <paramref name="raw"/>, stopping at the
/// first syntax character other than '-' (0x2d) and '_' (0x5f).
/// </summary>
/// <param name="i">Index in <c>rules</c> at which to start; leading white space is skipped.</param>
/// <param name="raw">
/// Receives the words. It is cleared first. Each run of white space between words is
/// stored as a single ' ', and a trailing ' ' is removed before returning at a
/// terminating syntax character.
/// </param>
/// <returns>
/// The index of the terminating syntax character, or 0 when the end of <c>rules</c>
/// is reached without finding one.
/// </returns>
private int ReadWords(int i, StringBuilder raw)
{
    raw.Length = 0;
    i = SkipWhiteSpace(i);

    while (i < rules.Length)
    {
        char ch = rules[i];

        bool terminator = IsSyntaxChar(ch) && ch != 0x2d && ch != 0x5f; // syntax except -_
        if (terminator)
        {
            int last = raw.Length - 1;
            if (last >= 0 && raw[last] == ' ')
            {
                // remove trailing space
                raw.Length = last;
            }
            return i;
        }

        if (PatternProps.IsWhiteSpace(ch))
        {
            // Collapse the whole white space run into one space.
            raw.Append(' ');
            i = SkipWhiteSpace(i + 1);
        }
        else
        {
            raw.Append(ch);
            ++i;
        }
    }

    // Ran off the end of the rule string without a terminating syntax character.
    return 0;
}
//----------------------------------------------------------------
// Private implementation
//----------------------------------------------------------------

/// <summary>
/// Parses an ID into component pieces. Accepts IDs of the form T,
/// T/V, S-T, S-T/V, or S/V-T. If the source is missing, the source
/// defaults to ANY.
/// </summary>
/// <param name="id">The id string, in any of several forms.</param>
/// <param name="pos">
/// INPUT-OUTPUT parameter. On input, pos[0] is the offset of the first
/// character to parse in <paramref name="id"/>. On output, pos[0] is the
/// offset after the last parsed character. When null is returned,
/// pos[0] is reset to its input value.
/// </param>
/// <param name="allowFilter">
/// If true, one UnicodeSet pattern is allowed at any location between
/// specs or delimiters; its text is handed to the returned Specs object.
/// </param>
/// <returns>
/// A Specs object, or null if neither a source nor a target was seen.
/// The filter passed to Specs is null when no filter pattern was parsed.
/// </returns>
private static Specs ParseFilterID(string id, int[] pos, bool allowFilter)
{
    int startPos = pos[0];

    string leadingSpec = null;   // spec that was not preceded by a delimiter
    string sourceSpec = null;
    string targetSpec = null;
    string variantSpec = null;
    string filterPattern = null;
    char pendingDelimiter = (char)0;
    int specsSeen = 0;

    // Each pass consumes exactly one of: a filter, a delimiter character
    // (TARGET_SEP or VARIANT_SEP), or a spec (source, target, or variant).
    while (true)
    {
        pos[0] = PatternProps.SkipWhiteSpace(id, pos[0]);
        if (pos[0] == id.Length)
        {
            break;
        }

        // Filter (at most one, and only when permitted)
        if (allowFilter && filterPattern == null && UnicodeSet.ResemblesPattern(id, pos[0]))
        {
            ParsePosition setPos = new ParsePosition(pos[0]);
            // The set is constructed only to find out where its pattern ends.
            new UnicodeSet(id, setPos, null);
            filterPattern = id.Substring(pos[0], setPos.Index - pos[0]);
            pos[0] = setPos.Index;
            continue;
        }

        if (pendingDelimiter == 0)
        {
            char ch = id[pos[0]];
            bool opensTarget = ch == TARGET_SEP && targetSpec == null;
            bool opensVariant = ch == VARIANT_SEP && variantSpec == null;
            if (opensTarget || opensVariant)
            {
                pendingDelimiter = ch;
                ++pos[0];
                continue;
            }

            // A spec without a delimiter is only legal as the very first
            // spec; once one has been seen, stop here.
            if (specsSeen > 0)
            {
                break;
            }
        }

        string spec = Utility.ParseUnicodeIdentifier(id, pos);
        if (spec == null)
        {
            // A trailing delimiter has already been consumed at this point,
            // so Foo-, Foo/, Foo-Bar/, and Foo/Bar- are legal.
            break;
        }

        if (pendingDelimiter == (char)0)
        {
            leadingSpec = spec;
        }
        else if (pendingDelimiter == TARGET_SEP)
        {
            targetSpec = spec;
        }
        else if (pendingDelimiter == VARIANT_SEP)
        {
            variantSpec = spec;
        }

        ++specsSeen;
        pendingDelimiter = (char)0;
    }

    // The undelimited spec is the source when an explicit "-target" was
    // seen, otherwise it is the target.
    if (leadingSpec != null)
    {
        if (targetSpec == null)
        {
            targetSpec = leadingSpec;
        }
        else
        {
            sourceSpec = leadingSpec;
        }
    }

    // Must have either source or target
    if (sourceSpec == null && targetSpec == null)
    {
        pos[0] = startPos;
        return null;
    }

    // Empty source or target defaults to ANY
    bool sawSource = sourceSpec != null;
    if (!sawSource)
    {
        sourceSpec = ANY;
    }
    if (targetSpec == null)
    {
        targetSpec = ANY;
    }

    return new Specs(sourceSpec, targetSpec, variantSpec, sawSource, filterPattern);
}
/// <summary>
/// Parses a compound ID, consisting of an optional forward global
/// filter, a separator, one or more single IDs delimited by
/// separators, and an optional reverse global filter. The separator
/// is <c>ID_DELIM</c>. The global filters are UnicodeSet patterns.
/// The reverse global filter must be enclosed in parentheses.
/// </summary>
/// <param name="id">The pattern to parse.</param>
/// <param name="dir">The direction.</param>
/// <param name="canonID">
/// OUTPUT parameter that receives the canonical ID: the canonical IDs
/// of all parsed elements joined by <c>ID_DELIM</c>, plus whatever
/// <c>ParseGlobalFilter</c> adds for the global filters. Previous
/// contents are discarded.
/// </param>
/// <param name="list">
/// OUTPUT parameter that receives the parsed SingleID objects. They are
/// appended for the forward direction and inserted at the front
/// otherwise. Previous contents are discarded.
/// </param>
/// <param name="globalFilter">
/// OUTPUT parameter. Element 0 receives the global filter for this ID
/// in this direction, or null if there is none.
/// </param>
/// <returns>
/// true if at least one single ID was parsed and the entire
/// <paramref name="id"/> was consumed; otherwise false.
/// </returns>
public static bool ParseCompoundID(string id, TransliterationDirection dir, StringBuffer canonID, IList<SingleID> list, UnicodeSet[] globalFilter)
{
    int[] pos = { 0 };
    int[] withParens = { 0 }; // leading filter: parens disallowed

    list.Clear();
    globalFilter[0] = null;
    canonID.Length = 0;

    // Parse leading global filter, if any
    UnicodeSet leadingFilter = ParseGlobalFilter(id, pos, dir, withParens, canonID);
    if (leadingFilter != null)
    {
        if (!Utility.ParseChar(id, pos, ID_DELIM))
        {
            // Not a global filter; backup and resume
            canonID.Length = 0;
            pos[0] = 0;
        }
        if (dir == Forward)
        {
            globalFilter[0] = leadingFilter;
        }
    }

    // Parse single IDs for as long as each one is followed by a delimiter.
    bool sawDelimiter = true;
    SingleID parsed;
    while ((parsed = ParseSingleID(id, pos, dir)) != null)
    {
        if (dir == Forward)
        {
            list.Add(parsed);
        }
        else
        {
            list.Insert(0, parsed);
        }

        if (!Utility.ParseChar(id, pos, ID_DELIM))
        {
            sawDelimiter = false;
            break;
        }
    }

    if (list.Count == 0)
    {
        return false;
    }

    // Construct canonical ID
    int lastIndex = list.Count - 1;
    for (int k = 0; k <= lastIndex; ++k)
    {
        canonID.Append(list[k].CanonID);
        if (k != lastIndex)
        {
            canonID.Append(ID_DELIM);
        }
    }

    // Parse trailing global filter, if any, and only if we saw
    // a trailing delimiter after the IDs.
    if (sawDelimiter)
    {
        withParens[0] = 1; // parens required
        UnicodeSet trailingFilter = ParseGlobalFilter(id, pos, dir, withParens, canonID);
        if (trailingFilter != null)
        {
            // Don't require trailing ';', but parse it if present
            Utility.ParseChar(id, pos, ID_DELIM);

            if (dir == Reverse)
            {
                globalFilter[0] = trailingFilter;
            }
        }
    }

    // Trailing unparsed text is a syntax error
    pos[0] = PatternProps.SkipWhiteSpace(id, pos[0]);
    return pos[0] == id.Length;
}
/// <summary>
/// Parses a global filter of the form "[f]" or "([f])", depending
/// on <paramref name="withParens"/>.
/// </summary>
/// <param name="id">The pattern to parse.</param>
/// <param name="pos">
/// INPUT-OUTPUT parameter. On input, pos[0] is the position of the first
/// character to parse. On a successful parse, pos[0] is the position after
/// the last character parsed. Whenever null is returned, pos[0] is reset to
/// its input value.
/// </param>
/// <param name="dir">The direction.</param>
/// <param name="withParens">
/// INPUT-OUTPUT parameter. On entry, if withParens[0] is 0, then parens are
/// disallowed. If it is 1, then parens are required. If it is -1, then
/// parens are optional, and withParens[0] is set to 1 or 0 according to
/// whether an opening paren was found.
/// </param>
/// <param name="canonID">
/// OUTPUT parameter; may be null. The filter pattern, suffixed with
/// <c>ID_DELIM</c>, is appended when <paramref name="dir"/> is Forward and
/// inserted at index 0 otherwise. In the forward direction the pattern is
/// wrapped in parentheses when withParens[0] is 1; in the other direction
/// it is wrapped when withParens[0] is 0.
/// </param>
/// <returns>
/// A UnicodeSet object or null. A non-null result indicates a successful
/// parse, regardless of whether the filter applies to the given direction.
/// The caller should discard it if withParens != (dir == REVERSE).
/// </returns>
public static UnicodeSet ParseGlobalFilter(string id, int[] pos, TransliterationDirection dir, int[] withParens, StringBuffer canonID)
{
    int start = pos[0];

    if (withParens[0] == -1)
    {
        // Parens optional: record whether one was actually present.
        withParens[0] = Utility.ParseChar(id, pos, OPEN_REV) ? 1 : 0;
    }
    else if (withParens[0] == 1)
    {
        if (!Utility.ParseChar(id, pos, OPEN_REV))
        {
            pos[0] = start;
            return null;
        }
    }

    pos[0] = PatternProps.SkipWhiteSpace(id, pos[0]);

    if (!UnicodeSet.ResemblesPattern(id, pos[0]))
    {
        // No set pattern here. Undo the white space skip and any opening
        // paren consumed above, so that this failure leaves pos[0] untouched
        // exactly like the other failure paths of this method.
        pos[0] = start;
        return null;
    }

    UnicodeSet filter;
    ParsePosition ppos = new ParsePosition(pos[0]);
    try
    {
        filter = new UnicodeSet(id, ppos, null);
    }
    catch (ArgumentException)
    {
        pos[0] = start;
        return null;
    }

    string pattern = id.Substring(pos[0], ppos.Index - pos[0]); // ICU4N: Corrected 2nd parameter
    pos[0] = ppos.Index;

    if (withParens[0] == 1 && !Utility.ParseChar(id, pos, CLOSE_REV))
    {
        pos[0] = start;
        return null;
    }

    // In the forward direction, append the pattern to the
    // canonID. In the reverse, insert it at zero, and invert
    // the presence of parens ("A" <-> "(A)").
    if (canonID != null)
    {
        if (dir == Forward)
        {
            if (withParens[0] == 1)
            {
                pattern = OPEN_REV + pattern + CLOSE_REV;
            }
            canonID.Append(pattern + ID_DELIM);
        }
        else
        {
            if (withParens[0] == 0)
            {
                pattern = OPEN_REV + pattern + CLOSE_REV;
            }
            canonID.Insert(0, pattern + ID_DELIM);
        }
    }

    return filter;
}
/// <summary>
/// Implements <see cref="Transliterator.HandleTransliterate(IReplaceable, Position, bool)"/>.
/// </summary>
/// <remarks>
/// Scans <paramref name="text"/> between <c>offsets.Start</c> and <c>offsets.Limit</c>
/// for a name enclosed by the open pattern and <c>CLOSE_DELIM</c>. When
/// <c>UCharacter.GetCharFromExtendedName</c> resolves the collected name, the whole
/// delimited span is replaced by the string for that code point and the limits in
/// <paramref name="offsets"/> are adjusted for the change in length.
/// </remarks>
/// <param name="text">The text to modify in place.</param>
/// <param name="offsets">Range to process; Start, Limit and ContextLimit are updated.</param>
/// <param name="isIncremental">
/// If true and an open delimiter candidate is still pending at the end,
/// <c>offsets.Start</c> is set to that candidate's position instead of the cursor.
/// </param>
protected override void HandleTransliterate(IReplaceable text, Position offsets, bool isIncremental)
{
    // allow for temporary trailing space
    int maxLen = UCharacterName.Instance.MaxCharNameLength + 1;
    StringBuffer name = new StringBuffer(maxLen);

    // Get the legal character set
    UnicodeSet legal = new UnicodeSet();
    UCharacterName.Instance.GetCharNameCharacters(legal);

    int cursor = offsets.Start;
    int limit = offsets.Limit;

    // false - looking for open delimiter
    // true  - after open delimiter, collecting the name
    bool collecting = false;
    int openPos = -1; // open delim candidate pos

    while (cursor < limit)
    {
        int c = text.Char32At(cursor);

        if (!collecting)
        {
            if (c == OPEN_DELIM) // quick check first
            {
                openPos = cursor;
                int afterOpen = Utility.ParsePattern(OPEN_PAT, text, cursor, limit);
                if (afterOpen >= 0 && afterOpen < limit)
                {
                    collecting = true;
                    name.Length = 0;
                    cursor = afterOpen;
                    continue; // *** reprocess Char32At(cursor)
                }
            }
        }
        else if (PatternProps.IsWhiteSpace(c))
        {
            // Convert \s+ => SPACE. This assumes there are no
            // runs of >1 space characters in names.
            // Leading white space is ignored.
            if (name.Length > 0 && name[name.Length - 1] != SPACE)
            {
                name.Append(SPACE);
                // If we are too long then abort. maxLen includes
                // temporary trailing space, so use '>'.
                if (name.Length > maxLen)
                {
                    collecting = false;
                }
            }
        }
        else if (c == CLOSE_DELIM)
        {
            // Delete trailing space, if any
            int len = name.Length;
            if (len > 0 && name[len - 1] == SPACE)
            {
                name.Length = len - 1;
            }

            c = UCharacter.GetCharFromExtendedName(name.ToString());
            if (c != -1)
            {
                // Lookup succeeded
                cursor++; // advance over CLOSE_DELIM

                string replacement = UTF16.ValueOf(c);
                text.Replace(openPos, cursor, replacement);

                // Adjust indices for the change in the length of
                // the string. Do not assume that the replacement has
                // length 1, in case of surrogates.
                int delta = cursor - openPos - replacement.Length;
                cursor -= delta;
                limit -= delta;
            }

            // If the lookup failed, we leave things as-is and
            // still go back to looking for an open delimiter.
            collecting = false;
            openPos = -1; // close off candidate
            continue; // *** reprocess Char32At(cursor)
        }
        else if (legal.Contains(c))
        {
            UTF16.Append(name, c);
            // If we go past the longest possible name then abort.
            // maxLen includes temporary trailing space, so use '>='.
            if (name.Length >= maxLen)
            {
                collecting = false;
            }
        }
        else
        {
            // Invalid character: back up by one before the advance below
            // so that this character is looked at again outside a name.
            --cursor;
            collecting = false;
        }

        cursor += UTF16.GetCharCount(c);
    }

    offsets.ContextLimit += limit - offsets.Limit;
    offsets.Limit = limit;
    // In incremental mode, only advance the cursor up to the last
    // open delimiter candidate.
    offsets.Start = (isIncremental && openPos >= 0) ? openPos : cursor;
}
/// <summary>
/// Selects the phrase for the given <paramref name="keyword"/>.
/// </summary>
/// <param name="keyword">A phrase selection keyword.</param>
/// <returns>The string containing the formatted select message.</returns>
/// <exception cref="ArgumentException">When the given keyword is not a "pattern identifier".</exception>
/// <exception cref="InvalidOperationException">When no pattern has been applied (the message pattern is null or has no parts).</exception>
/// <stable>ICU 4.4</stable>
public string Format(string keyword)
{
    // Check for the validity of the keyword
    if (!PatternProps.IsIdentifier(keyword))
    {
        throw new ArgumentException("Invalid formatting argument.");
    }
    // If no pattern was applied, throw an exception
    if (msgPattern == null || msgPattern.CountParts() == 0)
    {
        throw new InvalidOperationException("Invalid format error.");
    }

    // Get the appropriate sub-message.
    int msgStart = FindSubMessage(msgPattern, 0, keyword);
    if (!msgPattern.JdkAposMode)
    {
        int msgLimit = msgPattern.GetLimitPartIndex(msgStart);
        int subStart = msgPattern.GetPart(msgStart).Limit;
        int subEnd = msgPattern.GetPatternIndex(msgLimit);
        // string.Substring takes (startIndex, length), not (start, end):
        // convert the end index into a length.
        return msgPattern.PatternString.Substring(subStart, subEnd - subStart);
    }

    // JDK compatibility mode: Remove SKIP_SYNTAX.
    StringBuilder result = null;
    int prevIndex = msgPattern.GetPart(msgStart).Limit;
    for (int i = msgStart; ;)
    {
        MessagePatternPart part = msgPattern.GetPart(++i);
        MessagePatternPartType type = part.Type;
        int index = part.Index;
        if (type == MessagePatternPartType.MsgLimit)
        {
            if (result == null)
            {
                return pattern.Substring(prevIndex, index - prevIndex); // ICU4N: Corrected 2nd arg
            }
            else
            {
                // StringBuilder.Append(string, int, int) takes (startIndex, count).
                return result.Append(pattern, prevIndex, index - prevIndex).ToString();
            }
        }
        else if (type == MessagePatternPartType.SkipSyntax)
        {
            if (result == null)
            {
                result = new StringBuilder();
            }
            // Copy the text before the skipped syntax, then resume after it.
            result.Append(pattern, prevIndex, index - prevIndex);
            prevIndex = part.Limit;
        }
        else if (type == MessagePatternPartType.ArgStart)
        {
            if (result == null)
            {
                result = new StringBuilder();
            }
            result.Append(pattern, prevIndex, index - prevIndex);
            prevIndex = index;
            // Jump to the part that closes this argument and copy the whole
            // argument text through AppendReducedApostrophes.
            i = msgPattern.GetLimitPartIndex(i);
            index = msgPattern.GetPart(i).Limit;
            // NOTE(review): assumes AppendReducedApostrophes takes (start, limit) indices - confirm.
            MessagePattern.AppendReducedApostrophes(pattern, prevIndex, index, result);
            prevIndex = index;
        }
    }
}
/// <summary>
/// Reads one string from <c>rules</c> into <paramref name="raw"/>, resolving
/// apostrophe quoting and backslash escapes, and then validates the collected text.
/// </summary>
/// <param name="i">Index in <c>rules</c> at which the string starts.</param>
/// <param name="raw">Receives the unquoted, unescaped text. It is cleared first.</param>
/// <returns>
/// The index of the unquoted white space or syntax character that ended the
/// string, or <c>rules.Length</c> if the end of the rules was reached. The current
/// index is also returned after an error has been reported through <c>SetParseError</c>.
/// </returns>
private int ParseString(int i, StringBuilder raw)
{
    raw.Length = 0;

    while (i < rules.Length)
    {
        char ch = rules[i++];

        if (!IsSyntaxChar(ch))
        {
            if (PatternProps.IsWhiteSpace(ch))
            {
                // Unquoted white space terminates a string.
                --i;
                break;
            }
            raw.Append(ch);
            continue;
        }

        if (ch == '\'')
        {
            if (i < rules.Length && rules[i] == '\'')
            {
                // Double apostrophe, encodes a single one.
                raw.Append('\'');
                ++i;
                continue;
            }

            // Quote literal text until the next single apostrophe.
            while (true)
            {
                if (i == rules.Length)
                {
                    SetParseError("quoted literal text missing terminating apostrophe");
                    return i;
                }

                ch = rules[i++];
                if (ch == '\'')
                {
                    if (i < rules.Length && rules[i] == '\'')
                    {
                        // Double apostrophe inside quoted literal text,
                        // still encodes a single apostrophe.
                        ++i;
                    }
                    else
                    {
                        break;
                    }
                }
                raw.Append(ch);
            }
        }
        else if (ch == '\\')
        {
            if (i == rules.Length)
            {
                SetParseError("backslash escape at the end of the rule string");
                return i;
            }

            // The escaped code point is taken literally.
            int escaped = rules.CodePointAt(i);
            raw.AppendCodePoint(escaped);
            i += Character.CharCount(escaped);
        }
        else
        {
            // Any other syntax character terminates a string.
            --i;
            break;
        }
    }

    // Reject unpaired surrogates and U+FFFD..U+FFFF in the collected text.
    int j = 0;
    while (j < raw.Length)
    {
        int codePoint = raw.CodePointAt(j);
        if (IsSurrogate(codePoint))
        {
            SetParseError("string contains an unpaired surrogate");
            return i;
        }
        if (0xfffd <= codePoint && codePoint <= 0xffff)
        {
            SetParseError("string contains U+FFFD, U+FFFE or U+FFFF");
            return i;
        }
        j += Character.CharCount(codePoint);
    }

    return i;
}