/// <summary>
/// Scans forward in <c>rules</c> over a run of pattern white space.
/// </summary>
/// <param name="i">Index in <c>rules</c> at which scanning starts.</param>
/// <returns>
/// The first index at or after <paramref name="i"/> whose character is not
/// pattern white space. If <paramref name="i"/> is already at or past the end
/// of <c>rules</c>, it is returned unchanged.
/// </returns>
private int SkipWhiteSpace(int i)
{
    for (; i < rules.Length; ++i)
    {
        if (!PatternProps.IsWhiteSpace(rules[i]))
        {
            break;
        }
    }
    return i;
}
/// <summary>
/// Parses the reset that starts at <c>ruleIndex</c>: an optional
/// <c>[before n]</c> prefix followed by either a special position (starting
/// with '[') or a tailoring string, and reports the reset to <c>sink</c>.
/// </summary>
/// <returns>
/// The reset strength: Primary, or one of the next two strength values, for
/// <c>[before 1]</c> through <c>[before 3]</c>, otherwise Identical. Returns
/// <c>UCOL_DEFAULT</c> (cast to <see cref="CollationStrength"/>) after
/// reporting a parse error.
/// </returns>
private CollationStrength ParseResetAndPosition()
{
    // Skip the character at ruleIndex (the reset operator) and any white space after it.
    int pos = SkipWhiteSpace(ruleIndex + 1);
    CollationStrength resetStrength = CollationStrength.Identical;

    if (rules.RegionMatches(pos, BEFORE, 0, BEFORE.Length, StringComparison.Ordinal))
    {
        int afterKeyword = pos + BEFORE.Length;
        if (afterKeyword < rules.Length && PatternProps.IsWhiteSpace(rules[afterKeyword]))
        {
            int digitPos = SkipWhiteSpace(afterKeyword + 1);
            // Need room for both the digit and the closing ']'.
            if (digitPos + 1 < rules.Length)
            {
                char digit = rules[digitPos];
                bool isOneToThree = 0x31 <= digit && digit <= 0x33; // '1'..'3'
                if (isOneToThree && rules[digitPos + 1] == 0x5d) // ']'
                {
                    // &[before n] with n=1 or 2 or 3
                    resetStrength = CollationStrength.Primary + (digit - 0x31);
                    pos = SkipWhiteSpace(digitPos + 2);
                }
            }
        }
    }

    if (pos >= rules.Length)
    {
        SetParseError("reset without position");
        return (CollationStrength)UCOL_DEFAULT;
    }

    // '[' introduces a special position; anything else is a tailoring string.
    pos = rules[pos] == 0x5b
        ? ParseSpecialPosition(pos, rawBuilder.Value)
        : ParseTailoringString(pos, rawBuilder.Value);

    try
    {
        sink.AddReset(resetStrength, rawBuilder);
    }
    catch (Exception e)
    {
        SetParseError("adding reset failed", e);
        return (CollationStrength)UCOL_DEFAULT;
    }

    // Only commit the new position once the reset has been accepted.
    ruleIndex = pos;
    return resetStrength;
}
/// <summary>
/// Top-level parse loop: stores <paramref name="ruleString"/> in <c>rules</c>,
/// resets <c>ruleIndex</c> to 0, and dispatches on each non-white-space
/// character until the end of the string is reached.
/// </summary>
/// <param name="ruleString">The rule text to parse.</param>
private void Parse(string ruleString)
{
    rules = ruleString;
    ruleIndex = 0;

    while (ruleIndex < rules.Length)
    {
        char current = rules[ruleIndex];

        if (PatternProps.IsWhiteSpace(current))
        {
            ++ruleIndex;
        }
        else if (current == '&')
        {
            ParseRuleChain();
        }
        else if (current == '[')
        {
            ParseSetting();
        }
        else if (current == '#')
        {
            // starts a comment, until the end of the line
            ruleIndex = SkipComment(ruleIndex + 1);
        }
        else if (current == '@')
        {
            // is equivalent to [backwards 2]
            settings.SetFlag(CollationSettings.BackwardSecondary, true);
            ++ruleIndex;
        }
        else if (current == '!')
        {
            // '!' used to turn on Thai/Lao character reversal
            // Accept but ignore. The root collator has contractions
            // that are equivalent to the character reversal, where appropriate.
            ++ruleIndex;
        }
        else
        {
            // NOTE(review): ruleIndex is not advanced here, so the loop only
            // terminates if SetParseError throws — confirm against its definition.
            SetParseError("expected a reset or setting or comment");
        }
    }
}
/// <summary>
/// Reads a sequence of words from <c>rules</c> into <paramref name="raw"/>,
/// collapsing each run of pattern white space into a single space and
/// stopping at the first syntax character other than '-' and '_'.
/// </summary>
/// <param name="i">Index in <c>rules</c> at which to start; leading white space is skipped.</param>
/// <param name="raw">Receives the words; cleared first. A trailing space is removed before returning.</param>
/// <returns>
/// The index of the terminating syntax character, or 0 if the end of
/// <c>rules</c> is reached before one is found.
/// </returns>
private int ReadWords(int i, StringBuilder raw)
{
    raw.Length = 0;
    i = SkipWhiteSpace(i);

    while (i < rules.Length)
    {
        char ch = rules[i];

        // syntax except -_
        bool terminates = IsSyntaxChar(ch) && ch != 0x2d && ch != 0x5f;
        if (terminates)
        {
            int last = raw.Length - 1;
            if (last >= 0 && raw[last] == ' ')
            {
                // remove trailing space
                raw.Length = last;
            }
            return i;
        }

        if (PatternProps.IsWhiteSpace(ch))
        {
            raw.Append(' ');
            i = SkipWhiteSpace(i + 1);
        }
        else
        {
            raw.Append(ch);
            ++i;
        }
    }

    // Ran off the end without meeting a terminating syntax character.
    return 0;
}
/// <summary>
/// Implements <see cref="Transliterator.HandleTransliterate(IReplaceable, Position, bool)"/>.
/// </summary>
/// <remarks>
/// Scans <paramref name="text"/> from <c>offsets.Start</c> up to <c>offsets.Limit</c>
/// for sequences that match <c>OPEN_PAT</c>, followed by a character name, followed by
/// <c>CLOSE_DELIM</c>. When the name resolves through
/// <c>UCharacter.GetCharFromExtendedName</c>, the whole sequence is replaced by that
/// code point. Sequences whose name does not resolve are left unchanged.
/// </remarks>
/// <param name="text">The text to modify in place.</param>
/// <param name="offsets">
/// The range to process. On return <c>Limit</c> and <c>ContextLimit</c> are adjusted by the
/// net change in length, and <c>Start</c> is set to the position reached.
/// </param>
/// <param name="isIncremental">
/// When true and an open-delimiter candidate is still pending, <c>offsets.Start</c> is set
/// to that candidate's position instead of the scan position.
/// </param>
protected override void HandleTransliterate(IReplaceable text, Position offsets, bool isIncremental)
{
    int maxLen = UCharacterName.Instance.MaxCharNameLength + 1; // allow for temporary trailing space

    StringBuffer name = new StringBuffer(maxLen);

    // Get the legal character set
    UnicodeSet legal = new UnicodeSet();
    UCharacterName.Instance.GetCharNameCharacters(legal);

    int cursor = offsets.Start;
    int limit = offsets.Limit;

    // Modes:
    // 0 - looking for open delimiter
    // 1 - after open delimiter
    int mode = 0;
    int openPos = -1; // open delim candidate pos

    int c;
    while (cursor < limit)
    {
        c = text.Char32At(cursor);

        switch (mode)
        {
            case 0: // looking for open delimiter
                if (c == OPEN_DELIM)
                {
                    // quick check first
                    openPos = cursor;
                    int i = Utility.ParsePattern(OPEN_PAT, text, cursor, limit);
                    if (i >= 0 && i < limit)
                    {
                        mode = 1;
                        name.Length = 0;
                        cursor = i;
                        continue; // *** reprocess Char32At(cursor)
                    }
                }
                break;

            case 1: // after open delimiter
                // Look for legal chars. If \s+ is found, convert it
                // to a single space. If closeDelimiter is found, exit
                // the loop. If any other character is found, exit the
                // loop. If the limit is reached, exit the loop.

                // Convert \s+ => SPACE. This assumes there are no
                // runs of >1 space characters in names.
                if (PatternProps.IsWhiteSpace(c))
                {
                    // Ignore leading whitespace
                    if (name.Length > 0 && name[name.Length - 1] != SPACE)
                    {
                        name.Append(SPACE);
                        // If we are too long then abort. maxLen includes
                        // temporary trailing space, so use '>'.
                        if (name.Length > maxLen)
                        {
                            mode = 0;
                        }
                    }
                    break;
                }

                if (c == CLOSE_DELIM)
                {
                    int len = name.Length;

                    // Delete trailing space, if any
                    if (len > 0 && name[len - 1] == SPACE)
                    {
                        name.Length = --len;
                    }

                    c = UCharacter.GetCharFromExtendedName(name.ToString());
                    if (c != -1)
                    {
                        // Lookup succeeded

                        // assert(UTF16.getCharCount(CLOSE_DELIM) == 1);
                        cursor++; // advance over CLOSE_DELIM

                        string str = UTF16.ValueOf(c);
                        text.Replace(openPos, cursor, str);

                        // Adjust indices for the change in the length of
                        // the string. Do not assume that str.Length ==
                        // 1, in case of surrogates.
                        int delta = cursor - openPos - str.Length;
                        cursor -= delta;
                        limit -= delta;
                        // assert(cursor == openPos + str.Length);
                    }
                    // If the lookup failed, we leave things as-is and
                    // still switch to mode 0 and continue.
                    mode = 0;
                    openPos = -1; // close off candidate
                    continue; // *** reprocess Char32At(cursor)
                }

                if (legal.Contains(c))
                {
                    UTF16.Append(name, c);
                    // If we go past the longest possible name then abort.
                    // maxLen includes temporary trailing space, so use '>='.
                    if (name.Length >= maxLen)
                    {
                        mode = 0;
                    }
                }
                else
                {
                    // Invalid character: reprocess it in mode 0 from the same
                    // position. Decrementing the cursor and then falling through
                    // to the shared advance only lands back on the same position
                    // for a one-unit character; for a two-unit (supplementary)
                    // character it would leave the cursor inside the pair and
                    // the following advance could step over the next code unit.
                    mode = 0;
                    continue; // *** reprocess Char32At(cursor)
                }

                break;
        }

        cursor += UTF16.GetCharCount(c);
    }

    offsets.ContextLimit += limit - offsets.Limit;
    offsets.Limit = limit;
    // In incremental mode, only advance the cursor up to the last
    // open delimiter candidate.
    offsets.Start = (isIncremental && openPos >= 0) ? openPos : cursor;
}
/// <summary>
/// Reads one string from <c>rules</c> starting at <paramref name="i"/> into
/// <paramref name="raw"/>, handling apostrophe quoting and backslash escapes,
/// then checks the result for disallowed code points.
/// </summary>
/// <param name="i">Index in <c>rules</c> at which the string starts.</param>
/// <param name="raw">Receives the unquoted, unescaped text; cleared first.</param>
/// <returns>
/// The index of the character that ended the string (unquoted white space or
/// another syntax character), or <c>rules.Length</c> if the input ran out.
/// When a parse error is reported, the index reached at that point is returned.
/// </returns>
private int ParseString(int i, StringBuilder raw)
{
    raw.Length = 0;

    while (i < rules.Length)
    {
        char ch = rules[i++];

        if (!IsSyntaxChar(ch))
        {
            if (PatternProps.IsWhiteSpace(ch))
            {
                // Unquoted white space terminates a string.
                --i;
                break;
            }
            raw.Append(ch);
            continue;
        }

        if (ch == 0x5c) // backslash
        {
            if (i == rules.Length)
            {
                SetParseError("backslash escape at the end of the rule string");
                return i;
            }
            // The escaped code point is taken literally, whatever it is.
            int escaped = rules.CodePointAt(i);
            raw.AppendCodePoint(escaped);
            i += Character.CharCount(escaped);
            continue;
        }

        if (ch != 0x27)
        {
            // Any other syntax character terminates a string.
            --i;
            break;
        }

        // From here on ch is an apostrophe.
        if (i < rules.Length && rules[i] == 0x27)
        {
            // Double apostrophe, encodes a single one.
            raw.Append((char)0x27);
            ++i;
            continue;
        }

        // Quote literal text until the next single apostrophe.
        while (true)
        {
            if (i == rules.Length)
            {
                SetParseError("quoted literal text missing terminating apostrophe");
                return i;
            }
            ch = rules[i++];
            if (ch == 0x27)
            {
                bool doubled = i < rules.Length && rules[i] == 0x27;
                if (!doubled)
                {
                    break;
                }
                // Double apostrophe inside quoted literal text,
                // still encodes a single apostrophe.
                ++i;
            }
            raw.Append(ch);
        }
    }

    // Reject unpaired surrogates and U+FFFD..U+FFFF in the collected text.
    int j = 0;
    while (j < raw.Length)
    {
        int cp = raw.CodePointAt(j);
        if (IsSurrogate(cp))
        {
            SetParseError("string contains an unpaired surrogate");
            return i;
        }
        if (0xfffd <= cp && cp <= 0xffff)
        {
            SetParseError("string contains U+FFFD, U+FFFE or U+FFFF");
            return i;
        }
        j += Character.CharCount(cp);
    }

    return i;
}