/// <summary>
/// Reads the next code point in the current iteration direction and moves
/// <c>index</c> across it.
/// </summary>
/// <returns>
/// The code point that was read, or -1 when the iteration cannot advance
/// any further in the current direction.
/// </returns>
private int Next()
{
    if (forward)
    {
        if (index >= contextLimit)
        {
            // Forward context iteration ran into the limit; remember that.
            reachedLimit = true;
            return -1;
        }

        int ahead = rep.Char32At(index);
        index += UTF16.GetCharCount(ahead);
        return ahead;
    }

    // Backward iteration: stop once the start of the context is reached.
    if (index <= contextStart)
    {
        return -1;
    }

    int behind = rep.Char32At(index - 1);
    index -= UTF16.GetCharCount(behind);
    return behind;
}
/// <summary>
/// Default implementation of <see cref="IUnicodeMatcher.Matches(IReplaceable, int[], int, bool)"/> for Unicode
/// filters. Tests the code point found at <c>offset[0]</c> against this filter.
/// </summary>
/// <param name="text">The text being matched.</param>
/// <param name="offset">
/// One-element array holding the current position. On a match it is moved
/// past the matched code point: forward when the position is below
/// <paramref name="limit"/>, backward when it is above.
/// </param>
/// <param name="limit">The boundary of the region being matched.</param>
/// <param name="incremental">
/// When true and the position equals <paramref name="limit"/>, a partial
/// match is reported instead of a mismatch.
/// </param>
/// <returns>The degree of match.</returns>
/// <stable>ICU 2.0</stable>
public virtual MatchDegree Matches(IReplaceable text, int[] offset, int limit, bool incremental)
{
    int position = offset[0];

    if (position < limit)
    {
        // Forward direction: consume the code point at the position.
        int cp = text.Char32At(position);
        if (Contains(cp))
        {
            offset[0] = position + UTF16.GetCharCount(cp);
            return MatchDegree.Match;
        }

        return MatchDegree.Mismatch;
    }

    if (position > limit)
    {
        // Reverse direction.
        if (Contains(text.Char32At(position)))
        {
            // Back up by one code unit, then by however many extra units
            // the code point found there occupies, so that the offset
            // stays on the lead surrogate of a pair.
            int previous = position - 1;
            offset[0] = previous;
            if (previous >= 0)
            {
                offset[0] = previous - (UTF16.GetCharCount(text.Char32At(previous)) - 1);
            }

            return MatchDegree.Match;
        }

        return MatchDegree.Mismatch;
    }

    // The position sits exactly on the limit.
    return incremental ? MatchDegree.PartialMatch : MatchDegree.Mismatch;
}
/// <summary>
/// Implements <see cref="Transliterator.HandleTransliterate(IReplaceable, Position, bool)"/>.
/// Replaces every character between <c>pos.Start</c> and <c>pos.Limit</c>
/// with <c>prefix</c> + its number (in <c>radix</c>, at least
/// <c>minDigits</c> digits) + <c>suffix</c>. Values that do not fit in 16
/// bits use <c>supplementalHandler</c>'s prefix, radix, digit count and
/// suffix instead when such a handler is set.
/// </summary>
/// <param name="text">The text to rewrite in place.</param>
/// <param name="pos">Position indices; start, limit and context limit are updated on return.</param>
/// <param name="incremental">Not consulted by this implementation.</param>
protected override void HandleTransliterate(IReplaceable text, Position pos, bool incremental)
{
    int cursor = pos.Start;
    int end = pos.Limit;

    // The buffer normally keeps the prefix in place and is truncated back to
    // it for every character; after the supplemental handler has overwritten
    // it, the prefix has to be rebuilt once.
    StringBuilder escape = new StringBuilder(prefix);
    int prefixLength = prefix.Length;
    bool prefixOverwritten = false;

    while (cursor < end)
    {
        int c;
        int sourceLength;
        if (grokSupplementals)
        {
            c = text.Char32At(cursor);
            sourceLength = UTF16.GetCharCount(c);
        }
        else
        {
            c = text[cursor];
            sourceLength = 1;
        }

        bool useSupplementalForm = (c & 0xFFFF0000) != 0 && supplementalHandler != null;
        if (useSupplementalForm)
        {
            escape.Length = 0;
            escape.Append(supplementalHandler.prefix);
            Utility.AppendNumber(escape, c, supplementalHandler.radix, supplementalHandler.minDigits);
            escape.Append(supplementalHandler.suffix);
            prefixOverwritten = true;
        }
        else
        {
            if (prefixOverwritten)
            {
                escape.Length = 0;
                escape.Append(prefix);
                prefixOverwritten = false;
            }
            else
            {
                escape.Length = prefixLength;
            }

            Utility.AppendNumber(escape, c, radix, minDigits);
            escape.Append(suffix);
        }

        int escapeLength = escape.Length;
        text.Replace(cursor, cursor + sourceLength, escape.ToString());

        // Continue after the inserted escape and account for the growth.
        cursor += escapeLength;
        end += escapeLength - sourceLength;
    }

    pos.ContextLimit += end - pos.Limit;
    pos.Limit = end;
    pos.Start = cursor;
}
/// <summary>
/// Fetches the next code point to be case-mapped while iterating forward,
/// and records its boundaries in <c>cpStart</c> and <c>cpLimit</c>.
/// </summary>
/// <returns>The next code point to be case-mapped, or -1 once <c>cpLimit</c> has reached <c>limit</c>.</returns>
public virtual int NextCaseMapCP()
{
    if (cpLimit >= limit)
    {
        return -1;
    }

    // The new code point begins where the previous one ended.
    cpStart = cpLimit;
    int codePoint = rep.Char32At(cpLimit);
    cpLimit += UTF16.GetCharCount(codePoint);
    return codePoint;
}
/// <summary>
/// Find the source and target sets, subject to the input filter.
/// There is a known issue with filters containing multiple characters.
/// </summary>
/// <param name="filter">Filter that every key position of the pattern must be able to satisfy.</param>
/// <param name="sourceSet">Receives the characters matchable by the key when the filter admits the rule.</param>
/// <param name="targetSet">Receives the replacement set of the rule's output when the filter admits the rule.</param>
/// <param name="revisiting">Not used by this method.</param>
internal void AddSourceTargetSet(UnicodeSet filter, UnicodeSet sourceSet, UnicodeSet targetSet, UnicodeSet revisiting)
{
    // TODO: Problem: the rule is [{ab}]c > x
    // The filter is [a{bc}].
    // If the input is abc, then the rule will work.
    // However, following code applying the filter won't catch that case.

    int limit = anteContextLength + keyLength;
    UnicodeSet tempSource = new UnicodeSet();
    UnicodeSet temp = new UnicodeSet();

    // We need to walk through the pattern.
    // Iff some of the characters at ALL of the positions are matched by the
    // filter, then we add tempSource to sourceSet.
    for (int i = anteContextLength; i < limit;)
    {
        int ch = UTF16.CharAt(pattern, i);
        i += UTF16.GetCharCount(ch);

        IUnicodeMatcher matcher = data.LookupMatcher(ch);
        if (matcher == null)
        {
            // Literal character: it must itself pass the filter.
            if (!filter.Contains(ch))
            {
                return;
            }
            tempSource.Add(ch);
            continue;
        }

        // Use a type test rather than a cast guarded by
        // catch (InvalidCastException): exceptions are not control flow.
        UnicodeSet matcherSet = matcher as UnicodeSet;
        if (matcherSet != null)
        {
            if (!filter.ContainsSome(matcherSet))
            {
                return;
            }
            matcher.AddMatchSetTo(tempSource);
        }
        else
        {
            // The matcher is not a UnicodeSet: materialize its match set first.
            temp.Clear();
            matcher.AddMatchSetTo(temp);
            if (!filter.ContainsSome(temp))
            {
                return;
            }
            tempSource.AddAll(temp);
        }
    }

    // If we made our way through the gauntlet, add to source/target.
    sourceSet.AddAll(tempSource);
    output.AddReplacementSetTo(targetSet);
}
/// <summary>
/// Tries the rules indexed under the low byte of the code point at
/// <c>pos.Start</c>, in order, and applies the first one that matches.
/// When no rule matches, <c>pos.Start</c> is advanced past one code point.
/// </summary>
/// <param name="text">The text to be transliterated.</param>
/// <param name="pos">The position indices, which will be updated.</param>
/// <param name="incremental">If true, assume new text may be inserted
/// at the limit; passed through to each rule.</param>
/// <returns>False when a rule reports <see cref="MatchDegree.PartialMatch"/>,
/// indicating that transliteration should stop until more text
/// arrives; true otherwise.</returns>
public virtual bool Transliterate(IReplaceable text, TransliterationPosition pos, bool incremental)
{
    int bucket = text.Char32At(pos.Start) & 0xFF;

    for (int ruleIndex = index[bucket]; ruleIndex < index[bucket + 1]; ++ruleIndex)
    {
        MatchDegree degree = rules[ruleIndex].MatchAndReplace(text, pos, incremental);

        if (degree == MatchDegree.Match)
        {
            if (Transliterator.DEBUG)
            {
                string label = incremental ? "Rule.i: match " : "Rule: match ";
                Console.Out.WriteLine(label + rules[ruleIndex].ToRule(true) + " => " + UtilityExtensions.FormatInput(text, pos));
            }
            return true;
        }

        if (degree == MatchDegree.PartialMatch)
        {
            if (Transliterator.DEBUG)
            {
                string label = incremental ? "Rule.i: partial match " : "Rule: partial match ";
                Console.Out.WriteLine(label + rules[ruleIndex].ToRule(true) + " => " + UtilityExtensions.FormatInput(text, pos));
            }
            return false;
        }

        // Any other result: this rule does not apply, try the next one.
        if (Transliterator.DEBUG)
        {
            Console.Out.WriteLine("Rule: no match " + rules[ruleIndex]);
        }
    }

    // No match or partial match from any rule: step over one code point.
    pos.Start += UTF16.GetCharCount(text.Char32At(pos.Start));
    if (Transliterator.DEBUG)
    {
        string label = incremental ? "Rule.i: no match => " : "Rule: no match => ";
        Console.Out.WriteLine(label + UtilityExtensions.FormatInput(text, pos));
    }
    return true;
}
/// <summary>
/// Adds to <paramref name="output"/> the permutations of the code points of
/// <paramref name="source"/>, built recursively by choosing each eligible
/// code point as the first one and permuting the remainder.
/// </summary>
/// <param name="source">The string whose code points are permuted.</param>
/// <param name="skipZeros">
/// When true, a code point that is not at index 0 and whose combining class
/// is zero is never chosen as the leading code point.
/// </param>
/// <param name="output">The set that receives the generated strings.</param>
public static void Permute(string source, bool skipZeros, ISet<string> output)
{
    // TODO: optimize

    // Zero or one code point: the string is its own only permutation.
    // The cheap length test runs first so that code points are only
    // counted for very short strings.
    bool atMostOneCodePoint = source.Length <= 2 && UTF16.CountCodePoint(source) <= 1;
    if (atMostOneCodePoint)
    {
        output.Add(source);
        return;
    }

    ISet<string> rest = new HashSet<string>();
    int i = 0;
    while (i < source.Length)
    {
        int cp = UTF16.CharAt(source, i);
        int next = i + UTF16.GetCharCount(cp);

        bool keptInPlace = skipZeros && i != 0 && UCharacter.GetCombiningClass(cp) == 0;
        if (!keptInPlace)
        {
            // Permute everything before and after this code point...
            rest.Clear();
            string others = source.Substring(0, i) + source.Substring(next);
            Permute(others, skipZeros, rest);

            // ...and prefix this code point to each result.
            string head = UTF16.ValueOf(source, i);
            foreach (string tail in rest)
            {
                output.Add(head + tail);
            }
        }

        i = next;
    }
}
/// <summary>
/// Implementation of <see cref="IUnicodeMatcher"/> API. Walks the pattern one
/// code point at a time and unions into the given set either the code point
/// itself or, when <c>data</c> maps it to a matcher, that matcher's match set.
/// </summary>
/// <param name="toUnionTo">The set into which to union the source characters.</param>
public virtual void AddMatchSetTo(UnicodeSet toUnionTo)
{
    int position = 0;
    while (position < pattern.Length)
    {
        int codePoint = UTF16.CharAt(pattern, position);
        IUnicodeMatcher nested = data.LookupMatcher(codePoint);

        if (nested != null)
        {
            nested.AddMatchSetTo(toUnionTo);
        }
        else
        {
            toUnionTo.Add(codePoint);
        }

        position += UTF16.GetCharCount(codePoint);
    }
}
/// <summary>
/// Walks the output string one code point at a time and unions into the
/// given set either the code point itself or, when <c>data</c> maps it to a
/// replacer, that replacer's replacement set.
/// </summary>
/// <param name="toUnionTo">The set into which to union the output characters.</param>
public virtual void AddReplacementSetTo(UnicodeSet toUnionTo)
{
    int position = 0;
    while (position < output.Length)
    {
        int codePoint = UTF16.CharAt(output, position);
        IUnicodeReplacer nested = data.LookupReplacer(codePoint);

        if (nested != null)
        {
            nested.AddReplacementSetTo(toUnionTo);
        }
        else
        {
            toUnionTo.Add(codePoint);
        }

        position += UTF16.GetCharCount(codePoint);
    }
}
/// <summary>
/// Implements <see cref="Transliterator.HandleTransliterate(IReplaceable, Position, bool)"/>.
/// Replaces each code point between <c>offsets.Start</c> and
/// <c>offsets.Limit</c> that has an extended name with
/// <c>OPEN_DELIM</c> + name + <c>CLOSE_DELIM</c>.
/// </summary>
/// <param name="text">The text to rewrite in place.</param>
/// <param name="offsets">Position indices; start, limit and context limit are updated on return.</param>
/// <param name="isIncremental">Not consulted by this implementation.</param>
protected override void HandleTransliterate(IReplaceable text, Position offsets, bool isIncremental)
{
    int position = offsets.Start;
    int end = offsets.Limit;

    // The opening delimiter stays at the front of the buffer; only the part
    // after it is rebuilt for each code point.
    StringBuilder replacement = new StringBuilder();
    replacement.Append(OPEN_DELIM);

    while (position < end)
    {
        int c = text.Char32At(position);
        string name = UCharacter.GetExtendedName(c);
        if (name == null)
        {
            ++position;
            continue;
        }

        replacement.Length = OPEN_DELIM_LEN;
        replacement.Append(name).Append(CLOSE_DELIM);

        int consumed = UTF16.GetCharCount(c);
        text.Replace(position, position + consumed, replacement.ToString());

        int produced = replacement.Length;
        position += produced;        // continue after the inserted text
        end += produced - consumed;  // change in length
    }

    offsets.ContextLimit += end - offsets.Limit;
    offsets.Limit = end;
    offsets.Start = position;
}
//
//  RBBISymbolTable::parseReference   This function from the abstract symbol table interface
//                                    scans the source text for a variable name.
//                                    It only scans; it does not look the name up.
//
//  Scanning starts at pos.Index and stops at limit or at the first code point
//  that is not an identifier part (the first code point must also be an
//  identifier start). On success pos.Index is moved past the name and the
//  name is returned; otherwise pos is left alone and "" is returned.
//
public virtual string ParseReference(string text, ParsePosition pos, int limit)
{
    int start = pos.Index;
    int end = start;

    while (end < limit)
    {
        int c = UTF16.CharAt(text, end);
        bool belongsToName = (end != start || UChar.IsUnicodeIdentifierStart(c))
            && UChar.IsUnicodeIdentifierPart(c);
        if (!belongsToName)
        {
            break;
        }
        end += UTF16.GetCharCount(c);
    }

    if (end == start)
    {
        // No valid name chars: indicate failure with the empty string.
        return "";
    }

    pos.Index = end;
    return text.Substring(start, end - start);
}
/// <summary>
/// Implements <see cref="Transliterator.HandleTransliterate(IReplaceable, Position, bool)"/>.
/// Scans the text between <c>pos.Start</c> and <c>pos.Limit</c>. At each position
/// every form described in <c>spec</c> (prefix, digits in a radix, suffix) is tried;
/// when one matches, the matched text is replaced by the code point whose value
/// the digits spell out.
/// </summary>
/// <param name="text">The text to rewrite in place.</param>
/// <param name="pos">Position indices; start, limit and context limit are updated on return.</param>
/// <param name="isIncremental">
/// When true, processing stops as soon as a form is partially matched up to the
/// limit, leaving <c>pos.Start</c> at the beginning of that partial match.
/// </param>
protected override void HandleTransliterate(IReplaceable text, Position pos, bool isIncremental)
{
    int start = pos.Start;
    int limit = pos.Limit;
    int i, ipat;

    //loop:
    while (start < limit)
    {
        // Loop over the forms in spec[]. Exit this loop when we
        // match one of the specs. Exit the outer loop if a
        // partial match is detected and isIncremental is true.
        for (ipat = 0; spec[ipat] != END;)
        {
            // Read the header: five values, followed in spec[] by the
            // prefix characters and then the suffix characters.
            int prefixLen = spec[ipat++];
            int suffixLen = spec[ipat++];
            int radix = spec[ipat++];
            int minDigits = spec[ipat++];
            int maxDigits = spec[ipat++];

            // s is a copy of start that is advanced over the
            // characters as we parse them.
            int s = start;
            bool match = true;

            // Match the prefix, one code unit at a time.
            for (i = 0; i < prefixLen; ++i)
            {
                if (s >= limit)
                {
                    if (i > 0)
                    {
                        // We've already matched a character. This is
                        // a partial match, so we return if in
                        // incremental mode. In non-incremental mode,
                        // go to the next spec.
                        if (isIncremental)
                        {
                            goto loop_break;
                        }
                        match = false;
                        break;
                    }
                }
                char c = text[s++];
                if (c != spec[ipat + i])
                {
                    match = false;
                    break;
                }
            }

            if (match)
            {
                // Accumulate up to maxDigits digits into u.
                int u = 0;
                int digitCount = 0;
                for (; ;)
                {
                    if (s >= limit)
                    {
                        // Check for partial match in incremental mode.
                        if (s > start && isIncremental)
                        {
                            goto loop_break;
                        }
                        break;
                    }
                    int ch = text.Char32At(s);
                    int digit = UCharacter.Digit(ch, radix);
                    if (digit < 0)
                    {
                        break;
                    }
                    s += UTF16.GetCharCount(ch);
                    u = (u * radix) + digit;
                    if (++digitCount == maxDigits)
                    {
                        break;
                    }
                }

                match = (digitCount >= minDigits);

                if (match)
                {
                    // Match the suffix, which follows the prefix in spec[].
                    for (i = 0; i < suffixLen; ++i)
                    {
                        if (s >= limit)
                        {
                            // Check for partial match in incremental mode.
                            if (s > start && isIncremental)
                            {
                                goto loop_break;
                            }
                            match = false;
                            break;
                        }
                        char c = text[s++];
                        if (c != spec[ipat + prefixLen + i])
                        {
                            match = false;
                            break;
                        }
                    }

                    if (match)
                    {
                        // At this point, we have a match
                        string str = UTF16.ValueOf(u);
                        text.Replace(start, s, str);
                        // Shrink the limit by the number of code units removed.
                        limit -= s - start - str.Length;
                        // The following break statement leaves the
                        // loop that is traversing the forms in
                        // spec[]. We then parse the next input
                        // character.
                        break;
                    }
                }
            }

            // No match for this form: skip its prefix and suffix
            // characters to reach the next header.
            ipat += prefixLen + suffixLen;
        }

        // Step over one code point, whether or not a replacement
        // was made at this position.
        if (start < limit)
        {
            start += UTF16.GetCharCount(text.Char32At(start));
        }
    }
    // Target of the early exits above; the position is still written back below.
    loop_break : { }

    pos.ContextLimit += limit - pos.Limit;
    pos.Limit = limit;
    pos.Start = start;
}
/// <summary>
/// Implements <see cref="Transliterator.HandleTransliterate(IReplaceable, Position, bool)"/>.
/// Walks the code points between <c>offsets.Start</c> and <c>offsets.Limit</c>,
/// title-casing a character that follows an uncased, non-case-ignorable
/// character (or begins the text) and lower-casing a character that follows a
/// cased one. Case-ignorable characters are left alone and do not change the mode.
/// </summary>
/// <param name="text">The text to rewrite in place.</param>
/// <param name="offsets">Position indices; start, limit and context limit are updated.</param>
/// <param name="isIncremental">
/// When true and a case mapping made the iterator reach its limit, the method
/// returns early with <c>offsets.Start</c> set to the start of the current code point.
/// </param>
protected override void HandleTransliterate(IReplaceable text, Position offsets, bool isIncremental)
{
    // The whole operation runs under a lock on this instance; it uses the
    // iter and result members as working state.
    lock (this)
    {
        // TODO reimplement, see ustrcase.c
        // using a real word break iterator
        //   instead of just looking for a transition between cased and uncased characters
        // call CaseMapTransliterator::handleTransliterate() for lowercasing? (set fMap)
        // needs to take isIncremental into account because case mappings are context-sensitive
        //   also detect when lowercasing function did not finish because of context

        // Nothing to do for an empty range.
        if (offsets.Start >= offsets.Limit)
        {
            return;
        }

        // case type: >0 cased (UCaseProps.LOWER etc.)  ==0 uncased  <0 case-ignorable
        int type;

        // Our mode; we are either converting letter toTitle or
        // toLower.
        bool doTitle = true;

        // Determine if there is a preceding context of cased case-ignorable*,
        // in which case we want to start in toLower mode.  If the
        // prior context is anything else (including empty) then start
        // in toTitle mode.
        int c, start;
        for (start = offsets.Start - 1; start >= offsets.ContextStart; start -= UTF16.GetCharCount(c))
        {
            c = text.Char32At(start);
            type = csp.GetTypeOrIgnorable(c);
            if (type > 0)
            { // cased
                doTitle = false;
                break;
            }
            else if (type == 0)
            { // uncased but not ignorable
                break;
            }
            // else (type<0) case-ignorable: continue
        }

        // Convert things after a cased character toLower; things
        // after a uncased, non-case-ignorable character toTitle.  Case-ignorable
        // characters are copied directly and do not change the mode.

        // Point the iterator at the range to convert and its context.
        iter.SetText(text);
        iter.SetIndex(offsets.Start);
        iter.SetLimit(offsets.Limit);
        iter.SetContextLimits(offsets.ContextStart, offsets.ContextLimit);

        result.Length = 0;

        // Walk through original string
        // If there is a case change, modify corresponding position in replaceable
        int delta;

        while ((c = iter.NextCaseMapCP()) >= 0)
        {
            type = csp.GetTypeOrIgnorable(c);
            if (type >= 0)
            { // not case-ignorable
                if (doTitle)
                {
                    c = csp.ToFullTitle(c, iter, result, caseLocale);
                }
                else
                {
                    c = csp.ToFullLower(c, iter, result, caseLocale);
                }
                doTitle = type == 0; // doTitle=isUncased

                if (iter.DidReachLimit && isIncremental)
                {
                    // the case mapping function tried to look beyond the context limit
                    // wait for more input
                    offsets.Start = iter.CaseMapCPStart;
                    return;
                }

                /* decode the result */
                if (c < 0)
                {
                    /* c mapped to itself, no change */
                    continue;
                }
                else if (c <= UCaseProps.MAX_STRING_LENGTH)
                {
                    /* replace by the mapping string */
                    delta = iter.Replace(result.ToString());
                    result.Length = 0;
                }
                else
                {
                    /* replace by single-code point mapping */
                    delta = iter.Replace(UTF16.ValueOf(c));
                }

                // Keep the limits in step with the change in text length.
                if (delta != 0)
                {
                    offsets.Limit += delta;
                    offsets.ContextLimit += delta;
                }
            }
        }
        // The whole range has been processed.
        offsets.Start = offsets.Limit;
    }
}
/// <summary>
/// Returns the index following <paramref name="pos"/>: past the whole code
/// point found at <paramref name="pos"/> when that index lies inside
/// <paramref name="str"/>, otherwise simply <c>pos + 1</c>.
/// </summary>
/// <param name="str">The text being indexed.</param>
/// <param name="pos">The current index.</param>
/// <returns>The next index.</returns>
internal static int PosAfter(IReplaceable str, int pos)
{
    bool insideText = pos >= 0 && pos < str.Length;
    if (!insideText)
    {
        return pos + 1;
    }

    return pos + UTF16.GetCharCount(str.Char32At(pos));
}
/// <summary>
/// Checks whether the decomposition of <paramref name="comp"/> can be found,
/// in order, among the code points of <paramref name="segment"/> starting at
/// <paramref name="segmentPos"/> (other code points may be interleaved).
/// If so, the code points not consumed form a remainder whose equivalents
/// are returned.
/// </summary>
/// <param name="comp">The code point whose decomposition is searched for.</param>
/// <param name="segment">The segment being examined.</param>
/// <param name="segmentPos">Index in <paramref name="segment"/> where the search starts.</param>
/// <param name="buf">Working buffer; it is cleared and then receives the remainder.</param>
/// <returns>
/// <c>null</c> when the decomposition is not fully found or the recombined
/// string does not compare equal to the segment; <c>SET_WITH_NULL_STRING</c>
/// when nothing remains; otherwise the equivalents of the remainder.
/// </returns>
private ISet<string> Extract(int comp, string segment, int segmentPos, StringBuffer buf)
{
    if (PROGRESS)
    {
        Console.Out.WriteLine(" extract: " + Utility.Hex(UTF16.ValueOf(comp)) + ", " + Utility.Hex(segment.Substring(segmentPos)));
    }

    // A code point without a decomposition stands for itself.
    string decomp = nfcImpl.GetDecomposition(comp) ?? UTF16.ValueOf(comp);

    // "expected" is the decomposition code point we are waiting for;
    // decompPos already points past it.
    int expected = UTF16.CharAt(decomp, 0);
    int decompPos = UTF16.GetCharCount(expected);

    buf.Length = 0; // initialize working buffer, shared among callees

    bool matchedAll = false;
    int i = segmentPos;
    while (i < segment.Length)
    {
        int cp = UTF16.CharAt(segment, i);
        int next = i + UTF16.GetCharCount(cp);

        if (cp != expected)
        {
            if (PROGRESS)
            {
                Console.Out.WriteLine(" buffer: " + Utility.Hex(UTF16.ValueOf(cp)));
            }
            // Brute force approach: everything unmatched goes to the remainder.
            // TODO: optimize using the fact that combining classes increase
            // monotonically after a zero.
            UTF16.Append(buf, cp);
        }
        else
        {
            if (PROGRESS)
            {
                Console.Out.WriteLine(" matches: " + Utility.Hex(UTF16.ValueOf(cp)));
            }
            if (decompPos == decomp.Length)
            {
                // Done, all decomposition characters were found:
                // the rest of the segment joins the remainder.
                buf.Append(segment.Substring(next));
                matchedAll = true;
                break;
            }
            // Move on to the next code point of the decomposition.
            expected = UTF16.CharAt(decomp, decompPos);
            decompPos += UTF16.GetCharCount(expected);
        }

        i = next;
    }

    if (!matchedAll)
    {
        return null; // we failed, characters left over
    }
    if (PROGRESS)
    {
        Console.Out.WriteLine("Matches");
    }
    if (buf.Length == 0)
    {
        return SET_WITH_NULL_STRING; // succeed, but no remainder
    }

    string remainder = buf.ToString();

    // Brute force check that the result is canonically equivalent.
    if (Normalizer.Compare(UTF16.ValueOf(comp) + remainder, segment.Substring(segmentPos), 0) != 0)
    {
        return null;
    }

    // Get the remaining combinations.
    return GetEquivalents2(remainder);
}
//=    public static UnicodeReplacer valueOf(String output,
//=                                          int cursorPos,
//=                                          RuleBasedTransliterator.Data data) {
//=        if (output.length() == 1) {
//=            char c = output.charAt(0);
//=            UnicodeReplacer r = data.lookupReplacer(c);
//=            if (r != null) {
//=                return r;
//=            }
//=        }
//=        return new StringReplacer(output, cursorPos, data);
//=    }

/// <summary>
/// <see cref="IUnicodeReplacer"/> API. Replaces the text between
/// <paramref name="start"/> and <paramref name="limit"/> with this object's
/// output, expanding any nested replacers found in it, and optionally
/// reports the new cursor position.
/// </summary>
/// <param name="text">The text to modify in place.</param>
/// <param name="start">Start index of the text to replace.</param>
/// <param name="limit">End index (exclusive) of the text to replace.</param>
/// <param name="cursor">
/// One-element array; element 0 receives the new cursor position when
/// <c>hasCursor</c> is set. It is also handed to nested replacers.
/// </param>
/// <returns>The length of the generated output text.</returns>
public virtual int Replace(IReplaceable text, int start, int limit, int[] cursor)
{
    int outLen;
    int newStart = 0;

    // NOTE: It should be possible to _always_ run the complex
    // processing code; just slower.  If not, then there is a bug
    // in the complex processing code.

    // Simple (no nested replacers) Processing Code :
    if (!isComplex)
    {
        text.Replace(start, limit, output);
        outLen = output.Length;

        // Setup default cursor position (for cursorPos within output)
        newStart = cursorPos;
    }

    // Complex (nested replacers) Processing Code :
    else
    {
        /* When there are segments to be copied, use the Replaceable.copy()
         * API in order to retain out-of-band data.  Copy everything to the
         * end of the string, then copy them back over the key.  This preserves
         * the integrity of indices into the key and surrounding context while
         * generating the output text.
         */
        StringBuffer buf = new StringBuffer();
        int oOutput; // offset into 'output'
        // Reset the flag; it is set again below if a nested replacer is found.
        isComplex = false;

        // The temporary buffer starts at tempStart, and extends
        // to destLimit + tempExtra.  The start of the buffer has a single
        // character from before the key.  This provides style
        // data when addition characters are filled into the
        // temporary buffer.  If there is nothing to the left, use
        // the non-character U+FFFF, which Replaceable subclasses
        // should treat specially as a "no-style character."
        // destStart points to the point after the style context
        // character, so it is tempStart+1 or tempStart+2.
        int tempStart = text.Length; // start of temp buffer
        int destStart = tempStart; // copy new text to here
        if (start > 0)
        {
            int len = UTF16.GetCharCount(text.Char32At(start - 1));
            text.Copy(start - len, start, tempStart);
            destStart += len;
        }
        else
        {
            text.Replace(tempStart, tempStart, "\uFFFF");
            destStart++;
        }
        int destLimit = destStart;
        int tempExtra = 0; // temp chars after destLimit

        for (oOutput = 0; oOutput < output.Length;)
        {
            if (oOutput == cursorPos)
            {
                // Record the position of the cursor
                newStart = buf.Length + destLimit - destStart; // relative to start
                // the buf.length() was inserted for bug 5789
                // the problem is that if we are accumulating into a buffer (when r == null below)
                // then the actual length of the text at that point needs to add the buf length.
                // there was an alternative suggested in #5789, but that looks like it won't work
                // if we have accumulated some stuff in the dest part AND have a non-zero buffer.
            }
            int c = UTF16.CharAt(output, oOutput);

            // When we are at the last position copy the right style
            // context character into the temporary buffer.  We don't
            // do this before because it will provide an incorrect
            // right context for previous replace() operations.
            int nextIndex = oOutput + UTF16.GetCharCount(c);
            if (nextIndex == output.Length)
            {
                tempExtra = UTF16.GetCharCount(text.Char32At(limit));
                text.Copy(limit, limit + tempExtra, destLimit);
            }

            IUnicodeReplacer r = data.LookupReplacer(c);
            if (r == null)
            {
                // Accumulate straight (non-segment) text.
                UTF16.Append(buf, c);
            }
            else
            {
                isComplex = true;

                // Insert any accumulated straight text.
                if (buf.Length > 0)
                {
                    text.Replace(destLimit, destLimit, buf.ToString());
                    destLimit += buf.Length;
                    buf.Length = 0;
                }

                // Delegate output generation to replacer object
                int len = r.Replace(text, destLimit, destLimit, cursor);
                destLimit += len;
            }
            oOutput = nextIndex;
        }
        // Insert any accumulated straight text.
        if (buf.Length > 0)
        {
            text.Replace(destLimit, destLimit, buf.ToString());
            destLimit += buf.Length;
        }
        // Cursor positioned exactly at the end of the output string.
        if (oOutput == cursorPos)
        {
            // Record the position of the cursor
            newStart = destLimit - destStart; // relative to start
        }

        outLen = destLimit - destStart;

        // Copy new text to start, and delete it
        text.Copy(destStart, destLimit, start);
        // The copy shifted the temporary buffer right by outLen.
        text.Replace(tempStart + outLen, destLimit + tempExtra + outLen, "");

        // Delete the old text (the key)
        text.Replace(start + outLen, limit + outLen, "");
    }

    if (hasCursor)
    {
        // Adjust the cursor for positions outside the key.  These
        // refer to code points rather than code units.  If cursorPos
        // is within the output string, then use newStart, which has
        // already been set above.
        if (cursorPos < 0)
        {
            newStart = start;
            int n = cursorPos;
            // Outside the output string, cursorPos counts code points
            while (n < 0 && newStart > 0)
            {
                newStart -= UTF16.GetCharCount(text.Char32At(newStart - 1));
                ++n;
            }
            // Apply whatever count is left after reaching the start of the text.
            newStart += n;
        }
        else if (cursorPos > output.Length)
        {
            newStart = start + outLen;
            int n = cursorPos - output.Length;
            // Outside the output string, cursorPos counts code points
            while (n > 0 && newStart < text.Length)
            {
                newStart += UTF16.GetCharCount(text.Char32At(newStart));
                --n;
            }
            // Apply whatever count is left after reaching the end of the text.
            newStart += n;
        }
        else
        {
            // Cursor is within output string.  It has been set up above
            // to be relative to start.
            newStart += start;
        }

        cursor[0] = newStart;
    }

    return(outLen);
}
/// <summary>
/// Implements <see cref="Transliterator.HandleTransliterate(IReplaceable, Position, bool)"/>.
/// Scans the text between <c>offsets.Start</c> and <c>offsets.Limit</c> for an
/// opening pattern, a character name and a closing delimiter, and replaces
/// each such sequence whose name lookup succeeds with the named code point.
/// </summary>
/// <param name="text">The text to rewrite in place.</param>
/// <param name="offsets">Position indices; start, limit and context limit are updated on return.</param>
/// <param name="isIncremental">
/// When true and an open delimiter candidate is still pending at the end,
/// <c>offsets.Start</c> is set to that candidate instead of the final cursor.
/// </param>
protected override void HandleTransliterate(IReplaceable text, Position offsets, bool isIncremental)
{
    int maxLen = UCharacterName.Instance.MaxCharNameLength + 1; // allow for temporary trailing space

    StringBuffer name = new StringBuffer(maxLen);

    // Get the legal character set
    UnicodeSet legal = new UnicodeSet();
    UCharacterName.Instance.GetCharNameCharacters(legal);

    int cursor = offsets.Start;
    int limit = offsets.Limit;

    // Modes:
    // 0 - looking for open delimiter
    // 1 - after open delimiter
    int mode = 0;
    int openPos = -1; // open delim candidate pos

    int c;
    while (cursor < limit)
    {
        c = text.Char32At(cursor);

        switch (mode)
        {
            case 0: // looking for open delimiter
                if (c == OPEN_DELIM)
                { // quick check first
                    openPos = cursor;
                    int i = Utility.ParsePattern(OPEN_PAT, text, cursor, limit);
                    if (i >= 0 && i < limit)
                    {
                        mode = 1;
                        name.Length = 0;
                        cursor = i;
                        continue; // *** reprocess char32At(cursor)
                    }
                }
                break;

            case 1: // after open delimiter
                // Look for legal chars.  If \s+ is found, convert it
                // to a single space.  If closeDelimiter is found, exit
                // the loop.  If any other character is found, exit the
                // loop.  If the limit is reached, exit the loop.

                // Convert \s+ => SPACE.  This assumes there are no
                // runs of >1 space characters in names.
                if (PatternProps.IsWhiteSpace(c))
                {
                    // Ignore leading whitespace
                    if (name.Length > 0 && name[name.Length - 1] != SPACE)
                    {
                        name.Append(SPACE);
                        // If we are too long then abort.  maxLen includes
                        // temporary trailing space, so use '>'.
                        if (name.Length > maxLen)
                        {
                            mode = 0;
                        }
                    }
                    break;
                }

                if (c == CLOSE_DELIM)
                {
                    int len = name.Length;

                    // Delete trailing space, if any
                    if (len > 0 && name[len - 1] == SPACE)
                    {
                        name.Length = --len;
                    }

                    // From here on c holds the looked-up code point, or -1 on failure.
                    c = UCharacter.GetCharFromExtendedName(name.ToString());
                    if (c != -1)
                    { // Lookup succeeded
                        // assert(UTF16.getCharCount(CLOSE_DELIM) == 1);
                        cursor++; // advance over CLOSE_DELIM

                        string str = UTF16.ValueOf(c);
                        text.Replace(openPos, cursor, str);

                        // Adjust indices for the change in the length of
                        // the string.  Do not assume that str.length() ==
                        // 1, in case of surrogates.
                        int delta = cursor - openPos - str.Length;
                        cursor -= delta;
                        limit -= delta;
                        // assert(cursor == openPos + str.length());
                    }
                    // If the lookup failed, we leave things as-is and
                    // still switch to mode 0 and continue.
                    mode = 0;
                    openPos = -1; // close off candidate
                    continue; // *** reprocess char32At(cursor)
                }

                if (legal.Contains(c))
                {
                    UTF16.Append(name, c);
                    // If we go past the longest possible name then abort.
                    // maxLen includes temporary trailing space, so use '>='.
                    if (name.Length >= maxLen)
                    {
                        mode = 0;
                    }
                }

                // Invalid character
                else
                {
                    // NOTE(review): combined with the increment after the switch,
                    // this nets to no movement only if c occupies one code unit;
                    // confirm the behavior for supplementary code points.
                    --cursor; // Backup and reprocess this character
                    mode = 0;
                }

                break;
        }

        cursor += UTF16.GetCharCount(c);
    }

    offsets.ContextLimit += limit - offsets.Limit;
    offsets.Limit = limit;
    // In incremental mode, only advance the cursor up to the last
    // open delimiter candidate.
    offsets.Start = (isIncremental && openPos >= 0) ? openPos : cursor;
}
/// <summary>
/// Returns the index preceding <paramref name="pos"/>: before the whole code
/// point that ends at <paramref name="pos"/> when <paramref name="pos"/> is
/// positive, otherwise simply <c>pos - 1</c>.
/// </summary>
/// <param name="str">The text being indexed.</param>
/// <param name="pos">The current index.</param>
/// <returns>The previous index.</returns>
internal static int PosBefore(IReplaceable str, int pos)
{
    if (pos <= 0)
    {
        return pos - 1;
    }

    return pos - UTF16.GetCharCount(str.Char32At(pos - 1));
}