/// <summary>
/// Implements <see cref="M:IBM.ICU.Text.Transliterator.HandleTransliterate(IBM.ICU.Text.Replaceable, null, System.Boolean)"/>
/// by deleting every character between <c>index.start</c> and <c>index.limit</c>.
/// </summary>
/// <param name="text">The text being edited in place.</param>
/// <param name="index">The range to delete; its <c>limit</c> and <c>contextLimit</c> are
/// reduced by the number of deleted code units.</param>
/// <param name="incremental">Not consulted by this implementation.</param>
protected internal override void HandleTransliterate(Replaceable text,
    Transliterator.Position index, bool incremental)
{
    // The caller (filteredTransliterate) has already narrowed the range to an
    // unfiltered run, so the whole run can simply be removed.
    int runStart = index.start;
    int runLimit = index.limit;
    int removed = runLimit - runStart;

    text.Replace(runStart, runLimit, "");

    // Shrink the position by the amount of text that disappeared.
    index.contextLimit -= removed;
    index.limit -= removed;
}
/// <summary>
/// Implements <see cref="M:IBM.ICU.Text.Transliterator.HandleTransliterate(IBM.ICU.Text.Replaceable, null, System.Boolean)"/>
/// by replacing each character in the range with <c>prefix</c> + number + <c>suffix</c>.
/// </summary>
/// <param name="text">The text being edited in place.</param>
/// <param name="pos">The range to process; <c>start</c>, <c>limit</c> and
/// <c>contextLimit</c> are updated to reflect the rewritten text.</param>
/// <param name="incremental">Not consulted by this implementation.</param>
protected internal override void HandleTransliterate(Replaceable text,
    Transliterator.Position pos, bool incremental)
{
    int cursor = pos.start;
    int end = pos.limit;

    // The buffer normally keeps the regular prefix at its front so that only
    // the number and suffix have to be regenerated for each character.
    StringBuilder escaped = new StringBuilder(prefix);
    int regularPrefixLength = prefix.Length;
    bool prefixOverwritten = false;

    while (cursor < end)
    {
        int codePoint;
        int unitCount;
        if (grokSupplementals)
        {
            codePoint = (int)(text.Char32At(cursor));
            unitCount = IBM.ICU.Text.UTF16.GetCharCount(codePoint);
        }
        else
        {
            codePoint = (int)(text.CharAt(cursor));
            unitCount = 1;
        }

        // Any bit above the low 16 means the value does not fit in one code unit.
        bool aboveBmp = (codePoint & -65536) != 0;
        if (aboveBmp && supplementalHandler != null)
        {
            // Delegate the formatting parameters to the supplemental handler.
            escaped.Length = 0;
            escaped.Append(supplementalHandler.prefix);
            IBM.ICU.Impl.Utility.AppendNumber(escaped, codePoint,
                supplementalHandler.radix, supplementalHandler.minDigits);
            escaped.Append(supplementalHandler.suffix);
            prefixOverwritten = true;
        }
        else
        {
            if (prefixOverwritten)
            {
                // The buffer currently starts with the handler's prefix; restore ours.
                escaped.Length = 0;
                escaped.Append(prefix);
                prefixOverwritten = false;
            }
            else
            {
                // Keep the prefix, discard the previous number and suffix.
                escaped.Length = regularPrefixLength;
            }
            IBM.ICU.Impl.Utility.AppendNumber(escaped, codePoint, radix, minDigits);
            escaped.Append(suffix);
        }

        text.Replace(cursor, cursor + unitCount, escaped.ToString());
        cursor += escaped.Length;
        end += escaped.Length - unitCount;
    }

    pos.contextLimit += end - pos.limit;
    pos.limit = end;
    pos.start = cursor;
}
/// <summary>
/// Normalizes the range from <paramref name="lastSafe"/> to <paramref name="limit"/>
/// and writes the result back into <paramref name="text"/> when it differs.
/// </summary>
/// <param name="text">The text being edited in place.</param>
/// <param name="lastSafe">Start of the range to normalize.</param>
/// <param name="limit">End (exclusive) of the range to normalize.</param>
/// <param name="verify">When non-null, the normalized output is checked and the
/// conversion is aborted if the check fails. Note that the check is made against
/// the <c>skippable</c> set; this argument only switches the check on.</param>
/// <returns>The change in length (new - old), 0 when the text is already
/// normalized, or <c>Int32.MinValue</c> if the verification aborted.</returns>
internal int Convert(Replaceable text, int lastSafe, int limit, UnicodeSet verify)
{
    int sourceLength = limit - lastSafe;
    String source = null;

    // NOTE(review): the lock target is the same field that may be reassigned
    // inside the lock; confirm this is acceptable for the callers of this class.
    lock (buffer)
    {
        if (buffer.Length < sourceLength)
        {
            // Rare; growing to exactly the needed size is good enough.
            buffer = new char[sourceLength];
        }
        text.GetChars(lastSafe, limit, buffer, 0);
        // TODO: let the normalizer accept a char[] so this copy can go away.
        source = new String(buffer, 0, sourceLength);
    }

    String normalized = IBM.ICU.Text.Normalizer.Normalize(source, mode, options);

    if (verify != null)
    {
        bool skip = !skippable.ContainsAll(normalized);
        if (DEBUG)
        {
            String label = (skip) ? " SKIP: " : "NOSKIP: ";
            System.Console.Out.WriteLine(label
                + IBM.ICU.Impl.Utility.Escape(source)
                + " => "
                + IBM.ICU.Impl.Utility.Escape(normalized));
        }
        if (skip)
        {
            return Int32.MinValue;
        }
    }

    // Nothing to write back when normalization left the text unchanged.
    if (normalized.Equals(source))
    {
        return 0;
    }

    text.Replace(lastSafe, limit, normalized);
    return normalized.Length - sourceLength;
}
/// <summary>
/// UnicodeReplacer API: replaces the range <paramref name="start"/>..<paramref name="limit"/>
/// with a copy of the most recently matched segment.
/// </summary>
/// <param name="text">The text being edited in place.</param>
/// <param name="start">Start of the range to replace.</param>
/// <param name="limit">End (exclusive) of the range to replace.</param>
/// <param name="cursor">Not used by this implementation.</param>
/// <returns>The number of code units copied, or 0 when there was nothing to copy.</returns>
public virtual int Replace(Replaceable text, int start, int limit, int[] cursor)
{
    int copiedLength = 0;

    // A negative matchStart means there was no match, i.e. a quantifier matched
    // zero-length (for example x (a)* y matched "xy"). An empty match likewise
    // leaves nothing to copy.
    bool hasSegment = matchStart >= 0 && matchStart != matchLimit;
    if (hasSegment)
    {
        // Copy() is used so that out-of-band data travels with the segment.
        text.Copy(matchStart, matchLimit, limit);
        copiedLength = matchLimit - matchStart;
    }

    // Delete the original text; the copy placed at 'limit' remains.
    text.Replace(start, limit, "");
    return copiedLength;
}
/// <summary>
/// Implements <see cref="M:IBM.ICU.Text.Transliterator.HandleTransliterate(IBM.ICU.Text.Replaceable, null, System.Boolean)"/>
/// by replacing each code point in the range with
/// <c>OPEN_DELIM</c> + extended character name + <c>CLOSE_DELIM</c>.
/// </summary>
/// <param name="text">The text being edited in place.</param>
/// <param name="offsets">The range to process; <c>start</c>, <c>limit</c> and
/// <c>contextLimit</c> are updated to reflect the rewritten text.</param>
/// <param name="isIncremental">Not consulted by this implementation.</param>
protected internal override void HandleTransliterate(Replaceable text,
    Transliterator.Position offsets, bool isIncremental)
{
    int cursor = offsets.start;
    int limit = offsets.limit;

    // The buffer keeps the opening delimiter at its front; only the name and
    // closing delimiter are regenerated for each code point.
    StringBuilder str = new StringBuilder();
    str.Append(OPEN_DELIM);

    while (cursor < limit)
    {
        int c = text.Char32At(cursor);
        int clen = IBM.ICU.Text.UTF16.GetCharCount(c);
        String name = IBM.ICU.Lang.UCharacter.GetExtendedName(c);

        if (name != null)
        {
            str.Length = OPEN_DELIM_LEN;
            str.Append(name).Append(CLOSE_DELIM);

            text.Replace(cursor, cursor + clen, str.ToString());

            int len = str.Length;
            cursor += len;        // step over the text that was just inserted
            limit += len - clen;  // account for the change in length
        }
        else
        {
            // No name available: step over the whole code point. Advancing by a
            // single code unit here would leave the cursor between the two
            // halves of a surrogate pair for supplementary characters.
            cursor += clen;
        }
    }

    offsets.contextLimit += limit - offsets.limit;
    offsets.limit = limit;
    offsets.start = cursor;
}
/// <summary>
/// Implements <see cref="M:IBM.ICU.Text.Transliterator.HandleTransliterate(IBM.ICU.Text.Replaceable, null, System.Boolean)"/>
/// by replacing delimited character names in the range with the character they name.
/// </summary>
/// <param name="text">The text being edited in place.</param>
/// <param name="offsets">The range to process; <c>start</c>, <c>limit</c> and
/// <c>contextLimit</c> are updated to reflect the rewritten text.</param>
/// <param name="isIncremental">When true and an open-delimiter candidate is still
/// recorded at the end, <c>offsets.start</c> is set to that candidate instead of
/// the final cursor.</param>
protected internal override void HandleTransliterate(Replaceable text,
    Transliterator.Position offsets, bool isIncremental)
{
    // Scanner states.
    const int SeekingOpenDelimiter = 0;
    const int ReadingName = 1;

    // One extra slot allows for a temporary trailing space in the name buffer.
    int maxLen = IBM.ICU.Impl.UCharacterName.GetInstance().GetMaxCharNameLength() + 1;
    StringBuilder name = new StringBuilder(maxLen);

    // Collect the set of characters that may legally appear in a name.
    UnicodeSet legal = new UnicodeSet();
    IBM.ICU.Impl.UCharacterName.GetInstance().GetCharNameCharacters(legal);

    int cursor = offsets.start;
    int limit = offsets.limit;

    int state = SeekingOpenDelimiter;
    int openPos = -1; // position of the current open-delimiter candidate

    while (cursor < limit)
    {
        int c = text.Char32At(cursor);

        if (state == SeekingOpenDelimiter)
        {
            if (c == OPEN_DELIM) // quick check before the full pattern parse
            {
                openPos = cursor;
                int afterOpen = IBM.ICU.Impl.Utility.ParsePattern(OPEN_PAT, text, cursor, limit);
                if (afterOpen >= 0 && afterOpen < limit)
                {
                    state = ReadingName;
                    name.Length = 0;
                    cursor = afterOpen;
                    continue; // re-read the character at the new cursor
                }
            }
        }
        else if (state == ReadingName)
        {
            if (IBM.ICU.Impl.UCharacterProperty.IsRuleWhiteSpace(c))
            {
                // Collapse a run of white space into one SPACE and ignore leading
                // white space. This assumes names never contain runs of more
                // than one space.
                if (name.Length > 0 && name[name.Length - 1] != SPACE)
                {
                    name.Append(SPACE);
                    // maxLen already includes the temporary trailing space,
                    // hence '>' rather than '>='.
                    if (name.Length > maxLen)
                    {
                        state = SeekingOpenDelimiter;
                    }
                }
            }
            else if (c == CLOSE_DELIM)
            {
                // Drop the trailing space, if any, before the lookup.
                int nameLength = name.Length;
                if (nameLength > 0 && name[nameLength - 1] == SPACE)
                {
                    name.Length = nameLength - 1;
                }

                int resolved = IBM.ICU.Lang.UCharacter.GetCharFromExtendedName(name.ToString());
                if (resolved != -1)
                {
                    // Lookup succeeded. CLOSE_DELIM is treated as one code unit.
                    cursor++;

                    String replacement = IBM.ICU.Text.UTF16.ValueOf(resolved);
                    text.Replace(openPos, cursor, replacement);

                    // Adjust for the change in length; the replacement may be a
                    // surrogate pair, so its length is not assumed to be 1.
                    int delta = cursor - openPos - replacement.Length;
                    cursor -= delta;
                    limit -= delta;
                }

                // On a failed lookup the text is left as-is; either way the
                // candidate is closed off and scanning resumes.
                state = SeekingOpenDelimiter;
                openPos = -1;
                continue; // re-read the character at the current cursor
            }
            else if (legal.Contains(c))
            {
                IBM.ICU.Text.UTF16.Append(name, c);
                // Past the longest possible name: give up on this candidate.
                // maxLen includes the temporary trailing space, hence '>='.
                if (name.Length >= maxLen)
                {
                    state = SeekingOpenDelimiter;
                }
            }
            else
            {
                // Illegal character: back up so it is looked at again while
                // seeking an open delimiter.
                --cursor;
                state = SeekingOpenDelimiter;
            }
        }

        cursor += IBM.ICU.Text.UTF16.GetCharCount(c);
    }

    offsets.contextLimit += limit - offsets.limit;
    offsets.limit = limit;

    // In incremental mode, only advance up to the last open-delimiter candidate.
    offsets.start = (isIncremental && openPos >= 0) ? openPos : cursor;
}
/// <summary>
/// Inserts <c>insertion</c> at every break-iterator boundary in the range that has
/// a letter or mark on both sides.
/// </summary>
/// <param name="text">The text being edited in place.</param>
/// <param name="pos">The range to process; <c>start</c>, <c>limit</c> and
/// <c>contextLimit</c> are updated to reflect the inserted text.</param>
/// <param name="incremental">When true, <c>pos.start</c> ends just after the last
/// boundary that received an insertion (shifted by the total inserted length);
/// otherwise it is set to <c>pos.limit</c>.</param>
protected internal override void HandleTransliterate(Replaceable text,
    Transliterator.Position pos, bool incremental)
{
    boundaryCount = 0;

    GetBreakIterator(); // creates the iterator lazily if necessary
    // TODO: the character-iterator adapter below is a known clumsy workaround.
    bi.SetText(new BreakTransliterator.ReplaceableCharacterIterator(
        text, pos.start, pos.limit, pos.start));

    // Pass 1: collect the qualifying boundaries. Few are expected because the
    // input has normally been filtered already.
    for (int candidate = bi.First();
         candidate != IBM.ICU.Text.BreakIterator.DONE && candidate < pos.limit;
         candidate = bi.Next())
    {
        if (candidate == 0)
        {
            continue; // nothing precedes the very start of the text
        }

        // HACK: require a letter or mark immediately before the boundary...
        int neighbour = IBM.ICU.Text.UTF16.CharAt(text, candidate - 1);
        int category = IBM.ICU.Lang.UCharacter.GetType(neighbour);
        if (((1 << category) & LETTER_OR_MARK_MASK) == 0)
        {
            continue;
        }

        // ...and immediately after it.
        neighbour = IBM.ICU.Text.UTF16.CharAt(text, candidate);
        category = IBM.ICU.Lang.UCharacter.GetType(neighbour);
        if (((1 << category) & LETTER_OR_MARK_MASK) == 0)
        {
            continue;
        }

        if (boundaryCount >= boundaries.Length)
        {
            // Out of room: double the capacity.
            int[] grown = new int[boundaries.Length * 2];
            System.Array.Copy(boundaries, 0, grown, 0, boundaries.Length);
            boundaries = grown;
        }
        boundaries[boundaryCount++] = candidate;
    }

    // Pass 2: perform the insertions.
    int delta = 0;
    int lastBoundary = 0;
    if (boundaryCount != 0)
    {
        delta = boundaryCount * insertion.Length;
        lastBoundary = boundaries[boundaryCount - 1];

        // Work from the end backwards so earlier offsets stay valid.
        while (boundaryCount > 0)
        {
            int insertAt = boundaries[--boundaryCount];
            text.Replace(insertAt, insertAt, insertion);
        }
    }

    // Report the new positions back to the caller.
    pos.contextLimit += delta;
    pos.limit += delta;
    pos.start = (incremental) ? lastBoundary + delta : pos.limit;
}
// Retained, disabled factory from the original source:
// = public static UnicodeReplacer valueOf(String output,
// =                                       int cursorPos,
// =                                       RuleBasedTransliterator.Data data) {
// =     if (output.length() == 1) {
// =         char c = output.charAt(0);
// =         UnicodeReplacer r = data.lookupReplacer(c);
// =         if (r != null) {
// =             return r;
// =         }
// =     }
// =     return new StringReplacer(output, cursorPos, data);
// = }

/// <summary>
/// UnicodeReplacer API: replaces the range <paramref name="start"/>..<paramref name="limit"/>
/// with this replacer's output, expanding any nested replacers found in it.
/// </summary>
/// <param name="text">The text being edited in place.</param>
/// <param name="start">Start of the range (the key) to replace.</param>
/// <param name="limit">End (exclusive) of the range to replace.</param>
/// <param name="cursor">When <c>hasCursor</c> is set, element 0 receives the new
/// cursor position; the array is also handed to nested replacers.</param>
/// <returns>The length of the generated output.</returns>
public virtual int Replace(Replaceable text, int start, int limit, int[] cursor)
{
    int outLen;
    int newStart = 0;

    // NOTE: It should be possible to always run the complex processing code,
    // just more slowly. If not, the complex processing code has a bug.
    if (isComplex)
    {
        // Complex processing (output may contain nested replacers).
        //
        // Segments are copied with Replaceable.Copy() so that out-of-band data
        // is retained. Everything is generated at the end of the string and
        // then copied back over the key, which keeps indices into the key and
        // its surrounding context valid while the output is being built.
        StringBuilder pending = new StringBuilder();
        int outputIndex = 0; // offset into 'output'
        isComplex = false;   // set again below if a nested replacer is found

        // The temporary buffer starts at tempStart and extends to
        // destLimit + tempExtra. It begins with one character taken from just
        // before the key, which supplies style data for the characters that
        // follow. If nothing precedes the key, the non-character U+FFFF is
        // used; Replaceable subclasses should treat it as a "no-style
        // character". destStart is the position just after that context
        // character, i.e. tempStart+1 or tempStart+2.
        int tempStart = text.Length();
        int destStart = tempStart;
        if (start > 0)
        {
            int contextLength = IBM.ICU.Text.UTF16.GetCharCount(text.Char32At(start - 1));
            text.Copy(start - contextLength, start, tempStart);
            destStart += contextLength;
        }
        else
        {
            text.Replace(tempStart, tempStart, "\uFFFF");
            destStart++;
        }

        int destLimit = destStart;
        int tempExtra = 0; // temporary characters placed after destLimit

        while (outputIndex < output.Length)
        {
            if (outputIndex == cursorPos)
            {
                // Record the cursor relative to start. The length of the
                // pending buffer is included (bug 5789) because straight text
                // that has been accumulated but not yet inserted still counts
                // towards the position.
                newStart = pending.Length + destLimit - destStart;
            }

            int c = IBM.ICU.Text.UTF16.CharAt(output, outputIndex);

            // At the last position, copy the right-hand style context character
            // into the temporary buffer. Doing so earlier would give previous
            // Replace() calls an incorrect right context.
            int nextIndex = outputIndex + IBM.ICU.Text.UTF16.GetCharCount(c);
            if (nextIndex == output.Length)
            {
                tempExtra = IBM.ICU.Text.UTF16.GetCharCount(text.Char32At(limit));
                text.Copy(limit, limit + tempExtra, destLimit);
            }

            UnicodeReplacer nested = data.LookupReplacer(c);
            if (nested == null)
            {
                // Plain (non-segment) text: accumulate it.
                IBM.ICU.Text.UTF16.Append(pending, c);
            }
            else
            {
                isComplex = true;

                // Flush accumulated plain text first.
                if (pending.Length > 0)
                {
                    text.Replace(destLimit, destLimit, pending.ToString());
                    destLimit += pending.Length;
                    pending.Length = 0;
                }

                // Let the nested replacer generate its own output in place.
                int generated = nested.Replace(text, destLimit, destLimit, cursor);
                destLimit += generated;
            }

            outputIndex = nextIndex;
        }

        // Flush whatever plain text is still pending.
        if (pending.Length > 0)
        {
            text.Replace(destLimit, destLimit, pending.ToString());
            destLimit += pending.Length;
        }

        if (outputIndex == cursorPos)
        {
            // Cursor sits at the very end of the output; relative to start.
            newStart = destLimit - destStart;
        }

        outLen = destLimit - destStart;

        // Copy the new text in front of the key, then delete the temporary buffer.
        text.Copy(destStart, destLimit, start);
        text.Replace(tempStart + outLen, destLimit + tempExtra + outLen, "");

        // Finally delete the old text (the key).
        text.Replace(start + outLen, limit + outLen, "");
    }
    else
    {
        // Simple processing (no nested replacers).
        text.Replace(start, limit, output);
        outLen = output.Length;

        // Default cursor position, valid when cursorPos lies within the output.
        newStart = cursorPos;
    }

    if (hasCursor)
    {
        // Positions outside the key are counted in code points rather than
        // code units. For a position inside the output, newStart has already
        // been set up above.
        if (cursorPos < 0)
        {
            newStart = start;
            int remaining = cursorPos;
            while (remaining < 0 && newStart > 0)
            {
                newStart -= IBM.ICU.Text.UTF16.GetCharCount(text.Char32At(newStart - 1));
                ++remaining;
            }
            newStart += remaining;
        }
        else if (cursorPos > output.Length)
        {
            newStart = start + outLen;
            int remaining = cursorPos - output.Length;
            while (remaining > 0 && newStart < text.Length())
            {
                newStart += IBM.ICU.Text.UTF16.GetCharCount(text.Char32At(newStart));
                --remaining;
            }
            newStart += remaining;
        }
        else
        {
            // Inside the output: newStart is relative to start, make it absolute.
            newStart += start;
        }
        cursor[0] = newStart;
    }

    return outLen;
}
/// <summary>
/// Implements <see cref="M:IBM.ICU.Text.Transliterator.HandleTransliterate(IBM.ICU.Text.Replaceable, null, System.Boolean)"/>
/// by replacing escape sequences described in <c>spec</c> with the character
/// whose numeric value they encode.
/// </summary>
/// <param name="text">The text being edited in place.</param>
/// <param name="pos">The range to process; <c>start</c>, <c>limit</c> and
/// <c>contextLimit</c> are updated to reflect the rewritten text.</param>
/// <param name="isIncremental">When true, processing stops as soon as a partial
/// match runs into the end of the range, leaving <c>pos.start</c> at the
/// beginning of that partial match.</param>
protected internal override void HandleTransliterate(Replaceable text,
    Transliterator.Position pos, bool isIncremental)
{
    int start = pos.start;
    int limit = pos.limit;

    while (start < limit)
    {
        // Try each form in spec[] in turn and stop at the first one that
        // matches. A partial match in incremental mode ends processing.
        int ipat = 0;
        while (spec[ipat] != END)
        {
            // Header of the current form.
            int prefixLen = spec[ipat++];
            int suffixLen = spec[ipat++];
            int radix = spec[ipat++];
            int minDigits = spec[ipat++];
            int maxDigits = spec[ipat++];

            // 'scan' starts at 'start' and moves over characters as they are parsed.
            int scan = start;
            bool match = true;

            // --- prefix ---
            for (int k = 0; k < prefixLen; ++k)
            {
                if (scan >= limit && k > 0)
                {
                    // At least one prefix character matched already, so this
                    // is a partial match: stop in incremental mode, otherwise
                    // move on to the next form.
                    if (isIncremental)
                    {
                        goto finish;
                    }
                    match = false;
                    break;
                }
                char prefixChar = text.CharAt(scan++);
                if (prefixChar != spec[ipat + k])
                {
                    match = false;
                    break;
                }
            }

            if (match)
            {
                // --- digits ---
                int value = 0;
                int digitCount = 0;
                while (true)
                {
                    if (scan >= limit)
                    {
                        // Partial match check for incremental mode.
                        if (scan > start && isIncremental)
                        {
                            goto finish;
                        }
                        break;
                    }
                    int ch = text.Char32At(scan);
                    int digit = IBM.ICU.Lang.UCharacter.Digit(ch, radix);
                    if (digit < 0)
                    {
                        break;
                    }
                    scan += IBM.ICU.Text.UTF16.GetCharCount(ch);
                    value = (value * radix) + digit;
                    if (++digitCount == maxDigits)
                    {
                        break;
                    }
                }

                match = (digitCount >= minDigits);

                if (match)
                {
                    // --- suffix ---
                    for (int k = 0; k < suffixLen; ++k)
                    {
                        if (scan >= limit)
                        {
                            // Partial match check for incremental mode.
                            if (scan > start && isIncremental)
                            {
                                goto finish;
                            }
                            match = false;
                            break;
                        }
                        char suffixChar = text.CharAt(scan++);
                        if (suffixChar != spec[ipat + prefixLen + k])
                        {
                            match = false;
                            break;
                        }
                    }

                    if (match)
                    {
                        // Full match: substitute the decoded character.
                        String decoded = IBM.ICU.Text.UTF16.ValueOf(value);
                        text.Replace(start, scan, decoded);
                        limit -= scan - start - decoded.Length;

                        // Stop trying forms; the next input character is
                        // handled by the outer loop.
                        break;
                    }
                }
            }

            // Skip this form's prefix and suffix characters to reach the next header.
            ipat += prefixLen + suffixLen;
        }

        if (start < limit)
        {
            start += IBM.ICU.Text.UTF16.GetCharCount(text.Char32At(start));
        }
    }

finish:
    pos.contextLimit += limit - pos.limit;
    pos.limit = limit;
    pos.start = start;
}