/// <summary>
/// <see cref="IUnicodeReplacer"/> API. Renders this replacer's output as rule text:
/// plain output characters are appended directly, nested replacers are rendered
/// recursively and padded with a space on each side, and an explicit cursor is
/// written with <c>'|'</c> plus one <c>'@'</c> per position outside the output.
/// </summary>
/// <param name="escapeUnprintable">Passed unchanged to every
/// <c>Utility.AppendToRule</c> call and to nested replacers.</param>
/// <returns>The accumulated rule text.</returns>
public virtual string ToReplacerPattern(bool escapeUnprintable)
{
    StringBuffer result = new StringBuffer();
    StringBuffer pendingQuote = new StringBuffer();
    int cursorIndex = cursorPos;

    // Cursor located before the output: one '@' per position of negative offset.
    if (hasCursor && cursorIndex < 0)
    {
        // NOTE(review): because of the post-increment, cursorIndex is 1 (not 0)
        // once this loop exits, so the '|' emitted by the loop below lands in
        // front of output[1]. Kept exactly as written -- confirm this is intended.
        while (cursorIndex++ < 0)
        {
            Utility.AppendToRule(result, '@', true, escapeUnprintable, pendingQuote);
        }
    }

    for (int index = 0; index < output.Length; ++index)
    {
        if (hasCursor && index == cursorIndex)
        {
            Utility.AppendToRule(result, '|', true, escapeUnprintable, pendingQuote);
        }

        char unit = output[index]; // a 16-bit code unit is sufficient here
        IUnicodeReplacer nested = data.LookupReplacer(unit);
        if (nested != null)
        {
            // Nested replacer: render it and surround the result with spaces.
            StringBuffer wrapped = new StringBuffer(" ");
            wrapped.Append(nested.ToReplacerPattern(escapeUnprintable));
            wrapped.Append(' ');
            Utility.AppendToRule(result, wrapped.ToString(), true, escapeUnprintable, pendingQuote);
        }
        else
        {
            Utility.AppendToRule(result, unit, false, escapeUnprintable, pendingQuote);
        }
    }

    // Cursor located after the output. The comparison is strict: a cursor equal to
    // output.Length sits at the end of the output, which is the default position
    // and therefore needs no explicit marker.
    if (hasCursor && cursorIndex > output.Length)
    {
        for (int padding = cursorIndex - output.Length; padding > 0; --padding)
        {
            Utility.AppendToRule(result, '@', true, escapeUnprintable, pendingQuote);
        }
        Utility.AppendToRule(result, '|', true, escapeUnprintable, pendingQuote);
    }

    // Flush whatever is still held in the quote buffer into the result.
    Utility.AppendToRule(result, -1, true, escapeUnprintable, pendingQuote);

    return result.ToString();
}
/// <summary>
/// Union the set of all characters that may be output by this object
/// into the given set.
/// </summary>
/// <param name="toUnionTo">The set into which to union the output characters.</param>
public virtual void AddReplacementSetTo(UnicodeSet toUnionTo)
{
    int offset = 0;
    while (offset < output.Length)
    {
        int codePoint = UTF16.CharAt(output, offset);
        IUnicodeReplacer nested = data.LookupReplacer(codePoint);

        if (nested != null)
        {
            // The code point stands for a nested replacer: let it contribute its own set.
            nested.AddReplacementSetTo(toUnionTo);
        }
        else
        {
            // Plain output character.
            toUnionTo.Add(codePoint);
        }

        // Advance by the number of code units this code point occupies.
        offset += UTF16.GetCharCount(codePoint);
    }
}
/// <summary>
/// Construct a replacer that takes the output of the given
/// <paramref name="replacer"/>, passes it through the given
/// <paramref name="transliterator"/>, and emits the result as output.
/// </summary>
/// <param name="transliterator">The transliterator stored for this replacer.</param>
/// <param name="replacer">The replacer stored for this replacer.</param>
public FunctionReplacer(Transliterator transliterator, IUnicodeReplacer replacer)
{
    this.translit = transliterator;
    this.replacer = replacer;
}
//= public static UnicodeReplacer valueOf(String output,
//=                                       int cursorPos,
//=                                       RuleBasedTransliterator.Data data) {
//=     if (output.length() == 1) {
//=         char c = output.charAt(0);
//=         UnicodeReplacer r = data.lookupReplacer(c);
//=         if (r != null) {
//=             return r;
//=         }
//=     }
//=     return new StringReplacer(output, cursorPos, data);
//= }

/// <summary>
/// <see cref="IUnicodeReplacer"/> API. Replaces the range
/// [<paramref name="start"/>, <paramref name="limit"/>) of <paramref name="text"/>
/// with this replacer's output and, when this replacer has a cursor, stores the
/// new cursor position in <c>cursor[0]</c>.
/// </summary>
/// <remarks>
/// Two code paths exist. When <c>isComplex</c> is false the output string is
/// written with a single <c>text.Replace</c> call. Otherwise the output is built in
/// a temporary region appended after the end of <paramref name="text"/>, with
/// nested replacers invoked for the output characters that map to one, and the
/// result is then copied over the key before the temporary region and the old key
/// are deleted. The complex path resets <c>isComplex</c> to false and sets it back
/// to true only if at least one nested replacer was found.
/// </remarks>
/// <param name="text">The text to modify in place.</param>
/// <param name="start">Start offset (inclusive) of the range to replace.</param>
/// <param name="limit">End offset (exclusive) of the range to replace.</param>
/// <param name="cursor">Array whose element 0 receives the new cursor position;
/// only written when <c>hasCursor</c> is true. It is also passed on to nested
/// replacers.</param>
/// <returns>The length, in code units, of the text that replaced the range.</returns>
public virtual int Replace(IReplaceable text, int start, int limit, int[] cursor)
{
    int outLen;
    int newStart = 0;

    // NOTE: It should be possible to _always_ run the complex
    // processing code; just slower.  If not, then there is a bug
    // in the complex processing code.

    // Simple (no nested replacers) Processing Code :
    if (!isComplex)
    {
        text.Replace(start, limit, output);
        outLen = output.Length;

        // Setup default cursor position (for cursorPos within output)
        newStart = cursorPos;
    }

    // Complex (nested replacers) Processing Code :
    else
    {
        /* When there are segments to be copied, use the Replaceable.copy()
         * API in order to retain out-of-band data.  Copy everything to the
         * end of the string, then copy them back over the key.  This preserves
         * the integrity of indices into the key and surrounding context while
         * generating the output text.
         */
        StringBuffer buf = new StringBuffer();
        int oOutput; // offset into 'output'
        // Re-derived during this pass: set back to true below as soon as a
        // nested replacer is encountered.
        isComplex = false;

        // The temporary buffer starts at tempStart, and extends
        // to destLimit + tempExtra.  The start of the buffer has a single
        // character from before the key.  This provides style
        // data when additional characters are filled into the
        // temporary buffer.  If there is nothing to the left, use
        // the non-character U+FFFF, which Replaceable subclasses
        // should treat specially as a "no-style character."
        // destStart points to the point after the style context
        // character, so it is tempStart+1 or tempStart+2.
        int tempStart = text.Length; // start of temp buffer
        int destStart = tempStart; // copy new text to here
        if (start > 0)
        {
            // Copy the code point immediately before the key (1 or 2 code units).
            int len = UTF16.GetCharCount(text.Char32At(start - 1));
            text.Copy(start - len, start, tempStart);
            destStart += len;
        }
        else
        {
            text.Replace(tempStart, tempStart, "\uFFFF");
            destStart++;
        }
        int destLimit = destStart;
        int tempExtra = 0; // temp chars after destLimit

        for (oOutput = 0; oOutput < output.Length;)
        {
            if (oOutput == cursorPos)
            {
                // Record the position of the cursor
                newStart = buf.Length + destLimit - destStart; // relative to start
                // the buf.length() was inserted for bug 5789
                // the problem is that if we are accumulating into a buffer (when r == null below)
                // then the actual length of the text at that point needs to add the buf length.
                // there was an alternative suggested in #5789, but that looks like it won't work
                // if we have accumulated some stuff in the dest part AND have a non-zero buffer.
            }
            int c = UTF16.CharAt(output, oOutput);

            // When we are at the last position copy the right style
            // context character into the temporary buffer.  We don't
            // do this before because it will provide an incorrect
            // right context for previous replace() operations.
            int nextIndex = oOutput + UTF16.GetCharCount(c);
            if (nextIndex == output.Length)
            {
                tempExtra = UTF16.GetCharCount(text.Char32At(limit));
                text.Copy(limit, limit + tempExtra, destLimit);
            }

            IUnicodeReplacer r = data.LookupReplacer(c);
            if (r == null)
            {
                // Accumulate straight (non-segment) text.
                UTF16.Append(buf, c);
            }
            else
            {
                isComplex = true;

                // Insert any accumulated straight text.
                if (buf.Length > 0)
                {
                    text.Replace(destLimit, destLimit, buf.ToString());
                    destLimit += buf.Length;
                    buf.Length = 0;
                }

                // Delegate output generation to replacer object
                int len = r.Replace(text, destLimit, destLimit, cursor);
                destLimit += len;
            }
            // The loop header has no increment; this is the only place the
            // offset into 'output' advances.
            oOutput = nextIndex;
        }
        // Insert any accumulated straight text.
        if (buf.Length > 0)
        {
            text.Replace(destLimit, destLimit, buf.ToString());
            destLimit += buf.Length;
        }
        // Cursor exactly at the end of the output: the loop above never sees
        // this offset, so it is handled here.
        if (oOutput == cursorPos)
        {
            // Record the position of the cursor
            newStart = destLimit - destStart; // relative to start
        }

        outLen = destLimit - destStart;

        // Copy new text to start, and delete it
        // (the copy inserts outLen code units at 'start', so every later
        // offset used below is shifted by outLen)
        text.Copy(destStart, destLimit, start);
        text.Replace(tempStart + outLen, destLimit + tempExtra + outLen, "");

        // Delete the old text (the key)
        text.Replace(start + outLen, limit + outLen, "");
    }

    if (hasCursor)
    {
        // Adjust the cursor for positions outside the key.  These
        // refer to code points rather than code units.  If cursorPos
        // is within the output string, then use newStart, which has
        // already been set above.
        if (cursorPos < 0)
        {
            newStart = start;
            int n = cursorPos;
            // Outside the output string, cursorPos counts code points
            while (n < 0 && newStart > 0)
            {
                newStart -= UTF16.GetCharCount(text.Char32At(newStart - 1));
                ++n;
            }
            // Any count left over after reaching the start of the text is
            // applied directly as an offset.
            newStart += n;
        }
        else if (cursorPos > output.Length)
        {
            newStart = start + outLen;
            int n = cursorPos - output.Length;
            // Outside the output string, cursorPos counts code points
            while (n > 0 && newStart < text.Length)
            {
                newStart += UTF16.GetCharCount(text.Char32At(newStart));
                --n;
            }
            // Any count left over after reaching the end of the text is
            // applied directly as an offset.
            newStart += n;
        }
        else
        {
            // Cursor is within output string.  It has been set up above
            // to be relative to start.
            newStart += start;
        }

        cursor[0] = newStart;
    }

    return(outLen);
}
#pragma warning restore 612, 618

/// <summary>
/// Construct a new rule with the given input, output text, and other
/// attributes. A cursor position may be specified for the output text.
/// </summary>
/// <param name="input">Input string, including key and optional ante and
/// post context.</param>
/// <param name="anteContextPos">Offset into <paramref name="input"/> to the end of
/// the ante context, or -1 if none. Must be &lt;= <c>input.Length</c> if not -1.</param>
/// <param name="postContextPos">Offset into <paramref name="input"/> to the start of
/// the post context, or -1 if none. Must be &lt;= <c>input.Length</c> if not -1, and
/// must be &gt;= <paramref name="anteContextPos"/>.</param>
/// <param name="output">Output string.</param>
/// <param name="cursorPos">Offset into <paramref name="output"/> at which the cursor
/// is located, or -1 if none. If less than zero, the cursor is placed after the
/// output; that is, -1 is equivalent to <c>output.Length</c>. If greater than
/// <c>output.Length</c> an exception is thrown.</param>
/// <param name="cursorOffset">An offset added to <paramref name="cursorPos"/> to
/// position the cursor either in the ante context, if &lt; 0, or in the post
/// context, if &gt; 0. For example, the rule "abc{def} &gt; | @@@ xyz;" changes
/// "def" to "xyz" and moves the cursor to before "a". It would have a
/// cursorOffset of -3.</param>
/// <param name="segs">Array of <see cref="IUnicodeMatcher"/> corresponding to input
/// pattern segments, or null if there are none.</param>
/// <param name="anchorStart">True if the rule is anchored on the left to the
/// context start.</param>
/// <param name="anchorEnd">True if the rule is anchored on the right to the
/// context limit.</param>
/// <param name="theData">The rule data stored on this rule and handed to the
/// matchers and the replacer created here.</param>
/// <exception cref="ArgumentException">The ante context position, post context
/// position, or cursor position is out of range.</exception>
public TransliterationRule(string input,
    int anteContextPos, int postContextPos,
    string output,
    int cursorPos, int cursorOffset,
    IUnicodeMatcher[] segs,
    bool anchorStart, bool anchorEnd,
#pragma warning disable 612, 618
    RuleBasedTransliterator.Data theData)
#pragma warning restore 612, 618
{
    data = theData;

    // Ante context: a negative position means "none"; otherwise it must lie
    // inside the input. (A negative value can never exceed input.Length.)
    if (anteContextPos > input.Length)
    {
        throw new ArgumentException("Invalid ante context");
    }
    anteContextLength = anteContextPos < 0 ? 0 : anteContextPos;

    // Post context: a negative position means the key runs to the end of the
    // input; otherwise it must start at or after the ante context and lie
    // inside the input.
    if (postContextPos >= 0
        && (postContextPos < anteContextLength || postContextPos > input.Length))
    {
        throw new ArgumentException("Invalid post context");
    }
    int keyEnd = postContextPos < 0 ? input.Length : postContextPos;
    keyLength = keyEnd - anteContextLength;

    // Cursor: a negative position defaults to the end of the output.
    if (cursorPos > output.Length)
    {
        throw new ArgumentException("Invalid cursor position");
    }
    if (cursorPos < 0)
    {
        cursorPos = output.Length;
    }

    // We don't validate the segments array.  The caller must
    // guarantee that the segments are well-formed (that is, that
    // all $n references in the output refer to indices of this
    // array, and that no array elements are null).
    this.segments = segs;

    pattern = input;

    flags = 0;
    if (anchorStart)
    {
        flags |= (byte)ANCHOR_START;
    }
    if (anchorEnd)
    {
        flags |= (byte)ANCHOR_END;
    }

    // Each pattern section gets a matcher only when it is non-empty.
    anteContext = anteContextLength > 0
        ? new StringMatcher(pattern.Substring(0, anteContextLength), 0, data)
        : null;

    key = keyLength > 0
        ? new StringMatcher(pattern.Substring(anteContextLength, keyLength), 0, data)
        : null;

    int postContextLength = pattern.Length - keyLength - anteContextLength;
    postContext = postContextLength > 0
        ? new StringMatcher(pattern.Substring(anteContextLength + keyLength), 0, data)
        : null;

    this.output = new StringReplacer(output, cursorPos + cursorOffset, data);
}