/// <summary>
/// UnicodeReplacer API. Runs the subordinate replacer over the given range
/// and then transliterates the text that the replacer produced.
/// </summary>
///
/// <param name="text">the text being modified in place</param>
/// <param name="start">start index of the range to replace</param>
/// <param name="limit">limit index of the range to replace</param>
/// <param name="cursor">passed through unchanged to the subordinate replacer</param>
/// <returns>the value returned by the transliterator (used here as the new
/// limit of the range) minus <c>start</c></returns>
public virtual int Replace(Replaceable text, int start, int limit, int[] cursor) {
    // Step 1: let the subordinate replacer generate its output over [start, limit).
    int replacedLength = replacer.Replace(text, start, limit, cursor);

    // Step 2: transliterate exactly the text that step 1 produced.
    int transliteratedLimit = translit.Transliterate(text, start, start + replacedLength);

    return transliteratedLimit - start;
}
// Commented-out factory method (Java-style syntax), kept for reference only;
// it is not compiled.
// = public static UnicodeReplacer valueOf(String output,
// =                                       int cursorPos,
// =                                       RuleBasedTransliterator.Data data) {
// =     if (output.length() == 1) {
// =         char c = output.charAt(0);
// =         UnicodeReplacer r = data.lookupReplacer(c);
// =         if (r != null) {
// =             return r;
// =         }
// =     }
// =     return new StringReplacer(output, cursorPos, data);
// = }

/// <summary>
/// UnicodeReplacer API. Replaces the range [start, limit) of
/// <c>text</c> with this object's output and, when <c>hasCursor</c> is set,
/// stores the resulting cursor position in <c>cursor[0]</c>.
/// </summary>
///
/// <param name="text">the text being modified in place</param>
/// <param name="start">start index of the range (the key) to replace</param>
/// <param name="limit">limit index of the range (the key) to replace</param>
/// <param name="cursor">one-element output array; <c>cursor[0]</c> is written only
/// when <c>hasCursor</c> is true. It is also passed to nested replacers.</param>
/// <returns>the length of the text that replaced the range</returns>
public virtual int Replace(Replaceable text, int start, int limit, int[] cursor) {
    int outLen;
    int newStart = 0;

    // NOTE: It should be possible to _always_ run the complex
    // processing code; just slower. If not, then there is a bug
    // in the complex processing code.

    // Simple (no nested replacers) processing code:
    if (!isComplex) {
        text.Replace(start, limit, output);
        outLen = output.Length;

        // Set up default cursor position (for cursorPos within output)
        newStart = cursorPos;
    }

    // Complex (nested replacers) processing code:
    else {
        /*
         * When there are segments to be copied, use the Replaceable.copy()
         * API in order to retain out-of-band data. Copy everything to the
         * end of the string, then copy them back over the key. This
         * preserves the integrity of indices into the key and surrounding
         * context while generating the output text.
         */
        StringBuilder buf = new StringBuilder();
        int oOutput; // offset into 'output'

        // Reset the flag; it is set again below only if a nested replacer
        // is actually found, so a later call can take the simple path.
        isComplex = false;

        // The temporary buffer starts at tempStart, and extends
        // to destLimit + tempExtra. The start of the buffer has a single
        // character from before the key. This provides style
        // data when additional characters are filled into the
        // temporary buffer. If there is nothing to the left, use
        // the non-character U+FFFF, which Replaceable subclasses
        // should treat specially as a "no-style character."
        // destStart points to the point after the style context
        // character, so it is tempStart+1 or tempStart+2.
        int tempStart = text.Length(); // start of temp buffer
        int destStart = tempStart; // copy new text to here
        if (start > 0) {
            int len = IBM.ICU.Text.UTF16.GetCharCount(text.Char32At(start - 1));
            text.Copy(start - len, start, tempStart);
            destStart += len;
        } else {
            text.Replace(tempStart, tempStart, "\uFFFF");
            destStart++;
        }
        int destLimit = destStart;
        int tempExtra = 0; // temp chars after destLimit

        // Walk 'output' one code point at a time (oOutput advances by the
        // code point's code-unit count at the bottom of the loop).
        for (oOutput = 0; oOutput < output.Length;) {
            if (oOutput == cursorPos) {
                // Record the position of the cursor
                newStart = buf.Length + destLimit - destStart; // relative to start
                // The buf.Length term was inserted for bug 5789.
                // The problem is that if we are accumulating into a buffer
                // (when r == null below), then the actual length of the
                // text at that point needs to add the buf length.
                // There was an alternative suggested in #5789, but that
                // looks like it won't work if we have accumulated some
                // stuff in the dest part AND have a non-zero buffer.
            }
            int c = IBM.ICU.Text.UTF16.CharAt(output, oOutput);

            // When we are at the last position copy the right style
            // context character into the temporary buffer. We don't
            // do this before because it will provide an incorrect
            // right context for previous replace() operations.
            int nextIndex = oOutput + IBM.ICU.Text.UTF16.GetCharCount(c);
            if (nextIndex == output.Length) {
                tempExtra = IBM.ICU.Text.UTF16.GetCharCount(text.Char32At(limit));
                text.Copy(limit, limit + tempExtra, destLimit);
            }

            UnicodeReplacer r = data.LookupReplacer(c);
            if (r == null) {
                // Accumulate straight (non-segment) text.
                IBM.ICU.Text.UTF16.Append(buf, c);
            } else {
                isComplex = true;

                // Insert any accumulated straight text.
                if (buf.Length > 0) {
                    text.Replace(destLimit, destLimit, buf.ToString());
                    destLimit += buf.Length;
                    buf.Length = 0;
                }

                // Delegate output generation to replacer object
                int len_0 = r.Replace(text, destLimit, destLimit, cursor);
                destLimit += len_0;
            }
            oOutput = nextIndex;
        }

        // Insert any accumulated straight text.
        if (buf.Length > 0) {
            text.Replace(destLimit, destLimit, buf.ToString());
            destLimit += buf.Length;
        }
        if (oOutput == cursorPos) {
            // Record the position of the cursor (cursor sits at the very
            // end of the output, which the loop above never visits)
            newStart = destLimit - destStart; // relative to start
        }

        outLen = destLimit - destStart;

        // Copy new text to start, and delete it.
        // The indices of the temporary region are offset by outLen because
        // the copy above placed outLen code units at 'start', ahead of it.
        text.Copy(destStart, destLimit, start);
        text.Replace(tempStart + outLen, destLimit + tempExtra + outLen, "");

        // Delete the old text (the key)
        text.Replace(start + outLen, limit + outLen, "");
    }

    if (hasCursor) {
        // Adjust the cursor for positions outside the key. These
        // refer to code points rather than code units. If cursorPos
        // is within the output string, then use newStart, which has
        // already been set above.
        if (cursorPos < 0) {
            newStart = start;
            int n = cursorPos;
            // Outside the output string, cursorPos counts code points
            while (n < 0 && newStart > 0) {
                newStart -= IBM.ICU.Text.UTF16.GetCharCount(text.Char32At(newStart - 1));
                ++n;
            }
            // If the start of the text was reached first, n is still
            // negative and the remainder is applied directly.
            newStart += n;
        } else if (cursorPos > output.Length) {
            newStart = start + outLen;
            int n_1 = cursorPos - output.Length;
            // Outside the output string, cursorPos counts code points
            while (n_1 > 0 && newStart < text.Length()) {
                newStart += IBM.ICU.Text.UTF16.GetCharCount(text.Char32At(newStart));
                --n_1;
            }
            // If the end of the text was reached first, n_1 is still
            // positive and the remainder is applied directly.
            newStart += n_1;
        } else {
            // Cursor is within output string. It has been set up above
            // to be relative to start.
            newStart += start;
        }
        cursor[0] = newStart;
    }

    return(outLen);
}
/// <summary>
/// Try to match this rule at the given position and, on a full match,
/// perform the replacement. The result is the degree of match: a mismatch
/// (at least one character of the text does not match the context or key),
/// a partial match (some context and key characters match, but the text is
/// too short to match all of them), or a full match (all context and key
/// characters match).
/// On a full match the replacement is performed, <c>pos</c> is updated and
/// U_MATCH is returned. Otherwise both text and pos are unchanged.
/// </summary>
///
/// <param name="text">the text</param>
/// <param name="pos">the position indices</param>
/// <param name="incremental">if TRUE, test for partial matches that may be
/// completed by additional text inserted at pos.limit.</param>
/// <returns>one of <c>U_MISMATCH</c>, <c>U_PARTIAL_MATCH</c>, or
/// <c>U_MATCH</c>. If incremental is FALSE then
/// U_PARTIAL_MATCH will not be returned.</returns>
public int MatchAndReplace(Replaceable text, Transliterator.Position pos, bool incremental) {
    // Match and replace live in a single method because the replacement
    // step consumes offsets discovered while matching. The alternative
    // would be a match-result struct handed from one method to the other.

    // ============================ MATCH ===========================

    // Clear any segment match data left over from a previous attempt.
    if (segments != null) {
        foreach (StringMatcher segment in segments) {
            segment.ResetMatch();
        }
    }

    // ------------------------ Ante Context ------------------------
    // A failure in the ante context (or at the start anchor) is always an
    // outright U_MISMATCH, incremental or not.

    // Note (1): Text is processed in 16-bit code units rather than 32-bit
    // code points. That is fine because stand-ins are always in the BMP
    // and this is a literal match, which can be done 16 bits at a time.
    int anteLimit = PosBefore(text, pos.contextStart);

    // The reverse match begins at the char just before pos.start.
    int[] offsetRef = { PosBefore(text, pos.start) };

    int result;
    if (anteContext != null) {
        result = anteContext.Matches(text, offsetRef, anteLimit, false);
        if (result != IBM.ICU.Text.UnicodeMatcher_Constants.U_MATCH) {
            return IBM.ICU.Text.UnicodeMatcher_Constants.U_MISMATCH;
        }
    }

    int textOffset = offsetRef[0];
    int minStart = PosAfter(text, textOffset);

    // ------------------------ Start Anchor ------------------------
    bool anchoredAtStart = (flags & ANCHOR_START) != 0;
    if (anchoredAtStart && textOffset != anteLimit) {
        return IBM.ICU.Text.UnicodeMatcher_Constants.U_MISMATCH;
    }

    // -------------------- Key and Post Context --------------------
    offsetRef[0] = pos.start;

    if (key != null) {
        result = key.Matches(text, offsetRef, pos.limit, incremental);
        if (result != IBM.ICU.Text.UnicodeMatcher_Constants.U_MATCH) {
            return result;
        }
    }

    int keyEnd = offsetRef[0];

    if (postContext != null) {
        // The key ends exactly at pos.limit and a post context follows.
        // In incremental mode more characters may still arrive at
        // pos.limit, so this can only be reported as a partial match.
        if (incremental && keyEnd == pos.limit) {
            return IBM.ICU.Text.UnicodeMatcher_Constants.U_PARTIAL_MATCH;
        }

        result = postContext.Matches(text, offsetRef, pos.contextLimit, incremental);
        if (result != IBM.ICU.Text.UnicodeMatcher_Constants.U_MATCH) {
            return result;
        }
    }

    textOffset = offsetRef[0];

    // ------------------------- Stop Anchor ------------------------
    if ((flags & ANCHOR_END) != 0) {
        if (textOffset != pos.contextLimit) {
            return IBM.ICU.Text.UnicodeMatcher_Constants.U_MISMATCH;
        }
        if (incremental) {
            return IBM.ICU.Text.UnicodeMatcher_Constants.U_PARTIAL_MATCH;
        }
    }

    // =========================== REPLACE ==========================

    // Full match: the key occupies [pos.start, keyEnd).
    int replacementLength = output.Replace(text, pos.start, keyEnd, offsetRef);
    int delta = replacementLength - (keyEnd - pos.start);
    int requestedStart = offsetRef[0];

    textOffset += delta;
    pos.limit += delta;
    pos.contextLimit += delta;

    // Clamp the new start into [minStart, min(textOffset, pos.limit)].
    int upperBound = Math.Min(textOffset, pos.limit);
    pos.start = Math.Max(minStart, Math.Min(upperBound, requestedStart));

    return IBM.ICU.Text.UnicodeMatcher_Constants.U_MATCH;
}