/// <summary>
/// <see cref="IUnicodeReplacer"/> API. Runs the subordinate replacer over
/// the given range and then transliterates whatever that replacer produced.
/// </summary>
/// <param name="text">The text to modify in place.</param>
/// <param name="start">Start offset of the range handed to the subordinate replacer.</param>
/// <param name="limit">Limit offset of the range handed to the subordinate replacer.</param>
/// <param name="cursor">Cursor holder, passed through unchanged to the subordinate replacer.</param>
/// <returns>The length of the text that occupies the range after both steps, measured from <paramref name="start"/>.</returns>
public virtual int Replace(IReplaceable text, int start, int limit, int[] cursor)
{
    // Step 1: the subordinate replacer rewrites [start, limit) and reports
    // how long its output is.
    int replacedLength = replacer.Replace(text, start, limit, cursor);

    // Step 2: transliterate exactly the freshly written output; the call
    // hands back the new limit of that range.
    int transliteratedLimit = translit.Transliterate(text, start, start + replacedLength);

    return transliteratedLimit - start;
}
// Un-ported Java factory method, kept for reference only (not compiled):
//= public static UnicodeReplacer valueOf(String output,
//=                                       int cursorPos,
//=                                       RuleBasedTransliterator.Data data) {
//=     if (output.length() == 1) {
//=         char c = output.charAt(0);
//=         UnicodeReplacer r = data.lookupReplacer(c);
//=         if (r != null) {
//=             return r;
//=         }
//=     }
//=     return new StringReplacer(output, cursorPos, data);
//= }

/// <summary>
/// <see cref="IUnicodeReplacer"/> API. Replaces the range
/// [<paramref name="start"/>, <paramref name="limit"/>) of
/// <paramref name="text"/> with this object's output and, when this
/// replacer has a cursor, stores the resulting cursor offset.
/// </summary>
/// <remarks>
/// Two code paths exist. The simple path performs a single
/// <c>text.Replace</c> with the output string. The complex path walks the
/// output one code point at a time, delegating to the nested replacer that
/// <c>data.LookupReplacer</c> returns for a code point (if any) and
/// accumulating all other code points as literal text.
/// The complex path clears <c>isComplex</c> on entry and sets it again only
/// if a nested replacer is actually found, so later calls fall back to the
/// simple path when the output turns out to contain no nested replacers.
/// </remarks>
/// <param name="text">The text to modify in place.</param>
/// <param name="start">Start offset of the key being replaced.</param>
/// <param name="limit">Limit offset of the key being replaced.</param>
/// <param name="cursor">
/// Receives the new cursor offset in element 0 when <c>hasCursor</c> is
/// true; also passed through to nested replacers. Left untouched by this
/// method itself when <c>hasCursor</c> is false.
/// </param>
/// <returns>The length of the output text written at <paramref name="start"/>.</returns>
public virtual int Replace(IReplaceable text, int start, int limit, int[] cursor)
{
    int outLen;
    int newStart = 0;

    // NOTE: It should be possible to _always_ run the complex
    // processing code; just slower.  If not, then there is a bug
    // in the complex processing code.

    // Simple (no nested replacers) Processing Code :
    if (!isComplex)
    {
        text.Replace(start, limit, output);
        outLen = output.Length;

        // Default cursor position (valid when cursorPos lies within the
        // output); still relative to start at this point.
        newStart = cursorPos;
    }

    // Complex (nested replacers) Processing Code :
    else
    {
        /* When there are segments to be copied, use the IReplaceable Copy()
         * API in order to retain out-of-band data.  The output is built in
         * a temporary area appended to the end of the text and then copied
         * back over the key.  This preserves the integrity of indices into
         * the key and surrounding context while the output text is being
         * generated.
         */
        StringBuffer buf = new StringBuffer();
        int oOutput; // offset into 'output'

        // Re-evaluated below: set back to true as soon as one nested
        // replacer is found in the output.
        isComplex = false;

        // The temporary buffer starts at tempStart, and extends
        // to destLimit + tempExtra.  The start of the buffer has a single
        // code point copied from just before the key.  This provides style
        // data when additional characters are filled into the
        // temporary buffer.  If there is nothing to the left, use
        // the non-character U+FFFF, which IReplaceable implementations
        // should treat specially as a "no-style character."
        // destStart points to the position after the style context
        // character, so it is tempStart+1 or tempStart+2.
        int tempStart = text.Length; // start of temp buffer
        int destStart = tempStart; // copy new text to here
        if (start > 0)
        {
            // len is 1 or 2 code units depending on the preceding code point.
            int len = UTF16.GetCharCount(text.Char32At(start - 1));
            text.Copy(start - len, start, tempStart);
            destStart += len;
        }
        else
        {
            text.Replace(tempStart, tempStart, "\uFFFF");
            destStart++;
        }
        int destLimit = destStart;
        int tempExtra = 0; // temp chars after destLimit

        // The loop advances oOutput itself (by one code point per pass).
        for (oOutput = 0; oOutput < output.Length;)
        {
            if (oOutput == cursorPos)
            {
                // Record the position of the cursor
                newStart = buf.Length + destLimit - destStart; // relative to start
                // buf.Length was added for bug 5789: while literal text is
                // being accumulated in buf (the r == null case below) it has
                // not yet been written into the text, so the real output
                // length at this point is the written part plus the pending
                // buffer.  An alternative suggested in #5789 does not appear
                // to work when there is both written output AND a non-empty
                // buffer.
            }
            int c = UTF16.CharAt(output, oOutput);

            // When we are at the last position copy the right style
            // context character into the temporary buffer.  We don't
            // do this before because it would provide an incorrect
            // right context for previous Replace() operations.
            int nextIndex = oOutput + UTF16.GetCharCount(c);
            if (nextIndex == output.Length)
            {
                tempExtra = UTF16.GetCharCount(text.Char32At(limit));
                text.Copy(limit, limit + tempExtra, destLimit);
            }

            IUnicodeReplacer r = data.LookupReplacer(c);
            if (r == null)
            {
                // Accumulate straight (non-segment) text.
                UTF16.Append(buf, c);
            }
            else
            {
                isComplex = true;

                // Flush any accumulated straight text so the nested
                // replacer writes after it.
                if (buf.Length > 0)
                {
                    text.Replace(destLimit, destLimit, buf.ToString());
                    destLimit += buf.Length;
                    buf.Length = 0;
                }

                // Delegate output generation to the replacer object; it
                // inserts at destLimit (empty range) and returns the
                // inserted length.
                int len = r.Replace(text, destLimit, destLimit, cursor);
                destLimit += len;
            }
            oOutput = nextIndex;
        }

        // Flush any straight text still pending after the loop.
        if (buf.Length > 0)
        {
            text.Replace(destLimit, destLimit, buf.ToString());
            destLimit += buf.Length;
        }

        // Cursor exactly at the end of the output: the loop body never
        // sees this offset, so handle it here.
        if (oOutput == cursorPos)
        {
            // Record the position of the cursor
            newStart = destLimit - destStart; // relative to start
        }

        outLen = destLimit - destStart;

        // Copy the new text to start, then delete the temporary buffer.
        // The copy inserted outLen units at start, so every offset in the
        // temporary buffer is shifted by outLen.
        text.Copy(destStart, destLimit, start);
        text.Replace(tempStart + outLen, destLimit + tempExtra + outLen, "");

        // Delete the old text (the key), which now follows the new text.
        text.Replace(start + outLen, limit + outLen, "");
    }

    if (hasCursor)
    {
        // Adjust the cursor for positions outside the key.  These
        // refer to code points rather than code units.  If cursorPos
        // is within the output string, then use newStart, which has
        // already been set above.
        if (cursorPos < 0)
        {
            newStart = start;
            int n = cursorPos;
            // Outside the output string, cursorPos counts code points;
            // step backwards one code point at a time.
            while (n < 0 && newStart > 0)
            {
                newStart -= UTF16.GetCharCount(text.Char32At(newStart - 1));
                ++n;
            }
            // If the start of the text was reached first, n is still
            // negative and is applied as a raw offset.
            newStart += n;
        }
        else if (cursorPos > output.Length)
        {
            newStart = start + outLen;
            int n = cursorPos - output.Length;
            // Outside the output string, cursorPos counts code points;
            // step forwards one code point at a time.
            while (n > 0 && newStart < text.Length)
            {
                newStart += UTF16.GetCharCount(text.Char32At(newStart));
                --n;
            }
            // If the end of the text was reached first, n is still
            // positive and is applied as a raw offset.
            newStart += n;
        }
        else
        {
            // Cursor is within output string.  It has been set up above
            // to be relative to start.
            newStart += start;
        }

        cursor[0] = newStart;
    }

    return(outLen);
}
/// <summary>
/// Attempts a match and replacement at the given position and reports the
/// degree of match between this rule and the text.
/// </summary>
/// <remarks>
/// <para>
/// A mismatch means at least one character of the text does not match the
/// context or key. A partial match means some context and key characters
/// match, but the text is not long enough to match all of them. A full
/// match means all context and key characters match.
/// </para>
/// <para>
/// On a full match the replacement is performed, <paramref name="pos"/> is
/// updated, and <see cref="MatchDegree.Match"/> is returned. Otherwise both
/// <paramref name="text"/> and <paramref name="pos"/> are left unchanged.
/// </para>
/// <para>
/// Matching and replacing live in one method because the replacement needs
/// offsets discovered during the match. The alternative would be a match
/// method that returns a result struct which is then passed to a separate
/// replace method.
/// </para>
/// </remarks>
/// <param name="text">The text.</param>
/// <param name="pos">The position indices.</param>
/// <param name="incremental">
/// If true, test for partial matches that may be completed by additional
/// text inserted at <c>pos.Limit</c>.
/// </param>
/// <returns>
/// One of <see cref="MatchDegree.Mismatch"/>,
/// <see cref="MatchDegree.PartialMatch"/>, or <see cref="MatchDegree.Match"/>.
/// If <paramref name="incremental"/> is false then
/// <see cref="MatchDegree.PartialMatch"/> will not be returned.
/// </returns>
public virtual MatchDegree MatchAndReplace(IReplaceable text,
                                           TransliterationPosition pos,
                                           bool incremental)
{
    // ============================ MATCH ===========================

    // Clear any segment match data left over from a previous attempt.
    if (segments != null)
    {
        for (int s = 0; s < segments.Length; ++s)
        {
            StringMatcher segment = (StringMatcher)segments[s];
            segment.ResetMatch();
        }
    }

    // One-element array used as an in/out offset for the matchers and
    // the replacer.
    int[] offsetRef = new int[1];

    // ------------------------ Ante Context ------------------------

    // A failure in the ante context, or against the start anchor, is an
    // outright mismatch whether or not we are incremental.
    //
    // Text is processed in 16-bit code units rather than 32-bit code
    // points. That is safe because stand-ins are always in the BMP and
    // the comparison is a literal match, which can be done 16 bits at a
    // time.
    int anteBoundary = PosBefore(text, pos.ContextStart);

    // The reverse match begins at the character before pos.Start.
    offsetRef[0] = PosBefore(text, pos.Start);
    if (anteContext != null
        && anteContext.Matches(text, offsetRef, anteBoundary, false) != MatchDegree.Match)
    {
        return MatchDegree.Mismatch;
    }

    int textOffset = offsetRef[0];
    int startFloor = PosAfter(text, textOffset);

    // ------------------------ Start Anchor ------------------------

    bool anchoredAtStart = (flags & ANCHOR_START) != 0;
    if (anchoredAtStart && textOffset != anteBoundary)
    {
        return MatchDegree.Mismatch;
    }

    // -------------------- Key and Post Context --------------------

    offsetRef[0] = pos.Start;
    if (key != null)
    {
        MatchDegree keyResult = key.Matches(text, offsetRef, pos.Limit, incremental);
        if (keyResult != MatchDegree.Match)
        {
            return keyResult;
        }
    }
    int keyEnd = offsetRef[0];

    if (postContext != null)
    {
        // The key ends exactly at pos.Limit and a post context still has
        // to be satisfied. In incremental mode more characters may yet be
        // inserted at pos.Limit, so this can only be a partial match.
        if (incremental && keyEnd == pos.Limit)
        {
            return MatchDegree.PartialMatch;
        }

        MatchDegree postResult = postContext.Matches(text, offsetRef, pos.ContextLimit, incremental);
        if (postResult != MatchDegree.Match)
        {
            return postResult;
        }
    }
    textOffset = offsetRef[0];

    // ------------------------- Stop Anchor ------------------------

    if ((flags & ANCHOR_END) != 0)
    {
        if (textOffset != pos.ContextLimit)
        {
            return MatchDegree.Mismatch;
        }
        if (incremental)
        {
            return MatchDegree.PartialMatch;
        }
    }

    // =========================== REPLACE ==========================

    // Full match: the key occupies [pos.Start, keyEnd).
    int replacementLength = output.Replace(text, pos.Start, keyEnd, offsetRef);
    int delta = replacementLength - (keyEnd - pos.Start);
    int requestedStart = offsetRef[0];

    textOffset += delta;
    pos.Limit += delta;
    pos.ContextLimit += delta;

    // Clamp the new start into [startFloor, min(textOffset, pos.Limit)].
    int startCeiling = Math.Min(textOffset, pos.Limit);
    pos.Start = Math.Max(startFloor, Math.Min(startCeiling, requestedStart));

    return MatchDegree.Match;
}