Ejemplo n.º 1
0
        /// <summary>
        /// <see cref="IUnicodeReplacer"/> API. Lets the subordinate replacer
        /// generate its output first, then transliterates that output in place.
        /// </summary>
        /// <param name="text">The text being modified.</param>
        /// <param name="start">Start index of the range to replace.</param>
        /// <param name="limit">Limit index of the range to replace.</param>
        /// <param name="cursor">Cursor array handed through to the subordinate replacer.</param>
        /// <returns>The length of the text occupying the replaced range after transliteration.</returns>
        public virtual int Replace(IReplaceable text,
                                   int start,
                                   int limit,
                                   int[] cursor)
        {
            // Step 1: the wrapped replacer produces the raw output and reports its length.
            int replacedLength = replacer.Replace(text, start, limit, cursor);

            // Step 2: transliterate exactly the freshly written span; the
            // transliterator reports the new limit of that span.
            int transliteratedLimit = translit.Transliterate(text, start, start + replacedLength);

            return transliteratedLimit - start;
        }
Ejemplo n.º 2
0
        //=    public static UnicodeReplacer valueOf(String output,
        //=                                          int cursorPos,
        //=                                          RuleBasedTransliterator.Data data) {
        //=        if (output.length() == 1) {
        //=            char c = output.charAt(0);
        //=            UnicodeReplacer r = data.lookupReplacer(c);
        //=            if (r != null) {
        //=                return r;
        //=            }
        //=        }
        //=        return new StringReplacer(output, cursorPos, data);
        //=    }

        /// <summary>
        /// <see cref="IUnicodeReplacer"/> API. Replaces the range
        /// [<paramref name="start"/>, <paramref name="limit"/>) of
        /// <paramref name="text"/> with this replacer's output and, when this
        /// replacer has a cursor, stores the new cursor position in
        /// <c>cursor[0]</c>.
        /// </summary>
        /// <remarks>
        /// Two code paths exist. The simple path writes <c>output</c> directly
        /// over the range. The complex path walks <c>output</c> one code point at
        /// a time, delegating to any nested replacer found via
        /// <c>data.LookupReplacer</c>, and builds the result in a scratch area
        /// appended to the end of <paramref name="text"/> before copying it back.
        /// </remarks>
        /// <param name="text">The text to modify in place.</param>
        /// <param name="start">Start index of the range being replaced (the key).</param>
        /// <param name="limit">Limit index of the range being replaced.</param>
        /// <param name="cursor">
        /// Array whose element 0 receives the new cursor position when
        /// <c>hasCursor</c> is set. It is also passed to nested replacers.
        /// </param>
        /// <returns>The length of the text that now occupies the replaced range.</returns>
        public virtual int Replace(IReplaceable text,
                                   int start,
                                   int limit,
                                   int[] cursor)
        {
            int outLen;
            // Cursor position relative to 'start' while the output is being
            // generated; converted to an absolute index in the hasCursor block.
            int newStart = 0;

            // NOTE: It should be possible to _always_ run the complex
            // processing code; just slower.  If not, then there is a bug
            // in the complex processing code.

            // Simple (no nested replacers) Processing Code :
            if (!isComplex)
            {
                text.Replace(start, limit, output);
                outLen = output.Length;

                // Setup default cursor position (for cursorPos within output)
                newStart = cursorPos;
            }

            // Complex (nested replacers) Processing Code :
            else
            {
                /* When there are segments to be copied, use the IReplaceable.Copy()
                 * API in order to retain out-of-band data.  Copy everything to the
                 * end of the string, then copy them back over the key.  This preserves
                 * the integrity of indices into the key and surrounding context while
                 * generating the output text.
                 */
                StringBuffer buf = new StringBuffer();
                int          oOutput; // offset into 'output'
                // Assume the output is simple until a nested replacer is actually
                // found below; if none is found, later calls take the simple path.
                isComplex = false;

                // The temporary buffer starts at tempStart, and extends
                // to destLimit + tempExtra.  The start of the buffer has a single
                // character from before the key.  This provides style
                // data when additional characters are filled into the
                // temporary buffer.  If there is nothing to the left, use
                // the non-character U+FFFF, which Replaceable subclasses
                // should treat specially as a "no-style character."
                // destStart points to the point after the style context
                // character, so it is tempStart+1 or tempStart+2.
                int tempStart = text.Length; // start of temp buffer
                int destStart = tempStart;   // copy new text to here
                if (start > 0)
                {
                    // Copy the whole code point preceding the key (1 or 2 code units).
                    int len = UTF16.GetCharCount(text.Char32At(start - 1));
                    text.Copy(start - len, start, tempStart);
                    destStart += len;
                }
                else
                {
                    text.Replace(tempStart, tempStart, "\uFFFF");
                    destStart++;
                }
                int destLimit = destStart;
                int tempExtra = 0; // temp chars after destLimit

                // Walk 'output' one code point at a time; oOutput is advanced
                // at the bottom of the loop body.
                for (oOutput = 0; oOutput < output.Length;)
                {
                    if (oOutput == cursorPos)
                    {
                        // Record the position of the cursor.
                        // The buf.Length term was inserted for bug 5789: if we are
                        // accumulating into a buffer (when r == null below), then the
                        // actual length of the text at that point needs to add the
                        // buf length.  There was an alternative suggested in #5789,
                        // but that looks like it won't work if we have accumulated
                        // some stuff in the dest part AND have a non-zero buffer.
                        newStart = buf.Length + destLimit - destStart; // relative to start
                    }
                    int c = UTF16.CharAt(output, oOutput);

                    // When we are at the last position copy the right style
                    // context character into the temporary buffer.  We don't
                    // do this before because it will provide an incorrect
                    // right context for previous replace() operations.
                    int nextIndex = oOutput + UTF16.GetCharCount(c);
                    if (nextIndex == output.Length)
                    {
                        tempExtra = UTF16.GetCharCount(text.Char32At(limit));
                        text.Copy(limit, limit + tempExtra, destLimit);
                    }

                    // A non-null result means 'c' stands for a nested replacer;
                    // otherwise 'c' is literal output text.
                    IUnicodeReplacer r = data.LookupReplacer(c);
                    if (r == null)
                    {
                        // Accumulate straight (non-segment) text.
                        UTF16.Append(buf, c);
                    }
                    else
                    {
                        isComplex = true;

                        // Insert any accumulated straight text.
                        if (buf.Length > 0)
                        {
                            text.Replace(destLimit, destLimit, buf.ToString());
                            destLimit += buf.Length;
                            buf.Length = 0;
                        }

                        // Delegate output generation to replacer object
                        int len = r.Replace(text, destLimit, destLimit, cursor);
                        destLimit += len;
                    }
                    oOutput = nextIndex;
                }
                // Insert any accumulated straight text.
                if (buf.Length > 0)
                {
                    text.Replace(destLimit, destLimit, buf.ToString());
                    destLimit += buf.Length;
                }
                // The loop above only sees cursor positions strictly inside
                // 'output'; this handles a cursor sitting at its very end.
                if (oOutput == cursorPos)
                {
                    // Record the position of the cursor
                    newStart = destLimit - destStart; // relative to start
                }

                outLen = destLimit - destStart;

                // Copy new text to start, and delete it.  The copy inserts outLen
                // units at 'start', so the temp-buffer indices are offset by
                // outLen in the deletion that follows.
                text.Copy(destStart, destLimit, start);
                text.Replace(tempStart + outLen, destLimit + tempExtra + outLen, "");

                // Delete the old text (the key)
                text.Replace(start + outLen, limit + outLen, "");
            }

            if (hasCursor)
            {
                // Adjust the cursor for positions outside the key.  These
                // refer to code points rather than code units.  If cursorPos
                // is within the output string, then use newStart, which has
                // already been set above.
                if (cursorPos < 0)
                {
                    newStart = start;
                    int n = cursorPos;
                    // Outside the output string, cursorPos counts code points
                    while (n < 0 && newStart > 0)
                    {
                        newStart -= UTF16.GetCharCount(text.Char32At(newStart - 1));
                        ++n;
                    }
                    // If the start of the text was reached first, n is still
                    // negative and moves newStart below zero by the remainder.
                    newStart += n;
                }
                else if (cursorPos > output.Length)
                {
                    newStart = start + outLen;
                    int n = cursorPos - output.Length;
                    // Outside the output string, cursorPos counts code points
                    while (n > 0 && newStart < text.Length)
                    {
                        newStart += UTF16.GetCharCount(text.Char32At(newStart));
                        --n;
                    }
                    // If the end of the text was reached first, n is still
                    // positive and moves newStart past the end by the remainder.
                    newStart += n;
                }
                else
                {
                    // Cursor is within output string.  It has been set up above
                    // to be relative to start.
                    newStart += start;
                }

                cursor[0] = newStart;
            }

            return(outLen);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Attempt a match and replacement at the given position.  Return
        /// the degree of match between this rule and the given text.  The
        /// degree of match may be mismatch, a partial match, or a full
        /// match.  A mismatch means at least one character of the text
        /// does not match the context or key.  A partial match means some
        /// context and key characters match, but the text is not long
        /// enough to match all of them.  A full match means all context
        /// and key characters match.
        /// </summary>
        /// <remarks>
        /// If a full match is obtained, perform a replacement, update
        /// <paramref name="pos"/>, and return <see cref="MatchDegree.Match"/>.
        /// On every other return path this method does not write to
        /// <paramref name="text"/> or <paramref name="pos"/>.
        /// </remarks>
        /// <param name="text">The text.</param>
        /// <param name="pos">The position indices.</param>
        /// <param name="incremental">If <c>true</c>, test for partial matches that may
        /// be completed by additional text inserted at <c>pos.Limit</c>.</param>
        /// <returns>One of <see cref="MatchDegree.Mismatch"/>,
        /// <see cref="MatchDegree.PartialMatch"/>, or <see cref="MatchDegree.Match"/>.  If
        /// <paramref name="incremental"/> is <c>false</c> then
        /// <see cref="MatchDegree.PartialMatch"/> will not be returned.</returns>
        public virtual MatchDegree MatchAndReplace(IReplaceable text,
                                                   TransliterationPosition pos,
                                                   bool incremental)
        {
            // Matching and replacing are done in one method because the
            // replacement operation needs information obtained during the
            // match.  Another way to do this is to have the match method
            // create a match result struct with relevant offsets, and to pass
            // this into the replace method.

            // ============================ MATCH ===========================

            // Reset segment match data
            if (segments != null)
            {
                for (int i = 0; i < segments.Length; ++i)
                {
                    ((StringMatcher)segments[i]).ResetMatch();
                }
            }

            int keyLimit;

            // One-element array used as an in/out offset: the matchers read
            // and update intRef[0], and output.Replace() later stores the new
            // cursor position in it.
            int[] intRef = new int[1];

            // ------------------------ Ante Context ------------------------

            // A mismatch in the ante context, or with the start anchor,
            // is an outright Mismatch regardless of whether we are
            // incremental or not.
            int oText; // offset into 'text'
            int minOText; // lower bound for the new pos.Start after replacement

            // Note (1): We process text in 16-bit code units, rather than
            // 32-bit code points.  This works because stand-ins are
            // always in the BMP and because we are doing a literal match
            // operation, which can be done 16-bits at a time.

            int anteLimit = PosBefore(text, pos.ContextStart);

            MatchDegree match;

            // Start reverse match at char before pos.start
            intRef[0] = PosBefore(text, pos.Start);

            if (anteContext != null)
            {
                match = anteContext.Matches(text, intRef, anteLimit, false);
                if (match != MatchDegree.Match)
                {
                    return(MatchDegree.Mismatch);
                }
            }

            // Offset where the reverse (ante context) match stopped.
            oText = intRef[0];

            minOText = PosAfter(text, oText);

            // ------------------------ Start Anchor ------------------------

            // With a start anchor, the ante context must have consumed
            // everything back to the position before pos.ContextStart.
            if (((flags & ANCHOR_START) != 0) && oText != anteLimit)
            {
                return(MatchDegree.Mismatch);
            }

            // -------------------- Key and Post Context --------------------

            intRef[0] = pos.Start;

            if (key != null)
            {
                match = key.Matches(text, intRef, pos.Limit, incremental);
                if (match != MatchDegree.Match)
                {
                    return(match);
                }
            }

            // Offset just past the matched key.
            keyLimit = intRef[0];

            if (postContext != null)
            {
                if (incremental && keyLimit == pos.Limit)
                {
                    // The key matches just before pos.limit, and there is
                    // a postContext.  Since we are in incremental mode,
                    // we must assume more characters may be inserted at
                    // pos.limit -- this is a partial match.
                    return(MatchDegree.PartialMatch);
                }

                match = postContext.Matches(text, intRef, pos.ContextLimit, incremental);
                if (match != MatchDegree.Match)
                {
                    return(match);
                }
            }

            // Offset just past the key and post context.
            oText = intRef[0];

            // ------------------------- Stop Anchor ------------------------

            if (((flags & ANCHOR_END)) != 0)
            {
                if (oText != pos.ContextLimit)
                {
                    return(MatchDegree.Mismatch);
                }
                // NOTE(review): presumably an end-anchored rule cannot be
                // confirmed in incremental mode because more text may still
                // be appended -- confirm against the transliterator contract.
                if (incremental)
                {
                    return(MatchDegree.PartialMatch);
                }
            }

            // =========================== REPLACE ==========================

            // We have a full match.  The key is between pos.start and
            // keyLimit.

            int newLength = output.Replace(text, pos.Start, keyLimit, intRef);
            int lenDelta  = newLength - (keyLimit - pos.Start);
            int newStart  = intRef[0];

            // Shift every offset at or after the key by the change in length.
            oText            += lenDelta;
            pos.Limit        += lenDelta;
            pos.ContextLimit += lenDelta;
            // Restrict new value of start to [minOText, min(oText, pos.limit)].
            pos.Start = Math.Max(minOText, Math.Min(Math.Min(oText, pos.Limit), newStart));
            return(MatchDegree.Match);
        }