コード例 #1
0
        /// <summary>
        /// Default implementation of UnicodeMatcher::matches() for Unicode filters.
        /// Tests the code point found at <c>offset[0]</c> against this filter and,
        /// when it is accepted, moves <c>offset[0]</c> past it in the direction
        /// of the match.
        /// </summary>
        ///
        /// <param name="text">the text being examined</param>
        /// <param name="offset">one-element array holding the current position; updated in place on a match</param>
        /// <param name="limit">the match boundary: an offset below it is matched forward, an offset above it is matched backward</param>
        /// <param name="incremental">if true, an offset equal to the limit yields U_PARTIAL_MATCH instead of U_MISMATCH</param>
        /// <returns>U_MATCH, U_PARTIAL_MATCH or U_MISMATCH</returns>
        /// @stable ICU 2.0
        public virtual int Matches(Replaceable text, int[] offset, int limit,
                                   bool incremental)
        {
            int position = offset[0];

            if (position < limit)
            {
                // Forward match: step over the whole code point.
                int codePoint = text.Char32At(position);
                if (Contains(codePoint))
                {
                    offset[0] = position + IBM.ICU.Text.UTF16.GetCharCount(codePoint);
                    return IBM.ICU.Text.UnicodeMatcher_Constants.U_MATCH;
                }
            }
            else if (position > limit)
            {
                // Backward match.
                if (Contains(text.Char32At(position)))
                {
                    // Step back one code unit; if the code point now under the
                    // offset occupies two units, step back one more so that the
                    // offset rests on its lead surrogate.
                    offset[0] = position - 1;
                    if (offset[0] >= 0)
                    {
                        int precedingLength = IBM.ICU.Text.UTF16.GetCharCount(text.Char32At(offset[0]));
                        offset[0] -= precedingLength - 1;
                    }
                    return IBM.ICU.Text.UnicodeMatcher_Constants.U_MATCH;
                }
            }
            else if (incremental)
            {
                // Exactly at the limit: more text could still produce a match.
                return IBM.ICU.Text.UnicodeMatcher_Constants.U_PARTIAL_MATCH;
            }

            return IBM.ICU.Text.UnicodeMatcher_Constants.U_MISMATCH;
        }
コード例 #2
0
            /// <summary>
            /// Advances to the next run. The new run begins where the previous
            /// one ended; if that point is already <c>textLimit</c>, nothing is
            /// left and false is returned. Otherwise <c>start</c>, <c>limit</c>
            /// and <c>scriptCode</c> are updated to describe the run and true is
            /// returned.
            /// </summary>
            ///
            /// <returns>true if a run was produced, false if the text is exhausted</returns>
            public bool Next()
            {
                // The script is unknown until a non-COMMON, non-INHERITED
                // character has been seen.
                scriptCode = IBM.ICU.Lang.UScript.INVALID_CODE;
                start      = limit;

                if (start == textLimit)
                {
                    return false;
                }

                // Pull start backwards over adjacent COMMON / INHERITED characters.
                for (; start > textStart; --start)
                {
                    int previousScript = IBM.ICU.Lang.UScript.GetScript(text.Char32At(start - 1));
                    if (previousScript != IBM.ICU.Lang.UScript.COMMON &&
                        previousScript != IBM.ICU.Lang.UScript.INHERITED)
                    {
                        break;
                    }
                }

                // Push limit forwards over COMMON / INHERITED characters and
                // characters that share the script of the run.
                for (; limit < textLimit; ++limit)
                {
                    int nextScript = IBM.ICU.Lang.UScript.GetScript(text.Char32At(limit));
                    if (nextScript == IBM.ICU.Lang.UScript.COMMON ||
                        nextScript == IBM.ICU.Lang.UScript.INHERITED)
                    {
                        continue;
                    }

                    if (scriptCode == IBM.ICU.Lang.UScript.INVALID_CODE)
                    {
                        // First character with a specific script fixes the run's script.
                        scriptCode = nextScript;
                    }
                    else if (nextScript != scriptCode)
                    {
                        // A different script begins here; the run ends before it.
                        break;
                    }
                }

                // A run made up only of COMMON / INHERITED characters is still
                // reported; scriptCode is then left as UScript.INVALID_CODE.
                return true;
            }
コード例 #3
0
        /// <summary>
        /// Tries the rules indexed under the low byte of the code point at
        /// <c>pos.start</c>, in order, until one of them reports a match or a
        /// partial match. If none does, <c>pos.start</c> is moved past that
        /// code point.
        /// </summary>
        ///
        /// <param name="text">the text to be transliterated</param>
        /// <param name="pos">the position indices, which will be updated</param>
        /// <param name="incremental">passed through to each rule's MatchAndReplace; also selects the "Rule.i:" prefix in debug output</param>
        /// <returns>false if a rule reported U_PARTIAL_MATCH, indicating that
        /// transliteration should stop until more text arrives; true otherwise.</returns>
        public bool Transliterate(Replaceable text, Transliterator.Position pos,
                                  bool incremental)
        {
            int bucket = text.Char32At(pos.start) & 0xFF;

            for (int ruleIndex = index[bucket]; ruleIndex < index[bucket + 1]; ++ruleIndex)
            {
                int outcome = rules[ruleIndex].MatchAndReplace(text, pos, incremental);

                if (outcome == IBM.ICU.Text.UnicodeMatcher_Constants.U_MATCH)
                {
                    if (IBM.ICU.Text.Transliterator.DEBUG)
                    {
                        string label = incremental ? "Rule.i: match " : "Rule: match ";
                        System.Console.Out.WriteLine(label
                                                     + rules[ruleIndex].ToRule(true)
                                                     + " => "
                                                     + IBM.ICU.Impl.UtilityExtensions.FormatInput(text, pos));
                    }
                    return true;
                }

                if (outcome == IBM.ICU.Text.UnicodeMatcher_Constants.U_PARTIAL_MATCH)
                {
                    if (IBM.ICU.Text.Transliterator.DEBUG)
                    {
                        string label = incremental ? "Rule.i: partial match " : "Rule: partial match ";
                        System.Console.Out.WriteLine(label
                                                     + rules[ruleIndex].ToRule(true)
                                                     + " => "
                                                     + IBM.ICU.Impl.UtilityExtensions.FormatInput(text, pos));
                    }
                    return false;
                }

                // Any other result: this rule did not apply, try the next one.
                if (IBM.ICU.Text.Transliterator.DEBUG)
                {
                    System.Console.Out.WriteLine("Rule: no match " + rules[ruleIndex]);
                }
            }

            // No rule matched, fully or partially: skip one code point.
            pos.start += IBM.ICU.Text.UTF16.GetCharCount(text.Char32At(pos.start));
            if (IBM.ICU.Text.Transliterator.DEBUG)
            {
                string label = incremental ? "Rule.i: no match => " : "Rule: no match => ";
                System.Console.Out.WriteLine(label
                                             + IBM.ICU.Impl.UtilityExtensions.FormatInput(text, pos));
            }
            return true;
        }
コード例 #4
0
        /// <summary>
        /// Implements <see cref="M:IBM.ICU.Text.Transliterator.HandleTransliterate(IBM.ICU.Text.Replaceable, null, System.Boolean)"/>.
        /// Splits the range <c>[offsets.start, offsets.limit)</c> at "safe"
        /// characters (combining class 0 and not in <c>unsafeStart</c>) and
        /// passes each piece to <c>Convert</c>, tracking how the text length
        /// changes along the way.
        /// </summary>
        ///
        /// <param name="text">the text to convert in place</param>
        /// <param name="offsets">position indices; start, limit and contextLimit are updated</param>
        /// <param name="isIncremental">if false the final piece is always converted; if true it is
        /// converted with the <c>skippable</c> set and only accepted when Convert does not
        /// return Int32.MinValue</param>
        protected internal override void HandleTransliterate(Replaceable text, Transliterator.Position offsets,
                                                             bool isIncremental)
        {
            int rangeStart = offsets.start;
            int rangeLimit = offsets.limit;

            if (rangeStart >= rangeLimit)
            {
                return;
            }

            // TODO: fix for surrogates
            // TODO: add QuickCheck, so we rarely convert OK stuff

            int totalDelta = 0;
            int safePoint  = rangeStart;     // beginning of the piece not yet converted

            int scan = rangeStart + 1;
            while (scan < rangeLimit)
            {
                int codePoint = text.Char32At(scan);
                bool isSafe   = IBM.ICU.Lang.UCharacter.GetCombiningClass(codePoint) == 0 &&
                                !unsafeStart.Contains(codePoint);
                if (isSafe)
                {
                    // Convert everything from the previous safe point up to
                    // (not including) this character, then shift all indices
                    // by the resulting change in length.
                    int pieceDelta = Convert(text, safePoint, scan, null);
                    scan       += pieceDelta;
                    rangeLimit += pieceDelta;
                    totalDelta += pieceDelta;
                    safePoint   = scan;
                }
                scan += IBM.ICU.Text.UTF16.GetCharCount(codePoint);
            }

            if (isIncremental)
            {
                // Incremental: accept the trailing characters only if Convert
                // (given the skippable set) does not report Int32.MinValue.
                int tailDelta = Convert(text, safePoint, rangeLimit, skippable);
                if (tailDelta != int.MinValue)
                {
                    totalDelta += tailDelta;
                    safePoint   = rangeLimit + tailDelta;
                }
            }
            else
            {
                // Not incremental: convert through to the end.
                int tailDelta = Convert(text, safePoint, rangeLimit, null);
                totalDelta += tailDelta;
                safePoint   = rangeLimit + tailDelta;
            }

            offsets.contextLimit += totalDelta;
            offsets.limit        += totalDelta;
            offsets.start         = safePoint;
        }
コード例 #5
0
        /// <summary>
        /// Implements <see cref="M:IBM.ICU.Text.Transliterator.HandleTransliterate(IBM.ICU.Text.Replaceable, null, System.Boolean)"/>.
        /// Replaces every character in <c>[pos.start, pos.limit)</c> with
        /// prefix + number + suffix, where the number is the character's value
        /// written by <c>Utility.AppendNumber</c> with this object's radix and
        /// minimum digit count. Values above 0xFFFF are formatted with
        /// <c>supplementalHandler</c>'s settings when one is present.
        /// </summary>
        ///
        /// <param name="text">the text to escape in place</param>
        /// <param name="pos">position indices; start, limit and contextLimit are updated</param>
        /// <param name="incremental">not used by this implementation</param>
        protected internal override void HandleTransliterate(Replaceable text, Transliterator.Position pos,
                                                             bool incremental)
        {
            int cursor = pos.start;
            int end    = pos.limit;

            // The buffer normally keeps this object's prefix at its front so
            // only the tail has to be rebuilt for each character.
            StringBuilder escaped         = new StringBuilder(prefix);
            int           ownPrefixLength = prefix.Length;
            bool          prefixClobbered = false;

            while (cursor < end)
            {
                int codePoint;
                int unitCount;
                if (grokSupplementals)
                {
                    codePoint = text.Char32At(cursor);
                    unitCount = IBM.ICU.Text.UTF16.GetCharCount(codePoint);
                }
                else
                {
                    codePoint = text.CharAt(cursor);
                    unitCount = 1;
                }

                bool useSupplementalHandler = supplementalHandler != null &&
                                              (codePoint & ~0xFFFF) != 0;
                if (useSupplementalHandler)
                {
                    // Rebuild the buffer from scratch with the other handler's
                    // prefix; remember that our own prefix is gone.
                    escaped.Length = 0;
                    escaped.Append(supplementalHandler.prefix);
                    IBM.ICU.Impl.Utility.AppendNumber(escaped, codePoint, supplementalHandler.radix,
                                                      supplementalHandler.minDigits);
                    escaped.Append(supplementalHandler.suffix);
                    prefixClobbered = true;
                }
                else
                {
                    if (prefixClobbered)
                    {
                        escaped.Length = 0;
                        escaped.Append(prefix);
                        prefixClobbered = false;
                    }
                    else
                    {
                        // Our prefix is still in place; drop only what follows it.
                        escaped.Length = ownPrefixLength;
                    }
                    IBM.ICU.Impl.Utility.AppendNumber(escaped, codePoint, radix, minDigits);
                    escaped.Append(suffix);
                }

                int escapedLength = escaped.Length;
                text.Replace(cursor, cursor + unitCount, escaped.ToString());
                cursor += escapedLength;
                end    += escapedLength - unitCount;
            }

            pos.contextLimit += end - pos.limit;
            pos.limit         = end;
            pos.start         = cursor;
        }
コード例 #6
0
        /// <summary>
        /// Implements <see cref="M:IBM.ICU.Text.Transliterator.HandleTransliterate(IBM.ICU.Text.Replaceable, null, System.Boolean)"/>.
        /// Replaces each code point in <c>[offsets.start, offsets.limit)</c>
        /// with OPEN_DELIM + the name returned by
        /// <c>UCharacter.GetExtendedName</c> + CLOSE_DELIM. Code points for
        /// which no name is returned are left untouched.
        /// </summary>
        ///
        /// <param name="text">the text to rewrite in place</param>
        /// <param name="offsets">position indices; start, limit and contextLimit are updated</param>
        /// <param name="isIncremental">not used by this implementation</param>
        protected internal override void HandleTransliterate(Replaceable text, Transliterator.Position offsets,
                                                             bool isIncremental)
        {
            int position = offsets.start;
            int end      = offsets.limit;

            // The opening delimiter stays at the front of the buffer; only the
            // part after it is rebuilt for each character.
            StringBuilder replacement = new StringBuilder();
            replacement.Append(OPEN_DELIM);

            while (position < end)
            {
                int    codePoint = text.Char32At(position);
                string charName  = IBM.ICU.Lang.UCharacter.GetExtendedName(codePoint);

                if (charName == null)
                {
                    ++position;
                    continue;
                }

                replacement.Length = OPEN_DELIM_LEN;
                replacement.Append(charName).Append(CLOSE_DELIM);

                int oldLength = IBM.ICU.Text.UTF16.GetCharCount(codePoint);
                text.Replace(position, position + oldLength, replacement.ToString());

                int newLength = replacement.Length;
                position += newLength;               // continue after the inserted text
                end      += newLength - oldLength;   // account for the change in length
            }

            offsets.contextLimit += end - offsets.limit;
            offsets.limit         = end;
            offsets.start         = position;
        }
コード例 #7
0
        /// <summary>
        /// Implements <see cref="M:IBM.ICU.Text.Transliterator.HandleTransliterate(IBM.ICU.Text.Replaceable, null, System.Boolean)"/>.
        /// Scans <c>[offsets.start, offsets.limit)</c> for sequences that begin
        /// with the OPEN_PAT pattern and end with CLOSE_DELIM, and replaces each
        /// one whose enclosed name is recognised by
        /// <c>UCharacter.GetCharFromExtendedName</c> with the character it names.
        /// Runs of rule white space inside a name are collapsed to one SPACE;
        /// leading and trailing white space is dropped.
        /// </summary>
        ///
        /// <param name="text">the text to rewrite in place</param>
        /// <param name="offsets">position indices; start, limit and contextLimit are updated</param>
        /// <param name="isIncremental">if true and an open-delimiter candidate is still recorded
        /// when the scan ends, <c>offsets.start</c> is set to that candidate instead of the cursor</param>
        protected internal override void HandleTransliterate(Replaceable text, Transliterator.Position offsets,
                                                             bool isIncremental)
        {
            // Longest possible character name, plus one to allow for a
            // temporary trailing space in the accumulation buffer.
            int maxLen = IBM.ICU.Impl.UCharacterName.GetInstance().GetMaxCharNameLength() + 1;

            // Accumulates the candidate name between the delimiters.
            StringBuilder name = new StringBuilder(maxLen);

            // Get the legal character set
            UnicodeSet legal = new UnicodeSet();

            IBM.ICU.Impl.UCharacterName.GetInstance().GetCharNameCharacters(legal);

            int cursor = offsets.start;
            int limit  = offsets.limit;

            // Modes:
            // 0 - looking for open delimiter
            // 1 - after open delimiter
            int mode    = 0;
            int openPos = -1;     // open delim candidate pos

            int c;

            while (cursor < limit)
            {
                c = text.Char32At(cursor);

                switch (mode)
                {
                case 0:                  // looking for open delimiter
                    if (c == OPEN_DELIM) // quick check first
                    {
                        // Remember the candidate, then verify the full opening
                        // pattern; ParsePattern's result is used as the index
                        // to continue from when it is in [0, limit).
                        openPos = cursor;
                        int i = IBM.ICU.Impl.Utility.ParsePattern(OPEN_PAT, text, cursor, limit);
                        if (i >= 0 && i < limit)
                        {
                            mode        = 1;
                            name.Length = 0;
                            cursor      = i;
                            continue;     // *** reprocess char32At(cursor)
                        }
                    }
                    break;

                case 1:     // after open delimiter
                    // Look for legal chars. If \s+ is found, convert it
                    // to a single space. If closeDelimiter is found, exit
                    // the loop. If any other character is found, exit the
                    // loop. If the limit is reached, exit the loop.

                    // Convert \s+ => SPACE. This assumes there are no
                    // runs of >1 space characters in names.
                    if (IBM.ICU.Impl.UCharacterProperty.IsRuleWhiteSpace(c))
                    {
                        // Ignore leading whitespace
                        if (name.Length > 0 &&
                            name[name.Length - 1] != SPACE)
                        {
                            name.Append(SPACE);
                            // If we are too long then abort. maxLen includes
                            // temporary trailing space, so use '>'.
                            if (name.Length > maxLen)
                            {
                                mode = 0;
                            }
                        }
                        break;
                    }

                    if (c == CLOSE_DELIM)
                    {
                        int len = name.Length;

                        // Delete trailing space, if any
                        if (len > 0 && name[len - 1] == SPACE)
                        {
                            name.Length = --len;
                        }

                        // c is reused to hold the looked-up character; -1 is
                        // treated below as "lookup failed".
                        c = IBM.ICU.Lang.UCharacter.GetCharFromExtendedName(name.ToString());
                        if (c != -1)
                        {
                            // Lookup succeeded

                            // assert(UTF16.getCharCount(CLOSE_DELIM) == 1);
                            cursor++;     // advance over CLOSE_DELIM

                            // Replace everything from the open delimiter
                            // through the close delimiter with the character.
                            String str = IBM.ICU.Text.UTF16.ValueOf(c);
                            text.Replace(openPos, cursor, str);

                            // Adjust indices for the change in the length of
                            // the string. Do not assume that str.length() ==
                            // 1, in case of surrogates.
                            int delta = cursor - openPos - str.Length;
                            cursor -= delta;
                            limit  -= delta;
                            // assert(cursor == openPos + str.length());
                        }
                        // If the lookup failed, we leave things as-is and
                        // still switch to mode 0 and continue.
                        // Note: this is the only path that clears openPos; the
                        // other returns to mode 0 leave the candidate recorded.
                        mode    = 0;
                        openPos = -1; // close off candidate
                        continue;     // *** reprocess char32At(cursor)
                    }

                    if (legal.Contains(c))
                    {
                        IBM.ICU.Text.UTF16.Append(name, c);
                        // If we go past the longest possible name then abort.
                        // maxLen includes temporary trailing space, so use '>='.
                        if (name.Length >= maxLen)
                        {
                            mode = 0;
                        }
                    }

                    // Invalid character
                    else
                    {
                        // Back up one unit so that the increment after the
                        // switch brings the cursor back to this character
                        // (when it is a single code unit) for reprocessing
                        // in mode 0.
                        --cursor;     // Backup and reprocess this character
                        mode = 0;
                    }

                    break;
                }

                // Reached via 'break' only; the 'continue' paths above skip
                // this so that the character at the new cursor is re-read.
                cursor += IBM.ICU.Text.UTF16.GetCharCount(c);
            }

            offsets.contextLimit += limit - offsets.limit;
            offsets.limit         = limit;
            // In incremental mode, only advance the cursor up to the last
            // open delimiter candidate.
            offsets.start = (isIncremental && openPos >= 0) ? openPos : cursor;
        }
コード例 #8
0
        // = public static UnicodeReplacer valueOf(String output,
        // = int cursorPos,
        // = RuleBasedTransliterator.Data data) {
        // = if (output.length() == 1) {
        // = char c = output.charAt(0);
        // = UnicodeReplacer r = data.lookupReplacer(c);
        // = if (r != null) {
        // = return r;
        // = }
        // = }
        // = return new StringReplacer(output, cursorPos, data);
        // = }

        /// <summary>
        /// UnicodeReplacer API. Replaces the range <c>[start, limit)</c> of
        /// <c>text</c> with this replacer's output and, if this replacer has a
        /// cursor, stores the new cursor position in <c>cursor[0]</c>.
        /// When the output contains no nested replacers it is inserted
        /// directly; otherwise it is assembled piece by piece in a temporary
        /// area at the end of <c>text</c> and then copied over the key.
        /// </summary>
        ///
        /// <param name="text">the text to modify in place</param>
        /// <param name="start">start of the range to replace (inclusive)</param>
        /// <param name="limit">end of the range to replace (exclusive)</param>
        /// <param name="cursor">one-element array; element 0 is written only when <c>hasCursor</c> is true.
        /// It is also passed on to nested replacers.</param>
        /// <returns>the length, in code units, of the text that replaced the range</returns>
        public virtual int Replace(Replaceable text, int start, int limit, int[] cursor)
        {
            int outLen;
            int newStart = 0;

            // NOTE: It should be possible to _always_ run the complex
            // processing code; just slower. If not, then there is a bug
            // in the complex processing code.

            // Simple (no nested replacers) Processing Code :
            if (!isComplex)
            {
                text.Replace(start, limit, output);
                outLen = output.Length;

                // Setup default cursor position (for cursorPos within output)
                newStart = cursorPos;
            }

            // Complex (nested replacers) Processing Code :
            else
            {
                /*
                 * When there are segments to be copied, use the Replaceable.copy()
                 * API in order to retain out-of-band data. Copy everything to the
                 * end of the string, then copy them back over the key. This
                 * preserves the integrity of indices into the key and surrounding
                 * context while generating the output text.
                 */
                StringBuilder buf = new StringBuilder();
                int           oOutput; // offset into 'output'
                // Assume the simple path from now on; set back to true below
                // as soon as a nested replacer is actually found in 'output'.
                isComplex = false;

                // The temporary buffer starts at tempStart, and extends
                // to destLimit + tempExtra. The start of the buffer has a single
                // character from before the key. This provides style
                // data when additional characters are filled into the
                // temporary buffer. If there is nothing to the left, use
                // the non-character U+FFFF, which Replaceable subclasses
                // should treat specially as a "no-style character."
                // destStart points to the point after the style context
                // character, so it is tempStart+1 or tempStart+2.
                int tempStart = text.Length(); // start of temp buffer
                int destStart = tempStart;     // copy new text to here
                if (start > 0)
                {
                    int len = IBM.ICU.Text.UTF16.GetCharCount(text.Char32At(start - 1));
                    text.Copy(start - len, start, tempStart);
                    destStart += len;
                }
                else
                {
                    text.Replace(tempStart, tempStart, "\uFFFF");
                    destStart++;
                }
                int destLimit = destStart;
                int tempExtra = 0;     // temp chars after destLimit

                // Walk 'output' one code point at a time (oOutput is advanced
                // at the bottom of the loop body).
                for (oOutput = 0; oOutput < output.Length;)
                {
                    if (oOutput == cursorPos)
                    {
                        // Record the position of the cursor
                        newStart = buf.Length + destLimit - destStart;     // relative
                                                                           // to start
                        // the buf.length() was inserted for bug 5789
                        // the problem is that if we are accumulating into a buffer
                        // (when r == null below)
                        // then the actual length of the text at that point needs to
                        // add the buf length.
                        // there was an alternative suggested in #5789, but that
                        // looks like it won't work
                        // if we have accumulated some stuff in the dest part AND
                        // have a non-zero buffer.
                    }
                    int c = IBM.ICU.Text.UTF16.CharAt(output, oOutput);

                    // When we are at the last position copy the right style
                    // context character into the temporary buffer. We don't
                    // do this before because it will provide an incorrect
                    // right context for previous replace() operations.
                    int nextIndex = oOutput + IBM.ICU.Text.UTF16.GetCharCount(c);
                    if (nextIndex == output.Length)
                    {
                        tempExtra = IBM.ICU.Text.UTF16.GetCharCount(text.Char32At(limit));
                        text.Copy(limit, limit + tempExtra, destLimit);
                    }

                    // A non-null result means this output character stands
                    // for a nested replacer rather than for literal text.
                    UnicodeReplacer r = data.LookupReplacer(c);
                    if (r == null)
                    {
                        // Accumulate straight (non-segment) text.
                        IBM.ICU.Text.UTF16.Append(buf, c);
                    }
                    else
                    {
                        isComplex = true;

                        // Insert any accumulated straight text.
                        if (buf.Length > 0)
                        {
                            text.Replace(destLimit, destLimit, buf.ToString());
                            destLimit += buf.Length;
                            buf.Length = 0;
                        }

                        // Delegate output generation to replacer object
                        int len_0 = r.Replace(text, destLimit, destLimit, cursor);
                        destLimit += len_0;
                    }
                    oOutput = nextIndex;
                }
                // Insert any accumulated straight text.
                if (buf.Length > 0)
                {
                    text.Replace(destLimit, destLimit, buf.ToString());
                    destLimit += buf.Length;
                }
                // Cursor located exactly at the end of 'output'; the loop
                // above never sees that index.
                if (oOutput == cursorPos)
                {
                    // Record the position of the cursor
                    newStart = destLimit - destStart;     // relative to start
                }

                outLen = destLimit - destStart;

                // Copy new text to start, and delete it
                // (after the copy, everything at or beyond 'start' has moved
                // right by outLen, hence the "+ outLen" in the indices below)
                text.Copy(destStart, destLimit, start);
                text.Replace(tempStart + outLen, destLimit + tempExtra + outLen, "");

                // Delete the old text (the key)
                text.Replace(start + outLen, limit + outLen, "");
            }

            if (hasCursor)
            {
                // Adjust the cursor for positions outside the key. These
                // refer to code points rather than code units. If cursorPos
                // is within the output string, then use newStart, which has
                // already been set above.
                if (cursorPos < 0)
                {
                    newStart = start;
                    int n = cursorPos;
                    // Outside the output string, cursorPos counts code points
                    while (n < 0 && newStart > 0)
                    {
                        newStart -= IBM.ICU.Text.UTF16.GetCharCount(text.Char32At(newStart - 1));
                        ++n;
                    }
                    // If the beginning of the text was reached first, n is
                    // still negative and moves newStart below zero by the
                    // number of steps that could not be taken.
                    newStart += n;
                }
                else if (cursorPos > output.Length)
                {
                    newStart = start + outLen;
                    int n_1 = cursorPos - output.Length;
                    // Outside the output string, cursorPos counts code points
                    while (n_1 > 0 && newStart < text.Length())
                    {
                        newStart += IBM.ICU.Text.UTF16.GetCharCount(text.Char32At(newStart));
                        --n_1;
                    }
                    // If the end of the text was reached first, the remaining
                    // count moves newStart beyond the text length.
                    newStart += n_1;
                }
                else
                {
                    // Cursor is within output string. It has been set up above
                    // to be relative to start.
                    newStart += start;
                }

                cursor[0] = newStart;
            }

            return(outLen);
        }
コード例 #9
0
        /// <summary>
        /// Implements <see cref="M:IBM.ICU.Text.Transliterator.HandleTransliterate(IBM.ICU.Text.Replaceable, null, System.Boolean)"/>.
        /// At each position in <c>[pos.start, pos.limit)</c> tries every form
        /// described in <c>spec</c>: a literal prefix, a run of digits in the
        /// form's radix, and a literal suffix. The first form that matches is
        /// replaced by the character whose value the digits encode.
        /// </summary>
        ///
        /// <remarks>
        /// As read by this method, <c>spec</c> is a sequence of entries
        /// terminated by END. Each entry is a five-element header (prefix
        /// length, suffix length, radix, minimum digits, maximum digits)
        /// followed by the prefix characters and then the suffix characters.
        /// </remarks>
        ///
        /// <param name="text">the text to rewrite in place</param>
        /// <param name="pos">position indices; start, limit and contextLimit are updated</param>
        /// <param name="isIncremental">if true, scanning stops as soon as a form runs into the limit
        /// after having consumed at least one character, leaving <c>pos.start</c> at the beginning
        /// of that partial match</param>
        protected internal override void HandleTransliterate(Replaceable text, Transliterator.Position pos,
                                                             bool isIncremental)
        {
            int start = pos.start;
            int limit = pos.limit;
            int i, j, ipat;

            // The 'loop' label is not referenced in this method; the jumps
            // out of the scan go to 'gotoloop', declared after this block.
            loop : {
                while (start < limit)
                {
                    // Loop over the forms in spec[]. Exit this loop when we
                    // match one of the specs. Exit the outer loop if a
                    // partial match is detected and isIncremental is true.
                    // (j counts the forms tried but is never read.)
                    for (j = 0, ipat = 0; spec[ipat] != END; ++j)
                    {
                        // Read the header
                        int prefixLen = spec[ipat++];
                        int suffixLen = spec[ipat++];
                        int radix     = spec[ipat++];
                        int minDigits = spec[ipat++];
                        int maxDigits = spec[ipat++];
                        // ipat now indexes the first prefix character.

                        // s is a copy of start that is advanced over the
                        // characters as we parse them.
                        int  s     = start;
                        bool match = true;

                        // Match the literal prefix, one code unit at a time.
                        for (i = 0; i < prefixLen; ++i)
                        {
                            if (s >= limit)
                            {
                                // With i == 0, s still equals start, which the
                                // enclosing while condition keeps below limit,
                                // so only the i > 0 case needs handling here.
                                if (i > 0)
                                {
                                    // We've already matched a character. This is
                                    // a partial match, so we return if in
                                    // incremental mode. In non-incremental mode,
                                    // go to the next spec.
                                    if (isIncremental)
                                    {
                                        goto gotoloop;
                                    }
                                    match = false;
                                    break;
                                }
                            }
                            char c = text.CharAt(s++);
                            if (c != spec[ipat + i])
                            {
                                match = false;
                                break;
                            }
                        }

                        if (match)
                        {
                            // Accumulate up to maxDigits digits into u.
                            int u          = 0;
                            int digitCount = 0;
                            for (;;)
                            {
                                if (s >= limit)
                                {
                                    // Check for partial match in incremental mode.
                                    if (s > start && isIncremental)
                                    {
                                        goto gotoloop;
                                    }
                                    break;
                                }
                                int ch    = text.Char32At(s);
                                int digit = IBM.ICU.Lang.UCharacter.Digit(ch, radix);
                                if (digit < 0)
                                {
                                    // Not a digit in this radix: end of the number.
                                    break;
                                }
                                s += IBM.ICU.Text.UTF16.GetCharCount(ch);
                                u  = (u * radix) + digit;
                                if (++digitCount == maxDigits)
                                {
                                    break;
                                }
                            }

                            match = (digitCount >= minDigits);

                            if (match)
                            {
                                // Match the literal suffix, which is stored
                                // directly after the prefix in spec[].
                                for (i = 0; i < suffixLen; ++i)
                                {
                                    if (s >= limit)
                                    {
                                        // Check for partial match in incremental mode.
                                        if (s > start && isIncremental)
                                        {
                                            goto gotoloop;
                                        }
                                        match = false;
                                        break;
                                    }
                                    char c_0 = text.CharAt(s++);
                                    if (c_0 != spec[ipat + prefixLen + i])
                                    {
                                        match = false;
                                        break;
                                    }
                                }

                                if (match)
                                {
                                    // At this point, we have a match
                                    // Replace [start, s) with the character and
                                    // shrink limit by the change in length.
                                    String str = IBM.ICU.Text.UTF16.ValueOf(u);
                                    text.Replace(start, s, str);
                                    limit -= s - start - str.Length;
                                    // The following break statement leaves the
                                    // loop that is traversing the forms in
                                    // spec[]. We then parse the next input
                                    // character.
                                    break;
                                }
                            }
                        }

                        // This form did not match: skip its prefix and suffix
                        // characters to reach the next header.
                        ipat += prefixLen + suffixLen;
                    }

                    // Step over one code point: either the character that was
                    // just written or the one no form matched.
                    if (start < limit)
                    {
                        start += IBM.ICU.Text.UTF16.GetCharCount(text.Char32At(start));
                    }
                }
            }
// Jump target for partial matches in incremental mode; also reached by
// falling through when the scan completes.
gotoloop:
            ;

            pos.contextLimit += limit - pos.limit;
            pos.limit         = limit;
            pos.start         = start;
        }
コード例 #10
0
 /// <summary>
 /// Computes the index that follows position <paramref name="pos"/> in
 /// <paramref name="str"/>.
 /// </summary>
 /// <param name="str">The text being indexed.</param>
 /// <param name="pos">The current index; may lie outside the text.</param>
 /// <returns>
 /// When <paramref name="pos"/> is inside <c>[0, str.Length())</c>, the index
 /// advanced by the code-unit count of the code point read at
 /// <paramref name="pos"/>; otherwise <c>pos + 1</c>.
 /// </returns>
 static internal int PosAfter(Replaceable str, int pos)
 {
     // Outside the text there is no code point to measure: step one unit.
     if (!(pos >= 0 && pos < str.Length()))
     {
         return pos + 1;
     }

     int codePoint = str.Char32At(pos);
     return pos + IBM.ICU.Text.UTF16.GetCharCount(codePoint);
 }
コード例 #11
0
 /// <summary>
 /// Computes the index that precedes position <paramref name="pos"/> in
 /// <paramref name="str"/>.
 /// </summary>
 /// <param name="str">The text being indexed.</param>
 /// <param name="pos">The current index.</param>
 /// <returns>
 /// When <paramref name="pos"/> is greater than zero, the index moved back by
 /// the code-unit count of the code point read at <c>pos - 1</c>; otherwise
 /// <c>pos - 1</c>.
 /// </returns>
 static internal int PosBefore(Replaceable str, int pos)
 {
     // At or before the start there is nothing to measure: step one unit.
     if (pos <= 0)
     {
         return pos - 1;
     }

     // NOTE(review): presumably Char32At yields the full code point even when
     // pos - 1 addresses the second unit of a surrogate pair -- confirm.
     int width = IBM.ICU.Text.UTF16.GetCharCount(str.Char32At(pos - 1));
     return pos - width;
 }
コード例 #12
0
        /// <summary>
        /// Implements <see cref="M:IBM.ICU.Text.Transliterator.HandleTransliterate(IBM.ICU.Text.Replaceable, null, System.Boolean)"/>.
        /// Walks the range <c>[offsets.start, offsets.limit)</c> of
        /// <paramref name="text"/> and replaces each code point that is not
        /// case-ignorable with its full titlecase or full lowercase mapping,
        /// depending on the kind of the previous non-ignorable code point.
        /// </summary>
        /// <param name="text">The text that is modified in place.</param>
        /// <param name="offsets">
        /// Range and context positions. <c>limit</c> and <c>contextLimit</c> are
        /// shifted by every length-changing replacement; <c>start</c> is set to
        /// the final <c>limit</c>, or to the start of the pending code point when
        /// an incremental call has to wait for more input.
        /// </param>
        /// <param name="isIncremental">
        /// When true, processing stops as soon as a case mapping reports that it
        /// reached the limit of the available context.
        /// </param>
        protected internal override void HandleTransliterate(Replaceable text, Transliterator.Position offsets,
                                                             bool isIncremental)
        {
            // TODO: reimplement along the lines of ustrcase.c, using a real word
            // break iterator rather than watching for transitions between cased
            // and uncased characters. Consider delegating the lowercasing to
            // CaseMapTransliterator::handleTransliterate() (set fMap). Because
            // case mappings are context-sensitive, isIncremental has to be
            // honoured, including detecting a lowercasing step that could not
            // finish for lack of context.

            // Nothing to do for an empty range.
            if (offsets.start >= offsets.limit)
            {
                return;
            }

            // Current mode: true => next non-ignorable code point is titlecased,
            // false => it is lowercased.
            bool titleNext = true;

            // Look backwards through the preceding context, skipping
            // case-ignorable code points (kind < 0). A cased code point (kind > 0)
            // means we start in lowercase mode; an uncased one (kind == 0) or an
            // exhausted context leaves us in titlecase mode.
            int scan = offsets.start - 1;
            while (scan >= offsets.contextStart)
            {
                int previous = text.Char32At(scan);
                int previousKind = csp.GetTypeOrIgnorable(previous);
                if (previousKind >= 0)
                {
                    titleNext = (previousKind == 0);
                    break;
                }
                scan -= IBM.ICU.Text.UTF16.GetCharCount(previous);
            }

            // Point the shared iterator at the range to convert and clear the
            // shared mapping buffer.
            iter.SetText(text);
            iter.SetIndex(offsets.start);
            iter.SetLimit(offsets.limit);
            iter.SetContextLimits(offsets.contextStart, offsets.contextLimit);

            result.Length = 0;

            // Visit every code point of the range; whenever the mapping differs
            // from the input, patch the corresponding span of the replaceable.
            for (int cp = iter.NextCaseMapCP(); cp >= 0; cp = iter.NextCaseMapCP())
            {
                int kind = csp.GetTypeOrIgnorable(cp);
                if (kind < 0)
                {
                    // Case-ignorable: left untouched and does not switch the mode.
                    continue;
                }

                int mapped = titleNext
                    ? csp.ToFullTitle(cp, iter, result, locale, locCache)
                    : csp.ToFullLower(cp, iter, result, locale, locCache);

                // After an uncased code point we titlecase, after a cased one we
                // lowercase.
                titleNext = (kind == 0);

                if (iter.DidReachLimit() && isIncremental)
                {
                    // The mapping tried to look beyond the context limit; resume
                    // from this code point once more input is available.
                    offsets.start = iter.GetCaseMapCPStart();
                    return;
                }

                // Decode the mapping result.
                if (mapped < 0)
                {
                    // The code point maps to itself: nothing to replace.
                    continue;
                }

                int lengthChange;
                if (mapped > IBM.ICU.Impl.UCaseProps.MAX_STRING_LENGTH)
                {
                    // The result is a single code point.
                    lengthChange = iter.Replace(IBM.ICU.Text.UTF16.ValueOf(mapped));
                }
                else
                {
                    // The result is a string that was written into the buffer.
                    lengthChange  = iter.Replace(result.ToString());
                    result.Length = 0;
                }

                if (lengthChange != 0)
                {
                    offsets.limit        += lengthChange;
                    offsets.contextLimit += lengthChange;
                }
            }

            // The whole range has been consumed.
            offsets.start = offsets.limit;
        }