Example #1
0
        /// <summary>
        /// Deletes the run of text between <c>index.start</c> and <c>index.limit</c>
        /// and shrinks <c>index.contextLimit</c> and <c>index.limit</c> by the
        /// number of code units that were removed.
        /// </summary>
        /// <param name="text">The text that is edited in place.</param>
        /// <param name="index">The run to delete; its limits are updated on return.</param>
        /// <param name="incremental">Not consulted by this implementation.</param>
        protected internal override void HandleTransliterate(Replaceable text, Transliterator.Position index,
                                                             bool incremental)
        {
            // The caller (filteredTransliterate) has already restricted the
            // position to an unfiltered run, so the entire run is removed.
            int runStart = index.start;
            int runLimit = index.limit;
            text.Replace(runStart, runLimit, "");

            // Everything after the run moved left by this many code units.
            int removed = runLimit - runStart;
            index.contextLimit -= removed;
            index.limit        -= removed;
        }
        /// <summary>
        /// Replaces every character between <c>pos.start</c> and <c>pos.limit</c>
        /// with an escape of the form prefix + number + suffix, where the number
        /// is written by <c>Utility.AppendNumber</c> using this object's radix and
        /// minimum digit count.
        /// </summary>
        /// <remarks>
        /// When <c>supplementalHandler</c> is non-null, values that have bits set
        /// above the low 16 are formatted with the handler's prefix, radix,
        /// minimum digits and suffix instead of this object's own.
        /// </remarks>
        /// <param name="text">The text that is edited in place.</param>
        /// <param name="pos">The range to convert; start, limit and contextLimit are updated.</param>
        /// <param name="incremental">Not consulted by this implementation.</param>
        protected internal override void HandleTransliterate(Replaceable text, Transliterator.Position pos,
                                                             bool incremental)
        {
            int cursor = pos.start;
            int end    = pos.limit;

            // The builder is seeded with our own prefix so the common path only
            // has to truncate back to it between characters.
            int           ownPrefixLength = prefix.Length;
            StringBuilder escaped         = new StringBuilder(prefix);
            bool          prefixClobbered = false;

            while (cursor < end)
            {
                int value;
                int unitCount;
                if (grokSupplementals)
                {
                    value     = (int)(text.Char32At(cursor));
                    unitCount = IBM.ICU.Text.UTF16.GetCharCount(value);
                }
                else
                {
                    value     = (int)(text.CharAt(cursor));
                    unitCount = 1;
                }

                bool delegateToSupplemental = supplementalHandler != null && (value & -65536) != 0;
                if (delegateToSupplemental)
                {
                    // Build the escape entirely from the handler's settings;
                    // this overwrites our cached prefix in the builder.
                    escaped.Length = 0;
                    escaped.Append(supplementalHandler.prefix);
                    IBM.ICU.Impl.Utility.AppendNumber(escaped, value, supplementalHandler.radix,
                                                      supplementalHandler.minDigits);
                    escaped.Append(supplementalHandler.suffix);
                    prefixClobbered = true;
                }
                else
                {
                    if (!prefixClobbered)
                    {
                        // Prefix is still intact: just drop the previous number and suffix.
                        escaped.Length = ownPrefixLength;
                    }
                    else
                    {
                        // Restore our own prefix after a supplemental escape.
                        escaped.Length = 0;
                        escaped.Append(prefix);
                        prefixClobbered = false;
                    }
                    IBM.ICU.Impl.Utility.AppendNumber(escaped, value, radix, minDigits);
                    escaped.Append(suffix);
                }

                string replacement = escaped.ToString();
                text.Replace(cursor, cursor + unitCount, replacement);

                // Step past the inserted escape and account for the length change.
                cursor += replacement.Length;
                end    += replacement.Length - unitCount;
            }

            pos.contextLimit += end - pos.limit;
            pos.limit         = end;
            pos.start         = cursor;
        }
        /// <summary>
        /// Normalizes the text in the range from <paramref name="lastSafe"/> to
        /// <paramref name="limit"/> and, when the normalized form differs from the
        /// input, writes it back into <paramref name="text"/>.
        /// </summary>
        ///
        /// <param name="text">The text to read from and, if needed, replace into.</param>
        /// <param name="lastSafe">Start index of the range to convert.</param>
        /// <param name="limit">End index (exclusive) of the range to convert.</param>
        /// <param name="verify">If non-null, check to see that all replacement characters are
        /// acceptable. If not, abort the conversion and return <c>Int32.MinValue</c>.</param>
        /// <returns>The delta in length (new - old), 0 if the text was already
        /// normalized, or <c>Int32.MinValue</c> if the verify step aborted.</returns>
        internal int Convert(Replaceable text, int lastSafe, int limit, UnicodeSet verify)
        {
            int len = limit - lastSafe;

            // Use a scratch array private to this call. The previous code locked
            // on the shared 'buffer' field and then reassigned that same field
            // inside the lock, so concurrent callers could end up synchronizing
            // on different objects while sharing one array.
            char[] chars = new char[len];
            text.GetChars(lastSafe, limit, chars, 0);
            String input = new String(chars, 0, len);     // TODO: fix normalizer to take char[]

            String output = IBM.ICU.Text.Normalizer.Normalize(input, mode, options);

            // verify OK, if specified
            if (verify != null)
            {
                // NOTE(review): the membership test uses the 'skippable' member,
                // not the 'verify' argument; 'verify' only switches the check on.
                // Confirm against the callers that this is intended.
                bool skip = !skippable.ContainsAll(output);
                if (DEBUG)
                {
                    System.Console.Out.WriteLine(((skip) ? "  SKIP: " : "NOSKIP: ")
                                                 + IBM.ICU.Impl.Utility.Escape(input) + " => "
                                                 + IBM.ICU.Impl.Utility.Escape(output));
                }
                if (skip)
                {
                    return(Int32.MinValue);
                }
            }

            // Nothing to write back when the range is already normalized.
            if (output.Equals(input))
            {
                return(0);
            }
            text.Replace(lastSafe, limit, output);
            return(output.Length - len);
        }
Example #4
0
        /// <summary>
        /// UnicodeReplacer API. Replaces the text between <paramref name="start"/>
        /// and <paramref name="limit"/> with a copy of the most recently matched
        /// segment (<c>matchStart</c>..<c>matchLimit</c>), if there is one.
        /// </summary>
        /// <param name="text">The text that is edited in place.</param>
        /// <param name="start">Start of the range to replace.</param>
        /// <param name="limit">End (exclusive) of the range to replace.</param>
        /// <param name="cursor">Not used by this implementation.</param>
        /// <returns>The number of code units copied in, or 0 when nothing was matched.</returns>
        public virtual int Replace(Replaceable text, int start, int limit, int[] cursor)
        {
            int copied = 0;

            // A negative matchStart means there was no match, e.g. a quantifier
            // that matched zero-length: x (a)* y matched "xy". An empty match
            // likewise contributes nothing.
            bool hasSegment = matchStart >= 0 && matchStart != matchLimit;
            if (hasSegment)
            {
                // Copy (rather than re-insert) so out-of-band data travels with
                // the segment; the copy lands at 'limit', just after the range.
                text.Copy(matchStart, matchLimit, limit);
                copied = matchLimit - matchStart;
            }

            // Remove the original text, leaving only the copied segment.
            text.Replace(start, limit, "");

            return copied;
        }
        /// <summary>
        /// Replaces each code point between <c>offsets.start</c> and
        /// <c>offsets.limit</c> with OPEN_DELIM + its extended name + CLOSE_DELIM.
        /// Code points for which <c>UCharacter.GetExtendedName</c> returns null are
        /// left untouched.
        /// </summary>
        /// <param name="text">The text that is edited in place.</param>
        /// <param name="offsets">The range to convert; start, limit and contextLimit are updated.</param>
        /// <param name="isIncremental">Not consulted by this implementation.</param>
        protected internal override void HandleTransliterate(Replaceable text, Transliterator.Position offsets,
                                                             bool isIncremental)
        {
            int position = offsets.start;
            int end      = offsets.limit;

            // The builder always begins with the opening delimiter; each
            // iteration truncates back to it before appending the next name.
            StringBuilder escaped = new StringBuilder();
            escaped.Append(OPEN_DELIM);

            while (position < end)
            {
                int    codePoint = text.Char32At(position);
                String charName  = IBM.ICU.Lang.UCharacter.GetExtendedName(codePoint);

                if (charName == null)
                {
                    // No name available: skip one code unit.
                    ++position;
                    continue;
                }

                escaped.Length = OPEN_DELIM_LEN;
                escaped.Append(charName).Append(CLOSE_DELIM);

                int    unitCount   = IBM.ICU.Text.UTF16.GetCharCount(codePoint);
                String replacement = escaped.ToString();
                text.Replace(position, position + unitCount, replacement);

                position += replacement.Length;             // continue after the inserted text
                end      += replacement.Length - unitCount; // change in length
            }

            offsets.contextLimit += end - offsets.limit;
            offsets.limit         = end;
            offsets.start         = position;
        }
        /// <summary>
        /// Scans the text between <c>offsets.start</c> and <c>offsets.limit</c> for
        /// sequences that begin with the open pattern (<c>OPEN_PAT</c>), contain a
        /// character name, and end with <c>CLOSE_DELIM</c>. Each sequence whose name
        /// is resolved by <c>UCharacter.GetCharFromExtendedName</c> is replaced by
        /// the corresponding character.
        /// </summary>
        /// <remarks>
        /// The scan is a two-state machine: mode 0 looks for an open delimiter,
        /// mode 1 accumulates the name. Runs of rule white space inside a name are
        /// collapsed to a single SPACE, and leading/trailing white space is dropped.
        /// </remarks>
        /// <param name="text">The text that is edited in place.</param>
        /// <param name="offsets">The range to convert; start, limit and contextLimit are updated.</param>
        /// <param name="isIncremental">When true and an open-delimiter candidate is
        /// still pending at the end of the scan, <c>offsets.start</c> is left at
        /// that candidate instead of at the end of the scanned text.</param>
        protected internal override void HandleTransliterate(Replaceable text, Transliterator.Position offsets,
                                                             bool isIncremental)
        {
            // Longest possible character name, plus one to allow for a
            // temporary trailing space.
            int maxLen = IBM.ICU.Impl.UCharacterName.GetInstance().GetMaxCharNameLength() + 1;

            // Accumulates the candidate name while in mode 1.
            StringBuilder name = new StringBuilder(maxLen);

            // Get the legal character set
            UnicodeSet legal = new UnicodeSet();

            IBM.ICU.Impl.UCharacterName.GetInstance().GetCharNameCharacters(legal);

            int cursor = offsets.start;
            int limit  = offsets.limit;

            // Modes:
            // 0 - looking for open delimiter
            // 1 - after open delimiter
            int mode    = 0;
            int openPos = -1;     // open delim candidate pos

            int c;

            while (cursor < limit)
            {
                c = text.Char32At(cursor);

                switch (mode)
                {
                case 0:                  // looking for open delimiter
                    if (c == OPEN_DELIM) // quick check first
                    {
                        openPos = cursor;
                        // i is the index returned by ParsePattern; it is only
                        // accepted when non-negative and still inside the range.
                        int i = IBM.ICU.Impl.Utility.ParsePattern(OPEN_PAT, text, cursor, limit);
                        if (i >= 0 && i < limit)
                        {
                            mode        = 1;
                            name.Length = 0;
                            cursor      = i;
                            continue;     // *** reprocess char32At(cursor)
                        }
                    }
                    break;

                case 1:     // after open delimiter
                    // Look for legal chars. If \s+ is found, convert it
                    // to a single space. If closeDelimiter is found, exit
                    // the loop. If any other character is found, exit the
                    // loop. If the limit is reached, exit the loop.

                    // Convert \s+ => SPACE. This assumes there are no
                    // runs of >1 space characters in names.
                    if (IBM.ICU.Impl.UCharacterProperty.IsRuleWhiteSpace(c))
                    {
                        // Ignore leading whitespace
                        if (name.Length > 0 &&
                            name[name.Length - 1] != SPACE)
                        {
                            name.Append(SPACE);
                            // If we are too long then abort. maxLen includes
                            // temporary trailing space, so use '>'.
                            if (name.Length > maxLen)
                            {
                                mode = 0;
                            }
                        }
                        break;
                    }

                    if (c == CLOSE_DELIM)
                    {
                        int len = name.Length;

                        // Delete trailing space, if any
                        if (len > 0 && name[len - 1] == SPACE)
                        {
                            name.Length = --len;
                        }

                        // c is reused to hold the looked-up code point; -1
                        // is treated as "lookup failed" below.
                        c = IBM.ICU.Lang.UCharacter.GetCharFromExtendedName(name.ToString());
                        if (c != -1)
                        {
                            // Lookup succeeded

                            // assert(UTF16.getCharCount(CLOSE_DELIM) == 1);
                            cursor++;     // advance over CLOSE_DELIM

                            String str = IBM.ICU.Text.UTF16.ValueOf(c);
                            text.Replace(openPos, cursor, str);

                            // Adjust indices for the change in the length of
                            // the string. Do not assume that str.length() ==
                            // 1, in case of surrogates.
                            int delta = cursor - openPos - str.Length;
                            cursor -= delta;
                            limit  -= delta;
                            // assert(cursor == openPos + str.length());
                        }
                        // If the lookup failed, we leave things as-is and
                        // still switch to mode 0 and continue.
                        mode    = 0;
                        openPos = -1; // close off candidate
                        continue;     // *** reprocess char32At(cursor)
                    }

                    if (legal.Contains(c))
                    {
                        IBM.ICU.Text.UTF16.Append(name, c);
                        // If we go past the longest possible name then abort.
                        // maxLen includes temporary trailing space, so use '>='.
                        if (name.Length >= maxLen)
                        {
                            mode = 0;
                        }
                    }

                    // Invalid character
                    else
                    {
                        --cursor;     // Backup and reprocess this character
                        mode = 0;
                    }

                    break;
                }

                // Reached via 'break' from the switch only; the 'continue'
                // paths above skip this advance on purpose.
                cursor += IBM.ICU.Text.UTF16.GetCharCount(c);
            }

            offsets.contextLimit += limit - offsets.limit;
            offsets.limit         = limit;
            // In incremental mode, only advance the cursor up to the last
            // open delimiter candidate.
            offsets.start = (isIncremental && openPos >= 0) ? openPos : cursor;
        }
Example #7
0
        /// <summary>
        /// Inserts the <c>insertion</c> string at every break-iterator boundary
        /// inside <c>pos.start</c>..<c>pos.limit</c> that has a letter or mark
        /// (per <c>LETTER_OR_MARK_MASK</c>) on both sides.
        /// </summary>
        /// <param name="text">The text that is edited in place.</param>
        /// <param name="pos">The range to process; start, limit and contextLimit are updated.</param>
        /// <param name="incremental">When true, <c>pos.start</c> is set just past the
        /// last insertion point instead of at the new limit.</param>
        protected internal override void HandleTransliterate(Replaceable text, Transliterator.Position pos,
                                                             bool incremental)
        {
            boundaryCount = 0;

            GetBreakIterator();     // Lazy-create it if necessary
            bi.SetText(new BreakTransliterator.ReplaceableCharacterIterator(text, pos.start, pos.limit,
                                                                            pos.start));

            // Pass 1: collect the qualifying boundaries. They are stacked here
            // and inserted afterwards so the iterator never sees modified text.
            int boundary = bi.First();
            while (boundary != IBM.ICU.Text.BreakIterator.DONE && boundary < pos.limit)
            {
                // A boundary at index 0 has no preceding character to inspect.
                if (boundary != 0)
                {
                    // HACK: require a letter or mark immediately before...
                    int before     = IBM.ICU.Text.UTF16.CharAt(text, boundary - 1);
                    int beforeType = IBM.ICU.Lang.UCharacter.GetType(before);
                    if (((1 << beforeType) & LETTER_OR_MARK_MASK) != 0)
                    {
                        // ...and immediately after the boundary.
                        int after     = IBM.ICU.Text.UTF16.CharAt(text, boundary);
                        int afterType = IBM.ICU.Lang.UCharacter.GetType(after);
                        if (((1 << afterType) & LETTER_OR_MARK_MASK) != 0)
                        {
                            if (boundaryCount >= boundaries.Length)
                            {
                                // Stack is full: double its capacity.
                                int[] grown = new int[boundaries.Length * 2];
                                System.Array.Copy(boundaries, 0, grown, 0, boundaries.Length);
                                boundaries = grown;
                            }

                            boundaries[boundaryCount++] = boundary;
                        }
                    }
                }

                boundary = bi.Next();
            }

            // Pass 2: perform the insertions.
            int totalInserted = 0;
            int lastBoundary  = 0;

            if (boundaryCount != 0)
            {
                totalInserted = boundaryCount * insertion.Length;
                lastBoundary  = boundaries[boundaryCount - 1];

                // Work from the end backwards so earlier indices stay valid.
                while (boundaryCount > 0)
                {
                    int insertAt = boundaries[--boundaryCount];
                    text.Replace(insertAt, insertAt, insertion);
                }
            }

            // Now fix up the return values
            pos.contextLimit += totalInserted;
            pos.limit        += totalInserted;
            pos.start         = incremental ? lastBoundary + totalInserted : pos.limit;
        }
Example #8
0
        // = public static UnicodeReplacer valueOf(String output,
        // = int cursorPos,
        // = RuleBasedTransliterator.Data data) {
        // = if (output.length() == 1) {
        // = char c = output.charAt(0);
        // = UnicodeReplacer r = data.lookupReplacer(c);
        // = if (r != null) {
        // = return r;
        // = }
        // = }
        // = return new StringReplacer(output, cursorPos, data);
        // = }

        /// <summary>
        /// UnicodeReplacer API. Replaces the text between <paramref name="start"/>
        /// and <paramref name="limit"/> with this replacer's output and, when
        /// <c>hasCursor</c> is set, reports the new cursor position.
        /// </summary>
        /// <remarks>
        /// When <c>isComplex</c> is false the output string is inserted directly.
        /// Otherwise each code point of the output is looked up with
        /// <c>data.LookupReplacer</c>; code points that map to a nested replacer
        /// delegate to it, and the rest are inserted as literal text. The complex
        /// path builds the result in a temporary area at the end of the text and
        /// then copies it back over the key.
        /// </remarks>
        /// <param name="text">The text that is edited in place.</param>
        /// <param name="start">Start of the range to replace.</param>
        /// <param name="limit">End (exclusive) of the range to replace.</param>
        /// <param name="cursor">Receives the new cursor position in element 0 when
        /// <c>hasCursor</c> is true; also passed through to nested replacers.</param>
        /// <returns>The length of the text that replaced the range.</returns>
        public virtual int Replace(Replaceable text, int start, int limit, int[] cursor)
        {
            int outLen;
            int newStart = 0;

            // NOTE: It should be possible to _always_ run the complex
            // processing code; just slower. If not, then there is a bug
            // in the complex processing code.

            // Simple (no nested replacers) Processing Code :
            if (!isComplex)
            {
                text.Replace(start, limit, output);
                outLen = output.Length;

                // Setup default cursor position (for cursorPos within output)
                newStart = cursorPos;
            }

            // Complex (nested replacers) Processing Code :
            else
            {
                /*
                 * When there are segments to be copied, use the Replaceable.copy()
                 * API in order to retain out-of-band data. Copy everything to the
                 * end of the string, then copy them back over the key. This
                 * preserves the integrity of indices into the key and surrounding
                 * context while generating the output text.
                 */
                StringBuilder buf = new StringBuilder();
                int           oOutput; // offset into 'output'
                // Cleared here and set again below only if a nested replacer is
                // actually found, so later calls can take the simple path.
                isComplex = false;

                // The temporary buffer starts at tempStart, and extends
                // to destLimit + tempExtra. The start of the buffer has a single
                // character from before the key. This provides style
                // data when addition characters are filled into the
                // temporary buffer. If there is nothing to the left, use
                // the non-character U+FFFF, which Replaceable subclasses
                // should treat specially as a "no-style character."
                // destStart points to the point after the style context
                // character, so it is tempStart+1 or tempStart+2.
                int tempStart = text.Length(); // start of temp buffer
                int destStart = tempStart;     // copy new text to here
                if (start > 0)
                {
                    int len = IBM.ICU.Text.UTF16.GetCharCount(text.Char32At(start - 1));
                    text.Copy(start - len, start, tempStart);
                    destStart += len;
                }
                else
                {
                    text.Replace(tempStart, tempStart, "\uFFFF");
                    destStart++;
                }
                int destLimit = destStart;
                int tempExtra = 0;     // temp chars after destLimit

                for (oOutput = 0; oOutput < output.Length;)
                {
                    if (oOutput == cursorPos)
                    {
                        // Record the position of the cursor
                        newStart = buf.Length + destLimit - destStart;     // relative
                                                                           // to start
                        // the buf.length() was inserted for bug 5789
                        // the problem is that if we are accumulating into a buffer
                        // (when r == null below)
                        // then the actual length of the text at that point needs to
                        // add the buf length.
                        // there was an alternative suggested in #5789, but that
                        // looks like it won't work
                        // if we have accumulated some stuff in the dest part AND
                        // have a non-zero buffer.
                    }
                    int c = IBM.ICU.Text.UTF16.CharAt(output, oOutput);

                    // When we are at the last position copy the right style
                    // context character into the temporary buffer. We don't
                    // do this before because it will provide an incorrect
                    // right context for previous replace() operations.
                    int nextIndex = oOutput + IBM.ICU.Text.UTF16.GetCharCount(c);
                    if (nextIndex == output.Length)
                    {
                        tempExtra = IBM.ICU.Text.UTF16.GetCharCount(text.Char32At(limit));
                        text.Copy(limit, limit + tempExtra, destLimit);
                    }

                    UnicodeReplacer r = data.LookupReplacer(c);
                    if (r == null)
                    {
                        // Accumulate straight (non-segment) text.
                        IBM.ICU.Text.UTF16.Append(buf, c);
                    }
                    else
                    {
                        isComplex = true;

                        // Insert any accumulated straight text.
                        if (buf.Length > 0)
                        {
                            text.Replace(destLimit, destLimit, buf.ToString());
                            destLimit += buf.Length;
                            buf.Length = 0;
                        }

                        // Delegate output generation to replacer object
                        int len_0 = r.Replace(text, destLimit, destLimit, cursor);
                        destLimit += len_0;
                    }
                    oOutput = nextIndex;
                }
                // Insert any accumulated straight text.
                if (buf.Length > 0)
                {
                    text.Replace(destLimit, destLimit, buf.ToString());
                    destLimit += buf.Length;
                }
                // The loop above only records the cursor for positions before
                // the end of 'output'; handle a cursor exactly at the end here.
                if (oOutput == cursorPos)
                {
                    // Record the position of the cursor
                    newStart = destLimit - destStart;     // relative to start
                }

                outLen = destLimit - destStart;

                // Copy new text to start, and delete it
                // (the copy shifts the temporary area right by outLen, hence
                // the "+ outLen" on both bounds of the deletion).
                text.Copy(destStart, destLimit, start);
                text.Replace(tempStart + outLen, destLimit + tempExtra + outLen, "");

                // Delete the old text (the key)
                text.Replace(start + outLen, limit + outLen, "");
            }

            if (hasCursor)
            {
                // Adjust the cursor for positions outside the key. These
                // refer to code points rather than code units. If cursorPos
                // is within the output string, then use newStart, which has
                // already been set above.
                if (cursorPos < 0)
                {
                    newStart = start;
                    int n = cursorPos;
                    // Outside the output string, cursorPos counts code points
                    while (n < 0 && newStart > 0)
                    {
                        newStart -= IBM.ICU.Text.UTF16.GetCharCount(text.Char32At(newStart - 1));
                        ++n;
                    }
                    // n is still negative if the start of the text was reached
                    // first; the remainder is applied as a raw offset.
                    newStart += n;
                }
                else if (cursorPos > output.Length)
                {
                    newStart = start + outLen;
                    int n_1 = cursorPos - output.Length;
                    // Outside the output string, cursorPos counts code points
                    while (n_1 > 0 && newStart < text.Length())
                    {
                        newStart += IBM.ICU.Text.UTF16.GetCharCount(text.Char32At(newStart));
                        --n_1;
                    }
                    // n_1 is still positive if the end of the text was reached
                    // first; the remainder is applied as a raw offset.
                    newStart += n_1;
                }
                else
                {
                    // Cursor is within output string. It has been set up above
                    // to be relative to start.
                    newStart += start;
                }

                cursor[0] = newStart;
            }

            return(outLen);
        }
Example #9
0
        /// <summary>
        /// Scans the text between <c>pos.start</c> and <c>pos.limit</c> for numeric
        /// escapes described by <c>spec[]</c> and replaces each one with the
        /// character whose value the digits encode.
        /// </summary>
        /// <remarks>
        /// As read by this method, <c>spec[]</c> is a sequence of entries terminated
        /// by <c>END</c>. Each entry is a five-value header (prefix length, suffix
        /// length, radix, minimum digits, maximum digits) followed by the prefix
        /// characters and then the suffix characters.
        /// </remarks>
        /// <param name="text">The text that is edited in place.</param>
        /// <param name="pos">The range to convert; start, limit and contextLimit are updated.</param>
        /// <param name="isIncremental">When true, scanning stops at a partial match
        /// that runs into the limit, leaving <c>pos.start</c> at the beginning of
        /// that partial match.</param>
        protected internal override void HandleTransliterate(Replaceable text, Transliterator.Position pos,
                                                             bool isIncremental)
        {
            int start = pos.start;
            int limit = pos.limit;
            int i, j, ipat;

            // NOTE(review): the 'loop' label is never the target of a jump in
            // this method; early exits use 'goto gotoloop' instead. Presumably
            // left over from a labeled break in the code this was ported from.
            loop : {
                while (start < limit)
                {
                    // Loop over the forms in spec[]. Exit this loop when we
                    // match one of the specs. Exit the outer loop if a
                    // partial match is detected and isIncremental is true.
                    // (j counts the specs visited but is not otherwise read.)
                    for (j = 0, ipat = 0; spec[ipat] != END; ++j)
                    {
                        // Read the header
                        int prefixLen = spec[ipat++];
                        int suffixLen = spec[ipat++];
                        int radix     = spec[ipat++];
                        int minDigits = spec[ipat++];
                        int maxDigits = spec[ipat++];

                        // s is a copy of start that is advanced over the
                        // characters as we parse them.
                        int  s     = start;
                        bool match = true;

                        // Match the literal prefix; ipat now indexes its first character.
                        for (i = 0; i < prefixLen; ++i)
                        {
                            if (s >= limit)
                            {
                                if (i > 0)
                                {
                                    // We've already matched a character. This is
                                    // a partial match, so we return if in
                                    // incremental mode. In non-incremental mode,
                                    // go to the next spec.
                                    if (isIncremental)
                                    {
                                        goto gotoloop;
                                    }
                                    match = false;
                                    break;
                                }
                            }
                            char c = text.CharAt(s++);
                            if (c != spec[ipat + i])
                            {
                                match = false;
                                break;
                            }
                        }

                        if (match)
                        {
                            // Parse up to maxDigits digits in the given radix,
                            // accumulating the value in u.
                            int u          = 0;
                            int digitCount = 0;
                            for (;;)
                            {
                                if (s >= limit)
                                {
                                    // Check for partial match in incremental mode.
                                    if (s > start && isIncremental)
                                    {
                                        goto gotoloop;
                                    }
                                    break;
                                }
                                int ch    = text.Char32At(s);
                                int digit = IBM.ICU.Lang.UCharacter.Digit(ch, radix);
                                if (digit < 0)
                                {
                                    break;
                                }
                                s += IBM.ICU.Text.UTF16.GetCharCount(ch);
                                u  = (u * radix) + digit;
                                if (++digitCount == maxDigits)
                                {
                                    break;
                                }
                            }

                            match = (digitCount >= minDigits);

                            if (match)
                            {
                                // Match the literal suffix, stored right after the prefix.
                                for (i = 0; i < suffixLen; ++i)
                                {
                                    if (s >= limit)
                                    {
                                        // Check for partial match in incremental mode.
                                        if (s > start && isIncremental)
                                        {
                                            goto gotoloop;
                                        }
                                        match = false;
                                        break;
                                    }
                                    char c_0 = text.CharAt(s++);
                                    if (c_0 != spec[ipat + prefixLen + i])
                                    {
                                        match = false;
                                        break;
                                    }
                                }

                                if (match)
                                {
                                    // At this point, we have a match
                                    String str = IBM.ICU.Text.UTF16.ValueOf(u);
                                    text.Replace(start, s, str);
                                    // Shrink limit by the net number of code units removed.
                                    limit -= s - start - str.Length;
                                    // The following break statement leaves the
                                    // loop that is traversing the forms in
                                    // spec[]. We then parse the next input
                                    // character.
                                    break;
                                }
                            }
                        }

                        // No match for this spec: skip its prefix and suffix
                        // characters to reach the next header.
                        ipat += prefixLen + suffixLen;
                    }

                    // Advance one code point, whether that is an unmatched
                    // input character or the character just written.
                    if (start < limit)
                    {
                        start += IBM.ICU.Text.UTF16.GetCharCount(text.Char32At(start));
                    }
                }
            }
// Target of the early exits taken on an incremental partial match.
gotoloop:
            ;

            pos.contextLimit += limit - pos.limit;
            pos.limit         = limit;
            pos.start         = start;
        }