Пример #1
0
 /// <summary>
 /// Debugging aid: appends a rendering of the region described by
 /// <paramref name="pos"/> to <paramref name="appendTo"/> in the form
 /// <c>{bbb|ccc|ddd}</c>. The braces mark the context start and context
 /// limit, the bars mark the start and limit. Text outside the context
 /// is not emitted. If the indices are not ordered or exceed the input
 /// length, an "INVALID Position" message is appended instead.
 /// </summary>
 /// <param name="appendTo">Builder that receives the output.</param>
 /// <param name="input">Text the position refers to.</param>
 /// <param name="pos">Position whose four indices are rendered.</param>
 /// <returns><paramref name="appendTo"/>.</returns>
 public static StringBuilder FormatInput(StringBuilder appendTo,
                                         ReplaceableString input, Transliterator.Position pos)
 {
     bool wellFormed = 0 <= pos.contextStart
                       && pos.contextStart <= pos.start
                       && pos.start <= pos.limit
                       && pos.limit <= pos.contextLimit
                       && pos.contextLimit <= input.Length();

     if (!wellFormed)
     {
         String message = "INVALID Position {cs=" + pos.contextStart + ", s="
                          + pos.start + ", l=" + pos.limit + ", cl="
                          + pos.contextLimit + "} on " + input;
         appendTo.Append(message);
         return appendTo;
     }

     // NOTE(review): the second Substring argument is passed as an end index;
     // confirm that ReplaceableString.Substring in this port takes
     // (start, limit) rather than (start, length).
     String anteContext = input.Substring(pos.contextStart, pos.start);
     String activeText  = input.Substring(pos.start, pos.limit);
     String postContext = input.Substring(pos.limit, pos.contextLimit);

     appendTo.Append('{');
     appendTo.Append(anteContext);
     appendTo.Append('|');
     appendTo.Append(activeText);
     appendTo.Append('|');
     appendTo.Append(postContext);
     appendTo.Append('}');
     return appendTo;
 }
Пример #2
0
 /// <summary>
 /// Debugging aid: appends a rendering of the region described by
 /// <paramref name="pos"/> to <paramref name="appendTo"/> in the form
 /// <c>{bbb|ccc|ddd}</c>. The braces mark the context start and context
 /// limit, the bars mark the start and limit. Text outside the context
 /// is not emitted. If the indices are not ordered or exceed the input
 /// length, an "INVALID Position" message is appended instead.
 /// </summary>
 /// <param name="appendTo">Buffer that receives the output.</param>
 /// <param name="input">Text the position refers to.</param>
 /// <param name="pos">Position whose four indices are rendered.</param>
 /// <returns><paramref name="appendTo"/>.</returns>
 public static StringBuffer FormatInput(StringBuffer appendTo,
                                        ReplaceableString input,
                                        Transliterator.Position pos)
 {
     bool wellFormed = 0 <= pos.ContextStart
                       && pos.ContextStart <= pos.Start
                       && pos.Start <= pos.Limit
                       && pos.Limit <= pos.ContextLimit
                       && pos.ContextLimit <= input.Length;

     if (!wellFormed)
     {
         string message = "INVALID Position {cs=" +
                          pos.ContextStart + ", s=" + pos.Start + ", l=" +
                          pos.Limit + ", cl=" + pos.ContextLimit + "} on " +
                          input;
         appendTo.Append(message);
         return appendTo;
     }

     // Substring is called with (start, length) here, so each length is
     // the difference between two adjacent indices.
     string anteContext = input.Substring(pos.ContextStart, pos.Start - pos.ContextStart);
     string activeText  = input.Substring(pos.Start, pos.Limit - pos.Start);
     string postContext = input.Substring(pos.Limit, pos.ContextLimit - pos.Limit);

     appendTo.Append('{').Append(anteContext);
     appendTo.Append('|').Append(activeText);
     appendTo.Append('|').Append(postContext);
     appendTo.Append('}');
     return appendTo;
 }
Пример #3
0
        /// <summary>
        /// Transliterates <paramref name="input"/> with <paramref name="t"/> through a
        /// <c>Transliterator.Position</c> spanning the whole text, then finishes the
        /// transliteration. Reports an error if the first pass left <c>Start</c> at 0 with a
        /// non-zero <c>Limit</c>, or if <c>Start</c> and <c>Limit</c> differ after finishing.
        /// </summary>
        /// <param name="t">Transliterator under test.</param>
        /// <param name="input">Text to transliterate.</param>
        public void CheckIncrementalAux(Transliterator t, String input)
        {
            IReplaceable test = new ReplaceableString(input);
            int length = test.Length;
            Transliterator.Position pos = new Transliterator.Position(0, length, 0, length);

            t.Transliterate(test, pos);

            // Special cases: Any-Remove ends with start == limit == 0, and
            // Hex-Any/Unicode (U+XXXXX?X?) is exempt from the progress check.
            bool noProgress = pos.Start == 0 && pos.Limit != 0 && !t.ID.Equals("Hex-Any/Unicode");
            String progressState = t.ID + ": " + UtilityExtensions.FormatInput(test, pos);
            if (noProgress)
            {
                Errln("No Progress, " + progressState);
            }
            else
            {
                Logln("PASS Progress, " + progressState);
            }

            t.FinishTransliteration(test, pos);
            if (pos.Start == pos.Limit)
            {
                return;
            }
            Errln("Incomplete, " + t.ID + ":  " + UtilityExtensions.FormatInput(test, pos));
        }
        /// <summary>
        /// Implements <c>IBM.ICU.Text.Transliterator.HandleTransliterate</c>.
        /// Walks the range [start, limit) looking for "safe" code points (combining
        /// class 0 and not contained in <c>unsafeStart</c>) and converts the text in
        /// segments that run from one safe code point up to the next.
        /// </summary>
        /// <param name="text">Text to modify in place.</param>
        /// <param name="offsets">Position to process; its start, limit and contextLimit are updated.</param>
        /// <param name="isIncremental">
        /// When false the trailing segment is always converted. When true it is converted
        /// with <c>skippable</c> passed to <c>Convert</c>, and the result is accepted only if
        /// <c>Convert</c> does not return <c>Int32.MinValue</c>.
        /// </param>
        protected internal override void HandleTransliterate(Replaceable text, Transliterator.Position offsets,
                                                             bool isIncremental)
        {
            int start = offsets.start;
            int limit = offsets.limit;

            if (start >= limit)
            {
                return;
            }

            // TODO: fix for surrogates
            // TODO: add QuickCheck, so we rarely convert OK stuff

            int totalDelta   = 0;
            int segmentStart = start;   // go back to start in any event

            int i = start + 1;
            while (i < limit)
            {
                int cp = text.Char32At(i);
                bool isSafe = IBM.ICU.Lang.UCharacter.GetCombiningClass(cp) == 0 &&
                              !unsafeStart.Contains(cp);
                if (isSafe)
                {
                    // The value returned by Convert is applied to the index, the limit and
                    // the running total (presumably the change in text length; confirm
                    // against Convert).
                    int delta = Convert(text, segmentStart, i, null);
                    i            += delta;
                    limit        += delta;
                    totalDelta   += delta;
                    segmentStart  = i;
                }
                i += IBM.ICU.Text.UTF16.GetCharCount(cp);
            }

            if (isIncremental)
            {
                // Incremental: accept the last characters only IF they turn into
                // skippables. Int32.MinValue appears to be Convert's "rejected"
                // sentinel; confirm against Convert.
                int tailDelta = Convert(text, segmentStart, limit, skippable);
                if (tailDelta != Int32.MinValue)
                {
                    totalDelta   += tailDelta;
                    segmentStart  = limit + tailDelta;
                }
            }
            else
            {
                // Not incremental: convert through to the end.
                int tailDelta = Convert(text, segmentStart, limit, null);
                totalDelta   += tailDelta;
                segmentStart  = limit + tailDelta;
            }

            offsets.contextLimit += totalDelta;
            offsets.limit        += totalDelta;
            offsets.start         = segmentStart;
        }
Пример #5
0
        /// <summary>
        /// Debugging aid: formats <paramref name="input"/> and <paramref name="pos"/> through
        /// the buffer-based <c>FormatInput</c> overload (documented as producing the form
        /// aaa{bbb|ccc|ddd}eee, where {} mark the context start and limit and || mark the
        /// start and limit) and returns the result after passing it through
        /// <c>Utility.Escape</c>.
        /// </summary>
        /// <param name="input">Text the position refers to.</param>
        /// <param name="pos">Position to render.</param>
        /// <returns>The escaped, formatted text.</returns>
        public static string FormatInput(ReplaceableString input,
                                         Transliterator.Position pos)
        {
            StringBuffer rendered = new StringBuffer();
            FormatInput(rendered, input, pos);

            string raw = rendered.ToString();
            return Utility.Escape(raw);
        }
Пример #6
0
        /// <summary>
        /// Implements <c>IBM.ICU.Text.Transliterator.HandleTransliterate</c>.
        /// Deletes the text between <c>index.start</c> and <c>index.limit</c> and shrinks
        /// <c>index.limit</c> and <c>index.contextLimit</c> by the number of removed units.
        /// </summary>
        /// <param name="text">Text to modify in place.</param>
        /// <param name="index">Range to delete; its limit and contextLimit are updated.</param>
        /// <param name="incremental">Not used by this implementation.</param>
        protected internal override void HandleTransliterate(Replaceable text, Transliterator.Position index,
                                                             bool incremental)
        {
            // Our caller (filteredTransliterate) has already narrowed us
            // to an unfiltered run, so the whole range goes.
            int removed = index.limit - index.start;

            text.Replace(index.start, index.limit, "");

            index.contextLimit -= removed;
            index.limit        -= removed;
        }
Пример #7
0
        /// <summary>
        /// Implements <c>IBM.ICU.Text.Transliterator.HandleTransliterate</c>.
        /// Iterates over the script runs between <c>pos.contextStart</c> and
        /// <c>pos.contextLimit</c> and, for each run that overlaps [start, limit), applies the
        /// transliterator returned by <c>GetTransliterator</c> for that run's script code.
        /// </summary>
        /// <param name="text">Text to modify in place.</param>
        /// <param name="pos">Position to process; its start and limit are updated.</param>
        /// <param name="isIncremental">
        /// Whether the overall operation is incremental. A run is transliterated incrementally
        /// only if this is true and the run reaches the overall limit.
        /// </param>
        protected internal override void HandleTransliterate(Replaceable text, Transliterator.Position pos,
                                                             bool isIncremental)
        {
            int regionStart = pos.start;
            int regionLimit = pos.limit;

            AnyTransliterator.ScriptRunIterator runs =
                new AnyTransliterator.ScriptRunIterator(text, pos.contextStart, pos.contextLimit);

            while (runs.Next())
            {
                // Runs that lie entirely in the ante context are ignored.
                if (runs.limit <= regionStart)
                {
                    continue;
                }

                // Look up a transliterator from the run's script code to
                // our target or target/variant.
                Transliterator runTransliterator = GetTransliterator(runs.scriptCode);
                if (runTransliterator == null)
                {
                    // Nothing to apply to this run; just keep pos.start up to date.
                    pos.start = runs.limit;
                    continue;
                }

                // A run that ends before the transliteration limit is done
                // non-incrementally; otherwise the caller's mode applies.
                bool runIsIncremental = isIncremental && (runs.limit >= regionLimit);

                pos.start = Math.Max(regionStart, runs.start);
                pos.limit = Math.Min(regionLimit, runs.limit);

                int limitBefore = pos.limit;
                runTransliterator.FilteredTransliterate(text, pos, runIsIncremental);
                int growth = pos.limit - limitBefore;

                // Propagate the limit change to the overall limit and the iterator.
                regionLimit += growth;
                runs.AdjustLimit(growth);

                // Entering the post context means we are finished.
                if (runs.limit >= regionLimit)
                {
                    break;
                }
            }

            // Restore limit. pos.start is fine where the last transliterator
            // left it, or at the end of the last run.
            pos.limit = regionLimit;
        }
        /// <summary>
        /// Tries the rules indexed under the low byte of the code point at <c>pos.start</c>,
        /// in order, until one reports a match or a partial match. If none does,
        /// <c>pos.start</c> is advanced past the current code point.
        /// </summary>
        ///
        /// <param name="text">the text to be transliterated</param>
        /// <param name="pos">the position indices, which will be updated</param>
        /// <param name="incremental">if TRUE, assume new text may be inserted at index.limit, and return FALSE if there is a partial match.</param>
        /// <returns>TRUE unless a U_PARTIAL_MATCH has been obtained, indicating that
        /// transliteration should stop until more text arrives.</returns>
        public bool Transliterate(Replaceable text, Transliterator.Position pos,
                                  bool incremental)
        {
            int bucket = text.Char32At(pos.start) & 0xFF;

            for (int r = index[bucket]; r < index[bucket + 1]; ++r)
            {
                int outcome = rules[r].MatchAndReplace(text, pos, incremental);

                if (outcome == IBM.ICU.Text.UnicodeMatcher_Constants.U_MATCH)
                {
                    if (IBM.ICU.Text.Transliterator.DEBUG)
                    {
                        System.Console.Out.WriteLine(
                            ((incremental) ? "Rule.i: match " : "Rule: match ")
                            + rules[r].ToRule(true)
                            + " => "
                            + IBM.ICU.Impl.UtilityExtensions.FormatInput(text, pos));
                    }
                    return true;
                }

                if (outcome == IBM.ICU.Text.UnicodeMatcher_Constants.U_PARTIAL_MATCH)
                {
                    if (IBM.ICU.Text.Transliterator.DEBUG)
                    {
                        System.Console.Out.WriteLine(
                            ((incremental) ? "Rule.i: partial match " : "Rule: partial match ")
                            + rules[r].ToRule(true)
                            + " => "
                            + IBM.ICU.Impl.UtilityExtensions.FormatInput(text, pos));
                    }
                    return false;
                }

                // Any other result: this rule did not match; try the next one.
                if (IBM.ICU.Text.Transliterator.DEBUG)
                {
                    System.Console.Out.WriteLine("Rule: no match " + rules[r]);
                }
            }

            // No match or partial match from any rule: step over one code point.
            pos.start += IBM.ICU.Text.UTF16.GetCharCount(text.Char32At(pos.start));
            if (IBM.ICU.Text.Transliterator.DEBUG)
            {
                System.Console.Out.WriteLine(
                    ((incremental) ? "Rule.i: no match => " : "Rule: no match => ")
                    + IBM.ICU.Impl.UtilityExtensions.FormatInput(text, pos));
            }
            return true;
        }
        /// <summary>
        /// Implements <c>IBM.ICU.Text.Transliterator.HandleTransliterate</c>.
        /// Replaces every character in [start, limit) with <c>prefix</c>, the character's
        /// number formatted by <c>Utility.AppendNumber</c> with <c>radix</c> and
        /// <c>minDigits</c>, and <c>suffix</c>. Values above 0xFFFF use
        /// <c>supplementalHandler</c>'s prefix, radix, minDigits and suffix when a handler is set.
        /// </summary>
        /// <param name="text">Text to modify in place.</param>
        /// <param name="pos">Position to process; its start, limit and contextLimit are updated.</param>
        /// <param name="incremental">Not used by this implementation.</param>
        protected internal override void HandleTransliterate(Replaceable text, Transliterator.Position pos,
                                                             bool incremental)
        {
            int cursor = pos.start;
            int end    = pos.limit;

            // The buffer normally keeps the prefix in place and is truncated back to it
            // between characters; prefixStale records that the supplemental prefix
            // overwrote it and it must be rebuilt.
            StringBuilder escaped     = new StringBuilder(prefix);
            int           prefixLen   = prefix.Length;
            bool          prefixStale = false;

            while (cursor < end)
            {
                int c;
                int charLen;
                if (grokSupplementals)
                {
                    c       = (int)(text.Char32At(cursor));
                    charLen = IBM.ICU.Text.UTF16.GetCharCount(c);
                }
                else
                {
                    c       = (int)(text.CharAt(cursor));
                    charLen = 1;
                }

                bool useSupplemental = (c & -65536) != 0 && supplementalHandler != null;
                if (useSupplemental)
                {
                    escaped.Length = 0;
                    escaped.Append(supplementalHandler.prefix);
                    IBM.ICU.Impl.Utility.AppendNumber(escaped, c, supplementalHandler.radix,
                                                      supplementalHandler.minDigits);
                    escaped.Append(supplementalHandler.suffix);
                    prefixStale = true;
                }
                else
                {
                    if (prefixStale)
                    {
                        escaped.Length = 0;
                        escaped.Append(prefix);
                        prefixStale = false;
                    }
                    else
                    {
                        escaped.Length = prefixLen;
                    }
                    IBM.ICU.Impl.Utility.AppendNumber(escaped, c, radix, minDigits);
                    escaped.Append(suffix);
                }

                text.Replace(cursor, cursor + charLen, escaped.ToString());

                // Skip over the inserted escape and grow the end by the net change.
                int inserted = escaped.Length;
                cursor += inserted;
                end    += inserted - charLen;
            }

            pos.contextLimit += end - pos.limit;
            pos.limit         = end;
            pos.start         = cursor;
        }
Пример #10
0
        /// <summary>
        /// Tries the rules indexed under the low byte of the code point at
        /// <c>pos.Start</c>, in order, until one reports a match or a partial
        /// match. If none does, <c>pos.Start</c> is advanced past the current
        /// code point.
        /// </summary>
        /// <param name="text">The text to be transliterated.</param>
        /// <param name="pos">The position indices, which will be updated.</param>
        /// <param name="incremental">If TRUE, assume new text may be inserted
        /// at index.Limit, and return FALSE if there is a partial match.</param>
        /// <returns>TRUE unless a partial match has been obtained,
        /// indicating that transliteration should stop until more text
        /// arrives.</returns>
        public virtual bool Transliterate(IReplaceable text,
                                          Transliterator.Position pos,
                                          bool incremental)
        {
            // Debug output is tagged with the mode, except for the per-rule
            // "no match" line, which always uses the plain tag.
            string tag = incremental ? "Rule.i: " : "Rule: ";
            int bucket = text.Char32At(pos.Start) & 0xFF;

            for (int r = index[bucket]; r < index[bucket + 1]; ++r)
            {
                MatchDegree degree = rules[r].MatchAndReplace(text, pos, incremental);

                if (degree == MatchDegree.Match)
                {
                    if (Transliterator.DEBUG)
                    {
                        Console.Out.WriteLine(tag + "match " +
                                              rules[r].ToRule(true) + " => " +
                                              UtilityExtensions.FormatInput(text, pos));
                    }
                    return true;
                }

                if (degree == MatchDegree.PartialMatch)
                {
                    if (Transliterator.DEBUG)
                    {
                        Console.Out.WriteLine(tag + "partial match " +
                                              rules[r].ToRule(true) + " => " +
                                              UtilityExtensions.FormatInput(text, pos));
                    }
                    return false;
                }

                // Any other degree: this rule did not match; try the next one.
                if (Transliterator.DEBUG)
                {
                    Console.Out.WriteLine("Rule: no match " + rules[r]);
                }
            }

            // No match or partial match from any rule: step over one code point.
            pos.Start += UTF16.GetCharCount(text.Char32At(pos.Start));
            if (Transliterator.DEBUG)
            {
                Console.Out.WriteLine(tag + "no match => " +
                                      UtilityExtensions.FormatInput(text, pos));
            }
            return true;
        }
Пример #11
0
        /// <summary>
        /// Checks that <paramref name="t"/> turns <paramref name="source"/> into
        /// <paramref name="expectedResult"/> in three ways: string transliteration,
        /// in-place transliteration of a <c>ReplaceableString</c>, and keyboard
        /// (incremental) transliteration fed one <c>char</c> at a time and then finished.
        /// </summary>
        /// <param name="t">Transliterator under test.</param>
        /// <param name="source">Input text.</param>
        /// <param name="expectedResult">Expected output for all three modes.</param>
        private void expect(Transliterator t, String source, String expectedResult)
        {
            // Mode 1: plain string.
            String result = t.Transliterate(source);
            expectAux(t.ID + ":String", source, result, expectedResult);

            // Mode 2: replaceable text, modified in place.
            ReplaceableString buffer = new ReplaceableString(source);
            t.Transliterate(buffer);
            result = buffer.ToString();
            expectAux(t.ID + ":Replaceable", source, result, expectedResult);

            // Mode 3: keyboard (incremental) transliteration -- this result
            // must be the same after we finalize (see below).
            buffer.Replace(0, buffer.Length, "");
            Transliterator.Position position = new Transliterator.Position();
            StringBuffer            trace    = new StringBuffer();

            for (int i = 0; i < source.Length; ++i)
            {
                char typed = source[i];
                if (i > 0)
                {
                    trace.Append(" + ");
                }
                trace.Append(typed).Append(" -> ");
                t.Transliterate(buffer, position, typed.ToString());

                // Record the text so far with a vertical bar '|' at the
                // committed index.
                String snapshot  = buffer.ToString();
                int    committed = position.Start;
                trace.Append(snapshot.Substring(0, committed));
                trace.Append('|');
                trace.Append(snapshot.Substring(committed));
            }

            // As a final step in keyboard transliteration, finish off any
            // pending partial matches that were waiting for more input.
            t.FinishTransliteration(buffer, position);
            result = buffer.ToString();
            trace.Append(" => ").Append(result);

            bool passed = result.Equals(expectedResult);
            expectAux(t.ID + ":Keyboard", trace.ToString(), passed, expectedResult);
        }
Пример #12
0
        /// <summary>
        /// Repeatedly applies <c>data.ruleSet</c> to the text while <c>index.start</c> is
        /// below <c>index.limit</c>, the rule set keeps returning true, and the iteration
        /// cap has not been exceeded.
        /// </summary>
        /// <param name="text">Text to modify in place.</param>
        /// <param name="index">Position to process; updated by the rule set.</param>
        /// <param name="incremental">Passed through to the rule set.</param>
        protected internal override void HandleTransliterate(Replaceable text,
                                                             Transliterator.Position index, bool incremental)
        {
            /*
             * Start and limit stay fixed relative to the text for the whole
             * call -- limit may move numerically if text is inserted or
             * removed. The cursor moves from start to limit, with
             * replacements happening under it.
             *
             * Example: rules 1. ab>x|y 2. yc>z
             *
             * |eabcd start - no match, advance cursor e|abcd match rule 1 - change
             * text & adjust cursor ex|ycd match rule 2 - change text & adjust
             * cursor exz|d no match, advance cursor exzd| done
             */

            /*
             * A rule like a>b|a creates an infinite loop. To prevent that,
             * the number of iterations is capped at 16 times the number of
             * characters: high enough for any reasonable rules, low enough
             * to keep a server from hanging. If the shifted value comes out
             * negative, the cap is the largest int instead.
             */
            int maxIterations = (index.limit - index.start) << 4;
            if (maxIterations < 0)
            {
                maxIterations = int.MaxValue;
            }

            int iterations = 0;
            while (index.start < index.limit && iterations <= maxIterations)
            {
                if (!data.ruleSet.Transliterate(text, index, incremental))
                {
                    break;
                }
                ++iterations;
            }
        }
        /// <summary>
        /// Implements <c>IBM.ICU.Text.Transliterator.HandleTransliterate</c>.
        /// Replaces each code point in [start, limit) for which
        /// <c>UCharacter.GetExtendedName</c> returns a name with
        /// <c>OPEN_DELIM</c> + name + <c>CLOSE_DELIM</c>.
        /// </summary>
        /// <param name="text">Text to modify in place.</param>
        /// <param name="offsets">Position to process; its start, limit and contextLimit are updated.</param>
        /// <param name="isIncremental">Not used by this implementation.</param>
        protected internal override void HandleTransliterate(Replaceable text, Transliterator.Position offsets,
                                                             bool isIncremental)
        {
            int cursor = offsets.start;
            int limit  = offsets.limit;

            // The builder always begins with the opening delimiter; it is truncated
            // back to OPEN_DELIM_LEN before each name is appended.
            StringBuilder replacement = new StringBuilder();
            replacement.Append(OPEN_DELIM);

            while (cursor < limit)
            {
                int    c    = text.Char32At(cursor);
                String name = IBM.ICU.Lang.UCharacter.GetExtendedName(c);

                if (name == null)
                {
                    // No name available: leave the text alone and step forward.
                    ++cursor;
                    continue;
                }

                replacement.Length = OPEN_DELIM_LEN;
                replacement.Append(name).Append(CLOSE_DELIM);

                int clen = IBM.ICU.Text.UTF16.GetCharCount(c);
                text.Replace(cursor, cursor + clen, replacement.ToString());

                int inserted = replacement.Length;
                cursor += inserted;         // move past the inserted text
                limit  += inserted - clen;  // net change in length
            }

            offsets.contextLimit += limit - offsets.limit;
            offsets.limit         = limit;
            offsets.start         = cursor;
        }
Пример #14
0
        /// <summary>
        /// Exercises error handling of the transliterator API: out-of-range offsets passed
        /// to <c>Transliterate</c>, inconsistent <c>Transliterator.Position</c> values,
        /// insertion with a string and with a code point, an unknown transliterator ID,
        /// the inverse of a rule-based transliterator created from rules, and malformed rules.
        /// </summary>
        public void TestTransliteratorErrors()
        {
            String            trans      = "Latin-Greek";
            String            bogusID    = "LATINGREEK-GREEKLATIN";
            String            newID      = "Bogus-Latin";
            String            newIDRules = "zzz > Z; f <> ph";
            String            bogusRules = "a } [b-g m-p ";
            ReplaceableString testString =
                new ReplaceableString("A quick fox jumped over the lazy dog.");
            String insertString = "cats and dogs";
            int    stoppedAt = 0, len;

            Transliterator.Position pos = new Transliterator.Position();

            Transliterator t =
                Transliterator.GetInstance(trans, Transliterator.FORWARD);

            if (t == null)
            {
                Errln("FAIL: construction of Latin-Greek");
                return;
            }

            // Limit beyond the end of the text: -1 is expected and the text must be unchanged.
            len       = testString.Length;
            stoppedAt = t.Transliterate(testString, 0, 100);
            if (stoppedAt != -1)
            {
                Errln("FAIL: Out of bounds check failed (1).");
            }
            else if (testString.Length != len)
            {
                testString =
                    new ReplaceableString("A quick fox jumped over the lazy dog.");
                Errln("FAIL: Transliterate fails and the target string was modified.");
            }

            // Start beyond the end of the text: same expectations.
            stoppedAt = t.Transliterate(testString, 100, testString.Length - 1);
            if (stoppedAt != -1)
            {
                Errln("FAIL: Out of bounds check failed (2).");
            }
            else if (testString.Length != len)
            {
                testString =
                    new ReplaceableString("A quick fox jumped over the lazy dog.");
                Errln("FAIL: Transliterate fails and the target string was modified.");
            }

            // Position with an out-of-bounds start.
            pos.Start = 100;
            pos.Limit = testString.Length;
            try
            {
                t.Transliterate(testString, pos);
                Errln("FAIL: Start offset is out of bounds, error not reported.");
            }
            catch (ArgumentException)
            {
                Logln("Start offset is out of bounds and detected.");
            }

            // Position with an out-of-bounds limit.
            pos.Limit = 100;
            pos.Start = 0;

            try
            {
                t.Transliterate(testString, pos);
                Errln("FAIL: Limit offset is out of bounds, error not reported.\n");
            }
            catch (ArgumentException)
            {
                Logln("Limit offset is out of bounds and detected.");
            }

            // Insertion of a string with a valid position.
            len = pos.ContextLimit = testString.Length;
            pos.ContextStart = 0;
            pos.Limit        = len - 1;
            pos.Start        = 5;
            try
            {
                t.Transliterate(testString, pos, insertString);
                if (len == pos.Limit)
                {
                    Errln("FAIL: Test insertion with string: the transliteration position limit didn't change as expected.");
                }
            }
            catch (ArgumentException)
            {
                Errln("Insertion test with string failed for some reason.");
            }

            // Insertion of a single code point with a valid position.
            pos.ContextStart = 0;
            pos.ContextLimit = testString.Length;
            pos.Limit        = testString.Length - 1;
            pos.Start        = 5;
            try
            {
                t.Transliterate(testString, pos, 0x0061);
                if (len == pos.Limit)
                {
                    Errln("FAIL: Test insertion with character: the transliteration position limit didn't change as expected.");
                }
            }
            catch (ArgumentException)
            {
                Errln("FAIL: Insertion test with UTF-16 code point failed for some reason.");
            }

            // Insertion with limit greater than contextLimit: must be rejected.
            len = pos.Limit = testString.Length;
            pos.ContextStart = 0;
            pos.ContextLimit = testString.Length - 1;
            pos.Start        = 5;
            try
            {
                t.Transliterate(testString, pos, insertString);
                Errln("FAIL: Out of bounds check failed (3).");
                if (testString.Length != len)
                {
                    Errln("FAIL: The input string was modified though the offsets were out of bounds.");
                }
            }
            catch (ArgumentException)
            {
                Logln("Insertion test with out of bounds indexes.");
            }

            // Unknown ID: either null or an ArgumentException is acceptable.
            Transliterator t1 = null;

            try
            {
                t1 = Transliterator.GetInstance(bogusID, Transliterator.FORWARD);
                if (t1 != null)
                {
                    Errln("FAIL: construction of bogus ID \"LATINGREEK-GREEKLATIN\"");
                }
            }
            catch (ArgumentException)
            {
                // Expected: the ID does not exist.
            }

            //try { // unneeded - Exception cannot be thrown
            Transliterator t2 =
                Transliterator.CreateFromRules(
                    newID,
                    newIDRules,
                    Transliterator.FORWARD);

            try
            {
                Transliterator t3 = t2.GetInverse();
                Errln("FAIL: The newID transliterator was not registered so createInverse should fail.");
                if (t3 != null)
                {
                    Errln("FAIL: The newID transliterator was not registered so createInverse should fail.");
                }
            }
            catch (Exception)
            {
                // Expected: the inverse cannot be created.
            }
            //} catch (Exception e) { }

            // Malformed rules: either null or an exception is acceptable.
            try
            {
                Transliterator t4 =
                    Transliterator.CreateFromRules(
                        newID,
                        bogusRules,
                        Transliterator.FORWARD);
                if (t4 != null)
                {
                    Errln("FAIL: The rules is malformed but error was not reported.");
                }
            }
            catch (Exception)
            {
                // Expected: the rules do not parse.
            }
        }
Пример #15
0
 /// <exclude/>
 /// <summary>
 /// Implements <c>IBM.ICU.Text.Transliterator.HandleTransliterate</c>.
 /// Leaves the text untouched and moves <c>offsets.start</c> up to
 /// <c>offsets.limit</c>.
 /// </summary>
 /// <param name="text">Not used by this implementation.</param>
 /// <param name="offsets">Position whose start is set to its limit.</param>
 /// <param name="incremental">Not used by this implementation.</param>
 protected internal override void HandleTransliterate(Replaceable text, Transliterator.Position offsets,
                                                      bool incremental)
 {
     int end = offsets.limit;
     offsets.start = end;
 }
Пример #16
0
        /// <summary>
        /// Tests transliteration through compound transliterators: first "Any-Hex;Hex-Any"
        /// with a default position and with an explicit position, then a table of
        /// (ID, input, expected output) triples checked through <c>expect</c>.
        /// </summary>
        public void TestTransliterate()
        {
            Logln("Testing the handleTransliterate() API of CompoundTransliterator");
            Transliterator ct1 = null;

            try
            {
                ct1 = Transliterator.GetInstance("Any-Hex;Hex-Any");
            }
            catch (ArgumentException)
            {
                Errln("FAIL: construction using CompoundTransliterator(String ID) failed for " + "Any-Hex;Hex-Any");
                // Rethrow with "throw;" so the original stack trace is preserved.
                throw;
            }

            String s = "abcabc";

            expect(ct1, s, s);
            Transliterator.Position index    = new Transliterator.Position();
            ReplaceableString       rsource2 = new ReplaceableString(s);
            String expectedResult            = s;

            ct1.Transliterate(rsource2, index);
            ct1.FinishTransliteration(rsource2, index);
            String result = rsource2.ToString();

            expectAux(ct1.ID + ":ReplaceableString, index(0,0,0,0)", s + "->" + rsource2, result.Equals(expectedResult), expectedResult);

            Transliterator.Position index2   = new Transliterator.Position(1, 3, 2, 3);
            ReplaceableString       rsource3 = new ReplaceableString(s);

            ct1.Transliterate(rsource3, index2);
            ct1.FinishTransliteration(rsource3, index2);
            result = rsource3.ToString();
            // The label lists the same indices that index2 was constructed with.
            expectAux(ct1.ID + ":String, index2(1,3,2,3)", s + "->" + rsource3, result.Equals(expectedResult), expectedResult);


            // Flat table of triples; the loop below reads it three entries at a time.
            String[] Data =
            {
                //ID, input string, transliterated string
                "Any-Hex;Hex-Any;Any-Hex",              "hello",                                      "\\u0068\\u0065\\u006C\\u006C\\u006F",
                "Any-Hex;Hex-Any",                      "hello! How are you?",                        "hello! How are you?",
                "Devanagari-Latin;Latin-Devanagari",    "\u092D\u0948'\u0930'\u0935",                 "\u092D\u0948\u0930\u0935", // quotes lost
                "Latin-Cyrillic;Cyrillic-Latin",        "a'b'k'd'e'f'g'h'i'j'Shch'shch'zh'h",         "a'b'k'd'e'f'g'h'i'j'Shch'shch'zh'h",
                "Latin-Greek;Greek-Latin",              "ABGabgAKLMN",                                "ABGabgAKLMN",
                //"Latin-Arabic;Arabic-Latin",               "Ad'r'a'b'i'k'dh'dd'gh", "Adrabikdhddgh",
                "Hiragana-Katakana",                    "\u3041\u308f\u3099\u306e\u304b\u3092\u3099",
                "\u30A1\u30f7\u30ce\u30ab\u30fa",
                "Hiragana-Katakana;Katakana-Hiragana",  "\u3041\u308f\u3099\u306e\u304b\u3051",
                "\u3041\u308f\u3099\u306e\u304b\u3051",
                "Katakana-Hiragana;Hiragana-Katakana",  "\u30A1\u30f7\u30ce\u30f5\u30f6",
                "\u30A1\u30f7\u30ce\u30ab\u30b1",
                "Latin-Katakana;Katakana-Latin",        "vavivuvevohuzizuzoninunasesuzezu",
                "vavivuvevohuzizuzoninunasesuzezu",
            };
            Transliterator ct2 = null;

            for (int i = 0; i < Data.Length; i += 3)
            {
                try
                {
                    ct2 = Transliterator.GetInstance(Data[i + 0]);
                }
                catch (ArgumentException)
                {
                    Errln("FAIL: CompoundTransliterator construction failed for " + Data[i + 0]);
                    // Rethrow with "throw;" so the original stack trace is preserved.
                    throw;
                }
                expect(ct2, Data[i + 1], Data[i + 2]);
            }
        }
        /// <summary>
        /// Implements <see cref="M:IBM.ICU.Text.Transliterator.HandleTransliterate(IBM.ICU.Text.Replaceable, null, System.Boolean)"/>.
        /// Scans the text between <c>offsets.start</c> and <c>offsets.limit</c>
        /// for a character name that follows the open pattern and is ended by
        /// the close delimiter. Every name that resolves to a code point is
        /// replaced, delimiters included, by that character.
        /// </summary>
        /// <param name="text">The text that is examined and modified in place.</param>
        /// <param name="offsets">
        /// Supplies the range to process. On return <c>limit</c> and
        /// <c>contextLimit</c> reflect any change in text length and
        /// <c>start</c> marks how far processing got.
        /// </param>
        /// <param name="isIncremental">
        /// When true and an open delimiter candidate is pending, <c>start</c>
        /// is left at that candidate rather than at the scan position.
        /// </param>
        protected internal override void HandleTransliterate(Replaceable text, Transliterator.Position offsets,
                                                             bool isIncremental)
        {
            // Longest possible name, plus one to allow for a temporary
            // trailing space.
            int maxLen = IBM.ICU.Impl.UCharacterName.GetInstance().GetMaxCharNameLength() + 1;

            StringBuilder name = new StringBuilder(maxLen);

            // Get the legal character set
            UnicodeSet legal = new UnicodeSet();

            IBM.ICU.Impl.UCharacterName.GetInstance().GetCharNameCharacters(legal);

            int cursor = offsets.start;
            int limit  = offsets.limit;

            // Modes:
            // 0 - looking for open delimiter
            // 1 - after open delimiter
            int mode    = 0;
            int openPos = -1;     // open delim candidate pos

            int c;

            while (cursor < limit)
            {
                c = text.Char32At(cursor);

                switch (mode)
                {
                case 0:                  // looking for open delimiter
                    if (c == OPEN_DELIM) // quick check first
                    {
                        openPos = cursor;
                        int i = IBM.ICU.Impl.Utility.ParsePattern(OPEN_PAT, text, cursor, limit);
                        // Only enter name mode when the pattern parsed and
                        // there is still text left before the limit.
                        if (i >= 0 && i < limit)
                        {
                            mode        = 1;
                            name.Length = 0;
                            cursor      = i;
                            continue;     // *** reprocess char32At(cursor)
                        }
                    }
                    break;

                case 1:     // after open delimiter
                    // Look for legal chars. If \s+ is found, convert it
                    // to a single space. If closeDelimiter is found, exit
                    // the loop. If any other character is found, exit the
                    // loop. If the limit is reached, exit the loop.

                    // Convert \s+ => SPACE. This assumes there are no
                    // runs of >1 space characters in names.
                    if (IBM.ICU.Impl.UCharacterProperty.IsRuleWhiteSpace(c))
                    {
                        // Ignore leading whitespace
                        if (name.Length > 0 &&
                            name[name.Length - 1] != SPACE)
                        {
                            name.Append(SPACE);
                            // If we are too long then abort. maxLen includes
                            // temporary trailing space, so use '>'.
                            if (name.Length > maxLen)
                            {
                                mode = 0;
                            }
                        }
                        break;
                    }

                    if (c == CLOSE_DELIM)
                    {
                        int len = name.Length;

                        // Delete trailing space, if any
                        if (len > 0 && name[len - 1] == SPACE)
                        {
                            name.Length = --len;
                        }

                        // From here on c holds the looked-up code point, or
                        // -1 when the name is unknown.
                        c = IBM.ICU.Lang.UCharacter.GetCharFromExtendedName(name.ToString());
                        if (c != -1)
                        {
                            // Lookup succeeded

                            // assert(UTF16.getCharCount(CLOSE_DELIM) == 1);
                            cursor++;     // advance over CLOSE_DELIM

                            String str = IBM.ICU.Text.UTF16.ValueOf(c);
                            text.Replace(openPos, cursor, str);

                            // Adjust indices for the change in the length of
                            // the string. Do not assume that str.length() ==
                            // 1, in case of surrogates.
                            int delta = cursor - openPos - str.Length;
                            cursor -= delta;
                            limit  -= delta;
                            // assert(cursor == openPos + str.length());
                        }
                        // If the lookup failed, we leave things as-is and
                        // still switch to mode 0 and continue.
                        mode    = 0;
                        openPos = -1; // close off candidate
                        continue;     // *** reprocess char32At(cursor)
                    }

                    if (legal.Contains(c))
                    {
                        IBM.ICU.Text.UTF16.Append(name, c);
                        // If we go past the longest possible name then abort.
                        // maxLen includes temporary trailing space, so use '>='.
                        if (name.Length >= maxLen)
                        {
                            mode = 0;
                        }
                    }

                    // Invalid character
                    else
                    {
                        --cursor;     // Backup and reprocess this character
                        mode = 0;
                    }

                    break;
                }

                cursor += IBM.ICU.Text.UTF16.GetCharCount(c);
            }

            // Propagate the net change in text length to the caller's limits.
            offsets.contextLimit += limit - offsets.limit;
            offsets.limit         = limit;
            // In incremental mode, only advance the cursor up to the last
            // open delimiter candidate.
            offsets.start = (isIncremental && openPos >= 0) ? openPos : cursor;
        }
Пример #18
0
 /// <summary>
 /// Convenience overload: casts <paramref name="input"/> to
 /// ReplaceableString and forwards to the ReplaceableString-based
 /// FormatInput, returning its result.
 /// </summary>
 public static StringBuffer FormatInput(StringBuffer appendTo,
                                        IReplaceable input,
                                        Transliterator.Position pos)
 {
     ReplaceableString concrete = (ReplaceableString)input;

     return FormatInput(appendTo, concrete, pos);
 }
        /// <summary>
        /// Implements <see cref="M:IBM.ICU.Text.Transliterator.HandleTransliterate(IBM.ICU.Text.Replaceable, null, System.Boolean)"/>.
        /// Applies the full upper-case mapping to every code point in
        /// [offsets.start, offsets.limit), replacing text in place and keeping
        /// the limits in step with any change in length.
        /// </summary>
        ///
        protected internal override void HandleTransliterate(Replaceable text, Transliterator.Position offsets,
                                                             bool isIncremental)
        {
            // Nothing to do without case properties or for an empty range.
            if (csp == null || offsets.start >= offsets.limit)
            {
                return;
            }

            iter.SetText(text);
            result.Length = 0;

            // Restrict the iterator to the requested range and its context.
            iter.SetIndex(offsets.start);
            iter.SetLimit(offsets.limit);
            iter.SetContextLimits(offsets.contextStart, offsets.contextLimit);

            for (int codePoint = iter.NextCaseMapCP(); codePoint >= 0; codePoint = iter.NextCaseMapCP())
            {
                int mapping = csp.ToFullUpper(codePoint, iter, result, locale, locCache);

                if (iter.DidReachLimit() && isIncremental)
                {
                    // The mapping needed to look past the context limit:
                    // stop before this code point and wait for more input.
                    offsets.start = iter.GetCaseMapCPStart();
                    return;
                }

                if (mapping < 0)
                {
                    // The code point maps to itself.
                    continue;
                }

                // A small non-negative value means the mapping is the string
                // collected in 'result'; anything larger is a single code point.
                bool isStringMapping = mapping <= IBM.ICU.Impl.UCaseProps.MAX_STRING_LENGTH;
                string replacement   = isStringMapping
                    ? result.ToString()
                    : IBM.ICU.Text.UTF16.ValueOf(mapping);

                int lengthChange = iter.Replace(replacement);
                if (isStringMapping)
                {
                    result.Length = 0;
                }

                if (lengthChange != 0)
                {
                    offsets.limit        += lengthChange;
                    offsets.contextLimit += lengthChange;
                }
            }

            offsets.start = offsets.limit;
        }
Пример #20
0
        /// <summary>
        /// Attempt a match and replacement at the given position. Return
        /// the degree of match between this rule and the given text. The
        /// degree of match may be a mismatch, a partial match, or a full
        /// match. A mismatch means at least one character of the text
        /// does not match the context or key. A partial match means some
        /// context and key characters match, but the text is not long
        /// enough to match all of them. A full match means all context
        /// and key characters match.
        /// <para>
        /// If a full match is obtained, perform a replacement, update
        /// <paramref name="pos"/>, and return <c>MatchDegree.Match</c>.
        /// Otherwise both text and pos are unchanged.
        /// </para>
        /// </summary>
        /// <param name="text">The text.</param>
        /// <param name="pos">The position indices.</param>
        /// <param name="incremental">
        /// If true, test for partial matches that may be completed by
        /// additional text inserted at <c>pos.Limit</c>.
        /// </param>
        /// <returns>
        /// One of <c>MatchDegree.Mismatch</c>, <c>MatchDegree.PartialMatch</c>,
        /// or <c>MatchDegree.Match</c>. Per the original contract, a partial
        /// match is not returned when <paramref name="incremental"/> is false.
        /// </returns>
        public virtual MatchDegree MatchAndReplace(IReplaceable text,
                                                   Transliterator.Position pos,
                                                   bool incremental)
        {
            // Matching and replacing are done in one method because the
            // replacement operation needs information obtained during the
            // match.  Another way to do this is to have the match method
            // create a match result struct with relevant offsets, and to pass
            // this into the replace method.

            // ============================ MATCH ===========================

            // Reset segment match data
            if (segments != null)
            {
                for (int i = 0; i < segments.Length; ++i)
                {
                    ((StringMatcher)segments[i]).ResetMatch();
                }
            }

            int keyLimit;

            // One-element array used as an in/out offset for the matchers
            // and for the replacer.
            int[] intRef = new int[1];

            // ------------------------ Ante Context ------------------------

            // A mismatch in the ante context, or with the start anchor,
            // is an outright mismatch regardless of whether we are
            // incremental or not.
            int oText; // offset into 'text'
            int minOText;

            // Note (1): We process text in 16-bit code units, rather than
            // 32-bit code points.  This works because stand-ins are
            // always in the BMP and because we are doing a literal match
            // operation, which can be done 16-bits at a time.

            int anteLimit = PosBefore(text, pos.ContextStart);

            MatchDegree match;

            // Start reverse match at char before pos.start
            intRef[0] = PosBefore(text, pos.Start);

            if (anteContext != null)
            {
                match = anteContext.Matches(text, intRef, anteLimit, false);
                if (match != MatchDegree.Match)
                {
                    return(MatchDegree.Mismatch);
                }
            }

            oText = intRef[0];

            // Lower bound for the new start position computed after the
            // replacement.
            minOText = PosAfter(text, oText);

            // ------------------------ Start Anchor ------------------------

            if (((flags & ANCHOR_START) != 0) && oText != anteLimit)
            {
                return(MatchDegree.Mismatch);
            }

            // -------------------- Key and Post Context --------------------

            intRef[0] = pos.Start;

            if (key != null)
            {
                match = key.Matches(text, intRef, pos.Limit, incremental);
                if (match != MatchDegree.Match)
                {
                    return(match);
                }
            }

            keyLimit = intRef[0];

            if (postContext != null)
            {
                if (incremental && keyLimit == pos.Limit)
                {
                    // The key matches just before pos.limit, and there is
                    // a postContext.  Since we are in incremental mode,
                    // we must assume more characters may be inserted at
                    // pos.limit -- this is a partial match.
                    return(MatchDegree.PartialMatch);
                }

                match = postContext.Matches(text, intRef, pos.ContextLimit, incremental);
                if (match != MatchDegree.Match)
                {
                    return(match);
                }
            }

            oText = intRef[0];

            // ------------------------- Stop Anchor ------------------------

            if (((flags & ANCHOR_END)) != 0)
            {
                if (oText != pos.ContextLimit)
                {
                    return(MatchDegree.Mismatch);
                }
                // In incremental mode an end-anchored rule is reported as a
                // partial match even when the text ends at the context limit.
                if (incremental)
                {
                    return(MatchDegree.PartialMatch);
                }
            }

            // =========================== REPLACE ==========================

            // We have a full match.  The key is between pos.start and
            // keyLimit.

            int newLength = output.Replace(text, pos.Start, keyLimit, intRef);
            int lenDelta  = newLength - (keyLimit - pos.Start);
            int newStart  = intRef[0];

            // Shift every offset at or after the key by the change in length.
            oText            += lenDelta;
            pos.Limit        += lenDelta;
            pos.ContextLimit += lenDelta;
            // Restrict new value of start to [minOText, min(oText, pos.limit)].
            pos.Start = Math.Max(minOText, Math.Min(Math.Min(oText, pos.Limit), newStart));
            return(MatchDegree.Match);
        }
Пример #21
0
        /// <exclude/>
        /// <summary>
        /// Implements <see cref="M:IBM.ICU.Text.Transliterator.HandleTransliterate(IBM.ICU.Text.Replaceable, null, System.Boolean)"/>.
        /// Runs each component transliterator in turn over the range described
        /// by <paramref name="index"/>, then restores the overall limit adjusted
        /// for the insertions and deletions that took place.
        /// </summary>
        /// <param name="text">The text that is transliterated in place.</param>
        /// <param name="index">
        /// The range to process. On return <c>start</c> is where the last
        /// component left it and <c>limit</c> is the original limit plus the
        /// accumulated change in length.
        /// </param>
        /// <param name="incremental">
        /// When true, each component only sees the text that the preceding
        /// components have completely processed.
        /// </param>
        /// <exception cref="InvalidOperationException">
        /// A component did not advance <c>start</c> to <c>limit</c> in
        /// non-incremental mode.
        /// </exception>
        protected internal override void HandleTransliterate(Replaceable text, Transliterator.Position index,
                                                             bool incremental)
        {
            /*
             * Call each transliterator with the same start value and initial cursor
             * index, but with the limit index as modified by preceding
             * transliterators. The cursor index must be reset for each
             * transliterator to give each a chance to transliterate the text. The
             * initial cursor index is known to still point to the same place after
             * each transliterator is called because each transliterator will not
             * change the text between start and the initial value of cursor.
             *
             * IMPORTANT: After the first transliterator, each subsequent
             * transliterator only gets to transliterate text committed by preceding
             * transliterators; that is, the cursor (output value) of transliterator
             * i becomes the limit (input value) of transliterator i+1. Finally, the
             * overall limit is fixed up before we return.
             *
             * Assumptions we make here:
             * (1) contextStart <= start <= limit <= contextLimit <= text.length()
             * (2) start <= start' <= limit'   ; cursor doesn't move back
             * (3) start <= limit'             ; text before cursor unchanged
             * - start' is the value of start after calling handleKT
             * - limit' is the value of limit after calling handleKT
             */

            /*
             * Example: 3 transliterators. This example illustrates the mechanics we
             * need to implement. C, S, and L are the contextStart, start, and
             * limit. gl is the globalLimit. contextLimit is equal to limit
             * throughout. (Each step lists the hex offsets, the text, and the
             * C/S/L markers; the original column alignment was lost.)
             *
             * 1. h-u, changes hex to Unicode
             *
             *    4 7 a d 0      4 7 a
             *    abc/u0061/u => abca/u
             *    C S L          C S L     gl=f->a
             *
             * 2. upup, changes "x" to "XX"
             *
             *    4 7 a      4 7 a
             *    abca/u => abcAA/u
             *    C SL       C S L     gl=a->b
             *
             * 3. u-h, changes Unicode to hex
             *
             *    4 7 a       4 7 a d 0 3
             *    abcAA/u => abc/u0041/u0041/u
             *    C S L       C S L     gl=b->15
             *
             * 4. return
             *
             *    4 7 a d 0 3
             *    abc/u0041/u0041/u
             *    C S L
             */

            if (trans.Length < 1)
            {
                index.start = index.limit;
                return;     // Short circuit for empty compound transliterators
            }

            // compoundLimit is the limit value for the entire compound
            // operation. We overwrite index.limit with the previous
            // index.start. After each transliteration, we update
            // compoundLimit for insertions or deletions that have happened.
            int compoundLimit = index.limit;

            // compoundStart is the start for the entire compound
            // operation.
            int compoundStart = index.start;

            int delta = 0;     // delta in length

            // Only allocated, and only used, when DEBUG tracing is on.
            StringBuilder log = null;

            if (IBM.ICU.Text.Transliterator.DEBUG)
            {
                log = new StringBuilder("CompoundTransliterator{" + GetID()
                                        + ((incremental) ? "}i: IN=" : "}: IN="));
                IBM.ICU.Impl.UtilityExtensions.FormatInput(log, text, index);
                System.Console.Out.WriteLine(IBM.ICU.Impl.Utility.Escape(log.ToString()));
            }

            // Give each transliterator a crack at the run of characters.
            // See comments at the top of the method for more detail.
            for (int i = 0; i < trans.Length; ++i)
            {
                index.start = compoundStart;     // Reset start
                int limit = index.limit;

                if (index.start == index.limit)
                {
                    // Short circuit for empty range
                    if (IBM.ICU.Text.Transliterator.DEBUG)
                    {
                        System.Console.Out.WriteLine("CompoundTransliterator[" + i + ".."
                                                     + (trans.Length - 1)
                                                     + ((incremental) ? "]i: " : "]: ")
                                                     + IBM.ICU.Impl.UtilityExtensions.FormatInput(text, index)
                                                     + " (NOTHING TO DO)");
                    }
                    break;
                }

                if (IBM.ICU.Text.Transliterator.DEBUG)
                {
                    log.Length = 0;
                    log.Append("CompoundTransliterator[" + i + "="
                               + trans[i].GetID() + ((incremental) ? "]i: " : "]: "));
                    IBM.ICU.Impl.UtilityExtensions.FormatInput(log, text, index);
                }

                trans[i].FilteredTransliterate(text, index, incremental);

                // In a properly written transliterator, start == limit after
                // handleTransliterate() returns when incremental is false.
                // Catch cases where the subclass doesn't do this, and throw
                // an exception. (Just pinning start to limit is a bad idea,
                // because what's probably happening is that the subclass
                // isn't transliterating all the way to the end, and it should
                // in non-incremental mode.)
                if (!incremental && index.start != index.limit)
                {
                    // A specific exception type instead of the bare
                    // System.Exception; existing catch (Exception) handlers
                    // still match it.
                    throw new InvalidOperationException(
                              "ERROR: Incomplete non-incremental transliteration by "
                              + trans[i].GetID());
                }

                if (IBM.ICU.Text.Transliterator.DEBUG)
                {
                    log.Append(" => ");
                    IBM.ICU.Impl.UtilityExtensions.FormatInput(log, text, index);
                    System.Console.Out.WriteLine(IBM.ICU.Impl.Utility.Escape(log.ToString()));
                }

                // Cumulative delta for insertions/deletions
                delta += index.limit - limit;

                if (incremental)
                {
                    // In the incremental case, only allow subsequent
                    // transliterators to modify what has already been
                    // completely processed by prior transliterators. In the
                    // non-incremental case, allow each transliterator to
                    // process the entire text.
                    index.limit = index.start;
                }
            }

            compoundLimit += delta;

            // Start is good where it is -- where the last transliterator left
            // it. Limit needs to be put back where it was, modulo
            // adjustments for deletions/insertions.
            index.limit = compoundLimit;

            if (IBM.ICU.Text.Transliterator.DEBUG)
            {
                log.Length = 0;
                log.Append("CompoundTransliterator{" + GetID()
                           + ((incremental) ? "}i: OUT=" : "}: OUT="));
                IBM.ICU.Impl.UtilityExtensions.FormatInput(log, text, index);
                System.Console.Out.WriteLine(IBM.ICU.Impl.Utility.Escape(log.ToString()));
            }
        }
Пример #22
0
        /// <summary>
        /// Inserts the insertion string at every break-iterator boundary inside
        /// [pos.start, pos.limit) whose neighbouring characters both pass the
        /// letter-or-mark mask, then updates <paramref name="pos"/> for the
        /// text that was added.
        /// </summary>
        protected internal override void HandleTransliterate(Replaceable text, Transliterator.Position pos,
                                                             bool incremental)
        {
            boundaryCount = 0;

            GetBreakIterator();     // created on first use
            bi.SetText(new BreakTransliterator.ReplaceableCharacterIterator(text, pos.start, pos.limit,
                                                                            pos.start));
            // TODO: fix clumsy workaround used below.

            // Pass 1: collect the qualifying boundaries. They are stacked
            // first and inserted afterwards so that offsets stay valid while
            // iterating.
            int candidate = bi.First();
            while (candidate != IBM.ICU.Text.BreakIterator.DONE && candidate < pos.limit)
            {
                if (candidate != 0)
                {
                    // HACK: require a letter or mark on both sides of the boundary.
                    int typeBefore = IBM.ICU.Lang.UCharacter.GetType(IBM.ICU.Text.UTF16.CharAt(text, candidate - 1));
                    if ((LETTER_OR_MARK_MASK & (1 << typeBefore)) != 0)
                    {
                        int typeAfter = IBM.ICU.Lang.UCharacter.GetType(IBM.ICU.Text.UTF16.CharAt(text, candidate));
                        if ((LETTER_OR_MARK_MASK & (1 << typeAfter)) != 0)
                        {
                            if (boundaries.Length <= boundaryCount)
                            {
                                // Out of room: double the stack.
                                int[] grown = new int[boundaries.Length * 2];
                                System.Array.Copy((Array)(boundaries), 0, (Array)(grown), 0, boundaries.Length);
                                boundaries = grown;
                            }

                            boundaries[boundaryCount] = candidate;
                            boundaryCount++;
                        }
                    }
                }

                candidate = bi.Next();
            }

            // Pass 2: insert from the last boundary backwards so the earlier
            // offsets need no adjustment.
            int inserted = 0;
            int last     = 0;

            if (boundaryCount != 0)
            {
                inserted = boundaryCount * insertion.Length;
                last     = boundaries[boundaryCount - 1];

                while (boundaryCount > 0)
                {
                    boundaryCount--;
                    int at = boundaries[boundaryCount];
                    text.Replace(at, at, insertion);
                }
            }

            // Report the new limits and how far processing got.
            pos.contextLimit += inserted;
            pos.limit        += inserted;
            if (incremental)
            {
                pos.start = last + inserted;
            }
            else
            {
                pos.start = pos.limit;
            }
        }
Пример #23
0
        /// <summary>
        /// Implements <see cref="M:IBM.ICU.Text.Transliterator.HandleTransliterate(IBM.ICU.Text.Replaceable, null, System.Boolean)"/>.
        /// Title-cases the text in [offsets.start, offsets.limit): a character
        /// that follows an uncased, non-ignorable character (or starts the
        /// context) gets the title mapping, one that follows a cased character
        /// gets the lower mapping, and case-ignorable characters are skipped
        /// without changing the mode.
        /// </summary>
        ///
        protected internal override void HandleTransliterate(Replaceable text, Transliterator.Position offsets,
                                                             bool isIncremental)
        {
            // TODO reimplement, see ustrcase.c
            // using a real word break iterator
            // instead of just looking for a transition between cased and uncased
            // characters
            // call CaseMapTransliterator::handleTransliterate() for lowercasing?
            // (set fMap)
            // needs to take isIncremental into account because case mappings are
            // context-sensitive
            // also detect when lowercasing function did not finish because of
            // context

            if (offsets.start >= offsets.limit)
            {
                return;
            }

            // Current mode: true = next letter is title-cased, false = lower-cased.
            bool doTitle = true;

            // Walk backwards through the preceding context. Case-ignorable
            // characters (type < 0) are skipped; the first other character
            // decides the starting mode: cased (> 0) starts in lower mode,
            // uncased (== 0) or no such character starts in title mode.
            int scan = offsets.start - 1;
            while (scan >= offsets.contextStart)
            {
                int previous     = text.Char32At(scan);
                int previousType = csp.GetTypeOrIgnorable(previous);
                if (previousType >= 0)
                {
                    doTitle = previousType == 0;
                    break;
                }

                scan -= IBM.ICU.Text.UTF16.GetCharCount(previous);
            }

            // Restrict the iterator to the requested range and its context.
            iter.SetText(text);
            iter.SetIndex(offsets.start);
            iter.SetLimit(offsets.limit);
            iter.SetContextLimits(offsets.contextStart, offsets.contextLimit);

            result.Length = 0;

            for (int codePoint = iter.NextCaseMapCP(); codePoint >= 0; codePoint = iter.NextCaseMapCP())
            {
                int caseType = csp.GetTypeOrIgnorable(codePoint);
                if (caseType < 0)
                {
                    // Case-ignorable: left as is, mode unchanged.
                    continue;
                }

                int mapping = doTitle
                    ? csp.ToFullTitle(codePoint, iter, result, locale, locCache)
                    : csp.ToFullLower(codePoint, iter, result, locale, locCache);

                // An uncased character switches the next one to title mode.
                doTitle = caseType == 0;

                if (iter.DidReachLimit() && isIncremental)
                {
                    // The mapping needed to look past the context limit:
                    // stop before this code point and wait for more input.
                    offsets.start = iter.GetCaseMapCPStart();
                    return;
                }

                if (mapping < 0)
                {
                    // The code point maps to itself.
                    continue;
                }

                // A small non-negative value means the mapping is the string
                // collected in 'result'; anything larger is a single code point.
                bool isStringMapping = mapping <= IBM.ICU.Impl.UCaseProps.MAX_STRING_LENGTH;
                string replacement   = isStringMapping
                    ? result.ToString()
                    : IBM.ICU.Text.UTF16.ValueOf(mapping);

                int lengthChange = iter.Replace(replacement);
                if (isStringMapping)
                {
                    result.Length = 0;
                }

                if (lengthChange != 0)
                {
                    offsets.limit        += lengthChange;
                    offsets.contextLimit += lengthChange;
                }
            }

            offsets.start = offsets.limit;
        }
Пример #24
0
 /// <summary>
 /// Convenience overload: casts <paramref name="input"/> to
 /// ReplaceableString and forwards to the ReplaceableString-based
 /// FormatInput, returning its result.
 /// </summary>
 public static string FormatInput(IReplaceable input,
                                  Transliterator.Position pos)
 {
     ReplaceableString concrete = (ReplaceableString)input;

     return FormatInput(concrete, pos);
 }
Пример #25
0
        /// <summary>
        /// Implements <c>Transliterator.HandleTransliterate</c>: scans the text between
        /// <c>pos.start</c> and <c>pos.limit</c> and replaces every sequence that matches
        /// one of the forms in <c>spec</c> with the code point whose number it spells out.
        /// </summary>
        /// <remarks>
        /// Each form in <c>spec</c> is a five-value header (prefix length, suffix length,
        /// radix, minimum digit count, maximum digit count) followed by the prefix
        /// characters and then the suffix characters. The list is terminated by <c>END</c>.
        /// </remarks>
        /// <param name="text">Text that is examined and modified in place.</param>
        /// <param name="pos">Range to process. On return, <c>start</c> and <c>limit</c> hold the
        /// position the scan reached and the adjusted limit, and <c>contextLimit</c> is shifted
        /// by the same amount as <c>limit</c>.</param>
        /// <param name="isIncremental">When true, the scan stops at a sequence that is cut off
        /// by the limit and leaves <c>pos.start</c> at the first character of that sequence.</param>
        protected internal override void HandleTransliterate(Replaceable text, Transliterator.Position pos,
                                                             bool isIncremental)
        {
            int start = pos.start;
            int limit = pos.limit;

            while (start < limit)
            {
                // Loop over the forms in spec[]. Exit this loop when we
                // match one of the specs. Leave the outer loop as well
                // (goto finish) if a partial match is detected and
                // isIncremental is true.
                int ipat = 0;
                while (spec[ipat] != END)
                {
                    // Read the header
                    int prefixLen = spec[ipat++];
                    int suffixLen = spec[ipat++];
                    int radix     = spec[ipat++];
                    int minDigits = spec[ipat++];
                    int maxDigits = spec[ipat++];

                    // s is a copy of start that is advanced over the
                    // characters as we parse them.
                    int  s     = start;
                    bool match = true;

                    for (int i = 0; i < prefixLen; ++i)
                    {
                        // For i == 0 we have s == start < limit, so running out
                        // of input can only happen after at least one prefix
                        // character has been matched.
                        if (s >= limit && i > 0)
                        {
                            // This is a partial match, so we return if in
                            // incremental mode. In non-incremental mode,
                            // go to the next spec.
                            if (isIncremental)
                            {
                                goto finish;
                            }
                            match = false;
                            break;
                        }
                        char prefixChar = text.CharAt(s++);
                        if (prefixChar != spec[ipat + i])
                        {
                            match = false;
                            break;
                        }
                    }

                    if (match)
                    {
                        // Accumulate up to maxDigits digits of the given radix into u.
                        int u          = 0;
                        int digitCount = 0;
                        for (;;)
                        {
                            if (s >= limit)
                            {
                                // Check for partial match in incremental mode.
                                if (s > start && isIncremental)
                                {
                                    goto finish;
                                }
                                break;
                            }
                            int ch    = text.Char32At(s);
                            int digit = IBM.ICU.Lang.UCharacter.Digit(ch, radix);
                            if (digit < 0)
                            {
                                break;
                            }
                            s += IBM.ICU.Text.UTF16.GetCharCount(ch);
                            u  = (u * radix) + digit;
                            if (++digitCount == maxDigits)
                            {
                                break;
                            }
                        }

                        match = (digitCount >= minDigits);

                        if (match)
                        {
                            for (int i = 0; i < suffixLen; ++i)
                            {
                                if (s >= limit)
                                {
                                    // Check for partial match in incremental mode.
                                    if (s > start && isIncremental)
                                    {
                                        goto finish;
                                    }
                                    match = false;
                                    break;
                                }
                                char suffixChar = text.CharAt(s++);
                                if (suffixChar != spec[ipat + prefixLen + i])
                                {
                                    match = false;
                                    break;
                                }
                            }

                            if (match)
                            {
                                // At this point, we have a match.
                                // NOTE(review): u is handed to UTF16.ValueOf without a
                                // range check; confirm how ValueOf treats values that
                                // are not valid code points.
                                string str = IBM.ICU.Text.UTF16.ValueOf(u);
                                text.Replace(start, s, str);
                                // The text changed length by (str.Length - (s - start));
                                // move the limit by the same amount.
                                limit -= s - start - str.Length;
                                // The following break statement leaves the
                                // loop that is traversing the forms in
                                // spec[]. We then parse the next input
                                // character.
                                break;
                            }
                        }
                    }

                    // No match for this form: skip its prefix and suffix
                    // characters to reach the next header.
                    ipat += prefixLen + suffixLen;
                }

                // Step over one code point: either the replacement that was
                // just written or an input character that matched no form.
                if (start < limit)
                {
                    start += IBM.ICU.Text.UTF16.GetCharCount(text.Char32At(start));
                }
            }

finish:
            // Reached both on normal completion and on an incremental partial
            // match; publish the scan state back to the caller.
            pos.contextLimit += limit - pos.limit;
            pos.limit         = limit;
            pos.start         = start;
        }