Exemple #1
0
        /// <summary>
        /// Walks code points 0 through 0x10FFFE and, for each one that is not
        /// unassigned, private-use or a surrogate, hands it to <c>CharacterTest</c>
        /// twice: once on its own and once with U+0345 appended.
        /// A progress line is logged for every 5000th code point that is tested.
        /// </summary>
        public void TestExhaustive()
        {
            CanonicalIterator it = new CanonicalIterator("");
            int tested = 0;

            for (int cp = 0; cp < 0x10FFFF; cp++)
            {
                // Only categories other than unassigned / private-use / surrogate are
                // exercised; the others are known not to have decompositions.
                UUnicodeCategory category = UChar.GetUnicodeCategory(cp);
                bool worthTesting = category != UUnicodeCategory.OtherNotAssigned
                                    && category != UUnicodeCategory.PrivateUse
                                    && category != UUnicodeCategory.Surrogate;

                if (worthTesting)
                {
                    tested++;
                    if (tested % 5000 == 0)
                    {
                        Logln("Testing " + Utility.Hex(cp, 0));
                    }

                    string single = UTF16.ValueOf(cp);
                    CharacterTest(single, cp, it);

                    // Same character again, followed by U+0345.
                    CharacterTest(single + "\u0345", cp, it);
                }
            }
        }
Exemple #2
0
        /// <summary>
        /// Advances to the next script run. On success <c>scriptStart</c> and
        /// <c>scriptLimit</c> delimit the run and <c>scriptCode</c> holds its script.
        /// </summary>
        /// <returns>true if a run was produced; false if the previous run already
        /// reached <c>limit</c>.</returns>
        public bool Next()
        {
            if (scriptLimit >= limit)
            {
                return false;
            }

            // The new run begins where the previous one ended and starts out as Common.
            scriptStart = scriptLimit;
            scriptCode  = UScript.Common;

            while (index < limit)
            {
                int codePoint = UTF16.CharAt(text, start, limit, index - start);
                int script    = GetScript(codePoint);

                // UTR #24: never break between a non-spacing mark and its base
                // character, so a non-spacing mark always continues the current run
                // regardless of its own script value.
                bool continuesRun = IsSameScript(scriptCode, script)
                                    || UChar.GetUnicodeCategory(codePoint) == UUnicodeCategory.NonSpacingMark;
                if (!continuesRun)
                {
                    break;
                }

                index += UTF16.GetCharCount(codePoint);

                // A run that is still Common/Inherited takes on the first more
                // specific script it meets.
                if (scriptCode <= UScript.Inherited && script > UScript.Inherited)
                {
                    scriptCode = script;
                }
            }

            scriptLimit = index;
            return true;
        }
Exemple #3
0
        /// <summary>
        /// Builds a random test string out of code points drawn from
        /// <c>random.Next(maxCodePoint)</c>, re-drawing any code point whose category is
        /// <c>OtherNotAssigned</c>. The generator is created on first use via
        /// <c>CreateRandom()</c>.
        /// </summary>
        /// <returns>The generated string; it always contains at least one code point.</returns>
        internal String GetTestSource()
        {
            if (random == null)
            {
                random = CreateRandom(); // use test framework's random seed
            }

            // Accumulate in a builder: concatenating strings inside the loop would copy
            // the whole prefix on every iteration.
            System.Text.StringBuilder source = new System.Text.StringBuilder();
            int i = 0;

            // NOTE(review): the length bound is re-drawn on every pass, which skews the
            // result toward short strings - confirm whether that is intended. It is kept
            // so the sequence of values pulled from the seeded generator stays the same.
            while (i < (random.Next(maxCharCount) + 1))
            {
                int codepoint = random.Next(maxCodePoint);
                // Eliminate unassigned characters
                while (UChar.GetUnicodeCategory(codepoint) == UUnicodeCategory.OtherNotAssigned)
                {
                    codepoint = random.Next(maxCodePoint);
                }
                source.Append(UTF16.ValueOf(codepoint));
                i++;
            }
            return source.ToString();
        }
Exemple #4
0
 /// <summary>
 /// Returns a mask with exactly one bit set, at the position given by the integer
 /// value of the Unicode category of <paramref name="c"/>.
 /// </summary>
 /// <param name="c">The code point to classify.</param>
 /// <returns><c>1</c> shifted left by the category's integer value.</returns>
 private static int U_GET_GC_MASK(int c)
 {
     int categoryBit = UChar.GetUnicodeCategory(c).ToInt32();
     return 1 << categoryBit;
 }
Exemple #5
0
        /// <summary>
        /// Inserts <c>insertion</c> at every break-iterator boundary before
        /// <c>pos.Limit</c> whose neighbouring code points on both sides have a
        /// category bit set in <c>LETTER_OR_MARK_MASK</c>, then moves
        /// <paramref name="pos"/> forward to account for the inserted text.
        /// </summary>
        /// <param name="text">The text that is scanned and modified in place.</param>
        /// <param name="pos">The range to process; its ContextLimit, Limit and Start are updated.</param>
        /// <param name="incremental">When true, Start is set just past the last insertion
        /// point (shifted by the inserted length); otherwise it is set to the new Limit.</param>
        protected override void HandleTransliterate(IReplaceable text, TransliterationPosition pos, bool incremental)
        {
            lock (this)
            {
                boundaryCount = 0;
                GetBreakIterator(); // Lazy-create it if necessary
                // TODO: feeding the iterator through ReplaceableCharacterIterator is a
                // workaround that still needs a cleaner solution.
                bi.SetText(new ReplaceableCharacterIterator(text, pos.Start, pos.Limit, pos.Start));

                // Pass 1: collect the qualifying boundaries. They are stacked up and only
                // inserted afterwards, so offsets stay valid while scanning.
                int boundary = bi.First();
                while (boundary != BreakIterator.Done && boundary < pos.Limit)
                {
                    if (boundary != 0)
                    {
                        // HACK: the code point before the boundary must be a letter or mark...
                        int before    = UTF16.CharAt(text, boundary - 1);
                        int beforeBit = 1 << UChar.GetUnicodeCategory(before).ToInt32();
                        if ((beforeBit & LETTER_OR_MARK_MASK) != 0)
                        {
                            // ...and so must the code point after it.
                            int after    = UTF16.CharAt(text, boundary);
                            int afterBit = 1 << UChar.GetUnicodeCategory(after).ToInt32();
                            if ((afterBit & LETTER_OR_MARK_MASK) != 0)
                            {
                                if (boundaryCount >= boundaries.Length)
                                {
                                    // Out of room: double the backing array.
                                    int[] grown = new int[boundaries.Length * 2];
                                    System.Array.Copy(boundaries, 0, grown, 0, boundaries.Length);
                                    boundaries = grown;
                                }

                                boundaries[boundaryCount] = boundary;
                                boundaryCount++;
                            }
                        }
                    }

                    boundary = bi.Next();
                }

                // Pass 2: perform the insertions.
                int delta        = 0;
                int lastBoundary = 0;

                if (boundaryCount > 0)
                {
                    delta        = boundaryCount * insertion.Length;
                    lastBoundary = boundaries[boundaryCount - 1];

                    // Work from the end backwards so earlier offsets never need adjusting.
                    do
                    {
                        boundaryCount--;
                        int at = boundaries[boundaryCount];
                        text.Replace(at, at, insertion);
                    }
                    while (boundaryCount > 0);
                }

                // Now fix up the return values
                pos.ContextLimit += delta;
                pos.Limit        += delta;
                if (incremental)
                {
                    pos.Start = lastBoundary + delta;
                }
                else
                {
                    pos.Start = pos.Limit;
                }
            }
        }