Beispiel #1
0
        /// <summary>
        /// Builds a collator from a tailoring over <c>a</c>, <c>A</c>, <c>B</c> and the
        /// characters U+0300, U+0315 and U+0316, switches it to canonical decomposition,
        /// and hands an iterator over each sample string to
        /// <c>CollationTest.BackAndForth</c>.
        /// </summary>
        /// <remarks>
        /// If the collator cannot be built, the test only emits a warning and exits.
        /// </remarks>
        public void TestNormalization()
        {
            string tailoring = "&a < \u0300\u0315 < A\u0300\u0315 < \u0316\u0315B < \u0316\u0300\u0315";

            string[] samples =
            {
                "\u1ED9",               "o\u0323\u0302",
                "\u0300\u0315",         "\u0315\u0300",
                "A\u0300\u0315B",       "A\u0315\u0300B",
                "A\u0316\u0315B",       "A\u0315\u0316B",
                "\u0316\u0300\u0315",   "\u0315\u0300\u0316",
                "A\u0316\u0300\u0315B", "A\u0315\u0300\u0316B",
                "\u0316\u0315\u0300",   "A\u0316\u0315\u0300B",
            };

            RuleBasedCollator collator;
            try
            {
                collator = new RuleBasedCollator(tailoring);
                collator.Decomposition = NormalizationMode.CanonicalDecomposition;
            }
            catch (Exception)
            {
                Warnln("ERROR: in creation of collator using rules " + tailoring);
                return;
            }

            // The initial text is only a placeholder; it is replaced before every check.
            CollationElementIterator elements = collator.GetCollationElementIterator("testing");

            foreach (string sample in samples)
            {
                elements.SetText(sample);
                CollationTest.BackAndForth(this, elements);
            }
        }
Beispiel #2
0
        /// <summary>
        /// Iterates forward over "abc" with the shared <c>coll</c> collator, recording
        /// each collation element, then iterates backward and asserts that the same
        /// elements come back in reverse order. The offset is asserted to be the text
        /// length after the forward pass and 0 after the backward pass.
        /// </summary>
        public void TestNext()
        {
            string text = "abc";
            CollationElementIterator elements = coll.GetCollationElementIterator(text);

            // Forward pass: remember every element until the iterator is exhausted.
            int[] seen = new int[text.Length];
            int filled = 0;
            for (int ce = elements.Next();
                 ce != CollationElementIterator.NULLORDER;
                 ce = elements.Next())
            {
                seen[filled++] = ce;
            }

            int endOffset = elements.GetOffset();
            NUnit.Framework.Assert.AreEqual(text.Length, endOffset);

            // Backward pass: elements must match the recorded ones, last to first.
            for (int ce = elements.Previous();
                 ce != CollationElementIterator.NULLORDER;
                 ce = elements.Previous())
            {
                NUnit.Framework.Assert.AreEqual(seen[--filled], ce);
            }

            NUnit.Framework.Assert.AreEqual(0, elements.GetOffset());
        }
Beispiel #3
0
        /// <summary>
        /// Runs <c>CollationTest.BackAndForth</c> with the th-TH collator, first over
        /// U+FDFA alone and then over successive batches of defined characters that
        /// together span U+0001 through U+FFFF.
        /// </summary>
        /// <remarks>
        /// If the Thai collator cannot be obtained, the test only emits a warning and exits.
        /// </remarks>
        public void TestNormalizedUnicodeChar()
        {
            // thai should have normalization on
            RuleBasedCollator thai;
            try
            {
                thai = (RuleBasedCollator)Collator.GetInstance(new CultureInfo("th-TH"));
            }
            catch (Exception)
            {
                Warnln("Error creating Thai collator");
                return;
            }

            StringBuffer batch = new StringBuffer();
            batch.Append('\uFDFA');

            CollationElementIterator elements = thai.GetCollationElementIterator(batch.ToString());
            CollationTest.BackAndForth(this, elements);

            char current = (char)0x1;
            while (current < 0xfffe)
            {
                batch.Delete(0, batch.Length);

                // Collect defined characters up to (but not including) the next
                // multiple of 0xFF ...
                for (; current % 0xFF != 0; current++)
                {
                    if (UChar.IsDefined(current))
                    {
                        batch.Append(current);
                    }
                }

                // ... and then that multiple itself, which closes the batch.
                if (UChar.IsDefined(current))
                {
                    batch.Append(current);
                }

                // Stay on U+FFFF once it is reached so the outer loop can terminate.
                if (current != 0xFFFF)
                {
                    current++;
                }

                // A basic test to see if it's working at all
                elements = thai.GetCollationElementIterator(batch.ToString());
                CollationTest.BackAndForth(this, elements);
            }
        }
        /// <summary>
        /// Checks the offsets reported by an iterator created from a
        /// <c>StringCharacterIterator</c>: for "cha" under the traditional Spanish
        /// collator and for "\u00E6b" under the de-DE collator. Also checks that passing
        /// a null <c>CharacterIterator</c> raises <see cref="NullReferenceException"/>.
        /// </summary>
        public void TestGetCollationElementIteratorCharacterIterator()
        {
            {
                // Traditional Spanish over "cha": expected offsets 0, 1, 2, 3.
                RuleBasedCollator spanish = (RuleBasedCollator)ILOG.J2CsMapping.Text.Collator
                                            .GetInstance(new Locale("es", "", "TRADITIONAL"));
                string spanishText = "cha";
                CollationElementIterator spanishElements = spanish
                                                           .GetCollationElementIterator(new StringCharacterIterator(spanishText));
                int[] spanishExpected = { 0, 1, 2, 3 };

                int spanishPosition = spanishElements.GetOffset();
                NUnit.Framework.Assert.AreEqual(spanishExpected[0], spanishPosition);
                for (int step = 1; spanishPosition != spanishText.Length; step++)
                {
                    spanishElements.Next();
                    spanishPosition = spanishElements.GetOffset();
                    NUnit.Framework.Assert.AreEqual(spanishExpected[step], spanishPosition);
                }
            }

            {
                // German over "\u00E6b": expected offsets 0, 1, 1, 2.
                RuleBasedCollator german = (RuleBasedCollator)ILOG.J2CsMapping.Text.Collator
                                           .GetInstance(new Locale("de", "DE"));
                string germanText = "\u00E6b";
                CollationElementIterator germanElements = german
                                                          .GetCollationElementIterator(new StringCharacterIterator(germanText));
                int[] germanExpected = { 0, 1, 1, 2 };

                int germanPosition = germanElements.GetOffset();
                NUnit.Framework.Assert.AreEqual(germanExpected[0], germanPosition);
                for (int step = 1; germanPosition != germanText.Length; step++)
                {
                    germanElements.Next();
                    germanPosition = germanElements.GetOffset();
                    NUnit.Framework.Assert.AreEqual(germanExpected[step], germanPosition);
                }
            }

            // Regression for HARMONY-1352
            try
            {
                new RuleBasedCollator("< a< b< c< d")
                    .GetCollationElementIterator((CharacterIterator)null);
                NUnit.Framework.Assert.Fail("NullPointerException expected");
            }
            catch (NullReferenceException)
            {
                // expected
            }
        }
Beispiel #5
0
        /// <summary>
        /// Sets the offset of an iterator over "cha" (traditional Spanish collator) to 1
        /// and asserts that <c>GetOffset</c> reports 1 afterwards.
        /// </summary>
        public void TestSetOffset()
        {
            // Failed in java too
            Locale traditionalSpanish = new Locale("es", "", "TRADITIONAL");
            RuleBasedCollator collator = (RuleBasedCollator)ILOG.J2CsMapping.Text.Collator
                                         .GetInstance(traditionalSpanish);
            string text = "cha";
            CollationElementIterator elements = collator.GetCollationElementIterator(text);

            elements.SetOffset(1);

            int reported = elements.GetOffset();
            NUnit.Framework.Assert.AreEqual(1, reported);
        }
        /// <summary>
        /// Checks the offsets reported by an iterator created from a string: for "cha"
        /// under the traditional Spanish collator and for "\u00E6b" under the de-DE
        /// collator. Also checks that passing a null string raises
        /// <see cref="NullReferenceException"/>.
        /// </summary>
        public void TestGetCollationElementIteratorString()
        {
            {
                // Traditional Spanish over "cha": expected offsets 0, 1, 2, 3.
                RuleBasedCollator spanish = (RuleBasedCollator)ILOG.J2CsMapping.Text.Collator
                                            .GetInstance(new Locale("es", "", "TRADITIONAL"));
                string spanishText = "cha";
                CollationElementIterator spanishElements = spanish
                                                           .GetCollationElementIterator(spanishText);
                int[] spanishExpected = { 0, 1, 2, 3 };

                int spanishPosition = spanishElements.GetOffset();
                NUnit.Framework.Assert.AreEqual(spanishExpected[0], spanishPosition);
                for (int step = 1; spanishPosition != spanishText.Length; step++)
                {
                    spanishElements.Next();
                    spanishPosition = spanishElements.GetOffset();
                    NUnit.Framework.Assert.AreEqual(spanishExpected[step], spanishPosition);
                }
            }

            {
                // German over "\u00E6b": expected offsets 0, 1, 1, 2.
                RuleBasedCollator german = (RuleBasedCollator)ILOG.J2CsMapping.Text.Collator
                                           .GetInstance(new Locale("de", "DE"));
                string germanText = "\u00E6b";
                CollationElementIterator germanElements = german
                                                          .GetCollationElementIterator(germanText);
                int[] germanExpected = { 0, 1, 1, 2 };

                int germanPosition = germanElements.GetOffset();
                NUnit.Framework.Assert.AreEqual(germanExpected[0], germanPosition);
                for (int step = 1; germanPosition != germanText.Length; step++)
                {
                    germanElements.Next();
                    germanPosition = germanElements.GetOffset();
                    NUnit.Framework.Assert.AreEqual(germanExpected[step], germanPosition);
                }
            }

            // Regression for HARMONY-1352
            try
            {
                new RuleBasedCollator("< a< b< c< d")
                    .GetCollationElementIterator((String)null);
                NUnit.Framework.Assert.Fail("NullPointerException expected");
            }
            catch (NullReferenceException)
            {
                // expected
            }
        }
Beispiel #7
0
        /// <summary>
        /// Asserts that the first two collation elements of "a\u00e0" under the fr-FR
        /// collator have equal secondary orders.
        /// </summary>
        public void TestSecondaryOrder()
        {
            Locale french = new Locale("fr", "FR");
            RuleBasedCollator collator = (RuleBasedCollator)ILOG.J2CsMapping.Text.Collator
                                         .GetInstance(french);
            CollationElementIterator elements = collator.GetCollationElementIterator("a\u00e0");

            int firstSecondary = CollationElementIterator.SecondaryOrder(elements.Next());
            int secondSecondary = CollationElementIterator.SecondaryOrder(elements.Next());

            NUnit.Framework.Assert.AreEqual(firstSecondary, secondSecondary);
        }
Beispiel #8
0
        /// <summary>
        /// Asserts that <c>GetMaxExpansion</c> returns 1 for every collation element
        /// produced for "cha" by the traditional Spanish collator.
        /// </summary>
        public void TestGetMaxExpansion()
        {
            string text = "cha";
            Locale traditionalSpanish = new Locale("es", "", "TRADITIONAL");
            RuleBasedCollator collator = (RuleBasedCollator)ILOG.J2CsMapping.Text.Collator
                                         .GetInstance(traditionalSpanish);
            CollationElementIterator elements = collator.GetCollationElementIterator(text);

            for (int ce = elements.Next();
                 ce != CollationElementIterator.NULLORDER;
                 ce = elements.Next())
            {
                NUnit.Framework.Assert.AreEqual(1, elements.GetMaxExpansion(ce));
            }
        }
Beispiel #9
0
        /// <summary>
        /// Asserts that, under the de-DE collator, the first collation element of
        /// "\u00e6" has the same primary order as the first collation element of "ae".
        /// </summary>
        public void TestPrimaryOrder()
        {
            Locale german = new Locale("de", "DE");
            RuleBasedCollator collator = (RuleBasedCollator)ILOG.J2CsMapping.Text.Collator
                                         .GetInstance(german);

            CollationElementIterator ligatureElements = collator.GetCollationElementIterator("\u00e6");
            int ligaturePrimary = CollationElementIterator.PrimaryOrder(ligatureElements.Next());

            CollationElementIterator spelledOutElements = collator.GetCollationElementIterator("ae");
            int spelledOutPrimary = CollationElementIterator.PrimaryOrder(spelledOutElements.Next());

            NUnit.Framework.Assert.AreEqual(ligaturePrimary, spelledOutPrimary);
        }
Beispiel #10
0
        /// <summary>
        /// Steps through "abc" with the shared <c>coll</c> collator and asserts that the
        /// offset is 0 before iteration and then 1, 2 and 3 after successive calls to
        /// <c>Next</c>.
        /// </summary>
        public void TestGetOffset()
        {
            string text = "abc";
            CollationElementIterator elements = coll.GetCollationElementIterator(text);
            int[] expected = { 0, 1, 2, 3 };

            int position = elements.GetOffset();
            NUnit.Framework.Assert.AreEqual(expected[0], position);

            // Advance one element at a time until the offset reaches the end of the text.
            for (int step = 1; position != text.Length; step++)
            {
                elements.Next();
                position = elements.GetOffset();
                NUnit.Framework.Assert.AreEqual(expected[step], position);
            }
        }
Beispiel #11
0
        /// <summary>
        /// Advances both iterators in lock step and reports an error at the first
        /// position where their collation elements differ. Afterwards both iterators
        /// are passed to <c>CollationTest.BackAndForth</c>.
        /// </summary>
        /// <param name="i1">First iterator to compare.</param>
        /// <param name="i2">Second iterator to compare.</param>
        void assertEqual(CollationElementIterator i1, CollationElementIterator i2)
        {
            int position = 0;

            while (true)
            {
                int left = i1.Next();
                int right = i2.Next();

                if (left != right)
                {
                    Errln("    " + position + ": strength(0x" +
                          left.ToHexString() + ") != strength(0x" + right.ToHexString() + ")");
                    break;
                }

                // Both iterators ran out at the same time: nothing left to compare.
                if (left == CollationElementIterator.NULLORDER)
                {
                    break;
                }

                position++;
            }

            CollationTest.BackAndForth(this, i1);
            CollationTest.BackAndForth(this, i2);
        }
Beispiel #12
0
        /// <summary>
        /// Sorts a set of Thai strings with <c>StrCmp</c>, then reports an error for any
        /// pair that the Thai collator orders the other way round. Each string is also
        /// run through <c>CollationTest.BackAndForth</c>.
        /// </summary>
        /// <remarks>
        /// If the collator or comparer cannot be constructed, the test only emits a
        /// warning and exits.
        /// </remarks>
        public void TestInvalidThai()
        {
            string[] candidates =
            {
                "\u0E44\u0E01\u0E44\u0E01",
                "\u0E44\u0E01\u0E01\u0E44",
                "\u0E01\u0E44\u0E01\u0E44",
                "\u0E01\u0E01\u0E44\u0E44",
                "\u0E44\u0E44\u0E01\u0E01",
                "\u0E01\u0E44\u0E44\u0E01",
            };

            RuleBasedCollator thaiCollator;
            StrCmp ordering;
            try
            {
                thaiCollator = GetThaiCollator();
                ordering = new StrCmp();
            }
            catch (Exception)
            {
                Warnln("could not construct Thai collator");
                return;
            }

            Array.Sort(candidates, ordering);

            for (int i = 0; i < candidates.Length; i++)
            {
                // Every later string must not compare as smaller than this one.
                for (int j = i + 1; j < candidates.Length; j++)
                {
                    bool outOfOrder = thaiCollator.Compare(candidates[i], candidates[j]) > 0;
                    if (outOfOrder)
                    {
                        Errln("Inconsistent ordering between strings " + i
                              + " and " + j);
                    }
                }

                CollationElementIterator elements
                    = thaiCollator.GetCollationElementIterator(candidates[i]);
                CollationTest.BackAndForth(this, elements);
            }
        }
Beispiel #13
0
        /// <summary>
        /// Verifies that re-pointing an iterator at <c>test1</c> through each
        /// <c>SetText</c> overload (string, <c>CharacterIterator</c>,
        /// <c>UCharacterIterator</c>) makes it yield the same elements as an iterator
        /// created directly over <c>test1</c>.
        /// </summary>
        public void TestSetText()
        {
            RuleBasedCollator english = (RuleBasedCollator)Collator.GetInstance(new CultureInfo("en-US"));
            CollationElementIterator reference = english.GetCollationElementIterator(test1);
            CollationElementIterator retargeted = english.GetCollationElementIterator(test2);

            // Run through the second iterator just to exercise it (at most ten elements).
            int element = retargeted.Next();
            for (int step = 1;
                 step < 10 && element != CollationElementIterator.NULLORDER;
                 step++)
            {
                try
                {
                    element = retargeted.Next();
                }
                catch (Exception)
                {
                    Errln("iter2.Next() returned an error.");
                    break;
                }
            }

            // Now set it to point to the same string as the first iterator.
            try
            {
                retargeted.SetText(test1);
            }
            catch (Exception)
            {
                Errln("call to iter2->setText(test1) failed.");
                return;
            }
            assertEqual(reference, retargeted);

            // Same check through the CharacterIterator overload of SetText.
            reference.Reset();
            CharacterIterator characters = new StringCharacterIterator(test1);
            try
            {
                retargeted.SetText(characters);
            }
            catch (Exception)
            {
                Errln("call to iter2->setText(chariter(test1)) failed.");
                return;
            }
            assertEqual(reference, retargeted);

            // Same check through the UCharacterIterator overload of SetText.
            reference.Reset();
            UCharacterIterator ucharacters = UCharacterIterator.GetInstance(test1);
            try
            {
                retargeted.SetText(ucharacters);
            }
            catch (Exception)
            {
                Errln("call to iter2->setText(uchariter(test1)) failed.");
                return;
            }
            assertEqual(reference, retargeted);
        }
Beispiel #14
0
        /// <summary>
        /// Runs <c>CollationTest.BackAndForth</c> over several collator/text pairs: the
        /// en-US collator with <c>test1</c>, three rule-based collators (contracting,
        /// expanding, and both), the th-TH collator and the ja-JP collator.
        /// </summary>
        /// <remarks>
        /// If any collator cannot be created, an error is reported and the remaining
        /// cases are skipped.
        /// </remarks>
        public void TestPrevious()
        {
            RuleBasedCollator english = (RuleBasedCollator)Collator.GetInstance(new CultureInfo("en-US"));
            CollationElementIterator elements = english.GetCollationElementIterator(test1);

            // A basic test to see if it's working at all
            CollationTest.BackAndForth(this, elements);

            // Test with a contracting character sequence
            RuleBasedCollator contracting;
            try
            {
                contracting = new RuleBasedCollator("&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH");
            }
            catch (Exception)
            {
                Errln("Couldn't create a RuleBasedCollator with a contracting sequence.");
                return;
            }

            string text = "abchdcba";
            elements = contracting.GetCollationElementIterator(text);
            CollationTest.BackAndForth(this, elements);

            // Test with an expanding character sequence
            RuleBasedCollator expanding;
            try
            {
                expanding = new RuleBasedCollator("&a < b < c/abd < d");
            }
            catch (Exception)
            {
                Errln("Couldn't create a RuleBasedCollator with an expanding sequence.");
                return;
            }

            text = "abcd";
            elements = expanding.GetCollationElementIterator(text);
            CollationTest.BackAndForth(this, elements);

            // Now try both
            RuleBasedCollator mixed;
            try
            {
                mixed = new RuleBasedCollator("&a < b < c/aba < d < z < ch");
            }
            catch (Exception)
            {
                Errln("Couldn't create a RuleBasedCollator with both an expanding and a contracting sequence.");
                return;
            }

            text = "abcdbchdc";
            elements = mixed.GetCollationElementIterator(text);
            CollationTest.BackAndForth(this, elements);

            // Thai collator
            text = "\u0e41\u0e02\u0e41\u0e02\u0e27abc";
            Collator thai;
            try
            {
                thai = Collator.GetInstance(new CultureInfo("th-TH"));
            }
            catch (Exception)
            {
                Errln("Couldn't create a collator");
                return;
            }

            elements = ((RuleBasedCollator)thai).GetCollationElementIterator(text);
            CollationTest.BackAndForth(this, elements);

            // Japanese collator
            text = "\u0061\u30CF\u3099\u30FC";
            Collator japanese;
            try
            {
                japanese = Collator.GetInstance(new CultureInfo("ja-JP"));
            }
            catch (Exception)
            {
                Errln("Couldn't create Japanese collator\n");
                return;
            }

            elements = ((RuleBasedCollator)japanese).GetCollationElementIterator(text);
            CollationTest.BackAndForth(this, elements);
        }
Beispiel #15
0
        /// <summary>
        /// Checks that moving the offset back to 0 after partly consuming an expanding
        /// character makes the iterator return the same first collation element it
        /// returned originally.
        /// </summary>
        /// <remarks>
        /// A failure to create the collator is reported as a warning; every other
        /// failing step is reported as an error. In all cases the test stops at the
        /// first failing step, because the later steps depend on the earlier ones.
        /// </remarks>
        public void TestClearBuffers(/* char* par */)
        {
            RuleBasedCollator c;

            try
            {
                c = new RuleBasedCollator("&a < b < c & ab = d");
            }
            catch (Exception)
            {
                Warnln("Couldn't create a RuleBasedCollator.");
                return;
            }

            string source = "abcd";
            CollationElementIterator i = c.GetCollationElementIterator(source);
            int e0;

            try
            {
                e0 = i.Next();    // save the first collation element
            }
            catch (Exception)
            {
                Errln("call to i.Next() failed.");
                return;
            }

            try
            {
                i.SetOffset(3);        // go to the expanding character
            }
            catch (Exception)
            {
                Errln("call to i.setOffset(3) failed.");
                return;
            }

            try
            {
                i.Next();                // but only use up half of it
            }
            catch (Exception)
            {
                Errln("call to i.Next() failed.");
                return;
            }

            try
            {
                i.SetOffset(0);        // go back to the beginning
            }
            catch (Exception)
            {
                Errln("call to i.setOffset(0) failed. ");
                // The comparison below is meaningless if the offset was not reset,
                // so stop here like every other failing step does.
                return;
            }

            int again;
            try
            {
                again = i.Next();    // and get this one again
            }
            catch (Exception)
            {
                Errln("call to i.Next() failed. ");
                return;
            }

            if (again != e0)
            {
                Errln("got 0x" + (again).ToHexString() + ", expected 0x" + (e0).ToHexString());
            }
        }
Beispiel #16
0
        /// <summary>
        /// Exercises <c>CollationElementIterator.GetMaxExpansion</c> with a rule-based
        /// collator. It first checks every code unit from U+0001 to U+FFFF against the
        /// number of elements actually produced, then checks exact expected values for
        /// selected characters, an unassigned supplementary code point and a jamo.
        /// </summary>
        /// <remarks>
        /// If the first collator cannot be created, the test only emits a warning and
        /// exits. Failure to create the second collator is reported as an error.
        /// </remarks>
        public void TestMaxExpansion(/* char* par */)
        {
            int unassigned = 0xEFFFD;
            string rule = "&a < ab < c/aba < d < z < ch";
            RuleBasedCollator coll;

            try
            {
                coll = new RuleBasedCollator(rule);
            }
            catch (Exception)
            {
                Warnln("Fail to create RuleBasedCollator");
                return;
            }

            char ch = (char)0;
            string str = ch + "";

            CollationElementIterator iter = coll.GetCollationElementIterator(str);

            // The reported maximum expansion must be at least the number of elements
            // the iterator really produces when walking backwards over the character.
            while (ch < 0xFFFF)
            {
                int count = 1;
                ch++;
                str = ch + "";
                iter.SetText(str);
                int order = iter.Previous();

                // thai management
                if (order == 0)
                {
                    order = iter.Previous();
                }

                while (iter.Previous() != CollationElementIterator.NULLORDER)
                {
                    count++;
                }

                if (iter.GetMaxExpansion(order) < count)
                {
                    Errln("Failure at codepoint " + ch + ", maximum expansion count < " + count);
                }
            }

            // testing for exact max expansion
            ch = (char)0;
            while (ch < 0x61)
            {
                str = ch + "";
                iter.SetText(str);
                int order = iter.Previous();

                if (iter.GetMaxExpansion(order) != 1)
                {
                    Errln("Failure at codepoint 0x" + (ch).ToHexString()
                          + " maximum expansion count == 1");
                }
                ch++;
            }

            ch = (char)0x63;
            str = ch + "";
            iter.SetText(str);
            int temporder = iter.Previous();

            if (iter.GetMaxExpansion(temporder) != 3)
            {
                Errln("Failure at codepoint 0x" + (ch).ToHexString()
                      + " maximum expansion count == 3");
            }

            ch = (char)0x64;
            str = ch + "";
            iter.SetText(str);
            temporder = iter.Previous();

            if (iter.GetMaxExpansion(temporder) != 1)
            {
                Errln("Failure at codepoint 0x" + (ch).ToHexString()
                      + " maximum expansion count == 1");
            }

            str = UChar.ToString(unassigned);
            iter.SetText(str);
            temporder = iter.Previous();

            if (iter.GetMaxExpansion(temporder) != 2)
            {
                // Report the code point under test; ch still holds 0x64 from the
                // previous check and would name the wrong character.
                Errln("Failure at codepoint 0x" + unassigned.ToString("x")
                      + " maximum expansion count == 2");
            }

            // testing jamo
            ch = (char)0x1165;
            str = ch + "";
            iter.SetText(str);
            temporder = iter.Previous();

            if (iter.GetMaxExpansion(temporder) > 3)
            {
                Errln("Failure at codepoint 0x" + (ch).ToHexString()
                      + " maximum expansion count < 3");
            }

            // testing special jamo &a<\u1165
            rule = "\u0026\u0071\u003c\u1165\u002f\u0071\u0071\u0071\u0071";

            try
            {
                coll = new RuleBasedCollator(rule);
            }
            catch (Exception)
            {
                Errln("Fail to create RuleBasedCollator");
                return;
            }

            // str still holds the jamo U+1165 from the previous check.
            iter = coll.GetCollationElementIterator(str);

            temporder = iter.Previous();

            if (iter.GetMaxExpansion(temporder) != 6)
            {
                Errln("Failure at codepoint 0x" + (ch).ToHexString()
                      + " maximum expansion count == 6");
            }
        }
Beispiel #17
0
        /// <summary>
        /// Exercises <c>SetOffset</c>/<c>GetOffset</c>: the text boundaries, the offset
        /// after a full pass, resetting to 0, and setting the offset in the middle of a
        /// contraction ("ch") or of a surrogate pair. In the last two cases the orders
        /// obtained must match those obtained from the start of the sequence.
        /// </summary>
        /// <remarks>
        /// If the en-US collator cannot be created, the test only emits a warning and exits.
        /// </remarks>
        public void TestOffset()
        {
            const string contractionMessage =
                "Error: setting offset in the middle of a contraction should be the same as setting it to the start of the contraction";
            const string surrogateMessage =
                "Error: setting offset in the middle of a surrogate pair should be the same as setting it to the start of the surrogate pair";

            RuleBasedCollator english;
            try
            {
                english = (RuleBasedCollator)Collator.GetInstance(new CultureInfo("en-US"));
            }
            catch (Exception)
            {
                Warnln("ERROR: in creation of collator of ENGLISH locale");
                return;
            }

            CollationElementIterator elements = english.GetCollationElementIterator(test1);

            // testing boundaries
            elements.SetOffset(0);
            if (elements.Previous() != CollationElementIterator.NULLORDER)
            {
                Errln("Error: After setting offset to 0, we should be at the end "
                      + "of the backwards iteration");
            }

            elements.SetOffset(test1.Length);
            if (elements.Next() != CollationElementIterator.NULLORDER)
            {
                Errln("Error: After setting offset to the end of the string, we "
                      + "should be at the end of the forwards iteration");
            }

            // Run all the way through the iterator, then get the offset
            int[] allOrders = CollationTest.GetOrders(elements);
            Logln("orders.Length = " + allOrders.Length);

            int endOffset = elements.GetOffset();
            if (endOffset != test1.Length)
            {
                Errln("offset at end != length: " + endOffset + " vs " + test1.Length);
            }

            // Now set the offset back to the beginning and see if it works
            CollationElementIterator pristine = english.GetCollationElementIterator(test1);
            try
            {
                elements.SetOffset(0);
            }
            catch (Exception)
            {
                Errln("setOffset failed.");
            }
            assertEqual(elements, pristine);

            // setting offset in the middle of a contraction
            RuleBasedCollator tailored;
            try
            {
                tailored = new RuleBasedCollator("& a < ch");
            }
            catch (Exception)
            {
                Errln("Error: in creation of Spanish collator");
                return;
            }

            string contraction = "change";
            elements = tailored.GetCollationElementIterator(contraction);
            int[] fromStart = CollationTest.GetOrders(elements);
            elements.SetOffset(1); // sets offset in the middle of ch
            int[] fromMiddle = CollationTest.GetOrders(elements);
            if (!Arrays.Equals(fromStart, fromMiddle))
            {
                Errln(contractionMessage);
            }

            contraction = "peache";
            elements = tailored.GetCollationElementIterator(contraction);
            elements.SetOffset(3);
            fromStart = CollationTest.GetOrders(elements);
            elements.SetOffset(4); // sets offset in the middle of ch
            fromMiddle = CollationTest.GetOrders(elements);
            if (!Arrays.Equals(fromStart, fromMiddle))
            {
                Errln(contractionMessage);
            }

            // setting offset in the middle of a surrogate pair
            string surrogate = "\ud800\udc00str";
            elements = tailored.GetCollationElementIterator(surrogate);
            fromStart = CollationTest.GetOrders(elements);
            elements.SetOffset(1); // sets offset in the middle of surrogate
            fromMiddle = CollationTest.GetOrders(elements);
            if (!Arrays.Equals(fromStart, fromMiddle))
            {
                Errln(surrogateMessage);
            }

            surrogate = "simple\ud800\udc00str";
            elements = tailored.GetCollationElementIterator(surrogate);
            elements.SetOffset(6);
            fromStart = CollationTest.GetOrders(elements);
            elements.SetOffset(7); // sets offset in the middle of surrogate
            fromMiddle = CollationTest.GetOrders(elements);
            if (!Arrays.Equals(fromStart, fromMiddle))
            {
                Errln(surrogateMessage);
            }
            // TODO: try iterating halfway through a messy string.
        }
Beispiel #18
0
        /// <summary>
        /// Exercises discontiguous contractions. Each source string is iterated
        /// with a collator built from <c>rulestr</c>, and its collation elements
        /// are compared against the elements obtained by iterating, one at a
        /// time, the space-separated pieces of the matching target string.
        /// Afterwards the source iterator is reset and handed to
        /// <c>CollationTest.BackAndForth</c>.
        /// </summary>
        public void TestDiscontiguous()
        {
            String rulestr = "&z < AB < X\u0300 < ABC < X\u0300\u0315";

            // Input strings fed to the iterator under test.
            String[] src =
            {
                "ADB", "ADBC", "A\u0315B", "A\u0315BC",
                // base character blocked
                "XD\u0300", "XD\u0300\u0315",
                // non blocking combining character
                "X\u0319\u0300", "X\u0319\u0300\u0315",
                // blocking combining character
                "X\u0314\u0300", "X\u0314\u0300\u0315",
                // contraction prefix
                "ABDC", "AB\u0315C", "X\u0300D\u0315",
                "X\u0300\u0319\u0315", "X\u0300\u031A\u0315",
                // ends not with a contraction character
                "X\u0319\u0300D", "X\u0319\u0300\u0315D",
                "X\u0300D\u0315D", "X\u0300\u0319\u0315D",
                "X\u0300\u031A\u0315D"
            };

            // Expected segmentation of each source string; pieces are separated
            // by a single space and iterated independently.
            String[] tgt =
            {
                "A D B", "A D BC", "A \u0315 B", "A \u0315 BC",
                // base character blocked
                "X D \u0300", "X D \u0300\u0315",
                // non blocking combining character
                "X\u0300 \u0319", "X\u0300\u0315 \u0319",
                // blocking combining character
                "X \u0314 \u0300", "X \u0314 \u0300\u0315",
                // contraction prefix
                "AB DC", "AB \u0315 C", "X\u0300 D \u0315",
                "X\u0300\u0315 \u0319", "X\u0300 \u031A \u0315",
                // ends not with a contraction character
                "X\u0300 \u0319D", "X\u0300\u0315 \u0319D",
                "X\u0300 D\u0315D", "X\u0300\u0315 \u0319D",
                "X\u0300 \u031A\u0315D"
            };

            try
            {
                RuleBasedCollator        collator     = new RuleBasedCollator(rulestr);
                CollationElementIterator actualIter   = collator.GetCollationElementIterator("");
                CollationElementIterator expectedIter = collator.GetCollationElementIterator("");

                for (int index = 0; index < src.Length; index++)
                {
                    actualIter.SetText(src[index]);

                    // Walk the expected string piece by piece.
                    for (int start = 0; start < tgt[index].Length;)
                    {
                        int end = tgt[index].IndexOf(' ', start);
                        if (end < 0)
                        {
                            // Last piece: runs to the end of the string.
                            end = tgt[index].Length;
                        }

                        // Substring takes (startIndex, length), hence end - start.
                        String piece = tgt[index].Substring(start, end - start);
                        expectedIter.SetText(piece);

                        for (int expectedCe = expectedIter.Next();
                             expectedCe != CollationElementIterator.NULLORDER;
                             expectedCe = expectedIter.Next())
                        {
                            if (expectedCe != actualIter.Next())
                            {
                                Errln("Discontiguos contraction test mismatch at"
                                      + index);
                                return;
                            }
                        }

                        // Step over the separating space.
                        start = end + 1;
                    }

                    actualIter.Reset();
                    CollationTest.BackAndForth(this, actualIter);
                }
            }
            catch (Exception ex)
            {
                Warnln("Error running discontiguous tests " + ex.ToString());
            }
        }
        /// <summary>
        /// Compares the character data stored in two different strings based on the
        /// collation rules.  Returns information about whether a string is less
        /// than, greater than or equal to another string in a language.
        /// This can be overridden in a subclass.
        /// </summary>
        /// <remarks>
        /// When the collator strength is IDENTICAL and every collation level
        /// compares equal, the (optionally decomposed) strings are compared
        /// ordinally, i.e. by UTF-16 code unit, as a final tiebreaker.  The
        /// comparison is deliberately not culture-sensitive: a culture-sensitive
        /// comparison may report distinct strings as equal, which would defeat
        /// the purpose of the IDENTICAL level.
        /// </remarks>
        /// <param name="source">The first string to compare.</param>
        /// <param name="target">The second string to compare.</param>
        /// <returns>
        /// <c>Collator.LESS</c>, <c>Collator.EQUAL</c> or <c>Collator.GREATER</c>
        /// from the collation-element comparison; for the IDENTICAL tiebreaker,
        /// the sign (-1, 0 or 1) of the ordinal comparison.
        /// </returns>
        /// <exception cref="NullPointerException"> if <c>source</c> or <c>target</c> is null. </exception>
        public override int Compare(String source, String target)
        {
            lock (this)
            {
                if (source == null || target == null)
                {
                    throw new NullPointerException();
                }

                // The basic algorithm here is that we use CollationElementIterators
                // to step through both the source and target strings.  We compare each
                // collation element in the source string against the corresponding one
                // in the target, checking for differences.
                //
                // If a difference is found, we set <result> to LESS or GREATER to
                // indicate whether the source string is less or greater than the target.
                //
                // However, it's not that simple.  If we find a tertiary difference
                // (e.g. 'A' vs. 'a') near the beginning of a string, it can be
                // overridden by a primary difference (e.g. "A" vs. "B") later in
                // the string.  For example, "AA" < "aB", even though 'A' > 'a'.
                //
                // To keep track of this, checkSecTer and checkTertiary record whether
                // a secondary or tertiary difference may still be recorded.  Once a
                // difference of a given level has been stored in <result>, the
                // corresponding flag is cleared so that later, weaker differences
                // cannot overwrite it.  A primary difference returns immediately.

                int result = Collator.EQUAL;

                // The cursors are cached on the instance and re-targeted on each call.
                if (SourceCursor == null)
                {
                    SourceCursor = GetCollationElementIterator(source);
                }
                else
                {
                    SourceCursor.Text = source;
                }
                if (TargetCursor == null)
                {
                    TargetCursor = GetCollationElementIterator(target);
                }
                else
                {
                    TargetCursor.Text = target;
                }

                int sOrder = 0, tOrder = 0;

                bool initialCheckSecTer = Strength >= Collator.SECONDARY;
                bool checkSecTer        = initialCheckSecTer;
                bool checkTertiary      = Strength >= Collator.TERTIARY;

                // When false, the corresponding element is NOT re-fetched on the
                // next iteration, i.e. the current element is compared again.
                bool gets = true, gett = true;

                while (true)
                {
                    // Get the next collation element in each of the strings, unless
                    // we've been requested to skip it.
                    if (gets)
                    {
                        sOrder = SourceCursor.Next();
                    }
                    else
                    {
                        gets = true;
                    }
                    if (gett)
                    {
                        tOrder = TargetCursor.Next();
                    }
                    else
                    {
                        gett = true;
                    }

                    // If we've hit the end of one of the strings, jump out of the loop
                    if ((sOrder == CollationElementIterator.NULLORDER) || (tOrder == CollationElementIterator.NULLORDER))
                    {
                        break;
                    }

                    int pSOrder = CollationElementIterator.PrimaryOrder(sOrder);
                    int pTOrder = CollationElementIterator.PrimaryOrder(tOrder);

                    // If there's no difference at this position, we can skip it
                    if (sOrder == tOrder)
                    {
                        if (Tables_Renamed.FrenchSec && pSOrder != 0)
                        {
                            if (!checkSecTer)
                            {
                                // in french, a secondary difference more to the right is stronger,
                                // so accents have to be checked with each base element
                                checkSecTer = initialCheckSecTer;
                                // but tertiary differences are less important than the first
                                // secondary difference, so checking tertiary remains disabled
                                checkTertiary = false;
                            }
                        }
                        continue;
                    }

                    // Compare primary differences first.
                    if (pSOrder != pTOrder)
                    {
                        if (sOrder == 0)
                        {
                            // The entire source element is ignorable.
                            // Skip to the next source element, but don't fetch another target element.
                            gett = false;
                            continue;
                        }
                        if (tOrder == 0)
                        {
                            // The entire target element is ignorable.
                            // Skip to the next target element, but don't fetch another source element.
                            gets = false;
                            continue;
                        }

                        // The source and target elements aren't ignorable, but it's still possible
                        // for the primary component of one of the elements to be ignorable....

                        if (pSOrder == 0)                         // primary order in source is ignorable
                        {
                            // The source's primary is ignorable, but the target's isn't.  We treat ignorables
                            // as a secondary difference, so remember that we found one.
                            if (checkSecTer)
                            {
                                result      = Collator.GREATER;                            // (strength is SECONDARY)
                                checkSecTer = false;
                            }
                            // Skip to the next source element, but don't fetch another target element.
                            gett = false;
                        }
                        else if (pTOrder == 0)
                        {
                            // record differences - see the comment above.
                            if (checkSecTer)
                            {
                                result      = Collator.LESS;                            // (strength is SECONDARY)
                                checkSecTer = false;
                            }
                            // Skip to the next target element, but don't fetch another source element.
                            gets = false;
                        }
                        else
                        {
                            // Neither of the orders is ignorable, and we already know that the primary
                            // orders are different because of the (pSOrder != pTOrder) test above.
                            // Record the difference and stop the comparison.
                            if (pSOrder < pTOrder)
                            {
                                return(Collator.LESS);                                // (strength is PRIMARY)
                            }
                            else
                            {
                                return(Collator.GREATER);                                // (strength is PRIMARY)
                            }
                        }
                    }                     // else of if ( pSOrder != pTOrder )
                    else
                    {
                        // primary order is the same, but complete order is different. So there
                        // are no base elements at this point, only ignorables (Since the strings are
                        // normalized)

                        if (checkSecTer)
                        {
                            // a secondary or tertiary difference may still matter
                            short secSOrder = CollationElementIterator.SecondaryOrder(sOrder);
                            short secTOrder = CollationElementIterator.SecondaryOrder(tOrder);
                            if (secSOrder != secTOrder)
                            {
                                // there is a secondary difference
                                result = (secSOrder < secTOrder) ? Collator.LESS : Collator.GREATER;
                                // (strength is SECONDARY)
                                checkSecTer = false;
                                // (even in french, only the first secondary difference within
                                //  a base character matters)
                            }
                            else
                            {
                                if (checkTertiary)
                                {
                                    // a tertiary difference may still matter
                                    short terSOrder = CollationElementIterator.TertiaryOrder(sOrder);
                                    short terTOrder = CollationElementIterator.TertiaryOrder(tOrder);
                                    if (terSOrder != terTOrder)
                                    {
                                        // there is a tertiary difference
                                        result = (terSOrder < terTOrder) ? Collator.LESS : Collator.GREATER;
                                        // (strength is TERTIARY)
                                        checkTertiary = false;
                                    }
                                }
                            }
                        }         // if (checkSecTer)
                    }             // if ( pSOrder != pTOrder )
                }                 // while()

                if (sOrder != CollationElementIterator.NULLORDER)
                {
                    // (tOrder must be CollationElementIterator::NULLORDER,
                    //  since this point is only reached when sOrder or tOrder is NULLORDER.)
                    // The source string has more elements, but the target string hasn't.
                    do
                    {
                        if (CollationElementIterator.PrimaryOrder(sOrder) != 0)
                        {
                            // We found an additional non-ignorable base character in the source string.
                            // This is a primary difference, so the source is greater
                            return(Collator.GREATER);                            // (strength is PRIMARY)
                        }
                        else if (CollationElementIterator.SecondaryOrder(sOrder) != 0)
                        {
                            // Additional secondary elements mean the source string is greater
                            if (checkSecTer)
                            {
                                result      = Collator.GREATER;                            // (strength is SECONDARY)
                                checkSecTer = false;
                            }
                        }
                    } while ((sOrder = SourceCursor.Next()) != CollationElementIterator.NULLORDER);
                }
                else if (tOrder != CollationElementIterator.NULLORDER)
                {
                    // The target string has more elements, but the source string hasn't.
                    do
                    {
                        if (CollationElementIterator.PrimaryOrder(tOrder) != 0)
                        // We found an additional non-ignorable base character in the target string.
                        // This is a primary difference, so the source is less
                        {
                            return(Collator.LESS);                            // (strength is PRIMARY)
                        }
                        else if (CollationElementIterator.SecondaryOrder(tOrder) != 0)
                        {
                            // Additional secondary elements in the target mean the source string is less
                            if (checkSecTer)
                            {
                                result      = Collator.LESS;                            // (strength is SECONDARY)
                                checkSecTer = false;
                            }
                        }
                    } while ((tOrder = TargetCursor.Next()) != CollationElementIterator.NULLORDER);
                }

                // For IDENTICAL comparisons, we use a bitwise character comparison
                // as a tiebreaker if all else is equal.
                //
                // String.CompareTo is culture-sensitive in .NET, so it is not a bitwise
                // comparison: it can return 0 for strings that differ and orders
                // characters by the current culture.  string.CompareOrdinal compares
                // UTF-16 code units instead.  Its magnitude is unspecified, so the
                // value is reduced to its sign to stay within the -1/0/1 range.
                if (result == 0 && Strength == IDENTICAL)
                {
                    int             mode = Decomposition;
                    Normalizer.Form form;
                    if (mode == CANONICAL_DECOMPOSITION)
                    {
                        form = Normalizer.Form.NFD;
                    }
                    else if (mode == FULL_DECOMPOSITION)
                    {
                        form = Normalizer.Form.NFKD;
                    }
                    else
                    {
                        // No decomposition requested: compare the raw strings.
                        return(System.Math.Sign(string.CompareOrdinal(source, target)));
                    }

                    String sourceDecomposition = Normalizer.Normalize(source, form);
                    String targetDecomposition = Normalizer.Normalize(target, form);
                    return(System.Math.Sign(string.CompareOrdinal(sourceDecomposition, targetDecomposition)));
                }
                return(result);
            }
        }
Beispiel #20
0
        /// <summary>
        /// Iterates a fixed Hangul/Latin sample text forwards and backwards with
        /// the collators for "root" and "root@collation=search", and checks that
        /// the offset reported before each <c>Next()</c> / <c>Previous()</c> call
        /// matches the expected offset table for that locale.  A returned
        /// element of 0 and any surplus or missing elements are reported as
        /// errors.
        /// </summary>
        public void TestSearchCollatorElements()
        {
            String tsceText =
                " \uAC00" +              // simple LV Hangul
                " \uAC01" +              // simple LVT Hangul
                " \uAC0F" +              // LVTT, last jamo expands for search
                " \uAFFF" +              // LLVVVTT, every jamo expands for search
                " \u1100\u1161\u11A8" +  // 0xAC01 as conjoining jamo
                " \u3131\u314F\u3131" +  // 0xAC01 as compatibility jamo
                " \u1100\u1161\u11B6" +  // 0xAC0F as conjoining jamo; last expands for search
                " \u1101\u1170\u11B6" +  // 0xAFFF as conjoining jamo; all expand for search
                " \u00E6" +              // small letter ae, expands
                " \u1E4D" +              // small letter o with tilde and acute, decomposes
                " ";

            // Expected offsets for the standard root collator.
            int[] rootStandardOffsets =
            {
                0,   1,  2,
                2,   3,  4,  4,
                4,   5,  6,  6,
                6,   7,  8,  8,
                8,   9, 10, 11,
                12, 13, 14, 15,
                16, 17, 18, 19,
                20, 21, 22, 23,
                24, 25, 26,/* plus another 1-2 offset=26 if ae-ligature maps to three CEs */
                26, 27, 28, 28,
                28,
                29
            };

            // Expected offsets for the root search collator.
            int[] rootSearchOffsets =
            {
                0,   1,  2,
                2,   3,  4,  4,
                4,   5,  6,  6,  6,
                6,   7,  8,  8,  8,  8,  8,  8,
                8,   9, 10, 11,
                12, 13, 14, 15,
                16, 17, 18, 19, 20,
                20, 21, 22, 22, 23, 23, 23, 24,
                24, 25, 26,/* plus another 1-2 offset=26 if ae-ligature maps to three CEs */
                26, 27, 28, 28,
                28,
                29
            };

            TSCEItem[] tsceItems =
            {
                new TSCEItem("root",                  rootStandardOffsets),
                new TSCEItem("root@collation=search", rootSearchOffsets),
            };

            foreach (TSCEItem item in tsceItems)
            {
                String            localeName = item.LocaleString;
                ULocale           locale     = new ULocale(localeName);
                RuleBasedCollator collator   = null;
                try
                {
                    collator = (RuleBasedCollator)Collator.GetInstance(locale);
                }
                catch (Exception)
                {
                    Errln("Error: in locale " + localeName + ", err in Collator.getInstance");
                    continue;
                }

                CollationElementIterator uce = collator.GetCollationElementIterator(tsceText);
                int[] expected      = item.GetOffsets();
                int   expectedCount = expected.Length;
                int   cursor        = 0;
                int   ce;

                // forwards test: the offset is sampled BEFORE each Next() call.
                do
                {
                    int before = uce.GetOffset();
                    ce = uce.Next();
                    Logln(String.Format("({0}) offset={1:d2}  ce={2:x8}\n", item.LocaleString, before, ce));
                    if (ce == 0)
                    {
                        Errln("Error: in locale " + localeName + ", CEIterator next() returned element 0");
                    }
                    if (cursor >= expectedCount)
                    {
                        Errln("Error: in locale " + localeName + ", CEIterator next() returned more elements than expected");
                    }
                    else
                    {
                        if (before != expected[cursor])
                        {
                            Errln("Error: in locale " + localeName + ", expected CEIterator next()->getOffset " + expected[cursor] + ", got " + before);
                        }
                        cursor++;
                    }
                } while (ce != CollationElementIterator.NULLORDER);

                if (cursor < expectedCount)
                {
                    Errln("Error: in locale " + localeName + ", CEIterator next() returned fewer elements than expected");
                }

                // backwards test: start at the end of the text and walk the
                // expected table from its last entry down to its first.
                uce.SetOffset(tsceText.Length);
                cursor = expectedCount;
                do
                {
                    int before = uce.GetOffset();
                    ce = uce.Previous();
                    if (ce == 0)
                    {
                        Errln("Error: in locale " + localeName + ", CEIterator previous() returned element 0");
                    }
                    if (cursor <= 0)
                    {
                        Errln("Error: in locale " + localeName + ", CEIterator previous() returned more elements than expected");
                    }
                    else
                    {
                        cursor--;
                        if (before != expected[cursor])
                        {
                            Errln("Error: in locale " + localeName + ", expected CEIterator previous()->getOffset " + expected[cursor] + ", got " + before);
                        }
                    }
                } while (ce != CollationElementIterator.NULLORDER);

                if (cursor > 0)
                {
                    Errln("Error: in locale " + localeName + ", CEIterator previous() returned fewer elements than expected");
                }
            }
        }
        /// <summary>
        /// Builds the collation key for <paramref name="source"/>: a string of
        /// primary orders, then secondary orders, then tertiary orders, with a
        /// null character between the sections, and (at IDENTICAL strength) a
        /// further null character followed by the optionally decomposed source
        /// text.  It can be overridden in a subclass.
        /// </summary>
        /// <param name="source">The string to transform; may be null.</param>
        /// <returns>
        /// A <c>RuleBasedCollationKey</c> for <paramref name="source"/>, or null
        /// when <paramref name="source"/> is null.
        /// </returns>
        public override CollationKey GetCollationKey(String source)
        {
            lock (this)
            {
                // Layout of the key
                // -----------------
                // Every collation element has a primary (A vs B), a secondary
                // (A vs A-acute) and a tertiary (A vs a) component.  A primary
                // difference anywhere in the string outranks a secondary or
                // tertiary difference that occurs earlier, so the key stores all
                // primaries first, then all secondaries, then all tertiaries,
                // each section terminated by a null character.
                //
                // Hypothetical example, one digit per level:
                //
                //   String:              A     a     B   \u00e9 <--(e-acute)
                //   Collation Elements: 101   100   201  510
                //
                //   Collation Key:      1125<null>0001<null>1010
                //
                // With French secondary ordering, accents are compared from the
                // *end* of the string, but the accents attached to one base
                // character are still compared from their beginning.  To get
                // that, each run of accents following a base character is
                // reversed, and then the whole secondary section is reversed:
                //
                //   Collation Key:      1125<null>1000<null>1010
                if (source == null)
                {
                    return(null);
                }

                // The three section buffers are cached on the instance and
                // cleared for reuse.
                if (PrimResult != null)
                {
                    PrimResult.Length = 0;
                    SecResult.Length  = 0;
                    TerResult.Length  = 0;
                }
                else
                {
                    PrimResult = new StringBuffer();
                    SecResult  = new StringBuffer();
                    TerResult  = new StringBuffer();
                }

                bool wantSecondary = (Strength >= Collator.SECONDARY);
                bool wantTertiary  = (Strength >= Collator.TERTIARY);

                // Length of SecResult just after the most recent base character;
                // anything appended after it belongs to that base character.
                int accentRunStart = 0;

                if (SourceCursor != null)
                {
                    SourceCursor.Text = source;
                }
                else
                {
                    SourceCursor = GetCollationElementIterator(source);
                }

                // walk through each collation element
                for (int element = SourceCursor.Next();
                     element != CollationElementIterator.NULLORDER;
                     element = SourceCursor.Next())
                {
                    int secondary = CollationElementIterator.SecondaryOrder(element);
                    int tertiary  = CollationElementIterator.TertiaryOrder(element);

                    if (CollationElementIterator.IsIgnorable(element))
                    {
                        // Ignorable element: contributes only to the secondary
                        // and tertiary sections, shifted past the maximum order.
                        if (wantSecondary && secondary != 0)
                        {
                            SecResult.Append((char)(secondary + Tables_Renamed.MaxSecOrder + COLLATIONKEYOFFSET));
                        }
                        if (wantTertiary && tertiary != 0)
                        {
                            TerResult.Append((char)(tertiary + Tables_Renamed.MaxTerOrder + COLLATIONKEYOFFSET));
                        }
                        continue;
                    }

                    // Base (non-ignorable) element.
                    PrimResult.Append((char)(CollationElementIterator.PrimaryOrder(element) + COLLATIONKEYOFFSET));

                    if (wantSecondary)
                    {
                        if (Tables_Renamed.FrenchSec && accentRunStart < SecResult.Length())
                        {
                            // Reversed secondary ordering: a new base character
                            // closes the accent run of the previous one, so
                            // reverse that run now (see the layout notes above).
                            RBCollationTables.Reverse(SecResult, accentRunStart, SecResult.Length());
                        }
                        SecResult.Append((char)(secondary + COLLATIONKEYOFFSET));
                        // Remember where the next accent run starts.
                        accentRunStart = SecResult.Length();
                    }
                    if (wantTertiary)
                    {
                        TerResult.Append((char)(tertiary + COLLATIONKEYOFFSET));
                    }
                }

                if (Tables_Renamed.FrenchSec)
                {
                    if (accentRunStart < SecResult.Length())
                    {
                        // Accents trailing the last base character form one
                        // more run that still has to be reversed.
                        RBCollationTables.Reverse(SecResult, accentRunStart, SecResult.Length());
                    }
                    // Finally reverse the whole secondary section.
                    RBCollationTables.Reverse(SecResult, 0, SecResult.Length());
                }

                // Assemble: primaries <null> secondaries <null> tertiaries.
                PrimResult.Append((char)0);
                SecResult.Append((char)0);
                SecResult.Append(TerResult.ToString());
                PrimResult.Append(SecResult.ToString());

                if (Strength == IDENTICAL)
                {
                    PrimResult.Append((char)0);
                    int    mode = Decomposition;
                    String identicalPart;
                    if (mode == CANONICAL_DECOMPOSITION)
                    {
                        identicalPart = Normalizer.Normalize(source, Normalizer.Form.NFD);
                    }
                    else if (mode == FULL_DECOMPOSITION)
                    {
                        identicalPart = Normalizer.Normalize(source, Normalizer.Form.NFKD);
                    }
                    else
                    {
                        identicalPart = source;
                    }
                    PrimResult.Append(identicalPart);
                }

                return(new RuleBasedCollationKey(source, PrimResult.ToString()));
            }
        }