/// <summary>
/// Builds a rule-based collator from a fixed tailoring, sets its decomposition mode to
/// canonical decomposition, and runs <c>CollationTest.BackAndForth</c> over a collation
/// element iterator for each test string.
/// Emits a warning and returns early if the collator cannot be created.
/// </summary>
public void TestNormalization()
{
    string rules = "&a < \u0300\u0315 < A\u0300\u0315 < \u0316\u0315B < \u0316\u0300\u0315";
    string[] testdata =
    {
        "\u1ED9", "o\u0323\u0302",
        "\u0300\u0315", "\u0315\u0300",
        "A\u0300\u0315B", "A\u0315\u0300B",
        "A\u0316\u0315B", "A\u0315\u0316B",
        "\u0316\u0300\u0315", "\u0315\u0300\u0316",
        "A\u0316\u0300\u0315B", "A\u0315\u0300\u0316B",
        "\u0316\u0315\u0300", "A\u0316\u0315\u0300B"
    };

    RuleBasedCollator coll;
    try
    {
        coll = new RuleBasedCollator(rules);
        coll.Decomposition = NormalizationMode.CanonicalDecomposition; // (Collator.CANONICAL_DECOMPOSITION)
    }
    catch (Exception)
    {
        Warnln("ERROR: in creation of collator using rules " + rules);
        return;
    }

    // One iterator is created up front and re-targeted at each test string.
    CollationElementIterator iter = coll.GetCollationElementIterator("testing");
    foreach (string text in testdata)
    {
        iter.SetText(text);
        CollationTest.BackAndForth(this, iter);
    }
}
/// <summary>
/// Walks <paramref name="tests"/> in steps of three, treating each step as
/// (source, operator, target), and hands each triple to <c>CollationTest.DoTest</c>
/// with the expected comparison result derived from the operator.
/// </summary>
/// <param name="c">Collator passed through to <c>CollationTest.DoTest</c>.</param>
/// <param name="tests">
/// Flat array of triples. The operator must be "&lt;" (expect -1), "&gt;" (expect 1)
/// or "=" (expect 0); any other operator is reported through <c>Errln</c> and stops processing.
/// </param>
internal void CompareArray(RuleBasedCollator c, String[] tests)
{
    int index = 0;
    while (index < tests.Length)
    {
        // The operator is examined before the operands are read, so an unknown
        // operator is reported even when the final triple is incomplete.
        string op = tests[index + 1];
        int expected;
        if (op.Equals("<"))
        {
            expected = -1;
        }
        else if (op.Equals(">"))
        {
            expected = 1;
        }
        else if (op.Equals("="))
        {
            expected = 0;
        }
        else
        {
            Errln("Error: unknown operator " + op);
            return;
        }

        string left = tests[index];
        string right = tests[index + 2];
        CollationTest.DoTest(this, c, left, right, expected);

        index += 3;
    }
}
/// <summary>
/// Runs <c>CollationTest.BackAndForth</c> with a collator obtained for the "th-TH" culture,
/// first over the single character U+FDFA and then over successive batches of defined
/// BMP characters. Each batch runs from the current code point up to and including the
/// next multiple of 0xFF.
/// Emits a warning and returns early if the collator cannot be created.
/// </summary>
public void TestNormalizedUnicodeChar()
{
    // thai should have normalization on
    RuleBasedCollator thai;
    try
    {
        thai = (RuleBasedCollator)Collator.GetInstance(new CultureInfo("th-TH"));
    }
    catch (Exception)
    {
        Warnln("Error creating Thai collator");
        return;
    }

    StringBuffer source = new StringBuffer();
    source.Append('\uFDFA');
    CollationElementIterator iter = thai.GetCollationElementIterator(source.ToString());
    CollationTest.BackAndForth(this, iter);

    char codepoint = (char)0x1;
    while (codepoint < 0xfffe)
    {
        // Start every batch from an empty buffer.
        source.Delete(0, source.Length);

        // Gather the defined characters below the next multiple of 0xFF ...
        for (; codepoint % 0xFF != 0; codepoint++)
        {
            if (UChar.IsDefined(codepoint))
            {
                source.Append(codepoint);
            }
        }

        // ... and the multiple itself.
        if (UChar.IsDefined(codepoint))
        {
            source.Append(codepoint);
        }

        // 0xFFFF is the largest char value; stopping there lets the outer condition end the loop.
        if (codepoint != 0xFFFF)
        {
            codepoint++;
        }

        iter = thai.GetCollationElementIterator(source.ToString());
        // A basic test to see if it's working at all
        CollationTest.BackAndForth(this, iter);
    }
}
/// <summary>
/// Reports an error through <c>Errln</c> when the two collation keys are not equal.
/// Does nothing when they are equal.
/// </summary>
/// <param name="localeName">Name shown in the message as the collator under test.</param>
/// <param name="string1">Input string shown in the message.</param>
/// <param name="k1">Key labelled "localeCollator key" in the message.</param>
/// <param name="k2">Key labelled "ruleCollator key" in the message.</param>
private void Report(String localeName, String string1, CollationKey k1, CollationKey k2)
{
    if (k1.Equals(k2))
    {
        return;
    }

    string message =
        "With " + localeName + " collator\n and input string: " + string1 + "\n"
        + " failed to produce identical keys on both collators\n"
        + " localeCollator key: " + CollationTest.Prettify(k1) + "\n"
        + " ruleCollator key: " + CollationTest.Prettify(k2) + "\n";
    Errln(message);
}
/// <summary>
/// Runs <c>CollationTest.BackAndForth</c> with a collator obtained for the "en-US" culture,
/// first over a short three-character string and then over successive batches of defined
/// BMP characters. Each batch runs from the current code point up to and including the
/// next multiple of 0xFF.
/// </summary>
public void TestUnicodeChar()
{
    RuleBasedCollator english = (RuleBasedCollator)Collator.GetInstance(new CultureInfo("en-US"));

    StringBuffer source = new StringBuffer();
    source.Append("\u0e4d\u0e4e\u0e4f");

    CollationElementIterator iter = english.GetCollationElementIterator(source.ToString());
    // A basic test to see if it's working at all
    CollationTest.BackAndForth(this, iter);

    char codepoint = (char)1;
    while (codepoint < 0xFFFE)
    {
        // Start every batch from an empty buffer.
        source.Delete(0, source.Length);

        // Gather the defined characters below the next multiple of 0xFF ...
        for (; codepoint % 0xFF != 0; codepoint++)
        {
            if (UChar.IsDefined(codepoint))
            {
                source.Append(codepoint);
            }
        }

        // ... and the multiple itself.
        if (UChar.IsDefined(codepoint))
        {
            source.Append(codepoint);
        }

        // 0xFFFF is the largest char value; stopping there lets the outer condition end the loop.
        if (codepoint != 0xFFFF)
        {
            codepoint++;
        }

        iter = english.GetCollationElementIterator(source.ToString());
        // A basic test to see if it's working at all
        CollationTest.BackAndForth(this, iter);
    }
}
/// <summary>
/// Advances both iterators in lock step and reports the first position at which the
/// values returned by <c>Next()</c> differ. Comparison stops at the first mismatch or
/// once both iterators have returned <c>CollationElementIterator.NULLORDER</c>.
/// Afterwards each iterator is run through <c>CollationTest.BackAndForth</c>.
/// </summary>
/// <param name="i1">First iterator.</param>
/// <param name="i2">Second iterator.</param>
void assertEqual(CollationElementIterator i1, CollationElementIterator i2)
{
    int position = 0;
    while (true)
    {
        int first = i1.Next();
        int second = i2.Next();

        if (first != second)
        {
            Errln(" " + position + ": strength(0x" + first.ToHexString()
                + ") != strength(0x" + second.ToHexString() + ")");
            break;
        }

        position += 1;

        // Both values are equal here, so checking one of them is enough.
        if (first == CollationElementIterator.NULLORDER)
        {
            break;
        }
    }

    CollationTest.BackAndForth(this, i1);
    CollationTest.BackAndForth(this, i2);
}
/// <summary>
/// Sorts a fixed set of Thai strings with a <c>StrCmp</c> comparer, then verifies with the
/// collator from <c>GetThaiCollator()</c> that no earlier string compares greater than a
/// later one. Each string is also run through <c>CollationTest.BackAndForth</c>.
/// Emits a warning and returns early if the collator or comparer cannot be constructed.
/// </summary>
public void TestInvalidThai()
{
    string[] tests =
    {
        "\u0E44\u0E01\u0E44\u0E01",
        "\u0E44\u0E01\u0E01\u0E44",
        "\u0E01\u0E44\u0E01\u0E44",
        "\u0E01\u0E01\u0E44\u0E44",
        "\u0E44\u0E44\u0E01\u0E01",
        "\u0E01\u0E44\u0E44\u0E01",
    };

    RuleBasedCollator collator;
    StrCmp comparator;
    try
    {
        collator = GetThaiCollator();
        comparator = new StrCmp();
    }
    catch (Exception)
    {
        Warnln("could not construct Thai collator");
        return;
    }

    Array.Sort(tests, comparator);

    int earlier = 0;
    while (earlier < tests.Length)
    {
        int later = earlier + 1;
        while (later < tests.Length)
        {
            if (collator.Compare(tests[earlier], tests[later]) > 0)
            {
                // inconsistency ordering found!
                Errln("Inconsistent ordering between strings " + earlier + " and " + later);
            }
            later++;
        }

        CollationElementIterator iterator = collator.GetCollationElementIterator(tests[earlier]);
        CollationTest.BackAndForth(this, iterator);
        earlier++;
    }
}
/// <summary>
/// Checks, via <c>CollationTest.DoTest</c>, that each string in a fixed sequence of Han
/// characters sorts before its successor under the collator obtained for the "zh-Hans" culture.
/// Emits a warning and returns early if the collator cannot be created.
/// </summary>
public void TestPinYin()
{
    string[] seq =
    {
        "\u963f", "\u554a", "\u54ce", "\u6371", "\u7231",
        "\u9f98", "\u4e5c", "\u8baa", "\u4e42", "\u53c8"
    };

    RuleBasedCollator collator;
    try
    {
        // ICU4N: See: https://stackoverflow.com/questions/9416435/what-culture-code-should-i-use-for-pinyin#comment11937203_9421566
        // Originally ("zh", "", "PINYIN").
        // ICU4N TODO: Can we replicate the 3rd parameter somehow?
        collator = (RuleBasedCollator)Collator.GetInstance(new CultureInfo("zh-Hans"));
    }
    catch (Exception)
    {
        Warnln("ERROR: in creation of collator of zh__PINYIN locale");
        return;
    }

    // Compare every adjacent pair; the earlier entry is expected to sort first.
    for (int next = 1; next < seq.Length; next++)
    {
        CollationTest.DoTest(this, collator, seq[next - 1], seq[next], -1);
    }
}
/// <summary>
/// Logs or reports the outcome of three comparisons of the same string pair
/// (collator compare, sort-key compare, incremental compare) against the expected result.
/// Returns silently when all three match and verbose mode is off. Otherwise each
/// comparison is written with <c>Logln</c> when it matches and <c>Errln</c> when it does not.
/// </summary>
/// <param name="source">Source string shown in the messages.</param>
/// <param name="target">Target string shown in the messages.</param>
/// <param name="sourceKey">Key of <paramref name="source"/>; printed only when the key comparison fails.</param>
/// <param name="targetKey">Key of <paramref name="target"/>; printed only when the key comparison fails.</param>
/// <param name="compareResult">Result of the collator comparison.</param>
/// <param name="keyResult">Result of the sort-key comparison.</param>
/// <param name="incResult">Result of the incremental comparison.</param>
/// <param name="expectedResult">Expected result; must be -1, 0 or 1, otherwise an error is reported and nothing else is checked.</param>
private void ReportCResult(String source, String target, CollationKey sourceKey, CollationKey targetKey,
    int compareResult, int keyResult, int incResult, int expectedResult)
{
    if (expectedResult < -1 || expectedResult > 1)
    {
        Errln("***** invalid call to reportCResult ****");
        return;
    }

    bool ok1 = (compareResult == expectedResult);
    bool ok2 = (keyResult == expectedResult);
    bool ok3 = (incResult == expectedResult);

    if (ok1 && ok2 && ok3 && !IsVerbose())
    {
        return;
    }

    // Every result is rendered starting from an empty string. Previously one variable was
    // fed back into AppendCompareResult for all three comparisons.
    // NOTE(review): AppendCompareResult is not visible here; its name suggests it appends to
    // the supplied target, in which case the old code printed concatenated results for the
    // key and incCompare messages. Passing "" is correct either way.
    String msg4 = "; expected ";
    String sExpect = CollationTest.AppendCompareResult(expectedResult, "");

    // Collator comparison.
    String msg1 = ok1 ? "Ok: compare(\"" : "FAIL: compare(\"";
    String msg2 = "\", \"";
    String msg3 = "\") returned ";
    String sResult = CollationTest.AppendCompareResult(compareResult, "");
    if (ok1)
    {
        Logln(msg1 + source + msg2 + target + msg3 + sResult);
    }
    else
    {
        Errln(msg1 + source + msg2 + target + msg3 + sResult + msg4 + sExpect);
    }

    // Sort-key comparison; on failure the two keys are printed as well.
    msg1 = ok2 ? "Ok: key(\"" : "FAIL: key(\"";
    msg2 = "\").compareTo(key(\"";
    msg3 = "\")) returned ";
    sResult = CollationTest.AppendCompareResult(keyResult, "");
    if (ok2)
    {
        Logln(msg1 + source + msg2 + target + msg3 + sResult);
    }
    else
    {
        Errln(msg1 + source + msg2 + target + msg3 + sResult + msg4 + sExpect);
        msg1 = " ";
        msg2 = " vs. ";
        Errln(msg1 + CollationTest.Prettify(sourceKey) + msg2 + CollationTest.Prettify(targetKey));
    }

    // Incremental comparison.
    msg1 = ok3 ? "Ok: incCompare(\"" : "FAIL: incCompare(\"";
    msg2 = "\", \"";
    msg3 = "\") returned ";
    sResult = CollationTest.AppendCompareResult(incResult, "");
    if (ok3)
    {
        Logln(msg1 + source + msg2 + target + msg3 + sResult);
    }
    else
    {
        Errln(msg1 + source + msg2 + target + msg3 + sResult + msg4 + sExpect);
    }
}
/// <summary>
/// Checks collation elements for strings involving the contractions defined by a fixed
/// tailoring. For every source string, the matching target string is split on spaces.
/// The collation elements of the segments, taken in order, must equal the elements produced
/// by iterating the source string. The first mismatch is reported and ends the test.
/// Any exception is reported as a warning.
/// </summary>
public void TestDiscontiguous()
{
    string rulestr = "&z < AB < X\u0300 < ABC < X\u0300\u0315";

    string[] src =
    {
        "ADB", "ADBC", "A\u0315B", "A\u0315BC",
        // base character blocked
        "XD\u0300", "XD\u0300\u0315",
        // non blocking combining character
        "X\u0319\u0300", "X\u0319\u0300\u0315",
        // blocking combining character
        "X\u0314\u0300", "X\u0314\u0300\u0315",
        // contraction prefix
        "ABDC", "AB\u0315C", "X\u0300D\u0315",
        "X\u0300\u0319\u0315", "X\u0300\u031A\u0315",
        // ends not with a contraction character
        "X\u0319\u0300D", "X\u0319\u0300\u0315D",
        "X\u0300D\u0315D", "X\u0300\u0319\u0315D",
        "X\u0300\u031A\u0315D"
    };

    string[] tgt =
    {
        // non blocking combining character
        "A D B", "A D BC", "A \u0315 B", "A \u0315 BC",
        // base character blocked
        "X D \u0300", "X D \u0300\u0315",
        // non blocking combining character
        "X\u0300 \u0319", "X\u0300\u0315 \u0319",
        // blocking combining character
        "X \u0314 \u0300", "X \u0314 \u0300\u0315",
        // contraction prefix
        "AB DC", "AB \u0315 C", "X\u0300 D \u0315",
        "X\u0300\u0315 \u0319", "X\u0300 \u031A \u0315",
        // ends not with a contraction character
        "X\u0300 \u0319D", "X\u0300\u0315 \u0319D",
        "X\u0300 D\u0315D", "X\u0300\u0315 \u0319D",
        "X\u0300 \u031A\u0315D"
    };

    try
    {
        RuleBasedCollator coll = new RuleBasedCollator(rulestr);
        CollationElementIterator iter = coll.GetCollationElementIterator("");
        CollationElementIterator resultiter = coll.GetCollationElementIterator("");

        for (int count = 0; count < src.Length; count++)
        {
            iter.SetText(src[count]);
            string expected = tgt[count];

            // Walk the space-separated segments of the expected string.
            int start = 0;
            while (start < expected.Length)
            {
                int end = expected.IndexOf(' ', start);
                if (end < 0)
                {
                    end = expected.Length;
                }

                // ICU4N: Substring takes a length, hence end - start.
                resultiter.SetText(expected.Substring(start, end - start));

                for (int ce = resultiter.Next();
                     ce != CollationElementIterator.NULLORDER;
                     ce = resultiter.Next())
                {
                    if (ce != iter.Next())
                    {
                        Errln("Discontiguos contraction test mismatch at" + count);
                        return;
                    }
                }

                start = end + 1;
            }

            iter.Reset();
            CollationTest.BackAndForth(this, iter);
        }
    }
    catch (Exception e)
    {
        Warnln("Error running discontiguous tests " + e.ToString());
    }
}
/// <summary>
/// Runs <c>CollationTest.BackAndForth</c> over iterators from several collators: the
/// "en-US" collator on <c>test1</c>, three rule-based collators (contracting, expanding,
/// and both), and the "th-TH" and "ja-JP" collators.
/// Reports an error and stops as soon as any of the later collators cannot be created.
/// </summary>
public void TestPrevious(/* char* par */)
{
    RuleBasedCollator english = (RuleBasedCollator)Collator.GetInstance(new CultureInfo("en-US"));
    CollationElementIterator iter = english.GetCollationElementIterator(test1);

    // A basic test to see if it's working at all
    CollationTest.BackAndForth(this, iter);

    // Test with a contracting character sequence
    RuleBasedCollator contracting;
    try
    {
        contracting = new RuleBasedCollator("&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH");
    }
    catch (Exception)
    {
        Errln("Couldn't create a RuleBasedCollator with a contracting sequence.");
        return;
    }
    iter = contracting.GetCollationElementIterator("abchdcba");
    CollationTest.BackAndForth(this, iter);

    // Test with an expanding character sequence
    RuleBasedCollator expanding;
    try
    {
        expanding = new RuleBasedCollator("&a < b < c/abd < d");
    }
    catch (Exception)
    {
        Errln("Couldn't create a RuleBasedCollator with an expanding sequence.");
        return;
    }
    iter = expanding.GetCollationElementIterator("abcd");
    CollationTest.BackAndForth(this, iter);

    // Now try both
    RuleBasedCollator mixed;
    try
    {
        mixed = new RuleBasedCollator("&a < b < c/aba < d < z < ch");
    }
    catch (Exception)
    {
        Errln("Couldn't create a RuleBasedCollator with both an expanding and a contracting sequence.");
        return;
    }
    iter = mixed.GetCollationElementIterator("abcdbchdc");
    CollationTest.BackAndForth(this, iter);

    // Thai collator; the cast to RuleBasedCollator happens outside the try block.
    Collator thai;
    try
    {
        thai = Collator.GetInstance(new CultureInfo("th-TH"));
    }
    catch (Exception)
    {
        Errln("Couldn't create a collator");
        return;
    }
    iter = ((RuleBasedCollator)thai).GetCollationElementIterator("\u0e41\u0e02\u0e41\u0e02\u0e27abc");
    CollationTest.BackAndForth(this, iter);

    // Japanese collator; the cast to RuleBasedCollator happens outside the try block.
    Collator japanese;
    try
    {
        japanese = Collator.GetInstance(new CultureInfo("ja-JP"));
    }
    catch (Exception)
    {
        Errln("Couldn't create Japanese collator\n");
        return;
    }
    iter = ((RuleBasedCollator)japanese).GetCollationElementIterator("\u0061\u30CF\u3099\u30FC");
    CollationTest.BackAndForth(this, iter);
}
/// <summary>
/// Exercises <c>SetOffset</c>/<c>GetOffset</c> on collation element iterators:
/// boundary offsets (0 and end of text), the offset after a full pass, resetting to 0,
/// and offsets that land inside a contraction or inside a surrogate pair. The last two
/// are expected to produce the same orders as an offset at the start of that contraction or pair.
/// </summary>
public void TestOffset(/* char* par */)
{
    RuleBasedCollator english;
    try
    {
        english = (RuleBasedCollator)Collator.GetInstance(new CultureInfo("en-US"));
    }
    catch (Exception)
    {
        Warnln("ERROR: in creation of collator of ENGLISH locale");
        return;
    }

    CollationElementIterator iter = english.GetCollationElementIterator(test1);

    // testing boundaries
    iter.SetOffset(0);
    if (iter.Previous() != CollationElementIterator.NULLORDER)
    {
        Errln("Error: After setting offset to 0, we should be at the end of the backwards iteration");
    }

    iter.SetOffset(test1.Length);
    if (iter.Next() != CollationElementIterator.NULLORDER)
    {
        Errln("Error: After setting offset to the end of the string, we should be at the end of the forwards iteration");
    }

    // Run all the way through the iterator, then get the offset
    int[] orders = CollationTest.GetOrders(iter);
    Logln("orders.Length = " + orders.Length);

    int offset = iter.GetOffset();
    if (offset != test1.Length)
    {
        Errln("offset at end != length: " + offset + " vs " + test1.Length);
    }

    // Now set the offset back to the beginning and see if it works
    CollationElementIterator pristine = english.GetCollationElementIterator(test1);
    try
    {
        iter.SetOffset(0);
    }
    catch (Exception)
    {
        Errln("setOffset failed.");
    }
    assertEqual(iter, pristine);

    // setting offset in the middle of a contraction
    RuleBasedCollator tailored;
    try
    {
        tailored = new RuleBasedCollator("& a < ch");
    }
    catch (Exception)
    {
        Errln("Error: in creation of Spanish collator");
        return;
    }

    string contractionError = "Error: setting offset in the middle of a contraction should be the same as setting it to the start of the contraction";
    string surrogateError = "Error: setting offset in the middle of a surrogate pair should be the same as setting it to the start of the surrogate pair";

    int[] atStart;
    int[] inMiddle;

    iter = tailored.GetCollationElementIterator("change");
    atStart = CollationTest.GetOrders(iter);
    iter.SetOffset(1); // sets offset in the middle of ch
    inMiddle = CollationTest.GetOrders(iter);
    if (!Arrays.Equals(atStart, inMiddle))
    {
        Errln(contractionError);
    }

    iter = tailored.GetCollationElementIterator("peache");
    iter.SetOffset(3);
    atStart = CollationTest.GetOrders(iter);
    iter.SetOffset(4); // sets offset in the middle of ch
    inMiddle = CollationTest.GetOrders(iter);
    if (!Arrays.Equals(atStart, inMiddle))
    {
        Errln(contractionError);
    }

    // setting offset in the middle of a surrogate pair
    iter = tailored.GetCollationElementIterator("\ud800\udc00str");
    atStart = CollationTest.GetOrders(iter);
    iter.SetOffset(1); // sets offset in the middle of surrogate
    inMiddle = CollationTest.GetOrders(iter);
    if (!Arrays.Equals(atStart, inMiddle))
    {
        Errln(surrogateError);
    }

    iter = tailored.GetCollationElementIterator("simple\ud800\udc00str");
    iter.SetOffset(6);
    atStart = CollationTest.GetOrders(iter);
    iter.SetOffset(7); // sets offset in the middle of surrogate
    inMiddle = CollationTest.GetOrders(iter);
    if (!Arrays.Equals(atStart, inMiddle))
    {
        Errln(surrogateError);
    }

    // TODO: try iterating halfway through a messy string.
}
/// <summary>
/// Reads the conformance data from the <c>@in</c> reader line by line and checks that each
/// parsed line does not sort before the previous one under <paramref name="coll"/>.
/// For every consecutive pair it verifies that <c>Compare</c> is symmetric and, unless the
/// "ucaconfnosortkeys" property is set, that <c>Compare</c> agrees with the raw sort-key
/// comparison. The reader is disposed and the field cleared when the method finishes.
/// </summary>
/// <param name="coll">Collator under test; the method returns immediately if it or the reader is null.</param>
private void conformanceTest(RuleBasedCollator coll)
{
    if (@in == null || coll == null)
    {
        return;
    }

    // Flags handed to skipLineBecauseOfBug to describe this collator.
    int skipFlags = 0;
    if (coll.IsAlternateHandlingShifted)
    {
        skipFlags |= IS_SHIFTED;
    }
    if (coll == rbUCA)
    {
        skipFlags |= FROM_RULES;
    }

    Logln("-prop:ucaconfnosortkeys=1 turns off getSortKey() in UCAConformanceTest");
    bool withSortKeys = GetProperty("ucaconfnosortkeys") == null;

    int lineNo = 0;
    string line = null;
    string oldLine = null;
    string buffer = null;
    string oldB = null;

    // Two key objects are alternated so the previous key survives while the next one is filled.
    RawCollationKey sk1 = new RawCollationKey();
    RawCollationKey sk2 = new RawCollationKey();
    RawCollationKey oldSk = null;
    RawCollationKey newSk = sk1;

    try
    {
        while ((line = @in.ReadLine()) != null)
        {
            lineNo++;

            // Blank lines and '#' comment lines carry no data.
            if (line.Length == 0 || line[0] == '#')
            {
                continue;
            }

            buffer = parseString(line);

            if (skipLineBecauseOfBug(buffer, skipFlags))
            {
                Logln("Skipping line " + lineNo + " because of a known bug");
                continue;
            }

            if (withSortKeys)
            {
                coll.GetRawCollationKey(buffer, newSk);
            }

            if (oldSk != null)
            {
                bool ok = true;
                int skres = withSortKeys ? oldSk.CompareTo(newSk) : 0;
                int cmpres = coll.Compare(oldB, buffer);
                int cmpres2 = coll.Compare(buffer, oldB);

                if (cmpres != -cmpres2)
                {
                    Errln(string.Format(
                        "Compare result not symmetrical on line {0}: previous vs. current ({1}) / current vs. previous ({2})",
                        lineNo, cmpres, cmpres2));
                    ok = false;
                }

                // TODO: Compare with normalization turned off if the input passes the FCD test.

                if (withSortKeys && cmpres != normalizeResult(skres))
                {
                    Errln("Difference between coll.compare (" + cmpres + ") and sortkey compare (" + skres + ") on line " + lineNo);
                    ok = false;
                }

                int res = cmpres;
                if (res == 0 && !isAtLeastUCA62)
                {
                    // Up to UCA 6.1, the collation test files use a custom tie-breaker,
                    // comparing the raw input strings.
                    res = comparer.Compare(oldB, buffer);
                    // Starting with UCA 6.2, the collation test files use the standard UCA tie-breaker,
                    // comparing the NFD versions of the input strings,
                    // which we do via setting strength=identical.
                }

                if (res > 0)
                {
                    Errln("Line " + lineNo + " is not greater or equal than previous line");
                    ok = false;
                }

                if (!ok)
                {
                    Errln(" Previous data line " + oldLine);
                    Errln(" Current data line " + line);
                    if (withSortKeys)
                    {
                        Errln(" Previous key: " + CollationTest.Prettify(oldSk));
                        Errln(" Current key: " + CollationTest.Prettify(newSk));
                    }
                }
            }

            // The current line becomes the "previous" one; switch to the other key object.
            oldSk = newSk;
            oldB = buffer;
            oldLine = line;
            newSk = (oldSk == sk1) ? sk2 : sk1;
        }
    }
    catch (Exception e)
    {
        Errln("Unexpected exception " + e);
    }
    finally
    {
        try
        {
            @in.Dispose();
        }
        catch (IOException)
        {
            // Failure to dispose the reader is deliberately ignored.
        }
        @in = null;
    }
}
/// <summary>
/// Reads the word list "riwords.txt" and checks, with the collator from
/// <c>GetThaiCollator()</c>, that no word compares less than the word before it.
/// Blank lines and lines starting with '#' are skipped. Each failing pair is reported
/// together with both collation keys, subject to <c>MAX_FAILURES_TO_SHOW</c>, where a
/// negative value means "show every failure". A summary is reported at the end.
/// Emits a warning and returns early if the collator cannot be constructed.
/// </summary>
public void TestDictionary()
{
    RuleBasedCollator coll = null;
    try
    {
        coll = GetThaiCollator();
    }
    catch (Exception)
    {
        Warnln("could not construct Thai collator");
        return;
    }

    // Read in a dictionary of Thai words
    int line = 0;
    int failed = 0;
    int wordCount = 0;
    TextReader @in = null;
    try
    {
        String fileName = "riwords.txt";
        @in = TestUtil.GetDataReader(fileName, "UTF-8");

        //
        // Loop through each word in the dictionary and compare it to the previous
        // word. They should be in sorted order.
        //
        String lastWord = "";
        String word = @in.ReadLine();
        while (word != null)
        {
            line++;

            // Skip comments and blank lines (0x23 is '#')
            if (word.Length == 0 || word[0] == 0x23)
            {
                word = @in.ReadLine();
                continue;
            }

            // Show the first 8 words being compared, so we can see what's happening
            ++wordCount;
            if (wordCount <= 8)
            {
                Logln("Word " + wordCount + ": " + word);
            }

            if (lastWord.Length > 0)
            {
                // CollationTest.doTest isn't really set up to handle situations where
                // the result can be equal or greater than the previous, so have to skip for now.
                // Not a big deal, since we're still testing to make sure everything sorts out
                // right, just not looking at the colation keys in detail...
                // CollationTest.doTest(this, coll, lastWord, word, -1);
                int result = coll.Compare(lastWord, word);

                if (result > 0)
                {
                    failed++;
                    // A negative limit means there is no cap on the failures shown.
                    if (MAX_FAILURES_TO_SHOW < 0 || failed <= MAX_FAILURES_TO_SHOW)
                    {
                        String msg = "--------------------------------------------\n"
                            + line + " compare(" + lastWord + ", " + word + ") returned " + result + ", expected -1\n";
                        CollationKey k1, k2;
                        k1 = coll.GetCollationKey(lastWord);
                        k2 = coll.GetCollationKey(word);
                        msg += "key1: " + CollationTest.Prettify(k1) + "\n" + "key2: " + CollationTest.Prettify(k2);
                        Errln(msg);
                    }
                }
            }

            lastWord = word;
            word = @in.ReadLine();
        }
    }
    catch (IOException e)
    {
        Errln("IOException " + e.ToString());
    }
    finally
    {
        if (@in == null)
        {
            Errln("Error: could not open test file. Aborting test.");
        }
        else
        {
            try
            {
                @in.Dispose();
            }
            catch (IOException)
            {
                // Failure to dispose the reader is deliberately ignored.
            }
        }
    }

    // ICU4N: We can't return in a finally block, so we have to do it here under
    // the same condition
    if (@in == null)
    {
        return;
    }

    if (failed != 0)
    {
        // Only claim that output was truncated when a non-negative limit was in force.
        // A negative limit shows every failure (see the loop above), and the previous
        // unconditional "failed > MAX_FAILURES_TO_SHOW" check was then always true.
        if (MAX_FAILURES_TO_SHOW >= 0 && failed > MAX_FAILURES_TO_SHOW)
        {
            Errln("Too many failures; only the first " + MAX_FAILURES_TO_SHOW + " failures were shown");
        }
        Errln("Summary: " + failed + " of " + (line - 1) + " comparisons failed");
    }

    Logln("Words checked: " + wordCount);
}