// Walks every code point from 0 up to (but not including) 0x10FFFF and, for each one whose
// general category is not unassigned, private-use or surrogate, calls CharacterTest twice:
// once with the bare character and once with U+0345 appended. A progress line is logged
// for every 5000th character that is actually tested.
public void TestExhaustive()
{
    CanonicalIterator iterator = new CanonicalIterator("");
    int testedCount = 0;

    for (int codePoint = 0; codePoint < 0x10FFFF; ++codePoint)
    {
        // skip characters we know don't have decomps
        UCharacterCategory category = UCharacter.GetType(codePoint);
        bool hasNoDecomposition = category == UCharacterCategory.OtherNotAssigned
            || category == UCharacterCategory.PrivateUse
            || category == UCharacterCategory.Surrogate;
        if (hasNoDecomposition)
        {
            continue;
        }

        ++testedCount;
        if (testedCount % 5000 == 0)
        {
            Logln("Testing " + Utility.Hex(codePoint, 0));
        }

        string text = UTF16.ValueOf(codePoint);
        CharacterTest(text, codePoint, iterator);
        CharacterTest(text + "\u0345", codePoint, iterator);
    }
}
/// <summary>
/// Normalizes text by stepping the shared <c>normalizer</c> one code point at a time,
/// either forward or backward, and collecting the results in <paramref name="buf"/>.
/// </summary>
/// <param name="str">Character iterator handed to the normalizer via <c>SetText</c>.</param>
/// <param name="mode">Normalization mode handed to the normalizer via <c>SetMode</c>.</param>
/// <param name="buf">Scratch buffer; it is emptied first and then receives the output.</param>
/// <param name="dir">A positive value iterates from <c>First()</c> with <c>Next()</c>;
/// any other value iterates from <c>Last()</c> with <c>Previous()</c>.</param>
/// <param name="options">Options switched on after all options have been reset.</param>
/// <returns>The final contents of <paramref name="buf"/>.</returns>
private String iterativeNorm(StringCharacterIterator str, Normalizer.Mode mode, StringBuffer buf, int dir, int options)
{
    normalizer.SetText(str);
    normalizer.SetMode(mode);
    buf.Length = 0;
    normalizer.SetOption(-1, false); // reset all options
    normalizer.SetOption(options, true); // set desired options

    bool forward = dir > 0;
    int ch = forward ? normalizer.First() : normalizer.Last();
    while (ch != Normalizer.DONE)
    {
        string piece = UTF16.ValueOf(ch);
        if (forward)
        {
            buf.Append(piece);
            ch = normalizer.Next();
        }
        else
        {
            // Walking backward, so each piece goes in front of what was collected so far.
            buf.Insert(0, piece);
            ch = normalizer.Previous();
        }
    }

    return buf.ToString();
}
/// <summary>
/// Gets the current string from the iterator. Only use after calling next(), not nextRange().
/// </summary>
/// <returns>
/// The <c>String</c> member when <c>Codepoint</c> equals <c>IS_STRING</c>; otherwise the
/// UTF-16 string form of <c>Codepoint</c>.
/// </returns>
public string GetString()
{
    return (Codepoint == IS_STRING) ? String : UTF16.ValueOf(Codepoint);
}
/// <summary>
/// Replaces every <c>&lt;HEX&gt;</c> escape (for example <c>&lt;00AD&gt;</c>) in
/// <paramref name="str"/> with the character that has that hexadecimal code point.
/// </summary>
/// <remarks>
/// Text outside the angle brackets is copied unchanged, except that a stray <c>&gt;</c>
/// is dropped. Code points above U+FFFF are emitted as a UTF-16 surrogate pair.
/// </remarks>
/// <param name="str">String including <c>&lt;*&gt;</c> style Unicode escapes.</param>
/// <returns>The string with all escapes replaced by the characters they denote.</returns>
/// <exception cref="FormatException">
/// A <c>&lt;</c> has no closing <c>&gt;</c>, or the text between the brackets is not a
/// valid hexadecimal number.
/// </exception>
private static String StringReplace(String str)
{
    StringBuffer result = new StringBuffer();
    int index = 0;
    while (index < str.Length)
    {
        char current = str[index];
        if (current == '<')
        {
            int close = str.IndexOf('>', index + 1);
            if (close < 0)
            {
                // Previously this ran off the end of the input with an IndexOutOfRangeException.
                throw new FormatException("Unterminated '<' escape at index " + index + " in \"" + str + "\"");
            }

            String hexDigits = str.Substring(index + 1, close - index - 1);
            int codePoint = int.Parse(hexDigits, NumberStyles.HexNumber, CultureInfo.InvariantCulture);
            if ((codePoint >> 16) == 0)
            {
                result.Append((char)codePoint);
            }
            else
            {
                // The code point is sometimes larger than \uFFFF, so it has to go through UTF16.
                result.Append(UTF16.ValueOf(codePoint));
            }

            index = close + 1; // resume after the closing '>'
        }
        else
        {
            if (current != '>')
            {
                result.Append(current);
            }
            index++;
        }
    }

    return result.ToString();
}
/// <summary>
/// Builds a random key from one draw of <c>rand.Next(30)</c>.
/// </summary>
/// <remarks>
/// The draw <c>r</c> selects the key as follows:
/// 0 gives U+0000; 1..9 give the single letter <c>'A' - 1 + r</c>;
/// 10..19 give the supplementary code point <c>0x10FFFF - (r - 10)</c>;
/// 20..29 give "a" followed by the character <c>'a' - 1 + r</c>.
/// A former branch returning the empty string for r == 20 is disabled.
/// </remarks>
/// <param name="rand">Source of randomness.</param>
/// <returns>The generated key.</returns>
private String GetRandomKey(Random rand)
{
    int roll = rand.Next(30);

    if (roll >= 20)
    {
        return "a" + UTF16.ValueOf(roll + 'a' - 1);
    }

    int codePoint;
    if (roll == 0)
    {
        codePoint = roll;
    }
    else if (roll < 10)
    {
        codePoint = 'A' - 1 + roll;
    }
    else
    {
        codePoint = 0x10FFFF - (roll - 10);
    }

    return UTF16.ValueOf(codePoint);
}
/// <summary>
/// Generates a random test string made only of code points whose general category is not
/// <c>OtherNotAssigned</c>.
/// </summary>
/// <remarks>
/// The <c>random</c> field is created on first use via <c>CreateRandom()</c>. The loop bound
/// <c>random.Next(maxCharCount) + 1</c> is drawn again before every iteration, so the
/// length is not fixed up front.
/// </remarks>
/// <returns>The generated string; code points are drawn from <c>random.Next(maxCodePoint)</c>.</returns>
internal String GetTestSource()
{
    if (random == null)
    {
        random = CreateRandom(); // use test framework's random seed
    }

    String source = "";
    for (int appended = 0; appended < (random.Next(maxCharCount) + 1); appended++)
    {
        // Eliminate unassigned characters: redraw until the code point is assigned.
        int codepoint;
        do
        {
            codepoint = random.Next(maxCodePoint);
        }
        while (UCharacter.GetType(codepoint) == UCharacterCategory.OtherNotAssigned);

        source += UTF16.ValueOf(codepoint);
    }

    return source;
}
// Smoke test for the "any-<script>" transliterators. Builds one sample line holding up to
// 20 characters of every script, then for each script (other than Common and Inherited)
// obtains the transliterator by long and by short script name and transforms the sample,
// only checking that nothing crashes. Scripts with no transliterator are skipped.
public void TestScripts()
{
    // get a couple of characters of each script for testing
    StringBuffer sampleBuilder = new StringBuffer();
    for (int script = 0; script < UScript.CodeLimit; ++script)
    {
        UnicodeSet scriptSet = new UnicodeSet().ApplyPropertyAlias("script", UScript.GetName(script));
        int limit = Math.Min(20, scriptSet.Count);
        for (int index = 0; index < limit; ++index)
        {
            sampleBuilder.Append(UTF16.ValueOf(scriptSet[index]));
        }
    }

    String sample = sampleBuilder.ToString();
    Logln("Test line: " + sample);
    int inclusion = TestFmwk.GetExhaustiveness();
    bool testedUnavailableScript = false;

    for (int script = 0; script < UScript.CodeLimit; ++script)
    {
        if (script == UScript.Common || script == UScript.Inherited)
        {
            continue;
        }

        // if the inclusion rate is not 10, skip all but a small number of items.
        // Make sure, however, that we test at least one unavailable script
        bool alwaysTested = script == UScript.Latin
            || script == UScript.Han
            || script == UScript.Hiragana;
        if (inclusion < 10 && !alwaysTested && testedUnavailableScript)
        {
            continue;
        }

        String longName = UScript.GetName(script); // long name
        UCultureInfo locale = new UCultureInfo(longName);
        if ((locale.Language.Equals("new") || locale.Language.Equals("pau"))
            && logKnownIssue("11171", "long script name loosely looks like a locale ID with a known likely script"))
        {
            continue;
        }

        Transliterator translit;
        try
        {
            translit = Transliterator.GetInstance("any-" + longName);
        }
        catch (Exception)
        {
            testedUnavailableScript = true;
            Logln("Skipping unavailable: " + longName);
            continue; // we don't handle all scripts
        }

        Logln("Checking: " + longName);
        if (translit != null)
        {
            translit.Transform(sample); // just verify we don't crash
        }

        String shortName = UScript.GetShortName(script); // 4-letter script code
        try
        {
            translit = Transliterator.GetInstance("any-" + shortName);
        }
        catch (Exception)
        {
            // On failure the long-name transliterator stays in place for the call below.
            Errln("Transliterator.GetInstance() worked for \"any-" + longName
                + "\" but not for \"any-" + shortName + '\"');
        }

        translit.Transform(sample); // just verify we don't crash
    }
}
/// <summary>
/// Parses a starred-relation string starting at rule index <paramref name="i"/> and adds one
/// relation to <c>sink</c> for every code point it denotes, including code points covered by
/// <c>start-end</c> ranges. On success <c>ruleIndex</c> is moved past any trailing white space.
/// </summary>
/// <remarks>
/// Every failure is reported through <c>SetParseError</c>, after which the method returns
/// without updating <c>ruleIndex</c>.
/// </remarks>
/// <param name="strength">Strength passed to <c>sink.AddRelation</c> for each code point.</param>
/// <param name="i">Index into <c>rules</c> at which parsing starts.</param>
private void ParseStarredCharacters(CollationStrength strength, int i)
{
    StringCharSequence noText = new StringCharSequence("");

    i = ParseString(SkipWhiteSpace(i), rawBuilder.Value);
    if (rawBuilder.Length == 0)
    {
        SetParseError("missing starred-relation string");
        return;
    }

    int lastCodePoint = -1; // most recent single code point; -1 when a range may not start here
    int offset = 0;         // position in rawBuilder of the next code point to emit
    while (true)
    {
        // Emit the remaining code points of the current raw string one by one.
        while (offset < rawBuilder.Length)
        {
            int cp = rawBuilder.Value.CodePointAt(offset);
            if (!nfd.IsInert(cp))
            {
                SetParseError("starred-relation string is not all NFD-inert");
                return;
            }
            try
            {
                sink.AddRelation(strength, noText, UTF16.ValueOf(cp).AsCharSequence(), noText.Value);
            }
            catch (Exception e)
            {
                SetParseError("adding relation failed", e);
                return;
            }
            offset += Character.CharCount(cp);
            lastCodePoint = cp;
        }

        // Only a '-' (0x2d) at the current rule position continues with a range.
        bool atDash = i < rules.Length && rules[i] == 0x2d;
        if (!atDash)
        {
            break;
        }
        if (lastCodePoint < 0)
        {
            SetParseError("range without start in starred-relation string");
            return;
        }

        i = ParseString(i + 1, rawBuilder.Value);
        if (rawBuilder.Length == 0)
        {
            SetParseError("range without end in starred-relation string");
            return;
        }

        int rangeEnd = rawBuilder.Value.CodePointAt(0);
        if (rangeEnd < lastCodePoint)
        {
            SetParseError("range start greater than end in starred-relation string");
            return;
        }

        // The range start was already emitted above, so begin with the code point after it.
        for (int member = lastCodePoint + 1; member <= rangeEnd; ++member)
        {
            if (!nfd.IsInert(member))
            {
                SetParseError("starred-relation string range is not all NFD-inert");
                return;
            }
            if (IsSurrogate(member))
            {
                SetParseError("starred-relation string range contains a surrogate");
                return;
            }
            if (0xfffd <= member && member <= 0xffff)
            {
                SetParseError("starred-relation string range contains U+FFFD, U+FFFE or U+FFFF");
                return;
            }
            try
            {
                sink.AddRelation(strength, noText, UTF16.ValueOf(member).AsCharSequence(), noText.Value);
            }
            catch (Exception e)
            {
                SetParseError("adding relation failed", e);
                return;
            }
        }

        lastCodePoint = -1;
        // The range end was emitted by the loop above; continue with whatever follows it.
        offset = Character.CharCount(rangeEnd);
    }

    ruleIndex = SkipWhiteSpace(i);
}