/// <summary>
/// Checks that <c>UCharacter.ToCodePoint</c> combines surrogate pairs taken from both
/// ends of the high and low surrogate ranges into code points that split back into the
/// same lead and trail surrogates. Stops at the first mismatch.
/// </summary>
public void TestToCodePoint()
{
    // Units are stored side by side: even index = lead surrogate, odd index = trail surrogate.
    char[] surrogates =
    {
        (char)(UCharacter.MIN_HIGH_SURROGATE + 0), (char)(UCharacter.MIN_LOW_SURROGATE + 0),
        (char)(UCharacter.MIN_HIGH_SURROGATE + 1), (char)(UCharacter.MIN_LOW_SURROGATE + 1),
        (char)(UCharacter.MIN_HIGH_SURROGATE + 2), (char)(UCharacter.MIN_LOW_SURROGATE + 2),
        (char)(UCharacter.MAX_HIGH_SURROGATE - 2), (char)(UCharacter.MAX_LOW_SURROGATE - 2),
        (char)(UCharacter.MAX_HIGH_SURROGATE - 1), (char)(UCharacter.MAX_LOW_SURROGATE - 1),
        (char)(UCharacter.MAX_HIGH_SURROGATE - 0), (char)(UCharacter.MAX_LOW_SURROGATE - 0),
    };

    int index = 0;
    while (index < surrogates.Length)
    {
        char lead = surrogates[index];
        char trail = surrogates[index + 1];
        int combined = UCharacter.ToCodePoint(lead, trail);

        // The code point must decompose into exactly the pair it was built from.
        bool roundTrips = lead == UTF16.GetLeadSurrogate(combined)
            && trail == UTF16.GetTrailSurrogate(combined);
        if (!roundTrips)
        {
            Errln((lead).ToHexString() + ", " + trail);
            break;
        }

        index += 2;
    }
}
/// <summary>
/// Checks <c>UCharacter.IsSurrogatePair</c> with one unit just outside each edge of the
/// high and low surrogate ranges (must not be a pair) and with the first valid pair
/// (must be a pair). Each failure message names the pair that was tested.
/// </summary>
public void TestIsSurrogatePair()
{
    // Lead unit one below the high-surrogate range.
    if (UCharacter.IsSurrogatePair(
            (char)(UCharacter.MIN_HIGH_SURROGATE - 1),
            UCharacter.MIN_LOW_SURROGATE))
    {
        Errln("0xd7ff,0xdc00");
    }

    // Lead unit one above the high-surrogate range (that is, the first low surrogate).
    // The message previously read "0xd800,0xdc00", which duplicated the valid-pair
    // message below and did not describe the values actually tested.
    if (UCharacter.IsSurrogatePair(
            (char)(UCharacter.MAX_HIGH_SURROGATE + 1),
            UCharacter.MIN_LOW_SURROGATE))
    {
        Errln("0xdc00,0xdc00");
    }

    // Trail unit one below the low-surrogate range.
    if (UCharacter.IsSurrogatePair(UCharacter.MIN_HIGH_SURROGATE,
            (char)(UCharacter.MIN_LOW_SURROGATE - 1)))
    {
        Errln("0xd800,0xdbff");
    }

    // Trail unit one above the low-surrogate range.
    if (UCharacter.IsSurrogatePair(UCharacter.MIN_HIGH_SURROGATE,
            (char)(UCharacter.MAX_LOW_SURROGATE + 1)))
    {
        Errln("0xd800,0xe000");
    }

    // The first valid pair must be accepted.
    if (!UCharacter.IsSurrogatePair(UCharacter.MIN_HIGH_SURROGATE,
            UCharacter.MIN_LOW_SURROGATE))
    {
        Errln("0xd800,0xdc00");
    }
}
/// <summary>
/// Gets a script or reorder code from its string representation.
/// </summary>
/// <param name="word">
/// Name to resolve. It is compared case-insensitively (ordinal) against the entries of
/// <c>gSpecialReorderCodes</c> and against "others"; otherwise it is looked up as a
/// value name of the Script property.
/// </param>
/// <returns>The script/reorder code, or -1 if not recognized.</returns>
public static int GetReorderCode(string word)
{
    // 1. Special reorder codes: the array position is the offset from ReorderCodes.First.
    for (int offset = 0; offset < gSpecialReorderCodes.Length; ++offset)
    {
        string candidate = gSpecialReorderCodes[offset];
        if (word.Equals(candidate, StringComparison.OrdinalIgnoreCase))
        {
            return ReorderCodes.First + offset;
        }
    }

    // 2. Script property value names.
    try
    {
        int scriptCode = UCharacter.GetPropertyValueEnum(UProperty.Script, word);
        if (scriptCode >= 0)
        {
            return scriptCode;
        }
    }
    catch (IcuArgumentException)
    {
        // Not a script name; fall through to the remaining checks.
    }

    // 3. "others" -- same as Zzzz = USCRIPT_UNKNOWN.
    bool isOthers = word.Equals("others", StringComparison.OrdinalIgnoreCase);
    return isOthers ? ReorderCodes.Others : -1;
}
/// <summary>
/// Checks <c>UCharacter.IsSupplementaryCodePoint</c> at and around the boundaries of the
/// supplementary range: values below <c>MIN_SUPPLEMENTARY_CODE_POINT</c> and above
/// <c>MAX_CODE_POINT</c> must be rejected, and both end points must be accepted.
/// </summary>
public void TestIsSupplementaryCodePoint()
{
    bool negativeAccepted = UCharacter.IsSupplementaryCodePoint(-1);
    if (negativeAccepted)
    {
        Errln("-1");
    }

    bool zeroAccepted = UCharacter.IsSupplementaryCodePoint(0);
    if (zeroAccepted)
    {
        Errln("0");
    }

    // Last code point before the supplementary range.
    bool belowRangeAccepted =
        UCharacter.IsSupplementaryCodePoint(UCharacter.MIN_SUPPLEMENTARY_CODE_POINT - 1);
    if (belowRangeAccepted)
    {
        Errln("0xffff");
    }

    bool firstAccepted =
        UCharacter.IsSupplementaryCodePoint(UCharacter.MIN_SUPPLEMENTARY_CODE_POINT);
    if (!firstAccepted)
    {
        Errln("0x10000");
    }

    bool lastAccepted = UCharacter.IsSupplementaryCodePoint(UCharacter.MAX_CODE_POINT);
    if (!lastAccepted)
    {
        Errln("0x10ffff");
    }

    // First value past the end of the code point space.
    bool aboveRangeAccepted = UCharacter.IsSupplementaryCodePoint(UCharacter.MAX_CODE_POINT + 1);
    if (aboveRangeAccepted)
    {
        Errln("0x110000");
    }
}
/// <summary>
/// Runs <c>CharacterTest</c> for every code point below 0x10FFFF that is not unassigned,
/// private use, or a surrogate -- once for the code point alone and once followed by
/// U+0345 -- reusing a single <c>CanonicalIterator</c>. Progress is logged on every
/// 5000th tested code point.
/// </summary>
public void TestExhaustive()
{
    int tested = 0;
    CanonicalIterator it = new CanonicalIterator("");

    for (int codePoint = 0; codePoint < 0x10FFFF; ++codePoint)
    {
        // Skip characters we know don't have decompositions.
        UCharacterCategory category = UCharacter.GetType(codePoint);
        bool skip = category == UCharacterCategory.OtherNotAssigned
            || category == UCharacterCategory.PrivateUse
            || category == UCharacterCategory.Surrogate;
        if (skip)
        {
            continue;
        }

        tested++;
        if (tested % 5000 == 0)
        {
            Logln("Testing " + Utility.Hex(codePoint, 0));
        }

        string text = UTF16.ValueOf(codePoint);
        CharacterTest(text, codePoint, it);
        CharacterTest(text + "\u0345", codePoint, it);
    }
}
/// <summary>
/// Walks the characters from U+0001 up to U+FFFF in batches, builds a string from the
/// defined characters of each batch, and runs <c>CollationTest.BackAndForth</c> over a
/// Thai ("th-TH") collation element iterator for that string. If the Thai collator
/// cannot be created, a warning is issued and the test returns.
/// </summary>
public void TestNormalizedUnicodeChar()
{
    // Thai should have normalization on.
    RuleBasedCollator thai = null;
    try
    {
        thai = (RuleBasedCollator)Collator.GetInstance(new CultureInfo("th-TH"));
    }
    catch (Exception)
    {
        Warnln("Error creating Thai collator");
        return;
    }

    StringBuffer text = new StringBuffer();
    text.Append('\uFDFA');
    CollationElementIterator elements = thai.GetCollationElementIterator(text.ToString());
    CollationTest.BackAndForth(this, elements);

    char ch = (char)0x1;
    while (ch < 0xfffe)
    {
        text.Delete(0, text.Length);

        // Collect defined characters up to (not including) the next multiple of 0xFF...
        for (; ch % 0xFF != 0; ch++)
        {
            if (UCharacter.IsDefined(ch))
            {
                text.Append(ch);
            }
        }

        // ...then the multiple of 0xFF itself.
        if (UCharacter.IsDefined(ch))
        {
            text.Append(ch);
        }

        // Do not step past 0xFFFF; the loop condition then ends the walk.
        if (ch != 0xFFFF)
        {
            ch++;
        }

        // A basic test to see if it's working at all.
        elements = thai.GetCollationElementIterator(text.ToString());
        CollationTest.BackAndForth(this, elements);
    }
}
/// <summary>
/// Reports whether this engine handles the given character for the given break type.
/// </summary>
/// <param name="c">Code point to test.</param>
/// <param name="breakType">Break iterator kind.</param>
/// <returns>
/// <c>true</c> only when <paramref name="breakType"/> is <c>BreakIterator.KIND_WORD</c> or
/// <c>BreakIterator.KIND_LINE</c> and the Script property value of <paramref name="c"/>
/// is <c>UScript.Myanmar</c>; otherwise <c>false</c>.
/// </returns>
public override bool Handles(int c, int breakType)
{
    bool supportedKind = breakType == BreakIterator.KIND_WORD
        || breakType == BreakIterator.KIND_LINE;
    if (!supportedKind)
    {
        return false;
    }

    return UCharacter.GetInt32PropertyValue(c, UProperty.Script) == UScript.Myanmar;
}
/// <summary>
/// Maps <paramref name="src"/> with <c>Map</c>, then scans the mapped text for prohibited
/// code points and for violations of the bidirectional rules checked below.
/// </summary>
/// <param name="src">Input text.</param>
/// <param name="options">Options passed through to <c>Map</c>.</param>
/// <returns>A new <c>StringBuffer</c> holding the mapped text.</returns>
/// <exception cref="StringPrepParseException">
/// Thrown with <c>ProhibitedError</c> when a code point other than U+0020 is contained in
/// <c>transform.prohibitedSet</c>, or with <c>CheckBiDiError</c> when the text mixes
/// left-to-right and right-to-left characters, or contains right-to-left characters
/// without both its first and last characters being right-to-left.
/// </exception>
public StringBuffer Prepare(String src, StringPrepOptions options)
{
    String mapped = Map(src, options);
    UCharacterIterator iter = UCharacterIterator.GetInstance(mapped);

    // CharDirectionCount doubles as the "not seen yet" marker.
    UCharacterDirection lastDir = UCharacterDirection.CharDirectionCount;
    UCharacterDirection firstDir = UCharacterDirection.CharDirectionCount;
    int lastRtlIndex = -1;
    int lastLtrIndex = -1;
    bool sawRtl = false;
    bool sawLtr = false;

    for (int ch = iter.NextCodePoint(); ch != UCharacterIterator.DONE; ch = iter.NextCodePoint())
    {
        if (transform.prohibitedSet.Contains(ch) && ch != 0x0020)
        {
            throw new StringPrepParseException("A prohibited code point was found in the input",
                                               StringPrepErrorType.ProhibitedError,
                                               iter.GetText(),
                                               iter.Index);
        }

        lastDir = UCharacter.GetDirection(ch);
        if (firstDir == UCharacterDirection.CharDirectionCount)
        {
            firstDir = lastDir;
        }

        if (lastDir == UCharacterDirection.LeftToRight)
        {
            sawLtr = true;
            lastLtrIndex = iter.Index - 1;
        }

        if (lastDir == UCharacterDirection.RightToLeft
            || lastDir == UCharacterDirection.RightToLeftArabic)
        {
            sawRtl = true;
            lastRtlIndex = iter.Index - 1;
        }
    }

    // satisfy 2: left-to-right and right-to-left characters must not be mixed.
    if (sawLtr && sawRtl)
    {
        throw new StringPrepParseException("The input does not conform to the rules for BiDi code points.",
                                           StringPrepErrorType.CheckBiDiError,
                                           iter.GetText(),
                                           (lastRtlIndex > lastLtrIndex) ? lastRtlIndex : lastLtrIndex);
    }

    // satisfy 3: with any right-to-left character present, both the first and the last
    // character must be right-to-left.
    bool firstIsRtl = firstDir == UCharacterDirection.RightToLeft
        || firstDir == UCharacterDirection.RightToLeftArabic;
    bool lastIsRtl = lastDir == UCharacterDirection.RightToLeft
        || lastDir == UCharacterDirection.RightToLeftArabic;
    if (sawRtl && !(firstIsRtl && lastIsRtl))
    {
        throw new StringPrepParseException("The input does not conform to the rules for BiDi code points.",
                                           StringPrepErrorType.CheckBiDiError,
                                           iter.GetText(),
                                           (lastRtlIndex > lastLtrIndex) ? lastRtlIndex : lastLtrIndex);
    }

    return new StringBuffer(mapped);
}
/// <summary>
/// Looks up the name of <c>codePoint</c> up to 10000 times, storing each result in
/// <c>actualName</c>, and stops as soon as a result differs from <c>correctName</c> so
/// the mismatching value stays available in <c>actualName</c>.
/// </summary>
public override void Run()
{
    for (int attempt = 0; attempt < 10000; attempt++)
    {
        actualName = UCharacter.GetName(codePoint);

        bool matches = correctName.Equals(actualName);
        if (!matches)
        {
            return;
        }
    }
}
/// <summary>
/// Checks <c>UCharacter.OffsetByCodePoints</c> in both its <c>char[]</c> and string forms
/// against an expected result, and optionally checks the reverse move as well.
/// </summary>
/// <param name="s">Full text.</param>
/// <param name="start">Start of the tested window within <paramref name="s"/>.</param>
/// <param name="count">Length of the tested window in chars.</param>
/// <param name="index">Starting index, relative to <paramref name="s"/>.</param>
/// <param name="offset">Number of code points to move.</param>
/// <param name="expected">Expected resulting index, relative to <paramref name="s"/>.</param>
/// <param name="flip">
/// When <c>true</c>, also verifies that moving by <c>-offset</c> from
/// <paramref name="expected"/> returns <paramref name="index"/>.
/// </param>
internal void Test(String s, int start, int count, int index, int offset, int expected, bool flip)
{
    char[] buffer = s.ToCharArray();
    String window = s.Substring(start, count); // ICU4N: (start + count) - start == count

    // The string form works on the window only, so indices are shifted by start.
    int fromArray = UCharacter.OffsetByCodePoints(buffer, start, count, index, offset);
    int fromString = UCharacter.OffsetByCodePoints(window, index - start, offset) + start;

    if (fromArray != expected)
    {
        TestFmwk.Errln("char[] " + Str(s, start, count, index, offset) + "(" + fromArray + ") != " + expected);
    }
    else if (fromString != expected)
    {
        TestFmwk.Errln("String " + Str(s, start, count, index, offset) + "(" + fromString + ") != " + expected);
    }
    else if (TestFmwk.IsVerbose())
    {
        TestFmwk.Logln(Str(s, start, count, index, offset) + " == " + expected);
    }

    if (!flip)
    {
        return;
    }

    // Reverse direction: starting at the expected index and moving back must land on index.
    fromArray = UCharacter.OffsetByCodePoints(buffer, start, count, expected, -offset);
    fromString = UCharacter.OffsetByCodePoints(window, expected - start, -offset) + start;

    if (fromArray != index)
    {
        TestFmwk.Errln("char[] " + Str(s, start, count, expected, -offset) + "(" + fromArray + ") != " + index);
    }
    else if (fromString != index)
    {
        TestFmwk.Errln("String " + Str(s, start, count, expected, -offset) + "(" + fromString + ") != " + index);
    }
    else if (TestFmwk.IsVerbose())
    {
        TestFmwk.Logln(Str(s, start, count, expected, -offset) + " == " + index);
    }
}
/// <summary>
/// Checks <c>UCharacter.CharCount</c>: calls it with out-of-range values (results are
/// ignored), then expects 1 for the code point just below
/// <c>MIN_SUPPLEMENTARY_CODE_POINT</c> and 2 for <c>MIN_SUPPLEMENTARY_CODE_POINT</c>.
/// </summary>
public void TestCharCount()
{
    // Out-of-range inputs: only exercised, the return values are not inspected.
    UCharacter.CharCount(-1);
    UCharacter.CharCount(UCharacter.MAX_CODE_POINT + 1);

    int lastSingleUnit = UCharacter.CharCount(UCharacter.MIN_SUPPLEMENTARY_CODE_POINT - 1);
    if (lastSingleUnit != 1)
    {
        Errln("0xffff");
    }

    int firstSupplementary = UCharacter.CharCount(UCharacter.MIN_SUPPLEMENTARY_CODE_POINT);
    if (firstSupplementary != 2)
    {
        Errln("0x010000");
    }
}
/// <summary>
/// Creates <c>sourceTargetUtility</c> on first use, while holding a lock on
/// <c>typeof(UppercaseTransliterator)</c>, with a transform that applies
/// <c>UCharacter.FoldCase(source, true)</c>; then delegates to its
/// <c>AddSourceTargetSet</c> method.
/// </summary>
/// <seealso cref="Transliterator.AddSourceTargetSet(UnicodeSet, UnicodeSet, UnicodeSet)"/>
public override void AddSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet)
{
    // NOTE(review): the lock object is another type's Type instance, which is visible to
    // all code -- confirm whether a private lock object could be used instead.
    lock (typeof(UppercaseTransliterator))
    {
        if (sourceTargetUtility == null)
        {
            StringTransform foldCase = new StringTransform(
                transform: source => UCharacter.FoldCase(source, true));
            sourceTargetUtility = new SourceTargetUtility(foldCase);
        }
    }

    sourceTargetUtility.AddSourceTargetSet(this, inputFilter, sourceSet, targetSet);
}
/// <summary>
/// Creates <c>sourceTargetUtility</c> on first use, while holding a lock on this
/// instance, with a transform that applies <c>UCharacter.ToLower(locale, source)</c>;
/// then delegates to its <c>AddSourceTargetSet</c> method.
/// </summary>
/// <seealso cref="Transliterator.AddSourceTargetSet(UnicodeSet, UnicodeSet, UnicodeSet)"/>
public override void AddSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet)
{
    lock (this)
    {
        if (sourceTargetUtility == null)
        {
            StringTransform toLower = new StringTransform(
                transform: source => UCharacter.ToLower(locale, source));
            sourceTargetUtility = new SourceTargetUtility(toLower);
        }
    }

    sourceTargetUtility.AddSourceTargetSet(this, inputFilter, sourceSet, targetSet);
}
/// <summary>
/// Walks the characters from U+0001 up to U+FFFF in batches, builds a string from the
/// defined characters of each batch, and runs <c>CollationTest.BackAndForth</c> over an
/// "en-US" collation element iterator for that string. A short fixed string is checked
/// first.
/// </summary>
public void TestUnicodeChar()
{
    RuleBasedCollator english = (RuleBasedCollator)Collator.GetInstance(new CultureInfo("en-US"));

    StringBuffer text = new StringBuffer();
    text.Append("\u0e4d\u0e4e\u0e4f");

    // A basic test to see if it's working at all.
    CollationElementIterator elements = english.GetCollationElementIterator(text.ToString());
    CollationTest.BackAndForth(this, elements);

    char ch = (char)1;
    while (ch < 0xFFFE)
    {
        text.Delete(0, text.Length);

        // Collect defined characters up to (not including) the next multiple of 0xFF...
        for (; ch % 0xFF != 0; ch++)
        {
            if (UCharacter.IsDefined(ch))
            {
                text.Append(ch);
            }
        }

        // ...then the multiple of 0xFF itself.
        if (UCharacter.IsDefined(ch))
        {
            text.Append(ch);
        }

        // Do not step past 0xFFFF; the loop condition then ends the walk.
        if (ch != 0xFFFF)
        {
            ch++;
        }

        // A basic test to see if it's working at all.
        elements = english.GetCollationElementIterator(text.ToString());
        CollationTest.BackAndForth(this, elements);
    }
}
/// <summary>
/// Update the set of unhandled characters for the specified <paramref name="breakType"/>
/// to include all that have the same script as <paramref name="c"/>.
/// May be called concurrently with <see cref="Handles(int, int)"/> or
/// <see cref="FindBreaks(CharacterIterator, int, int, int, DictionaryBreakEngine.DequeI)"/>.
/// Must not be called concurrently with itself.
/// </summary>
/// <param name="c">Code point whose script should be added; ignored when it equals <c>CharacterIteration.DONE32</c>.</param>
/// <param name="breakType">Index into <c>fHandled</c>; ignored when out of range.</param>
public void HandleChar(int c, int breakType)
{
    bool validKind = breakType >= 0 && breakType < fHandled.Length;
    if (!validKind || c == CharacterIteration.DONE32)
    {
        return;
    }

    UnicodeSet current = fHandled[breakType];
    if (current.Contains(c))
    {
        // Already covered; nothing to update.
        return;
    }

    // Build the enlarged set separately and only then store it in fHandled, so the
    // existing set object is never modified in place.
    int script = UCharacter.GetInt32PropertyValue(c, UProperty.Script);
    UnicodeSet enlarged = new UnicodeSet();
    enlarged.ApplyInt32PropertyValue(UProperty.Script, script);
    enlarged.AddAll(current);
    fHandled[breakType] = enlarged;
}
/// <summary>
/// Adds permutations of the code points of <paramref name="source"/> to
/// <paramref name="output"/>, recursing on the string with one code point removed.
/// </summary>
/// <param name="source">Text whose code points are permuted.</param>
/// <param name="skipZeros">
/// When <c>true</c>, a code point that is not at index 0 and whose canonical combining
/// class is 0 is never moved to the front.
/// </param>
/// <param name="output">Set that receives the generated strings.</param>
public static void Permute(string source, bool skipZeros, ISet<string> output)
{
    // TODO: optimize

    // Optimization: with zero or one code point, the only result is the string itself.
    // The cheap length <= 2 check comes first so code points are not counted all the time.
    if (source.Length <= 2 && UTF16.CountCodePoint(source) <= 1)
    {
        output.Add(source);
        return;
    }

    // Otherwise pick each code point in turn as the first one and recursively permute
    // all the other characters.
    ISet<string> rest = new HashSet<string>();
    int cp;
    for (int pos = 0; pos < source.Length; pos += UTF16.GetCharCount(cp))
    {
        cp = UTF16.CharAt(source, pos);

        // Optimization: a character with canonical combining class zero is not permuted
        // (unless it is the first character).
        bool movable = !skipZeros || pos == 0 || UCharacter.GetCombiningClass(cp) != 0;
        if (movable)
        {
            // Permutations of the characters before and after this one.
            rest.Clear();
            string before = source.Substring(0, pos);
            string after = source.Substring(pos + UTF16.GetCharCount(cp));
            Permute(before + after, skipZeros, rest);

            // Prefix this character to all of them.
            string head = UTF16.ValueOf(source, pos);
            foreach (string tail in rest)
            {
                output.Add(head + tail);
            }
        }
    }
}
/// <summary>
/// Expects <c>UCharacter.CodePointCount(s, start, limit)</c> to throw. Reports an error
/// if the call succeeds, and a warning if the thrown exception is not assignable to
/// <paramref name="exc"/>.
/// </summary>
/// <param name="s">Text passed to <c>CodePointCount</c>.</param>
/// <param name="start">Start argument passed through.</param>
/// <param name="limit">Limit argument passed through.</param>
/// <param name="exc">Expected exception type (or a base type of it).</param>
internal void Fail(String s, int start, int limit, Type exc)
{
    try
    {
        UCharacter.CodePointCount(s, start, limit);
        Errln("unexpected success " + Str(s, start, limit));
    }
    catch (Exception thrown)
    {
        bool expectedKind = exc.IsAssignableFrom(thrown.GetType());
        if (!expectedKind)
        {
            Warnln("bad exception " + Str(s, start, limit) + thrown.GetType().Name);
        }
    }
}
/// <summary>
/// Fills a <c>UnicodeMap</c> and a sorted dictionary with the long property value name
/// of <c>propEnum</c> for every code point from 0 through <c>SET_LIMIT</c>, then checks
/// that both containers agree: via <c>checkNext</c>/<c>check</c>, by comparing their
/// value sets, and by comparing the set of code points mapped to each value.
/// </summary>
/// <exception cref="ArgumentException">Thrown ("Halting") when a set comparison fails.</exception>
public void TestUnicodeMapGeneralCategory()
{
    Logln("Setting General Category");

    // Each container is created exactly once; the earlier code built throwaway instances
    // (including an unsorted Dictionary) that were immediately replaced.
    UnicodeMap<String> map1 = new UnicodeMap<String>();
    IDictionary<Integer, String> map2 = new SortedDictionary<Integer, String>();

    for (int cp = 0; cp <= SET_LIMIT; ++cp)
    {
        int enumValue = UCharacter.GetInt32PropertyValue(cp, propEnum);
        //if (enumValue <= 0) continue; // for smaller set
        String value = UCharacter.GetPropertyValueName(propEnum, enumValue, NameChoice.Long);
        map1.Put(cp, value);
        map2[new Integer(cp)] = value;
    }

    checkNext(map1, map2, int.MaxValue);

    Logln("Comparing General Category");
    check(map1, map2, -1);

    Logln("Comparing Values");
    ISet<String> values1 = new SortedSet<String>(StringComparer.Ordinal);
    map1.GetAvailableValues(values1);
    ISet<String> values2 = new SortedSet<String>(map2.Values, StringComparer.Ordinal);
    if (!TestBoilerplate<string>.VerifySetsIdentical(this, values1, values2))
    {
        throw new ArgumentException("Halting");
    }

    Logln("Comparing Sets");
    foreach (string value in values1)
    {
        Logln(value ?? "null");
        UnicodeSet set1 = map1.KeySet(value);
        UnicodeSet set2 = TestBoilerplate<string>.GetSet(map2, value);
        if (!TestBoilerplate<string>.VerifySetsIdentical(this, set1, set2))
        {
            throw new ArgumentException("Halting");
        }
    }
}
/// <summary>
/// Expects <c>UCharacter.OffsetByCodePoints(text, index, offset)</c> to throw. Reports an
/// error if the call succeeds or if the thrown exception is not assignable to
/// <paramref name="exc"/>.
/// </summary>
/// <param name="text">Text passed to <c>OffsetByCodePoints</c>.</param>
/// <param name="index">Index argument passed through.</param>
/// <param name="offset">Code point offset argument passed through.</param>
/// <param name="exc">Expected exception type (or a base type of it).</param>
internal void Fail(String text, int index, int offset, Type exc)
{
    try
    {
        UCharacter.OffsetByCodePoints(text, index, offset);

        // Str takes (text, start, count, index, offset). The success message used to pass
        // index/offset in the start/count positions, so the report described the wrong
        // window; it now uses the same argument order as the message below.
        Errln("unexpected success " + Str(text, 0, text.Length, index, offset));
    }
    catch (Exception e)
    {
        //if (!exc.isInstance(e))
        if (!exc.IsAssignableFrom(e.GetType()))
        {
            Errln("bad exception " + Str(text, 0, text.Length, index, offset) + e.GetType().Name);
        }
    }
}
/// <summary>
/// Checks <c>UCharacter.IsValidCodePoint</c> at the edges of the code point range:
/// -1 and <c>MAX_CODE_POINT + 1</c> must be rejected, 0 and <c>MAX_CODE_POINT</c> accepted.
/// </summary>
public void TestIsValidCodePoint()
{
    bool negativeAccepted = UCharacter.IsValidCodePoint(-1);
    if (negativeAccepted)
    {
        Errln("-1");
    }

    bool zeroAccepted = UCharacter.IsValidCodePoint(0);
    if (!zeroAccepted)
    {
        Errln("0");
    }

    bool maxAccepted = UCharacter.IsValidCodePoint(UCharacter.MAX_CODE_POINT);
    if (!maxAccepted)
    {
        Errln("0x10ffff");
    }

    bool pastMaxAccepted = UCharacter.IsValidCodePoint(UCharacter.MAX_CODE_POINT + 1);
    if (pastMaxAccepted)
    {
        Errln("0x110000");
    }
}
/// <summary>
/// Checks both forms of <c>UCharacter.ToChars</c> with the code point formed from the
/// first high and first low surrogate: writing into an existing array at index 1, and
/// returning a new array. Both must yield that same surrogate pair.
/// </summary>
public void TestToChars()
{
    int cp = UCharacter.ToCodePoint(UCharacter.MIN_HIGH_SURROGATE, UCharacter.MIN_LOW_SURROGATE);

    // Form 1: write into a caller-supplied buffer starting at index 1.
    char[] buffer = new char[3];
    UCharacter.ToChars(cp, buffer, 1);
    bool bufferOk = buffer[1] == UCharacter.MIN_HIGH_SURROGATE
        && buffer[2] == UCharacter.MIN_LOW_SURROGATE;
    if (!bufferOk)
    {
        Errln("fail");
    }

    // Form 2: return a freshly allocated array.
    buffer = UCharacter.ToChars(cp);
    bool returnedOk = buffer[0] == UCharacter.MIN_HIGH_SURROGATE
        && buffer[1] == UCharacter.MIN_LOW_SURROGATE;
    if (!returnedOk)
    {
        Errln("fail");
    }
}
/// <summary>
/// Starts 20 <c>GetNameThread</c> workers, one per code point from 47 through 66, each
/// given the name obtained up front on the calling thread. After joining each worker,
/// reports an error if its <c>actualName</c> differs from its <c>correctName</c>.
/// </summary>
public void TestUCharactersGetName()
{
    List<GetNameThread> workers = new List<GetNameThread>();

    for (int t = 0; t < 20; t++)
    {
        int codePoint = 47 + t;
        String expectedName = UCharacter.GetName(codePoint);

        GetNameThread worker = new GetNameThread(codePoint, expectedName);
        worker.Start();
        workers.Add(worker);
    }

    foreach (GetNameThread worker in workers)
    {
        worker.Join();

        if (worker.correctName.Equals(worker.actualName))
        {
            continue;
        }

        Errln("FAIL, expected \"" + worker.correctName + "\", got \"" + worker.actualName + "\"");
    }
}
/// <summary>
/// Checks <c>UCharacter.IsHighSurrogate</c> at the boundaries of the high-surrogate
/// range: the units just outside must be rejected and both end points accepted.
/// </summary>
public void TestIsHighSurrogate()
{
    char justBelow = (char)(UCharacter.MIN_HIGH_SURROGATE - 1);
    if (UCharacter.IsHighSurrogate(justBelow))
    {
        Errln("0xd7ff");
    }

    bool firstAccepted = UCharacter.IsHighSurrogate(UCharacter.MIN_HIGH_SURROGATE);
    if (!firstAccepted)
    {
        Errln("0xd800");
    }

    bool lastAccepted = UCharacter.IsHighSurrogate(UCharacter.MAX_HIGH_SURROGATE);
    if (!lastAccepted)
    {
        Errln("0xdbff");
    }

    char justAbove = (char)(UCharacter.MAX_HIGH_SURROGATE + 1);
    if (UCharacter.IsHighSurrogate(justAbove))
    {
        Errln("0xdc00");
    }
}
/// <summary>
/// Checks that <c>UCharacter.CodePointCount</c> returns <paramref name="expected"/> for
/// both the <c>char[]</c> and the string form of <paramref name="s"/>. Only the first
/// mismatch is reported; on success the result is logged when verbose.
/// </summary>
/// <param name="s">Text to count in.</param>
/// <param name="start">Start argument passed through.</param>
/// <param name="limit">Limit argument passed through.</param>
/// <param name="expected">Expected number of code points.</param>
internal void Test(String s, int start, int limit, int expected)
{
    int fromArray = UCharacter.CodePointCount(s.ToCharArray(), start, limit);
    int fromString = UCharacter.CodePointCount(s, start, limit);

    if (fromArray != expected)
    {
        Errln("char[] " + Str(s, start, limit) + "(" + fromArray + ") != " + expected);
        return;
    }

    if (fromString != expected)
    {
        Errln("String " + Str(s, start, limit) + "(" + fromString + ") != " + expected);
        return;
    }

    if (IsVerbose())
    {
        Logln(Str(s, start, limit) + " == " + expected);
    }
}
/// <summary>
/// Title-cases <paramref name="name"/> when it starts with a lowercase code point and the
/// capitalization settings ask for it; otherwise returns it unchanged.
/// </summary>
/// <param name="usage">Usage whose entry in <c>capitalizationUsage</c> is consulted.</param>
/// <param name="name">Display name to adjust; may be null or empty.</param>
/// <returns>
/// The title-cased name when <c>capitalization</c> is
/// <c>DisplayContext.CapitalizationForBeginningOfSentence</c> or
/// <c>capitalizationUsage</c> is set for <paramref name="usage"/>; otherwise
/// <paramref name="name"/> itself.
/// </returns>
private string AdjustForUsageAndContext(CapitalizationContextUsage usage, String name)
{
    // Nothing to do for missing text or text that does not start in lowercase.
    if (string.IsNullOrEmpty(name) || !UCharacter.IsLowerCase(name.CodePointAt(0)))
    {
        return name;
    }

    // Note: capitalizationUsage is only expected to be non-null and set for this usage
    // when capitalization is for a UI list/menu or for standalone use.
    bool shouldTitleCase =
        capitalization == DisplayContext.CapitalizationForBeginningOfSentence
        || (capitalizationUsage != null && capitalizationUsage[(int)usage]);
    if (!shouldTitleCase)
    {
        return name;
    }

    lock (this)
    {
        if (capitalizationBrkIter == null)
        {
            // Should only happen when deserializing, etc.
            capitalizationBrkIter = BreakIterator.GetSentenceInstance(locale);
        }

        return UCharacter.ToTitleCase(locale, name, capitalizationBrkIter,
            UCharacter.TITLECASE_NO_LOWERCASE | UCharacter.TITLECASE_NO_BREAK_ADJUSTMENT);
    }
}
/// <summary>
/// Checks <c>UCharacter.IsLowSurrogate</c> at the boundaries of the low-surrogate range:
/// the units just outside must be rejected and both end points accepted.
/// </summary>
public void TestIsLowSurrogate()
{
    char justBelow = (char)(UCharacter.MIN_LOW_SURROGATE - 1);
    if (UCharacter.IsLowSurrogate(justBelow))
    {
        Errln("0xdbff");
    }

    bool firstAccepted = UCharacter.IsLowSurrogate(UCharacter.MIN_LOW_SURROGATE);
    if (!firstAccepted)
    {
        Errln("0xdc00");
    }

    bool lastAccepted = UCharacter.IsLowSurrogate(UCharacter.MAX_LOW_SURROGATE);
    if (!lastAccepted)
    {
        Errln("0xdfff");
    }

    char justAbove = (char)(UCharacter.MAX_LOW_SURROGATE + 1);
    if (UCharacter.IsLowSurrogate(justAbove))
    {
        Errln("0xe000");
    }
}
/// <summary>
/// Builds a random test string of assigned code points below <c>maxCodePoint</c>.
/// The length bound is re-drawn from the random generator on every loop check, so the
/// resulting length is itself random (at least one code point).
/// </summary>
/// <returns>The generated string.</returns>
internal String GetTestSource()
{
    if (random == null)
    {
        random = CreateRandom(); // use test framework's random seed
    }

    String result = "";
    for (int produced = 0; produced < (random.Next(maxCharCount) + 1); produced++)
    {
        // Eliminate unassigned characters: redraw until the code point is assigned.
        int codepoint;
        do
        {
            codepoint = random.Next(maxCodePoint);
        }
        while (UCharacter.GetType(codepoint) == UCharacterCategory.OtherNotAssigned);

        result = result + UTF16.ValueOf(codepoint);
    }

    return result;
}
/// <summary>
/// Checks that looking up an unknown property name ("??") and an unknown Line_Break
/// value name ("?!") each throw an <see cref="ArgumentException"/>; a returned value is
/// reported as an error.
/// </summary>
public void TestUnknownPropertyNames()
{
    try
    {
        int property = UCharacter.GetPropertyEnum("??");
        Errln("UCharacter.getPropertyEnum(??) returned " + property
              + " rather than throwing an exception");
    }
    catch (ArgumentException)
    {
        // Expected: the name is unknown.
    }

    try
    {
        int propertyValue = UCharacter.GetPropertyValueEnum(UProperty.Line_Break, "?!");
        Errln("UCharacter.getPropertyValueEnum(UProperty.LINE_BREAK, ?!) returned " + propertyValue
              + " rather than throwing an exception");
    }
    catch (ArgumentException)
    {
        // Expected: the value name is unknown.
    }
}
/// <summary>
/// Implements <see cref="Transliterator.HandleTransliterate(IReplaceable, Position, bool)"/>.
/// Replaces each code point between <c>offsets.Start</c> and <c>offsets.Limit</c> for
/// which <c>UCharacter.GetExtendedName</c> returns a name with
/// <c>OPEN_DELIM</c> + name + <c>CLOSE_DELIM</c>, then updates <paramref name="offsets"/>
/// for the change in text length.
/// </summary>
/// <param name="text">Text that is modified in place.</param>
/// <param name="offsets">Range to process; its Start, Limit and ContextLimit are updated.</param>
/// <param name="isIncremental">Not used by this implementation.</param>
protected override void HandleTransliterate(IReplaceable text, Position offsets, bool isIncremental)
{
    int position = offsets.Start;
    int end = offsets.Limit;

    // The builder always begins with the opening delimiter and is reused per character.
    StringBuilder replacement = new StringBuilder();
    replacement.Append(OPEN_DELIM);

    while (position < end)
    {
        int c = text.Char32At(position);
        string name = UCharacter.GetExtendedName(c);
        if (name == null)
        {
            ++position;
            continue;
        }

        // Cut the builder back to OPEN_DELIM_LEN chars (presumably just the opening
        // delimiter -- confirm against its definition) before appending the new name.
        replacement.Length = OPEN_DELIM_LEN;
        replacement.Append(name).Append(CLOSE_DELIM);

        int originalLength = UTF16.GetCharCount(c);
        text.Replace(position, position + originalLength, replacement.ToString());

        int insertedLength = replacement.Length;
        position += insertedLength;               // continue after the inserted text
        end += insertedLength - originalLength;   // change in length
    }

    offsets.ContextLimit += end - offsets.Limit;
    offsets.Limit = end;
    offsets.Start = position;
}
/// <summary>
/// RBBISymbolTable::parseReference. This function from the abstract symbol table
/// interface looks for a $variable name in the source text. It does not look it up,
/// only scans for it. It is used by the UnicodeSet parser.
/// </summary>
/// <param name="text">Text to scan.</param>
/// <param name="pos">
/// Position to start scanning at; its index is advanced past the name when one is found
/// and left unchanged otherwise.
/// </param>
/// <param name="limit">Index at which scanning stops.</param>
/// <returns>The scanned name, or an empty string if no valid name characters were found.</returns>
public virtual string ParseReference(string text, ParsePosition pos, int limit)
{
    int start = pos.Index;
    int end = start;

    while (end < limit)
    {
        int c = UTF16.CharAt(text, end);

        // The first character must be an identifier start; every character must be an
        // identifier part.
        bool rejected = (end == start && !UCharacter.IsUnicodeIdentifierStart(c))
            || !UCharacter.IsUnicodeIdentifierPart(c);
        if (rejected)
        {
            break;
        }

        end += UTF16.GetCharCount(c);
    }

    if (end == start)
    {
        // No valid name chars: indicate failure with an empty string.
        return "";
    }

    pos.Index = end;
    return text.Substring(start, end - start); // ICU4N: Corrected 2nd parameter
}