/// <summary>
/// Runs <c>CharacterTest</c> over the code point range, once for each
/// code point on its own and once with U+0345 appended to it.
/// </summary>
/// <remarks>
/// Code points whose category is unassigned, private use or surrogate are
/// skipped. A progress line is logged for every 5000th code point that
/// is actually tested.
/// </remarks>
public void TestExhaustive()
{
    // A single iterator instance is handed to every CharacterTest call.
    CanonicalIterator iterator = new CanonicalIterator("");

    // NOTE: the original port carried disabled scaffolding here: a second
    // iterator with SKIP_ZEROS turned off, a character-name transliterator,
    // and two sorted sets for collecting results. None of it is active.

    int testedCount = 0;

    // The upper bound is exclusive, so U+10FFFF itself is never visited.
    for (int codePoint = 0; codePoint < 0x10FFFF; codePoint++)
    {
        // skip characters we know don't have decomps
        UUnicodeCategory category = UChar.GetUnicodeCategory(codePoint);
        bool knownToLackDecomposition =
            category == UUnicodeCategory.OtherNotAssigned
            || category == UUnicodeCategory.PrivateUse
            || category == UUnicodeCategory.Surrogate;
        if (knownToLackDecomposition)
        {
            continue;
        }

        testedCount++;
        if (testedCount % 5000 == 0)
        {
            Logln("Testing " + Utility.Hex(codePoint, 0));
        }

        string text = UTF16.ValueOf(codePoint);
        CharacterTest(text, codePoint, iterator);
        CharacterTest(text + "\u0345", codePoint, iterator);
    }
}
/// <summary>
/// Tests whether a code point's general category is selected by the
/// <c>LNS</c> bit mask, or whether it is a modifier letter that has a case type.
/// </summary>
/// <param name="c">The code point to classify.</param>
/// <returns>
/// <c>true</c> if the bit for the code point's category is set in <c>LNS</c>,
/// or if the category is <see cref="UUnicodeCategory.ModifierLetter"/> and the
/// case type is not <see cref="CaseType.None"/>; otherwise <c>false</c>.
/// </returns>
private static bool IsLNS(int c)
{
    // Letter, number, symbol,
    // or a private use code point because those are typically used as letters or numbers.
    UUnicodeCategory category = UCharacterProperty.Instance.GetUnicodeCategory(c);
    int categoryBit = 1 << (int)category;
    if ((categoryBit & LNS) != 0)
    {
        return true;
    }

    // Consider modifier letters only if they are cased.
    if (category != UUnicodeCategory.ModifierLetter)
    {
        return false;
    }

    return UCaseProperties.Instance.GetCaseType(c) != CaseType.None;
}
/// <summary>
/// Returns the <see cref="int"/> value of a <see cref="UUnicodeCategory"/>.
/// Equivalent to the cast <c>(int)<paramref name="characterCategory"/></c>.
/// </summary>
/// <param name="characterCategory">The <see cref="UUnicodeCategory"/> to convert.</param>
/// <returns>The value of <paramref name="characterCategory"/> as an <see cref="int"/>.</returns>
// ICU4N TODO: Add this extension to all main enums
public static int ToInt32(this UUnicodeCategory characterCategory)
    => (int)characterCategory;
/// <summary>
/// Returns the display name of a <see cref="UUnicodeCategory"/>, in the form
/// "major class, subclass" (for example <c>"Letter, Uppercase"</c>).
/// </summary>
/// <param name="category">The category whose name is requested.</param>
/// <returns>
/// The name for <paramref name="category"/>, or <c>"Unassigned"</c> for any
/// value that is not listed explicitly below.
/// </returns>
/// <stable>ICU 2.1</stable>
// ICU4N NOTE: Since ToString() cannot be changed from the default on an Enum,
// we have renamed this method AsString().
public static string AsString(this UUnicodeCategory category) => category switch
{
    // Letters
    UUnicodeCategory.UppercaseLetter => "Letter, Uppercase",
    UUnicodeCategory.LowercaseLetter => "Letter, Lowercase",
    UUnicodeCategory.TitlecaseLetter => "Letter, Titlecase",
    UUnicodeCategory.ModifierLetter => "Letter, Modifier",
    UUnicodeCategory.OtherLetter => "Letter, Other",

    // Marks
    UUnicodeCategory.NonSpacingMark => "Mark, Non-Spacing",
    UUnicodeCategory.EnclosingMark => "Mark, Enclosing",
    UUnicodeCategory.SpacingCombiningMark => "Mark, Spacing Combining",

    // Numbers
    UUnicodeCategory.DecimalDigitNumber => "Number, Decimal Digit",
    UUnicodeCategory.LetterNumber => "Number, Letter",
    UUnicodeCategory.OtherNumber => "Number, Other",

    // Separators
    UUnicodeCategory.SpaceSeparator => "Separator, Space",
    UUnicodeCategory.LineSeparator => "Separator, Line",
    UUnicodeCategory.ParagraphSeparator => "Separator, Paragraph",

    // Other
    UUnicodeCategory.Control => "Other, Control",
    UUnicodeCategory.Format => "Other, Format",
    UUnicodeCategory.PrivateUse => "Other, Private Use",
    UUnicodeCategory.Surrogate => "Other, Surrogate",

    // Punctuation
    UUnicodeCategory.DashPunctuation => "Punctuation, Dash",
    UUnicodeCategory.OpenPunctuation => "Punctuation, Open",
    UUnicodeCategory.ClosePunctuation => "Punctuation, Close",
    UUnicodeCategory.ConnectorPunctuation => "Punctuation, Connector",
    UUnicodeCategory.OtherPunctuation => "Punctuation, Other",

    // Symbols
    UUnicodeCategory.MathSymbol => "Symbol, Math",
    UUnicodeCategory.CurrencySymbol => "Symbol, Currency",
    UUnicodeCategory.ModifierSymbol => "Symbol, Modifier",
    UUnicodeCategory.OtherSymbol => "Symbol, Other",

    // Quotation punctuation
    UUnicodeCategory.InitialQuotePunctuation => "Punctuation, Initial quote",
    UUnicodeCategory.FinalQuotePunctuation => "Punctuation, Final quote",

    // Every value not listed above shares the fallback name.
    _ => "Unassigned",
};