/// <summary>
/// Decodes <paramref name="input"/> by delegating to the array-based <c>Decode</c> overload
/// and appending each resulting code point to a new <see cref="StringBuffer"/>.
/// </summary>
/// <param name="input">The text to decode.</param>
/// <param name="case_flags">Passed through unchanged to the array-based overload.</param>
/// <returns>
/// A buffer holding the decoded code points. When the final status is not
/// <c>punycode_success</c>, <c>GetException</c> is invoked with that status instead
/// (presumably it throws; if it returns normally, the buffer is returned empty).
/// </returns>
public static StringBuffer Decode(StringBuffer input, char[] case_flags)
{
    char[] source = input.ToString().ToCharArray();
    int[] outLen = new int[1];
    outLen[0] = MAX_BUFFER_SIZE;
    int[] output = new int[outLen[0]];
    StringBuffer result = new StringBuffer();

    int rc = Decode(input.Length, source, outLen, output, case_flags);
    while (rc == punycode_big_output)
    {
        // The output array was too small: retry with four times the capacity.
        outLen[0] = output.Length * 4;
        output = new int[outLen[0]];
        rc = Decode(input.Length, source, outLen, output, case_flags);
    }

    if (rc != punycode_success)
    {
        GetException(rc);
        return result;
    }

    // outLen[0] now holds the number of code points actually written.
    for (int i = 0; i < outLen[0]; i++)
    {
        UTF16.Append(result, output[i]);
    }
    return result;
}
/// <summary>
/// Returns the code point at the iterator's current position without changing that position.
/// </summary>
/// <param name="ci">The iterator to read from.</param>
/// <returns>
/// The supplementary code point when the current unit is a lead surrogate followed by a trail
/// surrogate; <c>Done32</c> when the current unit equals <c>CharacterIterator.Done</c> and the
/// index is at or past the end index; otherwise the current 16-bit unit.
/// </returns>
public static int Current32(CharacterIterator ci)
{
    char lead = ci.Current;

    // Everything below the surrogate range is a complete code point on its own.
    if (lead < UTF16.LeadSurrogateMinValue)
    {
        return lead;
    }

    if (!UTF16.IsLeadSurrogate(lead))
    {
        bool atEnd = lead == CharacterIterator.Done && ci.Index >= ci.EndIndex;
        return atEnd ? Done32 : lead;
    }

    // Peek at the following unit, then step back so the position is unchanged.
    int trail = (int)ci.Next();
    ci.Previous();
    if (!UTF16.IsTrailSurrogate((char)trail))
    {
        return lead;
    }

    return ((lead - UTF16.LeadSurrogateMinValue) << 10)
        + (trail - UTF16.TrailSurrogateMinValue)
        + UTF16.SupplementaryMinValue;
}
/// <summary>
/// Moves the iterator back by one code point and returns that code point.
/// </summary>
/// <param name="ci">The iterator to move.</param>
/// <returns>
/// <c>Done32</c> when the iterator is already at (or before) its begin index; the combined
/// supplementary code point when a trail surrogate is preceded by a lead surrogate; otherwise
/// the single 16-bit unit that was stepped over.
/// </returns>
public static int Previous32(CharacterIterator ci)
{
    if (ci.Index <= ci.BeginIndex)
    {
        return Done32;
    }

    char trail = ci.Previous();
    if (!UTF16.IsTrailSurrogate(trail) || ci.Index <= ci.BeginIndex)
    {
        return trail;
    }

    char lead = ci.Previous();
    if (UTF16.IsLeadSurrogate(lead))
    {
        return (((int)lead - UTF16.LeadSurrogateMinValue) << 10)
            + ((int)trail - UTF16.TrailSurrogateMinValue)
            + UTF16.SupplementaryMinValue;
    }

    // Unpaired trail surrogate: undo the extra step back.
    ci.Next();
    return trail;
}
/// <summary>
/// Moves the iterator back by one code point and returns that code point.
/// </summary>
/// <param name="ci">The iterator to move.</param>
/// <returns>
/// <c>DONE32</c> when the iterator is already at (or before) its begin index; the combined
/// supplementary code point when a trail surrogate is preceded by a lead surrogate; otherwise
/// the single 16-bit unit that was stepped over.
/// </returns>
public static int Previous32(CharacterIterator ci)
{
    if (ci.Index <= ci.BeginIndex)
    {
        return DONE32;
    }

    char trail = ci.Previous();
    if (!UTF16.IsTrailSurrogate(trail) || ci.Index <= ci.BeginIndex)
    {
        return trail;
    }

    char lead = ci.Previous();
    if (UTF16.IsLeadSurrogate(lead))
    {
        return (((int)lead - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10)
            + ((int)trail - UTF16.TRAIL_SURROGATE_MIN_VALUE)
            + UTF16.SUPPLEMENTARY_MIN_VALUE;
    }

    // Unpaired trail surrogate: undo the extra step back.
    ci.Next();
    return trail;
}
/// <summary>
/// Normalizes <paramref name="str"/> with the shared <c>normalizer</c> by walking its
/// iterative API forwards or backwards and collecting the produced code points.
/// </summary>
/// <param name="str">The text to normalize.</param>
/// <param name="mode">The normalization mode to apply.</param>
/// <param name="buf">Scratch buffer; it is cleared first and receives the output.</param>
/// <param name="dir">Positive to iterate forwards, anything else to iterate backwards.</param>
/// <param name="options">Normalizer options to switch on after all options are reset.</param>
/// <returns>The contents of <paramref name="buf"/> after iteration.</returns>
private String iterativeNorm(StringCharacterIterator str, Normalizer.Mode mode, StringBuffer buf, int dir, int options)
{
    normalizer.SetText(str);
    normalizer.SetMode(mode);
    buf.Length = 0;

    // Clear every option, then enable only the requested ones.
    normalizer.SetOption(-1, false);
    normalizer.SetOption(options, true);

    if (dir > 0)
    {
        int ch = normalizer.First();
        while (ch != Normalizer.DONE)
        {
            buf.Append(UTF16.ValueOf(ch));
            ch = normalizer.Next();
        }
    }
    else
    {
        // Walking backwards: prepend so the result ends up in forward order.
        int ch = normalizer.Last();
        while (ch != Normalizer.DONE)
        {
            buf.Insert(0, UTF16.ValueOf(ch));
            ch = normalizer.Previous();
        }
    }

    return buf.ToString();
}
/// <summary>
/// Gets the offset to the data that a code point maps to.
/// </summary>
/// <remarks>
/// The lookup is chosen by range: negative values and values above <c>UChar.MaxValue</c>
/// yield -1, values below the lead-surrogate range use the raw offset, the rest of the BMP
/// uses the BMP offset, and supplementary code points use the surrogate offset.
/// </remarks>
/// <param name="ch">Codepoint.</param>
/// <returns>Offset to data, or -1 when <paramref name="ch"/> is out of range.</returns>
protected int GetCodePointOffset(int ch)
{
    if (ch < 0)
    {
        return -1;
    }

    // Fast path for the part of the BMP below the surrogates (D800), where GetRawOffset() works.
    if (ch < UTF16.LeadSurrogateMinValue)
    {
        return GetRawOffset(0, (char)ch);
    }

    // Remaining BMP code points.
    if (ch < UTF16.SupplementaryMinValue)
    {
        return GetBMPOffset((char)ch);
    }

    // Beyond the valid code point range: signal an error.
    if (ch > UChar.MaxValue)
    {
        return -1;
    }

    // Supplementary code point: the low bits selected by SurrogateMask stand in for the trail unit.
    char lead = UTF16.GetLeadSurrogate(ch);
    return GetSurrogateOffset(lead, (char)(ch & SurrogateMask));
}
/// <summary>
/// Returns the code point at the iterator's current position without changing that position.
/// </summary>
/// <param name="ci">The iterator to read from.</param>
/// <returns>
/// The supplementary code point when the current unit is a lead surrogate followed by a trail
/// surrogate; <c>DONE32</c> when the current unit equals <c>CharacterIterator.DONE</c> and the
/// index is at or past the end index; otherwise the current 16-bit unit.
/// </returns>
public static int Current32(CharacterIterator ci)
{
    char lead = ci.Current;

    // Everything below the surrogate range is a complete code point on its own.
    if (lead < UTF16.LEAD_SURROGATE_MIN_VALUE)
    {
        return lead;
    }

    if (!UTF16.IsLeadSurrogate(lead))
    {
        bool atEnd = lead == CharacterIterator.DONE && ci.Index >= ci.EndIndex;
        return atEnd ? DONE32 : lead;
    }

    // Peek at the following unit, then step back so the position is unchanged.
    int trail = (int)ci.Next();
    ci.Previous();
    if (!UTF16.IsTrailSurrogate((char)trail))
    {
        return lead;
    }

    return ((lead - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10)
        + (trail - UTF16.TRAIL_SURROGATE_MIN_VALUE)
        + UTF16.SUPPLEMENTARY_MIN_VALUE;
}
/// <summary>
/// Checks that <c>UCharacter.ToCodePoint</c> round-trips with
/// <c>UTF16.GetLeadSurrogate</c> / <c>UTF16.GetTrailSurrogate</c> for surrogate pairs taken
/// from both ends of the high and low surrogate ranges. Stops at the first failing pair.
/// </summary>
public void TestToCodePoint()
{
    // Consecutive entries form (high, low) pairs.
    char[] pairs =
    {
        (char)(UCharacter.MIN_HIGH_SURROGATE + 0), (char)(UCharacter.MIN_LOW_SURROGATE + 0),
        (char)(UCharacter.MIN_HIGH_SURROGATE + 1), (char)(UCharacter.MIN_LOW_SURROGATE + 1),
        (char)(UCharacter.MIN_HIGH_SURROGATE + 2), (char)(UCharacter.MIN_LOW_SURROGATE + 2),
        (char)(UCharacter.MAX_HIGH_SURROGATE - 2), (char)(UCharacter.MAX_LOW_SURROGATE - 2),
        (char)(UCharacter.MAX_HIGH_SURROGATE - 1), (char)(UCharacter.MAX_LOW_SURROGATE - 1),
        (char)(UCharacter.MAX_HIGH_SURROGATE - 0), (char)(UCharacter.MAX_LOW_SURROGATE - 0),
    };

    for (int i = 0; i < pairs.Length; i += 2)
    {
        char high = pairs[i];
        char low = pairs[i + 1];
        int cp = UCharacter.ToCodePoint(high, low);

        bool roundTrips = high == UTF16.GetLeadSurrogate(cp) && low == UTF16.GetTrailSurrogate(cp);
        if (!roundTrips)
        {
            Errln((high).ToHexString() + ", " + low);
            break;
        }
    }
}
/// <summary>
/// Classifies the text between two break positions by scanning its code points and
/// reporting the first digit or letter found.
/// </summary>
/// <param name="current">Start of the segment, relative to <c>m_start</c>.</param>
/// <param name="next">End of the segment (exclusive), relative to <c>m_start</c>.</param>
/// <returns>
/// <c>WordNumber</c> if the first digit-or-letter code point in the segment is a digit,
/// <c>WordLetter</c> if it is a letter, and <c>WordNone</c> if the segment contains neither
/// or either position equals <c>BreakIterator.Done</c>.
/// </returns>
private RuleStatus CalcStatus(int current, int next)
{
    if (current == BreakIterator.Done || next == BreakIterator.Done)
    {
        return ICU4N.Text.RuleStatus.WordNone;
    }

    int begin = m_start + current;
    int end = m_start + next;

    int codepoint;
    for (int i = begin; i < end; i += UTF16.GetCharCount(codepoint))
    {
        // Read at the loop cursor. The previous code always read at 'begin', so only the
        // first code point of the segment was ever examined, however far 'i' advanced.
        codepoint = UTF16.CharAt(m_text, 0, end, i);

        if (UChar.IsDigit(codepoint))
        {
            return ICU4N.Text.RuleStatus.WordNumber;
        }
        else if (UChar.IsLetter(codepoint))
        {
            // TODO: try to separately specify ideographic, kana?
            // [currently all bundled as letter for this case]
            return ICU4N.Text.RuleStatus.WordLetter;
        }
    }

    return ICU4N.Text.RuleStatus.WordNone;
}
/// <summary>
/// Runs <c>CharacterTest</c> for every code point in [0, 0x10FFFF) that is not unassigned,
/// private-use or a surrogate: once for the code point alone and once followed by U+0345.
/// Progress is logged each time another 5000 code points have been tested.
/// </summary>
public void TestExhaustive()
{
    CanonicalIterator it = new CanonicalIterator("");
    int tested = 0;

    // NOTE: the original also carried disabled scaffolding for a second "slow" iterator
    // (SKIP_ZEROS = false), a name transliterator and result sets; none of it was active.

    for (int cp = 0; cp < 0x10FFFF; ++cp)
    {
        // Skip characters we know don't have decompositions.
        UCharacterCategory type = UCharacter.GetType(cp);
        bool skip = type == UCharacterCategory.OtherNotAssigned
            || type == UCharacterCategory.PrivateUse
            || type == UCharacterCategory.Surrogate;
        if (skip)
        {
            continue;
        }

        ++tested;
        if (tested % 5000 == 0)
        {
            Logln("Testing " + Utility.Hex(cp, 0));
        }

        string s = UTF16.ValueOf(cp);
        CharacterTest(s, cp, it);
        CharacterTest(s + "\u0345", cp, it);
    }
}
/// <summary>
/// Checks that <c>UChar.ToCodePoint</c> round-trips with
/// <c>UTF16.GetLeadSurrogate</c> / <c>UTF16.GetTrailSurrogate</c> for surrogate pairs taken
/// from both ends of the high and low surrogate ranges. Stops at the first failing pair.
/// </summary>
public void TestToCodePoint()
{
    // Consecutive entries form (high, low) pairs.
    char[] pairs =
    {
        (char)(UChar.MinHighSurrogate + 0), (char)(UChar.MinLowSurrogate + 0),
        (char)(UChar.MinHighSurrogate + 1), (char)(UChar.MinLowSurrogate + 1),
        (char)(UChar.MinHighSurrogate + 2), (char)(UChar.MinLowSurrogate + 2),
        (char)(UChar.MaxHighSurrogate - 2), (char)(UChar.MaxLowSurrogate - 2),
        (char)(UChar.MaxHighSurrogate - 1), (char)(UChar.MaxLowSurrogate - 1),
        (char)(UChar.MaxHighSurrogate - 0), (char)(UChar.MaxLowSurrogate - 0),
    };

    for (int i = 0; i < pairs.Length; i += 2)
    {
        char high = pairs[i];
        char low = pairs[i + 1];
        int cp = UChar.ToCodePoint(high, low);

        bool roundTrips = high == UTF16.GetLeadSurrogate(cp) && low == UTF16.GetTrailSurrogate(cp);
        if (!roundTrips)
        {
            Errln((high).ToHexString() + ", " + low);
            break;
        }
    }
}
// Two canonically equivalent strings must compare equal under a canonical
// caseless match. See UAX #21 Case Mappings, Jitterbug 2021 and the
// Unicode Technical Committee meeting consensus 92-C31.
//
// The Normalizer.Compare overload exercised depends on the UTF-16 lengths:
//   1 and 1   -> Compare(int, int)
//   1 and >1  -> Compare(int, String)
//   >1 and >1 -> Compare(char[], char[])
// Any other combination is not checked.
private void compare(String s1, String s2)
{
    bool firstIsSingle = s1.Length == 1;
    bool secondIsSingle = s2.Length == 1;
    bool firstIsMulti = s1.Length > 1;
    bool secondIsMulti = s2.Length > 1;

    if (firstIsSingle && secondIsSingle)
    {
        int result = Normalizer.Compare(UTF16.CharAt(s1, 0), UTF16.CharAt(s2, 0), Normalizer.COMPARE_IGNORE_CASE);
        if (result != 0)
        {
            Errln("Normalizer.compare(int,int) failed for s1: " + Utility.Hex(s1) + " s2: " + Utility.Hex(s2));
        }
    }

    if (firstIsSingle && secondIsMulti)
    {
        int result = Normalizer.Compare(UTF16.CharAt(s1, 0), s2, Normalizer.COMPARE_IGNORE_CASE);
        if (result != 0)
        {
            Errln("Normalizer.compare(int,String) failed for s1: " + Utility.Hex(s1) + " s2: " + Utility.Hex(s2));
        }
    }

    if (firstIsMulti && secondIsMulti)
    {
        // TODO: Re-enable this tests after UTC fixes UAX 21
        int result = Normalizer.Compare(s1.ToCharArray(), s2.ToCharArray(), Normalizer.COMPARE_IGNORE_CASE);
        if (result != 0)
        {
            Errln("Normalizer.compare(char[],char[]) failed for s1: " + Utility.Hex(s1) + " s2: " + Utility.Hex(s2));
        }
    }
}
/// <summary>
/// Reads the next character according to <paramref name="options"/> and advances past it.
/// Returns <see cref="DONE"/> when no characters remain.
/// </summary>
/// <param name="options">One or more of the following options, bitwise-OR-ed
/// together: <see cref="RuleCharacterIteratorOptions.ParseVariables"/>,
/// <see cref="RuleCharacterIteratorOptions.ParseEscapes"/>,
/// <see cref="RuleCharacterIteratorOptions.SkipWhitespace"/>.</param>
/// <returns>The current 32-bit code point, or <see cref="DONE"/>.</returns>
/// <exception cref="ArgumentException">A referenced variable is undefined, or an escape
/// sequence is invalid.</exception>
public virtual int Next(RuleCharacterIteratorOptions options)
{
    bool parseVariables = (options & RuleCharacterIteratorOptions.ParseVariables) != 0;
    bool skipWhitespace = (options & RuleCharacterIteratorOptions.SkipWhitespace) != 0;
    bool parseEscapes = (options & RuleCharacterIteratorOptions.ParseEscapes) != 0;

    int c = DONE;
    isEscaped = false;

    while (true)
    {
        c = Current();
        Advance(UTF16.GetCharCount(c));

        // Variable reference; only expanded while not already reading from a variable buffer.
        if (c == SymbolTable.SYMBOL_REF && buf == null && parseVariables && sym != null)
        {
            string name = sym.ParseReference(text, pos, text.Length);
            if (name == null)
            {
                // Isolated SYMBOL_REF: return it as-is. Caller must be prepared for this.
                break;
            }

            bufPos = 0;
            buf = sym.Lookup(name);
            if (buf == null)
            {
                throw new ArgumentException(
                    "Undefined variable: " + name);
            }

            // An empty variable value contributes nothing; drop the buffer again.
            if (buf.Length == 0)
            {
                buf = null;
            }
            continue;
        }

        if (skipWhitespace && PatternProps.IsWhiteSpace(c))
        {
            continue;
        }

        if (c == '\\' && parseEscapes)
        {
            int[] offset = new int[] { 0 };
            c = Utility.UnescapeAt(Lookahead(), offset);
            Jumpahead(offset[0]);
            isEscaped = true;
            if (c < 0)
            {
                throw new ArgumentException("Invalid escape");
            }
        }

        break;
    }

    return c;
}
/// <summary>
/// Advances the trie from its current state by one code point, feeding it as one UTF-16
/// unit (BMP) or as a lead/trail surrogate pair (supplementary).
/// </summary>
/// <param name="cp">A Unicode code point 0..0x10ffff.</param>
/// <returns>
/// The match/value <see cref="Result"/>; <c>Result.NoMatch</c> when the lead surrogate step
/// reports that no further input can follow.
/// </returns>
/// <stable>ICU 4.8</stable>
public Result NextForCodePoint(int cp)
{
    if (cp <= 0xffff)
    {
        return Next(cp);
    }

    // Supplementary: the trail unit is only consumed if the lead unit can be continued.
    if (Next(UTF16.GetLeadSurrogate(cp)).HasNext())
    {
        return Next(UTF16.GetTrailSurrogate(cp));
    }
    return Result.NoMatch;
}
/// <summary>
/// Gets the current element of the iterator as a string.
/// Only use after calling next(), not nextRange().
/// </summary>
/// <returns>
/// The <c>String</c> member when <c>Codepoint</c> equals <c>IS_STRING</c>; otherwise the
/// string form of <c>Codepoint</c>.
/// </returns>
public string GetString()
{
    return Codepoint == IS_STRING ? String : UTF16.ValueOf(Codepoint);
}
/// <summary>
/// Stores <paramref name="s"/> in <c>strings</c> under a new offset placed after the entry
/// with the largest existing offset.
/// </summary>
/// <param name="s">The string to add.</param>
/// <returns>
/// The new offset: the largest existing offset, plus <c>UTF16.GetByteCount</c> of the string
/// stored there, plus 2.
/// </returns>
public int AddString(string s)
{
    // Sort the known offsets so the last element is the largest one.
    List<int> sortedOffsets = new List<int>(strings.Keys);
    sortedOffsets.Sort();
    int lastOffset = sortedOffsets[sortedOffsets.Count - 1];

    int newOffset = lastOffset + UTF16.GetByteCount(strings[lastOffset]) + 2;
    strings[newOffset] = s;
    return newOffset;
}
/// <summary>
/// Reads the code point at the current position without parsing escapes, parsing
/// variables, or skipping whitespace. The position is not changed.
/// </summary>
/// <returns>
/// The code point at <c>bufPos</c> in <c>buf</c> when a buffer is active; otherwise the code
/// point at the current index of <c>text</c>, or <c>DONE</c> when that index is at or past
/// the end of <c>text</c>.
/// </returns>
private int Current()
{
    if (buf == null)
    {
        int i = pos.Index;
        if (i < text.Length)
        {
            return UTF16.CharAt(text, i);
        }
        return DONE;
    }

    return UTF16.CharAt(buf, 0, buf.Length, bufPos);
}
/// <summary>
/// Advances past any characters that the given options say to ignore. Useful together
/// with the <see cref="Lookahead()"/> method.
/// <para/>
/// Only <see cref="RuleCharacterIteratorOptions.SkipWhitespace"/> has an effect here.
/// </summary>
/// <param name="options">One or more of the following options, bitwise-OR-ed
/// together: <see cref="RuleCharacterIteratorOptions.ParseVariables"/>,
/// <see cref="RuleCharacterIteratorOptions.ParseEscapes"/>,
/// <see cref="RuleCharacterIteratorOptions.SkipWhitespace"/>.</param>
public virtual void SkipIgnored(RuleCharacterIteratorOptions options)
{
    if ((options & RuleCharacterIteratorOptions.SkipWhitespace) == 0)
    {
        return;
    }

    int c = Current();
    while (PatternProps.IsWhiteSpace(c))
    {
        Advance(UTF16.GetCharCount(c));
        c = Current();
    }
}
/// <summary>
/// Replaces every <c>&lt;XXXX&gt;</c> hexadecimal code point notation in
/// <paramref name="str"/> (for example <c>&lt;00AD&gt;</c>) with the character(s) it denotes.
/// All other characters are copied through, except a stray <c>&gt;</c>, which is dropped.
/// </summary>
/// <param name="str">String including &lt;*&gt; style unicode.</param>
/// <returns>The string with each notation replaced by its UTF-16 form.</returns>
private static String StringReplace(String str)
{
    StringBuffer result = new StringBuffer();
    char[] chars = str.ToCharArray();

    for (int i = 0; i < chars.Length; i++)
    {
        char current = chars[i];

        if (current == '>')
        {
            // Closing bracket of a notation that has already been consumed.
            continue;
        }
        if (current != '<')
        {
            result.Append(current);
            continue;
        }

        // Gather the hex digits up to (not including) the closing '>'.
        StringBuffer hexDigits = new StringBuffer();
        while (chars[i + 1] != '>')
        {
            hexDigits.Append(chars[++i]);
        }

        int codePoint = int.Parse(hexDigits.ToString(), NumberStyles.HexNumber, CultureInfo.InvariantCulture);
        if ((codePoint >> 16) == 0)
        {
            result.Append((char)codePoint);
        }
        else
        {
            // Values above \uFFFF need UTF16 to produce the code units.
            foreach (char unit in UTF16.ValueOf(codePoint))
            {
                result.Append(unit);
            }
        }
    }

    return result.ToString();
}
/// <summary>
/// Advances to the next script run, updating <c>scriptStart</c>, <c>scriptLimit</c> and
/// <c>scriptCode</c>.
/// </summary>
/// <returns>true if there is another script run, false otherwise.</returns>
public bool Next()
{
    if (scriptLimit >= limit)
    {
        return false;
    }

    scriptCode = UScript.Common;
    scriptStart = scriptLimit;

    while (index < limit)
    {
        int ch = UTF16.CharAt(text, start, limit, index - start);
        int sc = GetScript(ch);

        /*
         * From UTR #24: Implementations that determine the boundaries between
         * characters of given scripts should never break between a non-spacing
         * mark and its base character. Thus for boundary determinations and
         * similar sorts of processing, a non-spacing mark — whatever its script
         * value — should inherit the script value of its base character.
         */
        bool staysInRun = IsSameScript(scriptCode, sc)
            || UChar.GetUnicodeCategory(ch) == UUnicodeCategory.NonSpacingMark;
        if (!staysInRun)
        {
            break;
        }

        index += UTF16.GetCharCount(ch);

        // Inherited or Common becomes the script code of the surrounding text.
        if (scriptCode <= UScript.Inherited && sc > UScript.Inherited)
        {
            scriptCode = sc;
        }
    }

    scriptLimit = index;
    return true;
}
/// <summary>
/// Looks up the value stored for the supplementary character formed by a surrogate pair.
/// </summary>
/// <param name="lead">A lead surrogate.</param>
/// <param name="trail">A trail surrogate.</param>
/// <returns>
/// The data at the pair's surrogate offset when that offset is positive; otherwise
/// <c>m_initialValue_</c>.
/// </returns>
/// <exception cref="ArgumentException">
/// <paramref name="lead"/> is not a lead surrogate or <paramref name="trail"/> is not a
/// trail surrogate.
/// </exception>
public int GetSurrogateValue(char lead, char trail)
{
    bool isPair = UTF16.IsLeadSurrogate(lead) && UTF16.IsTrailSurrogate(trail);
    if (!isPair)
    {
        throw new ArgumentException(
            "Argument characters do not form a supplementary character");
    }

    // Fold position for the trail surrogate under this lead.
    int offset = GetSurrogateOffset(lead, trail);

    // A non-positive offset is treated as an error: fall back to the initial value.
    return offset > 0 ? m_data_[offset] : m_initialValue_;
}
/// <summary>
/// Copies <paramref name="source"/> code point by code point, substituting
/// <paramref name="replacement"/> for every code point contained in <paramref name="set"/>.
/// </summary>
/// <param name="source">The text to scan.</param>
/// <param name="set">The code points to replace.</param>
/// <param name="replacement">Text emitted in place of each matching code point.</param>
/// <returns>The rewritten text.</returns>
public static String ReplaceAll(String source, UnicodeSet set, String replacement)
{
    StringBuffer output = new StringBuffer();

    int index = 0;
    while (index < source.Length)
    {
        int codePoint = UTF16.CharAt(source, index);
        if (set.Contains(codePoint))
        {
            output.Append(replacement);
        }
        else
        {
            UTF16.Append(output, codePoint);
        }

        // Step by one or two UTF-16 units depending on the code point just read.
        index += UTF16.GetCharCount(codePoint);
    }

    return output.ToString();
}
/// <summary>
/// Tests the <c>lcccIndex</c>/<c>lcccBits</c> tables to see whether <paramref name="c"/>
/// may have a non-zero lccc.
/// </summary>
/// <param name="c">
/// A code point 0..10FFFF. May be negative, e.g. Collation.SENTINEL_CP.
/// </param>
/// <returns>
/// <c>false</c> for anything below U+0300 (the first character with lccc!=0); otherwise
/// whether the bit for <paramref name="c"/> (or for its lead surrogate, when supplementary)
/// is set.
/// </returns>
internal static bool MayHaveLccc(int c)
{
    if (c < 0x300)
    {
        return false;
    }

    // Supplementary code points are looked up through their lead surrogate.
    if (c > 0xffff)
    {
        c = UTF16.GetLeadSurrogate(c);
    }

    // One index entry per 32 code points; the low five bits select the bit inside it.
    int i = lcccIndex[c >> 5];
    if (i == 0)
    {
        return false;
    }
    return (lcccBits[i] & (1 << (c & 0x1f))) != 0;
}
/// <summary>
/// Produces a random key string from one draw of <c>rand.Next(30)</c>.
/// </summary>
/// <remarks>
/// With r as the drawn value: r == 0 gives U+0000; 1..9 gives the letter at 'A' - 1 + r;
/// 10..19 gives the code point 0x10FFFF - (r - 10); 20..29 gives "a" followed by the
/// character r + 'a' - 1.
/// </remarks>
/// <param name="rand">Source of randomness.</param>
/// <returns>The generated key.</returns>
private String GetRandomKey(Random rand)
{
    int r = rand.Next(30);

    if (r == 0)
    {
        return UTF16.ValueOf(r);
    }
    if (r < 10)
    {
        int upperLetter = 'A' - 1 + r;
        return UTF16.ValueOf(upperLetter);
    }
    if (r < 20)
    {
        int highCodePoint = 0x10FFFF - (r - 10);
        return UTF16.ValueOf(highCodePoint);
    }

    // An empty-string case for r == 20 existed in the original but is disabled.
    int suffix = r + 'a' - 1;
    return "a" + UTF16.ValueOf(suffix);
}
/// <summary>
/// Returns <c>true</c> if the current text represents emoji character or sequence.
/// Only the first code point of the segment is tested against the emoji sets.
/// </summary>
/// <param name="current">Start of the segment, relative to <c>start</c>.</param>
/// <param name="next">End of the segment (exclusive), relative to <c>start</c>.</param>
private bool IsEmoji(int current, int next)
{
    int begin = start + current;
    int end = start + next;
    int codepoint = UTF16.CharAt(text, 0, end, begin);

    if (!EMOJI.Contains(codepoint))
    {
        return false;
    }
    if (!EMOJI_RK.Contains(codepoint))
    {
        return true;
    }

    // A code point in EmojiRK only counts as emoji when there is evidence of an emoji
    // sequence: an emoji presentation selector (U+FE0F) or keycap (U+20E3) follows it.
    int trailer = begin + Character.CharCount(codepoint);
    if (trailer >= end)
    {
        return false;
    }
    return text[trailer] == 0xFE0F || text[trailer] == 0x20E3;
}
/// <summary>
/// Builds a random test string made only of code points whose type is not
/// <c>OtherNotAssigned</c>. The <c>random</c> field is created on first use.
/// </summary>
/// <returns>The generated string.</returns>
internal String GetTestSource()
{
    if (random == null)
    {
        random = CreateRandom(); // use test framework's random seed
    }

    String source = "";

    // The length bound is re-drawn on every pass: each check compares the count so far
    // against a fresh random.Next(maxCharCount) + 1.
    for (int count = 0; count < (random.Next(maxCharCount) + 1); count++)
    {
        // Draw until the code point is an assigned character.
        int codepoint;
        do
        {
            codepoint = random.Next(maxCodePoint);
        }
        while (UCharacter.GetType(codepoint) == UCharacterCategory.OtherNotAssigned);

        source += UTF16.ValueOf(codepoint);
    }

    return source;
}
/// <summary>
/// Checks whether iteration is at the start of an initial block, in which case the rest
/// of the code points in that block share the same value.
/// If so, <see cref="m_nextCodepoint_"/> and the related index fields are moved forward.
/// This is used only for supplementary code points, because the offset to the trail
/// indexes could be 0.
/// </summary>
/// <returns>true if we are at the start of a initial block.</returns>
/// <exception cref="InvalidOperationException">
/// The trie's <c>m_dataManipulate_</c> field is null.
/// </exception>
private bool CheckNullNextTrailIndex()
{
    if (m_nextIndex_ > 0)
    {
        return false;
    }

    // Jump to the last code point of the current trail-surrogate block.
    m_nextCodepoint_ += TRAIL_SURROGATE_COUNT_ - 1;

    int nextLead = UTF16.GetLeadSurrogate(m_nextCodepoint_);
    int stage1 = m_trie_.m_index_[nextLead >> Trie.INDEX_STAGE_1_SHIFT_];
    int leadBlock = stage1 << Trie.INDEX_STAGE_2_SHIFT_;

    if (m_trie_.m_dataManipulate_ == null)
    {
        throw new InvalidOperationException(
            "The field DataManipulate in this Trie is null"); // ICU4N: This was originally NullPointerException
    }

    int leadSlot = leadBlock + (nextLead & Trie.INDEX_STAGE_3_MASK_);
    m_nextIndex_ = m_trie_.m_dataManipulate_.GetFoldingOffset(m_trie_[leadSlot]);
    m_nextIndex_--;
    m_nextBlockIndex_ = DATA_BLOCK_LENGTH_;
    return true;
}
/// <summary>
/// Out-of-line portion of the in-line <see cref="Next32(CharacterIterator)"/> code.
/// The call site does an initial ci.Next() and calls this function
/// if the 16 bit value it gets is >= <see cref="UTF16.LeadSurrogateMinValue"/>.
/// </summary>
/// <param name="ci">The iterator, already advanced onto <paramref name="lead"/>.</param>
/// <param name="lead">The 16-bit unit the call site just read.</param>
/// <returns>
/// <c>Done32</c> at the end of the text, the combined supplementary code point when a trail
/// surrogate follows, otherwise <paramref name="lead"/> unchanged.
/// </returns>
// NOTE: we leave the underlying char iterator positioned in the
//       middle of a surrogate pair. ci.Next() will work correctly
//       from there, but the ci.Index will be wrong, and needs
//       adjustment.
public static int NextTrail32(CharacterIterator ci, int lead)
{
    if (lead == CharacterIterator.Done && ci.Index >= ci.EndIndex)
    {
        return Done32;
    }

    // Values above the lead-surrogate range cannot start a pair.
    if (lead > UTF16.LeadSurrogateMaxValue)
    {
        return lead;
    }

    char cTrail = ci.Next();
    if (!UTF16.IsTrailSurrogate(cTrail))
    {
        // Not a pair after all: give the unit back.
        ci.Previous();
        return lead;
    }

    return ((lead - UTF16.LeadSurrogateMinValue) << 10)
        + (cTrail - UTF16.TrailSurrogateMinValue)
        + UTF16.SupplementaryMinValue;
}
/// <summary>
/// Out-of-line portion of the in-line <see cref="Next32(CharacterIterator)"/> code.
/// The call site does an initial ci.Next() and calls this function
/// if the 16 bit value it gets is >= <see cref="UTF16.LEAD_SURROGATE_MIN_VALUE"/>.
/// </summary>
/// <param name="ci">The iterator, already advanced onto <paramref name="lead"/>.</param>
/// <param name="lead">The 16-bit unit the call site just read.</param>
/// <returns>
/// <c>DONE32</c> at the end of the text, the combined supplementary code point when a trail
/// surrogate follows, otherwise <paramref name="lead"/> unchanged.
/// </returns>
// NOTE: we leave the underlying char iterator positioned in the
//       middle of a surrogate pair. ci.Next() will work correctly
//       from there, but the ci.Index will be wrong, and needs
//       adjustment.
public static int NextTrail32(CharacterIterator ci, int lead)
{
    if (lead == CharacterIterator.DONE && ci.Index >= ci.EndIndex)
    {
        return DONE32;
    }

    // Values above the lead-surrogate range cannot start a pair.
    if (lead > UTF16.LEAD_SURROGATE_MAX_VALUE)
    {
        return lead;
    }

    char cTrail = ci.Next();
    if (!UTF16.IsTrailSurrogate(cTrail))
    {
        // Not a pair after all: give the unit back.
        ci.Previous();
        return lead;
    }

    return ((lead - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10)
        + (cTrail - UTF16.TRAIL_SURROGATE_MIN_VALUE)
        + UTF16.SUPPLEMENTARY_MIN_VALUE;
}
/// <summary>
/// Builds an <see cref="Int32TrieBuilder"/> from <paramref name="setRanges"/>, serializes it,
/// and checks the builder, the serialized <see cref="Int32Trie"/> and the trie enumerator
/// against <paramref name="checkRanges"/>. Every mismatch is reported through <c>Errln</c>.
/// </summary>
/// <param name="setRanges">Ranges (start, limit, value, overwrite) to store in the builder.</param>
/// <param name="countSetRanges">Number of entries of <paramref name="setRanges"/> to use.</param>
/// <param name="checkRanges">Expected (limit, value) ranges; entry 0 supplies the builder's
/// two initial-value arguments.</param>
/// <param name="countCheckRanges">Number of entries of <paramref name="checkRanges"/> to use.</param>
/// <param name="latin1Linear">Whether the trie is built with a linear Latin-1 range.</param>
private void _testTrieRanges(SetRange[] setRanges, int countSetRanges, CheckRange[] checkRanges, int countCheckRanges, bool latin1Linear)
{
    Int32TrieBuilder builder = new Int32TrieBuilder(null, 2000, checkRanges[0].Value, checkRanges[0].Value, latin1Linear);

    // Store the values from setRanges[] in the builder.
    bool allSet = true;
    for (int r = 0; r < countSetRanges; ++r)
    {
        int rangeStart = setRanges[r].start;
        int rangeLimit = setRanges[r].limit;
        int rangeValue = setRanges[r].value;
        bool overwrite = setRanges[r].overwrite;

        // A single overwritten code point goes through SetValue; everything else through SetRange.
        bool singleOverwrite = (rangeLimit - rangeStart) == 1 && overwrite;
        allSet &= singleOverwrite
            ? builder.SetValue(rangeStart, rangeValue)
            : builder.SetRange(rangeStart, rangeLimit, rangeValue, overwrite);
    }
    if (!allSet)
    {
        Errln("setting values into a trie failed");
        return;
    }

    // Verify that all these values are in the new trie builder.
    // 'cp' is deliberately carried across ranges: each range starts where the last one ended.
    int cp = 0;
    for (int r = 0; r < countCheckRanges; ++r)
    {
        int rangeLimit = checkRanges[r].Limit;
        int expected = checkRanges[r].Value;
        for (; cp < rangeLimit; ++cp)
        {
            if (expected != builder.GetValue(cp))
            {
                Errln("newTrie [U+" + (cp).ToHexString() + "]==0x" + (builder.GetValue(cp).ToHexString()) + " instead of 0x" + (expected).ToHexString());
            }
        }
    }

    Int32Trie trie = builder.Serialize(new _testFoldedValue(builder), new _testFoldingOffset());

    // Test the linear Latin-1 range from utrie_getData().
    if (latin1Linear)
    {
        cp = 0;
        for (int r = 0; r < countCheckRanges && cp <= 0xff; ++r)
        {
            int rangeLimit = checkRanges[r].Limit;
            int expected = checkRanges[r].Value;
            for (; cp < rangeLimit && cp <= 0xff; ++cp)
            {
                if (expected != trie.GetLatin1LinearValue((char)cp))
                {
                    Errln("IntTrie.getLatin1LinearValue[U+" + (cp).ToHexString() + "]==0x" + (trie.GetLatin1LinearValue((char)cp).ToHexString()) + " instead of 0x" + (expected).ToHexString());
                }
            }
        }
    }

    if (latin1Linear != trie.IsLatin1Linear)
    {
        Errln("trie serialization did not preserve " + "Latin-1-linearity");
    }

    // Verify that all these values are in the serialized trie.
    cp = 0;
    for (int r = 0; r < countCheckRanges; ++r)
    {
        int rangeLimit = checkRanges[r].Limit;
        int expected = checkRanges[r].Value;

        if (cp == 0xd800)
        {
            // Skip surrogates.
            cp = rangeLimit;
            continue;
        }

        while (cp < rangeLimit)
        {
            if (cp <= 0xffff)
            {
                int bmpValue = trie.GetBMPValue((char)cp);
                if (expected != bmpValue)
                {
                    Errln("serialized trie.getBMPValue(U+" + (cp).ToHexString() + " == 0x" + (bmpValue).ToHexString() + " instead of 0x" + (expected).ToHexString());
                }

                if (!UTF16.IsLeadSurrogate((char)cp))
                {
                    int leadValue = trie.GetLeadValue((char)cp);
                    if (expected != leadValue)
                    {
                        Errln("serialized trie.getLeadValue(U+" + (cp).ToHexString() + " == 0x" + (leadValue).ToHexString() + " instead of 0x" + (expected).ToHexString());
                    }
                }
            }

            int codePointValue = trie.GetCodePointValue(cp);
            if (expected != codePointValue)
            {
                Errln("serialized trie.getCodePointValue(U+" + (cp).ToHexString() + ")==0x" + (codePointValue).ToHexString() + " instead of 0x" + (expected).ToHexString());
            }
            ++cp;
        }
    }

    // Enumerate and verify all ranges. Enumerated values are XOR-ed with 0x5555 before
    // being compared with the expected value.
    TrieEnumerator iter = new _testEnumValue(trie);
    for (int enumRanges = 1; iter.MoveNext(); enumRanges++)
    {
        RangeValueEnumeratorElement result = iter.Current;
        bool matches = result.Start == checkRanges[enumRanges - 1].Limit
            && result.Limit == checkRanges[enumRanges].Limit
            && (result.Value ^ 0x5555) == checkRanges[enumRanges].Value;
        if (!matches)
        {
            Errln("utrie_enum() delivers wrong range [U+" + (result.Start).ToHexString() + "..U+" + (result.Limit).ToHexString() + "].0x" + (result.Value ^ 0x5555).ToHexString() + " instead of [U+" + (checkRanges[enumRanges - 1].Limit).ToHexString() + "..U+" + (checkRanges[enumRanges].Limit).ToHexString() + "].0x" + (checkRanges[enumRanges].Value).ToHexString());
        }
    }

    // Test the linear Latin-1 range against the lead-value lookup.
    if (trie.IsLatin1Linear)
    {
        for (cp = 0; cp < 0x100; ++cp)
        {
            if (trie.GetLatin1LinearValue((char)cp) != trie.GetLeadValue((char)cp))
            {
                Errln("trie.getLatin1LinearValue[U+" + (cp).ToHexString() + "]=0x" + (trie.GetLatin1LinearValue((char)cp).ToHexString()) + " instead of 0x" + (trie.GetLeadValue((char)cp)).ToHexString());
            }
        }
    }

    _testTrieIteration(trie, checkRanges, countCheckRanges);
}