/// <summary>
/// Resolves <paramref name="ce32"/> into at most two collation elements, stored in the
/// <c>ce0</c> and <c>ce1</c> fields, and reports whether the resulting mapping passes
/// all of the restrictions checked below.
/// </summary>
/// <param name="data">Collation data used to finalize the CE32 and to look up expansion CEs.</param>
/// <param name="c">
/// The code point being mapped. Asserted to be non-negative for contraction and offset
/// CE32s; only forwarded to <c>GetCEFromOffsetCE32</c>.
/// </param>
/// <param name="ce32">The CE32 to resolve; it is first passed through <c>data.GetFinalCE32</c>.</param>
/// <returns>
/// <c>true</c> if the mapping is accepted; <c>false</c> if the tag is unsupported, the
/// expansion is longer than two CEs, or one of the weight restrictions fails.
/// Note that <c>ce0</c>/<c>ce1</c> may have been overwritten even when <c>false</c> is returned.
/// </returns>
private bool GetCEsFromCE32(CollationData data, int c, int ce32)
{
    ce32 = data.GetFinalCE32(ce32);
    ce1 = 0;
    if (Collation.IsSimpleOrLongCE32(ce32))
    {
        ce0 = Collation.CeFromCE32(ce32);
    }
    else
    {
        switch (Collation.TagFromCE32(ce32))
        {
            case Collation.LATIN_EXPANSION_TAG:
                ce0 = Collation.LatinCE0FromCE32(ce32);
                ce1 = Collation.LatinCE1FromCE32(ce32);
                break;
            case Collation.EXPANSION32_TAG:
                {
                    int index = Collation.IndexFromCE32(ce32);
                    int length = Collation.LengthFromCE32(ce32);
                    // Only expansions of at most two CEs fit into ce0/ce1.
                    if (length > 2)
                    {
                        return false;
                    }
                    ce0 = Collation.CeFromCE32(data.ce32s[index]);
                    if (length == 2)
                    {
                        ce1 = Collation.CeFromCE32(data.ce32s[index + 1]);
                    }
                    break;
                }
            case Collation.EXPANSION_TAG:
                {
                    int index = Collation.IndexFromCE32(ce32);
                    int length = Collation.LengthFromCE32(ce32);
                    // Only expansions of at most two CEs fit into ce0/ce1.
                    if (length > 2)
                    {
                        return false;
                    }
                    ce0 = data.ces[index];
                    if (length == 2)
                    {
                        ce1 = data.ces[index + 1];
                    }
                    break;
                }
            // Note: We could support PREFIX_TAG (assert c>=0)
            // by recursing on its default CE32 and checking that none of the prefixes starts
            // with a fast Latin character.
            // However, currently (2013) there are only the L-before-middle-dot
            // prefix mappings in the Latin range, and those would be rejected anyway.
            case Collation.CONTRACTION_TAG:
                Debug.Assert(c >= 0);
                return GetCEsFromContractionCE32(data, ce32);
            case Collation.OFFSET_TAG:
                Debug.Assert(c >= 0);
                ce0 = data.GetCEFromOffsetCE32(c, ce32);
                break;
            default:
                return false;
        }
    }

    // A mapping can be completely ignorable.
    if (ce0 == 0)
    {
        return ce1 == 0;
    }

    // We do not support an ignorable ce0 unless it is completely ignorable.
    long p0 = ce0.TripleShift(32);
    if (p0 == 0)
    {
        return false;
    }

    // We only support primaries up to the Latin script.
    if (p0 > lastLatinPrimary)
    {
        return false;
    }

    // We support non-common secondary and case weights only together with short primaries.
    int lower32_0 = (int)ce0;
    if (p0 < firstShortPrimary)
    {
        int sc0 = lower32_0 & Collation.SECONDARY_AND_CASE_MASK;
        if (sc0 != Collation.COMMON_SECONDARY_CE)
        {
            return false;
        }
    }

    // No below-common tertiary weights.
    if ((lower32_0 & Collation.OnlyTertiaryMask) < Collation.CommonWeight16)
    {
        return false;
    }

    if (ce1 != 0)
    {
        // Both primaries must be in the same group,
        // or both must get short mini primaries,
        // or a short-primary CE is followed by a secondary CE.
        // This is so that we can test the first primary and use the same mask for both,
        // and determine for both whether they are variable.
        long p1 = ce1.TripleShift(32);
        if (p1 == 0 ? p0 < firstShortPrimary : !InSameGroup(p0, p1))
        {
            return false;
        }

        int lower32_1 = (int)ce1;

        // No tertiary CEs.
        if (lower32_1.TripleShift(16) == 0)
        {
            return false;
        }

        // We support non-common secondary and case weights
        // only for secondary CEs or together with short primaries.
        if (p1 != 0 && p1 < firstShortPrimary)
        {
            int sc1 = lower32_1 & Collation.SECONDARY_AND_CASE_MASK;
            if (sc1 != Collation.COMMON_SECONDARY_CE)
            {
                return false;
            }
        }

        // No below-common tertiary weights.
        // This must test the second CE (lower32_1); the first CE was already
        // validated above, so re-testing lower32_0 here would be a no-op.
        if ((lower32_1 & Collation.OnlyTertiaryMask) < Collation.CommonWeight16)
        {
            return false;
        }
    }

    // No quaternary weights.
    if (((ce0 | ce1) & Collation.QuaternaryMask) != 0)
    {
        return false;
    }

    return true;
}
/// <summary>
/// Dispatches on the kind of <paramref name="ce32"/> and forwards the resulting CE(s)
/// to <c>sink</c> (when it is non-null), delegating to the expansion, prefix and
/// contraction handlers where applicable.
/// </summary>
/// <param name="start">First code point of the range; forwarded to the delegated handlers.</param>
/// <param name="end">
/// Last code point of the range (iterated inclusively in the Hangul case);
/// forwarded to the delegated handlers.
/// </param>
/// <param name="ce32">The CE32 to process. Digit and U+0000 CE32s are replaced by a looked-up CE32 and re-dispatched.</param>
/// <exception cref="InvalidOperationException">
/// The CE32 carries the reserved, builder-data or lead-surrogate tag.
/// </exception>
private void HandleCE32(int start, int end, int ce32)
{
    while (true)
    {
        // A low byte below the special threshold means this is not a special CE32.
        bool isSimpleCE32 = (ce32 & 0xff) < Collation.SPECIAL_CE32_LOW_BYTE;
        if (isSimpleCE32)
        {
            if (sink != null)
            {
                sink.HandleCE(Collation.CeFromSimpleCE32(ce32));
            }
            return;
        }

        switch (Collation.TagFromCE32(ce32))
        {
            // --- Tags that swap in another CE32 and go around the loop again. ---
            case Collation.DIGIT_TAG:
                // Use the non-numeric-collation CE32 instead.
                ce32 = data.ce32s[Collation.IndexFromCE32(ce32)];
                continue;

            case Collation.U0000_TAG:
                Debug.Assert(start == 0 && end == 0);
                // Use the normal ce32 for U+0000 instead.
                ce32 = data.ce32s[0];
                continue;

            // --- Tags with nothing to report. ---
            case Collation.FALLBACK_TAG:
            case Collation.OFFSET_TAG:   // Currently no need to send offset CEs to the sink.
            case Collation.IMPLICIT_TAG: // Currently no need to send implicit CEs to the sink.
                return;

            // --- Tags that must never reach this method. ---
            case Collation.RESERVED_TAG_3:
            case Collation.BUILDER_DATA_TAG:
            case Collation.LEAD_SURROGATE_TAG:
                // Java porting note: U_INTERNAL_PROGRAM_ERROR is set to errorCode in ICU4C.
                throw new InvalidOperationException(
                    string.Format("Unexpected CE32 tag type {0} for ce32=0x{1:x8}",
                        Collation.TagFromCE32(ce32), ce32));

            // --- Single long CEs. ---
            case Collation.LONG_PRIMARY_TAG:
                if (sink != null)
                {
                    sink.HandleCE(Collation.CeFromLongPrimaryCE32(ce32));
                }
                return;

            case Collation.LONG_SECONDARY_TAG:
                if (sink != null)
                {
                    sink.HandleCE(Collation.CeFromLongSecondaryCE32(ce32));
                }
                return;

            // --- Expansions. ---
            case Collation.LATIN_EXPANSION_TAG:
                if (sink != null)
                {
                    ces[0] = Collation.LatinCE0FromCE32(ce32);
                    ces[1] = Collation.LatinCE1FromCE32(ce32);
                    sink.HandleExpansion(ces, 0, 2);
                }
                // With a prefix present, the relevant strings were already added.
                if (unreversedPrefix.Length == 0)
                {
                    AddExpansions(start, end);
                }
                return;

            case Collation.EXPANSION32_TAG:
                if (sink != null)
                {
                    int offset = Collation.IndexFromCE32(ce32);
                    int count = Collation.LengthFromCE32(ce32);
                    // Widen each stored CE32 into the scratch CE buffer.
                    for (int k = 0; k < count; ++k)
                    {
                        ces[k] = Collation.CeFromCE32(data.ce32s[offset + k]);
                    }
                    sink.HandleExpansion(ces, 0, count);
                }
                // With a prefix present, the relevant strings were already added.
                if (unreversedPrefix.Length == 0)
                {
                    AddExpansions(start, end);
                }
                return;

            case Collation.EXPANSION_TAG:
                if (sink != null)
                {
                    int offset = Collation.IndexFromCE32(ce32);
                    int count = Collation.LengthFromCE32(ce32);
                    sink.HandleExpansion(data.ces, offset, count);
                }
                // With a prefix present, the relevant strings were already added.
                if (unreversedPrefix.Length == 0)
                {
                    AddExpansions(start, end);
                }
                return;

            // --- Delegated handling. ---
            case Collation.PREFIX_TAG:
                HandlePrefixes(start, end, ce32);
                return;

            case Collation.CONTRACTION_TAG:
                HandleContractions(start, end, ce32);
                return;

            case Collation.HANGUL_TAG:
                if (sink != null)
                {
                    // TODO: This should be optimized,
                    // especially if [start..end] is the complete Hangul range. (assert that)
                    UTF16CollationIterator iterator = new UTF16CollationIterator(data);
                    StringBuilderCharSequence syllable = new StringBuilderCharSequence(new StringBuilder(1));
                    for (int codePoint = start; codePoint <= end; ++codePoint)
                    {
                        // Reuse the same buffer for every code point in the range.
                        syllable.StringBuilder.Length = 0;
                        syllable.StringBuilder.AppendCodePoint(codePoint);
                        iterator.SetText(false, syllable, 0);
                        int ceCount = iterator.FetchCEs();
                        // The last fetched entry is the terminating non-CE; do not report it.
                        Debug.Assert(ceCount >= 2 && iterator.GetCE(ceCount - 1) == Collation.NO_CE);
                        sink.HandleExpansion(iterator.GetCEs(), 0, ceCount - 1);
                    }
                }
                // With a prefix present, the relevant strings were already added.
                if (unreversedPrefix.Length == 0)
                {
                    AddExpansions(start, end);
                }
                return;
        }
    }
}