/// <summary>
/// Resolves <paramref name="ce32"/> into at most two collation elements, stored in the
/// <c>ce0</c> and <c>ce1</c> members, and reports whether the resulting mapping passes
/// every restriction checked below (primary range, secondary/case, tertiary and
/// quaternary weights).
/// </summary>
/// <param name="data">Collation data used to finalize the CE32 and to look up expansion CEs.</param>
/// <param name="c">
/// Value passed to <c>GetCEFromOffsetCE32</c> for offset-tagged CE32s; asserted to be
/// non-negative for contraction and offset tags. Presumably the code point being
/// mapped (confirm against callers).
/// </param>
/// <param name="ce32">The CE32 to decode; it is first replaced by <c>data.GetFinalCE32(ce32)</c>.</param>
/// <returns>
/// <c>true</c> if the mapping is completely ignorable or satisfies all checks;
/// <c>false</c> for unsupported tags, expansions longer than two CEs, or any failed check.
/// Contraction CE32s return whatever <c>GetCEsFromContractionCE32</c> returns.
/// </returns>
private bool GetCEsFromCE32(CollationData data, int c, int ce32)
        {
            ce32 = data.GetFinalCE32(ce32);
            ce1  = 0;
            if (Collation.IsSimpleOrLongCE32(ce32))
            {
                ce0 = Collation.CeFromCE32(ce32);
            }
            else
            {
                switch (Collation.TagFromCE32(ce32))
                {
                case Collation.LATIN_EXPANSION_TAG:
                    ce0 = Collation.LatinCE0FromCE32(ce32);
                    ce1 = Collation.LatinCE1FromCE32(ce32);
                    break;

                case Collation.EXPANSION32_TAG:
                {
                    int index  = Collation.IndexFromCE32(ce32);
                    int length = Collation.LengthFromCE32(ce32);
                    // Only expansions of at most two CEs fit into ce0/ce1.
                    if (length <= 2)
                    {
                        ce0 = Collation.CeFromCE32(data.ce32s[index]);
                        if (length == 2)
                        {
                            ce1 = Collation.CeFromCE32(data.ce32s[index + 1]);
                        }
                        break;
                    }
                    else
                    {
                        return(false);
                    }
                }

                case Collation.EXPANSION_TAG:
                {
                    int index  = Collation.IndexFromCE32(ce32);
                    int length = Collation.LengthFromCE32(ce32);
                    // Only expansions of at most two CEs fit into ce0/ce1.
                    if (length <= 2)
                    {
                        ce0 = data.ces[index];
                        if (length == 2)
                        {
                            ce1 = data.ces[index + 1];
                        }
                        break;
                    }
                    else
                    {
                        return(false);
                    }
                }

                // Note: We could support PREFIX_TAG (assert c>=0)
                // by recursing on its default CE32 and checking that none of the prefixes starts
                // with a fast Latin character.
                // However, currently (2013) there are only the L-before-middle-dot
                // prefix mappings in the Latin range, and those would be rejected anyway.
                case Collation.CONTRACTION_TAG:
                    Debug.Assert(c >= 0);
                    return(GetCEsFromContractionCE32(data, ce32));

                case Collation.OFFSET_TAG:
                    Debug.Assert(c >= 0);
                    ce0 = data.GetCEFromOffsetCE32(c, ce32);
                    break;

                default:
                    return(false);
                }
            }
            // A mapping can be completely ignorable.
            if (ce0 == 0)
            {
                return(ce1 == 0);
            }
            // We do not support an ignorable ce0 unless it is completely ignorable.
            long p0 = ce0.TripleShift(32);

            if (p0 == 0)
            {
                return(false);
            }
            // We only support primaries up to the Latin script.
            if (p0 > lastLatinPrimary)
            {
                return(false);
            }
            // We support non-common secondary and case weights only together with short primaries.
            int lower32_0 = (int)ce0;

            if (p0 < firstShortPrimary)
            {
                int sc0 = lower32_0 & Collation.SECONDARY_AND_CASE_MASK;
                if (sc0 != Collation.COMMON_SECONDARY_CE)
                {
                    return(false);
                }
            }
            // No below-common tertiary weights.
            if ((lower32_0 & Collation.OnlyTertiaryMask) < Collation.CommonWeight16)
            {
                return(false);
            }
            if (ce1 != 0)
            {
                // Both primaries must be in the same group,
                // or both must get short mini primaries,
                // or a short-primary CE is followed by a secondary CE.
                // This is so that we can test the first primary and use the same mask for both,
                // and determine for both whether they are variable.
                long p1 = ce1.TripleShift(32);
                if (p1 == 0 ? p0 < firstShortPrimary : !InSameGroup(p0, p1))
                {
                    return(false);
                }
                int lower32_1 = (int)ce1;
                // No tertiary CEs.
                if ((lower32_1.TripleShift(16)) == 0)
                {
                    return(false);
                }
                // We support non-common secondary and case weights
                // only for secondary CEs or together with short primaries.
                if (p1 != 0 && p1 < firstShortPrimary)
                {
                    int sc1 = lower32_1 & Collation.SECONDARY_AND_CASE_MASK;
                    if (sc1 != Collation.COMMON_SECONDARY_CE)
                    {
                        return(false);
                    }
                }
                // No below-common tertiary weights in the second CE either.
                // This must test lower32_1: the same test on lower32_0 already passed
                // above, so repeating it on ce0 would never reject anything.
                if ((lower32_1 & Collation.OnlyTertiaryMask) < Collation.CommonWeight16)
                {
                    return(false);
                }
            }
            // No quaternary weights.
            if (((ce0 | ce1) & Collation.QuaternaryMask) != 0)
            {
                return(false);
            }
            return(true);
        }
Example #2
0
        /// <summary>
        /// Processes the CE32 that applies to the range [<paramref name="start"/>..<paramref name="end"/>],
        /// following indirect CE32s (digit and U+0000 tags) until a terminal kind is reached.
        /// </summary>
        /// <remarks>
        /// When <c>sink</c> is non-null, single CEs and expansions are reported to it.
        /// Expansion-producing tags additionally call <c>AddExpansions</c> when no prefix is
        /// pending; prefix and contraction tags are delegated to their own handlers.
        /// </remarks>
        /// <param name="start">First value of the range being handled.</param>
        /// <param name="end">Last value (inclusive) of the range being handled.</param>
        /// <param name="ce32">The CE32 to interpret.</param>
        /// <exception cref="InvalidOperationException">
        /// The CE32 carries a reserved, builder-data or lead-surrogate tag.
        /// </exception>
        private void HandleCE32(int start, int end, int ce32)
        {
            while (true)
            {
                // A low byte below the special threshold means a simple (non-special) CE32.
                if ((ce32 & 0xff) < Collation.SPECIAL_CE32_LOW_BYTE)
                {
                    if (sink != null)
                    {
                        sink.HandleCE(Collation.CeFromSimpleCE32(ce32));
                    }
                    return;
                }

                switch (Collation.TagFromCE32(ce32))
                {
                // Nothing to report: fallback, and (currently) offset and implicit CEs
                // do not need to be sent to the sink.
                case Collation.FALLBACK_TAG:
                case Collation.OFFSET_TAG:
                case Collation.IMPLICIT_TAG:
                    return;

                case Collation.RESERVED_TAG_3:
                case Collation.BUILDER_DATA_TAG:
                case Collation.LEAD_SURROGATE_TAG:
                    // Java porting note: U_INTERNAL_PROGRAM_ERROR is set to errorCode in ICU4C.
                    throw new InvalidOperationException(
                              string.Format("Unexpected CE32 tag type {0} for ce32=0x{1:x8}",
                                            Collation.TagFromCE32(ce32), ce32));

                case Collation.LONG_PRIMARY_TAG:
                    if (sink != null)
                    {
                        sink.HandleCE(Collation.CeFromLongPrimaryCE32(ce32));
                    }
                    return;

                case Collation.LONG_SECONDARY_TAG:
                    if (sink != null)
                    {
                        sink.HandleCE(Collation.CeFromLongSecondaryCE32(ce32));
                    }
                    return;

                case Collation.PREFIX_TAG:
                    HandlePrefixes(start, end, ce32);
                    return;

                case Collation.CONTRACTION_TAG:
                    HandleContractions(start, end, ce32);
                    return;

                // Indirections: swap in the referenced CE32 and go around again.
                case Collation.DIGIT_TAG:
                    // Fetch the non-numeric-collation CE32 and continue.
                    ce32 = data.ce32s[Collation.IndexFromCE32(ce32)];
                    continue;

                case Collation.U0000_TAG:
                    Debug.Assert(start == 0 && end == 0);
                    // Fetch the normal ce32 for U+0000 and continue.
                    ce32 = data.ce32s[0];
                    continue;

                // Expansion-producing tags: report to the sink (if any), then leave the
                // switch so the shared tail below can add the expansions.
                case Collation.LATIN_EXPANSION_TAG:
                    if (sink != null)
                    {
                        ces[0] = Collation.LatinCE0FromCE32(ce32);
                        ces[1] = Collation.LatinCE1FromCE32(ce32);
                        sink.HandleExpansion(ces, 0, 2);
                    }
                    break;

                case Collation.EXPANSION32_TAG:
                    if (sink != null)
                    {
                        int offset = Collation.IndexFromCE32(ce32);
                        int count  = Collation.LengthFromCE32(ce32);
                        for (int k = 0; k < count; ++k)
                        {
                            ces[k] = Collation.CeFromCE32(data.ce32s[offset + k]);
                        }
                        sink.HandleExpansion(ces, 0, count);
                    }
                    break;

                case Collation.EXPANSION_TAG:
                    if (sink != null)
                    {
                        int offset = Collation.IndexFromCE32(ce32);
                        int count  = Collation.LengthFromCE32(ce32);
                        sink.HandleExpansion(data.ces, offset, count);
                    }
                    break;

                case Collation.HANGUL_TAG:
                    if (sink != null)
                    {
                        // TODO: This should be optimized,
                        // especially if [start..end] is the complete Hangul range. (assert that)
                        UTF16CollationIterator    ceIterator = new UTF16CollationIterator(data);
                        StringBuilderCharSequence syllable   = new StringBuilderCharSequence(new StringBuilder(1));
                        for (int codePoint = start; codePoint <= end; ++codePoint)
                        {
                            syllable.StringBuilder.Length = 0;
                            syllable.StringBuilder.AppendCodePoint(codePoint);
                            ceIterator.SetText(false, syllable, 0);
                            int fetched = ceIterator.FetchCEs();
                            // Ignore the terminating non-CE.
                            Debug.Assert(fetched >= 2 && ceIterator.GetCE(fetched - 1) == Collation.NO_CE);
                            sink.HandleExpansion(ceIterator.GetCEs(), 0, fetched - 1);
                        }
                    }
                    break;

                default:
                    // Every tag is handled above; an unlisted tag simply re-enters the loop.
                    continue;
                }

                // Shared tail for the expansion-producing tags.
                // Optimization: If we have a prefix,
                // then the relevant strings have been added already.
                if (unreversedPrefix.Length == 0)
                {
                    AddExpansions(start, end);
                }
                return;
            }
        }