// Java porting note: ICU4C returns U_SUCCESS(error) and it's not applicable to ICU4J.
// Also, ICU4C requires handleCE32() to be public because it is used by the callback
// function (enumTailoredRange()). This is not necessary for Java implementation.
//
// Walks every code point in [start, end] (the first one is always processed)
// and records in the tailored set those whose mapping differs from the base data.
private void HandleCE32(int start, int end, int ce32)
{
    Debug.Assert(ce32 != Collation.FALLBACK_CE32);

    // Special CE32s are resolved through the tailoring data first; a value that
    // resolves to the fallback marker means there is nothing to compare.
    if (Collation.IsSpecialCE32(ce32))
    {
        ce32 = data.GetIndirectCE32(ce32);
        if (ce32 == Collation.FALLBACK_CE32)
        {
            return;
        }
    }

    int codePoint = start;
    while (true)
    {
        int baseValue = baseData.GetFinalCE32(baseData.GetCE32(codePoint));

        // Do not just continue if ce32 == baseValue because
        // contractions and expansions in different data objects
        // normally differ even if they have the same data offsets.
        bool bothSelfContained =
            Collation.IsSelfContainedCE32(ce32) && Collation.IsSelfContainedCE32(baseValue);

        if (!bothSelfContained)
        {
            Compare(codePoint, ce32, baseValue);
        }
        else if (ce32 != baseValue)
        {
            // Fast path: two self-contained values can be compared directly.
            tailored.Add(codePoint);
        }

        codePoint++;
        if (codePoint > end)
        {
            break;
        }
    }
}
// Adds the string formed by the prefix pfx followed by code point c to the
// tailored set; if the resolved CE32 is a contraction, its contractions are
// added first. The prefix state is set on entry and reset on exit.
private void AddPrefix(CollationData d, ICharSequence pfx, int c, int ce32)
{
    SetPrefix(pfx);

    int resolvedCE32 = d.GetFinalCE32(ce32);
    if (Collation.IsContractionCE32(resolvedCE32))
    {
        // The contraction entries are read from d.contexts starting two
        // units past the index encoded in the CE32.
        int contextStart = Collation.IndexFromCE32(resolvedCE32) + 2;
        AddContractions(c, d.contexts, contextStart);
    }

    // Appending c must come after the contractions have been added, since it
    // modifies unreversedPrefix until ResetPrefix() is called.
    tailored.Add(unreversedPrefix.AppendCodePoint(c).ToString());

    ResetPrefix();
}
// Resolves ce32 (for code point c) to at most two collation elements, stored in
// the fields ce0 and ce1, and reports whether that mapping satisfies all the
// restrictions checked below (primary range, secondary/case weights, tertiary
// and quaternary weights).
//
// data: the collation data used to resolve ce32 and to read expansions.
// c:    the code point being mapped; asserted to be >= 0 for contraction and
//       offset tags.
// ce32: the 32-bit collation element value to resolve.
//
// Returns true if the mapping is supported (ce0/ce1 are then set), false
// otherwise. When false is returned, ce0/ce1 may hold partial results.
private bool GetCEsFromCE32(CollationData data, int c, int ce32)
{
    ce32 = data.GetFinalCE32(ce32);
    ce1 = 0;
    if (Collation.IsSimpleOrLongCE32(ce32))
    {
        ce0 = Collation.CeFromCE32(ce32);
    }
    else
    {
        switch (Collation.TagFromCE32(ce32))
        {
            case Collation.LATIN_EXPANSION_TAG:
                ce0 = Collation.LatinCE0FromCE32(ce32);
                ce1 = Collation.LatinCE1FromCE32(ce32);
                break;
            case Collation.EXPANSION32_TAG:
                {
                    int index = Collation.IndexFromCE32(ce32);
                    int length = Collation.LengthFromCE32(ce32);
                    // Only expansions of at most two CEs fit into ce0/ce1.
                    if (length > 2)
                    {
                        return false;
                    }
                    ce0 = Collation.CeFromCE32(data.ce32s[index]);
                    if (length == 2)
                    {
                        ce1 = Collation.CeFromCE32(data.ce32s[index + 1]);
                    }
                    break;
                }
            case Collation.EXPANSION_TAG:
                {
                    int index = Collation.IndexFromCE32(ce32);
                    int length = Collation.LengthFromCE32(ce32);
                    // Only expansions of at most two CEs fit into ce0/ce1.
                    if (length > 2)
                    {
                        return false;
                    }
                    ce0 = data.ces[index];
                    if (length == 2)
                    {
                        ce1 = data.ces[index + 1];
                    }
                    break;
                }
            // Note: We could support PREFIX_TAG (assert c>=0)
            // by recursing on its default CE32 and checking that none of the prefixes starts
            // with a fast Latin character.
            // However, currently (2013) there are only the L-before-middle-dot
            // prefix mappings in the Latin range, and those would be rejected anyway.
            case Collation.CONTRACTION_TAG:
                Debug.Assert(c >= 0);
                return GetCEsFromContractionCE32(data, ce32);
            case Collation.OFFSET_TAG:
                Debug.Assert(c >= 0);
                ce0 = data.GetCEFromOffsetCE32(c, ce32);
                break;
            default:
                return false;
        }
    }

    // A mapping can be completely ignorable.
    if (ce0 == 0)
    {
        return ce1 == 0;
    }

    // We do not support an ignorable ce0 unless it is completely ignorable.
    long p0 = ce0.TripleShift(32);
    if (p0 == 0)
    {
        return false;
    }

    // We only support primaries up to the Latin script.
    if (p0 > lastLatinPrimary)
    {
        return false;
    }

    // We support non-common secondary and case weights only together with short primaries.
    int lower32_0 = (int)ce0;
    if (p0 < firstShortPrimary)
    {
        int sc0 = lower32_0 & Collation.SECONDARY_AND_CASE_MASK;
        if (sc0 != Collation.COMMON_SECONDARY_CE)
        {
            return false;
        }
    }

    // No below-common tertiary weights.
    if ((lower32_0 & Collation.OnlyTertiaryMask) < Collation.CommonWeight16)
    {
        return false;
    }

    if (ce1 != 0)
    {
        // Both primaries must be in the same group,
        // or both must get short mini primaries,
        // or a short-primary CE is followed by a secondary CE.
        // This is so that we can test the first primary and use the same mask for both,
        // and determine for both whether they are variable.
        long p1 = ce1.TripleShift(32);
        if (p1 == 0 ? p0 < firstShortPrimary : !InSameGroup(p0, p1))
        {
            return false;
        }

        int lower32_1 = (int)ce1;

        // No tertiary CEs.
        if (lower32_1.TripleShift(16) == 0)
        {
            return false;
        }

        // We support non-common secondary and case weights
        // only for secondary CEs or together with short primaries.
        if (p1 != 0 && p1 < firstShortPrimary)
        {
            int sc1 = lower32_1 & Collation.SECONDARY_AND_CASE_MASK;
            if (sc1 != Collation.COMMON_SECONDARY_CE)
            {
                return false;
            }
        }

        // No below-common tertiary weights in the second CE either.
        // The first CE's tertiary weight was already validated above,
        // so this check must look at the lower 32 bits of ce1.
        if ((lower32_1 & Collation.OnlyTertiaryMask) < Collation.CommonWeight16)
        {
            return false;
        }
    }

    // No quaternary weights.
    if (((ce0 | ce1) & Collation.QuaternaryMask) != 0)
    {
        return false;
    }

    return true;
}
// Compares the tailoring mapping (ce32, resolved against `data`) with the base
// mapping (baseCE32, resolved against `baseData`) for code point c, and calls
// Add(c) when the two are found to differ.
//
// The comparison proceeds in stages; each stage may replace ce32/baseCE32 with
// the default value stored in the corresponding contexts table before the next
// stage runs:
//   1. prefix mappings,
//   2. contraction mappings,
//   3. non-contextual mappings (offset, expansions, Hangul, plain values).
//
// c:        the code point whose mappings are compared.
// ce32:     the CE32 from the tailoring data.
// baseCE32: the CE32 from the base data.
private void Compare(int c, int ce32, int baseCE32)
{
    // Stage 1: prefixes. If both sides have prefix data, compare the two
    // prefix lists; if only one side has it, add all of that side's prefixes.
    // Either way, continue with the default CE32 read from the contexts.
    if (Collation.IsPrefixCE32(ce32))
    {
        int dataIndex = Collation.IndexFromCE32(ce32);
        ce32 = data.GetFinalCE32(data.GetCE32FromContexts(dataIndex));
        if (Collation.IsPrefixCE32(baseCE32))
        {
            int baseIndex = Collation.IndexFromCE32(baseCE32);
            baseCE32 = baseData.GetFinalCE32(baseData.GetCE32FromContexts(baseIndex));
            ComparePrefixes(c, data.contexts, dataIndex + 2, baseData.contexts, baseIndex + 2);
        }
        else
        {
            AddPrefixes(data, c, data.contexts, dataIndex + 2);
        }
    }
    else if (Collation.IsPrefixCE32(baseCE32))
    {
        int baseIndex = Collation.IndexFromCE32(baseCE32);
        baseCE32 = baseData.GetFinalCE32(baseData.GetCE32FromContexts(baseIndex));
        AddPrefixes(baseData, c, baseData.contexts, baseIndex + 2);
    }

    // Stage 2: contractions, handled the same way as prefixes. When the
    // CONTRACT_SINGLE_CP_NO_MATCH bit is set, the default value becomes
    // NO_CE32 instead of being read from the contexts.
    if (Collation.IsContractionCE32(ce32))
    {
        int dataIndex = Collation.IndexFromCE32(ce32);
        if ((ce32 & Collation.CONTRACT_SINGLE_CP_NO_MATCH) != 0)
        {
            ce32 = Collation.NO_CE32;
        }
        else
        {
            ce32 = data.GetFinalCE32(data.GetCE32FromContexts(dataIndex));
        }
        if (Collation.IsContractionCE32(baseCE32))
        {
            int baseIndex = Collation.IndexFromCE32(baseCE32);
            if ((baseCE32 & Collation.CONTRACT_SINGLE_CP_NO_MATCH) != 0)
            {
                baseCE32 = Collation.NO_CE32;
            }
            else
            {
                baseCE32 = baseData.GetFinalCE32(baseData.GetCE32FromContexts(baseIndex));
            }
            CompareContractions(c, data.contexts, dataIndex + 2, baseData.contexts, baseIndex + 2);
        }
        else
        {
            AddContractions(c, data.contexts, dataIndex + 2);
        }
    }
    else if (Collation.IsContractionCE32(baseCE32))
    {
        // NOTE(review): unlike the branches above, this one does not test
        // CONTRACT_SINGLE_CP_NO_MATCH on baseCE32 — confirm this is intended.
        int baseIndex = Collation.IndexFromCE32(baseCE32);
        baseCE32 = baseData.GetFinalCE32(baseData.GetCE32FromContexts(baseIndex));
        AddContractions(c, baseData.contexts, baseIndex + 2);
    }

    // Stage 3 setup: extract the tag of each remaining value; -1 marks a
    // non-special CE32. Prefix and contraction tags were consumed above.
    int tag;
    if (Collation.IsSpecialCE32(ce32))
    {
        tag = Collation.TagFromCE32(ce32);
        Debug.Assert(tag != Collation.PREFIX_TAG);
        Debug.Assert(tag != Collation.CONTRACTION_TAG);
        // Currently, the tailoring data builder does not write offset tags.
        // They might be useful for saving space,
        // but they would complicate the builder,
        // and in tailorings we assume that performance of tailored characters is more important.
        Debug.Assert(tag != Collation.OFFSET_TAG);
    }
    else
    {
        tag = -1;
    }
    int baseTag;
    if (Collation.IsSpecialCE32(baseCE32))
    {
        baseTag = Collation.TagFromCE32(baseCE32);
        Debug.Assert(baseTag != Collation.PREFIX_TAG);
        Debug.Assert(baseTag != Collation.CONTRACTION_TAG);
    }
    else
    {
        baseTag = -1;
    }

    // Non-contextual mappings, expansions, etc.
    if (baseTag == Collation.OFFSET_TAG)
    {
        // We might be comparing a tailoring CE which is a copy of
        // a base offset-tag CE, via the [optimize [set]] syntax
        // or when a single-character mapping was copied for tailored contractions.
        // Offset tags always result in long-primary CEs,
        // with common secondary/tertiary weights.
        if (!Collation.IsLongPrimaryCE32(ce32))
        {
            Add(c);
            return;
        }
        long dataCE = baseData.ces[Collation.IndexFromCE32(baseCE32)];
        long p = Collation.GetThreeBytePrimaryForOffsetData(c, dataCE);
        if (Collation.PrimaryFromLongPrimaryCE32(ce32) != p)
        {
            Add(c);
            return;
        }
        // NOTE(review): when the primaries match, control still falls through
        // to the tag comparison below; since tag is asserted not to be
        // OFFSET_TAG, that comparison presumably adds c anyway — confirm.
    }

    // Different kinds of mapping can never be equal.
    if (tag != baseTag)
    {
        Add(c);
        return;
    }

    if (tag == Collation.EXPANSION32_TAG)
    {
        // Same tag: compare expansion lengths, then the CE32s element by element.
        int length = Collation.LengthFromCE32(ce32);
        int baseLength = Collation.LengthFromCE32(baseCE32);

        if (length != baseLength)
        {
            Add(c);
            return;
        }

        int idx0 = Collation.IndexFromCE32(ce32);
        int idx1 = Collation.IndexFromCE32(baseCE32);

        for (int i = 0; i < length; ++i)
        {
            if (data.ce32s[idx0 + i] != baseData.ce32s[idx1 + i])
            {
                Add(c);
                break;
            }
        }
    }
    else if (tag == Collation.EXPANSION_TAG)
    {
        // Same as above, but the elements are 64-bit CEs from the ces arrays.
        int length = Collation.LengthFromCE32(ce32);
        int baseLength = Collation.LengthFromCE32(baseCE32);

        if (length != baseLength)
        {
            Add(c);
            return;
        }

        int idx0 = Collation.IndexFromCE32(ce32);
        int idx1 = Collation.IndexFromCE32(baseCE32);

        for (int i = 0; i < length; ++i)
        {
            if (data.ces[idx0 + i] != baseData.ces[idx1 + i])
            {
                Add(c);
                break;
            }
        }
    }
    else if (tag == Collation.HANGUL_TAG)
    {
        // c is added if any of the characters that Hangul.Decompose writes
        // into jamos is already in the tailored set. The third character is
        // only consulted when the returned length is 3.
        StringBuilder jamos = new StringBuilder();
        int length = Hangul.Decompose(c, jamos);
        if (tailored.Contains(jamos[0]) || tailored.Contains(jamos[1])
            || (length == 3 && tailored.Contains(jamos[2])))
        {
            Add(c);
        }
    }
    else if (ce32 != baseCE32)
    {
        // Remaining cases: same tag (or both non-special), so the raw
        // values can be compared directly.
        Add(c);
    }
}