Пример #1
0
        private void HandlePrefixes(int start, int end, int ce32)
        {
            int index = Collation.IndexFromCE32(ce32);

            ce32 = data.GetCE32FromContexts(index); // Default if no prefix match.
            HandleCE32(start, end, ce32);
            if (!addPrefixes)
            {
                return;
            }
            using (CharsTrie.Enumerator prefixes = new CharsTrie(data.contexts, index + 2).GetEnumerator())
            {
                while (prefixes.MoveNext())
                {
                    var e = prefixes.Current;
                    SetPrefix(e.Chars);
                    // Prefix/pre-context mappings are special kinds of contractions
                    // that always yield expansions.
                    AddStrings(start, end, contractions);
                    AddStrings(start, end, expansions);
                    HandleCE32(start, end, e.Value);
                }
            }
            ResetPrefix();
        }
        private bool GetCEsFromContractionCE32(CollationData data, int ce32)
        {
            int trieIndex = Collation.IndexFromCE32(ce32);

            ce32 = data.GetCE32FromContexts(trieIndex);  // Default if no suffix match.
                                                         // Since the original ce32 is not a prefix mapping,
                                                         // the default ce32 must not be another contraction.
            Debug.Assert(!Collation.IsContractionCE32(ce32));
            int contractionIndex = contractionCEs.Count;

            if (GetCEsFromCE32(data, Collation.SentinelCodePoint, ce32))
            {
                AddContractionEntry(CollationFastLatin.CONTR_CHAR_MASK, ce0, ce1);
            }
            else
            {
                // Bail out for c-without-contraction.
                AddContractionEntry(CollationFastLatin.CONTR_CHAR_MASK, Collation.NoCE, 0);
            }
            // Handle an encodable contraction unless the next contraction is too long
            // and starts with the same character.
            int  prevX          = -1;
            bool addContraction = false;

            using (CharsTrieEnumerator suffixes = CharsTrie.GetEnumerator(data.contexts, trieIndex + 2, 0))
            {
                while (suffixes.MoveNext())
                {
                    CharsTrieEntry entry  = suffixes.Current;
                    ICharSequence  suffix = entry.Chars;
                    int            x      = CollationFastLatin.GetCharIndex(suffix[0]);
                    if (x < 0)
                    {
                        continue;
                    }                         // ignore anything but fast Latin text
                    if (x == prevX)
                    {
                        if (addContraction)
                        {
                            // Bail out for all contractions starting with this character.
                            AddContractionEntry(x, Collation.NoCE, 0);
                            addContraction = false;
                        }
                        continue;
                    }
                    if (addContraction)
                    {
                        AddContractionEntry(prevX, ce0, ce1);
                    }
                    ce32 = entry.Value;
                    if (suffix.Length == 1 && GetCEsFromCE32(data, Collation.SentinelCodePoint, ce32))
                    {
                        addContraction = true;
                    }
                    else
                    {
                        AddContractionEntry(x, Collation.NoCE, 0);
                        addContraction = false;
                    }
                    prevX = x;
                }
            }
            if (addContraction)
            {
                AddContractionEntry(prevX, ce0, ce1);
            }
            // Note: There might not be any fast Latin contractions, but
            // we need to enter contraction handling anyway so that we can bail out
            // when there is a non-fast-Latin character following.
            // For example: Danish &Y<<u+umlaut, when we compare Y vs. u\u0308 we need to see the
            // following umlaut and bail out, rather than return the difference of Y vs. u.
            ce0 = (Collation.NO_CE_PRIMARY << 32) | CONTRACTION_FLAG | (uint)contractionIndex;
            ce1 = 0;
            return(true);
        }
Пример #3
0
        private void Compare(int c, int ce32, int baseCE32)
        {
            if (Collation.IsPrefixCE32(ce32))
            {
                int dataIndex = Collation.IndexFromCE32(ce32);
                ce32 = data.GetFinalCE32(data.GetCE32FromContexts(dataIndex));
                if (Collation.IsPrefixCE32(baseCE32))
                {
                    int baseIndex = Collation.IndexFromCE32(baseCE32);
                    baseCE32 = baseData.GetFinalCE32(baseData.GetCE32FromContexts(baseIndex));
                    ComparePrefixes(c, data.contexts, dataIndex + 2, baseData.contexts, baseIndex + 2);
                }
                else
                {
                    AddPrefixes(data, c, data.contexts, dataIndex + 2);
                }
            }
            else if (Collation.IsPrefixCE32(baseCE32))
            {
                int baseIndex = Collation.IndexFromCE32(baseCE32);
                baseCE32 = baseData.GetFinalCE32(baseData.GetCE32FromContexts(baseIndex));
                AddPrefixes(baseData, c, baseData.contexts, baseIndex + 2);
            }

            if (Collation.IsContractionCE32(ce32))
            {
                int dataIndex = Collation.IndexFromCE32(ce32);
                if ((ce32 & Collation.CONTRACT_SINGLE_CP_NO_MATCH) != 0)
                {
                    ce32 = Collation.NO_CE32;
                }
                else
                {
                    ce32 = data.GetFinalCE32(data.GetCE32FromContexts(dataIndex));
                }
                if (Collation.IsContractionCE32(baseCE32))
                {
                    int baseIndex = Collation.IndexFromCE32(baseCE32);
                    if ((baseCE32 & Collation.CONTRACT_SINGLE_CP_NO_MATCH) != 0)
                    {
                        baseCE32 = Collation.NO_CE32;
                    }
                    else
                    {
                        baseCE32 = baseData.GetFinalCE32(baseData.GetCE32FromContexts(baseIndex));
                    }
                    CompareContractions(c, data.contexts, dataIndex + 2, baseData.contexts, baseIndex + 2);
                }
                else
                {
                    AddContractions(c, data.contexts, dataIndex + 2);
                }
            }
            else if (Collation.IsContractionCE32(baseCE32))
            {
                int baseIndex = Collation.IndexFromCE32(baseCE32);
                baseCE32 = baseData.GetFinalCE32(baseData.GetCE32FromContexts(baseIndex));
                AddContractions(c, baseData.contexts, baseIndex + 2);
            }

            int tag;

            if (Collation.IsSpecialCE32(ce32))
            {
                tag = Collation.TagFromCE32(ce32);
                Debug.Assert(tag != Collation.PREFIX_TAG);
                Debug.Assert(tag != Collation.CONTRACTION_TAG);
                // Currently, the tailoring data builder does not write offset tags.
                // They might be useful for saving space,
                // but they would complicate the builder,
                // and in tailorings we assume that performance of tailored characters is more important.
                Debug.Assert(tag != Collation.OFFSET_TAG);
            }
            else
            {
                tag = -1;
            }
            int baseTag;

            if (Collation.IsSpecialCE32(baseCE32))
            {
                baseTag = Collation.TagFromCE32(baseCE32);
                Debug.Assert(baseTag != Collation.PREFIX_TAG);
                Debug.Assert(baseTag != Collation.CONTRACTION_TAG);
            }
            else
            {
                baseTag = -1;
            }

            // Non-contextual mappings, expansions, etc.
            if (baseTag == Collation.OFFSET_TAG)
            {
                // We might be comparing a tailoring CE which is a copy of
                // a base offset-tag CE, via the [optimize [set]] syntax
                // or when a single-character mapping was copied for tailored contractions.
                // Offset tags always result in long-primary CEs,
                // with common secondary/tertiary weights.
                if (!Collation.IsLongPrimaryCE32(ce32))
                {
                    Add(c);
                    return;
                }
                long dataCE = baseData.ces[Collation.IndexFromCE32(baseCE32)];
                long p      = Collation.GetThreeBytePrimaryForOffsetData(c, dataCE);
                if (Collation.PrimaryFromLongPrimaryCE32(ce32) != p)
                {
                    Add(c);
                    return;
                }
            }

            if (tag != baseTag)
            {
                Add(c);
                return;
            }

            if (tag == Collation.EXPANSION32_TAG)
            {
                int length     = Collation.LengthFromCE32(ce32);
                int baseLength = Collation.LengthFromCE32(baseCE32);

                if (length != baseLength)
                {
                    Add(c);
                    return;
                }

                int idx0 = Collation.IndexFromCE32(ce32);
                int idx1 = Collation.IndexFromCE32(baseCE32);

                for (int i = 0; i < length; ++i)
                {
                    if (data.ce32s[idx0 + i] != baseData.ce32s[idx1 + i])
                    {
                        Add(c);
                        break;
                    }
                }
            }
            else if (tag == Collation.EXPANSION_TAG)
            {
                int length     = Collation.LengthFromCE32(ce32);
                int baseLength = Collation.LengthFromCE32(baseCE32);

                if (length != baseLength)
                {
                    Add(c);
                    return;
                }

                int idx0 = Collation.IndexFromCE32(ce32);
                int idx1 = Collation.IndexFromCE32(baseCE32);

                for (int i = 0; i < length; ++i)
                {
                    if (data.ces[idx0 + i] != baseData.ces[idx1 + i])
                    {
                        Add(c);
                        break;
                    }
                }
            }
            else if (tag == Collation.HANGUL_TAG)
            {
                StringBuilder jamos  = new StringBuilder();
                int           length = Hangul.Decompose(c, jamos);
                if (tailored.Contains(jamos[0]) || tailored.Contains(jamos[1]) ||
                    (length == 3 && tailored.Contains(jamos[2])))
                {
                    Add(c);
                }
            }
            else if (ce32 != baseCE32)
            {
                Add(c);
            }
        }