Пример #1
0
 /// <summary>
 /// Walks the trie ranges of <paramref name="d"/> and, when <paramref name="d"/> has base data,
 /// the trie ranges of that base data as well, handing every range to <c>EnumCnERange</c>.
 /// Each walk stops at the first range flagged as a lead surrogate.
 /// </summary>
 /// <param name="d">Collation data to enumerate; it may be tailoring or base data.</param>
 public void ForData(CollationData d)
 {
     // First pass: everything in the supplied data, whether tailoring or base.
     if (d.Base != null)
     {
         checkTailored = -1;
     }
     data = d;
     using (IEnumerator<Trie2.Range> ranges = data.trie.GetEnumerator())
     {
         while (ranges.MoveNext())
         {
             Trie2.Range current = ranges.Current;
             if (current.LeadSurrogate)
             {
                 break;
             }
             EnumCnERange(current.StartCodePoint, current.EndCodePoint, current.Value, this);
         }
     }

     CollationData baseData = d.Base;
     if (baseData == null)
     {
         // No base data: the single pass above covered everything.
         return;
     }

     // Second pass: the base data, but only for un-tailored code points.
     tailored.Freeze();
     checkTailored = 1;
     data = baseData;
     using (IEnumerator<Trie2.Range> ranges = data.trie.GetEnumerator())
     {
         while (ranges.MoveNext())
         {
             Trie2.Range current = ranges.Current;
             if (current.LeadSurrogate)
             {
                 break;
             }
             EnumCnERange(current.StartCodePoint, current.EndCodePoint, current.Value, this);
         }
     }
 }
        /// <summary>
        /// Creates a new <see cref="ICUNormalizer2FilterFactory"/>.
        /// </summary>
        /// <param name="args">
        /// Factory arguments. The <c>name</c> (default <c>nfkc_cf</c>), <c>mode</c>
        /// (<c>compose</c> or <c>decompose</c>, default <c>compose</c>) and optional
        /// <c>filter</c> entries are read through <c>Get</c>.
        /// </param>
        /// <exception cref="ArgumentException">
        /// Thrown when <paramref name="args"/> still contains entries at the end of construction;
        /// they are reported as unknown parameters.
        /// </exception>
        public ICUNormalizer2FilterFactory(IDictionary<string, string> args)
            : base(args)
        {
            string normalizerName = Get(args, "name", "nfkc_cf");
            string requestedMode = Get(args, "mode", new string[] { "compose", "decompose" }, "compose");

            // Anything other than "compose" selects decomposition.
            Normalizer2Mode normalizerMode;
            if ("compose".Equals(requestedMode, StringComparison.Ordinal))
            {
                normalizerMode = Normalizer2Mode.Compose;
            }
            else
            {
                normalizerMode = Normalizer2Mode.Decompose;
            }

            Normalizer2 result = Normalizer2.GetInstance(null, normalizerName, normalizerMode);

            string filterPattern = Get(args, "filter");
            if (filterPattern != null)
            {
                UnicodeSet filterSet = new UnicodeSet(filterPattern);

                // A filter pattern that yields no elements leaves the normalizer unwrapped.
                if (filterSet.Any())
                {
                    filterSet.Freeze();
                    result = new FilteredNormalizer2(result, filterSet);
                }
            }

            if (args.Count > 0)
            {
                throw new ArgumentException(string.Format(J2N.Text.StringFormatter.CurrentCulture, "Unknown parameters: {0}", args));
            }

            this.normalizer = result;
        }
Пример #3
0
        /// <summary>Creates a new <see cref="ICUNormalizer2CharFilterFactory"/>.</summary>
        /// <param name="args">
        /// Factory arguments. The <c>name</c> (default <c>nfkc_cf</c>), <c>mode</c>
        /// (<c>compose</c> or <c>decompose</c>, default <c>compose</c>) and optional
        /// <c>filter</c> entries are read through <c>Get</c>.
        /// </param>
        /// <exception cref="ArgumentException">
        /// Thrown when <paramref name="args"/> still contains entries at the end of construction;
        /// the message lists those leftover entries.
        /// </exception>
        public ICUNormalizer2CharFilterFactory(IDictionary<string, string> args)
            : base(args)
        {
            string name = Get(args, "name", "nfkc_cf");
            string mode = Get(args, "mode", new string[] { "compose", "decompose" }, "compose");

            // Anything other than "compose" selects decomposition.
            Normalizer2 normalizer = Normalizer2.GetInstance(
                null,
                name,
                "compose".Equals(mode, StringComparison.Ordinal) ? Normalizer2Mode.Compose : Normalizer2Mode.Decompose);

            string filter = Get(args, "filter");

            if (filter != null)
            {
                UnicodeSet set = new UnicodeSet(filter);

                // A filter pattern that yields no elements leaves the normalizer unwrapped.
                if (set.Any())
                {
                    set.Freeze();
                    normalizer = new FilteredNormalizer2(normalizer, set);
                }
            }

            if (args.Count > 0)
            {
                // Concatenating the dictionary itself would only print its type name,
                // so spell out the leftover entries (each rendered as "[key, value]").
                throw new ArgumentException(
                    "Unknown parameters: " + string.Join<KeyValuePair<string, string>>(", ", args));
            }

            this.normalizer = normalizer;
        }
Пример #4
0
        /// <summary>
        /// Constructs for all variants of <see cref="Span(string, int, SpanCondition)"/>, or only for any one variant.
        /// Initializes as little as possible, for single use.
        /// </summary>
        /// <param name="set">Set whose contents are retained into the internal span set.</param>
        /// <param name="setStrings">Strings of the set; the list reference is stored as given, not copied.</param>
        /// <param name="which">
        /// Bit mask selecting the span variants to prepare. It is compared with <c>All</c> and tested
        /// against <c>Contained</c>, <c>NotContained</c>, <c>Forward</c>, <c>Backward</c> and <c>WithCount</c>.
        /// </param>
        public UnicodeSetStringSpan(UnicodeSet set, IList<string> setStrings, int which)
        {
            // The span set is the range 0..0x10ffff restricted to the contents of the given set.
            spanSet = new UnicodeSet(0, 0x10ffff);
            spanSet.RetainAll(set);

            // TODO (carried over from the Java original): take the parent set's strings as a
            // navigable sorted set instead of a list copy and iterate without creating
            // iterator objects in each span(). See ICU ticket #7454.
            strings = setStrings;
            all = which == All;

            bool notContainedRequested = (which & NotContained) != 0;
            if (notContainedRequested)
            {
                // Share the span set by default; AddToSpanNotSet() will create a separate set if necessary.
                spanNotSet = spanSet;
            }
            offsets = new OffsetList();

            // First pass: find out whether the strings matter at all for span() etc., and
            // record the longest string length. A string is "relevant" when the span set
            // alone does not cover it completely.
            // TODO: possible optimization - distinguish CONTAINED from LONGEST_MATCH and
            // skip irrelevant strings for CONTAINED.
            int count = strings.Count;
            int longest = 0;

            someRelevant = false;
            for (int index = 0; index < count; index++)
            {
                string candidate = strings[index];
                int candidateLength = candidate.Length;
                if (spanSet.Span(candidate, SpanCondition.Contained) < candidateLength)
                {
                    someRelevant = true;
                }
                if (candidateLength > longest)
                {
                    longest = candidateLength;
                }
            }
            this.maxLength16 = longest;

            bool withCount = (which & WithCount) != 0;
            if (!(someRelevant || withCount))
            {
                // No string is relevant and no counting was requested: nothing more to set up.
                return;
            }

            // Freeze only now, because freezing a set takes time and memory that would be
            // wasted if there were no relevant strings.
            if (all)
            {
                spanSet.Freeze();
            }

            // Span-length table: with "all", two tables are laid out back to back (the second
            // one starting at backOffset); otherwise a single table is shared and backOffset is 0.
            spanLengths = new short[all ? count * 2 : count];
            int backOffset = all ? count : 0;

            bool containedRequested = (which & Contained) != 0;
            bool forwardRequested = (which & Forward) != 0;
            bool backwardRequested = (which & Backward) != 0;

            // Second pass: fill in the span lengths and extend spanNotSet.
            for (int index = 0; index < count; index++)
            {
                string candidate = strings[index];
                int candidateLength = candidate.Length;
                int containedLength = spanSet.Span(candidate, SpanCondition.Contained);

                if (containedLength >= candidateLength)
                {
                    // Irrelevant string: the span set already covers all of it.
                    spanLengths[index] = ALL_CP_CONTAINED;
                    if (all)
                    {
                        spanLengths[backOffset + index] = ALL_CP_CONTAINED;
                    }
                    continue;
                }

                // Relevant string.
                if (containedRequested)
                {
                    if (forwardRequested)
                    {
                        spanLengths[index] = MakeSpanLengthByte(containedLength);
                    }
                    if (backwardRequested)
                    {
                        int backLength = candidateLength
                                         - spanSet.SpanBack(candidate, candidateLength, SpanCondition.Contained);
                        spanLengths[backOffset + index] = MakeSpanLengthByte(backLength);
                    }
                }
                else
                {
                    // Not CONTAINED, not all, but NOT_CONTAINED: only store a relevant/irrelevant flag.
                    spanLengths[backOffset + index] = 0;
                    spanLengths[index] = 0;
                }

                if (notContainedRequested)
                {
                    // Add the string's first and last code points to spanNotSet so that a
                    // span(while not contained) stops before any string.
                    if (forwardRequested)
                    {
                        int firstCodePoint = candidate.CodePointAt(0);
                        AddToSpanNotSet(firstCodePoint);
                    }
                    if (backwardRequested)
                    {
                        int lastCodePoint = candidate.CodePointBefore(candidateLength);
                        AddToSpanNotSet(lastCodePoint);
                    }
                }
            }

            // Finish.
            if (all)
            {
                spanNotSet.Freeze();
            }
        }