Exemplo n.º 1
0
        /// <summary>
        /// Adds to <paramref name="sourceSet"/> and <paramref name="targetSet"/> the characters this
        /// object can consume and produce, limited to the filter obtained from
        /// <paramref name="transliterator"/> for <paramref name="inputFilter"/>.
        /// </summary>
        /// <param name="transliterator">Supplies the effective filter via <c>GetFilterAsUnicodeSet</c>.</param>
        /// <param name="inputFilter">Filter passed through to <paramref name="transliterator"/>.</param>
        /// <param name="sourceSet">Receives the source characters.</param>
        /// <param name="targetSet">Receives the characters of the transformed results.</param>
        public virtual void AddSourceTargetSet(Transliterator transliterator, UnicodeSet inputFilter, UnicodeSet sourceSet,
                                               UnicodeSet targetSet)
        {
#pragma warning disable 612, 618
            UnicodeSet effectiveFilter = transliterator.GetFilterAsUnicodeSet(inputFilter);
#pragma warning restore 612, 618

            // Entries of the source cache that the filter lets through are always sources;
            // whatever each of them transforms to is a possible target.
            UnicodeSet filteredSources = new UnicodeSet(sourceCache).RetainAll(effectiveFilter);
            sourceSet.AddAll(filteredSources);
            foreach (string source in filteredSources)
            {
                targetSet.AddAll(transform.Transform(source));
            }

            // Source strings count only when the filter contains all of their characters
            // and the transform actually changes them.
            foreach (string source in sourceStrings)
            {
                if (!effectiveFilter.ContainsAll(source))
                {
                    continue;
                }

                string result = transform.Transform(source);
                if (source.Equals(result))
                {
                    continue;
                }

                targetSet.AddAll(result);
                sourceSet.AddAll(source);
            }
        }
Exemplo n.º 2
0
        /// <summary>
        /// Walks the <c>spec</c> table and reports, subject to the effective filter, the characters
        /// found there (plus the digits of each radix) as sources. If any remain after filtering,
        /// every code point is reported as a possible target.
        /// </summary>
        /// <seealso cref="Transliterator.AddSourceTargetSet(UnicodeSet, UnicodeSet, UnicodeSet)"/>
        public override void AddSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet)
        {
            // Each form consists of a prefix, suffix, radix, minimum digit count,
            // and maximum digit count. These values are stored as a five character
            // header, followed by the form's characters.
            UnicodeSet    effectiveFilter = GetFilterAsUnicodeSet(inputFilter);
            UnicodeSet    candidates      = new UnicodeSet();
            StringBuilder digits          = new StringBuilder();

            int pos = 0;
            while (spec[pos] != END)
            {
                // The block spans the 5 header items plus the two lengths held in the header.
                int blockEnd = pos + spec[pos] + spec[pos + 1] + 5;
                int radix    = spec[pos + 2];

                // Collect every single digit value of this radix.
                for (int value = 0; value < radix; ++value)
                {
                    Utility.AppendNumber(digits, value, radix, 0);
                }

                // Collect the characters that follow the header.
                for (int k = pos + 5; k < blockEnd; ++k)
                {
                    candidates.Add(spec[k]);
                }

                // Advance to the next block.
                pos = blockEnd;
            }

            candidates.AddAll(digits.ToString());
            candidates.RetainAll(effectiveFilter);

            if (candidates.Count <= 0)
            {
                return;
            }

            sourceSet.AddAll(candidates);
            targetSet.AddAll(0, 0x10FFFF); // assume we can produce any character
        }
Exemplo n.º 3
0
        /// <summary>
        /// Reports the characters this transliterator may read, subject to the effective filter,
        /// and, if any remain, reports every code point as a possible target. Nothing is reported
        /// unless the filter contains both the opening and the closing delimiter.
        /// </summary>
        /// <seealso cref="Transliterator.AddSourceTargetSet(UnicodeSet, UnicodeSet, UnicodeSet)"/>
        public override void AddSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet)
        {
            UnicodeSet effectiveFilter = GetFilterAsUnicodeSet(inputFilter);

            // Both the prefix and the suffix must pass the filter.
            bool delimitersAllowed = effectiveFilter.ContainsAll(UnicodeNameTransliterator.OPEN_DELIM)
                                     && effectiveFilter.Contains(CLOSE_DELIM);
            if (!delimitersAllowed)
            {
                return;
            }

            UnicodeSet candidates = new UnicodeSet()
                .AddAll('0', '9')
                .AddAll('A', 'F')
                // lower-case letters and bracket characters, for controls
                .AddAll('a', 'z')
                .Add('<').Add('>')
                .Add('(').Add(')')
                .Add('-')
                .Add(' ')
                .AddAll(UnicodeNameTransliterator.OPEN_DELIM)
                .Add(CLOSE_DELIM);

            candidates.RetainAll(effectiveFilter);
            if (candidates.Count <= 0)
            {
                return;
            }

            sourceSet.AddAll(candidates);
            // could produce any character
            targetSet.AddAll(0, 0x10FFFF);
        }
Exemplo n.º 4
0
 /// <summary>
 /// Adds each of the characters in this string to the set. Thus "ch" =&gt; {"c", "h"}
 /// If this set already contains any particular character, it has no effect on that character.
 /// </summary>
 /// <param name="set">This set.</param>
 /// <param name="s">The source string.</param>
 /// <returns>this object, for chaining.</returns>
 /// <exception cref="ArgumentNullException"><paramref name="set"/> is <c>null</c>.</exception>
 /// <draft>ICU4N 60.1</draft>
 /// <provisional>This API might change or be removed in a future release.</provisional>
 internal static UnicodeSet AddAll(this UnicodeSet set, ICharSequence s)
 {
     if (set != null)
     {
         return set.AddAll(s);
     }

     throw new ArgumentNullException(nameof(set));
 }
Exemplo n.º 5
0
        /// <summary>
        /// Null-checks <paramref name="set"/> and then passes <paramref name="collection"/>
        /// to <c>AddAll</c> on it.
        /// </summary>
        /// <param name="set">This set.</param>
        /// <param name="collection">The character sequences handed to <c>AddAll</c>.</param>
        /// <returns>The value returned by <c>AddAll</c>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="set"/> is <c>null</c>.</exception>
        /// <seealso cref="UnicodeSet.AddAll(UnicodeSet)"/>
        /// <draft>ICU4N 60.1</draft>
        /// <provisional>This API might change or be removed in a future release.</provisional>
        // See ticket #11395, this is safe.

        public static UnicodeSet AddAll(this UnicodeSet set, params ICharSequence[] collection)
        {
            if (set != null)
            {
                return set.AddAll(collection);
            }

            throw new ArgumentNullException(nameof(set));
        }
Exemplo n.º 6
0
 /// <summary>
 /// Adds each of the characters in this string to the set. Thus "ch" =&gt; {"c", "h"}
 /// If this set already contains any particular character, it has no effect on that character.
 /// </summary>
 /// <param name="set">This set.</param>
 /// <param name="s">The source string.</param>
 /// <returns>this object, for chaining.</returns>
 /// <exception cref="ArgumentNullException"><paramref name="set"/> is <c>null</c>.</exception>
 /// <draft>ICU4N 60.1</draft>
 /// <provisional>This API might change or be removed in a future release.</provisional>
 public static UnicodeSet AddAll(this UnicodeSet set, string s)
 {
     if (set != null)
     {
         return set.AddAll(s);
     }

     throw new ArgumentNullException(nameof(set));
 }
Exemplo n.º 7
0
 /// <summary>
 /// Reports the effective filter as the source set and, when <paramref name="inputFilter"/>
 /// is non-empty, adds the prefix, suffix and radix digits of this transliterator and of every
 /// chained supplemental handler to <paramref name="targetSet"/>.
 /// </summary>
 /// <seealso cref="Transliterator.AddSourceTargetSet(UnicodeSet, UnicodeSet, UnicodeSet)"/>
 public override void AddSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet)
 {
     sourceSet.AddAll(GetFilterAsUnicodeSet(inputFilter));

     // NOTE(review): the emptiness test is made on the caller-supplied filter, not on the
     // effective filter computed above - confirm that this is intended.
     if (inputFilter.Count == 0)
     {
         return;
     }

     EscapeTransliterator handler = this;
     while (handler != null)
     {
         targetSet.AddAll(handler.prefix);
         targetSet.AddAll(handler.suffix);

         // Every single digit value of the handler's radix can appear in the output.
         StringBuilder digits = new StringBuilder();
         for (int value = 0; value < handler.radix; ++value)
         {
             Utility.AppendNumber(digits, value, handler.radix, handler.minDigits);
         }
         targetSet.AddAll(digits.ToString()); // TODO drop once String is changed to CharSequence in UnicodeSet

         handler = handler.supplementalHandler;
     }
 }
Exemplo n.º 8
0
 /// <summary>
 /// Creates a word break engine backed by the "Hira" dictionary.
 /// </summary>
 /// <param name="korean">
 /// When <c>true</c> the engine is set up with the Hangul word set; otherwise with the union of
 /// the Han, Katakana and Hiragana word sets plus the two prolonged sound marks.
 /// </param>
 public CjkBreakEngine(bool korean)
     : base(BreakIterator.KIND_WORD)
 {
     fDictionary = DictionaryData.LoadDictionaryFor("Hira");

     if (korean)
     {
         SetCharacters(fHangulWordSet);
         return;
     }

     // Chinese and Japanese
     UnicodeSet chineseJapanese = new UnicodeSet();
     chineseJapanese.AddAll(fHanWordSet);
     chineseJapanese.AddAll(fKatakanaWordSet);
     chineseJapanese.AddAll(fHiraganaWordSet);
     chineseJapanese.Add(0xFF70); // HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
     chineseJapanese.Add(0x30FC); // KATAKANA-HIRAGANA PROLONGED SOUND MARK
     SetCharacters(chineseJapanese);
 }
Exemplo n.º 9
0
        /// <summary>
        /// Adds the inserted text to <paramref name="targetSet"/> when the effective filter is
        /// non-empty. <paramref name="sourceSet"/> is left untouched.
        /// </summary>
        /// <seealso cref="Transliterator.AddSourceTargetSet(UnicodeSet, UnicodeSet, UnicodeSet)"/>
        public override void AddSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet)
        {
            UnicodeSet effectiveFilter = GetFilterAsUnicodeSet(inputFilter);
            if (effectiveFilter.Count == 0)
            {
                return;
            }

            // The source characters are not modified, so only the inserted
            // characters need to be reported.
            targetSet.AddAll(insertion);
        }
Exemplo n.º 10
0
        /// <summary>
        /// Reports the whole effective filter as sources and, if that filter is non-empty,
        /// every code point from 0 to 0x10FFFF as a possible target.
        /// </summary>
        /// <seealso cref="Transliterator.AddSourceTargetSet(UnicodeSet, UnicodeSet, UnicodeSet)"/>
        public override void AddSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet)
        {
            UnicodeSet effectiveFilter = GetFilterAsUnicodeSet(inputFilter);

            // Assumption: any character may be changed into any other character.
            sourceSet.AddAll(effectiveFilter);
            if (effectiveFilter.Count == 0)
            {
                return;
            }

            targetSet.AddAll(0, 0x10FFFF);
        }
Exemplo n.º 11
0
        /// <summary>
        /// Reports the whole effective filter as sources and, if that filter is non-empty,
        /// every code point from 0 to 0x10FFFF as a possible target.
        /// </summary>
        /// <seealso cref="Transliterator.AddSourceTargetSet(UnicodeSet, UnicodeSet, UnicodeSet)"/>
#pragma warning disable 672
        public override void AddSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet)
        {
#pragma warning restore 672
#pragma warning disable 612, 618
            UnicodeSet effectiveFilter = GetFilterAsUnicodeSet(inputFilter);
#pragma warning restore 612, 618

            // Assumption: any character may be changed into any other character.
            sourceSet.AddAll(effectiveFilter);
            if (effectiveFilter.Count == 0)
            {
                return;
            }

            targetSet.AddAll(0, 0x10FFFF);
        }
Exemplo n.º 12
0
        /// <summary>
        /// Asks each rule in <c>ruleVector</c>, in order, to add its source and target characters.
        /// The filter passed to later rules is a private copy of <paramref name="filter"/> that is
        /// widened with whatever the preceding rule placed in its "revisiting" set.
        /// </summary>
        // TODO Handle the case where we have :: [a] ; a > |b ; b > c ;
        // TODO Merge into r.addSourceTargetSet, to avoid duplicate testing
        internal virtual void AddSourceTargetSet(UnicodeSet filter, UnicodeSet sourceSet, UnicodeSet targetSet)
        {
            UnicodeSet workingFilter = new UnicodeSet(filter); // copy, so the caller's filter is not modified
            UnicodeSet revisited     = new UnicodeSet();
            int        ruleCount     = ruleVector.Count;

            int index = 0;
            while (index < ruleCount)
            {
                TransliterationRule rule = ruleVector[index];
                // The scratch set is cleared before each rule sees it.
                rule.AddSourceTargetSet(workingFilter, sourceSet, targetSet, revisited.Clear());
                workingFilter.AddAll(revisited);
                ++index;
            }
        }
Exemplo n.º 13
0
        /// <summary>
        /// Adds the inserted text to <paramref name="targetSet"/> when the effective filter is
        /// non-empty. <paramref name="sourceSet"/> is left untouched.
        /// </summary>
        /// <seealso cref="Transliterator.AddSourceTargetSet(UnicodeSet, UnicodeSet, UnicodeSet)"/>
#pragma warning disable 672
        public override void AddSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet)
#pragma warning restore 672
        {
#pragma warning disable 612, 618
            UnicodeSet effectiveFilter = GetFilterAsUnicodeSet(inputFilter);
#pragma warning restore 612, 618
            if (effectiveFilter.Count == 0)
            {
                return;
            }

            // The source characters are not modified, so only the inserted
            // characters need to be reported.
            targetSet.AddAll(insertion);
        }
Exemplo n.º 14
0
 /// <summary>
 /// Update the set of unhandled characters for the specified breakType to include
 /// all that have the same script as <paramref name="c"/>.
 /// May be called concurrently with <see cref="Handles(int, int)"/> or <see cref="FindBreaks(CharacterIterator, int, int, int, DictionaryBreakEngine.DequeI)"/>.
 /// Must not be called concurrently with itself.
 /// </summary>
 /// <param name="c">The code point to record; <c>CharacterIteration.Done32</c> is ignored.</param>
 /// <param name="breakType">Index into <c>fHandled</c>; out-of-range values are ignored.</param>
 public void HandleChar(int c, int breakType)
 {
     if (breakType < 0 || breakType >= fHandled.Length || c == CharacterIteration.Done32)
     {
         return;
     }

     UnicodeSet current = fHandled[breakType];
     if (current.Contains(c))
     {
         return;
     }

     // Build the replacement set completely before storing it in the array;
     // the existing set is never modified.
     int        scriptValue = UChar.GetIntPropertyValue(c, UProperty.Script);
     UnicodeSet replacement = new UnicodeSet();
     replacement.ApplyInt32PropertyValue(UProperty.Script, scriptValue);
     replacement.AddAll(current);
     fHandled[breakType] = replacement;
 }
Exemplo n.º 15
0
        /// <summary>
        /// Collects the source and target sets of every transliterator in <c>trans</c>, in order.
        /// Targets produced by one step are added to the filter that is passed to the following steps.
        /// </summary>
        /// <seealso cref="Transliterator.AddSourceTargetSet(UnicodeSet, UnicodeSet, UnicodeSet)"/>
        public override void AddSourceTargetSet(UnicodeSet filter, UnicodeSet sourceSet, UnicodeSet targetSet)
        {
            UnicodeSet workingFilter = new UnicodeSet(GetFilterAsUnicodeSet(filter));
            UnicodeSet stepTargets   = new UnicodeSet();

            foreach (Transliterator step in trans)
            {
                // Whatever a step can produce may be consumed by the steps after it, regardless
                // of the filter, so each step's targets are gathered separately and then merged
                // into both the overall target set and the working filter.
                stepTargets.Clear();
                step.AddSourceTargetSet(workingFilter, sourceSet, stepTargets);
                targetSet.AddAll(stepTargets);
                workingFilter.AddAll(stepTargets);
            }
        }
Exemplo n.º 16
0
        /// <summary>
        /// Find the source and target sets, subject to the input filter.
        /// There is a known issue with filters containing multiple characters.
        /// </summary>
        /// <param name="filter">Filter that every key position of the pattern must be able to match.</param>
        /// <param name="sourceSet">Receives the characters of the key when all positions pass the filter.</param>
        /// <param name="targetSet">Receives the replacement set of <c>output</c> when all positions pass the filter.</param>
        /// <param name="revisiting">Not read or written by this method.</param>
        // TODO: Problem: the rule is [{ab}]c > x
        // The filter is [a{bc}].
        // If the input is abc, then the rule will work.
        // However, following code applying the filter won't catch that case.
        internal void AddSourceTargetSet(UnicodeSet filter, UnicodeSet sourceSet, UnicodeSet targetSet, UnicodeSet revisiting)
        {
            int        limit      = anteContextLength + keyLength;
            UnicodeSet tempSource = new UnicodeSet();
            UnicodeSet temp       = new UnicodeSet();

            // We need to walk through the pattern.
            // Iff some of the characters at ALL of the positions are matched by the filter,
            // then the collected characters are added to sourceSet.
            for (int i = anteContextLength; i < limit;)
            {
                int ch = UTF16.CharAt(pattern, i);
                i += UTF16.GetCharCount(ch);
                IUnicodeMatcher matcher = data.LookupMatcher(ch);
                if (matcher == null)
                {
                    // Plain character: it must itself pass the filter.
                    if (!filter.Contains(ch))
                    {
                        return;
                    }
                    tempSource.Add(ch);
                    continue;
                }

                // Test the matcher's type instead of casting and catching InvalidCastException,
                // so that no exception is raised for matchers that are not UnicodeSets.
                UnicodeSet matcherSet = matcher as UnicodeSet;
                if (matcherSet != null)
                {
                    if (!filter.ContainsSome(matcherSet))
                    {
                        return;
                    }
                    matcher.AddMatchSetTo(tempSource);
                }
                else
                {
                    // The matcher is not a UnicodeSet: materialize its match set first.
                    temp.Clear();
                    matcher.AddMatchSetTo(temp);
                    if (!filter.ContainsSome(temp))
                    {
                        return;
                    }
                    tempSource.AddAll(temp);
                }
            }

            // if we made our way through the gauntlet, add to source/target
            sourceSet.AddAll(tempSource);
            output.AddReplacementSetTo(targetSet);
        }
Exemplo n.º 17
0
        /// <summary>
        /// When the effective filter is non-empty, reports it as the source set and adds the
        /// digits, letters, delimiters and punctuation listed below to <paramref name="targetSet"/>.
        /// </summary>
        /// <seealso cref="Transliterator.AddSourceTargetSet(UnicodeSet, UnicodeSet, UnicodeSet)"/>
        public override void AddSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet)
        {
            UnicodeSet effectiveFilter = GetFilterAsUnicodeSet(inputFilter);
            if (effectiveFilter.Count <= 0)
            {
                return;
            }

            sourceSet.AddAll(effectiveFilter);

            targetSet
                .AddAll('0', '9')
                .AddAll('A', 'Z')
                .Add('-')
                .Add(' ')
                .AddAll(OPEN_DELIM)
                .Add(CLOSE_DELIM)
                // lower-case letters and bracket characters, for controls
                .AddAll('a', 'z')
                .Add('<').Add('>')
                .Add('(').Add(')');
        }
Exemplo n.º 18
0
        /// <summary>
        /// When the effective filter is non-empty, reports it as the source set and adds the
        /// digits, letters, delimiters and punctuation listed below to <paramref name="targetSet"/>.
        /// </summary>
        /// <seealso cref="Transliterator.AddSourceTargetSet(UnicodeSet, UnicodeSet, UnicodeSet)"/>
#pragma warning disable 672
        public override void AddSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet)
#pragma warning restore 672
        {
#pragma warning disable 612, 618
            UnicodeSet effectiveFilter = GetFilterAsUnicodeSet(inputFilter);
#pragma warning restore 612, 618
            if (effectiveFilter.Count <= 0)
            {
                return;
            }

            sourceSet.AddAll(effectiveFilter);

            targetSet
                .AddAll('0', '9')
                .AddAll('A', 'Z')
                .Add('-')
                .Add(' ')
                .AddAll(OPEN_DELIM)
                .Add(CLOSE_DELIM)
                // lower-case letters and bracket characters, for controls
                .AddAll('a', 'z')
                .Add('<').Add('>')
                .Add('(').Add(')');
        }
Exemplo n.º 19
0
 /// <summary>
 /// Union the set of all characters that may be output by this object
 /// into the given set. The characters are taken from the target set of <c>translit</c>.
 /// </summary>
 /// <param name="toUnionTo">The set into which to union the output characters.</param>
 public virtual void AddReplacementSetTo(UnicodeSet toUnionTo)
 {
     var producible = translit.GetTargetSet();
     toUnionTo.AddAll(producible);
 }