/// <summary>
/// Adds the cached source characters and source strings that pass the filter of
/// <paramref name="transliterator"/> to <paramref name="sourceSet"/>, and the results of
/// transforming them to <paramref name="targetSet"/>.
/// </summary>
/// <param name="transliterator">Transliterator whose filter is combined with <paramref name="inputFilter"/>.</param>
/// <param name="inputFilter">External filter passed to <c>GetFilterAsUnicodeSet</c>.</param>
/// <param name="sourceSet">Set that receives the affected source characters/strings.</param>
/// <param name="targetSet">Set that receives the transformed output.</param>
public virtual void AddSourceTargetSet(Transliterator transliterator, UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet)
{
#pragma warning disable 612, 618
    UnicodeSet effectiveFilter = transliterator.GetFilterAsUnicodeSet(inputFilter);
#pragma warning restore 612, 618

    // Restrict the cached sources to what the filter lets through.
    UnicodeSet filteredSources = new UnicodeSet(sourceCache).RetainAll(effectiveFilter);
    sourceSet.AddAll(filteredSources);
    foreach (string source in filteredSources)
    {
        targetSet.AddAll(transform.Transform(source));
    }

    // Source strings only count when fully covered by the filter and
    // actually changed by the transform.
    foreach (string candidate in sourceStrings)
    {
        if (!effectiveFilter.ContainsAll(candidate))
        {
            continue;
        }

        string transformed = transform.Transform(candidate);
        if (candidate.Equals(transformed))
        {
            continue;
        }

        targetSet.AddAll(transformed);
        sourceSet.AddAll(candidate);
    }
}
/// <summary>
/// Walks the blocks stored in <c>spec</c>, gathers the literal characters of each block plus
/// the digits of each block's radix, and reports those that pass the filter as sources.
/// When any source survives, the whole code point range is reported as target.
/// </summary>
/// <seealso cref="Transliterator.AddSourceTargetSet(UnicodeSet, UnicodeSet, UnicodeSet)"/>
public override void AddSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet)
{
    // Each form consists of a prefix, suffix, radix, minimum digit count,
    // and maximum digit count. These values are stored as a five
    // character header.
    UnicodeSet myFilter = GetFilterAsUnicodeSet(inputFilter);
    UnicodeSet candidates = new UnicodeSet();
    StringBuilder digits = new StringBuilder();

    int pos = 0;
    while (spec[pos] != END)
    {
        // The first 5 items are the header; the first two header values are
        // added to the header size to locate the end of this block.
        int blockEnd = pos + spec[pos] + spec[pos + 1] + 5;
        int radix = spec[pos + 2];

        // Every digit of the radix is a possible source character.
        for (int digit = 0; digit < radix; ++digit)
        {
            Utility.AppendNumber(digits, digit, radix, 0);
        }

        // Then add the characters that follow the header.
        for (int k = pos + 5; k < blockEnd; ++k)
        {
            candidates.Add(spec[k]);
        }

        // Move on to the next block.
        pos = blockEnd;
    }

    candidates.AddAll(digits.ToString());
    candidates.RetainAll(myFilter);

    if (candidates.Count <= 0)
    {
        return;
    }

    sourceSet.AddAll(candidates);
    targetSet.AddAll(0, 0x10FFFF); // assume we can produce any character
}
/// <summary>
/// Reports the characters that can take part in a delimited name as sources, provided the
/// filter admits both the opening and the closing delimiter. When any source character
/// survives the filter, the whole code point range is reported as target.
/// </summary>
/// <seealso cref="Transliterator.AddSourceTargetSet(UnicodeSet, UnicodeSet, UnicodeSet)"/>
public override void AddSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet)
{
    UnicodeSet myFilter = GetFilterAsUnicodeSet(inputFilter);

    // We have to contain both prefix and suffix.
    bool delimitersAllowed = myFilter.ContainsAll(UnicodeNameTransliterator.OPEN_DELIM)
        && myFilter.Contains(CLOSE_DELIM);
    if (!delimitersAllowed)
    {
        return;
    }

    UnicodeSet nameChars = new UnicodeSet()
        .AddAll('0', '9')
        .AddAll('A', 'F')
        .AddAll('a', 'z') // for controls
        .Add('<').Add('>') // for controls
        .Add('(').Add(')') // for controls
        .Add('-')
        .Add(' ')
        .AddAll(UnicodeNameTransliterator.OPEN_DELIM)
        .Add(CLOSE_DELIM);
    nameChars.RetainAll(myFilter);

    if (nameChars.Count <= 0)
    {
        return;
    }

    sourceSet.AddAll(nameChars);
    // Could produce any character.
    targetSet.AddAll(0, 0x10FFFF);
}
/// <summary>
/// Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"}.
/// If this set already contains any particular character, it has no effect on that character.
/// </summary>
/// <param name="set">This set.</param>
/// <param name="s">The source string.</param>
/// <returns>this object, for chaining.</returns>
/// <exception cref="ArgumentNullException"><paramref name="set"/> is <c>null</c>.</exception>
/// <draft>ICU4N 60.1</draft>
/// <provisional>This API might change or be removed in a future release.</provisional>
internal static UnicodeSet AddAll(this UnicodeSet set, ICharSequence s)
{
    if (set != null)
    {
        // Forward to the AddAll overload that member lookup selects on the set itself.
        return set.AddAll(s);
    }

    throw new ArgumentNullException(nameof(set));
}
/// <summary>
/// Null-checks <paramref name="set"/> and forwards <paramref name="collection"/> to
/// <c>set.AddAll(collection)</c>.
/// </summary>
/// <param name="set">This set.</param>
/// <param name="collection">The character sequences to forward.</param>
/// <returns>The result of the forwarded <c>AddAll</c> call.</returns>
/// <exception cref="ArgumentNullException"><paramref name="set"/> is <c>null</c>.</exception>
/// <seealso cref="UnicodeSet.AddAll(UnicodeSet)"/>
/// <draft>ICU4N 60.1</draft>
/// <provisional>This API might change or be removed in a future release.</provisional>
// See ticket #11395, this is safe.
public static UnicodeSet AddAll(this UnicodeSet set, params ICharSequence[] collection)
{
    if (set != null)
    {
        return set.AddAll(collection);
    }

    throw new ArgumentNullException(nameof(set));
}
/// <summary>
/// Adds each of the characters in this string to the set. Thus "ch" => {"c", "h"}.
/// If this set already contains any particular character, it has no effect on that character.
/// </summary>
/// <param name="set">This set.</param>
/// <param name="s">The source string.</param>
/// <returns>this object, for chaining.</returns>
/// <exception cref="ArgumentNullException"><paramref name="set"/> is <c>null</c>.</exception>
/// <draft>ICU4N 60.1</draft>
/// <provisional>This API might change or be removed in a future release.</provisional>
public static UnicodeSet AddAll(this UnicodeSet set, string s)
{
    if (set != null)
    {
        // Forward to the AddAll overload that member lookup selects on the set itself.
        return set.AddAll(s);
    }

    throw new ArgumentNullException(nameof(set));
}
/// <summary>
/// Reports every character admitted by the effective filter as a source. When that filter
/// is non-empty, the prefix, suffix and radix digits of this transliterator and of each
/// handler reachable through <c>supplementalHandler</c> are reported as targets.
/// </summary>
/// <remarks>
/// The emptiness test uses the effective filter returned by <c>GetFilterAsUnicodeSet</c>
/// rather than the raw <paramref name="inputFilter"/>, matching the other
/// <c>AddSourceTargetSet</c> overrides. This avoids reporting target characters when no
/// source character can pass the filter.
/// </remarks>
/// <param name="inputFilter">External filter passed to <c>GetFilterAsUnicodeSet</c>.</param>
/// <param name="sourceSet">Set that receives the source characters.</param>
/// <param name="targetSet">Set that receives the characters an escape can contain.</param>
/// <seealso cref="Transliterator.AddSourceTargetSet(UnicodeSet, UnicodeSet, UnicodeSet)"/>
public override void AddSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet)
{
    UnicodeSet myFilter = GetFilterAsUnicodeSet(inputFilter);
    sourceSet.AddAll(myFilter);

    // Nothing passes the filter, so nothing can be produced. The check does not
    // depend on the handler, so it is done once instead of once per handler.
    if (myFilter.Count == 0)
    {
        return;
    }

    for (EscapeTransliterator it = this; it != null; it = it.supplementalHandler)
    {
        targetSet.AddAll(it.prefix);
        targetSet.AddAll(it.suffix);

        // Every digit of the handler's radix may appear in its output.
        StringBuilder buffer = new StringBuilder();
        for (int i = 0; i < it.radix; ++i)
        {
            Utility.AppendNumber(buffer, i, it.radix, it.minDigits);
        }
        targetSet.AddAll(buffer.ToString()); // TODO drop once String is changed to CharSequence in UnicodeSet
    }
}
/// <summary>
/// Creates a word break engine backed by the dictionary loaded for "Hira".
/// </summary>
/// <param name="korean">
/// When <c>true</c> the engine's characters are set to <c>fHangulWordSet</c>; otherwise they
/// are set to the union of the Han, Katakana and Hiragana word sets plus the two
/// prolonged sound marks.
/// </param>
public CjkBreakEngine(bool korean)
    : base(BreakIterator.KIND_WORD)
{
    fDictionary = DictionaryData.LoadDictionaryFor("Hira");

    if (korean)
    {
        SetCharacters(fHangulWordSet);
        return;
    }

    // Chinese and Japanese
    UnicodeSet chineseJapanese = new UnicodeSet();
    chineseJapanese.AddAll(fHanWordSet);
    chineseJapanese.AddAll(fKatakanaWordSet);
    chineseJapanese.AddAll(fHiraganaWordSet);
    chineseJapanese.Add(0xFF70); // HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
    chineseJapanese.Add(0x30FC); // KATAKANA-HIRAGANA PROLONGED SOUND MARK
    SetCharacters(chineseJapanese);
}
/// <summary>
/// Reports the inserted text as target characters when the effective filter is non-empty.
/// <paramref name="sourceSet"/> is never modified.
/// </summary>
/// <seealso cref="Transliterator.AddSourceTargetSet(UnicodeSet, UnicodeSet, UnicodeSet)"/>
public override void AddSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet)
{
    // Doesn't actually modify the source characters, so leave them alone.
    UnicodeSet effectiveFilter = GetFilterAsUnicodeSet(inputFilter);
    if (effectiveFilter.Count == 0)
    {
        return;
    }

    // Add the characters inserted.
    targetSet.AddAll(insertion);
}
/// <summary>
/// Conservative implementation: every character admitted by the effective filter is
/// reported as a source, and if there is at least one, the whole code point range is
/// reported as target.
/// </summary>
/// <seealso cref="Transliterator.AddSourceTargetSet(UnicodeSet, UnicodeSet, UnicodeSet)"/>
public override void AddSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet)
{
    // Assume that it can modify any character to any other character.
    UnicodeSet effectiveFilter = GetFilterAsUnicodeSet(inputFilter);
    sourceSet.AddAll(effectiveFilter);

    if (effectiveFilter.Count == 0)
    {
        return;
    }

    targetSet.AddAll(0, 0x10FFFF);
}
/// <summary>
/// Conservative implementation: every character admitted by the effective filter is
/// reported as a source, and if there is at least one, the whole code point range is
/// reported as target.
/// </summary>
/// <seealso cref="Transliterator.AddSourceTargetSet(UnicodeSet, UnicodeSet, UnicodeSet)"/>
#pragma warning disable 672
public override void AddSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet)
{
#pragma warning restore 672
#pragma warning disable 612, 618
    UnicodeSet effectiveFilter = GetFilterAsUnicodeSet(inputFilter);
#pragma warning restore 612, 618

    // Assume that it can modify any character to any other character.
    sourceSet.AddAll(effectiveFilter);

    if (effectiveFilter.Count == 0)
    {
        return;
    }

    targetSet.AddAll(0, 0x10FFFF);
}
// TODO Handle the case where we have :: [a] ; a > |b ; b > c ;
// TODO Merge into r.addSourceTargetSet, to avoid duplicate testing
/// <summary>
/// Asks every rule in <c>ruleVector</c>, in order, to contribute to
/// <paramref name="sourceSet"/> and <paramref name="targetSet"/>. The filter handed to each
/// rule is a private copy of <paramref name="filter"/> that is widened after each rule with
/// the contents of the "revisiting" set passed to that rule.
/// </summary>
/// <param name="filter">Initial filter; copied, never modified.</param>
/// <param name="sourceSet">Set that receives source characters.</param>
/// <param name="targetSet">Set that receives target characters.</param>
internal virtual void AddSourceTargetSet(UnicodeSet filter, UnicodeSet sourceSet, UnicodeSet targetSet)
{
    UnicodeSet workingFilter = new UnicodeSet(filter);
    UnicodeSet revisitScratch = new UnicodeSet();

    foreach (TransliterationRule rule in ruleVector)
    {
        // The scratch set is cleared for every rule before being handed over.
        rule.AddSourceTargetSet(workingFilter, sourceSet, targetSet, revisitScratch.Clear());
        workingFilter.AddAll(revisitScratch);
    }
}
/// <summary>
/// Reports the inserted text as target characters when the effective filter is non-empty.
/// <paramref name="sourceSet"/> is never modified.
/// </summary>
/// <seealso cref="Transliterator.AddSourceTargetSet(UnicodeSet, UnicodeSet, UnicodeSet)"/>
#pragma warning disable 672
public override void AddSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet)
#pragma warning restore 672
{
    // Doesn't actually modify the source characters, so leave them alone.
#pragma warning disable 612, 618
    UnicodeSet effectiveFilter = GetFilterAsUnicodeSet(inputFilter);
#pragma warning restore 612, 618
    if (effectiveFilter.Count == 0)
    {
        return;
    }

    // Add the characters inserted.
    targetSet.AddAll(insertion);
}
/// <summary>
/// Extends the set of unhandled characters recorded for <paramref name="breakType"/> so that
/// it also covers every character sharing the script of <paramref name="c"/>.
/// May be called concurrently with <see cref="Handles(int, int)"/> or <see cref="FindBreaks(CharacterIterator, int, int, int, DictionaryBreakEngine.DequeI)"/>.
/// Must not be called concurrently with itself.
/// </summary>
/// <param name="c">The code point to handle; ignored when it equals <c>CharacterIteration.Done32</c>.</param>
/// <param name="breakType">Index into <c>fHandled</c>; out-of-range values are ignored.</param>
public void HandleChar(int c, int breakType)
{
    bool breakTypeInRange = breakType >= 0 && breakType < fHandled.Length;
    if (!breakTypeInRange || c == CharacterIteration.Done32)
    {
        return;
    }

    UnicodeSet current = fHandled[breakType];
    if (current.Contains(c))
    {
        // Already covered; nothing to do.
        return;
    }

    // Build a fresh set (script of c plus everything already recorded) and swap it
    // into the slot; the previously stored set is not mutated.
    int script = UChar.GetIntPropertyValue(c, UProperty.Script);
    UnicodeSet expanded = new UnicodeSet();
    expanded.ApplyInt32PropertyValue(UProperty.Script, script);
    expanded.AddAll(current);
    fHandled[breakType] = expanded;
}
/// <summary>
/// Asks each transliterator in <c>trans</c>, in order, for its source and target sets.
/// The targets produced by one step are added to <paramref name="targetSet"/> and also to
/// the filter used for the following steps.
/// </summary>
/// <seealso cref="Transliterator.AddSourceTargetSet(UnicodeSet, UnicodeSet, UnicodeSet)"/>
public override void AddSourceTargetSet(UnicodeSet filter, UnicodeSet sourceSet, UnicodeSet targetSet)
{
    // Work on a copy so the set returned by GetFilterAsUnicodeSet is never modified.
    UnicodeSet runningFilter = new UnicodeSet(GetFilterAsUnicodeSet(filter));
    UnicodeSet stepTargets = new UnicodeSet();

    foreach (var step in trans)
    {
        // Each time we produce targets, those can be used by subsequent items,
        // despite the filter. So we get just those items, and add them to the
        // filter each time.
        stepTargets.Clear();
        step.AddSourceTargetSet(runningFilter, sourceSet, stepTargets);
        targetSet.AddAll(stepTargets);
        runningFilter.AddAll(stepTargets);
    }
}
/// <summary>
/// Find the source and target sets, subject to the input filter.
/// There is a known issue with filters containing multiple characters.
/// </summary>
/// <remarks>
/// The key portion of the pattern (from <c>anteContextLength</c> up to
/// <c>anteContextLength + keyLength</c>) is walked one code point at a time. If any position
/// cannot be matched by <paramref name="filter"/>, the method returns without touching
/// <paramref name="sourceSet"/> or <paramref name="targetSet"/>.
/// </remarks>
/// <param name="filter">Filter every position of the key must be able to match.</param>
/// <param name="sourceSet">Receives the characters matched by the key when every position passes.</param>
/// <param name="targetSet">Receives the replacement set of the rule's output when every position passes.</param>
/// <param name="revisiting">Not used by this method.</param>
// TODO: Problem: the rule is [{ab}]c > x
// The filter is [a{bc}].
// If the input is abc, then the rule will work.
// However, following code applying the filter won't catch that case.
internal void AddSourceTargetSet(UnicodeSet filter, UnicodeSet sourceSet, UnicodeSet targetSet, UnicodeSet revisiting)
{
    int limit = anteContextLength + keyLength;
    UnicodeSet tempSource = new UnicodeSet();
    UnicodeSet temp = new UnicodeSet();

    // We need to walk through the pattern.
    // Iff some of the characters at ALL of the positions are matched by the filter,
    // then we add tempSource to sourceSet.
    for (int i = anteContextLength; i < limit;)
    {
        int ch = UTF16.CharAt(pattern, i);
        i += UTF16.GetCharCount(ch);

        IUnicodeMatcher matcher = data.LookupMatcher(ch);
        if (matcher == null)
        {
            // Literal code point: it must itself be in the filter.
            if (!filter.Contains(ch))
            {
                return;
            }
            tempSource.Add(ch);
            continue;
        }

        // Use a type test instead of a cast wrapped in try/catch: a matcher that is
        // not a UnicodeSet is an expected case, not an exceptional one.
        UnicodeSet matcherAsSet = matcher as UnicodeSet;
        if (matcherAsSet != null)
        {
            if (!filter.ContainsSome(matcherAsSet))
            {
                return;
            }
            matcher.AddMatchSetTo(tempSource);
        }
        else
        {
            // The matcher is not a UnicodeSet: materialize its match set first.
            temp.Clear();
            matcher.AddMatchSetTo(temp);
            if (!filter.ContainsSome(temp))
            {
                return;
            }
            tempSource.AddAll(temp);
        }
    }

    // If we made our way through the gauntlet, add to source/target.
    sourceSet.AddAll(tempSource);
    output.AddReplacementSetTo(targetSet);
}
/// <summary>
/// When the effective filter is non-empty, reports every filtered character as a source and
/// reports digits, ASCII letters, '-', ' ', the delimiters and the punctuation used for
/// controls as targets.
/// </summary>
/// <seealso cref="Transliterator.AddSourceTargetSet(UnicodeSet, UnicodeSet, UnicodeSet)"/>
public override void AddSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet)
{
    UnicodeSet effectiveFilter = GetFilterAsUnicodeSet(inputFilter);
    if (effectiveFilter.Count <= 0)
    {
        return;
    }

    sourceSet.AddAll(effectiveFilter);
    targetSet
        .AddAll('0', '9')
        .AddAll('A', 'Z')
        .Add('-')
        .Add(' ')
        .AddAll(OPEN_DELIM)
        .Add(CLOSE_DELIM)
        .AddAll('a', 'z') // for controls
        .Add('<').Add('>') // for controls
        .Add('(').Add(')'); // for controls
}
/// <summary>
/// When the effective filter is non-empty, reports every filtered character as a source and
/// reports digits, ASCII letters, '-', ' ', the delimiters and the punctuation used for
/// controls as targets.
/// </summary>
/// <seealso cref="Transliterator.AddSourceTargetSet(UnicodeSet, UnicodeSet, UnicodeSet)"/>
#pragma warning disable 672
public override void AddSourceTargetSet(UnicodeSet inputFilter, UnicodeSet sourceSet, UnicodeSet targetSet)
#pragma warning restore 672
{
#pragma warning disable 612, 618
    UnicodeSet effectiveFilter = GetFilterAsUnicodeSet(inputFilter);
#pragma warning restore 612, 618
    if (effectiveFilter.Count <= 0)
    {
        return;
    }

    sourceSet.AddAll(effectiveFilter);
    targetSet
        .AddAll('0', '9')
        .AddAll('A', 'Z')
        .Add('-')
        .Add(' ')
        .AddAll(OPEN_DELIM)
        .Add(CLOSE_DELIM)
        .AddAll('a', 'z') // for controls
        .Add('<').Add('>') // for controls
        .Add('(').Add(')'); // for controls
}
/// <summary>
/// Union the set of all characters that may be output by this object
/// into the given set.
/// </summary>
/// <param name="toUnionTo">The set into which to union the output characters.</param>
public virtual void AddReplacementSetTo(UnicodeSet toUnionTo)
{
    // The output characters are the target set of the wrapped transliterator.
    var outputCharacters = translit.GetTargetSet();
    toUnionTo.AddAll(outputCharacters);
}