예제 #1
0
        /// <summary>
        /// Find the source and target sets, subject to the input filter.
        /// There is a known issue with filters containing multiple characters.
        /// </summary>
        /// <remarks>
        /// Walks the pattern one code point at a time, from index <c>anteContextLength</c>
        /// up to (but not including) <c>anteContextLength + keyLength</c>. If any position
        /// has nothing in common with <paramref name="filter"/>, the method returns without
        /// modifying <paramref name="sourceSet"/> or <paramref name="targetSet"/>.
        /// </remarks>
        /// <param name="filter">Set that every walked pattern position must at least partially match.</param>
        /// <param name="sourceSet">Receives the characters collected from the walked pattern positions.</param>
        /// <param name="targetSet">Receives the replacement set contributed by <c>output</c>.</param>
        /// <param name="revisiting">Not used by this implementation.</param>
        // TODO: Problem: the rule is [{ab}]c > x
        // The filter is [a{bc}].
        // If the input is abc, then the rule will work.
        // However, following code applying the filter won't catch that case.
        internal void AddSourceTargetSet(UnicodeSet filter, UnicodeSet sourceSet, UnicodeSet targetSet, UnicodeSet revisiting)
        {
            int        limit      = anteContextLength + keyLength;
            UnicodeSet tempSource = new UnicodeSet();
            UnicodeSet temp       = new UnicodeSet();

            // We need to walk through the pattern.
            // Iff some of the characters at ALL of the positions are matched by the filter,
            // then we add the collected characters (tempSource) to sourceSet.
            for (int i = anteContextLength; i < limit;)
            {
                int ch = UTF16.CharAt(pattern, i);
                i += UTF16.GetCharCount(ch); // advance by the full width of the code point just read
                IUnicodeMatcher matcher = data.LookupMatcher(ch);
                if (matcher == null)
                {
                    // No matcher registered for this code point: treat it as a literal character.
                    if (!filter.Contains(ch))
                    {
                        return;
                    }
                    tempSource.Add(ch);
                    continue;
                }

                // Test the runtime type explicitly rather than casting and catching
                // InvalidCastException: this avoids the cost of a thrown exception for
                // every non-set matcher and cannot swallow an unrelated cast failure
                // raised inside ContainsSome/AddMatchSetTo.
                UnicodeSet matcherSet = matcher as UnicodeSet;
                if (matcherSet != null)
                {
                    if (!filter.ContainsSome(matcherSet))
                    {
                        return;
                    }
                    matcher.AddMatchSetTo(tempSource);
                }
                else
                {
                    // The matcher is not a UnicodeSet: materialize its match set into the
                    // scratch set first so it can be tested against the filter.
                    temp.Clear();
                    matcher.AddMatchSetTo(temp);
                    if (!filter.ContainsSome(temp))
                    {
                        return;
                    }
                    tempSource.AddAll(temp);
                }
            }

            // if we made our way through the gauntlet, add to source/target
            sourceSet.AddAll(tempSource);
            output.AddReplacementSetTo(targetSet);
        }
예제 #2
0
        /// <summary>
        /// Implementation of the <see cref="IUnicodeMatcher"/> API. Adds every character
        /// this object may match to the supplied set.
        /// </summary>
        /// <remarks>
        /// The pattern is scanned one code point at a time. A code point with a registered
        /// matcher delegates to that matcher's own <c>AddMatchSetTo</c>; any other code point
        /// is added to the set directly.
        /// </remarks>
        /// <param name="toUnionTo">The set into which to union the source characters.</param>
        public virtual void AddMatchSetTo(UnicodeSet toUnionTo)
        {
            int index = 0;

            while (index < pattern.Length)
            {
                int codePoint = UTF16.CharAt(pattern, index);
                // Step past the whole code point (its width comes from UTF16.GetCharCount).
                index += UTF16.GetCharCount(codePoint);

                IUnicodeMatcher nested = data.LookupMatcher(codePoint);
                if (nested != null)
                {
                    // The code point stands for another matcher: let it contribute its set.
                    nested.AddMatchSetTo(toUnionTo);
                }
                else
                {
                    // No matcher registered: the code point is added as itself.
                    toUnionTo.Add(codePoint);
                }
            }
        }