// TODO: Problem: the rule is [{ab}]c > x
// The filter is [a{bc}].
// If the input is abc, then the rule will work.
// However, the following code applying the filter won't catch that case.

/// <summary>
/// Find the source and target sets, subject to the input filter.
/// There is a known issue with filters containing multiple characters.
/// </summary>
/// <remarks>
/// Walks the pattern from <c>anteContextLength</c> up to (but not including)
/// <c>anteContextLength + keyLength</c>. If, at any position, the filter matches
/// none of the characters that position can match, the method returns without
/// modifying <paramref name="sourceSet"/> or <paramref name="targetSet"/>.
/// </remarks>
/// <param name="filter">The filter every walked pattern position is checked against.</param>
/// <param name="sourceSet">Receives the characters collected from the walked positions
/// when every position passes the filter.</param>
/// <param name="targetSet">Passed to <c>output.AddReplacementSetTo</c> when every
/// position passes the filter.</param>
/// <param name="revisiting">Not used by this method.</param>
internal void AddSourceTargetSet(UnicodeSet filter, UnicodeSet sourceSet, UnicodeSet targetSet, UnicodeSet revisiting)
{
    int limit = anteContextLength + keyLength;
    UnicodeSet tempSource = new UnicodeSet();
    UnicodeSet temp = new UnicodeSet();

    // We need to walk through the pattern.
    // Iff some of the characters at ALL of the positions are matched by the filter,
    // then we add the collected characters (tempSource) to sourceSet.
    for (int i = anteContextLength; i < limit;)
    {
        int ch = UTF16.CharAt(pattern, i);
        i += UTF16.GetCharCount(ch);

        IUnicodeMatcher matcher = data.LookupMatcher(ch);
        if (matcher == null)
        {
            // No matcher is registered for this code point: treat it as a literal.
            if (!filter.Contains(ch))
            {
                return;
            }
            tempSource.Add(ch);
            continue;
        }

        // Test the runtime type directly instead of casting and catching
        // InvalidCastException: no exception is raised for ordinary control flow,
        // and an unrelated InvalidCastException thrown by the calls below can no
        // longer be mistaken for "the matcher is not a UnicodeSet".
        UnicodeSet matcherSet = matcher as UnicodeSet;
        if (matcherSet != null)
        {
            if (!filter.ContainsSome(matcherSet))
            {
                return;
            }
            matcher.AddMatchSetTo(tempSource);
        }
        else
        {
            // The matcher is not a UnicodeSet: collect its match set into a
            // scratch set first so that it can be compared with the filter.
            temp.Clear();
            matcher.AddMatchSetTo(temp);
            if (!filter.ContainsSome(temp))
            {
                return;
            }
            tempSource.AddAll(temp);
        }
    }

    // if we made our way through the gauntlet, add to source/target
    sourceSet.AddAll(tempSource);
    output.AddReplacementSetTo(targetSet);
}
/// <summary>
/// Implementation of the <see cref="IUnicodeMatcher"/> API. Adds to the given set
/// every character that this object may match.
/// </summary>
/// <remarks>
/// Each code point of the pattern is looked up via <c>data.LookupMatcher</c>.
/// When a matcher is found, that matcher contributes its own match set;
/// otherwise the code point itself is added.
/// </remarks>
/// <param name="toUnionTo">The set into which the source characters are unioned.</param>
public virtual void AddMatchSetTo(UnicodeSet toUnionTo)
{
    int index = 0;
    while (index < pattern.Length)
    {
        int codePoint = UTF16.CharAt(pattern, index);
        IUnicodeMatcher nested = data.LookupMatcher(codePoint);

        if (nested != null)
        {
            // The code point stands for a matcher: let it add its match set.
            nested.AddMatchSetTo(toUnionTo);
        }
        else
        {
            // No matcher found for this code point: add it directly.
            toUnionTo.Add(codePoint);
        }

        // Advance by the number of UTF-16 code units this code point occupies.
        index += UTF16.GetCharCount(codePoint);
    }
}