// TODO Handle the case where we have :: [a] ; a > |b ; b > c ;
// TODO Merge into r.addSourceTargetSet, to avoid duplicate testing
/// <summary>
/// Passes every rule in <c>ruleVector</c>, in order, the shared
/// <paramref name="sourceSet"/> and <paramref name="targetSet"/> together with a
/// running filter, so each rule can contribute to the two sets.
/// </summary>
/// <param name="filter">Initial filter; a new <c>UnicodeSet</c> is built from it and
/// that working set, not this argument, is what the rules receive.</param>
/// <param name="sourceSet">Set handed to each rule for source contributions.</param>
/// <param name="targetSet">Set handed to each rule for target contributions.</param>
internal virtual void AddSourceTargetSet(UnicodeSet filter, UnicodeSet sourceSet, UnicodeSet targetSet)
{
    UnicodeSet workingFilter = new UnicodeSet(filter);
    UnicodeSet scratch = new UnicodeSet();

    foreach (TransliterationRule rule in ruleVector)
    {
        // The scratch set is cleared for every rule; whatever the rule leaves in it
        // is folded into the working filter before the next rule runs.
        rule.AddSourceTargetSet(workingFilter, sourceSet, targetSet, scratch.Clear());
        workingFilter.AddAll(scratch);
    }
}
/// <summary>
/// Runs each transliterator in <c>trans</c>, in order, collecting their source and
/// target contributions. The targets produced by one step are added to the filter
/// seen by the steps that follow it.
/// </summary>
/// <param name="filter">Incoming filter; it is passed through
/// <c>GetFilterAsUnicodeSet</c> and the result is copied into a working set.</param>
/// <param name="sourceSet">Set handed to each step for source contributions.</param>
/// <param name="targetSet">Set that receives the targets produced by every step.</param>
/// <seealso cref="Transliterator.AddSourceTargetSet(UnicodeSet, UnicodeSet, UnicodeSet)"/>
public override void AddSourceTargetSet(UnicodeSet filter, UnicodeSet sourceSet, UnicodeSet targetSet)
{
    UnicodeSet runningFilter = new UnicodeSet(GetFilterAsUnicodeSet(filter));
    UnicodeSet stepTargets = new UnicodeSet();

    foreach (var step in trans)
    {
        // Targets emitted by one step can be consumed by later steps regardless of
        // the original filter, so capture just this step's targets in isolation...
        stepTargets.Clear();
        step.AddSourceTargetSet(runningFilter, sourceSet, stepTargets);

        // ...then publish them to the caller and widen the filter for the next step.
        targetSet.AddAll(stepTargets);
        runningFilter.AddAll(stepTargets);
    }
}
/// <summary>
/// Find the source and target sets, subject to the input filter.
/// There is a known issue with filters containing multiple characters.
/// </summary>
/// <param name="filter">Filter that every position of the key must be able to match;
/// if any position has no overlap with it, the method returns without changing
/// <paramref name="sourceSet"/> or <paramref name="targetSet"/>.</param>
/// <param name="sourceSet">Receives the characters/match sets collected from the key
/// when all positions pass the filter.</param>
/// <param name="targetSet">Receives the replacement set of <c>output</c> when all
/// positions pass the filter.</param>
/// <param name="revisiting">Not read or written by this method.</param>
// TODO: Problem: the rule is [{ab}]c > x
// The filter is [a{bc}].
// If the input is abc, then the rule will work.
// However, following code applying the filter won't catch that case.
internal void AddSourceTargetSet(UnicodeSet filter, UnicodeSet sourceSet, UnicodeSet targetSet, UnicodeSet revisiting)
{
    int limit = anteContextLength + keyLength;
    UnicodeSet tempSource = new UnicodeSet();
    UnicodeSet temp = new UnicodeSet();

    // Walk the key portion of the pattern one code point at a time.
    // Only if the filter matches something at EVERY position do we commit
    // tempSource to sourceSet (and the replacement set to targetSet).
    for (int i = anteContextLength; i < limit;)
    {
        int ch = UTF16.CharAt(pattern, i);
        i += UTF16.GetCharCount(ch);

        IUnicodeMatcher matcher = data.LookupMatcher(ch);
        if (matcher == null)
        {
            // No matcher registered for this code point: test the code point itself.
            if (!filter.Contains(ch))
            {
                return;
            }
            tempSource.Add(ch);
            continue;
        }

        // Type-test instead of casting and catching InvalidCastException: a matcher
        // that is not a UnicodeSet is an expected case, not an exceptional one.
        UnicodeSet matcherSet = matcher as UnicodeSet;
        if (matcherSet != null)
        {
            if (!filter.ContainsSome(matcherSet))
            {
                return;
            }
            matcher.AddMatchSetTo(tempSource);
        }
        else
        {
            // Not a UnicodeSet: materialize its match set first, then test that
            // against the filter.
            temp.Clear();
            matcher.AddMatchSetTo(temp);
            if (!filter.ContainsSome(temp))
            {
                return;
            }
            tempSource.AddAll(temp);
        }
    }

    // If we made our way through the gauntlet, add to source/target.
    sourceSet.AddAll(tempSource);
    output.AddReplacementSetTo(targetSet);
}