/// <summary>
/// Implements the UnicodeMatcher API: renders this matcher as rule-syntax text.
/// Each 16-bit unit of the pattern is either emitted as a literal or, when
/// the data object maps it to a matcher, replaced by that matcher's own
/// pattern. When <c>segmentNumber</c> is positive the output is wrapped in
/// parentheses.
/// </summary>
///
/// <param name="escapeUnprintable">forwarded unchanged to every AppendToRule and nested ToPattern call</param>
/// <returns>the assembled pattern text</returns>
public virtual String ToPattern(bool escapeUnprintable)
{
    StringBuilder output = new StringBuilder();
    StringBuilder pendingQuote = new StringBuilder();
    bool isSegment = segmentNumber > 0;

    if (isSegment)
    {
        output.Append('(');
    }

    // The pattern is walked in 16-bit code units rather than code points.
    foreach (char unit in pattern)
    {
        UnicodeMatcher nested = data.LookupMatcher(unit);
        if (nested != null)
        {
            IBM.ICU.Impl.Utility.AppendToRule(output, nested.ToPattern(escapeUnprintable), true, escapeUnprintable, pendingQuote);
        }
        else
        {
            IBM.ICU.Impl.Utility.AppendToRule(output, unit, false, escapeUnprintable, pendingQuote);
        }
    }

    if (isSegment)
    {
        output.Append(')');
    }

    // Passing -1 flushes whatever is still held in the quote buffer.
    IBM.ICU.Impl.Utility.AppendToRule(output, -1, true, escapeUnprintable, pendingQuote);
    return output.ToString();
}
/// <summary>
/// Internal method. Reports whether this rule matches the given index value.
/// The question is delegated to the key when there is one, otherwise to the
/// post context. A rule that has neither matches every index value.
/// </summary>
///
/// <param name="v">the index value to test (documented as the low byte, 0..255, of the first key character)</param>
/// <returns>true if the rule can match a key with that index value</returns>
internal bool MatchesIndexValue(int v)
{
    UnicodeMatcher target = key;
    if (target == null)
    {
        // No key: fall back to the post context.
        target = postContext;
    }

    if (target == null)
    {
        // Neither key nor post context: the rule matches any key.
        return true;
    }

    return target.MatchesIndexValue(v);
}
/// <summary>
/// Appends the pattern of the given matcher to the rule as a literal.
/// A null matcher is accepted and leaves the rule untouched.
/// </summary>
///
/// <param name="rule">the rule text being built</param>
/// <param name="matcher">the matcher whose pattern is appended; may be null</param>
/// <param name="escapeUnprintable">forwarded to the matcher's ToPattern and to the string overload of AppendToRule</param>
/// <param name="quoteBuf">quote buffer forwarded to the string overload of AppendToRule</param>
public static void AppendToRule(StringBuilder rule, UnicodeMatcher matcher, bool escapeUnprintable, StringBuilder quoteBuf)
{
    if (matcher == null)
    {
        return;
    }

    String matcherPattern = matcher.ToPattern(escapeUnprintable);
    AppendToRule(rule, matcherPattern, true, escapeUnprintable, quoteBuf);
}
/// <summary>
/// Implements the UnicodeMatcher API: tests whether this matcher could match
/// a character whose low byte equals the given index value. Only the first
/// code point of the pattern is consulted; an empty pattern matches every
/// index value.
/// </summary>
///
/// <param name="v">the index value to compare against</param>
/// <returns>true if the first pattern element is compatible with the index value</returns>
public virtual bool MatchesIndexValue(int v)
{
    if (pattern.Length == 0)
    {
        return true;
    }

    int first = IBM.ICU.Text.UTF16.CharAt(pattern, 0);
    UnicodeMatcher nested = data.LookupMatcher(first);
    if (nested != null)
    {
        // The first element stands for a matcher: let it decide.
        return nested.MatchesIndexValue(v);
    }

    // Plain literal: compare its low byte with the index value.
    return (first & 0xFF) == v;
}
/// <summary>
/// Creates a quantifier that applies the given matcher between
/// <paramref name="theMinCount"/> and <paramref name="theMaxCount"/> times.
/// </summary>
///
/// <param name="theMatcher">the matcher to repeat; must not be null</param>
/// <param name="theMinCount">minimum repetition count; must be non-negative</param>
/// <param name="theMaxCount">maximum repetition count; must be non-negative and not less than <paramref name="theMinCount"/></param>
/// <exception cref="ArgumentException">if the matcher is null or the counts are negative or out of order</exception>
public Quantifier(UnicodeMatcher theMatcher, int theMinCount, int theMaxCount)
{
    if (theMatcher == null)
    {
        throw new ArgumentException("theMatcher must not be null");
    }

    // Validate the incoming arguments, not the minCount/maxCount fields:
    // the fields are only assigned further down, so testing them here
    // would never reject a bad count.
    if (theMinCount < 0 || theMaxCount < 0 || theMinCount > theMaxCount)
    {
        throw new ArgumentException("Counts must satisfy 0 <= theMinCount <= theMaxCount");
    }

    matcher = theMatcher;
    minCount = theMinCount;
    maxCount = theMaxCount;
}
/// <summary>
/// Implementation of the UnicodeMatcher API. Adds every character this
/// object may match to the given set. The pattern is walked one code point
/// at a time: a code point that the data object maps to a matcher contributes
/// that matcher's match set, and any other code point is added directly.
/// </summary>
///
/// <param name="toUnionTo">the set into which to union the source characters</param>
public virtual void AddMatchSetTo(UnicodeSet toUnionTo)
{
    int index = 0;
    while (index < pattern.Length)
    {
        int codePoint = IBM.ICU.Text.UTF16.CharAt(pattern, index);
        UnicodeMatcher nested = data.LookupMatcher(codePoint);
        if (nested != null)
        {
            nested.AddMatchSetTo(toUnionTo);
        }
        else
        {
            toUnionTo.Add(codePoint);
        }

        // Step over one or two code units, depending on the code point.
        index += IBM.ICU.Text.UTF16.GetCharCount(codePoint);
    }
}
/// <summary>
/// Adds to the given set every character that this rule may modify. Only
/// the key portion of the pattern is examined, i.e. the
/// <c>keyLength</c> code units that start at <c>anteContextLength</c>.
/// </summary>
///
/// <param name="toUnionTo">the set that receives the characters</param>
internal void AddSourceSetTo(UnicodeSet toUnionTo)
{
    int keyEnd = anteContextLength + keyLength;
    int index = anteContextLength;
    while (index < keyEnd)
    {
        int codePoint = IBM.ICU.Text.UTF16.CharAt(pattern, index);
        int width = IBM.ICU.Text.UTF16.GetCharCount(codePoint);

        UnicodeMatcher nested = data.LookupMatcher(codePoint);
        if (nested != null)
        {
            // Stand-in for a matcher: contribute everything it can match.
            nested.AddMatchSetTo(toUnionTo);
        }
        else
        {
            toUnionTo.Add(codePoint);
        }

        index += width;
    }
}
/// <summary>
/// Implements the UnicodeMatcher API: matches the pattern against the text
/// starting at <c>offset[0]</c>. When <paramref name="limit"/> lies before
/// the starting offset the pattern is matched backwards (last unit first);
/// otherwise it is matched forwards. On a full match <c>offset[0]</c> is
/// advanced past the matched text and the match range is recorded in
/// <c>matchStart</c>/<c>matchLimit</c>.
/// </summary>
///
/// <param name="text">the text being matched</param>
/// <param name="offset">single-element array holding the start position; updated only on U_MATCH</param>
/// <param name="limit">the position at which matching must stop</param>
/// <param name="incremental">when true, a forward match that reaches the limit before the pattern is exhausted yields U_PARTIAL_MATCH</param>
/// <returns>U_MATCH, U_MISMATCH, U_PARTIAL_MATCH, or whatever non-match status a nested matcher returned</returns>
public virtual int Matches(Replaceable text, int[] offset, int limit, bool incremental)
{
    // Note: text is processed in 16-bit code units rather than 32-bit code
    // points. This works because stand-ins are always in the BMP and a
    // literal match can be carried out 16 bits at a time.
    int[] pos = { offset[0] };

    if (limit < pos[0])
    {
        // Backward match: consume the pattern from its last unit to its first.
        for (int idx = pattern.Length - 1; idx >= 0; --idx)
        {
            char unit = pattern[idx];
            UnicodeMatcher nested = data.LookupMatcher(unit);
            if (nested != null)
            {
                int status = nested.Matches(text, pos, limit, incremental);
                if (status != IBM.ICU.Text.UnicodeMatcher_Constants.U_MATCH)
                {
                    return status;
                }
                continue;
            }

            // Literal unit: it must be inside the limit and equal to the text.
            if (pos[0] <= limit || unit != text.CharAt(pos[0]))
            {
                return IBM.ICU.Text.UnicodeMatcher_Constants.U_MISMATCH;
            }
            --pos[0];
        }

        // Record the match range in forward (start, limit) form, but only
        // when no earlier match is recorded -- the rightmost match wins.
        if (matchStart < 0)
        {
            matchStart = pos[0] + 1;
            matchLimit = offset[0] + 1;
        }
    }
    else
    {
        for (int idx = 0; idx < pattern.Length; ++idx)
        {
            if (incremental && pos[0] == limit)
            {
                // Ran into the context limit with no mismatch so far but
                // with pattern units still left to match.
                return IBM.ICU.Text.UnicodeMatcher_Constants.U_PARTIAL_MATCH;
            }

            char unit = pattern[idx];
            UnicodeMatcher nested = data.LookupMatcher(unit);
            if (nested != null)
            {
                int status = nested.Matches(text, pos, limit, incremental);
                if (status != IBM.ICU.Text.UnicodeMatcher_Constants.U_MATCH)
                {
                    return status;
                }
                continue;
            }

            // The bounds test is redundant when incremental is true (it was
            // already done above) but is required otherwise.
            if (pos[0] >= limit || unit != text.CharAt(pos[0]))
            {
                return IBM.ICU.Text.UnicodeMatcher_Constants.U_MISMATCH;
            }
            ++pos[0];
        }

        // Record the match range.
        matchStart = offset[0];
        matchLimit = pos[0];
    }

    offset[0] = pos[0];
    return IBM.ICU.Text.UnicodeMatcher_Constants.U_MATCH;
}