Ejemplo n.º 1
0
        /// <summary>
        /// Implements UnicodeMatcher: renders this matcher as rule pattern text.
        /// </summary>
        ///
        /// <param name="escapeUnprintable">passed through unchanged to every
        /// AppendToRule call and to each nested matcher's ToPattern</param>
        /// <returns>the accumulated pattern text, wrapped in parentheses when
        /// segmentNumber is greater than zero</returns>
        public virtual String ToPattern(bool escapeUnprintable)
        {
            StringBuilder output       = new StringBuilder();
            StringBuilder pendingQuote = new StringBuilder();

            // A positive segment number marks this matcher as a segment,
            // which is written inside parentheses.
            if (segmentNumber > 0)
            {
                output.Append('(');
            }

            // The pattern is walked in 16-bit code units rather than code
            // points: stand-ins are always in the BMP, so a per-unit lookup
            // is sufficient.
            for (int pos = 0; pos < pattern.Length; ++pos)
            {
                char           unit    = pattern[pos];
                UnicodeMatcher standIn = data.LookupMatcher(unit);
                if (standIn != null)
                {
                    // The unit stands in for a nested matcher: emit that
                    // matcher's own pattern text.
                    IBM.ICU.Impl.Utility.AppendToRule(output, standIn.ToPattern(escapeUnprintable),
                                                      true, escapeUnprintable, pendingQuote);
                }
                else
                {
                    // Plain literal code unit.
                    IBM.ICU.Impl.Utility.AppendToRule(output, unit, false, escapeUnprintable,
                                                      pendingQuote);
                }
            }

            if (segmentNumber > 0)
            {
                output.Append(')');
            }

            // Flush whatever is still held in the quote buffer out to the result.
            IBM.ICU.Impl.Utility.AppendToRule(output, -1, true, escapeUnprintable, pendingQuote);
            return output.ToString();
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Internal method. Reports whether this rule matches the given index
        /// value. The index value is an 8-bit integer, 0..255, representing the
        /// low byte of the first character of the key. The rule matches when the
        /// value matches the first character of the key, or when that first
        /// character is a set containing any character whose low byte equals the
        /// index value. A rule that has only ante context, as in foo)>bar,
        /// matches any key.
        /// </summary>
        ///
        /// <param name="v">the index value to test</param>
        /// <returns>the answer of the key matcher if there is one, otherwise of
        /// the post-context matcher; true when neither exists</returns>
        internal bool MatchesIndexValue(int v)
        {
            // Prefer the key; fall back to the post context when there is no key.
            UnicodeMatcher target = key;
            if (target == null)
            {
                target = postContext;
            }

            // With neither a key nor a post context, every key matches.
            if (target == null)
            {
                return true;
            }
            return target.MatchesIndexValue(v);
        }
Ejemplo n.º 3
0
 /// <summary>
 /// Appends the pattern of the given matcher, as a literal, to the given
 /// rule. Does nothing when the matcher reference is null.
 /// </summary>
 ///
 /// <param name="rule">the rule text being built</param>
 /// <param name="matcher">the matcher whose pattern is appended; may be null</param>
 /// <param name="escapeUnprintable">passed to the matcher's ToPattern and on
 /// to the string overload of AppendToRule</param>
 /// <param name="quoteBuf">quote buffer passed on to the string overload of
 /// AppendToRule</param>
 public static void AppendToRule(StringBuilder rule, UnicodeMatcher matcher,
                                 bool escapeUnprintable, StringBuilder quoteBuf)
 {
     if (matcher == null)
     {
         // No matcher, nothing to append.
         return;
     }

     string literal = matcher.ToPattern(escapeUnprintable);
     AppendToRule(rule, literal, true, escapeUnprintable, quoteBuf);
 }
Ejemplo n.º 4
0
        /// <summary>
        /// Implements UnicodeMatcher: tests the given index value against the
        /// first code point of the pattern.
        /// </summary>
        ///
        /// <param name="v">the index value to test</param>
        /// <returns>true for an empty pattern; otherwise the result of the
        /// stand-in matcher for the first code point if one is registered, or
        /// whether the low byte of that code point equals <paramref name="v"/></returns>
        public virtual bool MatchesIndexValue(int v)
        {
            // An empty pattern matches every index value.
            if (pattern.Length == 0)
            {
                return true;
            }

            int            first   = IBM.ICU.Text.UTF16.CharAt(pattern, 0);
            UnicodeMatcher standIn = data.LookupMatcher(first);
            if (standIn != null)
            {
                // The first code point stands in for a nested matcher; let it decide.
                return standIn.MatchesIndexValue(v);
            }

            // Literal code point: compare its low byte with the index value.
            return (first & 0xFF) == v;
        }
Ejemplo n.º 5
0
 /// <summary>
 /// Creates a quantifier over the given matcher with the given repetition
 /// bounds.
 /// </summary>
 ///
 /// <param name="theMatcher">the matcher stored in the matcher field; must not be null</param>
 /// <param name="theMinCount">the value stored in minCount; must not be negative</param>
 /// <param name="theMaxCount">the value stored in maxCount; must not be negative
 /// and must not be less than <paramref name="theMinCount"/></param>
 /// <exception cref="ArgumentException">if the matcher is null, either count is
 /// negative, or the minimum exceeds the maximum</exception>
 public Quantifier(UnicodeMatcher theMatcher, int theMinCount,
                   int theMaxCount)
 {
     if (theMatcher == null)
     {
         throw new ArgumentException("The matcher must not be null.", "theMatcher");
     }

     // Validate the incoming arguments rather than the minCount/maxCount
     // fields: the fields are only assigned further down, so checking them
     // here would not examine the caller's values at all.
     if (theMinCount < 0 || theMaxCount < 0 || theMinCount > theMaxCount)
     {
         throw new ArgumentException(
             "Counts must be non-negative and the minimum must not exceed the maximum.",
             "theMinCount");
     }

     matcher  = theMatcher;
     minCount = theMinCount;
     maxCount = theMaxCount;
 }
Ejemplo n.º 6
0
        /// <summary>
        /// Implementation of UnicodeMatcher API. Adds every character that this
        /// object may match to the given set.
        /// </summary>
        ///
        /// <param name="toUnionTo">the set into which to union the source characters</param>
        public virtual void AddMatchSetTo(UnicodeSet toUnionTo)
        {
            int index = 0;

            while (index < pattern.Length)
            {
                // Read one full code point and step past all of its code units.
                int codePoint = IBM.ICU.Text.UTF16.CharAt(pattern, index);
                index += IBM.ICU.Text.UTF16.GetCharCount(codePoint);

                UnicodeMatcher standIn = data.LookupMatcher(codePoint);
                if (standIn != null)
                {
                    // Stand-in for a nested matcher: it contributes its own match set.
                    standIn.AddMatchSetTo(toUnionTo);
                }
                else
                {
                    // Literal code point.
                    toUnionTo.Add(codePoint);
                }
            }
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Adds every character that may be modified by this rule to the given
        /// set. Only the key portion of the pattern is examined: the range that
        /// starts at anteContextLength and spans keyLength code units.
        /// </summary>
        ///
        /// <param name="toUnionTo">the set into which the characters are unioned</param>
        internal void AddSourceSetTo(UnicodeSet toUnionTo)
        {
            int pos = anteContextLength;
            int end = anteContextLength + keyLength;

            while (pos < end)
            {
                // Read one full code point and step past all of its code units.
                int codePoint = IBM.ICU.Text.UTF16.CharAt(pattern, pos);
                pos += IBM.ICU.Text.UTF16.GetCharCount(codePoint);

                UnicodeMatcher standIn = data.LookupMatcher(codePoint);
                if (standIn != null)
                {
                    // Stand-in for a nested matcher: it contributes its own match set.
                    standIn.AddMatchSetTo(toUnionTo);
                }
                else
                {
                    // Literal code point.
                    toUnionTo.Add(codePoint);
                }
            }
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Implement UnicodeMatcher. Tries to match the whole pattern against
        /// <paramref name="text"/>, starting at <c>offset[0]</c>. The match runs
        /// backwards when <paramref name="limit"/> is less than <c>offset[0]</c>
        /// and forwards otherwise. Each pattern code unit is either compared
        /// literally with the text or, when data.LookupMatcher returns a matcher
        /// for it, delegated to that matcher.
        /// </summary>
        ///
        /// <param name="text">the text to match against</param>
        /// <param name="offset">single-element array; <c>offset[0]</c> is the start
        /// position on input and is overwritten with the final cursor position
        /// only when U_MATCH is returned</param>
        /// <param name="limit">the position the cursor must not cross; also selects
        /// the direction of the match</param>
        /// <param name="incremental">when true, a forward match whose cursor reaches
        /// <paramref name="limit"/> before the pattern is exhausted returns
        /// U_PARTIAL_MATCH; also passed through to nested matchers</param>
        /// <returns>U_MATCH when the whole pattern matched, U_MISMATCH when a
        /// literal comparison failed, U_PARTIAL_MATCH as described above, or the
        /// non-U_MATCH result of a nested matcher</returns>
        public virtual int Matches(Replaceable text, int[] offset, int limit,
                                   bool incremental)
        {
            // Note (1): We process text in 16-bit code units, rather than
            // 32-bit code points. This works because stand-ins are
            // always in the BMP and because we are doing a literal match
            // operation, which can be done 16-bits at a time.
            int i;

            // Work on a private copy of the position so that offset[0] is left
            // untouched unless the whole pattern matches.
            int[] cursor = new int[] { offset[0] };
            if (limit < cursor[0])
            {
                // Match in the reverse direction
                for (i = pattern.Length - 1; i >= 0; --i)
                {
                    char           keyChar = pattern[i]; // OK; see note (1) above
                    UnicodeMatcher subm    = data.LookupMatcher(keyChar);
                    if (subm == null)
                    {
                        // Literal code unit: must still be past the limit and
                        // equal to the text at the cursor.
                        if (cursor[0] > limit && keyChar == text.CharAt(cursor[0]))       // OK; see note (1) above
                        {
                            --cursor[0];
                        }
                        else
                        {
                            return(IBM.ICU.Text.UnicodeMatcher_Constants.U_MISMATCH);
                        }
                    }
                    else
                    {
                        // Stand-in: the nested matcher receives the cursor array
                        // itself; anything other than a full match is returned
                        // to the caller as-is.
                        int m = subm.Matches(text, cursor, limit, incremental);
                        if (m != IBM.ICU.Text.UnicodeMatcher_Constants.U_MATCH)
                        {
                            return(m);
                        }
                    }
                }
                // Record the match position, but adjust for a normal
                // forward start, limit, and only if a prior match does not
                // exist -- we want the rightmost match.
                if (matchStart < 0)
                {
                    matchStart = cursor[0] + 1;
                    matchLimit = offset[0] + 1;
                }
            }
            else
            {
                // Match in the forward direction
                for (i = 0; i < pattern.Length; ++i)
                {
                    if (incremental && cursor[0] == limit)
                    {
                        // We've reached the context limit without a mismatch and
                        // without completing our match.
                        return(IBM.ICU.Text.UnicodeMatcher_Constants.U_PARTIAL_MATCH);
                    }
                    char           keyChar_0 = pattern[i]; // OK; see note (1) above
                    UnicodeMatcher subm_1    = data.LookupMatcher(keyChar_0);
                    if (subm_1 == null)
                    {
                        // Don't need the cursor < limit check if
                        // incremental is true (because it's done above); do need
                        // it otherwise.
                        if (cursor[0] < limit && keyChar_0 == text.CharAt(cursor[0]))       // OK; see note (1) above
                        {
                            ++cursor[0];
                        }
                        else
                        {
                            return(IBM.ICU.Text.UnicodeMatcher_Constants.U_MISMATCH);
                        }
                    }
                    else
                    {
                        // Stand-in: the nested matcher receives the cursor array
                        // itself; anything other than a full match is returned
                        // to the caller as-is.
                        int m_2 = subm_1.Matches(text, cursor, limit, incremental);
                        if (m_2 != IBM.ICU.Text.UnicodeMatcher_Constants.U_MATCH)
                        {
                            return(m_2);
                        }
                    }
                }
                // Record the match position (a forward match always overwrites
                // any previously recorded one)
                matchStart = offset[0];
                matchLimit = cursor[0];
            }

            // Full match: publish the final cursor position to the caller.
            offset[0] = cursor[0];
            return(IBM.ICU.Text.UnicodeMatcher_Constants.U_MATCH);
        }