Пример #1
0
        // Scans |input| from position 0 and passes each accepted match (after
        // pad()) to |deliver|. At most |n| matches are delivered; a negative |n|
        // is replaced by endPos() + 1. An empty match that starts exactly where
        // the previous match ended is skipped and does not count against |n|.
        private void allMatches(MachineInput input, int n, DeliverFunc deliver)
        {
            int limit      = input.endPos();
            int maxMatches = n < 0 ? limit + 1 : n;

            int cursor    = 0;  // where the next search starts
            int delivered = 0;  // number of matches handed to |deliver| so far
            int lastEnd   = -1; // end offset of the previous match, -1 before the first

            while (delivered < maxMatches && cursor <= limit)
            {
                int[] span = doExecute(input, cursor, UNANCHORED, prog.numCap);
                if (span == null || span.Length == 0)
                {
                    // No further matches.
                    return;
                }

                int  matchStart    = span[0];
                int  matchEnd      = span[1];
                bool emptyAtCursor = matchEnd == cursor;

                // An empty match directly after the previous match is not reported.
                bool skip = emptyAtCursor && matchStart == lastEnd;

                if (emptyAtCursor)
                {
                    // Empty match: move forward by one step so the scan makes
                    // progress; a negative step result means EOF.
                    int stepped = input.step(cursor);
                    cursor = stepped < 0 ? limit + 1 : cursor + (stepped & 0x7);
                }
                else
                {
                    // Non-empty match: resume right after it.
                    cursor = matchEnd;
                }

                lastEnd = matchEnd;

                if (!skip)
                {
                    deliver(pad(span));
                    delivered++;
                }
            }
        }
Пример #2
0
        // match() runs the machine over the input |in| starting at |pos| with the
        // RE2 Anchor |anchor|.
        // It reports whether a match was found.
        // If so, matchcap holds the submatch information.
        //
        // Returns false without scanning when re2.cond is Utils.EMPTY_ALL, or when
        // |anchor| is ANCHOR_START/ANCHOR_BOTH and |pos| is not 0.
        //
        // The loop keeps a one-rune lookahead: (rune, width) describe the input at
        // |pos| and (rune1, width1) describe the input at |pos + width|. Each
        // @in.step() result is unpacked as rune = r >> 3 and width = r & 7.
        public bool match(MachineInput @in, int pos, int anchor)
        {
            int startCond = re2.cond;

            if (startCond == Utils.EMPTY_ALL)
            {
                // impossible
                return(false);
            }

            // A start-anchored search can only succeed from the beginning of the input.
            if ((anchor == RE2.ANCHOR_START || anchor == RE2.ANCHOR_BOTH) && pos != 0)
            {
                return(false);
            }

            // Reset the per-run state: no match yet, first prog.numCap capture slots unset (-1).
            matched = false;
            for (int jj = 0; jj < prog.numCap; ++jj)
            {
                matchcap[jj] = -1;
            }
            Queue runq = q0, nextq = q1;
            // Decode the rune at |pos| ...
            int   r      = @in.step(pos);
            int   rune   = r >> 3;
            int   width  = r & 7;
            // ... and, unless that was EOF, the rune following it (the lookahead).
            int   rune1  = -1;
            int   width1 = 0;

            if (r != MachineInput.EOF)
            {
                r      = @in.step(pos + width);
                rune1  = r >> 3;
                width1 = r & 7;
            }

            int flag; // bitmask of EMPTY_* flags

            // At position 0 the context is computed with -1 as the preceding rune;
            // elsewhere the input is asked for the context at |pos|.
            if (pos == 0)
            {
                flag = Utils.emptyOpContext(-1, rune);
            }
            else
            {
                flag = @in.context(pos);
            }

            for (;;)
            {
                // With no runnable threads, decide whether to stop or to skip ahead.
                if (runq.isEmpty())
                {
                    if ((startCond & Utils.EMPTY_BEGIN_TEXT) != 0 && pos != 0)
                    {
                        // Anchored match, past beginning of text.
                        break;
                    }

                    if (matched)
                    {
                        // Have match; finished exploring alternatives.
                        break;
                    }

                    if (re2.prefix.Length != 0 && rune1 != re2.prefixRune && @in.canCheckPrefix())
                    {
                        // Match requires literal prefix; fast search for it.
                        int advance = @in.index(re2, pos);
                        if (advance < 0)
                        {
                            // index() reported a negative offset: stop searching.
                            break;
                        }

                        // Jump forward by |advance| and re-decode both the current
                        // rune and the lookahead at the new position.
                        pos   += advance;
                        r      = @in.step(pos);
                        rune   = r >> 3;
                        width  = r & 7;
                        r      = @in.step(pos + width);
                        rune1  = r >> 3;
                        width1 = r & 7;
                    }
                }

                if (!matched && (pos == 0 || anchor == RE2.UNANCHORED))
                {
                    // If we are anchoring at begin then only add threads that begin
                    // at |pos| = 0.
                    if (ncap > 0)
                    {
                        // Record the candidate match start in capture slot 0.
                        matchcap[0] = pos;
                    }

                    add(runq, prog.start, pos, matchcap, flag, null);
                }

                // Recompute the flags from the current rune and the lookahead
                // before running the step over [pos, pos + width).
                flag = Utils.emptyOpContext(rune, rune1);
                step(runq, nextq, pos, pos + width, rune, flag, anchor, pos == @in.endPos());
                if (width == 0)
                {
                    // EOF
                    break;
                }

                if (ncap == 0 && matched)
                {
                    // Found a match and not paying attention
                    // to where it is, so any match will do.
                    break;
                }

                // Advance one rune: the lookahead becomes the current rune, and a
                // new lookahead is decoded unless the new current rune is -1.
                pos  += width;
                rune  = rune1;
                width = width1;
                if (rune != -1)
                {
                    r      = @in.step(pos + width);
                    rune1  = r >> 3;
                    width1 = r & 7;
                }

                // Swap the queues: what step() wrote to nextq is run next.
                Queue tmpq = runq;
                runq  = nextq;
                nextq = tmpq;
            }

            // NOTE(review): only nextq is passed to free(); presumably step() leaves
            // runq drained — confirm against step()/free().
            free(nextq);
            return(matched);
        }
Пример #3
0
        /// <summary>
        /// Returns a copy of <paramref name="src"/> in which matches for this regexp have been
        /// replaced by the return value of <paramref name="repl"/> (whose argument is the matched
        /// string). No support is provided for expressions (e.g. <c>\1</c> or <c>$1</c>) in the
        /// replacement string.
        /// </summary>
        /// <param name="src">The text to search.</param>
        /// <param name="repl">Called once per replaced match with the matched substring.</param>
        /// <param name="maxReplaces">
        /// Replacement limit. The limit is checked after each loop iteration, so the loop stops
        /// as soon as the number of replacements made reaches this value.
        /// </param>
        /// <returns>The text with the replacements applied.</returns>
        // This is visible for testing.
        public string replaceAllFunc(string src, ReplaceFunc repl, int maxReplaces)
        {
            int           lastMatchEnd = 0; // end position of the most recent match
            int           searchPos    = 0; // position where we next look for a match
            StringBuilder buf          = new StringBuilder();
            MachineInput  input        = MachineInput.fromUTF16(src);
            int           numReplaces  = 0;

            while (searchPos <= src.Length)
            {
                int[] a = doExecute(input, searchPos, UNANCHORED, 2);
                if (a == null || a.Length == 0)
                {
                    break; // no more matches
                }

                // a[0] is the match start and a[1] the (exclusive) match end.
                // String.Substring takes (startIndex, length), not (start, end),
                // so every span below is converted to a length explicitly.

                // Copy the unmatched characters before this match.
                buf.Append(src.Substring(lastMatchEnd, a[0] - lastMatchEnd));

                // Now insert a copy of the replacement string, but not for a
                // match of the empty string immediately after another match.
                // (Otherwise, we get double replacement for patterns that
                // match both empty and nonempty strings.)
                // FIXME(adonovan), FIXME(afrozm) - JDK seems to be doing exactly this
                // put a replacement for a pattern that also matches empty and non-empty
                // strings. The fix would not just be a[1] >= lastMatchEnd, there are a
                // few corner cases in that as well, and there are tests which will fail
                // when that case is touched (happens only at the end of the input string
                // though).
                if (a[1] > lastMatchEnd || a[0] == 0)
                {
                    buf.Append(repl(src.Substring(a[0], a[1] - a[0])));
                    // Increment the replace count.
                    ++numReplaces;
                }

                lastMatchEnd = a[1];

                // Advance past this match; always advance at least one character.
                int width = input.step(searchPos) & 0x7;
                if (searchPos + width > a[1])
                {
                    searchPos += width;
                }
                else if (searchPos + 1 > a[1])
                {
                    // This clause is only needed at the end of the input
                    // string, where the step width is 0.
                    searchPos++;
                }
                else
                {
                    searchPos = a[1];
                }

                if (numReplaces >= maxReplaces)
                {
                    // Should never be greater though.
                    break;
                }
            }

            // Copy the unmatched characters after the last match.
            buf.Append(src.Substring(lastMatchEnd));

            return(buf.ToString());
        }