// Walks the input from left to right and hands every accepted match to
// |deliver|, stopping after |n| matches (a negative |n| means "no limit":
// it is replaced by endPos() + 1) or when no further match is found.
// An empty match that starts exactly where the previous match ended is
// skipped rather than delivered.
private void allMatches(MachineInput input, int n, DeliverFunc deliver)
{
    int limit = input.endPos();
    int maxMatches = n < 0 ? limit + 1 : n;

    int cursor = 0;      // where the next search starts
    int delivered = 0;   // number of matches handed to |deliver| so far
    int lastEnd = -1;    // end offset of the most recent match (accepted or not)

    while (delivered < maxMatches && cursor <= limit)
    {
        int[] span = doExecute(input, cursor, UNANCHORED, prog.numCap);
        if (span == null || span.Length == 0)
        {
            return;
        }

        int matchEnd = span[1];
        bool endsAtCursor = matchEnd == cursor;

        // An empty match right after a previous match is not allowed.
        bool skip = endsAtCursor && span[0] == lastEnd;

        if (endsAtCursor)
        {
            // Empty match: move forward by one rune, or past the end on EOF,
            // so that the search always makes progress.
            int stepped = input.step(cursor);
            cursor = stepped < 0 ? limit + 1 : cursor + (stepped & 0x7);
        }
        else
        {
            cursor = matchEnd;
        }
        lastEnd = matchEnd;

        if (!skip)
        {
            deliver(pad(span));
            delivered++;
        }
    }
}
// match() runs the machine over the input |in| starting at |pos| with the
// RE2 Anchor |anchor|.
// It reports whether a match was found.
// If so, matchcap holds the submatch information.
//
// Two cases return false up front without running the machine: re2.cond
// equal to Utils.EMPTY_ALL, and a start-anchored request (ANCHOR_START or
// ANCHOR_BOTH) with a non-zero |pos|.
public bool match(MachineInput @in, int pos, int anchor)
{
    int startCond = re2.cond;
    if (startCond == Utils.EMPTY_ALL)   // impossible
    {
        return(false);
    }
    if ((anchor == RE2.ANCHOR_START || anchor == RE2.ANCHOR_BOTH) && pos != 0)
    {
        return(false);
    }
    matched = false;
    // Reset the first prog.numCap capture slots to -1 before the run.
    for (int jj = 0; jj < prog.numCap; ++jj)
    {
        matchcap[jj] = -1;
    }
    Queue runq = q0, nextq = q1;
    // step() returns a packed value: the low three bits are the width of the
    // rune at the given position, the remaining upper bits are the rune.
    int r = @in.step(pos);
    int rune = r >> 3;
    int width = r & 7;
    // rune1/width1 hold a one-rune lookahead; they keep the values -1/0 when
    // the first step already reported EOF.
    int rune1 = -1;
    int width1 = 0;
    if (r != MachineInput.EOF)
    {
        r = @in.step(pos + width);
        rune1 = r >> 3;
        width1 = r & 7;
    }
    int flag; // bitmask of EMPTY_* flags
    if (pos == 0)
    {
        // At the start of the input there is no preceding rune, so -1 is
        // passed in its place.
        flag = Utils.emptyOpContext(-1, rune);
    }
    else
    {
        flag = @in.context(pos);
    }
    for (;;)
    {
        if (runq.isEmpty())
        {
            if ((startCond & Utils.EMPTY_BEGIN_TEXT) != 0 && pos != 0)
            {
                // Anchored match, past beginning of text.
                break;
            }
            if (matched)
            {
                // Have match; finished exploring alternatives.
                break;
            }
            if (re2.prefix.Length != 0 && rune1 != re2.prefixRune && @in.canCheckPrefix())
            {
                // Match requires literal prefix; fast search for it.
                int advance = @in.index(re2, pos);
                if (advance < 0)
                {
                    break;
                }
                // Jump ahead by the reported offset and re-decode both the
                // current rune and the lookahead rune at the new position.
                pos += advance;
                r = @in.step(pos);
                rune = r >> 3;
                width = r & 7;
                r = @in.step(pos + width);
                rune1 = r >> 3;
                width1 = r & 7;
            }
        }
        if (!matched && (pos == 0 || anchor == RE2.UNANCHORED))
        {
            // If we are anchoring at begin then only add threads that begin
            // at |pos| = 0.
            if (ncap > 0)
            {
                matchcap[0] = pos;
            }
            add(runq, prog.start, pos, matchcap, flag, null);
        }
        // Recompute the empty-width flags for the boundary between the
        // current rune and the lookahead rune, then advance the machine by
        // one rune from runq into nextq.
        flag = Utils.emptyOpContext(rune, rune1);
        step(runq, nextq, pos, pos + width, rune, flag, anchor, pos == @in.endPos());
        if (width == 0)   // EOF
        {
            break;
        }
        if (ncap == 0 && matched)
        {
            // Found a match and not paying attention
            // to where it is, so any match will do.
            break;
        }
        // Shift the lookahead into the current slot and, unless the new
        // current rune is -1, decode the next lookahead.
        pos += width;
        rune = rune1;
        width = width1;
        if (rune != -1)
        {
            r = @in.step(pos + width);
            rune1 = r >> 3;
            width1 = r & 7;
        }
        // Swap the queues: what step() filled becomes the run queue for the
        // next iteration.
        Queue tmpq = runq;
        runq = nextq;
        nextq = tmpq;
    }
    // Hand the non-current queue to free() before reporting the result.
    free(nextq);
    return(matched);
}
/**
 * Returns a copy of {@code src} in which at most {@code maxReplaces} matches for this regexp have
 * been replaced by the return value of function {@code repl} (whose first argument is the
 * matched string). No support is provided for expressions (e.g. {@code \1} or {@code $1}) in the
 * replacement string.
 *
 * If {@code maxReplaces} is zero or negative, no replacement is performed and an unmodified copy
 * of {@code src} is returned.
 *
 * @param src the text to search
 * @param repl callback invoked with each matched substring; its return value is inserted instead
 * @param maxReplaces upper bound on the number of replacements
 * @return the text with the replacements applied
 */
// This is visible for testing.
public string replaceAllFunc(string src, ReplaceFunc repl, int maxReplaces)
{
    int lastMatchEnd = 0; // end position of the most recent match
    int searchPos = 0;    // position where we next look for a match
    StringBuilder buf = new StringBuilder();
    MachineInput input = MachineInput.fromUTF16(src);
    int numReplaces = 0;

    // The replacement budget is checked before each search so that a
    // non-positive maxReplaces performs no replacement at all.
    while (searchPos <= src.Length && numReplaces < maxReplaces)
    {
        int[] a = doExecute(input, searchPos, UNANCHORED, 2);
        if (a == null || a.Length == 0)
        {
            break; // no more matches
        }

        // Copy the unmatched characters before this match.
        // a[0] and a[1] are start/end OFFSETS, while the .NET APIs take a
        // start index and a LENGTH, hence the subtraction.
        buf.Append(src, lastMatchEnd, a[0] - lastMatchEnd);

        // Now insert a copy of the replacement string, but not for a
        // match of the empty string immediately after another match.
        // (Otherwise, we get double replacement for patterns that
        // match both empty and nonempty strings.)
        // FIXME(adonovan), FIXME(afrozm) - JDK seems to be doing exactly this
        // put a replacement for a pattern that also matches empty and non-empty
        // strings. The fix would not just be a[1] >= lastMatchEnd, there are a
        // few corner cases in that as well, and there are tests which will fail
        // when that case is touched (happens only at the end of the input string
        // though).
        if (a[1] > lastMatchEnd || a[0] == 0)
        {
            buf.Append(repl(src.Substring(a[0], a[1] - a[0])));
            // Increment the replace count.
            ++numReplaces;
        }
        lastMatchEnd = a[1];

        // Advance past this match; always advance at least one character.
        int width = input.step(searchPos) & 0x7;
        if (searchPos + width > a[1])
        {
            searchPos += width;
        }
        else if (searchPos + 1 > a[1])
        {
            // This clause is only needed at the end of the input
            // string. In that case, the decoded width is 0.
            searchPos++;
        }
        else
        {
            searchPos = a[1];
        }
    }

    // Copy the unmatched characters after the last match.
    buf.Append(src, lastMatchEnd, src.Length - lastMatchEnd);
    return buf.ToString();
}