/**
 * Runs this regular expression over {@code input}, searching from position {@code start} and
 * treating position {@code end} as the end of the input, under the given anchoring. On success
 * the submatch boundaries are copied into {@code group} as [start, end) offset pairs. Asking for
 * no submatch boundaries is the cheapest way to call this.
 *
 * @param input the input string
 * @param start the position in the input at which the search begins
 * @param end the position in the input at which the search ends
 * @param anchor the anchoring flag (UNANCHORED, ANCHOR_START, ANCHOR_BOTH)
 * @param group the array to fill with submatch positions; may be null, in which case nothing
 *        is copied
 * @param ngroup the number of array pairs to fill in; 2 * ngroup is requested from doExecute
 * @return true if a match was found
 */
public bool match(string input, int start, int end, int anchor, int[] group, int ngroup)
{
    // Strings are indexed from zero in both Java and C#, but Java does not crash when the
    // position runs over whereas C# does, so give up before touching the input.
    // NOTE(review): this also rejects start == end (an empty search range) - confirm intended.
    if (end <= start)
    {
        return(false);
    }

    // TODO(afrozm): We suspect that the correct code should look something
    // like the following:
    // doExecute(MachineInput.fromUTF16(input), start, anchor, 2*ngroup);
    //
    // In Russ' own words:
    // That is, I believe doExecute needs to know the bounds of the whole input
    // as well as the bounds of the subpiece that is being searched.
    int[] found = doExecute(MachineInput.fromUTF16(input, 0, end), start, anchor, 2 * ngroup);
    bool matched = found != null;

    // Hand the boundaries back only when there is a match and the caller supplied a buffer.
    if (matched && group != null)
    {
        System.Array.Copy(found, 0, group, 0, found.Length);
    }

    return(matched);
}
/**
 * {@code findAllSubmatch} is the "All" version of {@link #findSubmatch}: it returns a list of up
 * to {@code n} successive matches of the expression, each entry holding the text of the match
 * and of its subexpressions.
 *
 * An entry's element is left null when the corresponding start offset is negative.
 *
 * A return value of null indicates no match.
 */
// This is visible for testing.
public List <String[]> findAllSubmatch(String s, int n)
{
    List<String[]> collected = new List<String[]>();

    allMatches(
        MachineInput.fromUTF16(s),
        n,
        (int[] bounds) =>
        {
            // bounds holds (start, end) pairs, so there is one text slot per pair.
            int pairCount = bounds.Length / 2;
            String[] texts = new String[pairCount];
            for (int g = 0; g < pairCount; g++)
            {
                int begin = bounds[2 * g];
                if (begin < 0)
                {
                    continue; // negative start: leave this slot null
                }
                texts[g] = s.Substring(begin, bounds[2 * g + 1] - begin);
            }
            collected.Add(texts);
        });

    return(collected.Count == 0 ? null : collected);
}
/**
 * Returns a two-element array of integers defining the location of the leftmost match in
 * {@code s} of this regular expression. The match itself starts at offset {@code loc[0]} and
 * ends just before offset {@code loc[1]}.
 *
 * A return value of null indicates no match.
 */
// This is visible for testing.
public int[] findIndex(String s)
{
    // doExecute already yields null when nothing matches, so its result is returned directly.
    return(doExecute(MachineInput.fromUTF16(s), 0, UNANCHORED, 2));
}
/**
 * Returns a string holding the text of the leftmost match in {@code s} of this regular
 * expression.
 *
 * If there is no match, the return value is an empty string, but it will also be empty if the
 * regular expression successfully matches an empty string. Use {@link #findIndex} or
 * {@link #findSubmatch} if it is necessary to distinguish these cases.
 */
// This is visible for testing.
public String find(String s)
{
    int[] span = doExecute(MachineInput.fromUTF16(s), 0, UNANCHORED, 2);
    if (span != null)
    {
        int begin = span[0];
        // span is a [start, end) pair; Substring wants a start and a length.
        return(s.Substring(begin, span[1] - begin));
    }

    return("");
}
/**
 * {@code findAllSubmatchIndex} is the "All" version of {@link #findSubmatchIndex}: it returns a
 * list of up to {@code n} successive matches of the expression, each entry being the index
 * array delivered for that match.
 *
 * A return value of null indicates no match.
 */
// This is visible for testing.
public List <int[]> findAllSubmatchIndex(String s, int n)
{
    List<int[]> collected = new List<int[]>();

    // Each delivered index array is stored as-is, without copying.
    allMatches(MachineInput.fromUTF16(s), n, (int[] bounds) => collected.Add(bounds));

    return(collected.Count == 0 ? null : collected);
}
/**
 * {@code findAll} is the "All" version of {@link #find}: it returns a list of up to {@code n}
 * successive matches of the expression, each entry being the matched text.
 *
 * A return value of null indicates no match.
 */
// This is visible for testing.
public List <String> findAll(String s, int n)
{
    List<String> collected = new List<String>();

    allMatches(
        MachineInput.fromUTF16(s),
        n,
        (int[] bounds) =>
        {
            // The first pair is the whole match as [start, end); convert it to start + length.
            int begin = bounds[0];
            int length = bounds[1] - begin;
            collected.Add(s.Substring(begin, length));
        });

    return(collected.Count == 0 ? null : collected);
}
/**
 * Returns an array of strings holding the text of the leftmost match of the regular expression in
 * {@code s} and the matches, if any, of its subexpressions. The array has {@code 1 + numSubexp}
 * elements; an element stays null when its pair is absent from the result or its start offset
 * is negative.
 *
 * A return value of null indicates no match.
 */
// This is visible for testing.
public string[] findSubmatch(String s)
{
    int[] bounds = doExecute(MachineInput.fromUTF16(s), 0, UNANCHORED, prog.numCap);
    if (bounds == null)
    {
        return(null);
    }

    string[] texts = new string[1 + numSubexp];
    for (int g = 0; g < texts.Length; g++)
    {
        int lo = 2 * g;
        // Skip slots whose pair lies beyond the returned array or whose start is negative.
        if (lo >= bounds.Length || bounds[lo] < 0)
        {
            continue;
        }
        texts[g] = s.Substring(bounds[lo], bounds[lo + 1] - bounds[lo]);
    }

    return(texts);
}
/**
 * Returns an array holding the index pairs identifying the leftmost match of this regular
 * expression in {@code s} and the matches, if any, of its subexpressions.
 *
 * The raw result of doExecute is passed through {@code pad} before being returned.
 *
 * A return value of null indicates no match.
 */
// This is visible for testing.
public int[] findSubmatchIndex(String s)
{
    int[] raw = doExecute(MachineInput.fromUTF16(s), 0, UNANCHORED, prog.numCap);
    return(pad(raw));
}
/**
 * Returns a copy of {@code src} in which at most {@code maxReplaces} matches for this regexp have
 * been replaced by the return value of function {@code repl} (whose first argument is the
 * matched string). No support is provided for expressions (e.g. {@code \1} or {@code $1}) in the
 * replacement string.
 *
 * @param src the text to search
 * @param repl callback invoked with the text of each match; its return value is appended in
 *        place of the match
 * @param maxReplaces the replacement count at which the loop stops (checked after each match)
 * @return the text with the replacements applied
 */
// This is visible for testing.
public string replaceAllFunc(string src, ReplaceFunc repl, int maxReplaces)
{
    int lastMatchEnd = 0; // end position of the most recent match
    int searchPos = 0;    // position where we next look for a match
    StringBuilder buf = new StringBuilder();
    MachineInput input = MachineInput.fromUTF16(src);
    int numReplaces = 0;

    while (searchPos <= src.Length)
    {
        int[] a = doExecute(input, searchPos, UNANCHORED, 2);
        if (a == null || a.Length == 0)
        {
            break; // no more matches
        }

        // Copy the unmatched characters before this match.
        // a[0]/a[1] are [start, end) positions, but string.Substring takes (startIndex, length),
        // so the end position must be converted to a length.
        buf.Append(src.Substring(lastMatchEnd, a[0] - lastMatchEnd));

        // Now insert a copy of the replacement string, but not for a
        // match of the empty string immediately after another match.
        // (Otherwise, we get double replacement for patterns that
        // match both empty and nonempty strings.)
        // FIXME(adonovan), FIXME(afrozm) - JDK seems to be doing exactly this
        // put a replacement for a pattern that also matches empty and non-empty
        // strings. The fix would not just be a[1] >= lastMatchEnd, there are a
        // few corner cases in that as well, and there are tests which will fail
        // when that case is touched (happens only at the end of the input string
        // though).
        if (a[1] > lastMatchEnd || a[0] == 0)
        {
            // Same (startIndex, length) conversion for the matched text handed to repl.
            buf.Append(repl(src.Substring(a[0], a[1] - a[0])));
            // Increment the replace count.
            ++numReplaces;
        }
        lastMatchEnd = a[1];

        // Advance past this match; always advance at least one character.
        // The width is taken from the low three bits of the value returned by input.step.
        int width = input.step(searchPos) & 0x7;
        if (searchPos + width > a[1])
        {
            searchPos += width;
        }
        else if (searchPos + 1 > a[1])
        {
            // This clause is only needed at the end of the input
            // string. In that case, DecodeRuneInString returns width=0.
            searchPos++;
        }
        else
        {
            searchPos = a[1];
        }

        if (numReplaces >= maxReplaces)
        {
            // Should never be greater though.
            break;
        }
    }

    // Copy the unmatched characters after the last match.
    buf.Append(src.Substring(lastMatchEnd));
    return(buf.ToString());
}
/**
 * Reports whether this regexp matches the string {@code s}. The search is unanchored and
 * requests no submatch boundaries.
 */
public bool match(string s)
{
    int[] outcome = doExecute(MachineInput.fromUTF16(s), 0, UNANCHORED, 0);
    return(outcome != null);
}