Beispiel #1
0
        /**
         * {@code findAllUTF8Submatch} is the <a href='#all'>All</a> version of
         * {@link #findUTF8Submatch}: it gathers up to {@code n} successive matches of the expression
         * in {@code b}. Each list element holds one byte slice per index pair reported for that
         * match; a pair whose start offset is negative leaves its slot null.
         *
         * <p>
         * A return value of null indicates no match.
         */
        // This is visible for testing.
        public List <byte[][]> findAllUTF8Submatch(byte[] b, int n)
        {
            List <byte[][]> found = new List <byte[][]>();
            MachineInput    input = MachineInput.fromUTF8(b);

            allMatches(input, n, (int[] caps) => {
                int      groupCount = caps.Length / 2;
                byte[][] groups     = new byte[groupCount][];
                for (int g = 0; g < groupCount; g++)
                {
                    int lo = caps[2 * g];
                    if (lo < 0)
                    {
                        // Negative start offset: no slice is produced for this pair.
                        continue;
                    }
                    groups[g] = Utils.subarray(b, lo, caps[2 * g + 1]);
                }
                found.Add(groups);
            });

            return(found.Count == 0 ? null : found);
        }
Beispiel #2
0
        /**
         * Matches the regular expression against {@code input}, starting the search at position
         * {@code start} and treating position {@code end} as the end of the input, with the given
         * anchoring. On success the submatch boundaries ([start, end) offset pairs) are copied into
         * {@code group} when it is non-null. It is most efficient not to ask for submatch boundaries.
         *
         * NOTE(review): a range with {@code start >= end} is rejected up front, so an empty range
         * never reports a match, even for an expression that matches the empty string. Confirm this
         * is intended.
         *
         * @param input the input string
         * @param start the beginning position in the input
         * @param end the end position in the input
         * @param anchor the anchoring flag (UNANCHORED, ANCHOR_START, ANCHOR_BOTH)
         * @param group the array to fill with submatch positions; may be null
         * @param ngroup the number of array pairs to fill in
         * @return true if a match was found
         */
        public bool match(string input, int start, int end, int anchor, int[] group, int ngroup)
        {
            // strings in Java and C# indexed from zero. But, java doesn't crash if over, c# crashes.
            if (end <= start)
            {
                return(false);
            }

            // TODO(afrozm): We suspect that the correct code should look something
            // like the following:
            // doExecute(MachineInput.fromUTF16(input), start, anchor, 2*ngroup);
            //
            // In Russ' own words:
            // That is, I believe doExecute needs to know the bounds of the whole input
            // as well as the bounds of the subpiece that is being searched.
            MachineInput window = MachineInput.fromUTF16(input, 0, end);
            int[]        caps   = doExecute(window, start, anchor, 2 * ngroup);

            bool found = caps != null;
            if (found && group != null)
            {
                // Copy every reported boundary into the caller's array.
                System.Array.Copy(caps, group, caps.Length);
            }

            return(found);
        }
Beispiel #3
0
        /**
         * {@code findAllSubmatch} is the <a href='#all'>All</a> version of {@link #findSubmatch}: it
         * gathers up to {@code n} successive matches of the expression in {@code s}. Each list
         * element holds one string per index pair reported for that match; a pair whose start
         * offset is negative leaves its slot null.
         *
         * <p>
         * A return value of null indicates no match.
         */
        // This is visible for testing.
        public List <String[]> findAllSubmatch(String s, int n)
        {
            List <String[]> found = new List <String[]>();
            MachineInput    input = MachineInput.fromUTF16(s);

            allMatches(input, n, (int[] caps) => {
                int      groupCount = caps.Length / 2;
                String[] groups     = new String[groupCount];
                for (int g = 0; g < groupCount; g++)
                {
                    int lo = caps[2 * g];
                    if (lo < 0)
                    {
                        // Negative start offset: no text is produced for this pair.
                        continue;
                    }
                    // C# Substring takes (start, length), hence end - start.
                    groups[g] = s.Substring(lo, caps[2 * g + 1] - lo);
                }
                found.Add(groups);
            });

            return(found.Count == 0 ? null : found);
        }
Beispiel #4
0
        // doExecute() obtains a Machine via get(), initialises it for |ncap|
        // capture slots, runs it over |in| from |pos| with the given anchor,
        // and hands the machine back via put().
        // Returns the machine's submatches when a match was found, null otherwise.
        // Derived from exec.go.
        public int[] doExecute(MachineInput @in, int pos, int anchor, int ncap)
        {
            Machine machine = get();
            machine.init(ncap);

            int[] result = null;
            if (machine.match(@in, pos, anchor))
            {
                result = machine.submatches();
            }

            put(machine);
            return(result);
        }
Beispiel #5
0
        /**
         * Returns the array of integers defining the location of the leftmost match in {@code s} of
         * this regular expression, as produced by an unanchored search requesting two capture
         * slots. The match itself is at {@code s.Substring(loc[0], loc[1] - loc[0])}.
         *
         * <p>
         * A return value of null indicates no match.
         */
        // This is visible for testing.
        public int[] findIndex(String s)
        {
            // The search result is already null on failure, so it is returned as-is.
            return(doExecute(MachineInput.fromUTF16(s), 0, UNANCHORED, 2));
        }
Beispiel #6
0
        /**
         * Returns a string holding the text of the leftmost match in {@code s} of this regular
         * expression.
         *
         * <p>
         * When nothing matches, the result is an empty string; it is also empty when the expression
         * successfully matches an empty string. Use {@link #findIndex} or {@link #findSubmatch} if
         * it is necessary to distinguish these cases.
         */
        // This is visible for testing.
        public String find(String s)
        {
            int[] span = doExecute(MachineInput.fromUTF16(s), 0, UNANCHORED, 2);

            // span holds [start, end); C# Substring wants (start, length).
            return(span == null ? "" : s.Substring(span[0], span[1] - span[0]));
        }
Beispiel #7
0
        /**
         * Returns a two-element array of integers defining the location of the leftmost match in
         * {@code b} of this regular expression. The match itself is at {@code b[loc[0]...loc[1]]}.
         *
         * <p>
         * A return value of null indicates no match.
         */
        // This is visible for testing.
        public int[] findUTF8Index(byte[] b)
        {
            int[] span = doExecute(MachineInput.fromUTF8(b), 0, UNANCHORED, 2);

            // Only the first two entries (the overall match) are handed back.
            return(span != null ? Utils.subarray(span, 0, 2) : null);
        }
Beispiel #8
0
        // Finds successive matches in |input| and hands each accepted one to
        // |deliver| (after passing it through pad()).
        //
        // At most |n| matches are delivered; a negative |n| is replaced by
        // endPos() + 1. The search is repeated with doExecute() from an
        // advancing position until no match is found, the limit is reached,
        // or the position moves past the end of the input.
        //
        // An empty match that starts exactly where the previous match ended is
        // skipped (not delivered and not counted).
        private void allMatches(MachineInput input, int n, DeliverFunc deliver)
        {
            int end = input.endPos();

            if (n < 0)
            {
                // Negative limit: use end + 1 as the cap instead.
                n = end + 1;
            }

            // pos: where the next search starts; i: number of delivered matches;
            // prevMatchEnd: end offset of the previous match (-1 before the first).
            for (int pos = 0, i = 0, prevMatchEnd = -1; i < n && pos <= end;)
            {
                int[] matches = doExecute(input, pos, UNANCHORED, prog.numCap);
                if (matches == null || matches.Length == 0)
                {
                    break;
                }

                bool accept = true;
                if (matches[1] == pos)
                {
                    // We've found an empty match.
                    if (matches[0] == prevMatchEnd)
                    {
                        // We don't allow an empty match right
                        // after a previous match, so ignore it.
                        accept = false;
                    }

                    // The match did not consume anything, so step over one
                    // character to guarantee the search position advances.
                    int r = input.step(pos);
                    if (r < 0)
                    {
                        // EOF
                        pos = end + 1;
                    }
                    else
                    {
                        // The low three bits of step()'s result are used as the
                        // width of the character at pos.
                        pos += r & 0x7;
                    }
                }
                else
                {
                    // Resume searching at the end of this match.
                    pos = matches[1];
                }

                prevMatchEnd = matches[1];

                if (accept)
                {
                    deliver(pad(matches));
                    i++;
                }
            }
        }
Beispiel #9
0
        /**
         * {@code findAllUTF8SubmatchIndex} is the <a href='#all'>All</a> version of
         * {@link #findUTF8SubmatchIndex}: it gathers the index arrays of up to {@code n} successive
         * matches of the expression in {@code b}, as defined by the <a href='#all'>All</a>
         * description above.
         *
         * <p>
         * A return value of null indicates no match.
         */
        // This is visible for testing.
        public List <int[]> findAllUTF8SubmatchIndex(byte[] b, int n)
        {
            List <int[]> found = new List <int[]>();
            MachineInput input = MachineInput.fromUTF8(b);

            // Each delivered index array is stored unchanged.
            allMatches(input, n, (int[] caps) => found.Add(caps));

            return(found.Count == 0 ? null : found);
        }
Beispiel #10
0
        /**
         * {@code findAll} is the <a href='#all'>All</a> version of {@link #find}: it gathers the
         * text of up to {@code n} successive matches of the expression in {@code s}, as defined by
         * the <a href='#all'>All</a> description above.
         *
         * <p>
         * A return value of null indicates no match.
         */
        // This is visible for testing.
        public List <String> findAll(String s, int n)
        {
            List <String> found = new List <String>();
            MachineInput  input = MachineInput.fromUTF16(s);

            allMatches(input, n, (int[] span) => {
                // span[0], span[1] bound the whole match; Substring takes (start, length).
                int lo = span[0];
                found.Add(s.Substring(lo, span[1] - lo));
            });

            return(found.Count == 0 ? null : found);
        }
Beispiel #11
0
        /**
         * {@code findAllUTF8()} is the <a href='#all'>All</a> version of {@link #findUTF8}: it
         * gathers the bytes of up to {@code n} successive matches of the expression in {@code b}, as
         * defined by the <a href='#all'>All</a> description above.
         *
         * <p>
         * A return value of null indicates no match.
         *
         * TODO(adonovan): think about defining a byte slice view class, like a read-only Go slice backed
         * by |b|.
         */
        // This is visible for testing.
        public List <byte[]> findAllUTF8(byte[] b, int n)
        {
            List <byte[]> found = new List <byte[]>();
            MachineInput  input = MachineInput.fromUTF8(b);

            // span[0], span[1] bound the whole match within b.
            allMatches(input, n, (int[] span) => found.Add(Utils.subarray(b, span[0], span[1])));

            return(found.Count == 0 ? null : found);
        }
Beispiel #12
0
        /**
         * {@code findAllSubmatchIndex} is the <a href='#all'>All</a> version of
         * {@link #findSubmatchIndex}: it gathers the index arrays of up to {@code n} successive
         * matches of the expression in {@code s}, as defined by the <a href='#all'>All</a>
         * description above.
         *
         * <p>
         * A return value of null indicates no match.
         */
        // This is visible for testing.
        public List <int[]> findAllSubmatchIndex(String s, int n)
        {
            List <int[]> found = new List <int[]>();
            MachineInput input = MachineInput.fromUTF16(s);

            // Each delivered index array is stored unchanged.
            allMatches(input, n, (int[] caps) => found.Add(caps));

            return(found.Count == 0 ? null : found);
        }
Beispiel #13
0
        /**
         * Returns an array of byte arrays holding the text of the leftmost match of the regular
         * expression in {@code b} and the matches, if any, of its subexpressions, as defined by the
         * <a href='#submatch'>Submatch</a> description above. The array has {@code 1 + numSubexp}
         * entries; an entry stays null when its index pair is absent or has a negative start.
         *
         * <p>
         * A return value of null indicates no match.
         */
        // This is visible for testing.
        public byte[][] findUTF8Submatch(byte[] b)
        {
            int[] caps = doExecute(MachineInput.fromUTF8(b), 0, UNANCHORED, prog.numCap);
            if (caps == null)
            {
                return(null);
            }

            int      total  = 1 + numSubexp;
            byte[][] groups = new byte[total][];

            // k walks the index pairs in step with the group number g.
            for (int g = 0, k = 0; g < total; g++, k += 2)
            {
                if (k >= caps.Length || caps[k] < 0)
                {
                    continue;
                }
                groups[g] = Utils.subarray(b, caps[k], caps[k + 1]);
            }

            return(groups);
        }
Beispiel #14
0
        /**
         * Returns an array of strings holding the text of the leftmost match of the regular
         * expression in {@code s} and the matches, if any, of its subexpressions, as defined by the
         * <a href='#submatch'>Submatch</a> description above. The array has {@code 1 + numSubexp}
         * entries; an entry stays null when its index pair is absent or has a negative start.
         *
         * <p>
         * A return value of null indicates no match.
         */
        // This is visible for testing.
        public string[] findSubmatch(String s)
        {
            int[] caps = doExecute(MachineInput.fromUTF16(s), 0, UNANCHORED, prog.numCap);
            if (caps == null)
            {
                return(null);
            }

            int      total  = 1 + numSubexp;
            string[] groups = new string[total];

            // k walks the index pairs in step with the group number g.
            for (int g = 0, k = 0; g < total; g++, k += 2)
            {
                if (k >= caps.Length || caps[k] < 0)
                {
                    continue;
                }
                // C# Substring takes (start, length), hence end - start.
                groups[g] = s.Substring(caps[k], caps[k + 1] - caps[k]);
            }

            return(groups);
        }
Beispiel #15
0
 /**
  * Returns true iff this regexp matches the UTF-8 byte array {@code b}. The search is unanchored
  * and requests no capture slots.
  */
 // This is visible for testing.
 bool matchUTF8(byte[] b)
 {
     int[] caps = doExecute(MachineInput.fromUTF8(b), 0, UNANCHORED, 0);

     return(caps != null);
 }
Beispiel #16
0
 /**
  * Returns an array holding the index pairs identifying the leftmost match of this regular
  * expression in {@code s} and the matches, if any, of its subexpressions, as defined by the <a
  * href='#submatch'>Submatch</a> description above. The raw search result is passed through
  * {@code pad} before being returned.
  *
  * <p>
  * A return value of null indicates no match.
  */
 // This is visible for testing.
 public int[] findSubmatchIndex(String s)
 {
     int[] caps = doExecute(MachineInput.fromUTF16(s), 0, UNANCHORED, prog.numCap);

     return(pad(caps));
 }
Beispiel #17
0
 /**
  * Returns true iff this regexp matches the string {@code s}. The search is unanchored and
  * requests no capture slots.
  */
 public bool match(string s)
 {
     int[] caps = doExecute(MachineInput.fromUTF16(s), 0, UNANCHORED, 0);

     return(caps != null);
 }
Beispiel #18
0
        /**
         * Returns a copy of {@code src} in which matches for this regexp have been replaced by the
         * return value of function {@code repl} (whose argument is the matched string). No support
         * is provided for expressions (e.g. {@code \1} or {@code $1}) in the replacement string.
         *
         * <p>
         * The loop stops once the number of replacements made reaches {@code maxReplaces}; the
         * check is performed after each search iteration.
         *
         * @param src the text to search
         * @param repl callback producing the replacement for each matched substring
         * @param maxReplaces the replacement count at which the search stops
         * @return the text with replacements applied
         */
        // This is visible for testing.
        public string replaceAllFunc(string src, ReplaceFunc repl, int maxReplaces)
        {
            int           lastMatchEnd = 0; // end position of the most recent match
            int           searchPos    = 0; // position where we next look for a match
            StringBuilder buf          = new StringBuilder();
            MachineInput  input        = MachineInput.fromUTF16(src);
            int           numReplaces  = 0;

            while (searchPos <= src.Length)
            {
                int[] a = doExecute(input, searchPos, UNANCHORED, 2);
                if (a == null || a.Length == 0)
                {
                    break; // no more matches
                }

                // Copy the unmatched characters before this match.
                // a[] holds [start, end) offsets, but C# Substring takes
                // (start, length), so the length must be computed explicitly.
                buf.Append(src.Substring(lastMatchEnd, a[0] - lastMatchEnd));

                // Now insert a copy of the replacement string, but not for a
                // match of the empty string immediately after another match.
                // (Otherwise, we get double replacement for patterns that
                // match both empty and nonempty strings.)
                // FIXME(adonovan), FIXME(afrozm) - JDK seems to be doing exactly this
                // put a replacement for a pattern that also matches empty and non-empty
                // strings. The fix would not just be a[1] >= lastMatchEnd, there are a
                // few corner cases in that as well, and there are tests which will fail
                // when that case is touched (happens only at the end of the input string
                // though).
                if (a[1] > lastMatchEnd || a[0] == 0)
                {
                    // Same (start, length) conversion for the matched text.
                    buf.Append(repl(src.Substring(a[0], a[1] - a[0])));
                    // Increment the replace count.
                    ++numReplaces;
                }

                lastMatchEnd = a[1];

                // Advance past this match; always advance at least one character.
                int width = input.step(searchPos) & 0x7;
                if (searchPos + width > a[1])
                {
                    searchPos += width;
                }
                else if (searchPos + 1 > a[1])
                {
                    // This clause is only needed at the end of the input
                    // string.  In that case, DecodeRuneInString returns width=0.
                    searchPos++;
                }
                else
                {
                    searchPos = a[1];
                }

                if (numReplaces >= maxReplaces)
                {
                    // Should never be greater though.
                    break;
                }
            }

            // Copy the unmatched characters after the last match.
            buf.Append(src.Substring(lastMatchEnd));

            return(buf.ToString());
        }
Beispiel #19
0
        // match() runs the machine over the input |in| starting at |pos| with the
        // RE2 Anchor |anchor|.
        // It reports whether a match was found.
        // If so, matchcap holds the submatch information.
        //
        // The loop keeps two thread queues (runq for the current position, nextq
        // for the following one) and swaps them after every step. It also keeps
        // one character of lookahead: (rune, width) describe the character at
        // |pos| and (rune1, width1) the character after it.
        public bool match(MachineInput @in, int pos, int anchor)
        {
            int startCond = re2.cond;

            if (startCond == Utils.EMPTY_ALL)
            {
                // impossible
                return(false);
            }

            // A start-anchored search can only succeed when begun at offset 0.
            if ((anchor == RE2.ANCHOR_START || anchor == RE2.ANCHOR_BOTH) && pos != 0)
            {
                return(false);
            }

            // Reset the match state left over from any previous run.
            matched = false;
            for (int jj = 0; jj < prog.numCap; ++jj)
            {
                matchcap[jj] = -1;
            }
            Queue runq = q0, nextq = q1;
            // step() packs its result: the rune in the upper bits (r >> 3) and
            // the width in the low three bits (r & 7).
            int   r      = @in.step(pos);
            int   rune   = r >> 3;
            int   width  = r & 7;
            int   rune1  = -1;
            int   width1 = 0;

            if (r != MachineInput.EOF)
            {
                // Not at end of input: read the lookahead character as well.
                r      = @in.step(pos + width);
                rune1  = r >> 3;
                width1 = r & 7;
            }

            int flag; // bitmask of EMPTY_* flags

            if (pos == 0)
            {
                // At the very start there is no preceding rune; -1 stands in for it.
                flag = Utils.emptyOpContext(-1, rune);
            }
            else
            {
                flag = @in.context(pos);
            }

            for (;;)
            {
                if (runq.isEmpty())
                {
                    if ((startCond & Utils.EMPTY_BEGIN_TEXT) != 0 && pos != 0)
                    {
                        // Anchored match, past beginning of text.
                        break;
                    }

                    if (matched)
                    {
                        // Have match; finished exploring alternatives.
                        break;
                    }

                    if (re2.prefix.Length != 0 && rune1 != re2.prefixRune && @in.canCheckPrefix())
                    {
                        // Match requires literal prefix; fast search for it.
                        int advance = @in.index(re2, pos);
                        if (advance < 0)
                        {
                            break;
                        }

                        // Jump to the position index() reported and reload both
                        // the current and the lookahead character from there.
                        pos   += advance;
                        r      = @in.step(pos);
                        rune   = r >> 3;
                        width  = r & 7;
                        r      = @in.step(pos + width);
                        rune1  = r >> 3;
                        width1 = r & 7;
                    }
                }

                if (!matched && (pos == 0 || anchor == RE2.UNANCHORED))
                {
                    // If we are anchoring at begin then only add threads that begin
                    // at |pos| = 0.
                    if (ncap > 0)
                    {
                        matchcap[0] = pos;
                    }

                    add(runq, prog.start, pos, matchcap, flag, null);
                }

                // Compute the empty-width context between the current rune and the
                // lookahead rune, then advance the queued threads over the current rune.
                flag = Utils.emptyOpContext(rune, rune1);
                step(runq, nextq, pos, pos + width, rune, flag, anchor, pos == @in.endPos());
                if (width == 0)
                {
                    // EOF
                    break;
                }

                if (ncap == 0 && matched)
                {
                    // Found a match and not paying attention
                    // to where it is, so any match will do.
                    break;
                }

                // Shift the lookahead into the current slot and, unless the
                // lookahead was already absent (-1), read a new lookahead.
                pos  += width;
                rune  = rune1;
                width = width1;
                if (rune != -1)
                {
                    r      = @in.step(pos + width);
                    rune1  = r >> 3;
                    width1 = r & 7;
                }

                // Swap the queues: threads scheduled for the next position
                // become the current run queue.
                Queue tmpq = runq;
                runq  = nextq;
                nextq = tmpq;
            }

            free(nextq);
            return(matched);
        }
Beispiel #20
0
 /**
  * Returns an array holding the index pairs identifying the leftmost match of this regular
  * expression in {@code b} and the matches, if any, of its subexpressions, as defined by the
  * <a href='#submatch'>Submatch</a> and <a href='#index'>Index</a> descriptions above. The raw
  * search result is passed through {@code pad} before being returned.
  *
  * <p>
  * A return value of null indicates no match.
  */
 // This is visible for testing.
 public int[] findUTF8SubmatchIndex(byte[] b)
 {
     int[] caps = doExecute(MachineInput.fromUTF8(b), 0, UNANCHORED, prog.numCap);

     return(pad(caps));
 }