Example #1
0
File: RE2.cs Project: kaby76/re2cs
        /**
         * Runs this regular expression over {@code input}, searching from position {@code start} up to
         * position {@code end} with the requested anchoring, and optionally copies the submatch
         * boundaries into {@code group} as [start, end) index pairs. Asking for no submatch boundaries
         * is the cheapest way to call this.
         *
         * <p>
         * A range with {@code start >= end} is reported as "no match" without running the matcher.
         *
         * @param input the input string
         * @param start the beginning position in the input
         * @param end the end position in the input; the input handed to the matcher stops here
         * @param anchor the anchoring flag (UNANCHORED, ANCHOR_START, ANCHOR_BOTH)
         * @param group the array to fill with submatch positions; nothing is copied when it is null
         * @param ngroup the number of array pairs to fill in ({@code 2 * ngroup} is requested)
         * @return true if a match was found
         */
        public bool match(string input, int start, int end, int anchor, int[] group, int ngroup)
        {
            // Guard added by the C# port: Java tolerates an out-of-range start here, C# throws.
            // NOTE(review): this also rejects the empty range start == end, so a pattern that can
            // match the empty string never matches there -- confirm that is intended.
            if (start >= end)
            {
                return false;
            }

            // TODO(afrozm): We suspect that the correct code should look something
            // like the following:
            // doExecute(MachineInput.fromUTF16(input), start, anchor, 2*ngroup);
            //
            // In Russ' own words:
            // That is, I believe doExecute needs to know the bounds of the whole input
            // as well as the bounds of the subpiece that is being searched.
            var boundedInput = MachineInput.fromUTF16(input, 0, end);
            int[] found = doExecute(boundedInput, start, anchor, 2 * ngroup);

            // A null result is treated as "no match".
            bool matched = found != null;
            if (matched && group != null)
            {
                System.Array.Copy(found, 0, group, 0, found.Length);
            }

            return matched;
        }
Example #2
0
File: RE2.cs Project: kaby76/re2cs
        /**
         * {@code findAllSubmatch} is the <a href='#all'>All</a> version of {@link #findSubmatch}. It
         * collects up to {@code n} successive matches of the expression, as defined by the <a
         * href='#all'>All</a> description above. Each list element holds the matched text of the
         * whole expression followed by the text of each subexpression; an entry stays null when its
         * start index is negative.
         *
         * <p>
         * A return value of null indicates no match.
         */
        // This is visible for testing.
        public List <String[]> findAllSubmatch(String s, int n)
        {
            var collected = new List <String[]>();

            allMatches(
                MachineInput.fromUTF16(s),
                n,
                (int[] bounds) =>
                {
                    // bounds holds (start, end) index pairs, one pair per group.
                    int pairCount = bounds.Length / 2;
                    var texts = new String[pairCount];
                    for (int g = 0; g < pairCount; g++)
                    {
                        int begin = bounds[2 * g];
                        if (begin < 0)
                        {
                            continue; // negative start: leave this entry null
                        }

                        // C# Substring takes (start, length), so convert the end index to a length.
                        texts[g] = s.Substring(begin, bounds[2 * g + 1] - begin);
                    }

                    collected.Add(texts);
                });

            return(collected.Count == 0 ? null : collected);
        }
Example #3
0
File: RE2.cs Project: kaby76/re2cs
        /**
         * Locates the leftmost match of this regular expression in {@code s} and returns its index
         * pair: the match itself is the text from {@code loc[0]} (inclusive) to {@code loc[1]}
         * (exclusive).
         *
         * <p>
         * A return value of null indicates no match.
         */
        // This is visible for testing.
        public int[] findIndex(String s)
        {
            // The result of doExecute is handed back unchanged, including null for "no match".
            return(doExecute(MachineInput.fromUTF16(s), 0, UNANCHORED, 2));
        }
Example #4
0
File: RE2.cs Project: kaby76/re2cs
        /**
         * Returns the text of the leftmost match of this regular expression in {@code s}.
         *
         * <p>
         * The result is an empty string when nothing matches, and also when the expression
         * successfully matches an empty string. Use {@link #findIndex} or {@link #findSubmatch} if it
         * is necessary to distinguish these cases.
         */
        // This is visible for testing.
        public String find(String s)
        {
            int[] span = doExecute(MachineInput.fromUTF16(s), 0, UNANCHORED, 2);

            // span[0] is the match start and span[1] the match end; Substring wants a length.
            return(span == null ? "" : s.Substring(span[0], span[1] - span[0]));
        }
Example #5
0
File: RE2.cs Project: kaby76/re2cs
        /**
         * {@code findAllSubmatchIndex} is the <a href='#all'>All</a> version of
         * {@link #findSubmatchIndex}. It collects the index arrays of up to {@code n} successive
         * matches of the expression, as defined by the <a href='#all'>All</a> description above.
         *
         * <p>
         * A return value of null indicates no match.
         */
        // This is visible for testing.
        public List <int[]> findAllSubmatchIndex(String s, int n)
        {
            var hits = new List <int[]>();

            // Each array delivered by allMatches is stored as-is (no copy is made here).
            allMatches(MachineInput.fromUTF16(s), n, (int[] indices) => hits.Add(indices));

            return(hits.Count == 0 ? null : hits);
        }
Example #6
0
File: RE2.cs Project: kaby76/re2cs
        /**
         * {@code findAll} is the <a href='#all'>All</a> version of {@link #find}. It collects the
         * matched text of up to {@code n} successive matches of the expression, as defined by the <a
         * href='#all'>All</a> description above.
         *
         * <p>
         * A return value of null indicates no match.
         */
        // This is visible for testing.
        public List <String> findAll(String s, int n)
        {
            var pieces = new List <String>();

            allMatches(
                MachineInput.fromUTF16(s),
                n,
                (int[] span) =>
                {
                    // span[0]..span[1] delimits the whole match; convert the end index to a length.
                    int begin = span[0];
                    int length = span[1] - begin;
                    pieces.Add(s.Substring(begin, length));
                });

            return(pieces.Count == 0 ? null : pieces);
        }
Example #7
0
File: RE2.cs Project: kaby76/re2cs
        /**
         * Returns the text of the leftmost match of the regular expression in {@code s} together
         * with the text matched by each of its subexpressions, as defined by the <a
         * href='#submatch'>Submatch</a> description above. The array has {@code 1 + numSubexp}
         * entries; an entry is left null when its index pair is missing from the result or has a
         * negative start.
         *
         * <p>
         * A return value of null indicates no match.
         */
        // This is visible for testing.
        public string[] findSubmatch(String s)
        {
            int[] caps = doExecute(MachineInput.fromUTF16(s), 0, UNANCHORED, prog.numCap);
            if (caps == null)
            {
                return(null);
            }

            var texts = new string[1 + numSubexp];
            for (int g = 0; g < texts.Length; g++)
            {
                int lo = 2 * g;

                // Skip groups beyond the returned array and groups with a negative start index.
                if (lo >= caps.Length || caps[lo] < 0)
                {
                    continue;
                }

                texts[g] = s.Substring(caps[lo], caps[lo + 1] - caps[lo]);
            }

            return(texts);
        }
Example #8
0
File: RE2.cs Project: kaby76/re2cs
 /**
  * Returns the index pairs identifying the leftmost match of this regular expression in
  * {@code s} and the matches, if any, of its subexpressions, as defined by the <a
  * href='#submatch'>Submatch</a> description above. The raw result of the search is passed
  * through {@code pad} before being returned.
  *
  * <p>
  * A return value of null indicates no match.
  */
 // This is visible for testing.
 public int[] findSubmatchIndex(String s)
 {
     int[] captures = doExecute(MachineInput.fromUTF16(s), 0, UNANCHORED, prog.numCap);

     // NOTE(review): presumably pad hands a null (no match) straight back -- confirm in pad.
     return(pad(captures));
 }
Example #9
0
File: RE2.cs Project: kaby76/re2cs
        /**
         * Returns a copy of {@code src} in which at most {@code maxReplaces} matches for this regexp have
         * been replaced by the return value of function {@code repl} (whose first argument is the
         * matched string). No support is provided for expressions (e.g. {@code \1} or {@code $1}) in the
         * replacement string.
         *
         * @param src the string to search
         * @param repl callback invoked with the text of each match; its result is inserted instead
         * @param maxReplaces upper bound on the number of replacements performed
         * @return the rewritten string; unmatched text is copied through unchanged
         */
        // This is visible for testing.
        public string replaceAllFunc(string src, ReplaceFunc repl, int maxReplaces)
        {
            int           lastMatchEnd = 0; // end position of the most recent match
            int           searchPos    = 0; // position where we next look for a match
            StringBuilder buf          = new StringBuilder();
            MachineInput  input        = MachineInput.fromUTF16(src);
            int           numReplaces  = 0;

            while (searchPos <= src.Length)
            {
                int[] a = doExecute(input, searchPos, UNANCHORED, 2);
                if (a == null || a.Length == 0)
                {
                    break; // no more matches
                }

                // Copy the unmatched characters before this match.
                // a[0] is an end *index*; C# Substring takes (start, length), unlike Java's
                // substring(begin, end), so it has to be converted to a length.
                buf.Append(src.Substring(lastMatchEnd, a[0] - lastMatchEnd));

                // Now insert a copy of the replacement string, but not for a
                // match of the empty string immediately after another match.
                // (Otherwise, we get double replacement for patterns that
                // match both empty and nonempty strings.)
                // FIXME(adonovan), FIXME(afrozm) - JDK seems to be doing exactly this
                // put a replacement for a pattern that also matches empty and non-empty
                // strings. The fix would not just be a[1] >= lastMatchEnd, there are a
                // few corner cases in that as well, and there are tests which will fail
                // when that case is touched (happens only at the end of the input string
                // though).
                if (a[1] > lastMatchEnd || a[0] == 0)
                {
                    // Same (start, length) conversion: the match text spans [a[0], a[1]).
                    buf.Append(repl(src.Substring(a[0], a[1] - a[0])));
                    // Increment the replace count.
                    ++numReplaces;
                }

                lastMatchEnd = a[1];

                // Advance past this match; always advance at least one character.
                int width = input.step(searchPos) & 0x7;
                if (searchPos + width > a[1])
                {
                    searchPos += width;
                }
                else if (searchPos + 1 > a[1])
                {
                    // This clause is only needed at the end of the input
                    // string.  In that case, DecodeRuneInString returns width=0.
                    searchPos++;
                }
                else
                {
                    searchPos = a[1];
                }

                if (numReplaces >= maxReplaces)
                {
                    // Should never be greater though.
                    break;
                }
            }

            // Copy the unmatched characters after the last match.
            buf.Append(src.Substring(lastMatchEnd));

            return(buf.ToString());
        }
Example #10
0
File: RE2.cs Project: kaby76/re2cs
 /**
  * Reports whether this regexp matches the string {@code s}. The search is unanchored and
  * requests no capture positions.
  *
  * @param s the string to test
  * @return true iff the search yields a non-null result
  */
 public bool match(string s)
 {
     int[] outcome = doExecute(MachineInput.fromUTF16(s), 0, UNANCHORED, 0);
     return(outcome != null);
 }