示例#1
0
        /// <summary>
        ///  Compile a given set of patterns to a raw regex set handle.
        /// </summary>
        /// <param name="patternsAsBytes">
        /// The collection of patterns, each one as a byte array
        /// </param>
        /// <param name="options">
        /// The regular expression options to use when compiling
        /// </param>
        /// <returns>The raw set handle for the compiled set</returns>
        /// <exception cref="RegexCompilationException">
        /// If a pattern can't be added to the set, or if the set as a whole
        /// fails to compile. The partially built set handle is disposed
        /// before the exception leaves this method.
        /// </exception>
        private static RegexSetHandle CompileSetWithOptions(
            IReadOnlyCollection<byte[]> patternsAsBytes,
            Options options)
        {
            // TODO: we could maybe have a `RegexSetBuilder` to represent this
            // stage of regex set compilation.
            var handle = Re2Ffi.cre2_set_new(
                options.RawHandle, Re2Ffi.cre2_anchor_t.CRE2_UNANCHORED);

            try
            {
                var errBuff = new byte[100];

                foreach (var pattern in patternsAsBytes)
                {
                    var r = Re2Ffi.cre2_set_add(
                        handle,
                        pattern, new UIntPtr((uint)pattern.Length),
                        errBuff, new UIntPtr((uint)errBuff.Length));
                    if (r < 0)
                    {
                        // Only decode up to the first NUL byte. Decoding the
                        // whole fixed-size buffer would append the unused
                        // zero padding to the message as '\0' characters.
                        var terminator = Array.IndexOf(errBuff, (byte)0);
                        var errorLength =
                            terminator < 0 ? errBuff.Length : terminator;
                        var error = Encoding.UTF8.GetString(
                            errBuff, 0, errorLength);
                        throw new RegexCompilationException(
                                  error, Encoding.UTF8.GetString(pattern));
                    }
                }

                if (Re2Ffi.cre2_set_compile(handle) != 1)
                {
                    throw new RegexCompilationException("Error compiling regex set");
                }

                return handle;
            }
            catch
            {
                // Every failure path releases the set handle here, so the
                // caller never receives (or leaks) a half-built set.
                handle.Dispose();
                throw;
            }
        }
示例#2
0
        /// <summary>
        /// Runs the raw match API against a haystack and converts the
        /// captured strings into byte ranges.
        /// <para>From the RE2 docs for the underlying function:
        /// Don't ask for more match information than you will use:
        /// runs much faster with nsubmatch == 1 than nsubmatch > 1, and
        /// runs even faster if nsubmatch == 0.
        /// Doesn't make sense to use nsubmatch > 1 + NumberOfCapturingGroups(),
        /// but will be handled correctly.
        /// </para>
        /// </summary>
        /// <param name="hayBytes">The bytes to match the pattern against</param>
        /// <param name="startByteIndex">The byte offset to start matching at</param>
        /// <param name="numCaptures">The number of capture slots to request</param>
        /// <returns>
        /// The byte ranges for the captures, or an empty array if the match
        /// call does not report success
        /// </returns>
        private unsafe ByteRange[] RawMatch(
            ReadOnlySpan<byte> hayBytes, int startByteIndex, int numCaptures)
        {
            var groups = new Re2Ffi.cre2_string_t[numCaptures];

            fixed (byte* hayPtr = hayBytes)
            {
                // TODO: Support anchor as a parameter
                var status = Re2Ffi.cre2_match(
                    RawHandle,
                    hayPtr, hayBytes.Length,
                    startByteIndex, hayBytes.Length,
                    Re2Ffi.cre2_anchor_t.CRE2_UNANCHORED,
                    groups, groups.Length);

                if (status == 1)
                {
                    // The conversion has to happen inside the `fixed` block:
                    // the captures are turned into ranges relative to the
                    // pinned haystack pointer, so the haystack must not move
                    // between the match call and this conversion.
                    return StringsToRanges(groups, new IntPtr(hayPtr));
                }

                return Array.Empty<ByteRange>();
            }
        }
示例#3
0
 /// <summary>
 ///  Advance the enumerator
 /// </summary>
 /// <returns>True if <see cref="Current" /> now points to a valid
 /// <see cref="NamedCaptureGroup" /></returns>
 public unsafe bool MoveNext()
 {
     if (Re2Ffi.cre2_named_groups_iter_next(RawHandle, out var namePtr, out var index))
     {
         var name = Marshal.PtrToStringAnsi(new IntPtr(namePtr));
         _current = new NamedCaptureGroup(name, index);
         return(true);
     }
示例#4
0
        /// <summary>
        /// Tests whether the pattern matches anywhere in the given
        /// <paramref name="haystack" />.
        /// </summary>
        /// <param name="haystack">The text to search for the pattern</param>
        /// <returns>True if the pattern matches, false otherwise.</returns>
        public unsafe bool IsMatch(ReadOnlySpan<byte> haystack)
        {
            fixed (byte* pinnedHaystack = haystack)
            {
                // No capture slots are requested; only the match status
                // is of interest here.
                var noCaptures = Array.Empty<Re2Ffi.cre2_string_t>();

                // TODO: Support anchor as a parameter
                var status = Re2Ffi.cre2_match(
                    RawHandle,
                    pinnedHaystack, haystack.Length,
                    0, haystack.Length,
                    Re2Ffi.cre2_anchor_t.CRE2_UNANCHORED,
                    noCaptures, noCaptures.Length);

                return status == 1;
            }
        }
示例#5
0
        /// <summary>
        /// Compile the regular expression
        /// </summary>
        /// <param name="patternBytes">
        /// The regex pattern, as a UTF-8 byte array
        /// </param>
        /// <param name="opts">
        /// The regex compilation options, or <c>null</c> to use the default
        /// </param>
        /// <returns>
        /// The raw handle to the Regex
        /// </returns>
        /// <exception cref="RegexCompilationException">
        /// If the compiled handle reports an error code other than
        /// <c>CRE2_NO_ERROR</c>. The handle is disposed before throwing.
        /// </exception>
        private static RegexHandle Compile(ReadOnlySpan<byte> patternBytes, Options? opts)
        {
            var handle = Re2Ffi.cre2_new(
                in MemoryMarshal.GetReference(patternBytes), patternBytes.Length,
                opts?.RawHandle ?? new OptionsHandle());

            // Check to see if there was an error compiling this expression
            var errorCode = Re2Ffi.cre2_error_code(handle);
            if (errorCode == Re2Ffi.cre2_error_code_t.CRE2_NO_ERROR)
            {
                return handle;
            }

            // Both strings below are read through the handle, so they must be
            // copied into managed memory before the handle is disposed.
            // NOTE(review): the error text is still decoded with the ANSI
            // marshaller; confirm whether it can contain non-ASCII bytes.
            var error = Marshal.PtrToStringAnsi(Re2Ffi.cre2_error_string(handle));

            var errorArg = new Re2Ffi.cre2_string_t();
            Re2Ffi.cre2_error_arg(handle, ref errorArg);

            // The offending portion is a fragment of the UTF-8 pattern, so it
            // is decoded explicitly as UTF-8. The ANSI marshaller uses the
            // system code page on some platforms and would garble any
            // non-ASCII characters in the fragment.
            var offendingPortion = string.Empty;
            if (errorArg.data != System.IntPtr.Zero && errorArg.length > 0)
            {
                var argBytes = new byte[errorArg.length];
                Marshal.Copy(errorArg.data, argBytes, 0, argBytes.Length);
                offendingPortion = System.Text.Encoding.UTF8.GetString(argBytes);
            }

            // need to clean up the regex
            handle.Dispose();
            throw new RegexCompilationException(error, offendingPortion);
        }
示例#6
0
 /// <summary>
 ///  Create a new enumerator from the named groups iterator of the
 ///  given <paramref name="regex" />.
 /// </summary>
 /// <param name="regex">The regex whose named groups are iterated</param>
 /// <exception cref="ArgumentNullException">
 ///  If <paramref name="regex" /> is <c>null</c>
 /// </exception>
 public NamedCaptureEnumerator(Regex regex)
     : base(Re2Ffi.cre2_named_groups_iter_new(
                (regex ?? throw new ArgumentNullException(nameof(regex))).RawHandle))
 {
 }
示例#7
0
 /// <summary>
 ///   Initialise an <see cref="Options" /> instance backed by a freshly
 ///   created native options object, leaving every setting at its default.
 /// </summary>
 public Options() : base(Re2Ffi.cre2_opt_new())
 {
 }
示例#8
0
 /// <summary>
 ///  Look up the index of a capture group from its name
 /// </summary>
 /// <param name="name">The name of the capture group to look up</param>
 /// <returns>The capture group index, or -1 if no named group exists</returns>
 public int FindNamedCapture(string name)
 {
     return Re2Ffi.cre2_find_named_capturing_groups(RawHandle, name);
 }