Ejemplo n.º 1
0
        /// <summary>
        /// Runs the native <c>cre2_match</c> call against a pinned haystack and
        /// converts the resulting capture strings into byte ranges.
        /// <para>Guidance quoted from the RE2 docs for the underlying function:
        /// Don't ask for more match information than you will use:
        /// runs much faster with nsubmatch == 1 than nsubmatch > 1, and
        /// runs even faster if nsubmatch == 0.
        /// Doesn't make sense to use nsubmatch > 1 + NumberOfCapturingGroups(),
        /// but will be handled correctly.
        /// </para>
        /// </summary>
        /// <param name="hayBytes">The bytes to match the pattern against</param>
        /// <param name="startByteIndex">The byte offset to start matching at</param>
        /// <param name="numCaptures">
        /// How many capture slots to allocate and hand to the native call
        /// </param>
        /// <returns>
        /// The byte ranges produced from the captures when the native call
        /// returns <c>1</c>; otherwise an empty array
        /// </returns>
        private unsafe ByteRange[] RawMatch(
            ReadOnlySpan <byte> hayBytes, int startByteIndex, int numCaptures)
        {
            var groups    = new Re2Ffi.cre2_string_t[numCaptures];
            var hayLength = hayBytes.Length;

            fixed (byte* hayPtr = hayBytes)
            {
                // TODO: Support anchor as a parameter
                // The haystack length is passed twice: once as the text length
                // and once more after the start offset (presumably the end
                // offset of the search window -- confirm against the cre2 API).
                var status = Re2Ffi.cre2_match(
                    RawHandle,
                    hayPtr, hayLength,
                    startByteIndex, hayLength,
                    Re2Ffi.cre2_anchor_t.CRE2_UNANCHORED,
                    groups, groups.Length);

                if (status == 1)
                {
                    // The conversion has to happen inside the `fixed` block:
                    // the captures are turned into indices relative to the
                    // haystack pointer, so the haystack must not move between
                    // the match call and this pointer arithmetic.
                    return StringsToRanges(groups, new IntPtr(hayPtr));
                }

                // Any result other than 1 is reported as "no captures".
                return Array.Empty<ByteRange>();
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Compile the regular expression and verify that compilation succeeded.
        /// </summary>
        /// <param name="patternBytes">
        /// The regex pattern, as a UTF-8 byte array
        /// </param>
        /// <param name="opts">
        /// The regex compilation options, or <c>null</c> to use the default
        /// </param>
        /// <returns>
        /// The raw handle to the compiled Regex
        /// </returns>
        /// <exception cref="RegexCompilationException">
        /// Thrown when the native library reports an error code other than
        /// <c>CRE2_NO_ERROR</c> for the freshly created handle. The handle is
        /// disposed before the exception is thrown.
        /// </exception>
        private static RegexHandle Compile(ReadOnlySpan <byte> patternBytes, Options?opts)
        {
            // Copies `length` bytes out of native memory and decodes them as
            // UTF-8. A null pointer or non-positive length yields "".
            string DecodeUtf8(IntPtr data, int length)
            {
                if (data == IntPtr.Zero || length <= 0)
                {
                    return string.Empty;
                }

                var buffer = new byte[length];
                Marshal.Copy(data, buffer, 0, length);
                return System.Text.Encoding.UTF8.GetString(buffer);
            }

            // Decodes a NUL-terminated native string as UTF-8.
            string DecodeUtf8CString(IntPtr data)
            {
                if (data == IntPtr.Zero)
                {
                    return string.Empty;
                }

                var length = 0;
                while (Marshal.ReadByte(data, length) != 0)
                {
                    length++;
                }

                return DecodeUtf8(data, length);
            }

            var handle = Re2Ffi.cre2_new(
                in MemoryMarshal.GetReference(patternBytes), patternBytes.Length,
                opts?.RawHandle ?? new OptionsHandle());

            // Check to see if there was an error compiling this expression
            var errorCode = Re2Ffi.cre2_error_code(handle);

            if (errorCode == Re2Ffi.cre2_error_code_t.CRE2_NO_ERROR)
            {
                return handle;
            }

            string error;
            string offendingPortion;
            try
            {
                // The pattern is documented as UTF-8, so the offending portion
                // is decoded as UTF-8. Marshal.PtrToStringAnsi would use the
                // system ANSI code page on Windows and garble non-ASCII text.
                // NOTE(review): the error message is decoded as UTF-8 as well,
                // on the assumption that it can embed part of the pattern --
                // confirm against the cre2/RE2 documentation.
                error = DecodeUtf8CString(Re2Ffi.cre2_error_string(handle));

                var errorArg = new Re2Ffi.cre2_string_t();
                Re2Ffi.cre2_error_arg(handle, ref errorArg);
                offendingPortion = DecodeUtf8(errorArg.data, errorArg.length);
            }
            finally
            {
                // Need to clean up the regex, even if marshaling the error
                // details failed. Both strings are copied into managed memory
                // before this point, so nothing read afterwards points into
                // memory obtained through the handle.
                handle.Dispose();
            }

            throw new RegexCompilationException(error, offendingPortion);
        }