/// <summary>
/// Runs the compiled pattern against a UTF-8 haystack through the raw
/// <c>cre2_match</c> entry point and reports the captures as byte ranges.
/// <para>
/// Guidance from the RE2 docs for the underlying function: only request
/// as many submatches as will actually be used. Matching is much faster
/// with one submatch than with several, and faster again with none.
/// Requesting more than one plus the number of capturing groups is
/// pointless, but is still handled correctly.
/// </para>
/// </summary>
/// <param name="hayBytes">The bytes to match the pattern against</param>
/// <param name="startByteIndex">The byte offset to begin matching at</param>
/// <param name="numCaptures">How many match groups to ask for</param>
/// <returns>
/// The byte ranges of the captures, or an empty array when the pattern
/// did not match
/// </returns>
private unsafe ByteRange[] RawMatch(
    ReadOnlySpan<byte> hayBytes, int startByteIndex, int numCaptures)
{
    var groups = new Re2Ffi.cre2_string_t[numCaptures];
    var hayLength = hayBytes.Length;

    // The haystack stays pinned for the whole of this block: the native
    // call fills `groups` with pointers into it, and those pointers are
    // only meaningful while the buffer cannot be relocated.
    fixed (byte* hayPtr = hayBytes)
    {
        // TODO: Support anchor as a parameter
        var status = Re2Ffi.cre2_match(
            RawHandle,
            hayPtr,
            hayLength,
            startByteIndex,
            hayLength,
            Re2Ffi.cre2_anchor_t.CRE2_UNANCHORED,
            groups,
            groups.Length);

        if (status == 1)
        {
            // Translate the captured pointers into offsets before the
            // pin is released, otherwise the pointer arithmetic would be
            // performed against a buffer that may have moved.
            return StringsToRanges(groups, new IntPtr(hayPtr));
        }

        return Array.Empty<ByteRange>();
    }
}
/// <summary>
/// Compile the regular expression
/// </summary>
/// <param name="patternBytes">
/// The regex pattern, as a UTF-8 byte array
/// </param>
/// <param name="opts">
/// The regex compilation options, or <c>null</c> to use the default
/// </param>
/// <returns>
/// The raw handle to the Regex
/// </returns>
/// <exception cref="RegexCompilationException">
/// Thrown when the native library reports a compilation error. The
/// exception carries the error message and the offending portion of the
/// pattern.
/// </exception>
private static RegexHandle Compile(ReadOnlySpan<byte> patternBytes, Options? opts)
{
    var handle = Re2Ffi.cre2_new(
        in MemoryMarshal.GetReference(patternBytes),
        patternBytes.Length,
        opts?.RawHandle ?? new OptionsHandle());

    // Check to see if there was an error compiling this expression
    var errorCode = Re2Ffi.cre2_error_code(handle);
    if (errorCode == Re2Ffi.cre2_error_code_t.CRE2_NO_ERROR)
    {
        return handle;
    }

    try
    {
        var errorString = Re2Ffi.cre2_error_string(handle);
        var error = Marshal.PtrToStringAnsi(errorString);

        var errorArg = new Re2Ffi.cre2_string_t();
        Re2Ffi.cre2_error_arg(handle, ref errorArg);

        // The error argument is a byte range of the pattern, and the
        // pattern is UTF-8. Decode it as UTF-8 explicitly: an ANSI decode
        // uses the system code page on some platforms and would garble
        // any non-ASCII fragment.
        var offendingPortion = string.Empty;
        if (errorArg.data != IntPtr.Zero && errorArg.length > 0)
        {
            var fragment = new byte[errorArg.length];
            Marshal.Copy(errorArg.data, fragment, 0, fragment.Length);
            offendingPortion = System.Text.Encoding.UTF8.GetString(fragment);
        }

        throw new RegexCompilationException(error, offendingPortion);
    }
    finally
    {
        // The failed regex must be released on every path out of here,
        // including when reading the error details itself throws.
        handle.Dispose();
    }
}