/// <summary>
/// Compile a given set of patterns to a raw regex set handle.
/// </summary>
/// <param name="patternsAsBytes">
/// The collection of patterns, each one encoded as UTF-8 bytes
/// </param>
/// <param name="options">
/// The regular expression options to use when compiling
/// </param>
/// <returns>The raw set handle</returns>
/// <exception cref="RegexCompilationException">
/// Thrown when a pattern can not be added to the set, or when the set as
/// a whole fails to compile. The native set handle is released before
/// the exception leaves this method.
/// </exception>
private static RegexSetHandle CompileSetWithOptions(
    IReadOnlyCollection<byte[]> patternsAsBytes,
    Options options)
{
    // TODO: we could maybe have a `RegexSetBuilder` to represent this
    //       stage of regex set compilation.
    var handle = Re2Ffi.cre2_set_new(
        options.RawHandle,
        Re2Ffi.cre2_anchor_t.CRE2_UNANCHORED);
    try
    {
        var errBuff = new byte[100];
        foreach (var pattern in patternsAsBytes)
        {
            var r = Re2Ffi.cre2_set_add(
                handle,
                pattern,
                new UIntPtr((uint)pattern.Length),
                errBuff,
                new UIntPtr((uint)errBuff.Length));
            if (r < 0)
            {
                // The buffer is a fixed-size, zero-initialised array, so
                // only decode up to the first NUL. Decoding the whole
                // buffer would pad the message with '\0' characters.
                var errLength = Array.IndexOf(errBuff, (byte)0);
                if (errLength < 0)
                {
                    errLength = errBuff.Length;
                }

                throw new RegexCompilationException(
                    Encoding.UTF8.GetString(errBuff, 0, errLength),
                    Encoding.UTF8.GetString(pattern));
            }
        }

        if (Re2Ffi.cre2_set_compile(handle) != 1)
        {
            throw new RegexCompilationException("Error compiling regex set");
        }

        return handle;
    }
    catch
    {
        // Whatever went wrong, the caller never receives the handle, so
        // release it here rather than waiting for finalisation.
        handle.Dispose();
        throw;
    }
}
/// <summary>
/// Managed wrapper around the raw match API.
/// <para>
/// The RE2 documentation for the underlying function advises not asking
/// for more match information than will be used: matching is much faster
/// with one submatch than with several, and faster still with none.
/// Requesting more submatches than one plus the number of capturing
/// groups is pointless, but is handled correctly.
/// </para>
/// </summary>
/// <param name="hayBytes">The bytes to match the pattern against</param>
/// <param name="startByteIndex">The byte offset to start at</param>
/// <param name="numCaptures">The number of match groups to return</param>
/// <returns>
/// An array of byte ranges for the captures, or an empty array when the
/// pattern did not match
/// </returns>
private unsafe ByteRange[] RawMatch(
    ReadOnlySpan<byte> hayBytes,
    int startByteIndex,
    int numCaptures)
{
    var groups = new Re2Ffi.cre2_string_t[numCaptures];
    fixed (byte* hayPtr = hayBytes)
    {
        // TODO: Support anchor as a parameter
        var status = Re2Ffi.cre2_match(
            RawHandle,
            hayPtr,
            hayBytes.Length,
            startByteIndex,
            hayBytes.Length,
            Re2Ffi.cre2_anchor_t.CRE2_UNANCHORED,
            groups,
            groups.Length);

        // The conversion to byte ranges is done inside the `fixed` block
        // on purpose: it relies on pointer arithmetic against the
        // haystack, which is only valid while the haystack stays pinned.
        return status == 1
            ? StringsToRanges(groups, new IntPtr(hayPtr))
            : Array.Empty<ByteRange>();
    }
}
/// <summary> /// Advance the enumerator /// </summary> /// <returns>True if <see cref="Current" /> now points to a valid /// <see cref="NamedCaptureGroup" /></returns> public unsafe bool MoveNext() { if (Re2Ffi.cre2_named_groups_iter_next(RawHandle, out var namePtr, out var index)) { var name = Marshal.PtrToStringAnsi(new IntPtr(namePtr)); _current = new NamedCaptureGroup(name, index); return(true); }
/// <summary>
/// Tests whether the pattern matches anywhere within the given
/// <paramref name="haystack" />.
/// </summary>
/// <param name="haystack">The text to search for the pattern</param>
/// <returns>
/// <c>true</c> when the pattern matches, <c>false</c> otherwise.
/// </returns>
public unsafe bool IsMatch(ReadOnlySpan<byte> haystack)
{
    fixed (byte* pinnedHaystack = haystack)
    {
        // TODO: Support anchor as a parameter
        // No capture information is requested: an empty capture array
        // and a capture count of zero are passed to the native call.
        return Re2Ffi.cre2_match(
            RawHandle,
            pinnedHaystack,
            haystack.Length,
            0,
            haystack.Length,
            Re2Ffi.cre2_anchor_t.CRE2_UNANCHORED,
            Array.Empty<Re2Ffi.cre2_string_t>(),
            0) == 1;
    }
}
/// <summary>
/// Compile the regular expression.
/// </summary>
/// <param name="patternBytes">
/// The regex pattern, as a UTF-8 byte array
/// </param>
/// <param name="opts">
/// The regex compilation options, or <c>null</c> to use the default
/// </param>
/// <returns>The raw handle to the compiled regex</returns>
/// <exception cref="RegexCompilationException">
/// Thrown when the native library reports a compilation error. The
/// native handle is released before the exception leaves this method.
/// </exception>
private static RegexHandle Compile(ReadOnlySpan<byte> patternBytes, Options? opts)
{
    var handle = Re2Ffi.cre2_new(
        in MemoryMarshal.GetReference(patternBytes),
        patternBytes.Length,
        opts?.RawHandle ?? new OptionsHandle());

    // Check to see if there was an error compiling this expression
    var errorCode = Re2Ffi.cre2_error_code(handle);
    if (errorCode == Re2Ffi.cre2_error_code_t.CRE2_NO_ERROR)
    {
        return handle;
    }

    try
    {
        var errorString = Re2Ffi.cre2_error_string(handle);
        var error = Marshal.PtrToStringAnsi(errorString);

        var errorArg = new Re2Ffi.cre2_string_t();
        Re2Ffi.cre2_error_arg(handle, ref errorArg);

        // The offending portion is a slice of the pattern, and the
        // pattern is UTF-8. Decode it as UTF-8 explicitly rather than
        // through the ANSI code page, which would mangle non-ASCII
        // patterns on platforms where ANSI is not UTF-8.
        IntPtr argPtr = errorArg.data;
        int argLength = errorArg.length;
        var offendingPortion = string.Empty;
        if (argPtr != IntPtr.Zero && argLength > 0)
        {
            var argBytes = new byte[argLength];
            Marshal.Copy(argPtr, argBytes, 0, argLength);
            offendingPortion = System.Text.Encoding.UTF8.GetString(argBytes);
        }

        throw new RegexCompilationException(error, offendingPortion);
    }
    finally
    {
        // Need to clean up the regex. This runs on every path through
        // the error branch, including a failure while marshalling the
        // error details.
        handle.Dispose();
    }
}
/// <summary>
/// Create an enumerator over the named capture groups of
/// <paramref name="regex" />, backed by a new native named-groups
/// iterator.
/// </summary>
/// <param name="regex">The regex whose named groups are enumerated</param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="regex" /> is <c>null</c>.
/// </exception>
public NamedCaptureEnumerator(Regex regex)
    : base(Re2Ffi.cre2_named_groups_iter_new(
        // Validate inline: the handle is needed before the constructor
        // body runs, so a guard statement in the body would be too late.
        (regex ?? throw new ArgumentNullException(nameof(regex))).RawHandle))
{
}
/// <summary>
/// Initialise an <see cref="Options" /> instance holding the default
/// settings, backed by a freshly created native options object.
/// </summary>
public Options()
    : base(Re2Ffi.cre2_opt_new())
{
}
/// <summary>
/// Find a capture group index by name.
/// </summary>
/// <param name="name">The named capture to search for</param>
/// <returns>The capture group index, or -1 if no named group exists</returns>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="name" /> is <c>null</c>.
/// </exception>
public int FindNamedCapture(string name)
{
    // Reject null here instead of forwarding it to native code.
    // NOTE(review): with default string marshalling a null would reach
    // the native function as a null pointer - confirm against the
    // P/Invoke declaration.
    if (name is null)
    {
        throw new ArgumentNullException(nameof(name));
    }

    return Re2Ffi.cre2_find_named_capturing_groups(RawHandle, name);
}