// Hand a checked-out Regex Cache back to the shared ParsedRegexes collection.
// Callers invoke this from a finally block; a null cache (nothing was checked out) is ignored.
private static void CheckinCache(ParsedRegexCache cache)
{
    if (cache == null)
    {
        return;
    }

    ParsedRegexes.Add(cache);
}
// Obtain a Regex Cache before each match so parsed Regex objects can be reused.
// A threadlocal cache is deliberately not kept, so caches aren't leaked if threads are discarded.
private static ParsedRegexCache CheckoutCache()
{
    // Reuse a cache from the shared collection when one is available.
    if (ParsedRegexes.TryTake(out ParsedRegexCache pooled))
    {
        return pooled;
    }

    // None available: count the new cache, then create it.
    Interlocked.Increment(ref _regexThreadCacheCount);
    return new ParsedRegexCache();
}
// Look up the integer ID of the native-side copy of a Regex, asking the native library to
// build it (and remembering the ID in the supplied cache) when it has not been seen before.
//   cache      - cache mapping (expression, options) to the native expression index.
//   expression - Regular Expression text; null or empty throws ArgumentNullException.
//   options    - RegexOptions passed through to the native side as an int.
// Throws ArgumentException when the native call reports -1 (could not parse) or -2 (unsupported option),
// and InvalidOperationException wrapping a DllNotFoundException when the native DLL can't be loaded.
private static unsafe int BuildRegex(ParsedRegexCache cache, string expression, RegexOptions options)
{
    if (string.IsNullOrEmpty(expression))
    {
        throw new ArgumentNullException(nameof(expression));
    }

    try
    {
        // The cache key uses the expression exactly as the caller passed it.
        var cacheKey = Tuple.Create(expression, options);

        if (cache.TryGetValue(cacheKey, out int regexId))
        {
            return regexId;
        }

        // Cache miss: strip named groups (on add only, so once per Regex only).
        expression = RemoveNamedGroups(expression);

        byte[] scratch = null;
        var utf8Expression = String8.Convert(expression, ref scratch);

        // The native BuildRegex code is thread-safe for creating compiled expressions.
        fixed (byte* utf8Ptr = utf8Expression.Array)
        {
            var interop = new String8Interop(utf8Ptr, utf8Expression.Index, utf8Expression.Length);
            regexId = NativeMethods.BuildRegex(interop, (int)options);
        }

        switch (regexId)
        {
            case -1:
                // RE2 couldn't parse the regex.
                // Error Text is native allocated and so not returned; it's written to the console by RE2.
                throw new ArgumentException($"RE2 could not parse regular expression \"{expression}\".");

            case -2:
                // RE2 couldn't support a passed RegexOption.
                throw new ArgumentException($"RE2 doesn't support a passed RegexOption. Supported Options: Singleline, IgnoreCase. Options passed: {options}");
        }

        cache[cacheKey] = regexId;
        return regexId;
    }
    catch (DllNotFoundException ex)
    {
        // Throw a clearer exception if RE2.Native.*.dll wasn't found in any of the DLL loading paths.
        throw new InvalidOperationException($"RE2.Native.*.dll was not found. It's required for RE2.Managed to run. Place RE2.Native.*.dll next to RE2.Managed.dll in '{Assembly.GetExecutingAssembly().Location}'. HR: {ex.HResult}", ex);
    }
}
/// <summary>
///  Lazily enumerate the matches of a Regular Expression within UTF-8 converted text,
///  beginning the search at the requested index. Matches are fetched in batches of 32.
/// </summary>
/// <param name="text">UTF-8 converted text to match.</param>
/// <param name="expression">Regular Expression to match; must not contain named groups or backreferences.</param>
/// <param name="options">RegexOptions to use.</param>
/// <param name="timeout">Timeout for runtime; expiry is checked only after each full batch has been returned.</param>
/// <param name="fromIndex">Index in text to start searching from (used to resume matching).</param>
/// <returns>IEnumerable of matches found.</returns>
public static IEnumerable<Match2> Matches(String8 text, string expression, RegexOptions options = RegexOptions.None, Timeout timeout = default, int fromIndex = 0)
{
    ParsedRegexCache cache = null;

    try
    {
        cache = CheckoutCache();

        // Reusable buffer that receives one batch of matches at a time.
        var batch = new Match2[32];

        // Get or Cache the Regex on the native side and retrieve an index to it.
        int regexId = BuildRegex(cache, expression, options);

        bool keepSearching;
        do
        {
            // Fetch the next batch of matches.
            int found = Matches(regexId, text, fromIndex, batch, timeout.RemainingMilliseconds);

            // Hand each match in the batch to the caller.
            int next = 0;
            while (next < found)
            {
                yield return batch[next];
                next++;
            }

            // Another batch is needed only when this one filled the buffer and the timeout hasn't expired.
            keepSearching = found == batch.Length && !timeout.IsExpired;

            if (keepSearching)
            {
                // Resume just after the start of the last match returned.
                fromIndex = batch[found - 1].Index + 1;
            }
        }
        while (keepSearching);
    }
    finally
    {
        CheckinCache(cache);
    }
}
/// <summary>
///  Find the first match of the given Regular Expression, searching from the start of the text.
/// </summary>
/// <param name="text">UTF8 text to search within.</param>
/// <param name="expression">Regular Expression to match.</param>
/// <param name="options">RegexOptions to use.</param>
/// <param name="timeout">Timeout; its remaining milliseconds are passed to the match call.</param>
/// <returns>First Match found in text; Index and Length will both be -1 if no matches found.</returns>
public static Match2 Match(String8 text, string expression, RegexOptions options = RegexOptions.None, Timeout timeout = default)
{
    ParsedRegexCache cache = null;

    try
    {
        cache = CheckoutCache();

        // A single-slot buffer: only the first match is wanted.
        var firstMatch = new Match2[1];
        int regexId = BuildRegex(cache, expression, options);

        if (Matches(regexId, text, 0, firstMatch, timeout.RemainingMilliseconds) == 0)
        {
            // Nothing found: mark the result as "no match".
            firstMatch[0].Index = -1;
            firstMatch[0].Length = -1;
        }

        return firstMatch[0];
    }
    finally
    {
        CheckinCache(cache);
    }
}