/// <summary>
///  Convert a UTF-8 indexed <see cref="Match2"/> into a <see cref="FlexMatch"/> whose Index and
///  Length come from String8.Utf8ToUtf16, while the Value is the UTF-8 substring that matched.
/// </summary>
/// <param name="match">Match to convert; an Index of -1 is treated as "no match".</param>
/// <param name="input">Text the match was found in.</param>
/// <param name="lastUtf8Index">UTF-8 index of the previously mapped position; set to this match's UTF-8 start on success.</param>
/// <param name="lastUtf16Index">UTF-16 index paired with <paramref name="lastUtf8Index"/>; set to this match's UTF-16 start on success.</param>
/// <returns>A failed FlexMatch (Index and Length -1, null Value) when there is no match; otherwise the mapped match.</returns>
private FlexMatch ToFlex(Match2 match, FlexString input, ref int lastUtf8Index, ref int lastUtf16Index)
{
    int utf8Start = match.Index;

    // No match: report failure and leave the remembered positions untouched.
    if (utf8Start == -1)
    {
        return new FlexMatch()
        {
            Success = false,
            Index = -1,
            Length = -1,
            Value = null
        };
    }

    // The matched text is sliced from the UTF-8 form using the UTF-8 offsets.
    String8 matchedText = ((String8)input).Substring(utf8Start, match.Length);

    // Map the start of the match, handing over the previously mapped pair
    // (presumably so the mapper can resume from there instead of rescanning - confirm in String8).
    int utf16Start = String8.Utf8ToUtf16(utf8Start, input, lastUtf8Index, lastUtf16Index);

    // Remember this match's start for the end mapping below and for the next call.
    lastUtf8Index = utf8Start;
    lastUtf16Index = utf16Start;

    // Map the end of the match; the UTF-16 length is the distance between the two mapped positions.
    int utf8End = utf8Start + match.Length;
    int utf16End = String8.Utf8ToUtf16(utf8End, input, utf8Start, utf16Start);

    return new FlexMatch()
    {
        Success = true,
        Index = utf16Start,
        Length = utf16End - utf16Start,
        Value = matchedText
    };
}
/// <summary>
///  Lazily enumerate the matches of a Regular Expression within UTF-8 text, beginning the
///  search at the requested index. Matches are fetched in batches of 32.
/// </summary>
/// <param name="text">UTF-8 converted text to match.</param>
/// <param name="expression">Regular Expression to match; must not contain named groups or backreferences.</param>
/// <param name="options">RegexOptions to use.</param>
/// <param name="timeout">Timeout for the search; its remaining milliseconds are passed to each batch and expiry is checked between batches.</param>
/// <param name="fromIndex">Index in text to start searching from (used to resume matching).</param>
/// <returns>IEnumerable of matches found.</returns>
public static IEnumerable<Match2> Matches(String8 text, string expression, RegexOptions options = RegexOptions.None, Timeout timeout = default, int fromIndex = 0)
{
    ParsedRegexCache cache = null;

    try
    {
        cache = CheckoutCache();

        // Reusable buffer that receives one batch of matches at a time.
        Match2[] batch = new Match2[32];

        // Build (or look up) the parsed expression and get the index used to refer to it.
        int regexIndex = BuildRegex(cache, expression, options);

        int searchFrom = fromIndex;

        while (true)
        {
            int found = Matches(regexIndex, text, searchFrom, batch, timeout.RemainingMilliseconds);

            // Hand out only the slots filled by this batch.
            for (int slot = 0; slot < found; ++slot)
            {
                yield return batch[slot];
            }

            // A partially filled buffer means there is nothing more to find;
            // a full buffer with an expired timeout means we must stop anyway.
            if (found < batch.Length || timeout.IsExpired)
            {
                yield break;
            }

            // Buffer was full: continue one position past the start of the last match returned.
            searchFrom = batch[found - 1].Index + 1;
        }
    }
    finally
    {
        // Runs when enumeration completes, stops early (enumerator disposed), or throws.
        CheckinCache(cache);
    }
}
/// <summary>
///  Find the first match of a Regular Expression in UTF-8 text, searching from index 0.
/// </summary>
/// <param name="text">UTF-8 text to search within.</param>
/// <param name="expression">Regular Expression to match.</param>
/// <param name="options">RegexOptions to use.</param>
/// <param name="timeout">Timeout for the search; its remaining milliseconds are passed to the matcher.</param>
/// <returns>First match found in text; Index and Length are both -1 if nothing matched.</returns>
public static Match2 Match(String8 text, string expression, RegexOptions options = RegexOptions.None, Timeout timeout = default)
{
    ParsedRegexCache cache = null;

    try
    {
        cache = CheckoutCache();

        // A one-element buffer limits the search to a single match.
        Match2[] firstOnly = new Match2[1];
        int regexIndex = BuildRegex(cache, expression, options);

        bool nothingFound = Matches(regexIndex, text, 0, firstOnly, timeout.RemainingMilliseconds) == 0;
        if (nothingFound)
        {
            // Mark the untouched slot as "no match" for the caller.
            firstOnly[0].Index = -1;
            firstOnly[0].Length = -1;
        }

        return firstOnly[0];
    }
    finally
    {
        CheckinCache(cache);
    }
}
/// <summary>
///  Format a match as "(Index, Length: 'matched text')", taking the matched text from
///  <paramref name="content"/> at the match's Index and Length.
/// </summary>
/// <param name="match">Match whose Index and Length are reported.</param>
/// <param name="content">Text the match offsets refer to.</param>
/// <returns>Human-readable description of the match.</returns>
private string MatchToString(Match2 match, String8 content)
{
    String8 matchedText = content.Substring(match.Index, match.Length);
    return $"({match.Index}, {match.Length}: '{matchedText}')";
}