/// <summary>
/// Builds the compiled regular expressions for date/times, phone numbers,
/// NHS numbers, email addresses and post codes, and appends them to
/// <c>RegexCache</c> in that order.
/// </summary>
private void LoadRegexCache()
{
    // ---- building blocks for the date/time patterns ----
    const string weekdaysRegex = @"(Sun|Mon|Tue|Wed|Thu|Fri|Sat)";
    const string monthDayRegex = @"((0?[1-9]|[1-2][0-9]|3[01])(st|nd|rd|th)?)";

    // Each month is its three-letter abbreviation with an optional long-form suffix.
    // "(Oct(ober)?)" keeps the whole suffix optional so the bare abbreviation "Oct"
    // matches, exactly like every other month.
    var wordedMonthRegex = "((Jan(uary)?)|(Feb(ruary)?)|(Mar(ch)?)|(Apr(il)?)|May|(Jun(e)?)|(Jul(y)?)|(Aug(ust)?)|(Sep(tember)?)|(Oct(ober)?)|(Nov(ember)?)|(Dec(ember)?))";

    // Accept both the capitalised form and the ALL-CAPS form. Upper-casing the
    // pattern text is safe here because it contains no escape sequences.
    wordedMonthRegex = $"({wordedMonthRegex}|{wordedMonthRegex.ToUpperInvariant()})";

    const string numberedMonthRegex = @"((0?[1-9]|1[0-2]))";
    const string yearRegex = "(19[0-9]{2}|[2-9][0-9]{3}|[0-9]{2})";
    const string timeRegex = @"(\s+(2[0-3]|[0-1]?[0-9]):([0-5][0-9])(:(60|[0-5][0-9]))?)";
    const string timezoneRegex = @"(([-\\+][0-9]{2}[0-5][0-9]|(?:UT|GMT|(?:E|C|M|P)(?:ST|DT)|[A-IK-Z])))";

    var dateTimeFormats = new[]
    {
        // day / numbered month / year
        $@"\s*{weekdaysRegex}?{monthDayRegex}[.\\/-]\s*{numberedMonthRegex}[.\\/-]\s*{yearRegex}{timeRegex}?{timezoneRegex}?",
        // day / worded month / year
        $@"\s*{weekdaysRegex}?{monthDayRegex}[ .\\/-]{wordedMonthRegex}[ .\\/-]{yearRegex}{timeRegex}?{timezoneRegex}?",
        // numbered month / year
        $@"\s*{weekdaysRegex}?{numberedMonthRegex}[.\\/-]{yearRegex}[ .\\/-]{timeRegex}?{timezoneRegex}?",
        // worded month / day / [time] [zone] year
        $@"\s*{weekdaysRegex}?{wordedMonthRegex}[ .\\/-]{monthDayRegex}[ .\\/-]{timeRegex}?{timezoneRegex}?\s+{yearRegex}",
        // worded month / year
        $@"\s*{wordedMonthRegex}\s*{yearRegex}([ .\\/-]{timeRegex}{timezoneRegex}?)?"
    };

    // ---- building blocks for the post code / address pattern ----
    var streetAbbreviations = new[]
    {
        "Ave", "Blvd", "Bdwy", "Cir", "Cl", "Ct", "Cr", "Dr", "Gdn", "Gdns", "Gn",
        "Gr", "Ln", "Mt", "Pl", "Pk", "Rdg", "Rd", "Sq", "St", "Ter", "Val"
    };

    // Turn every abbreviation into a per-letter case-insensitive group,
    // e.g. "Ave" -> "(\s[Aa][Vv][Ee].)".
    // NOTE(review): the trailing '.' is an unescaped regex wildcard (any single
    // character), not a literal full stop - confirm this is intended.
    var streetTypeRegexes = streetAbbreviations.Select(abbreviation =>
    {
        var pattern = new StringBuilder("(\\s");
        foreach (var letter in abbreviation)
        {
            pattern.Append('[')
                   .Append(char.ToUpperInvariant(letter))
                   .Append(char.ToLowerInvariant(letter))
                   .Append(']');
        }

        pattern.Append(".)");
        return pattern.ToString();
    }).ToArray();
    var streetTypeAlternation = string.Join("|", streetTypeRegexes);

    // date/times
    RegexCache.Add(new Regex(string.Join("|", dateTimeFormats), RegexOptions.Compiled));

    // phone numbers
    // Prefix is either a single leading 0, or "+"/"00" followed by a 1-3 digit
    // country code. The quantifier must be written "{1,3}"; "{1-3}" is not a
    // quantifier and would be matched as literal text.
    RegexCache.Add(new Regex(
        @"(([0]|((\+|00)[0-9]{1,3}))[0-9][0-9][0-9]\s*[0-9]\s*[0-9][0-9]\s*[0-9]\s*[0-9][0-9][0-9])",
        RegexOptions.Compiled));

    // nhs numbers (3-3-4 with optional separators, and 3-4-3 with mandatory separators)
    RegexCache.Add(new Regex(
        @"([0-9][0-9][0-9][ -]?[0-9][0-9][0-9][ -]?[0-9][0-9][0-9][0-9])",
        RegexOptions.Compiled));
    RegexCache.Add(new Regex(
        @"([0-9][0-9][0-9][ -][0-9][0-9][0-9][0-9][ -][0-9][0-9][0-9])",
        RegexOptions.Compiled));

    // email addresses
    // The pattern only lists lower-case letter ranges, so it is compiled with
    // IgnoreCase to also match addresses that contain upper-case letters.
    RegexCache.Add(new Regex(
        @"((?:[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*|""(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])*"")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\[(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?|[a-z0-9-]*[a-z0-9]:(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x53-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])+)\]))",
        RegexOptions.Compiled | RegexOptions.IgnoreCase));

    // post codes (optionally preceded by address text that ends in ", ")
    RegexCache.Add(new Regex(
        $@"(([,\sa-zA-Z0-9]|{streetTypeAlternation})*[,]\s)?(([gG][iI][rR] {{0,}}0[aA]{{2}})|((([a-pr-uwyzA-PR-UWYZ][a-hk-yA-HK-Y]?[0-9][0-9]?)|(([a-pr-uwyzA-PR-UWYZ][0-9][a-hjkstuwA-HJKSTUW])|([a-pr-uwyzA-PR-UWYZ][a-hk-yA-HK-Y][0-9O][abehmnprv-yABEHMNPRV-Y])))\s*[0-9O][abd-hjlnp-uw-zABD-HJLNP-UW-Z]{{2}}))",
        RegexOptions.Compiled));
}