/// <summary>
/// Starts a separate thread that resolves every domain in <c>MxBlackList</c> to the
/// IP addresses of its MX hosts and stores them in <c>BlockedMxAddresses</c>.
/// <c>ReverseDnsCompleteEvent</c> is signalled when that work has finished.
/// </summary>
static EmailFilter() {
    //This cannot be run in the constructor and needs to run in a separate thread
    //not our fault, see here: https://blogs.msdn.microsoft.com/pfxteam/2011/05/03/static-constructor-deadlocks/
    new Thread(() => {
        try {
            //Create a set of IP addresses for known bad domains
            //used to reverse filter any future MX lookups for a match
            //this will catch aliases for temporary email address services
            var blocked = new HashSet<IPAddress>();
            BlockedMxAddresses = blocked;

            //localFinally is invoked concurrently by several workers and HashSet<T>
            //is not safe for concurrent writes, so the merge has to be serialized
            var mergeGate = new object();

            Parallel.ForEach(MxBlackList, () => new HashSet<IPAddress>(), (domain, state, local) => {
                try {
                    var results = DnsLookup.GetMXRecords(domain, out bool _);
                    foreach (var result in results) {
                        DnsLookup.GetIpAddresses(result, out var addresses);
                        if (addresses == null) {
                            continue;
                        }

                        foreach (var ip in addresses) {
                            local.Add(ip);
                        }
                    }
                }
                catch {
                    //Deliberately best-effort: a domain that fails to resolve is skipped
                    //so that one bad lookup does not abort the whole blacklist build
                }

                return local;
            }, local => {
                lock (mergeGate) {
                    foreach (var ip in local) {
                        blocked.Add(ip);
                    }
                }
            });
        }
        finally {
            //Always signal, even on failure, so that callers blocked on the event
            //are released instead of waiting forever
            ReverseDnsCompleteEvent.Set();
        }
    }).Start();
}
/// <summary>
/// Applies a series of increasingly strict heuristics to decide whether an email
/// address is likely to be fake, mistyped or disposable.
/// </summary>
/// <param name="email">The address to check. Null, empty or whitespace is reported as fake.</param>
/// <param name="meanness">
/// Strictness level; each rule group runs when the level is at least its threshold:
/// 0 = per-domain minimum user length, mistyped domains and mistyped TLDs;
/// 1 = blocked domains; 2 = <c>ExpressionRegex</c> on the user part;
/// 4 = <c>RepeatedCharsRegex</c> on user or host;
/// 5 = <c>NumericEmailRegex</c> on the user part and <c>ExpressionRegex</c> on the whole address;
/// 6 = <c>QwertyRegex</c>, <c>QwertyDomainRegex</c> and <c>NumericDomainRegex</c>;
/// 7 = host shorter than 6 characters; 8 = user shorter than 3 characters;
/// 9 = user shorter than 5 characters; 10 = <c>TldRegex</c> on the host.
/// A negative value skips all of these rule groups.
/// </param>
/// <param name="validateMx">
/// When true, the call first blocks until the MX blacklist has been built, and finally
/// looks up the MX records of the address' host and compares them to that blacklist.
/// </param>
/// <returns>True if any executed rule flags the address; otherwise false.</returns>
public static bool IsProbablyFakeEmail(string email, int meanness, bool validateMx = false) {
    if (validateMx && !ReverseDnsComplete) {
        ReverseDnsCompleteEvent.Wait();
        ReverseDnsComplete = true;
    }

    if (string.IsNullOrWhiteSpace(email)) {
        return true;
    }

    //Instead of making all the regex rules case-insensitive
    //Invariant casing so the result does not depend on the current culture
    //(e.g. under tr-TR "I" would otherwise lower-case to a dotless "ı")
    email = email.ToLowerInvariant();

    if (!IsValidFormat(email)) {
        return true;
    }

    var mailAddress = new MailAddress(email);

    if (meanness >= 0) {
        if (DomainMinimumPrefix.TryGetValue(mailAddress.Host, out var minimumPrefix) && minimumPrefix > mailAddress.User.Length) {
            return true;
        }

        if (MistypedDomains.Contains(mailAddress.Host)) {
            return true;
        }

        if (MistypedTldRegex.IsMatch(mailAddress.Host)) {
            return true;
        }
    }

    if (meanness >= 1) {
        if (BlockedDomains.Contains(mailAddress.Host)) {
            return true;
        }
    }

    if (meanness >= 2) {
        if (ExpressionRegex.IsMatch(mailAddress.User)) {
            return true;
        }
    }

    if (meanness >= 4) {
        if (RepeatedCharsRegex.IsMatch(mailAddress.User) || RepeatedCharsRegex.IsMatch(mailAddress.Host)) {
            return true;
        }
    }

    if (meanness >= 5) {
        if (NumericEmailRegex.IsMatch(mailAddress.User)) {
            return true;
        }

        if (ExpressionRegex.IsMatch(email)) {
            return true;
        }
    }

    if (meanness >= 6) {
        if (QwertyRegex.IsMatch(mailAddress.User)) {
            return true;
        }

        if (QwertyDomainRegex.IsMatch(mailAddress.Host)) {
            return true;
        }

        if (NumericDomainRegex.IsMatch(mailAddress.Host)) {
            return true;
        }
    }

    if (meanness >= 7) {
        //this is including the tld, so 3 is insanely generous
        //2 letters + period + 3 tld = 6
        if (mailAddress.Host.Length < 6) {
            return true;
        }
    }

    if (meanness >= 8) {
        if (mailAddress.User.Length < 3) {
            return true;
        }
    }

    if (meanness >= 9) {
        if (mailAddress.User.Length < 5) {
            return true;
        }
    }

    if (meanness >= 10) {
        if (TldRegex.IsMatch(mailAddress.Host)) {
            return true;
        }
    }

    //Do this last because it's the most expensive
    if (validateMx) {
        //Read the cache under the same lock that guards writes to it; an
        //unsynchronized read could race with a concurrent Add
        bool knownValid;
        lock (ValidDomainCache) {
            knownValid = ValidDomainCache.Contains(mailAddress.Host);
        }

        if (!knownValid) {
            bool mxFound;
            var mxRecords = DnsLookup.GetMXRecords(mailAddress.Host, out mxFound);
            if (!mxFound || !mxRecords.Any()) {
                //no MX record associated with this address or timeout
                return true;
            }

            //compare against blacklist
            foreach (var record in mxRecords) {
                DnsLookup.GetIpAddresses(record, out var addresses);
                if (addresses != null && addresses.Any(BlockedMxAddresses.Contains)) {
                    //this mx record points to the same IP as a blacklisted MX record or timeout
                    return true;
                }
            }

            lock (ValidDomainCache) {
                ValidDomainCache.Add(mailAddress.Host);
            }
        }
    }

    return false;
}