/// <summary>
/// Produces a lazily evaluated sequence of synthetic email addresses.
/// </summary>
/// <remarks>
/// At most <c>100 * config.SamplesToPublish</c> generation attempts are made; attempts that
/// come back null or empty are discarded, and the sequence stops after
/// <c>config.SamplesToPublish</c> successful values. A single <see cref="Random"/> seeded from
/// <see cref="Environment.TickCount"/> is created up front and shared by all attempts.
/// </remarks>
/// <param name="substrings">Forwarded unchanged to <c>GenerateEmail</c>.</param>
/// <param name="domains">Forwarded unchanged to <c>GenerateEmail</c>.</param>
/// <param name="tlds">Forwarded unchanged to <c>GenerateEmail</c>.</param>
/// <param name="lengthDistribution">Forwarded unchanged to <c>GenerateEmail</c>.</param>
/// <param name="config">Supplies <c>SamplesToPublish</c>; also forwarded to <c>GenerateEmail</c>.</param>
/// <returns>A deferred sequence of non-empty generated addresses.</returns>
private static IEnumerable<string> GenerateEmails(
    SubstringsData substrings,
    SubstringWithCountList domains,
    SubstringWithCountList tlds,
    LengthDistribution lengthDistribution,
    SampleValuesGeneratorConfig.Result config)
{
    var rng = new Random(Environment.TickCount);

    // Cap the number of attempts so that a generator which keeps failing cannot loop forever.
    var maxAttempts = 100 * config.SamplesToPublish;

    var nonEmptyCandidates =
        from attempt in Enumerable.Range(0, maxAttempts)
        let candidate = GenerateEmail(substrings, domains, tlds, lengthDistribution, config, rng)
        where !string.IsNullOrEmpty(candidate)
        select candidate;

    return nonEmptyCandidates.Take(config.SamplesToPublish);
}
/// <summary>
/// Builds one synthetic email address from a randomly generated string.
/// </summary>
/// <remarks>
/// The generated string is split on '@'. A random number of leading parts forms the local
/// part; the remaining parts are the raw material for the domain. Dot-separated segments that
/// are too short or appear in <c>BannedWords</c> are dropped from both sections.
/// </remarks>
/// <param name="substrings">Passed to <c>GenerateString</c> to produce the raw text.</param>
/// <param name="domains">
/// Sampled via <c>GetRandomValue</c> when its <c>TotalCount</c> exceeds
/// <c>config.MinValuesForCategoricalSampling</c>.
/// </param>
/// <param name="tlds">A random value from this list is appended after a generated domain.</param>
/// <param name="lengthDistribution">Passed to <c>GenerateString</c>.</param>
/// <param name="config">Supplies <c>MinValuesForCategoricalSampling</c>.</param>
/// <param name="rand">Random source used for every random decision in this method.</param>
/// <returns>
/// The generated address, or <see cref="string.Empty"/> when the raw string, the local part,
/// or the generated domain turns out to be unusable.
/// </returns>
private static string GenerateEmail(
    SubstringsData substrings,
    SubstringWithCountList domains,
    SubstringWithCountList tlds,
    LengthDistribution lengthDistribution,
    SampleValuesGeneratorConfig.Result config,
    Random rand)
{
    // create local-part section
    var str = GenerateString(substrings, lengthDistribution, minLength: 6, rand);
    if (string.IsNullOrEmpty(str))
    {
        return string.Empty;
    }

    var allParts = str.Split('@', StringSplitOptions.RemoveEmptyEntries);
    var sb = new StringBuilder();
    var partIndex = 0;

    // Probability of pulling the next '@'-separated part into the local part: 1, 1/2, 1/4, ...
    // This has to be a floating-point value. Declared as an int, `pnext /= 2` truncated 1 to 0,
    // so every part after the first was (almost) never taken.
    var pnext = 1.0;
    while (partIndex < allParts.Length && rand.NextDouble() <= pnext)
    {
        sb.Append(allParts[partIndex]);
        pnext /= 2;
        partIndex++;
    }

    // Keep single-character segments and segments longer than three characters.
    // The lookup uses the upper-cased segment, so BannedWords presumably stores
    // upper-case entries (confirm against its definition).
    var localParts = sb.ToString()
        .Split('.', StringSplitOptions.RemoveEmptyEntries)
        .Where(s => (s.Length == 1 || s.Length > 3) && !BannedWords.Contains(s.ToUpperInvariant()));
    var localPart = string.Join('.', localParts);
    if (string.IsNullOrEmpty(localPart))
    {
        return string.Empty;
    }

    if (domains.TotalCount > config.MinValuesForCategoricalSampling)
    {
        // if the number of distinct domains is big enough we select one from the extracted list
        // NOTE(review): no '@' is inserted here, so the extracted domain values presumably
        // already start with it -- confirm.
        return localPart + domains.GetRandomValue(rand);
    }

    // create domain section from the parts that were not consumed by the local part
    sb.Clear();
    while (partIndex < allParts.Length)
    {
        sb.Append(allParts[partIndex]);
        partIndex++;
    }

    var domainParts = sb.ToString()
        .Split('.', StringSplitOptions.RemoveEmptyEntries)
        .Where(p => p.Length > 3 && !BannedWords.Contains(p.ToUpperInvariant()));

    // Most of the time use only the longest surviving segment (ties go to the later one);
    // otherwise join all surviving segments with dots.
    var domain = rand.NextDouble() > 0.15
        ? domainParts.Aggregate(string.Empty, (max, cur) => max.Length > cur.Length ? max : cur)
        : string.Join('.', domainParts);
    if (string.IsNullOrEmpty(domain) || domain.Length < 4)
    {
        return string.Empty;
    }

    // NOTE(review): no '.' is inserted before the TLD, so tlds values presumably include it -- confirm.
    return localPart + "@" + domain + tlds.GetRandomValue(rand);
}