Exemple #1
0
        /// <summary>
        /// Lazily produces synthetic email addresses by repeatedly calling <c>GenerateEmail</c>.
        /// </summary>
        /// <remarks>
        /// At most <c>100 * config.SamplesToPublish</c> generation attempts are made; attempts that yield
        /// a null or empty string are dropped, and the sequence ends once <c>config.SamplesToPublish</c>
        /// addresses have been produced. The result is a deferred LINQ query: nothing is generated until
        /// it is enumerated, and every enumeration draws from the same <see cref="Random"/> instance.
        /// </remarks>
        /// <param name="substrings">Forwarded unchanged to <c>GenerateEmail</c>.</param>
        /// <param name="domains">Forwarded unchanged to <c>GenerateEmail</c>.</param>
        /// <param name="tlds">Forwarded unchanged to <c>GenerateEmail</c>.</param>
        /// <param name="lengthDistribution">Forwarded unchanged to <c>GenerateEmail</c>.</param>
        /// <param name="config">Supplies <c>SamplesToPublish</c> and is forwarded to <c>GenerateEmail</c>.</param>
        /// <returns>A lazy sequence of at most <c>config.SamplesToPublish</c> non-empty strings.</returns>
        private static IEnumerable<string> GenerateEmails(
            SubstringsData substrings,
            SubstringWithCountList domains,
            SubstringWithCountList tlds,
            LengthDistribution lengthDistribution,
            SampleValuesGeneratorConfig.Result config)
        {
            // One generator, seeded from the tick count, shared by every attempt.
            var generator = new Random(Environment.TickCount);
            var quota = config.SamplesToPublish;

            // Allow 100 attempts per requested sample, since individual attempts may come back empty.
            var attemptBudget = 100 * quota;

            var usableEmails =
                from attempt in Enumerable.Range(0, attemptBudget)
                let candidate = GenerateEmail(substrings, domains, tlds, lengthDistribution, config, generator)
                where !string.IsNullOrEmpty(candidate)
                select candidate;

            return usableEmails.Take(quota);
        }
Exemple #2
0
        /// <summary>
        /// Attempts to assemble one synthetic email address from a randomly generated string.
        /// </summary>
        /// <remarks>
        /// The string returned by <c>GenerateString</c> is split on '@'. A random-length prefix of the
        /// resulting pieces (always at least one, each further piece half as likely as the previous one)
        /// is concatenated into the local part; the remaining pieces are only used when the domain has
        /// to be synthesized rather than drawn from <paramref name="domains"/>.
        /// </remarks>
        /// <param name="substrings">Passed to <c>GenerateString</c>.</param>
        /// <param name="domains">
        /// Source of complete domain suffixes; used only when its <c>TotalCount</c> exceeds
        /// <c>config.MinValuesForCategoricalSampling</c>.
        /// </param>
        /// <param name="tlds">Source of the suffix appended after a synthesized domain.</param>
        /// <param name="lengthDistribution">Passed to <c>GenerateString</c>.</param>
        /// <param name="config">Supplies <c>MinValuesForCategoricalSampling</c>.</param>
        /// <param name="rand">Random source used for every random decision in this method.</param>
        /// <returns>
        /// The generated address, or <see cref="string.Empty"/> when the generated string is null/empty,
        /// the filtered local part is empty, or the synthesized domain is shorter than 4 characters.
        /// </returns>
        private static string GenerateEmail(
            SubstringsData substrings,
            SubstringWithCountList domains,
            SubstringWithCountList tlds,
            LengthDistribution lengthDistribution,
            SampleValuesGeneratorConfig.Result config,
            Random rand)
        {
            // create local-part section
            var str = GenerateString(substrings, lengthDistribution, minLength: 6, rand);

            if (string.IsNullOrEmpty(str))
            {
                return string.Empty;
            }

            var allParts = str.Split('@', StringSplitOptions.RemoveEmptyEntries);
            var sb = new StringBuilder();
            var partIndex = 0;

            // Probability of taking one more piece into the local part. It must be a floating-point
            // value: as an int, `pnext /= 2` truncated to 0 after the first piece, so the halving
            // never took effect and the loop (practically) always stopped after exactly one piece.
            var pnext = 1.0;

            while (partIndex < allParts.Length && rand.NextDouble() <= pnext)
            {
                sb.Append(allParts[partIndex]);
                pnext /= 2;
                partIndex++;
            }

            // Keep only dot-separated segments that are a single character or longer than 3 characters
            // and that are not listed in BannedWords (compared in upper-invariant form).
            var localParts = sb.ToString()
                               .Split('.', StringSplitOptions.RemoveEmptyEntries)
                               .Where(s => (s.Length == 1 || s.Length > 3) && !BannedWords.Contains(s.ToUpperInvariant()));
            var localPart = string.Join('.', localParts);

            if (string.IsNullOrEmpty(localPart))
            {
                return string.Empty;
            }

            if (domains.TotalCount > config.MinValuesForCategoricalSampling)
            {
                // if the number of distinct domains is big enough we select one from the extracted list
                // NOTE(review): no '@' is inserted here, so the extracted domain values presumably
                // already start with '@' - confirm against the code that fills `domains`.
                return localPart + domains.GetRandomValue(rand);
            }

            // create domain section from whatever pieces the local part did not consume
            sb.Clear();
            while (partIndex < allParts.Length)
            {
                sb.Append(allParts[partIndex]);
                partIndex++;
            }

            var domainParts = sb.ToString()
                                .Split('.', StringSplitOptions.RemoveEmptyEntries)
                                .Where(p => p.Length > 3 && !BannedWords.Contains(p.ToUpperInvariant()));

            // Usually (when the draw is above 0.15) use a single longest segment; ties go to the later
            // one. Otherwise keep all surviving segments joined by dots.
            var domain = rand.NextDouble() > 0.15
                ? domainParts.Aggregate(string.Empty, (max, cur) => max.Length > cur.Length ? max : cur)
                : string.Join('.', domainParts);

            // Neither Aggregate (seeded with string.Empty) nor string.Join returns null, so a length
            // check alone also covers the empty case.
            if (domain.Length < 4)
            {
                return string.Empty;
            }

            // NOTE(review): no '.' is inserted before the TLD, so the values in `tlds` presumably carry
            // their own leading '.' - confirm against the code that fills `tlds`.
            return localPart + "@" + domain + tlds.GetRandomValue(rand);
        }