/// <summary>
/// Builds a Bloom filter from the matching entries of <c>Words</c>, stores it in
/// <c>data.BloomFilter</c>, and then optionally "retouches" it using entries taken from
/// <c>FakeWordsByFrequency</c>.
/// </summary>
/// <remarks>
/// <para>Does nothing when <c>parameters.Disabled</c> is set.</para>
/// <para>
/// Population: every word for which <c>TestExecutor.Match</c> (called with
/// <c>skipBloomFilter: true</c>) succeeds is passed through
/// <c>ProcessWordForBloomFilter</c> and added to the filter.
/// </para>
/// <para>
/// Retouch, first applicable mode only:
/// when <c>RetouchWordCount</c> is greater than zero, the first <c>RetouchWordCount</c> fake
/// words whose <c>Item2</c> exceeds 2 and that still pass <c>TestExecutor.Match</c> are
/// retouched one by one with <c>RetouchMaxWeight</c> (0 when unset);
/// otherwise, when <c>RetouchMinRelWeight</c> is set, all such fake words are collected into
/// a second filter of identical dimensions and that filter is handed to <c>retouch</c>
/// together with <c>RetouchMinRelWeight</c>.
/// </para>
/// </remarks>
/// <param name="parameters">Bloom filter settings (size, hash count, retouch options).</param>
/// <param name="data">Generated data that receives the filter and is used for matching.</param>
private void BuildBloomFilter(BloomFilterParameters parameters, GeneratedData data)
{
    if (parameters.Disabled)
    {
        return;
    }

    using (new Timer("DataGenerator.BuildBloomFilter"))
    {
        // FilterSizeBytes * 8: presumably the filter size expressed in bits -- confirm against BloomFilter.
        var bloom = new BloomFilter(parameters.FilterSizeBytes * 8, parameters.HashFunctionsCount);
        var matchCounter = new TestExecutor.MatchCounter();

        foreach (var candidate in Words.Value)
        {
            // Disabled length filter: if (candidate.Length < 3) continue;
            // The filter is skipped here because it is still being built.
            if (TestExecutor.Match(data, candidate, data.Parameters, matchCounter, skipBloomFilter: true))
            {
                bloom.add(ProcessWordForBloomFilter(parameters, candidate));
            }
        }

        // Must be assigned before the retouch phase: the Match calls below do not skip the filter.
        data.BloomFilter = bloom;

        using (new Timer("DataGenerator.BuildBloomFilter[retouch]"))
        {
            if (parameters.RetouchWordCount > 0)
            {
                // Materialize before retouching so the query never observes a half-retouched filter.
                var selected =
                    (from entry in FakeWordsByFrequency.Value
                     where entry.Item2 > 2
                           && TestExecutor.Match(data, entry.Item1, data.Parameters, matchCounter)
                     select entry)
                    .Take(parameters.RetouchWordCount.Value)
                    .ToArray();

                foreach (var entry in selected)
                {
                    bloom.retouch(
                        ProcessWordForBloomFilter(parameters, entry.Item1),
                        parameters.RetouchMaxWeight ?? 0);
                }

                // Disabled debug dump: WriteRetouched(data.Parameters.Id.Value, selected);
            }
            else if (parameters.RetouchMinRelWeight != null)
            {
                var leaked =
                    (from entry in FakeWordsByFrequency.Value
                     where entry.Item2 > 2
                           && TestExecutor.Match(data, entry.Item1, data.Parameters, matchCounter)
                     select entry)
                    .ToArray();

                // Same dimensions (m, k) as the main filter; Item2 is passed as the second
                // argument of add (presumably a weight/frequency -- confirm against BloomFilter.add).
                var leakedFilter = new BloomFilter(bloom.m, bloom.k);
                foreach (var entry in leaked)
                {
                    leakedFilter.add(ProcessWordForBloomFilter(parameters, entry.Item1), entry.Item2);
                }

                bloom.retouch(leakedFilter, parameters.RetouchMinRelWeight.Value);
            }
        }
    }
}