예제 #1
0
        /// <summary>
        /// Builds a Bloom filter from the entries of <c>Words</c> that the matcher accepts,
        /// assigns it to <paramref name="data"/>, and then optionally "retouches" it using
        /// entries taken from <c>FakeWordsByFrequency</c>.
        /// </summary>
        /// <param name="parameters">
        /// Bloom filter settings. When <c>Disabled</c> is set the method returns without doing anything.
        /// <c>RetouchWordCount</c> (when greater than zero) takes precedence over <c>RetouchMinRelWeight</c>.
        /// </param>
        /// <param name="data">
        /// Generated data passed to the matcher; its <c>BloomFilter</c> property is overwritten here.
        /// </param>
        private void BuildBloomFilter(BloomFilterParameters parameters, GeneratedData data)
        {
            if (parameters.Disabled)
            {
                return;
            }

            using (new Timer("DataGenerator.BuildBloomFilter"))
            {
                // The size parameter is given in bytes; it is multiplied by 8 before being passed on
                // (presumably the BloomFilter constructor expects a size in bits - confirm).
                var bloom        = new BloomFilter(parameters.FilterSizeBytes * 8, parameters.HashFunctionsCount);
                var matchCounter = new TestExecutor.MatchCounter();

                foreach (var candidate in Words.Value)
                {
                    // Only words accepted by the matcher are added. skipBloomFilter: true presumably
                    // bypasses any Bloom filter check inside Match - confirm against TestExecutor.
                    if (TestExecutor.Match(data, candidate, data.Parameters, matchCounter, skipBloomFilter: true))
                    {
                        bloom.add(ProcessWordForBloomFilter(parameters, candidate));
                    }
                }

                // From here on, Match is called without skipBloomFilter and data carries the new filter.
                data.BloomFilter = bloom;

                using (new Timer("DataGenerator.BuildBloomFilter[retouch]"))
                {
                    if (parameters.RetouchWordCount > 0)
                    {
                        // Entries with Item2 > 2 that the matcher accepts, limited to the first
                        // RetouchWordCount hits. Materialized before the filter is modified below.
                        var toRetouch = FakeWordsByFrequency.Value
                            .Where(pair => pair.Item2 > 2
                                           && TestExecutor.Match(data, pair.Item1, data.Parameters, matchCounter))
                            .Take(parameters.RetouchWordCount.Value)
                            .ToArray();

                        foreach (var entry in toRetouch)
                        {
                            bloom.retouch(
                                ProcessWordForBloomFilter(parameters, entry.Item1),
                                parameters.RetouchMaxWeight ?? 0);
                        }
                    }
                    else if (parameters.RetouchMinRelWeight != null)
                    {
                        // Same selection as above but without a count limit.
                        var acceptedFakes = FakeWordsByFrequency.Value
                            .Where(pair => pair.Item2 > 2
                                           && TestExecutor.Match(data, pair.Item1, data.Parameters, matchCounter))
                            .ToArray();

                        // A second filter with the same m and k as the main one; each accepted fake
                        // word is added together with its Item2 value (presumably a frequency used
                        // as a weight - confirm against BloomFilter.add).
                        var weightedFakes = new BloomFilter(bloom.m, bloom.k);
                        foreach (var entry in acceptedFakes)
                        {
                            weightedFakes.add(ProcessWordForBloomFilter(parameters, entry.Item1), entry.Item2);
                        }

                        bloom.retouch(weightedFakes, parameters.RetouchMinRelWeight.Value);
                    }
                }
            }
        }