/// <summary>
/// Verifies that a long random string (generated with a requested length of 10000)
/// is reported as absent before it is added to the filter and as possibly present
/// once it has been added.
/// </summary>
public void supports_adding_long_strings() {
    // A fixed seed keeps the generated string identical from run to run.
    var text = GenerateRandomString(10000, GenerateCharset(), new Random(123));

    var reportedBeforeAdd = _filter.MightContain(text);
    Assert.IsFalse(reportedBeforeAdd);

    _filter.Add(text);

    var reportedAfterAdd = _filter.MightContain(text);
    Assert.IsTrue(reportedAfterAdd);
}
/// <summary>
/// Adds an item, disposes the fixture's filter, then opens a second filter over the
/// same path with <c>create: false</c> and checks that the item is still reported.
/// </summary>
public void can_close_and_reopen() {
    const string item = "hello";

    _filter.Add(item);
    // Dispose the original filter before opening another one over the same path.
    _filter.Dispose();

    using (var reopened = new MemoryMappedFileStreamBloomFilter(
        _path,
        create: false,
        MemoryMappedFileBloomFilter.MinSizeKB * 1000,
        hasher: null)) {
        Assert.IsTrue(reopened.MightContain(item));
    }
}
/// <summary>
/// Checks that the number of false positives observed on a freshly created filter
/// stays within three standard deviations of the expected count <c>ceil(n * p)</c>,
/// where <c>n</c> is the item count returned by <c>CalculateOptimalNumItems(p)</c>.
/// </summary>
/// <param name="size">Size argument passed to the filter constructor.</param>
/// <param name="p">False-positive probability the filter is dimensioned for.</param>
public void has_false_positives_with_probability_p(
    [Values(MemoryMappedFileBloomFilter.MinSizeKB * 1000, 2 * MemoryMappedFileBloomFilter.MinSizeKB * 1000)] long size,
    [Values(0.001, 0.02, 0.05, 0.1, 0.2)] double p
) {
    using var filter = new MemoryMappedFileStreamBloomFilter(GetTempFilePath(), create: true, size, hasher: null);
    var n = (int)filter.CalculateOptimalNumItems(p);

    // Fixed seed: the generated items are the same on every run.
    var random = new Random(123);
    var charset = GenerateCharset();

    var list = new List<string>();
    var selected = new HashSet<string>();

    // Generate 2n distinct items. HashSet.Add returns false for a duplicate, so a
    // single call both tests for and records uniqueness; duplicates are simply
    // regenerated on the next pass.
    while (list.Count < 2 * n) {
        var length = 1 + random.Next() % 10;
        var s = GenerateRandomString(length, charset, random);
        if (selected.Add(s)) {
            list.Add(s);
        }
    }

    // Add the first n distinct items to the filter.
    for (var i = 0; i < n; i++) {
        filter.Add(list[i]);
    }

    // Expected number of false positives.
    var expectedFalsePositives = Convert.ToInt32(Math.Ceiling(n * p));

    // The second n distinct items were never added, so every hit among them is a
    // false positive.
    var falsePositives = 0;
    for (var i = n; i < 2 * n; i++) {
        if (filter.MightContain(list[i])) {
            falsePositives++;
        }
    }

    // X = random variable that takes value 1 with probability p and value 0 with
    // probability (1 - p).
    // var(X) = E(X^2) - E(X)^2 = p - p*p
    // var(X1 + X2 + ... + Xn) = n * var(Xi)   (variance of n uncorrelated variables)
    //                         = n * (p - p*p)
    var variance = n * (p - (p * p));
    var standardDeviation = Math.Sqrt(variance);
    var threeStandardDeviations = 3 * standardDeviation; // 99.7%

    if (falsePositives > 0) {
        Console.Out.WriteLine("n: {0:N0}, p:{1:N3}. Found {2:N0} false positives. Expected false positives: {3:N0}. Standard deviation: {4:N2}",
            n, p, falsePositives, expectedFalsePositives, standardDeviation);
    }

    Assert.LessOrEqual(falsePositives, expectedFalsePositives + threeStandardDeviations);
    // The lower bound is clamped at zero because a count cannot be negative.
    Assert.GreaterOrEqual(falsePositives, Math.Max(0, expectedFalsePositives - threeStandardDeviations));
}