/// <summary>
/// Fills two Bloom filters (same capacity, same error rate) with the first and the second run of
/// <c>addSize</c> generated items respectively, adds the second filter to the first, and verifies
/// that the combined filter reports the key of every item from both runs.
/// </summary>
public void BloomFilterAddTest()
{
    var addSize = 10000;
    var testData = DataGenerator.Generate().Take(addSize).ToArray();
    var testData2 = DataGenerator.Generate().Skip(addSize).Take(addSize).ToArray();
    var errorRate = 0.001F;
    var size = testData.Length;
    var configuration = new DefaultBloomFilterConfiguration();

    // Both filters get a capacity of twice the number of items they receive.
    var bloomFilter = new BloomFilter<TestEntity, long>(configuration);
    bloomFilter.Initialize(2 * size, errorRate);
    foreach (var itm in testData)
    {
        bloomFilter.Add(itm);
    }

    var bloomFilter2 = new BloomFilter<TestEntity, long>(configuration);
    bloomFilter2.Initialize(2 * size, errorRate);
    foreach (var itm in testData2)
    {
        bloomFilter2.Add(itm);
    }

    // Merge the second filter into the first.
    bloomFilter.Add(bloomFilter2);

    var contained = testData.Union(testData2).Count(item => bloomFilter.ContainsKey(item.Id));

    // Expected value goes first so that a failure message reports the values the right way round.
    Assert.AreEqual(2 * addSize, contained, "Not all items found in added Bloom filters");
}
/// <summary>
/// Adds a Bloom filter with a smaller block size to a larger one and verifies that the larger
/// filter afterwards contains every item that was put into either filter.
/// </summary>
/// <remarks>
/// The smaller filter's block size is the larger filter's block size divided by the smallest fold
/// factor greater than one that the configured folding strategy returns for it.
/// </remarks>
public void BloomFilterAddDifferentSizesTest()
{
    var addSize = 10000;
    var errorRate = 0.001F;
    var firstBatch = DataGenerator.Generate().Take(addSize).ToArray();
    var secondBatch = DataGenerator.Generate().Skip(addSize).Take(addSize).ToArray();
    var configuration = new DefaultBloomFilterConfiguration();

    var largeFilter = new BloomFilter<TestEntity, long>(configuration);
    largeFilter.Initialize(4 * firstBatch.Length, errorRate);
    foreach (var entity in firstBatch)
    {
        largeFilter.Add(entity);
    }

    var smallFilter = new BloomFilter<TestEntity, long>(configuration);

    // The second filter has to be a foldable version of the first one.
    var largeFilterData = largeFilter.Extract();
    var foldFactor = configuration.FoldingStrategy
        .GetAllFoldFactors(largeFilterData.BlockSize)
        .OrderBy(factor => factor)
        .First(factor => factor > 1);
    smallFilter.Initialize(addSize, largeFilterData.BlockSize / foldFactor, largeFilterData.HashFunctionCount);
    foreach (var entity in secondBatch)
    {
        smallFilter.Add(entity);
    }

    largeFilter.Add(smallFilter);

    var allItems = firstBatch.Union(secondBatch);
    var contained = allItems.Count(entity => largeFilter.Contains(entity));
    Assert.AreEqual(2 * addSize, contained, "Not all items found in added Bloom filters");
}
/// <summary>
/// Adds <c>addSize</c> items to a Bloom filter, removes the first half of them again, and pins the
/// number of original items the filter still reports as contained.
/// </summary>
/// <remarks>
/// The pinned count (4137) is lower than the 5000 items that were never removed: removal also
/// makes some of the remaining items test negative.
/// </remarks>
public void BloomFilterRemoveItemTest()
{
    var addSize = 10000;
    var testData = DataGenerator.Generate().Take(addSize).ToArray();
    var errorRate = 0.001F;
    var size = testData.Length;
    var configuration = new DefaultBloomFilterConfiguration();

    var bloomFilter = new BloomFilter<TestEntity, long>(configuration);
    bloomFilter.Initialize(2 * size, errorRate);
    foreach (var itm in testData)
    {
        bloomFilter.Add(itm);
    }

    // Remove the first half of the items that were added.
    foreach (var item in testData.Take(addSize / 2))
    {
        bloomFilter.Remove(item);
    }

    var containedAfterRemove = testData.Count(item => bloomFilter.Contains(item));

    // Bloom filter does not behave well under removal.
    // Expected value goes first so that a failure message reports the values the right way round.
    Assert.AreEqual(4137, containedAfterRemove, "Wrong item count after removal.");
}
/// <summary>
/// Intersects a filter holding all generated items with a filter holding all but the first 1000 of
/// them, then verifies that the intersected filter reports an item count of at least 9000 and
/// still contains each of the 9000 shared items.
/// </summary>
public void BloomFilterIntersectDifferentFiltersTest()
{
    var addSize = 10000;
    var errorRate = 0.001F;
    var allItems = DataGenerator.Generate().Take(addSize).ToArray();
    var capacity = 2 * allItems.Length;
    var configuration = new DefaultBloomFilterConfiguration();

    var fullFilter = new BloomFilter<TestEntity, long>(configuration);
    fullFilter.Initialize(capacity, errorRate);
    foreach (var entity in allItems)
    {
        fullFilter.Add(entity);
    }

    // The second filter only receives the items after the first 1000.
    var partialFilter = new BloomFilter<TestEntity, long>(configuration);
    partialFilter.Initialize(capacity, errorRate);
    foreach (var entity in allItems.Skip(1000))
    {
        partialFilter.Add(entity);
    }

    fullFilter.Intersect(partialFilter);

    Assert.IsTrue(fullFilter.ItemCount >= 9000);
    var sharedItemsFound = allItems.Skip(1000).Count(entity => fullFilter.Contains(entity));
    Assert.AreEqual(9000, sharedItemsFound);
}
/// <summary>
/// Intersects two Bloom filters that were filled with the same items and verifies that the result
/// still contains every item and reports an item count of at least the number of items added.
/// </summary>
public void BloomFilterIntersectEqualFiltersTest()
{
    var addSize = 10000;
    var errorRate = 0.001F;
    var items = DataGenerator.Generate().Take(addSize).ToArray();
    var capacity = 2 * items.Length;
    var configuration = new DefaultBloomFilterConfiguration();

    var left = new BloomFilter<TestEntity, long>(configuration);
    left.Initialize(capacity, errorRate);
    foreach (var entity in items)
    {
        left.Add(entity);
    }

    var right = new BloomFilter<TestEntity, long>(configuration);
    right.Initialize(capacity, errorRate);
    foreach (var entity in items)
    {
        right.Add(entity);
    }

    left.Intersect(right);

    // The item count will be off due to the estimated size, hence the lower bound only.
    Assert.IsTrue(left.ItemCount >= addSize);
    Assert.IsTrue(items.All(entity => left.Contains(entity)));
}
/// <summary>
/// Verifies that compressing an oversized Bloom filter shrinks it to the expected capacity without
/// introducing false negatives and without exceeding the false positive count of a baseline filter.
/// </summary>
/// <remarks>
/// Steps: (1) build a baseline filter sized for the data and record how many of the next
/// <c>addSize</c> generated items it reports as contained; (2) re-initialize the same filter at 50
/// times the data size and re-add the data; (3) compress it and repeat the checks.
/// </remarks>
public void StandardCompressTest()
{
    var addSize = 10000;
    var errorRate = 0.001F;
    var data = DataGenerator.Generate().Take(addSize).ToArray();

    // Baseline: a filter whose capacity matches the number of items.
    var filter = new BloomFilter<TestEntity, long>(new DefaultBloomFilterConfiguration());
    filter.Initialize(data.Length, errorRate);
    foreach (var item in data)
    {
        filter.Add(item);
    }

    // Items in the second run were never added, so every hit counts as a false positive.
    var basecount = DataGenerator
        .Generate()
        .Skip(addSize)
        .Take(addSize)
        .Count(itm => filter.ContainsKey(itm.Id));
    Assert.IsTrue(basecount <= errorRate * addSize);

    // Re-initialize the filter at 50 times the data size and add the same data again.
    filter.Initialize(50 * data.Length, errorRate);
    Assert.AreEqual(500000, filter.Capacity, "Unexpected size of uncompressed Bloom filter.");
    foreach (var item in data)
    {
        filter.Add(item);
    }

    // Check the error rate of the oversized filter against the baseline.
    var falsePositiveCount = DataGenerator.Generate().Skip(addSize).Take(addSize).Count(itm => filter.Contains(itm));
    Assert.IsTrue(falsePositiveCount <= basecount, "Uncompressed Bloom filter exceeded error rate.");
    Assert.IsTrue(data.All(d => filter.ContainsKey(d.Id)), "False negatives found in uncompressed filter");

    // Compress and repeat the same checks on the compressed filter.
    filter = filter.Compress(true);
    Assert.AreEqual(21739, filter.Capacity, "Unexpected size of compressed Bloom filter.");
    var compressedFalsePositiveCount = DataGenerator.Generate().Skip(addSize).Take(addSize).Count(itm => filter.ContainsKey(itm.Id));
    Assert.IsTrue(data.All(d => filter.ContainsKey(d.Id)), "False negatives found in compressed filter");
    Assert.IsTrue(compressedFalsePositiveCount <= basecount, "Compressed Bloom filter exceeded error rate.");
}
/// <summary>
/// Fills a Bloom filter with the first 100000 generated items, quasi-decodes it against a second
/// list of 8000 generated items (taken from offset 500 and passed through <c>Modify(1000)</c>),
/// and checks that the resulting estimate lies strictly between 90500 and 95000.
/// </summary>
public void BloomFilterQuasiDecodeTest()
{
    var size = 100000;
    var errorRate = 0.001F;
    var configuration = new DefaultBloomFilterConfiguration();

    var filterItems = DataGenerator.Generate().Take(size).ToList();
    var filter = new BloomFilter<TestEntity, long>(configuration);
    filter.Initialize(size, errorRate);
    foreach (var entity in filterItems)
    {
        filter.Add(entity);
    }

    // Build the list the filter is compared against.
    var otherItems = DataGenerator.Generate().Skip(500).Take(8000).ToList();
    otherItems.Modify(1000);

    var estimate = filter.QuasiDecode(otherItems);
    var estimateInRange = estimate > 90500 && estimate < 95000;
    Assert.IsTrue(estimateInRange, "Unexpected estimate for difference.");
}
/// <summary>
/// Verifies that a Bloom filter sized for its data reports every added item as contained and
/// that, for the next <c>addSize</c> generated items, the number of hits stays within
/// <c>errorRate * addSize</c>.
/// </summary>
public void HybridFalsePositiveTest()
{
    var addSize = 10000;
    var errorRate = 0.001F;
    var addedItems = DataGenerator.Generate().Take(addSize).ToArray();
    var configuration = new DefaultBloomFilterConfiguration();

    var filter = new BloomFilter<TestEntity, long>(configuration);
    filter.Initialize(addedItems.Length, errorRate);
    foreach (var entity in addedItems)
    {
        filter.Add(entity);
    }

    // Every item that was added has to be found.
    var missingCount = addedItems.Count(entity => !filter.Contains(entity));
    Assert.IsTrue(missingCount == 0, "False negative error rate violated");

    // Items from the second run were never added; any hit among them is a false positive.
    var unseenItems = DataGenerator.Generate().Skip(addSize).Take(addSize);
    var falsePositiveCount = unseenItems.Count(entity => filter.Contains(entity));
    Assert.IsTrue(falsePositiveCount <= errorRate * addSize, "False positive error rate violated");
}
/// <summary>
/// Folds a Bloom filter with block size 1024 by a factor of 4 and checks that the fold result has
/// block size 256, while the filter the fold was called on still reports the same number of hits
/// over the first 500 generated items and still contains every item that was added.
/// </summary>
/// <remarks>
/// NOTE(review): the post-fold membership checks query the filter <c>Fold</c> was called on, not
/// the value it returned. Confirm whether <c>Fold</c> works in place or whether the checks were
/// meant to target the folded result.
/// </remarks>
public void BloomFilterSimpleFold()
{
    var addSize = 50;
    var addedItems = DataGenerator.Generate().Take(addSize).ToArray();
    var configuration = new DefaultBloomFilterConfiguration();

    // Explicit capacity, block size (1024) and hash function count (3).
    var filter = new BloomFilter<TestEntity, long>(configuration);
    filter.Initialize(addedItems.Length, 1024, (uint)3);
    foreach (var entity in addedItems)
    {
        filter.Add(entity);
    }

    var hitsBeforeFold = DataGenerator.Generate().Take(500).Count(entity => filter.Contains(entity));

    var folded = filter.Fold(4);

    var hitsAfterFold = DataGenerator.Generate().Take(500).Count(entity => filter.Contains(entity));

    Assert.AreEqual(hitsBeforeFold, hitsAfterFold, "False positive count different after fold");
    Assert.AreEqual(256, folded.BlockSize, "Folded block size is unexpected.");
    Assert.IsTrue(addedItems.All(entity => filter.Contains(entity)), "False negative found");
}