Пример #1
0
        /// <summary>
        /// Fills two equally sized Bloom filters from two consecutive runs of generated data,
        /// adds the second filter to the first, and verifies that every item from both runs is
        /// found (by key) in the combined filter.
        /// </summary>
        public void BloomFilterAddTest()
        {
            var addSize       = 10000;
            var testData      = DataGenerator.Generate().Take(addSize).ToArray();
            var testData2     = DataGenerator.Generate().Skip(addSize).Take(addSize).ToArray();
            var errorRate     = 0.001F;
            var size          = testData.Length;
            var configuration = new DefaultBloomFilterConfiguration();
            var bloomFilter   = new BloomFilter <TestEntity, long>(configuration);

            // Both filters are initialized with twice the number of items each one receives,
            // which is the total number of items the combined filter has to represent.
            bloomFilter.Initialize(2 * size, errorRate);
            foreach (var itm in testData)
            {
                bloomFilter.Add(itm);
            }
            var bloomFilter2 = new BloomFilter <TestEntity, long>(configuration);

            bloomFilter2.Initialize(2 * size, errorRate);
            foreach (var itm in testData2)
            {
                bloomFilter2.Add(itm);
            }
            // Merge the second filter into the first one.
            bloomFilter.Add(bloomFilter2);
            var contained = testData.Union(testData2).Count(item => bloomFilter.ContainsKey(item.Id));

            // Expected value goes first and the actual value second, so that a failing run
            // reports the two numbers under the right labels.
            Assert.AreEqual(2 * addSize, contained, "Not all items found in added Bloom filters");
        }
Пример #2
0
        /// <summary>
        /// Adds a Bloom filter with a smaller block size to a larger one and verifies that all
        /// items inserted into either filter are found in the combined filter.
        /// </summary>
        public void BloomFilterAddDifferentSizesTest()
        {
            var addSize = 10000;
            var firstBatch = DataGenerator.Generate().Take(addSize).ToArray();
            var secondBatch = DataGenerator.Generate().Skip(addSize).Take(addSize).ToArray();
            var errorRate = 0.001F;
            var size = firstBatch.Length;
            var configuration = new DefaultBloomFilterConfiguration();
            var target = new BloomFilter<TestEntity, long>(configuration);

            target.Initialize(4 * size, errorRate);
            for (var index = 0; index < firstBatch.Length; index++)
            {
                target.Add(firstBatch[index]);
            }

            var other = new BloomFilter<TestEntity, long>(configuration);

            // The second filter is sized from the first one: its block size is the first
            // filter's block size divided by the smallest fold factor greater than one, and it
            // uses the same number of hash functions.
            var extracted = target.Extract();
            var foldFactor = configuration.FoldingStrategy
                .GetAllFoldFactors(extracted.BlockSize)
                .Where(factor => factor > 1)
                .OrderBy(factor => factor)
                .First();

            other.Initialize(addSize, extracted.BlockSize / foldFactor, extracted.HashFunctionCount);
            for (var index = 0; index < secondBatch.Length; index++)
            {
                other.Add(secondBatch[index]);
            }

            target.Add(other);

            var allItems = firstBatch.Union(secondBatch);
            var found = allItems.Where(entity => target.Contains(entity)).Count();

            Assert.AreEqual(2 * addSize, found, "Not all items found in added Bloom filters");
        }
Пример #3
0
        /// <summary>
        /// Adds a set of items to a Bloom filter, removes the first half of them again and
        /// checks how many of the original items the filter still reports as contained.
        /// </summary>
        public void BloomFilterRemoveItemTest()
        {
            var addSize       = 10000;
            var testData      = DataGenerator.Generate().Take(addSize).ToArray();
            var errorRate     = 0.001F;
            var size          = testData.Length;
            var configuration = new DefaultBloomFilterConfiguration();
            var bloomFilter   = new BloomFilter <TestEntity, long>(configuration);

            bloomFilter.Initialize(2 * size, errorRate);
            foreach (var itm in testData)
            {
                bloomFilter.Add(itm);
            }
            // Baseline: before anything is removed, every added item has to be reported.
            var contained = testData.Count(item => bloomFilter.Contains(item));

            Assert.AreEqual(addSize, contained, "Not all items found before removal.");
            foreach (var item in testData.Take(addSize / 2))
            {
                bloomFilter.Remove(item);
            }
            var containedAfterRemove = testData.Count(item => bloomFilter.Contains(item));

            //Bloom filter does not behave well under removal: fewer than the remaining half of
            //the items are still reported. Expected value goes first, actual value second.
            Assert.AreEqual(4137, containedAfterRemove, "Wrong item count after removal.");
        }
Пример #4
0
        /// <summary>
        /// Intersects a Bloom filter holding all generated items with a second filter that holds
        /// all but the first 1000 of them, then checks the resulting item count and that the
        /// 9000 shared items are still found.
        /// </summary>
        public void BloomFilterIntersectDifferentFiltersTest()
        {
            var addSize = 10000;
            var items = DataGenerator.Generate().Take(addSize).ToArray();
            var errorRate = 0.001F;
            var size = items.Length;
            var configuration = new DefaultBloomFilterConfiguration();

            // First filter: every generated item.
            var filter = new BloomFilter<TestEntity, long>(configuration);
            filter.Initialize(2 * size, errorRate);
            for (var index = 0; index < items.Length; index++)
            {
                filter.Add(items[index]);
            }

            // Second filter: everything except the first 1000 items.
            var partialFilter = new BloomFilter<TestEntity, long>(configuration);
            partialFilter.Initialize(2 * size, errorRate);
            foreach (var entity in items.Skip(1000))
            {
                partialFilter.Add(entity);
            }

            filter.Intersect(partialFilter);

            Assert.IsTrue(filter.ItemCount >= 9000);

            var sharedItems = items.Skip(1000);
            var foundCount = sharedItems.Where(entity => filter.Contains(entity)).Count();

            Assert.AreEqual(9000, foundCount);
        }
Пример #5
0
        /// <summary>
        /// Intersects two Bloom filters that were filled with the same items and verifies that
        /// the result still reports at least as many items as were added and contains all of them.
        /// </summary>
        public void BloomFilterIntersectEqualFiltersTest()
        {
            var addSize = 10000;
            var items = DataGenerator.Generate().Take(addSize).ToArray();
            var errorRate = 0.001F;
            var size = items.Length;
            var configuration = new DefaultBloomFilterConfiguration();

            var filter = new BloomFilter<TestEntity, long>(configuration);
            filter.Initialize(2 * size, errorRate);
            for (var index = 0; index < items.Length; index++)
            {
                filter.Add(items[index]);
            }

            var twin = new BloomFilter<TestEntity, long>(configuration);
            twin.Initialize(2 * size, errorRate);
            for (var index = 0; index < items.Length; index++)
            {
                twin.Add(items[index]);
            }

            filter.Intersect(twin);

            // The item count is an estimate, so only a lower bound is checked.
            Assert.IsTrue(filter.ItemCount >= addSize);

            var allFound = items.All(entity => filter.Contains(entity));
            Assert.IsTrue(allFound);
        }
Пример #6
0
        /// <summary>
        /// Measures the false positive count of a filter sized for the data, re-initializes the
        /// same filter with fifty times that capacity, and verifies that neither the oversized
        /// filter nor its compressed form has false negatives or more false positives than the
        /// baseline.
        /// </summary>
        public void StandardCompressTest()
        {
            var addSize   = 10000;
            var errorRate = 0.001F;
            var data      = DataGenerator.Generate().Take(addSize).ToArray();
            var filter    = new BloomFilter <TestEntity, long>(new DefaultBloomFilterConfiguration());

            filter.Initialize(data.Length, errorRate);
            foreach (var item in data)
            {
                filter.Add(item);
            }
            // Baseline: number of items that were never added but are reported as contained.
            var basecount = DataGenerator
                            .Generate()
                            .Skip(addSize)
                            .Take(addSize)
                            .Count(itm => filter.ContainsKey(itm.Id));

            Assert.IsTrue(basecount <= errorRate * addSize);
            // Re-initialize the same filter instance with 50 times the capacity.
            filter.Initialize(50 * data.Length, errorRate);
            // Expected value first, actual value second, so failures are labelled correctly.
            Assert.AreEqual(500000, filter.Capacity, "Unexpected size of reverse Bloom filter.");
            foreach (var item in data)
            {
                filter.Add(item);
            }
            //check error rate.
            var notFoundCount = DataGenerator.Generate().Skip(addSize).Take(addSize).Count(itm => filter.Contains(itm));

            Assert.IsTrue(notFoundCount <= basecount, "Uncompressed Bloom filter exceeded error rate.");
            Assert.IsTrue(data.All(d => filter.ContainsKey(d.Id)), "False negatives found in uncompressed filter");
            filter = filter.Compress(true);
            // Expected value first, actual value second, so failures are labelled correctly.
            Assert.AreEqual(21739, filter.Capacity, "Unexpected size of compressed Bloom filter.");
            var compressNotFoundCount = DataGenerator.Generate().Skip(addSize).Take(addSize).Count(itm => filter.ContainsKey(itm.Id));

            Assert.IsTrue(data.All(d => filter.ContainsKey(d.Id)), "False negatives found in compressed filter");
            Assert.IsTrue(compressNotFoundCount <= basecount, "Compressed Bloom filter exceeded error rate.");
        }
Пример #7
0
        /// <summary>
        /// Fills a Bloom filter with 100000 generated items and checks that quasi-decoding it
        /// against a modified 8000 item slice of the generated data yields an estimate inside
        /// the expected range.
        /// </summary>
        public void BloomFilterQuasiDecodeTest()
        {
            var size = 100000;
            var inserted = DataGenerator.Generate().Take(size).ToList();
            var errorRate = 0.001F;
            var configuration = new DefaultBloomFilterConfiguration();
            var filter = new BloomFilter<TestEntity, long>(configuration);

            filter.Initialize(size, errorRate);
            for (var index = 0; index < inserted.Count; index++)
            {
                filter.Add(inserted[index]);
            }

            // Comparison set: 8000 generated items starting at offset 500, passed through
            // Modify(1000) before decoding.
            var comparison = DataGenerator.Generate().Skip(500).Take(8000).ToList();
            comparison.Modify(1000);

            var estimate = filter.QuasiDecode(comparison);
            var withinExpectedRange = estimate > 90500 && estimate < 95000;

            Assert.IsTrue(withinExpectedRange, "Unexpected estimate for difference.");
        }
Пример #8
0
        /// <summary>
        /// Verifies that a Bloom filter reports every added item and that the number of items
        /// it wrongly reports for data that was never added stays within the configured error rate.
        /// </summary>
        public void HybridFalsePositiveTest()
        {
            var addSize = 10000;
            var added = DataGenerator.Generate().Take(addSize).ToArray();
            var errorRate = 0.001F;
            var size = added.Length;
            var configuration = new DefaultBloomFilterConfiguration();
            var filter = new BloomFilter<TestEntity, long>(configuration);

            filter.Initialize(size, errorRate);
            for (var index = 0; index < added.Length; index++)
            {
                filter.Add(added[index]);
            }

            // Added items that the filter fails to report.
            var falseNegatives = added.Where(entity => !filter.Contains(entity)).Count();

            Assert.IsTrue(falseNegatives == 0, "False negative error rate violated");

            // Items from the next run of generated data that the filter reports anyway.
            var neverAdded = DataGenerator.Generate().Skip(addSize).Take(addSize);
            var falsePositives = neverAdded.Where(entity => filter.Contains(entity)).Count();

            Assert.IsTrue(falsePositives <= errorRate * addSize, "False positive error rate violated");
        }
Пример #9
0
        /// <summary>
        /// Folds a Bloom filter with block size 1024 by a factor of 4 and checks the block size
        /// of the folded result, the positive count before and after the fold, and the absence
        /// of false negatives.
        /// </summary>
        public void BloomFilterSimpleFold()
        {
            // NOTE(review): both positive counts and the false negative check below query the
            // original filter, not the value returned by Fold. Unless Fold changes the filter
            // in place, the before/after comparison cannot fail - confirm whether the folded
            // filter was meant to be queried instead.
            var addSize = 50;
            var added = DataGenerator.Generate().Take(addSize).ToArray();
            var size = added.Length;
            var configuration = new DefaultBloomFilterConfiguration();
            var filter = new BloomFilter<TestEntity, long>(configuration);

            filter.Initialize(size, 1024, (uint)3);
            for (var index = 0; index < added.Length; index++)
            {
                filter.Add(added[index]);
            }

            var positivesBeforeFold = DataGenerator.Generate()
                .Take(500)
                .Where(entity => filter.Contains(entity))
                .Count();

            var folded = filter.Fold(4);

            var positivesAfterFold = DataGenerator.Generate()
                .Take(500)
                .Where(entity => filter.Contains(entity))
                .Count();

            Assert.AreEqual(positivesBeforeFold, positivesAfterFold, "False positive count different after fold");
            // 1024 blocks folded by a factor of 4.
            Assert.AreEqual(256, folded.BlockSize, "Folded block size is unexpected.");

            var noFalseNegatives = added.All(entity => filter.Contains(entity));
            Assert.IsTrue(noFalseNegatives, "False negative found");
        }