Exemple #1
0
        /// <summary>
        /// Verifies that removal by key is rejected by the reverse invertible Bloom filter.
        /// </summary>
        /// <remarks>
        /// The filter is first filled with 10000 generated entities. The test passes only when
        /// <c>RemoveKey</c> throws <see cref="NotSupportedException"/>; if the removal loop
        /// completes, the test fails explicitly.
        /// </remarks>
        public void ReverseRemoveKeyTest()
        {
            var addSize       = 10000;
            var testData      = DataGenerator.Generate().Take(addSize).ToArray();
            var errorRate     = 0.001F;
            var size          = testData.Length;
            var configuration = new KeyValueBloomFilterConfiguration();
            var bloomFilter   = new InvertibleReverseBloomFilter<TestEntity, long, sbyte>(configuration);

            bloomFilter.Initialize(2 * size, errorRate);
            foreach (var itm in testData)
            {
                bloomFilter.Add(itm);
            }

            try
            {
                foreach (var item in testData.Take(addSize / 2))
                {
                    bloomFilter.RemoveKey(item.Id);
                }
                Assert.Fail("RemoveKey should not be supported by a reverse invertible Bloom filter");
            }
            catch (NotSupportedException)
            {
                // Expected outcome: key based removal is not supported by this filter type.
            }
        }
        /// <summary>
        /// Intersects a filter holding all generated entities with a filter that misses the
        /// first 1000 of them, then checks the item count and the membership of both groups.
        /// </summary>
        public void HybridIntersectDifferentFiltersTest()
        {
            // Number of leading entities that are left out of the second filter.
            const int skipped = 1000;

            var itemTotal         = 10000;
            var falsePositiveRate = 0.001F;
            var entities          = DataGenerator.Generate().Take(itemTotal).ToArray();
            var capacity          = 2 * entities.Length;
            var settings          = new KeyValueBloomFilterConfiguration();

            // First filter: every entity.
            var fullFilter = new InvertibleReverseBloomFilter<TestEntity, long, sbyte>(settings);
            fullFilter.Initialize(capacity, falsePositiveRate);
            foreach (var entity in entities)
            {
                fullFilter.Add(entity);
            }

            // Second filter: everything except the first 'skipped' entities.
            var partialFilter = new InvertibleReverseBloomFilter<TestEntity, long, sbyte>(settings);
            partialFilter.Initialize(capacity, falsePositiveRate);
            foreach (var entity in entities.Skip(skipped))
            {
                partialFilter.Add(entity);
            }

            fullFilter.Intersect(partialFilter);

            Assert.AreEqual(9000, fullFilter.ItemCount);
            var sharedFound = entities.Skip(skipped).Count(entity => fullFilter.Contains(entity));

            //Note: intersect introduces a horrible error rate when utilizing XOR, so don't actually use intersect.
            //There are however definitions of operations possible where the intersect would not have this horrible effect.
            Assert.IsTrue(sharedFound > 6700);
            Assert.IsTrue(!entities.Take(skipped).Any(entity => fullFilter.Contains(entity)));
        }
Exemple #3
0
        /// <summary>
        /// Adds a batch of generated entities, removes the first half again and checks that the
        /// number of entities reported as contained is halved.
        /// </summary>
        public void ReverseRemoveItemTest()
        {
            var itemTotal         = 10000;
            var entities          = DataGenerator.Generate().Take(itemTotal).ToArray();
            var falsePositiveRate = 0.001F;
            var settings          = new KeyValueBloomFilterConfiguration();
            var filter            = new InvertibleReverseBloomFilter<TestEntity, long, sbyte>(settings);

            filter.Initialize(2 * entities.Length, falsePositiveRate);
            foreach (var entity in entities)
            {
                filter.Add(entity);
            }
            var foundBeforeRemoval = entities.Count(entity => filter.Contains(entity));

            foreach (var entity in entities.Take(itemTotal / 2))
            {
                filter.Remove(entity);
            }
            var foundAfterRemoval = entities.Count(entity => filter.Contains(entity));

            // The comparison is exact, so it only holds when the filter reports no false positives.
            Assert.AreEqual(foundBeforeRemoval, foundAfterRemoval * 2, "Wrong item count after removal.");
        }
        /// <summary>
        /// Intersects two filters that were filled with the same entities and checks that the
        /// item count is unchanged and that every entity is still reported as contained.
        /// </summary>
        public void ReverseIntersectEqualFiltersTest()
        {
            var itemTotal         = 10000;
            var entities          = DataGenerator.Generate().Take(itemTotal).ToArray();
            var falsePositiveRate = 0.001F;
            var capacity          = 2 * entities.Length;
            var settings          = new KeyValueBloomFilterConfiguration();

            var target = new InvertibleReverseBloomFilter<TestEntity, long, sbyte>(settings);
            target.Initialize(capacity, falsePositiveRate);
            foreach (var entity in entities)
            {
                target.Add(entity);
            }

            var twin = new InvertibleReverseBloomFilter<TestEntity, long, sbyte>(settings);
            twin.Initialize(capacity, falsePositiveRate);
            foreach (var entity in entities)
            {
                twin.Add(entity);
            }

            target.Intersect(twin);

            Assert.AreEqual(itemTotal, target.ItemCount);
            Assert.IsTrue(entities.All(entity => target.Contains(entity)));
        }
Exemple #5
0
        /// <summary>
        /// Adds a smaller reverse invertible Bloom filter to a larger one and checks that the
        /// combined filter reports every entity of both source sets as contained.
        /// </summary>
        public void ReverseAddDifferentSizesTest()
        {
            var addSize       = 10000;
            var testData      = DataGenerator.Generate().Take(addSize).ToArray();
            var testData2     = DataGenerator.Generate().Skip(addSize).Take(addSize).ToArray();
            var errorRate     = 0.001F;
            var size          = testData.Length;
            var configuration = new KeyValueBloomFilterConfiguration();
            var bloomFilter   = new InvertibleReverseBloomFilter<TestEntity, long, sbyte>(configuration);

            bloomFilter.Initialize(4 * size, errorRate);
            foreach (var itm in testData)
            {
                bloomFilter.Add(itm);
            }
            var bloomFilter2 = new InvertibleReverseBloomFilter<TestEntity, long, sbyte>(configuration);
            // We have to create a foldable version: the second filter's block size is the first
            // filter's block size divided by its smallest fold factor greater than one.
            var data       = bloomFilter.Extract();
            var foldFactor = configuration.FoldingStrategy.GetAllFoldFactors(data.BlockSize).Where(f => f > 1).OrderBy(f => f).First();

            bloomFilter2.Initialize(addSize, data.BlockSize / foldFactor, data.HashFunctionCount);
            foreach (var itm in testData2)
            {
                bloomFilter2.Add(itm);
            }
            bloomFilter.Add(bloomFilter2);
            var contained = testData.Union(testData2).Count(item => bloomFilter.Contains(item));

            // Expected value first, actual second, so a failure message labels them correctly.
            Assert.AreEqual(2 * addSize, contained, "Not all items found in added Bloom filters");
        }
Exemple #6
0
        /// <summary>
        /// Adds one reverse invertible Bloom filter to another of the same dimensions and checks
        /// that the combined filter reports every entity of both source sets as contained.
        /// </summary>
        public void ReverseAddTest()
        {
            var addSize       = 10000;
            var testData      = DataGenerator.Generate().Take(addSize).ToArray();
            var testData2     = DataGenerator.Generate().Skip(addSize).Take(addSize).ToArray();
            var errorRate     = 0.001F;
            var size          = testData.Length;
            var configuration = new KeyValueBloomFilterConfiguration();
            var bloomFilter   = new InvertibleReverseBloomFilter<TestEntity, long, sbyte>(configuration);

            bloomFilter.Initialize(2 * size, errorRate);
            foreach (var itm in testData)
            {
                bloomFilter.Add(itm);
            }
            var bloomFilter2 = new InvertibleReverseBloomFilter<TestEntity, long, sbyte>(configuration);

            bloomFilter2.Initialize(2 * size, errorRate);
            foreach (var itm in testData2)
            {
                bloomFilter2.Add(itm);
            }
            bloomFilter.Add(bloomFilter2);
            var contained = testData.Union(testData2).Count(item => bloomFilter.Contains(item));

            // Expected value first, actual second, so a failure message labels them correctly.
            Assert.AreEqual(2 * addSize, contained, "Not all items found in added Bloom filters");
        }
Exemple #7
0
        /// <summary>
        /// Checks that added entities are never reported missing, that entities which were not
        /// added are reported as contained at most <c>20 * errorRate * addSize</c> times, and that
        /// <c>ContainsKey</c> throws <see cref="NotSupportedException"/>.
        /// </summary>
        public void ReverseFalsePositiveTest()
        {
            var addSize   = 10000;
            var errorRate = 0.001F;
            var members   = DataGenerator.Generate().Take(addSize).ToArray();
            var settings  = new KeyValueBloomFilterConfiguration();
            var filter    = new InvertibleReverseBloomFilter<TestEntity, long, sbyte>(settings);

            filter.Initialize(members.Length, errorRate);
            foreach (var member in members)
            {
                filter.Add(member);
            }

            // False negatives: every added entity has to be found.
            var missingCount = members.Count(member => !filter.Contains(member));
            Assert.IsTrue(missingCount == 0, "False negative error rate violated");

            try
            {
                missingCount = members.Count(member => !filter.ContainsKey(member.Id));
                Assert.Fail("Invertible reverse Bloom filter does not support ContainsKey.");
            }
            catch (NotSupportedException)
            {
                // Expected: key lookups are not supported.
            }

            // False positives: entities past the added range were never put in the filter.
            var falsePositiveCount = DataGenerator.Generate()
                                                  .Skip(addSize)
                                                  .Take(addSize)
                                                  .Count(outsider => filter.Contains(outsider));
            Assert.IsTrue(falsePositiveCount <= 20 * errorRate * addSize, "False positive error rate violated");

            try
            {
                falsePositiveCount = DataGenerator.Generate()
                                                  .Skip(addSize)
                                                  .Take(addSize)
                                                  .Count(outsider => filter.ContainsKey(outsider.Id));
                Assert.Fail("Invertible reverse Bloom filter does not support ContainsKey.");
            }
            catch (NotSupportedException)
            {
                // Expected: key lookups are not supported.
            }
        }
Exemple #8
0
        /// <summary>
        /// Folds a filter with block size 1024 by a factor of 4 and checks that the folded result
        /// has block size 256, while the filter that was folded still reports the same number of
        /// positives and still contains every added entity.
        /// </summary>
        public void ReverseSimpleFold()
        {
            var itemTotal = 50;
            var entities  = DataGenerator.Generate().Take(itemTotal).ToArray();
            var capacity  = entities.Length;
            var settings  = new KeyValueBloomFilterConfiguration();
            var filter    = new InvertibleReverseBloomFilter<TestEntity, long, sbyte>(settings);

            filter.Initialize(capacity, 1024, (uint)3);
            foreach (var entity in entities)
            {
                filter.Add(entity);
            }

            var positivesBeforeFold = DataGenerator.Generate().Take(500).Count(probe => filter.Contains(probe));
            var foldedFilter        = filter.Fold(4);
            // NOTE(review): this count queries the filter that was folded, not the folded result.
            // Confirm whether the folded filter was meant to be queried here instead.
            var positivesAfterFold = DataGenerator.Generate().Take(500).Count(probe => filter.Contains(probe));

            Assert.AreEqual(positivesBeforeFold, positivesAfterFold, "False positive count different after fold");
            Assert.AreEqual(256, foldedFilter.Extract().BlockSize);
            Assert.IsTrue(entities.All(entity => filter.Contains(entity)), "False negative found");
        }
Exemple #9
0
        /// <summary>
        /// Subtracts a filter holding 1000 modified entities from a filter built from an empty
        /// data set and checks that decoding succeeds and reports the expected number of ids
        /// per category (only in first, only in second, changed).
        /// </summary>
        public void ReverseInvertibleBloomFilterEmptySetDiffTest()
        {
            var itemTotal   = 1000;
            var changeTotal = itemTotal;
            var emptySet    = DataGenerator.Generate().Take(0).ToList();
            var filledSet   = DataGenerator.Generate().Take(itemTotal).ToList();

            filledSet.Modify(changeTotal);
            var settings = new KeyValueBloomFilterConfiguration();

            var emptyFilter = new InvertibleReverseBloomFilter<TestEntity, long, sbyte>(settings);
            emptyFilter.Initialize(10 * changeTotal, 0.0001F);
            foreach (var entity in emptySet)
            {
                emptyFilter.Add(entity);
            }

            var filledFilter = new InvertibleReverseBloomFilter<TestEntity, long, sbyte>(settings);
            filledFilter.Initialize(10 * changeTotal, 0.0001F);
            foreach (var entity in filledSet)
            {
                filledFilter.Add(entity);
            }

            // Decode the difference between both filters into the three result sets.
            var decodedChanged      = new HashSet<long>();
            var decodedOnlyInFirst  = new HashSet<long>();
            var decodedOnlyInSecond = new HashSet<long>();
            var decodeOutcome       = emptyFilter.SubtractAndDecode(filledFilter, decodedOnlyInFirst, decodedOnlyInSecond, decodedChanged);

            // Expected differences, computed directly from the data sets.
            var expectedOnlyInFirst = emptySet
                                      .Where(first => !filledSet.Any(second => second.Id == first.Id))
                                      .Select(first => first.Id)
                                      .OrderBy(id => id)
                                      .ToArray();
            var expectedOnlyInSecond = filledSet
                                       .Where(second => !emptySet.Any(first => first.Id == second.Id))
                                       .Select(second => second.Id)
                                       .OrderBy(id => id)
                                       .ToArray();
            var expectedChanged = emptySet
                                  .Where(first => filledSet.Any(second => second.Id == first.Id && second.Value != first.Value))
                                  .Select(first => first.Id)
                                  .OrderBy(id => id)
                                  .ToArray();

            //fairly sensitive to decoding errors (due to the same reason as Contains is rather unreliable: the pure function does not check the id value and hash value)
            Assert.IsTrue(decodeOutcome.HasValue, "Decoding failed");
            Assert.IsTrue(expectedOnlyInFirst.Length == decodedOnlyInFirst.Count, "Incorrect number of changes detected on 'only in set 1'");
            Assert.IsTrue(expectedOnlyInSecond.Length == decodedOnlyInSecond.Count, "Incorrect number of changes detected on 'only in set 2'");
            Assert.IsTrue(decodedChanged.Count == expectedChanged.Length, "Incorrect number of modified items detected");
        }
Exemple #10
0
        /// <summary>
        /// Initializes a filter at 50 times the number of added entities, compresses it and checks
        /// the capacity before and after compression as well as the number of positives reported
        /// for entities that were never added.
        /// </summary>
        public void ReverseCompressTest()
        {
            var addSize   = 10000;
            var errorRate = 0.001F;
            var data      = DataGenerator.Generate().Take(addSize).ToArray();
            var filter    = new InvertibleReverseBloomFilter<TestEntity, long, sbyte>(new KeyValueBloomFilterConfiguration());

            filter.Initialize(50 * data.Length, errorRate);
            // Expected value first, actual second, so a failure message labels them correctly.
            Assert.AreEqual(500000, filter.Capacity, "Unexpected size of reverse Bloom filter.");
            foreach (var item in data)
            {
                filter.Add(item);
            }
            // Check the error rate: none of these entities was added, so every hit is a false positive.
            var notFoundCount = DataGenerator.Generate().Skip(addSize).Take(10000).Count(itm => filter.Contains(itm));

            Assert.IsTrue(notFoundCount <= 4 * errorRate * addSize, "Uncompressed reverse Bloom filter exceeded error rate.");
            filter.Compress(true);
            Assert.AreEqual(12820, filter.Capacity, "Unexpected size of compressed reverse Bloom filter.");
            var compressNotFoundCount = DataGenerator.Generate().Skip(addSize).Take(10000).Count(itm => filter.Contains(itm));

            Assert.IsTrue(compressNotFoundCount <= 4 * errorRate * addSize, "Compressed reverse Bloom filter exceeded error rate.");
        }
        /// <summary>
        /// Benchmark: builds two reverse invertible Bloom filters from a data set and a modified
        /// copy, decodes their difference, and records timing, serialized size and decode quality.
        /// </summary>
        /// <remarks>
        /// One file named <c>splitribfdecode-{size}.csv</c> is created (overwriting any existing
        /// file) per data set size. Each row aggregates 50 runs for one combination of
        /// modification percentage and capacity factor. Numbers are written with the invariant
        /// culture so that a culture specific decimal comma cannot corrupt the CSV columns.
        /// </remarks>
        public void SplitRibfDecodePerformance()
        {
            // Number of repetitions aggregated into a single CSV row.
            const int runCount = 50;

            var configuration = new KeyValueLargeBloomFilterConfiguration();
            var csvCulture    = System.Globalization.CultureInfo.InvariantCulture;

            var size          = new[] { 1000, 10000, 100000 };
            var modPercentage = new[] { 0, 0.01D, 0.1D, 0.2D, 0.5D, 1.0D };

            foreach (var s in size)
            {
                using (
                    var writer =
                        new StreamWriter(File.Open($"splitribfdecode-{s}.csv",
                                                   FileMode.Create)))
                {
                    writer.WriteLine("timeInMs,sizeInBytes,capacity,modCount,detectedModCount,countDiff,countDiffSd,decodeSuccessRate");

                    foreach (var mod in modPercentage)
                    {
                        foreach (var capacityPercentage in new[] { 0.5, 1, 2, 5, 10, 100 })
                        {
                            // Two serialized sizes are recorded per run (one per filter).
                            var sizeInBytes             = new long[2 * runCount];
                            var timeSpan                = new long[runCount];
                            var countAggregate          = new int[runCount];
                            var modCountResultAggregate = new int[runCount];
                            var decodeResult            = new int[runCount];
                            for (var run = 0; run < runCount; run++)
                            {
                                var dataSet1 = DataGenerator.Generate().Take(s).ToList();
                                var dataSet2 = DataGenerator.Generate().Take(s).ToList();
                                dataSet2.Modify((int)(s * mod));

                                // Index both sets by id once (after the modification) so the
                                // expected differences are found without comparing every pair.
                                var set1ById   = dataSet1.ToLookup(d => d.Id);
                                var set2ById   = dataSet2.ToLookup(d => d.Id);
                                var onlyInSet1 = dataSet1.Where(d => !set2ById.Contains(d.Id)).Select(d => d.Id).ToArray();
                                var onlyInSet2 = dataSet2.Where(d => !set1ById.Contains(d.Id)).Select(d => d.Id).ToArray();
                                var modified   = dataSet1.Where(d => set2ById[d.Id].Any(d2 => d2.Value != d.Value)).Select(d => d.Id).ToArray();

                                var differenceCount = onlyInSet1.Length + onlyInSet2.Length + modified.Length;
                                var expectedIds     = new HashSet<long>(onlyInSet1);
                                expectedIds.UnionWith(onlyInSet2);
                                expectedIds.UnionWith(modified);

                                var idealCapacity = Math.Max(15, differenceCount);
                                var stopWatch     = new Stopwatch();
                                // Timed section: filter construction, filling and decoding.
                                stopWatch.Start();
                                var bloomFilter1 = new InvertibleReverseBloomFilter<TestEntity, long, int>(configuration);
                                bloomFilter1.Initialize((int)(idealCapacity * capacityPercentage), 0.01F);

                                foreach (var item in dataSet1)
                                {
                                    bloomFilter1.Add(item);
                                }
                                var bloomFilter2 = new InvertibleReverseBloomFilter<TestEntity, long, int>(configuration);
                                bloomFilter2.Initialize((int)(idealCapacity * capacityPercentage), 0.01F);
                                foreach (var item in dataSet2)
                                {
                                    bloomFilter2.Add(item);
                                }
                                var s1      = new HashSet<long>();
                                var s2      = new HashSet<long>();
                                var s3      = new HashSet<long>();
                                var success = bloomFilter1.SubtractAndDecode(bloomFilter2, s1, s2, s3);
                                stopWatch.Stop();

                                using (var stream = new MemoryStream())
                                {
                                    _protobufTypeModel.Serialize(stream, bloomFilter1.Extract());
                                    sizeInBytes[run] = stream.Length;
                                }
                                using (var stream = new MemoryStream())
                                {
                                    _protobufTypeModel.Serialize(stream, bloomFilter2.Extract());
                                    sizeInBytes[runCount + run] = stream.Length;
                                }
                                timeSpan[run]       = stopWatch.ElapsedMilliseconds;
                                countAggregate[run] = differenceCount;
                                // Only decoded ids that are part of the expected difference count.
                                modCountResultAggregate[run] = s1.Union(s2).Union(s3).Count(v => expectedIds.Contains(v));
                                decodeResult[run]            = success == true ? 1 : 0;
                            }
                            var countAvg         = (long)countAggregate.Average();
                            var modCountResult   = (long)modCountResultAggregate.Average();
                            var differenceResult =
                                modCountResultAggregate.Select((r, i) => r - countAggregate[i]).ToArray();
                            var differenceSd = Math.Sqrt(differenceResult.Variance());

                            // NOTE(review): the capacity column is derived from the requested
                            // modification count, while the filters are sized from the measured
                            // difference count; confirm that this distinction is intended.
                            writer.WriteLine(string.Format(
                                csvCulture,
                                "{0},{1},{2},{3},{4},{5},{6},{7}",
                                timeSpan.Average(),
                                sizeInBytes.Average(),
                                Math.Max(15, capacityPercentage * (int)(s * mod)),
                                countAvg,
                                modCountResult,
                                (long)differenceResult.Average(),
                                differenceSd,
                                1.0D * decodeResult.Sum() / runCount));
                        }
                    }
                }
            }
        }
        /// <summary>
        /// Benchmark: for fixed filter capacities, decodes the difference between a data set and
        /// a modified copy and records how many differences were decoded relative to the length
        /// of the filter's counts array.
        /// </summary>
        /// <remarks>
        /// One file named <c>ribfdecodespersize-{size}.csv</c> is created (overwriting any
        /// existing file) per data set size. Each row aggregates 50 runs for one combination of
        /// modification percentage and capacity. Numbers are written with the invariant culture
        /// so that a culture specific decimal comma cannot corrupt the CSV columns.
        /// </remarks>
        public void RibfDecodePerSizePerformance()
        {
            // Number of repetitions aggregated into a single CSV row.
            const int runCount = 50;

            var configuration = new KeyValueLargeBloomFilterConfiguration();
            var csvCulture    = System.Globalization.CultureInfo.InvariantCulture;

            var size          = new[] { 1000, 10000, 100000 };
            var modPercentage = new[] { 0, 0.01D, 0.1D, 0.2D, 0.5D, 1.0D };

            foreach (var s in size)
            {
                using (
                    var writer =
                        new StreamWriter(File.Open($"ribfdecodespersize-{s}.csv",
                                                   FileMode.Create)))
                {
                    writer.WriteLine("capacity,modCount,estimatedModCount,size,decodesPerSize,decodesPerSizeSd,decodeSuccessRate");

                    foreach (var mod in modPercentage)
                    {
                        foreach (var capacity in new[] { 10, 100, 500, 1000, 2000, 5000, 10000 })
                        {
                            var countSize               = 0;
                            var decodesPerSize          = new double[runCount];
                            var decodeResult            = new int[runCount];
                            var modCountResultAggregate = new int[runCount];
                            for (var run = 0; run < runCount; run++)
                            {
                                var dataSet1 = DataGenerator.Generate().Take(s).ToList();
                                var dataSet2 = DataGenerator.Generate().Take(s).ToList();
                                dataSet2.Modify((int)(s * mod));

                                // Index both sets by id once (after the modification) so the
                                // expected differences are found without comparing every pair.
                                var set1ById    = dataSet1.ToLookup(d => d.Id);
                                var set2ById    = dataSet2.ToLookup(d => d.Id);
                                var expectedIds = new HashSet<long>(
                                    dataSet1.Where(d => !set2ById.Contains(d.Id)).Select(d => d.Id));
                                expectedIds.UnionWith(
                                    dataSet2.Where(d => !set1ById.Contains(d.Id)).Select(d => d.Id));
                                expectedIds.UnionWith(
                                    dataSet1.Where(d => set2ById[d.Id].Any(d2 => d2.Value != d.Value)).Select(d => d.Id));

                                var bloomFilter1 = new InvertibleReverseBloomFilter<TestEntity, long, int>(configuration);
                                bloomFilter1.Initialize(capacity, 0.01F);

                                foreach (var item in dataSet1)
                                {
                                    bloomFilter1.Add(item);
                                }
                                var bloomFilter2 = new InvertibleReverseBloomFilter<TestEntity, long, int>(configuration);
                                bloomFilter2.Initialize(capacity, 0.01F);
                                foreach (var item in dataSet2)
                                {
                                    bloomFilter2.Add(item);
                                }
                                var s1 = new HashSet<long>();
                                var s2 = new HashSet<long>();
                                var s3 = new HashSet<long>();
                                decodeResult[run] = bloomFilter1.SubtractAndDecode(bloomFilter2, s1, s2, s3) == true ? 1 : 0;
                                var mods = s1.Union(s2).Union(s3).ToArray();
                                // Only decoded ids that are part of the expected difference count.
                                modCountResultAggregate[run] = mods.Count(v => expectedIds.Contains(v));

                                // Extract once per run; the length is needed for the ratio and the size column.
                                countSize           = bloomFilter1.Extract().Counts.Length;
                                decodesPerSize[run] = 1.0D * modCountResultAggregate[run] / countSize;
                            }
                            writer.WriteLine(string.Format(
                                csvCulture,
                                "{0},{1},{2},{3},{4},{5},{6}",
                                capacity,
                                s * mod,
                                modCountResultAggregate.Average(),
                                countSize,
                                decodesPerSize.Average(),
                                Math.Sqrt(decodesPerSize.Variance()),
                                decodeResult.Average()));
                        }
                    }
                }
            }
        }