/// <summary>
/// Populates <c>_logPositionsBloomFilter</c> through <c>PopulateBloomFilter</c>
/// when <c>_optimizeCache</c> is set and no filter is currently held.
/// </summary>
public void OptimizeExistsAt() {
    if (!_optimizeCache) {
        return;
    }

    // A filter is already in place; do not rebuild it.
    if (_logPositionsBloomFilter != null) {
        return;
    }

    _logPositionsBloomFilter = PopulateBloomFilter();
}
/// <summary>
/// Drops the reference held in <c>_logPositionsBloomFilter</c>, if there is one.
/// </summary>
public void DeOptimizeExistsAt() {
    if (_logPositionsBloomFilter == null) {
        return;
    }

    _logPositionsBloomFilter = null;
}
/// <summary>
/// For every n from 1 to 1000 and every p starting at 0.1 and divided by 10 while
/// it stays above 1.0e-7: a fresh filter must report no item in 0..n, and after
/// adding 0..n it must report every one of them.
/// </summary>
public void always_return_true_if_an_item_was_added() {
    for (int n = 1; n <= 1000; n++) {
        for (double p = 0.1; p > 1.0e-7; p /= 10.0) {
            var bloom = new InMemoryBloomFilter(n, p);

            // Nothing has been added, so nothing may be reported as present.
            for (int candidate = 0; candidate <= n; candidate++) {
                Assert.IsFalse(bloom.MightContain(candidate));
            }

            // Insert the values 0..n.
            for (int value = 0; value <= n; value++) {
                bloom.Add(value);
            }

            // Every inserted value must now be reported as present.
            for (int candidate = 0; candidate <= n; candidate++) {
                Assert.IsTrue(bloom.MightContain(candidate));
            }
        }
    }
}
/// <summary>
/// Same check as the small-n variant, for a single large configuration
/// (n = 1234567, p = 1.0e-6): an empty filter reports no item in 0..n, and after
/// adding 0..n it reports all of them.
/// </summary>
public void always_return_true_if_an_item_was_added_for_large_n() {
    int n = 1234567;
    double p = 1.0e-6;
    var bloom = new InMemoryBloomFilter(n, p);

    // Nothing has been added, so nothing may be reported as present.
    for (int candidate = 0; candidate <= n; candidate++) {
        Assert.IsFalse(bloom.MightContain(candidate));
    }

    // Insert the values 0..n.
    for (int value = 0; value <= n; value++) {
        bloom.Add(value);
    }

    // Every inserted value must now be reported as present.
    for (int candidate = 0; candidate <= n; candidate++) {
        Assert.IsTrue(bloom.MightContain(candidate));
    }
}
/// <summary>
/// For n = 1234567 and each p starting at 0.1 and divided by 10 while it stays
/// above 1.0e-7: adds only the odd numbers in 1..n, counts how many even numbers
/// in 2..n the filter reports as present, and asserts that the count does not
/// exceed ceil(n * p / 2).
/// </summary>
public void have_false_positives_with_probability_p_for_large_n() {
    int n = 1234567;

    for (double p = 0.1; p > 1.0e-7; p /= 10.0) {
        var bloom = new InMemoryBloomFilter(n, p);

        // Only odd numbers go into the filter.
        for (int odd = 1; odd <= n; odd += 2) {
            bloom.Add(odd);
        }

        // Upper bound on false positives accepted by this test.
        int expectedFalsePositives = (int)Math.Ceiling(n * p / 2.0);

        // No even number was added, so every hit on an even number is a false positive.
        int observed = 0;
        for (int even = 2; even <= n; even += 2) {
            if (!bloom.MightContain(even)) {
                continue;
            }
            observed++;
        }

        if (observed > 0) {
            Console.Out.WriteLine("n: {0}, p:{1}. Found {2} false positives. Expected false positives: {3}", n, p, observed, expectedFalsePositives);
        }

        Assert.LessOrEqual(observed, expectedFalsePositives);
    }
}
/// <summary>
/// Creates an <c>InMemoryBloomFilter</c> sized for the chunk's map count and adds
/// the <c>LogPos</c> of every entry returned by <c>ReadPosMap</c>.
/// </summary>
/// <returns>
/// The populated filter, or <c>null</c> when the map count is not positive, when
/// no filter could be constructed for any attempted probability, or when a
/// <c>FileBeingDeletedException</c> or <c>OutOfMemoryException</c> is raised while
/// reading the entries.
/// </returns>
private InMemoryBloomFilter PopulateBloomFilter() {
    var entryCount = Chunk.ChunkFooter.MapCount;
    if (entryCount <= 0) {
        return null;
    }

    // Start at a false positive probability of 1e-4; each time the constructor
    // rejects the arguments, retry with a probability ten times larger, giving
    // up once it reaches 1.0.
    InMemoryBloomFilter filter = null;
    double probability = 1e-4;
    while (filter == null && probability < 1.0) {
        try {
            filter = new InMemoryBloomFilter(entryCount, probability);
        } catch (ArgumentOutOfRangeException) {
            probability *= 10.0;
        }
    }

    if (filter == null) {
        Log.Warning("Could not create bloom filter for chunk: {chunk}, map count: {mapCount}", Chunk.FileName, entryCount);
        return null;
    }

    ReaderWorkItem reader = null;
    try {
        reader = Chunk.GetReaderWorkItem();
        foreach (var entry in ReadPosMap(reader, 0, entryCount)) {
            filter.Add(entry.LogPos);
        }
        return filter;
    } catch (FileBeingDeletedException) {
        return null;
    } catch (OutOfMemoryException) {
        return null;
    } finally {
        // Hand the work item back whether or not population succeeded.
        if (reader != null) {
            Chunk.ReturnReaderWorkItem(reader);
        }
    }
}
/// <summary>
/// Verifies that the filter works with large 64-bit values: none of the sample
/// values is reported before insertion, all of them are reported afterwards, and
/// the values immediately below and above each sample are asserted to be absent.
/// </summary>
public void support_adding_large_values() {
    int n = 1234567;
    double p = 1.0e-6;
    InMemoryBloomFilter filter = new InMemoryBloomFilter(n, p);
    long[] items = {
        192389123812L, 286928492L, 27582928698L, 72669175482L, 1738996371L,
        939342020387L, 37253255484L, 346536436L, 123921398432L,
        8324982394329432L, 183874782348723874L, long.MaxValue
    };

    //no items added yet
    foreach (long item in items) {
        Assert.IsFalse(filter.MightContain(item));
    }

    //add the items
    foreach (long item in items) {
        filter.Add(item);
    }

    //all the items should exist
    foreach (long item in items) {
        Assert.IsTrue(filter.MightContain(item));
    }

    //all the neighbouring items should probably not exist
    foreach (long item in items) {
        // long.MaxValue + 1 does not fit in a long. Wrap explicitly so the test
        // keeps probing long.MinValue instead of throwing OverflowException when
        // the assembly is compiled with overflow checking enabled.
        long below = unchecked(item - 1);
        long above = unchecked(item + 1);

        Assert.IsFalse(filter.MightContain(below));
        Assert.IsFalse(filter.MightContain(above));
    }
}
/// <summary>
/// Checks that <c>InMemoryBloomFilter.toBytes</c> returns 8 bytes and that
/// recombining them with index 0 as the least significant byte yields the
/// original value, for every value in -1000..1000 and for a set of extreme and
/// large sample values.
/// </summary>
public void correctly_convert_long_to_bytes() {
    for (long value = -1000; value <= 1000; value++) {
        byte[] encoded = InMemoryBloomFilter.toBytes(value);
        Assert.AreEqual(8, encoded.Length);

        // Place each byte at its own 8-bit slot, lowest index in the lowest slot.
        long decoded = 0;
        for (int index = 0; index < 8; index++) {
            decoded |= (long)encoded[index] << (8 * index);
        }

        Assert.AreEqual(value, decoded);
    }

    long[] samples = {
        long.MaxValue, long.MinValue, 0, 192389123812L, 286928492L, 27582928698L,
        72669175482L, 1738996371L, 939342020387L, 37253255484L, 346536436L,
        123921398432L, 8324982394329432L, 183874782348723874L
    };

    foreach (long sample in samples) {
        byte[] encoded = InMemoryBloomFilter.toBytes(sample);
        Assert.AreEqual(8, encoded.Length);

        // Place each byte at its own 8-bit slot, lowest index in the lowest slot.
        long decoded = 0;
        for (int index = 0; index < 8; index++) {
            decoded |= (long)encoded[index] << (8 * index);
        }

        Assert.AreEqual(sample, decoded);
    }
}