/// <summary>
/// Gets an enumerable whose items are probabilistically distinct. Because membership is
/// tracked with a Bloom filter, this may exclude some distinct items that a normal
/// <see cref="Enumerable.Distinct{TSource}(IEnumerable{TSource})"/> would include.
/// </summary>
/// <typeparam name="T">Item type</typeparam>
/// <param name="enumerable">Enumerable to operate over</param>
/// <param name="expectedItems">How many distinct items you expect to process</param>
/// <param name="errorRate">Desired error (false positive) rate expressed as value between 0.0 and 1.0</param>
/// <param name="h1">Hash function</param>
/// <param name="h2">Another hash function</param>
/// <returns>
/// A <c>ProbabilisticDistinctEnumerable</c> wrapping <paramref name="enumerable"/>, supplied with a
/// factory that creates a <c>SparseFastBloomFilter</c> from the calculated parameters and the two hash functions.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="enumerable"/>, <paramref name="h1"/> or <paramref name="h2"/> is <c>null</c>.
/// </exception>
public static IEnumerable<T> ProbabilisticDistinct<T>(this IEnumerable<T> enumerable, long expectedItems, double errorRate, Func<T, int> h1, Func<T, int> h2)
{
    // Validate eagerly: the hash functions are only captured by the factory lambda below,
    // so a null would otherwise surface later, away from the offending call site.
    if (enumerable == null)
    {
        throw new ArgumentNullException("enumerable");
    }

    if (h1 == null)
    {
        throw new ArgumentNullException("h1");
    }

    if (h2 == null)
    {
        throw new ArgumentNullException("h2");
    }

    // Parameters are computed once here and shared by every filter the factory creates.
    IBloomFilterParameters parameters = BloomUtils.CalculateBloomParameters(expectedItems, errorRate);
    Func<IBloomFilter<T>> filterFactory = () => new SparseFastBloomFilter<T>(parameters, h1, h2);

    return new ProbabilisticDistinctEnumerable<T>(enumerable, filterFactory);
}
/// <summary>
/// Smoke check: calculates Bloom filter parameters for 100 expected items at a 0.01
/// error rate and writes the resulting bit count and hash-function count to the console.
/// </summary>
public void TestMethod1()
{
    const int expectedItemCount = 100;
    const double falsePositiveRate = 0.01;

    IBloomFilterParameters bloomParameters =
        BloomUtils.CalculateBloomParameters(expectedItemCount, falsePositiveRate);

    // Print the bit count first, then the hash-function count.
    var bitCount = bloomParameters.NumberOfBits;
    Console.WriteLine(bitCount);

    var hashFunctionCount = bloomParameters.NumberOfHashFunctions;
    Console.WriteLine(hashFunctionCount);
}