/// <summary>
/// Generates <paramref name="expectedCount" /> random (or sequential) elements and adds them to CardinalityEstimators,
/// merges the estimators, then asserts that the observed error rate is no more than <paramref name="maxAcceptedError" />.
/// Run time, expected/observed counts and the observed error are written to the console.
/// </summary>
/// <param name="stdError">Expected standard error of the estimators (upper bound)</param>
/// <param name="expectedCount">Number of elements to generate in total</param>
/// <param name="maxAcceptedError">Maximum allowed error rate. Default is 4 times <paramref name="stdError" /></param>
/// <param name="numHllInstances">Number of estimators to create. Generated elements will be assigned to one of the estimators at random</param>
/// <param name="sequential">When false, elements will be generated at random. When true, elements will be 0,1,2...</param>
private void RunTest(double stdError, long expectedCount, double? maxAcceptedError = null, int numHllInstances = 1, bool sequential = false)
{
    double errorLimit = maxAcceptedError ?? 4 * stdError; // should fail once in A LOT of runs
    int b = GetAccuracyInBits(stdError);

    var runStopwatch = new Stopwatch();
    long gcMemoryAtStart = GetGcMemory();

    // init HLLs
    var hlls = new CardinalityEstimator[numHllInstances];
    for (var i = 0; i < numHllInstances; i++)
    {
        hlls[i] = new CardinalityEstimator(b);
    }

    var nextMember = new byte[ElementSizeInBytes];
    runStopwatch.Start();
    for (long i = 0; i < expectedCount; i++)
    {
        // pick random hll, add member
        int chosenHll = Rand.Next(numHllInstances);
        if (sequential)
        {
            hlls[chosenHll].Add(i);
        }
        else
        {
            Rand.NextBytes(nextMember);
            hlls[chosenHll].Add(nextMember);
        }
    }

    runStopwatch.Stop();
    ReportMemoryCost(gcMemoryAtStart); // done here so references can't be GC'ed yet

    // Merge, and read the merged count exactly once so that the value printed is the value asserted on
    CardinalityEstimator mergedHll = CardinalityEstimator.Merge(hlls);
    ulong observedCount = mergedHll.Count();

    Console.WriteLine("Run time: {0}", runStopwatch.Elapsed);
    Console.WriteLine("Expected {0}, got {1}", expectedCount, observedCount);

    // Relative error. 0 expected and 0 observed is a perfect result; without this guard it would be 0/0 = NaN,
    // and NaN compares false against any limit, failing the assertion spuriously.
    double obsError = (expectedCount == 0 && observedCount == 0)
        ? 0.0
        : Math.Abs(observedCount / (double)expectedCount - 1.0);

    Console.WriteLine("StdErr: {0}. Observed error: {1}", stdError, obsError);
    Assert.True(obsError <= errorLimit, string.Format("Observed error was over {0}", errorLimit));
    Console.WriteLine();
}
/// <summary>
/// Adds 10,000,000 elements (each a buffer filled by <c>Rand.NextBytes</c>) to a single default-constructed estimator,
/// samples the relative error of <c>Count()</c> at a fixed interval, and prints the largest error seen together with
/// the number of elements that had been added when it occurred.
/// </summary>
/// <remarks>
/// This is a reporting aid only: the final assertion is unconditional, so the method never fails on accuracy.
/// </remarks>
public void ReportAccuracy()
{
    const int totalMembers = 10000000;
    const int sampleInterval = 1007; // just some interval to sample error at, can be any number

    var hll = new CardinalityEstimator();
    double maxError = 0;
    var worstMember = 0;
    var nextMember = new byte[ElementSizeInBytes];

    for (var i = 0; i < totalMembers; i++)
    {
        Rand.NextBytes(nextMember);
        hll.Add(nextMember);

        if (i % sampleInterval != 0)
        {
            continue;
        }

        int membersAdded = i + 1;

        // Use the magnitude of the relative error: a signed error would make every under-estimate negative,
        // so it could never beat maxError and the reported maximum would silently ignore under-counting.
        double error = Math.Abs((hll.Count() - (double)membersAdded) / membersAdded);
        if (error > maxError)
        {
            maxError = error;
            worstMember = membersAdded;
        }
    }

    Console.WriteLine("Worst: {0}", worstMember);
    Console.WriteLine("Max error: {0}", maxError);

    Assert.True(true);
}
/// <summary>
/// Feeds the <c>Hash</c> of every fingerprint returned by <c>_counterRepo.GetFingerprints()</c> into a fresh
/// cardinality estimator and returns the estimator's count as JSON.
/// </summary>
/// <returns>A <see cref="JsonResult" /> wrapping an object with a single <c>Clicks</c> property.</returns>
public JsonResult Get()
{
    ICardinalityEstimator<string> clickEstimator = new CardinalityEstimator();

    foreach (Fingerprint fingerprint in _counterRepo.GetFingerprints())
    {
        clickEstimator.Add(fingerprint.Hash);
    }

    var clickCount = clickEstimator.Count();
    var payload = new { Clicks = clickCount };
    return new JsonResult(payload);
}
/// <summary>
/// Deserializes the three stored v2.0 payloads (<c>serializedDirect_v2_0</c>, <c>serializedSparse_v2_0</c>,
/// <c>serializedDense_v2_0</c>) and checks that each resulting estimator reports the expected count
/// (50, 151 and 5009 respectively).
/// </summary>
public void SerializerCanDeserializeVersion2Point0()
{
    var serializer = new CardinalityEstimatorSerializer();

    // Each stream is only needed while Deserialize runs, so dispose it as soon as the estimator has been read.
    CardinalityEstimator hllDirect;
    using (var directStream = new MemoryStream(Resources.serializedDirect_v2_0))
    {
        hllDirect = serializer.Deserialize(directStream);
    }

    CardinalityEstimator hllSparse;
    using (var sparseStream = new MemoryStream(Resources.serializedSparse_v2_0))
    {
        hllSparse = serializer.Deserialize(sparseStream);
    }

    CardinalityEstimator hllDense;
    using (var denseStream = new MemoryStream(Resources.serializedDense_v2_0))
    {
        hllDense = serializer.Deserialize(denseStream);
    }

    Assert.AreEqual(50UL, hllDirect.Count());
    Assert.AreEqual(151UL, hllSparse.Count());
    Assert.AreEqual(5009UL, hllDense.Count());
}
/// <summary>
/// Round-trips an estimator holding the values 5 and 7 through <see cref="BinaryFormatter" /> and verifies that the
/// copy reports a count of 2, both immediately after deserialization and after 5 and 7 are added to it again.
/// </summary>
/// <remarks>
/// NOTE(review): BinaryFormatter is obsolete in current .NET; confirm this test is still wanted on newer targets.
/// </remarks>
public void EstimatorWorksAfterDeserialization()
{
    ICardinalityEstimator<int> original = new CardinalityEstimator();
    original.Add(5);
    original.Add(7);
    Assert.Equal(2UL, original.Count());

    var formatter = new BinaryFormatter();
    using (var buffer = new MemoryStream())
    {
        formatter.Serialize(buffer, original);

        // Rewind so that deserialization reads what was just written
        buffer.Seek(0, SeekOrigin.Begin);
        var copy = (CardinalityEstimator)formatter.Deserialize(buffer);

        Assert.Equal(2UL, copy.Count());

        // Re-adding the same members must not change the count
        copy.Add(5);
        copy.Add(7);
        Assert.Equal(2UL, copy.Count());
    }
}
/// <summary>
/// Builds ten estimators with 11 bits per index, adds one <c>Rand.Next()</c> value to each, merges them with the
/// static <c>CardinalityEstimator.Merge</c>, and checks that the merged estimator counts 10 elements and keeps the
/// same bits-per-index setting.
/// </summary>
/// <remarks>
/// The expected count of 10 relies on the ten random values all being distinct.
/// </remarks>
public void StaticMergeTest()
{
    const int expectedBitsPerIndex = 11;
    const int estimatorCount = 10;

    var estimators = new CardinalityEstimator[estimatorCount];
    for (var index = 0; index < estimators.Length; index++)
    {
        var single = new CardinalityEstimator(expectedBitsPerIndex);
        estimators[index] = single;
        single.Add(Rand.Next());
    }

    CardinalityEstimator merged = CardinalityEstimator.Merge(estimators);

    Assert.Equal(10UL, merged.Count());
    Assert.Equal(expectedBitsPerIndex, merged.GetState().BitsPerIndex);
}
/// <summary>
/// For every <c>HashFunctionId</c> value: fills an estimator with 150 random ints, serializes and deserializes it,
/// adds the same ints to the deserialized copy, and asserts that the copy's count equals the original's.
/// </summary>
/// <remarks>
/// The counts can only stay equal if re-adding the elements leaves the copy's state unchanged.
/// </remarks>
public void DeserializedEstimatorUsesSameHashAsOriginal()
{
    const int elementCount = 150;

    // Prepare some elements
    IList<int> elements = new List<int>();
    while (elements.Count < elementCount)
    {
        elements.Add(Rand.Next());
    }

    foreach (HashFunctionId hashFunctionId in Enum.GetValues(typeof(HashFunctionId)))
    {
        // Add elements to an estimator using the given hashFunctionId
        var original = new CardinalityEstimator(hashFunctionId: hashFunctionId);
        foreach (int value in elements)
        {
            original.Add(value);
        }

        var serializer = new CardinalityEstimatorSerializer();

        // Serialize
        byte[] serializedBytes;
        using (var output = new MemoryStream())
        {
            serializer.Serialize(output, original, false);
            serializedBytes = output.ToArray();
        }

        // Deserialize
        CardinalityEstimator deserialized;
        using (var input = new MemoryStream(serializedBytes))
        {
            deserialized = serializer.Deserialize(input, false);
        }

        // Add the elements again, should have no effect on state
        foreach (int value in elements)
        {
            deserialized.Add(value);
        }

        Assert.AreEqual(original.Count(), deserialized.Count());
    }
}
/// <summary>
/// Adds 10,000,000 elements (each a buffer filled by <c>Rand.NextBytes</c>) to a single default-constructed estimator,
/// samples the relative error of <c>Count()</c> at a fixed interval, and prints the largest error seen together with
/// the number of elements that had been added when it occurred.
/// </summary>
/// <remarks>
/// This is a reporting aid only: the final assertion is unconditional, so the method never fails on accuracy.
/// </remarks>
[Ignore] // Test runtime is long
public void ReportAccuracy()
{
    const int totalMembers = 10000000;
    const int sampleInterval = 1007; // just some interval to sample error at, can be any number

    var hll = new CardinalityEstimator();
    double maxError = 0;
    var worstMember = 0;
    var nextMember = new byte[ElementSizeInBytes];

    for (var i = 0; i < totalMembers; i++)
    {
        Rand.NextBytes(nextMember);
        hll.Add(nextMember);

        if (i % sampleInterval != 0)
        {
            continue;
        }

        int membersAdded = i + 1;

        // Use the magnitude of the relative error: a signed error would make every under-estimate negative,
        // so it could never beat maxError and the reported maximum would silently ignore under-counting.
        double error = Math.Abs((hll.Count() - (double)membersAdded) / membersAdded);
        if (error > maxError)
        {
            maxError = error;
            worstMember = membersAdded;
        }
    }

    Console.WriteLine("Worst: {0}", worstMember);
    Console.WriteLine("Max error: {0}", maxError);

    Assert.IsTrue(true);
}
/// <summary>
/// Returns the count reported by the wrapped <c>_hyperLogLog</c> instance.
/// </summary>
/// <returns>The value of <c>_hyperLogLog.Count()</c>; presumably the estimated number of distinct elements (confirm against the wrapped type).</returns>
public ulong Count()
{
    return _hyperLogLog.Count();
}