public void DirectCountingIsResetWhenMergingAlmostFullEstimators()
{
    // An estimator filled by direct Add() calls and one built by merging many
    // single-element estimators should converge to the same internal
    // representation; we check this by comparing serialized sizes.
    var directlyFilled = new CardinalityEstimator();
    var mergeTarget = new CardinalityEstimator();

    for (int i = 0; i < 10_000; i++)
    {
        string guid = Guid.NewGuid().ToString();
        directlyFilled.Add(guid);

        // Simulate some intermediate estimators being merged together
        var singleElement = new CardinalityEstimator();
        singleElement.Add(guid);
        mergeTarget.Merge(singleElement);
    }

    var serializer = new CardinalityEstimatorSerializer();

    var directStream = new MemoryStream();
    serializer.Serialize(directStream, directlyFilled, true);

    var mergedStream = new MemoryStream();
    serializer.Serialize(mergedStream, mergeTarget, true);

    Assert.Equal(directStream.Length, mergedStream.Length);
}
public void ReportAccuracy()
{
    // Exploratory harness: adds 10M random members and samples the relative
    // estimation error at a fixed interval, reporting the worst case observed.
    var hll = new CardinalityEstimator();
    double maxError = 0;
    var worstMember = 0;
    var nextMember = new byte[ElementSizeInBytes];
    for (var i = 0; i < 10000000; i++)
    {
        Rand.NextBytes(nextMember);
        hll.Add(nextMember);
        if (i % 1007 == 0) // just some interval to sample error at, can be any number
        {
            // Fix: take the absolute relative error so underestimates are
            // tracked too — the original only ever recorded overestimates,
            // so an estimator that undercounts would report zero error.
            double trueCount = (double)i + 1;
            double error = Math.Abs((hll.Count() - trueCount) / trueCount);
            if (error > maxError)
            {
                maxError = error;
                worstMember = i + 1;
            }
        }
    }
    Console.WriteLine("Worst: {0}", worstMember);
    Console.WriteLine("Max error: {0}", maxError);
    Assert.True(true); // marker assertion: the test passes if it completes
}
public void TestSize()
{
    // CountElementsAdded counts raw Add() calls (duplicates included) and is
    // summed across merges.
    var first = new CardinalityEstimator();
    Assert.AreEqual(0UL, first.CountElementsAdded);

    first.Add(0);
    first.Add(0);
    Assert.AreEqual(2UL, first.CountElementsAdded);

    var second = new CardinalityEstimator();
    second.Add(0);
    first.Merge(second);
    Assert.AreEqual(3UL, first.CountElementsAdded);
}
public void TestCountAdditions()
{
    // CountAdditions counts raw Add() calls (duplicates included) and is
    // summed across merges.
    var first = new CardinalityEstimator();
    Assert.Equal(0UL, first.CountAdditions);

    first.Add(0);
    first.Add(0);
    Assert.Equal(2UL, first.CountAdditions);

    var second = new CardinalityEstimator();
    second.Add(0);
    first.Merge(second);
    Assert.Equal(3UL, first.CountAdditions);
}
public JsonResult Get()
{
    // Estimate the number of distinct fingerprint hashes seen by the repo.
    ICardinalityEstimator<string> estimator = new CardinalityEstimator();

    foreach (Fingerprint fingerprint in _counterRepo.GetFingerprints())
    {
        estimator.Add(fingerprint.Hash);
    }

    return new JsonResult(new { Clicks = estimator.Count() });
}
public void EstimatorWorksAfterDeserialization()
{
    // NOTE(review): BinaryFormatter is obsolete (removed in .NET 9); this test
    // deliberately exercises the legacy binary round-trip path.
    ICardinalityEstimator<int> original = new CardinalityEstimator();
    original.Add(5);
    original.Add(7);
    Assert.Equal(2UL, original.Count());

    var formatter = new BinaryFormatter();
    using (var buffer = new MemoryStream())
    {
        formatter.Serialize(buffer, original);
        buffer.Seek(0, SeekOrigin.Begin);
        var roundTripped = (CardinalityEstimator)formatter.Deserialize(buffer);

        Assert.Equal(2UL, roundTripped.Count());

        // Re-adding members already present must not change the estimate.
        roundTripped.Add(5);
        roundTripped.Add(7);
        Assert.Equal(2UL, roundTripped.Count());
    }
}
private CardinalityEstimator CreateAndFillCardinalityEstimator(int cardinality = 1000000, int bits = 14)
{
    // Build an estimator with the requested precision and feed it
    // `cardinality` random members.
    var estimator = new CardinalityEstimator(bits);
    var member = new byte[ElementSizeInBytes];

    for (var added = 0; added < cardinality; added++)
    {
        Rand.NextBytes(member);
        estimator.Add(member);
    }

    return estimator;
}
public void DeserializedEstimatorUsesSameHashAsOriginal()
{
    // Prepare a shared pool of random elements.
    IList<int> elements = new List<int>();
    for (int i = 0; i < 150; i++)
    {
        elements.Add(Rand.Next());
    }

    foreach (HashFunctionId hashFunctionId in Enum.GetValues(typeof(HashFunctionId)))
    {
        // Populate an estimator that uses the given hash function.
        var original = new CardinalityEstimator(hashFunctionId: hashFunctionId);
        foreach (int element in elements)
        {
            original.Add(element);
        }

        // Round-trip through the serializer.
        var serializer = new CardinalityEstimatorSerializer();
        byte[] payload;
        using (var writeStream = new MemoryStream())
        {
            serializer.Serialize(writeStream, original, false);
            payload = writeStream.ToArray();
        }

        CardinalityEstimator deserialized;
        using (var readStream = new MemoryStream(payload))
        {
            deserialized = serializer.Deserialize(readStream, false);
        }

        // Re-adding the same elements should be a no-op, which only holds if
        // the deserialized copy hashes them identically.
        foreach (int element in elements)
        {
            deserialized.Add(element);
        }

        Assert.AreEqual(original.Count(), deserialized.Count());
    }
}
private void RunRecreationFromData(int cardinality = 1000000)
{
    // Fill an estimator, snapshot its state, rebuild a second estimator from
    // that state, and verify the two states are structurally identical.
    var source = new CardinalityEstimator();
    var member = new byte[ElementSizeInBytes];
    for (var i = 0; i < cardinality; i++)
    {
        Rand.NextBytes(member);
        source.Add(member);
    }

    CardinalityEstimatorState expected = source.GetState();
    CardinalityEstimatorState actual = new CardinalityEstimator(expected).GetState();

    Assert.Equal(expected.BitsPerIndex, actual.BitsPerIndex);
    Assert.Equal(expected.IsSparse, actual.IsSparse);

    // Each internal structure must be present in both states or absent in both.
    Assert.True((expected.DirectCount == null) == (actual.DirectCount == null));
    Assert.True((expected.LookupSparse == null) == (actual.LookupSparse == null));
    Assert.True((expected.LookupDense == null) == (actual.LookupDense == null));

    if (expected.DirectCount != null)
    {
        // Mutual subsets => identical sets.
        Assert.True(expected.DirectCount.IsSubsetOf(actual.DirectCount) &&
                    actual.DirectCount.IsSubsetOf(expected.DirectCount));
    }

    if (expected.LookupSparse != null)
    {
        Assert.True(expected.LookupSparse.DictionaryEqual(actual.LookupSparse));
    }

    if (expected.LookupDense != null)
    {
        Assert.True(expected.LookupDense.SequenceEqual(actual.LookupDense));
    }
}
[Ignore] // Test runtime is long
public void ReportAccuracy()
{
    // Exploratory harness: adds 10M random members and samples the relative
    // estimation error at a fixed interval, reporting the worst case observed.
    var hll = new CardinalityEstimator();
    double maxError = 0;
    var worstMember = 0;
    var nextMember = new byte[ElementSizeInBytes];
    for (var i = 0; i < 10000000; i++)
    {
        Rand.NextBytes(nextMember);
        hll.Add(nextMember);
        if (i % 1007 == 0) // just some interval to sample error at, can be any number
        {
            // Fix: take the absolute relative error so underestimates are
            // tracked too — the original only ever recorded overestimates,
            // so an estimator that undercounts would report zero error.
            double trueCount = (double)i + 1;
            double error = Math.Abs((hll.Count() - trueCount) / trueCount);
            if (error > maxError)
            {
                maxError = error;
                worstMember = i + 1;
            }
        }
    }
    Console.WriteLine("Worst: {0}", worstMember);
    Console.WriteLine("Max error: {0}", maxError);
    Assert.IsTrue(true); // marker assertion: the test passes if it completes
}
private void RunRecreationFromData(int cardinality = 1000000)
{
    // Fill an estimator, snapshot its state, rebuild a second estimator from
    // that state, and verify the two states are structurally identical.
    var source = new CardinalityEstimator();
    var member = new byte[ElementSizeInBytes];
    for (var i = 0; i < cardinality; i++)
    {
        Rand.NextBytes(member);
        source.Add(member);
    }

    CardinalityEstimatorState expected = source.GetState();
    CardinalityEstimatorState actual = new CardinalityEstimator(expected).GetState();

    Assert.AreEqual(expected.BitsPerIndex, actual.BitsPerIndex);
    Assert.AreEqual(expected.IsSparse, actual.IsSparse);

    // Each internal structure must be present in both states or absent in both.
    Assert.IsTrue((expected.DirectCount == null) == (actual.DirectCount == null));
    Assert.IsTrue((expected.LookupSparse == null) == (actual.LookupSparse == null));
    Assert.IsTrue((expected.LookupDense == null) == (actual.LookupDense == null));

    if (expected.DirectCount != null)
    {
        // Mutual subsets => identical sets.
        Assert.IsTrue(expected.DirectCount.IsSubsetOf(actual.DirectCount) &&
                      actual.DirectCount.IsSubsetOf(expected.DirectCount));
    }

    if (expected.LookupSparse != null)
    {
        Assert.IsTrue(expected.LookupSparse.DictionaryEqual(actual.LookupSparse));
    }

    if (expected.LookupDense != null)
    {
        Assert.IsTrue(expected.LookupDense.SequenceEqual(actual.LookupDense));
    }
}
// Forwards the value to the underlying HyperLogLog estimator.
public void Add(string value) => _hyperLogLog.Add(value);
private CardinalityEstimator CreateAndFillCardinalityEstimator(int cardinality = 1000000)
{
    // Build a default-precision estimator and feed it `cardinality` random
    // members.
    var estimator = new CardinalityEstimator();
    var member = new byte[ElementSizeInBytes];

    for (var added = 0; added < cardinality; added++)
    {
        Rand.NextBytes(member);
        estimator.Add(member);
    }

    return estimator;
}