/// <summary>
        ///     Generates <paramref name="expectedCount" /> random (or sequential) elements and adds them to CardinalityEstimators, then asserts that
        ///     the observed error rate is no more than <paramref name="maxAcceptedError" />
        /// </summary>
        /// <param name="stdError">Expected standard error of the estimators (upper bound)</param>
        /// <param name="expectedCount">number of elements to generate in total</param>
        /// <param name="maxAcceptedError">Maximum allowed error rate. Default is 4 times <paramref name="stdError" /></param>
        /// <param name="numHllInstances">Number of estimators to create. Generated elements will be assigned to one of the estimators at random</param>
        /// <param name="sequential">When false, elements will be generated at random. When true, elements will be 0,1,2...</param>
        private void RunTest(double stdError, long expectedCount, double? maxAcceptedError = null, int numHllInstances = 1,
                             bool sequential = false)
        {
            // Default tolerance is 4 standard errors: should fail once in A LOT of runs.
            // Resolved into a non-nullable local so the parameter itself is never reassigned.
            double acceptedError = maxAcceptedError ?? 4 * stdError;
            int    b             = GetAccuracyInBits(stdError);

            var  runStopwatch    = new Stopwatch();
            long gcMemoryAtStart = GetGcMemory();

            // init HLLs
            var hlls = new CardinalityEstimator[numHllInstances];

            for (var i = 0; i < numHllInstances; i++)
            {
                hlls[i] = new CardinalityEstimator(b);
            }

            // Single buffer reused for every random element; it is overwritten before each Add.
            var nextMember = new byte[ElementSizeInBytes];

            runStopwatch.Start();
            for (long i = 0; i < expectedCount; i++)
            {
                // pick random hll, add member
                int chosenHll = Rand.Next(numHllInstances);
                if (sequential)
                {
                    hlls[chosenHll].Add(i);
                }
                else
                {
                    Rand.NextBytes(nextMember);
                    hlls[chosenHll].Add(nextMember);
                }
            }

            runStopwatch.Stop();
            ReportMemoryCost(gcMemoryAtStart); // done here so references can't be GC'ed yet

            // Merge
            CardinalityEstimator mergedHll = CardinalityEstimator.Merge(hlls);

            // Query the merged estimator once and reuse the value for both reporting and the error computation.
            var observedCount = mergedHll.Count();

            Console.WriteLine("Run time: {0}", runStopwatch.Elapsed);
            Console.WriteLine("Expected {0}, got {1}", expectedCount, observedCount);

            double obsError;
            if (expectedCount == 0)
            {
                // A relative error is undefined for an expected count of zero (0/0 would yield NaN and the
                // comparison below would always fail). Treat an exact zero as no error and anything else as unbounded.
                obsError = observedCount == 0 ? 0.0 : double.PositiveInfinity;
            }
            else
            {
                obsError = Math.Abs(observedCount / (double)expectedCount - 1.0);
            }

            Console.WriteLine("StdErr: {0}.  Observed error: {1}", stdError, obsError);
            Assert.True(obsError <= acceptedError, string.Format("Observed error was over {0}", acceptedError));
            Console.WriteLine();
        }
        /// <summary>
        ///     Adds 10,000,000 random elements to a single default-constructed estimator, samples the relative
        ///     estimation error at a fixed interval, and writes the worst sample to the console.
        /// </summary>
        /// <remarks>
        ///     Reporting only: the final assertion always passes. The reported error is the magnitude of the
        ///     relative error, so both over- and under-estimates are taken into account.
        /// </remarks>
        public void ReportAccuracy()
        {
            const int totalElements  = 10000000;
            const int sampleInterval = 1007; // just some interval to sample error at, can be any number

            var    hll         = new CardinalityEstimator();
            double maxError    = 0;
            var    worstMember = 0;
            var    nextMember  = new byte[ElementSizeInBytes];

            for (var i = 0; i < totalElements; i++)
            {
                Rand.NextBytes(nextMember);
                hll.Add(nextMember);

                if (i % sampleInterval == 0)
                {
                    // i is zero-based, so i + 1 elements have been added at this point.
                    double addedSoFar = i + 1;

                    // Use the absolute value: a signed error compared against a maximum that starts at 0
                    // would silently ignore every under-estimate.
                    double error = Math.Abs(hll.Count() - addedSoFar) / addedSoFar;
                    if (error > maxError)
                    {
                        maxError    = error;
                        worstMember = i + 1;
                    }
                }
            }

            Console.WriteLine("Worst: {0}", worstMember);
            Console.WriteLine("Max error: {0}", maxError);

            Assert.True(true);
        }
Beispiel #3
0
        /// <summary>
        ///     Feeds the hash of every fingerprint returned by the counter repository into a new estimator
        ///     and returns the estimator's count as JSON.
        /// </summary>
        /// <returns>A <see cref="JsonResult" /> wrapping an object whose <c>Clicks</c> property holds the count.</returns>
        public JsonResult Get()
        {
            ICardinalityEstimator<string> clickEstimator = new CardinalityEstimator();

            // One Add per stored fingerprint hash.
            foreach (Fingerprint fingerprint in _counterRepo.GetFingerprints())
            {
                clickEstimator.Add(fingerprint.Hash);
            }

            var payload = new { Clicks = clickEstimator.Count() };
            return new JsonResult(payload);
        }
Beispiel #4
0
        /// <summary>
        ///     Deserializes the three v2.0 resource payloads (direct, sparse and dense) and checks that each
        ///     resulting estimator reports the expected count.
        /// </summary>
        public void SerializerCanDeserializeVersion2Point0()
        {
            var serializer = new CardinalityEstimatorSerializer();

            // Streams are disposed deterministically instead of being left for the garbage collector.
            using (var directStream = new MemoryStream(Resources.serializedDirect_v2_0))
            using (var sparseStream = new MemoryStream(Resources.serializedSparse_v2_0))
            using (var denseStream = new MemoryStream(Resources.serializedDense_v2_0))
            {
                CardinalityEstimator hllDirect = serializer.Deserialize(directStream);
                CardinalityEstimator hllSparse = serializer.Deserialize(sparseStream);
                CardinalityEstimator hllDense  = serializer.Deserialize(denseStream);

                Assert.AreEqual(50UL, hllDirect.Count());
                Assert.AreEqual(151UL, hllSparse.Count());
                Assert.AreEqual(5009UL, hllDense.Count());
            }
        }
        /// <summary>
        ///     Round-trips an estimator holding two elements through <see cref="BinaryFormatter" /> and checks that
        ///     the copy reports the same count, both immediately and after the same two elements are added again.
        /// </summary>
        public void EstimatorWorksAfterDeserialization()
        {
            ICardinalityEstimator<int> source = new CardinalityEstimator();
            source.Add(5);
            source.Add(7);

            Assert.Equal(2UL, source.Count());

            // NOTE(review): BinaryFormatter is considered unsafe and has been removed from recent .NET versions;
            // this test exercises it deliberately, so it is kept as-is here.
            var formatter = new BinaryFormatter();

            using (var buffer = new MemoryStream())
            {
                formatter.Serialize(buffer, source);

                // Rewind so deserialization reads from the start of what was just written.
                buffer.Position = 0;
                var restored = (CardinalityEstimator)formatter.Deserialize(buffer);

                Assert.Equal(2UL, restored.Count());

                // Re-adding the same elements must leave the count unchanged.
                restored.Add(5);
                restored.Add(7);
                Assert.Equal(2UL, restored.Count());
            }
        }
        /// <summary>
        ///     Builds ten estimators with 11 bits per index, adds one random integer to each, merges them with the
        ///     static <c>CardinalityEstimator.Merge</c>, and checks the merged count and bits-per-index.
        /// </summary>
        public void StaticMergeTest()
        {
            const int expectedBitsPerIndex = 11;

            var estimators = new CardinalityEstimator[10];
            for (int index = 0; index < estimators.Length; index++)
            {
                // NOTE(review): the count assertion below presumably relies on the ten random values being distinct.
                var single = new CardinalityEstimator(expectedBitsPerIndex);
                single.Add(Rand.Next());
                estimators[index] = single;
            }

            CardinalityEstimator merged = CardinalityEstimator.Merge(estimators);

            Assert.Equal(10UL, merged.Count());
            Assert.Equal(expectedBitsPerIndex, merged.GetState().BitsPerIndex);
        }
Beispiel #7
0
        /// <summary>
        ///     For every <see cref="HashFunctionId" /> value: fills an estimator with 150 random integers,
        ///     round-trips it through <see cref="CardinalityEstimatorSerializer" />, adds the same integers to the
        ///     deserialized copy, and checks that both estimators report the same count.
        /// </summary>
        public void DeserializedEstimatorUsesSameHashAsOriginal()
        {
            // Shared input: the same random elements are used for every hash function.
            IList<int> elements = new List<int>();
            for (int n = 0; n < 150; n++)
            {
                elements.Add(Rand.Next());
            }

            foreach (HashFunctionId hashId in Enum.GetValues(typeof(HashFunctionId)))
            {
                // Populate an estimator that uses the hash function under test.
                CardinalityEstimator original = new CardinalityEstimator(hashFunctionId: hashId);
                foreach (int value in elements)
                {
                    original.Add(value);
                }

                var serializer = new CardinalityEstimatorSerializer();

                // Serialize into an in-memory buffer and capture the bytes.
                byte[] serializedBytes;
                using (var output = new MemoryStream())
                {
                    serializer.Serialize(output, original, false);
                    serializedBytes = output.ToArray();
                }

                // Rebuild an estimator from those bytes.
                CardinalityEstimator deserialized;
                using (var input = new MemoryStream(serializedBytes))
                {
                    deserialized = serializer.Deserialize(input, false);
                }

                // Adding the same elements again should have no effect on state.
                foreach (int value in elements)
                {
                    deserialized.Add(value);
                }

                Assert.AreEqual(original.Count(), deserialized.Count());
            }
        }
        /// <summary>
        ///     Adds 10,000,000 random elements to a single default-constructed estimator, samples the relative
        ///     estimation error at a fixed interval, and writes the worst sample to the console.
        /// </summary>
        /// <remarks>
        ///     Reporting only: the final assertion always passes. The reported error is the magnitude of the
        ///     relative error, so both over- and under-estimates are taken into account.
        /// </remarks>
        [Ignore] // Test runtime is long
        public void ReportAccuracy()
        {
            const int totalElements = 10000000;
            const int sampleInterval = 1007; // just some interval to sample error at, can be any number

            var hll = new CardinalityEstimator();
            double maxError = 0;
            var worstMember = 0;
            var nextMember = new byte[ElementSizeInBytes];
            for (var i = 0; i < totalElements; i++)
            {
                Rand.NextBytes(nextMember);
                hll.Add(nextMember);

                if (i % sampleInterval == 0)
                {
                    // i is zero-based, so i + 1 elements have been added at this point.
                    double addedSoFar = i + 1;

                    // Use the absolute value: a signed error compared against a maximum that starts at 0
                    // would silently ignore every under-estimate.
                    double error = Math.Abs(hll.Count() - addedSoFar) / addedSoFar;
                    if (error > maxError)
                    {
                        maxError = error;
                        worstMember = i + 1;
                    }
                }
            }

            Console.WriteLine("Worst: {0}", worstMember);
            Console.WriteLine("Max error: {0}", maxError);

            Assert.IsTrue(true);
        }
 /// <summary>
 ///     Returns the count reported by the wrapped <c>_hyperLogLog</c> instance.
 /// </summary>
 /// <returns>The value returned by <c>_hyperLogLog.Count()</c>.</returns>
 public ulong Count()
 {
     ulong count = _hyperLogLog.Count();
     return count;
 }