/// <summary>
/// Creates the forge from its three components, storing each one in the matching property.
/// </summary>
/// <param name="hashesSpec">Value stored in <c>HashesSpec</c>.</param>
/// <param name="topkSpec">Value stored in <c>TopkSpec</c>; may be <c>null</c>.</param>
/// <param name="agent">Value stored in <c>Agent</c>.</param>
public CountMinSketchSpecForge(
    CountMinSketchSpecHashes hashesSpec,
    int? topkSpec,
    CountMinSketchAgentForge agent)
{
    // Element-wise assignment, left to right: HashesSpec, then TopkSpec, then Agent.
    (HashesSpec, TopkSpec, Agent) = (hashesSpec, topkSpec, agent);
}
        /// <summary>
        /// Builds an empty count-min sketch state whose dimensions are derived from the
        /// supplied specification.
        /// </summary>
        /// <param name="spec">
        /// Specification providing <c>EpsOfTotalCount</c>, <c>Confidence</c> and <c>Seed</c>.
        /// </param>
        /// <returns>
        /// A state holding a zero-initialized counter table (one row per hash, one column per
        /// bucket), the per-row hash values, and a final constructor argument of 0.
        /// </returns>
        public static CountMinSketchStateHashes MakeState(CountMinSketchSpecHashes spec)
        {
            // Buckets per row: ceil(2 / eps).
            var columns = (int) Math.Ceiling(2 / spec.EpsOfTotalCount);

            // Number of rows: ceil(-ln(1 - confidence) / ln(2)).
            var rows = (int) Math.Ceiling(-Math.Log(1 - spec.Confidence) / Math.Log(2));

            var counters = new long[rows][];
            var rowHashes = new long[rows];

            // Seeded generator, so the same spec always yields the same hash values.
            var generator = new Random(spec.Seed);

            for (var row = 0; row < rows; row++) {
                rowHashes[row] = generator.Next(int.MaxValue);
                counters[row] = new long[columns];
            }

            return new CountMinSketchStateHashes(rows, columns, counters, rowHashes, 0);
        }
Example #3
0
        /// <summary>
        /// Adds <c>points</c> generated keys (each with a count of 1) to a count-min sketch while
        /// tracking the exact per-key counts, then asserts that the number of keys whose
        /// estimate differs from the exact count is below <c>eps * points</c>.
        /// </summary>
        public void TestSpace()
        {
            var eps = 0.001;
            var confidence = 0.999;

            var space = 2000;
            var points = 100000;

            // Toggle between randomly generated keys and keys derived from the loop index.
            var randomized = true;

            // Fixed seed: an unseeded Random produces different input on every run, which makes
            // a failing run impossible to reproduce.
            var random = new Random(987654321);
            var spec = new CountMinSketchSpecHashes(eps, confidence, 123456);
            var state = CountMinSketchStateHashes.MakeState(spec);

            // Exact occurrence count per key, used as the reference for the sketch estimates.
            IDictionary<ByteBuffer, long> sent = new Dictionary<ByteBuffer, long>();

            for (var i = 0; i < points; i++)
            {
                ByteBuffer bytes;
                if (randomized)
                {
                    bytes = TestCountMinSketchStateTopK.GenerateBytesRandom(random, space);
                }
                else
                {
                    bytes = TestCountMinSketchStateTopK.GenerateBytesModulo(i, space);
                }

                state.Add(bytes.Array, 1);

                // On a miss TryGetValue leaves count at 0, so count + 1 covers both the
                // first occurrence and every later one.
                sent.TryGetValue(bytes, out var count);
                sent.Put(bytes, count + 1);

                // Progress output; only fires when points is raised above 100000.
                if (i > 0 && i % 100000 == 0)
                {
                    Console.Out.WriteLine($"Completed {i}");
                }
            }

            // Compare every exact count against the sketch's estimate.
            var errors = 0;

            foreach (var entry in sent)
            {
                var frequency = state.EstimateCount(entry.Key.Array);
                if (frequency != entry.Value)
                {
                    Console.Out.WriteLine($"Expected {entry.Value} received {frequency}");
                    errors++;
                }
            }

            Console.Out.WriteLine($"Found {errors} errors at space {space} sent {points}");

            // Actual value first, so a failure reports the error count as the offending value.
            Assert.That(errors, Is.LessThan(eps * points));
        }