Пример #1
0
        // Feeds `points` keys drawn from a key space of size `space` into a count-min
        // sketch state, tracks the exact per-key counts alongside, and asserts that the
        // number of keys whose estimate differs from the exact count is below eps * points.
        public void TestSpace()
        {
            const double eps        = 0.001;
            const double confidence = 0.999;

            const int space  = 2000;
            const int points = 100000;

            const bool randomized = true;

            // Fixed seed: an unseeded Random makes every run draw different keys, so a
            // failure could not be reproduced. Any constant works here.
            const int randomSeed = 98765;

            var random = new Random(randomSeed);
            var spec   = new CountMinSketchSpecHashes(eps, confidence, 123456);
            var state  = CountMinSketchStateHashes.MakeState(spec);

            // Exact number of times each key was added; the reference for the comparison below.
            var sent = new Dictionary<Blob, long>();

            for (var i = 0; i < points; i++)
            {
                Blob bytes;
                if (randomized)
                {
                    bytes = TestCountMinSketchStateTopK.GenerateBytesRandom(random, space);
                }
                else
                {
                    bytes = TestCountMinSketchStateTopK.GenerateBytesModulo(i, space);
                }
                state.Add(bytes.Data, 1);

                // TryGetValue leaves `count` at 0 for a key not seen before, so a single
                // lookup followed by a single store covers both the new and existing case.
                long count;
                sent.TryGetValue(bytes, out count);
                sent[bytes] = count + 1;

                // Progress report every 100000 additions. With points == 100000 the loop
                // index tops out at 99999, so this only prints if `points` is raised.
                if (i > 0 && i % 100000 == 0)
                {
                    Console.WriteLine("Completed {0}", i);
                }
            }

            // compare: count the keys whose sketch estimate is not the exact count
            var errors = 0;

            foreach (var entry in sent)
            {
                var frequency = state.EstimateCount(entry.Key.Data);
                if (frequency != entry.Value)
                {
                    Console.WriteLine("Expected {0} received {1}", entry.Value, frequency);
                    errors++;
                }
            }
            Console.WriteLine("Found {0} errors at space {1} sent {2}", errors, space, points);
            Assert.IsTrue(eps * points > errors);
        }
        /// <summary>
        /// Builds a fresh sketch state whose table dimensions and per-row hash values
        /// are derived from the given specification.
        /// </summary>
        /// <param name="spec">
        /// Specification supplying <c>EpsOfTotalCount</c>, <c>Confidence</c> and <c>Seed</c>.
        /// </param>
        /// <returns>
        /// A state constructed from the row count, the column count, a zero-filled
        /// rows-by-columns table, one pseudo-random hash value per row, and a trailing 0
        /// (presumably the initial total count; confirm against the constructor).
        /// </returns>
        public static CountMinSketchStateHashes MakeState(CountMinSketchSpecHashes spec)
        {
            // Columns per row: ceil(2 / eps).
            int columns = (int) Math.Ceiling(2 / spec.EpsOfTotalCount);

            // Number of rows: ceil(-ln(1 - confidence) / ln 2).
            double rowsExact = -Math.Log(1 - spec.Confidence) / Math.Log(2);
            int rows = (int) Math.Ceiling(rowsExact);

            long[][] counters = new long[rows][];
            long[] rowHashes = new long[rows];

            // Seeded generator: the same spec seed always yields the same hash values.
            Random generator = new Random(spec.Seed);

            for (int row = 0; row < rows; row++)
            {
                counters[row] = new long[columns];
                rowHashes[row] = generator.Next(int.MaxValue);
            }

            return new CountMinSketchStateHashes(rows, columns, counters, rowHashes, 0);
        }
Пример #3
0
 /// <summary>
 /// Creates a specification from its three parts, storing each argument on the
 /// property of the matching name.
 /// </summary>
 /// <param name="hashesSpec">Value assigned to <c>HashesSpec</c>.</param>
 /// <param name="topkSpec">Value assigned to <c>TopkSpec</c>; nullable.</param>
 /// <param name="agent">Value assigned to <c>Agent</c>.</param>
 public CountMinSketchSpec(
     CountMinSketchSpecHashes hashesSpec,
     int? topkSpec,
     CountMinSketchAgent agent)
 {
     this.HashesSpec = hashesSpec;
     this.TopkSpec = topkSpec;
     this.Agent = agent;
 }