示例#1
0
        /// <summary>
        /// Builds a <see cref="CountMinSketchState"/> from the supplied specification.
        /// </summary>
        /// <param name="spec">
        /// Specification whose <c>HashesSpec</c> is handed to
        /// <see cref="CountMinSketchStateHashes.MakeState"/> and whose <c>TopkSpec</c> decides
        /// whether a top-k tracker is created.
        /// </param>
        /// <returns>
        /// A state holding the hash state and, only when <c>TopkSpec</c> has a value greater
        /// than zero, a <see cref="CountMinSketchStateTopk"/> of that size; otherwise the top-k
        /// part is <c>null</c>.
        /// </returns>
        public static CountMinSketchState MakeState(CountMinSketchSpec spec)
        {
            CountMinSketchStateHashes hashState = CountMinSketchStateHashes.MakeState(spec.HashesSpec);

            // Top-k tracking is optional: an absent, zero or negative size disables it.
            var requestedTopk = spec.TopkSpec;
            CountMinSketchStateTopk topkState = requestedTopk.HasValue && requestedTopk.Value > 0
                ? new CountMinSketchStateTopk(requestedTopk.Value)
                : null;

            return new CountMinSketchState(hashState, topkState);
        }
示例#2
0
 /// <summary>
 /// Creates a state from an already-built hash state and top-k tracker.
 /// </summary>
 /// <param name="hashes">Value stored in <see cref="Hashes"/>.</param>
 /// <param name="topk">
 /// Value stored in <see cref="Topk"/>. No validation is performed here, so a <c>null</c>
 /// argument is stored as-is.
 /// </param>
 public CountMinSketchState(CountMinSketchStateHashes hashes, CountMinSketchStateTopk topk)
 {
     this.Hashes = hashes;
     this.Topk = topk;
 }
        /// <summary>
        /// Feeds <c>points</c> blobs obtained from <c>GenerateBytesRandom</c> into a
        /// <see cref="CountMinSketchStateTopk"/> capped at <c>topkMax</c> entries, reporting each
        /// blob's exact running count, and then verifies the reported top values: their number,
        /// that none repeats, and that their exact counts never increase along the list.
        /// </summary>
        public void TestTopK()
        {
            const int space   = 10000;
            const int points  = 100000;
            const int topkMax = 100;

            var rng         = new Random();
            var tracker     = new CountMinSketchStateTopk(topkMax);
            var exactCounts = new Dictionary<Blob, long?>();

            for (var i = 0; i < points; i++)
            {
                // for simple population: Blob bytes = generateBytesModulo(i, space);
                var blob = GenerateBytesRandom(rng, space);

                // A blob with no recorded count starts at 1; otherwise the count goes up by one.
                long updatedCount = (exactCounts.Get(blob) ?? 0L) + 1;
                exactCounts.Put(blob, updatedCount);
                tracker.UpdateExpectIncreasing(blob.Data, updatedCount);

                // NOTE(review): with points == 100000 the index never reaches a positive
                // multiple of 100000, so this progress line is not printed as configured.
                if (i > 0 && i % 100000 == 0)
                {
                    Console.WriteLine("Completed {0}", i);
                }
            }

            // compare
            var reported = tracker.TopKValues;

            // assert filled: every distinct blob is expected until the cap is reached
            var expectedSize = exactCounts.Count < topkMax ? exactCounts.Count : topkMax;
            Assert.AreEqual(expectedSize, reported.Count);

            // assert no duplicate values
            var seen = new HashSet<Blob>();

            foreach (var entry in reported)
            {
                var isFirstOccurrence = seen.Add(entry);
                Assert.IsTrue(isFirstOccurrence);
            }

            // assert order descending (by the exact counts recorded above)
            long? previousFreq = null;

            foreach (var entry in reported)
            {
                var currentFreq = exactCounts.Get(entry);
                if (previousFreq.HasValue)
                {
                    // Lifted <= yields false when currentFreq is null, so an entry without a
                    // recorded count fails here as well.
                    Assert.IsTrue(currentFreq <= previousFreq);
                }
                previousFreq = currentFreq;
            }
        }