/// <summary>
/// Creates a sketch state from its two parts.
/// </summary>
/// <param name="hashes">Hash state, stored in <c>Hashes</c>.</param>
/// <param name="topk">
/// Top-k state, stored in <c>Topk</c>. May be <c>null</c>: <c>MakeState</c> passes
/// <c>null</c> when the spec does not request a positive top-k size.
/// </param>
public CountMinSketchState(
    CountMinSketchStateHashes hashes,
    CountMinSketchStateTopk topk)
{
    this.Hashes = hashes;
    this.Topk = topk;
}
/// <summary>
/// Builds a new <see cref="CountMinSketchState"/> from a specification.
/// </summary>
/// <param name="spec">
/// Specification supplying <c>HashesSpec</c> for the hash state and the optional
/// <c>TopkSpec</c> size for the top-k state.
/// </param>
/// <returns>
/// A state whose hash part is created from <c>spec.HashesSpec</c> and whose top-k
/// part is created only when <c>spec.TopkSpec</c> has a value greater than zero;
/// otherwise the top-k part is <c>null</c>.
/// </returns>
public static CountMinSketchState MakeState(CountMinSketchSpec spec)
{
    var hashState = CountMinSketchStateHashes.MakeState(spec.HashesSpec);

    // Top-k tracking is optional: it is enabled only for a present, positive size.
    var requestedTopk = spec.TopkSpec;
    CountMinSketchStateTopk topkState =
        requestedTopk.HasValue && requestedTopk.Value > 0
            ? new CountMinSketchStateTopk(requestedTopk.Value)
            : null;

    return new CountMinSketchState(hashState, topkState);
}
/// <summary>
/// Feeds randomly generated keys, together with their exact running counts, into a
/// <see cref="CountMinSketchStateTopk"/> and verifies the reported top-k list:
/// it holds the expected number of entries, contains no duplicate keys, and is
/// ordered by non-increasing true frequency.
/// </summary>
public void TestTopK()
{
    const int space = 10000;
    const int points = 100000;
    const int topkMax = 100;

    // A fixed seed keeps the generated key sequence identical from run to run,
    // so a failure can be reproduced instead of depending on a time-based seed.
    const int seed = 12345;
    var random = new Random(seed);

    var topk = new CountMinSketchStateTopk(topkMax);

    // Exact occurrence count per key; the reference the top-k output is checked against.
    var sent = new Dictionary<ByteBuffer, long>();

    for (var i = 0; i < points; i++)
    {
        var bytes = GenerateBytesRandom(random, space);

        // TryGetValue leaves the out value at 0 for an unseen key, so the
        // first occurrence is recorded with a count of 1.
        sent.TryGetValue(bytes, out var previousCount);
        var updatedCount = previousCount + 1;

        sent.Put(bytes, updatedCount);
        topk.UpdateExpectIncreasing(bytes.Array, updatedCount);
    }

    var top = topk.TopKValues;

    // The list must be filled up to topkMax, or hold every distinct key if fewer were sent.
    var expectedSize = Math.Min(sent.Count, topkMax);
    Assert.AreEqual(expectedSize, top.Count);

    // No key may be reported more than once.
    ISet<ByteBuffer> seen = new HashSet<ByteBuffer>();
    foreach (var topBytes in top)
    {
        Assert.IsTrue(seen.Add(topBytes), "duplicate key in top-k values");
    }

    // True frequencies must never increase while walking the list.
    long? lastFreq = null;
    foreach (var topBytes in top)
    {
        var freq = sent.Get(topBytes);
        if (lastFreq.HasValue)
        {
            Assert.IsTrue(freq <= lastFreq.Value, "top-k values are not in descending frequency order");
        }

        lastFreq = freq;
    }
}