/// <summary>
/// Builds a <see cref="CountMinSketchState"/> from a specification.
/// </summary>
/// <param name="spec">
/// Specification providing <c>HashesSpec</c> for the hashes part and an optional
/// <c>TopkSpec</c> size for the top-k part.
/// </param>
/// <returns>
/// A new state. Its top-k part is null unless <c>spec.TopkSpec</c> is set and greater than zero.
/// </returns>
public static CountMinSketchState MakeState(CountMinSketchSpec spec)
{
    var hashState = CountMinSketchStateHashes.MakeState(spec.HashesSpec);

    // Top-k tracking is optional: it is only allocated for a positive configured size.
    CountMinSketchStateTopk topkState = spec.TopkSpec != null && spec.TopkSpec > 0
        ? new CountMinSketchStateTopk(spec.TopkSpec.Value)
        : null;

    return new CountMinSketchState(hashState, topkState);
}
/// <summary>
/// Creates a state from already-constructed hashes and top-k parts.
/// </summary>
/// <param name="hashes">Value stored in <c>Hashes</c>; no validation is performed.</param>
/// <param name="topk">Value stored in <c>Topk</c>; no validation is performed, so null is accepted.</param>
public CountMinSketchState(
    CountMinSketchStateHashes hashes,
    CountMinSketchStateTopk topk)
{
    this.Hashes = hashes;
    this.Topk = topk;
}
/// <summary>
/// Feeds randomly generated keys, each with a strictly increasing count, into a
/// <see cref="CountMinSketchStateTopk"/> and verifies that the reported top-k values are
/// filled to the expected size, contain no duplicates, and are ordered by descending
/// frequency according to the counts recorded by the test itself.
/// </summary>
public void TestTopK()
{
    const int space = 10000;
    const int points = 100000;
    const int topkMax = 100;
    const int progressInterval = 100000;

    // Seed explicitly and report the seed on failure so a failing run can be reproduced.
    var seed = Environment.TickCount;
    var random = new Random(seed);
    var seedNote = "random seed " + seed;

    var topk = new CountMinSketchStateTopk(topkMax);
    var sent = new Dictionary<Blob, long?>();

    for (var i = 0; i < points; i++)
    {
        // for simple population: Blob bytes = generateBytesModulo(i, space);
        var bytes = GenerateBytesRandom(random, space);

        // First sighting counts as 1; otherwise bump the previously recorded count.
        var previous = sent.Get(bytes);
        var updated = previous == null ? 1L : previous.Value + 1;
        sent.Put(bytes, updated);
        topk.UpdateExpectIncreasing(bytes.Data, updated);

        // Progress output; with the current constants this never fires because
        // i stays below progressInterval. It becomes active if points is raised.
        if (i > 0 && i % progressInterval == 0)
        {
            Console.WriteLine("Completed {0}", i);
        }
    }

    // compare
    var top = topk.TopKValues;

    // assert filled: the list holds topkMax entries, or every distinct key if fewer were sent
    Assert.AreEqual(Math.Min(sent.Count, topkMax), top.Count, "unexpected top-k size; " + seedNote);

    // assert no duplicate values
    var seen = new HashSet<Blob>();
    foreach (var topBytes in top)
    {
        Assert.IsTrue(seen.Add(topBytes), "duplicate top-k entry; " + seedNote);
    }

    // assert order descending
    long? lastFreq = null;
    foreach (var topBytes in top)
    {
        var freq = sent.Get(topBytes);

        // A null frequency means the top-k list reported a key this test never sent;
        // without this check the ordering comparison below would be silently skipped
        // for the following entry.
        Assert.IsNotNull(freq, "top-k entry was never sent; " + seedNote);

        if (lastFreq != null)
        {
            Assert.IsTrue(freq <= lastFreq, "top-k not in descending order; " + seedNote);
        }

        lastFreq = freq;
    }
}