/// <summary>
/// Creates a sketch state from its two component states.
/// </summary>
/// <param name="hashes">State stored in <see cref="Hashes"/>.</param>
/// <param name="topk">
/// State stored in <see cref="Topk"/>; stored as given, with no null check.
/// </param>
public CountMinSketchState(
    CountMinSketchStateHashes hashes,
    CountMinSketchStateTopk topk)
{
    Hashes = hashes;
    Topk = topk;
}
/// <summary>
/// Converts <paramref name="item"/> to bytes with <c>GetBytes</c> and adds it
/// to <paramref name="state"/> with the supplied <paramref name="count"/>.
/// </summary>
/// <param name="state">Hashes state that receives the item.</param>
/// <param name="item">String key to add.</param>
/// <param name="count">Count passed through to <c>state.Add</c>.</param>
private void Add(CountMinSketchStateHashes state, string item, long count)
    => state.Add(GetBytes(item), count);
/// <summary>
/// Builds a <see cref="CountMinSketchState"/> from a specification.
/// </summary>
/// <param name="spec">
/// Specification supplying <c>HashesSpec</c> and the optional <c>TopkSpec</c>.
/// </param>
/// <returns>
/// A state whose hashes component is created from <c>spec.HashesSpec</c> and
/// whose top-k component is created only when <c>spec.TopkSpec</c> is non-null
/// and greater than zero; otherwise the top-k component is <c>null</c>.
/// </returns>
public static CountMinSketchState MakeState(CountMinSketchSpec spec)
{
    var hashes = CountMinSketchStateHashes.MakeState(spec.HashesSpec);

    // Read the optional top-k size once and reuse it for the check and the constructor.
    var requestedTopk = spec.TopkSpec;
    CountMinSketchStateTopk topk = requestedTopk != null && requestedTopk > 0
        ? new CountMinSketchStateTopk(requestedTopk.Value)
        : null;

    return new CountMinSketchState(hashes, topk);
}
/// <summary>
/// Adds counts for two keys and checks that the estimate for each key equals
/// the total added for it, including after a second add to the first key.
/// </summary>
public void TestSimpleFlow()
{
    var sketch = CountMinSketchStateHashes.MakeState(DefaultSpec);

    // First key on its own.
    Add(sketch, "hello", 100);
    var helloAfterFirstAdd = EstimateCount(sketch, "hello");
    Assert.AreEqual(100, helloAfterFirstAdd);

    // Second key.
    Add(sketch, "text", 1);
    var textAfterFirstAdd = EstimateCount(sketch, "text");
    Assert.AreEqual(1, textAfterFirstAdd);

    // Adding to the first key again accumulates and leaves the second key unchanged.
    Add(sketch, "hello", 3);
    var helloAfterSecondAdd = EstimateCount(sketch, "hello");
    Assert.AreEqual(103, helloAfterSecondAdd);
    var textAfterSecondAdd = EstimateCount(sketch, "text");
    Assert.AreEqual(1, textAfterSecondAdd);
}
/// <summary>
/// Converts <paramref name="item"/> to bytes with <c>GetBytes</c> and returns
/// the count estimate reported by <paramref name="state"/> for it.
/// </summary>
/// <param name="state">Hashes state to query.</param>
/// <param name="item">String key to look up.</param>
/// <returns>The value returned by <c>state.EstimateCount</c>.</returns>
private long EstimateCount(CountMinSketchStateHashes state, string item)
    => state.EstimateCount(GetBytes(item));
/// <summary>
/// Feeds <c>points</c> generated keys into a hashes state, tracks the exact
/// per-key counts in a dictionary, and asserts that the number of keys whose
/// estimate differs from the exact count stays below <c>eps * points</c>.
/// </summary>
/// <remarks>
/// The random generator uses a fixed seed so that the key stream, and therefore
/// the reported error count, is the same on every run and failures can be
/// reproduced.
/// </remarks>
public void TestSpace()
{
    var eps = 0.001;
    var confidence = 0.999;
    var space = 2000;
    var points = 100000;
    var randomized = true;

    // Fixed seed: an unseeded Random produces a different key stream per run,
    // which makes a failing run impossible to replay.
    var seed = 12345;
    var random = new Random(seed);

    var spec = new CountMinSketchSpecHashes(eps, confidence, 123456);
    var state = CountMinSketchStateHashes.MakeState(spec);

    // Exact counts per key, used as ground truth for the comparison below.
    // NOTE(review): relies on ByteBuffer providing value-based equality/hashing
    // for use as a dictionary key - confirm.
    IDictionary<ByteBuffer, long> sent = new Dictionary<ByteBuffer, long>();
    for (var i = 0; i < points; i++)
    {
        ByteBuffer bytes;
        if (randomized)
        {
            bytes = TestCountMinSketchStateTopK.GenerateBytesRandom(random, space);
        }
        else
        {
            bytes = TestCountMinSketchStateTopK.GenerateBytesModulo(i, space);
        }

        state.Add(bytes.Array, 1);

        if (!sent.TryGetValue(bytes, out var count))
        {
            sent.Put(bytes, 1L);
        }
        else
        {
            sent.Put(bytes, count + 1);
        }

        // NOTE(review): with points == 100000 the index never reaches 100000,
        // so this progress message does not fire; it only matters if points is raised.
        if (i > 0 && i % 100000 == 0)
        {
            Console.Out.WriteLine($"Completed {i}");
        }
    }

    // compare
    var errors = 0;
    foreach (var entry in sent)
    {
        var frequency = state.EstimateCount(entry.Key.Array);
        if (frequency != entry.Value)
        {
            Console.Out.WriteLine($"Expected {entry.Value} received {frequency}");
            errors++;
        }
    }

    Console.Out.WriteLine($"Found {errors} errors at space {space} sent {points}");
    Assert.That(eps * points, Is.GreaterThan(errors));
}