/// <summary>
/// Creates the forge from its three parts, storing each argument in the
/// property of the matching name.
/// </summary>
/// <param name="hashesSpec">Value assigned to <c>HashesSpec</c>.</param>
/// <param name="topkSpec">Value assigned to <c>TopkSpec</c>; may be <c>null</c>.</param>
/// <param name="agent">Value assigned to <c>Agent</c>.</param>
public CountMinSketchSpecForge(
    CountMinSketchSpecHashes hashesSpec,
    int? topkSpec,
    CountMinSketchAgentForge agent)
{
    this.HashesSpec = hashesSpec;
    this.TopkSpec = topkSpec;
    this.Agent = agent;
}
/// <summary>
/// Builds a fresh sketch state whose dimensions are derived from the given specification.
/// </summary>
/// <remarks>
/// The number of columns is <c>ceil(2 / spec.EpsOfTotalCount)</c> and the number of rows is
/// <c>ceil(-ln(1 - spec.Confidence) / ln 2)</c>. One hash coefficient per row is drawn from a
/// <see cref="Random"/> constructed with <c>spec.Seed</c>.
/// </remarks>
/// <param name="spec">Specification supplying the epsilon, confidence and seed values.</param>
/// <returns>
/// A new <c>CountMinSketchStateHashes</c> holding the row count, column count, the counter
/// table, the hash coefficients and a trailing <c>0</c>.
/// </returns>
public static CountMinSketchStateHashes MakeState(CountMinSketchSpecHashes spec)
{
    var columns = (int) Math.Ceiling(2 / spec.EpsOfTotalCount);
    var rows = (int) Math.Ceiling(-Math.Log(1 - spec.Confidence) / Math.Log(2));

    // One coefficient per row, each in [0, int.MaxValue), drawn in row order.
    var coefficients = new long[rows];
    var generator = new Random(spec.Seed);
    var row = 0;
    while (row < rows)
    {
        coefficients[row] = generator.Next(int.MaxValue);
        row++;
    }

    // Counter table: one array per row, each allocated with `columns` entries.
    var counters = new long[rows][];
    counters.Fill(_ => new long[columns]);

    // NOTE(review): the final 0 is presumably the initial running total - confirm against the constructor.
    return new CountMinSketchStateHashes(rows, columns, counters, coefficients, 0);
}
/// <summary>
/// Adds <c>points</c> single-count entries to a sketch built with the configured epsilon and
/// confidence while keeping an exact per-key tally, then asserts that the number of keys
/// whose estimate differs from the exact tally is below <c>eps * points</c>.
/// </summary>
public void TestSpace()
{
    double eps = 0.001;
    double confidence = 0.999;
    int space = 2000;
    int points = 100000;
    bool randomized = true;

    var random = new Random();
    var state = CountMinSketchStateHashes.MakeState(
        new CountMinSketchSpecHashes(eps, confidence, 123456));

    // Exact per-key counts, used as the reference the estimates are compared with.
    IDictionary<ByteBuffer, long> sent = new Dictionary<ByteBuffer, long>();

    for (var n = 0; n < points; n++)
    {
        // Key source is selected by the `randomized` switch above.
        ByteBuffer bytes = randomized
            ? TestCountMinSketchStateTopK.GenerateBytesRandom(random, space)
            : TestCountMinSketchStateTopK.GenerateBytesModulo(n, space);

        state.Add(bytes.Array, 1);

        // A missing key leaves `seen` at default(long) == 0, so first sightings are stored as 1.
        sent.TryGetValue(bytes, out var seen);
        sent.Put(bytes, seen + 1);

        if (n > 0 && n % 100000 == 0)
        {
            Console.Out.WriteLine("Completed " + n);
        }
    }

    // compare: any estimate that is not exactly the tallied count is an error
    var errors = 0;
    foreach (var entry in sent)
    {
        var frequency = state.EstimateCount(entry.Key.Array);
        if (frequency == entry.Value)
        {
            continue;
        }

        Console.Out.WriteLine("Expected " + entry.Value + " received " + frequency);
        errors++;
    }

    Console.Out.WriteLine("Found " + errors + " errors at space " + space + " sent " + points);
    Assert.That(eps * points, Is.GreaterThan(errors));
}