/// <summary>
/// Feeds a stream of keys into a hash-based count-min sketch and checks how many
/// keys come back with an estimate that differs from their exact count.
/// </summary>
/// <remarks>
/// The sketch is built with eps 0.001, confidence 0.999 and seed 123456. The test
/// passes when the number of mismatching keys is strictly below <c>eps * points</c>.
/// NOTE(review): the key generator uses an unseeded <see cref="Random"/>, so the
/// exact key stream differs from run to run.
/// </remarks>
public void TestSpace()
{
    const double eps = 0.001;
    const double confidence = 0.999;
    const int space = 2000;
    const int points = 100000;
    const bool randomized = true;

    var random = new Random();
    var state = CountMinSketchStateHashes.MakeState(new CountMinSketchSpecHashes(eps, confidence, 123456));

    // Exact per-key counts, kept alongside the sketch as the reference.
    var sent = new Dictionary<Blob, long?>();

    for (var index = 0; index < points; index++)
    {
        Blob key;
        if (!randomized)
        {
            key = TestCountMinSketchStateTopK.GenerateBytesModulo(index, space);
        }
        else
        {
            key = TestCountMinSketchStateTopK.GenerateBytesRandom(random, space);
        }

        state.Add(key.Data, 1);

        // A null lookup result is treated as the first sighting of this key.
        var previous = sent.Get(key);
        sent.Put(key, previous == null ? 1L : previous + 1);

        // With index always below 100000 this condition is never true here;
        // it only matters if the number of points is raised.
        if (index > 0 && index % 100000 == 0)
        {
            Console.WriteLine("Completed {0}", index);
        }
    }

    // Compare the sketch estimate against the exact count for every distinct key.
    var errors = 0;
    foreach (var pair in sent)
    {
        var estimate = state.EstimateCount(pair.Key.Data);
        if (estimate == pair.Value)
        {
            continue;
        }

        Console.WriteLine("Expected {0} received {1}", pair.Value, estimate);
        errors++;
    }

    Console.WriteLine("Found {0} errors at space {1} sent {2}", errors, space, points);
    Assert.IsTrue(errors < eps * points);
}
/// <summary>
/// Builds an empty sketch state sized from the given specification.
/// </summary>
/// <param name="spec">
/// Supplies <c>EpsOfTotalCount</c>, <c>Confidence</c> and <c>Seed</c>.
/// </param>
/// <returns>
/// A state whose counter table has <c>ceil(-ln(1 - Confidence) / ln 2)</c> rows of
/// <c>ceil(2 / EpsOfTotalCount)</c> zeroed counters, plus one pseudo-random value per
/// row drawn from a <see cref="Random"/> seeded with <c>spec.Seed</c>.
/// </returns>
public static CountMinSketchStateHashes MakeState(CountMinSketchSpecHashes spec)
{
    var columns = (int)Math.Ceiling(2 / spec.EpsOfTotalCount);
    var rows = (int)Math.Ceiling(-Math.Log(1 - spec.Confidence) / Math.Log(2));

    var counters = new long[rows][];
    var rowHashes = new long[rows];

    // Seeding the generator makes the per-row values reproducible for a given spec.
    var generator = new Random(spec.Seed);
    for (var row = 0; row < rows; row++)
    {
        counters[row] = new long[columns];
        rowHashes[row] = generator.Next(int.MaxValue);
    }

    // The trailing 0 is presumably the initial running total - confirm against the constructor.
    return new CountMinSketchStateHashes(rows, columns, counters, rowHashes, 0);
}
/// <summary>
/// Creates a specification by storing each argument in the member of the same name.
/// </summary>
/// <param name="hashesSpec">Value assigned to <c>HashesSpec</c>.</param>
/// <param name="topkSpec">Value assigned to <c>TopkSpec</c>; may be null.</param>
/// <param name="agent">Value assigned to <c>Agent</c>.</param>
public CountMinSketchSpec(
    CountMinSketchSpecHashes hashesSpec,
    int? topkSpec,
    CountMinSketchAgent agent)
{
    this.HashesSpec = hashesSpec;
    this.TopkSpec = topkSpec;
    this.Agent = agent;
}