/// <summary>
/// Adds a handful of keys to a sketch and checks the per-key counts,
/// including a key that was never added.
/// </summary>
public void TestCMSAddAndCount()
{
    var sketch = new CountMinSketch(0.001, 0.99);

    // The value returned by Add must be the very same instance it was called on.
    var returned = sketch.Add(A_BYTES);
    Assert.AreSame(sketch, returned);

    sketch.Add(B_BYTES);
    sketch.Add(C_BYTES);
    sketch.Add(B_BYTES);
    sketch.Add(D_BYTES);
    sketch.Add(A_BYTES).Add(A_BYTES);

    // A was added three times in total, B twice, C and D once, X never.
    Assert.AreEqual(3u, sketch.Count(A_BYTES));
    Assert.AreEqual(2u, sketch.Count(B_BYTES));
    Assert.AreEqual(1u, sketch.Count(C_BYTES));
    Assert.AreEqual(1u, sketch.Count(D_BYTES));
    Assert.AreEqual(0u, sketch.Count(X_BYTES));
}
/// <summary>
/// Inserts random integers into a sketch and checks that the fraction of
/// distinct items whose estimate overshoots the true count by more than
/// <c>errorRate * insertions</c> does not exceed <c>failureRate</c>.
/// </summary>
public void TestProbabilities()
{
    // The two constructor arguments are used below as the additive error
    // factor and the allowed miss ratio, matching how the original test
    // compared against them.
    const double errorRate = .01;
    const double failureRate = .05;
    const int insertions = 10000;

    var sketch = new CountMinSketch<int>(errorRate, failureRate);
    var random = new Random();
    var insertedItems = new Dictionary<int, int>();

    for (var i = 0; i < insertions; i++)
    {
        var item = random.Next(0, 1000000);
        sketch.Insert(item);

        // Single lookup: 'seen' is 0 when the key is not present yet.
        int seen;
        insertedItems.TryGetValue(item, out seen);
        insertedItems[item] = seen + 1;
    }

    // The bound is derived from the number of items actually inserted.
    // The previous hard-coded 100000 was ten times the real total, which
    // made the overshoot check nearly impossible to trip.
    var allowedOvershoot = errorRate * insertions;

    var numMisses = 0;
    foreach (var item in insertedItems)
    {
        if (sketch.Query(item.Key) - item.Value > allowedOvershoot)
        {
            numMisses++;
        }
    }

    (numMisses / (double)insertedItems.Count).Should().BeLessOrEqualTo(failureRate);
}
/// <summary>
/// Populates a sketch, resets it, and checks that Reset returns the same
/// instance and that every cell of the matrix is zero afterwards.
/// </summary>
public void TestCMSReset()
{
    var sketch = new CountMinSketch(0.001, 0.99);
    sketch.Add(B_BYTES);
    sketch.Add(C_BYTES);
    sketch.Add(B_BYTES);
    sketch.Add(D_BYTES);
    sketch.Add(A_BYTES).Add(A_BYTES);

    var afterReset = sketch.Reset();
    Assert.AreSame(sketch, afterReset);

    // Walk every (row, column) cell; the first non-zero one fails the test.
    for (uint row = 0; row < sketch.Depth; row++)
    {
        for (int col = 0; col < sketch.Width; col++)
        {
            if (sketch.Matrix[row][col] == 0)
            {
                continue;
            }

            Assert.Fail("Expected matrix to be completely empty.");
        }
    }
}
/// <summary>
/// Checks that adding a single item to a fresh sketch yields a TotalCount of 1.
/// </summary>
public void TotalCount_ShouldIncrement_WhenItemIsAdded()
{
    var counter = new CountMinSketch<int>(5d, 0.95d, 42);

    counter.Add(31337);

    Assert.Equal(1, counter.TotalCount);
}
/// <summary>
/// Checks that merging two sketches that differ only in their third
/// constructor argument (42 vs. 22) throws IncompatibleMergeException.
/// </summary>
public void MergeInPlace_WithDifferentSeeds_ShouldThrowIncompatibleMergeException()
{
    var target = new CountMinSketch<int>(20, 20, 42);
    var mismatched = new CountMinSketch<int>(20, 20, 22);

    Assert.Throws<IncompatibleMergeException>(() => target.MergeInPlace(mismatched));
}
/// <summary>
/// Serializes <paramref name="estimator"/> into an in-memory buffer using
/// <paramref name="serializer"/> and returns the written bytes.
/// </summary>
/// <param name="estimator">The sketch to serialize.</param>
/// <param name="serializer">The serializer that writes the sketch to the stream.</param>
/// <returns>A copy of the bytes written to the temporary stream.</returns>
public static byte[] SerializeCountMin(this CountMinSketch estimator, CountMinSketchSerializer serializer)
{
    byte[] payload;

    using (var buffer = new MemoryStream())
    {
        serializer.Serialize(buffer, estimator);
        payload = buffer.ToArray();
    }

    return payload;
}
/// <summary>
/// Adds the same string 1000 times and checks that the estimate lies in
/// the range [1000, 1050].
/// </summary>
public void EstimateCount_ShouldBeWithinConfidenceInterval_ForItemThatHasBeenAdded()
{
    const string input = "Testing!!";
    const int additions = 1000;
    var counter = new CountMinSketch<string>(5d, 0.95, 42);

    for (var added = 0; added < additions; added++)
    {
        counter.Add(input);
    }

    Assert.InRange(counter.EstimateCount(input), 1000, 1050);
}
/// <summary>
/// Adds the ASCII encodings of the numbers 0..99 and checks that the
/// sketch reports a total count of 100.
/// </summary>
public void TestCMSTotalCount()
{
    var sketch = new CountMinSketch(0.001, 0.99);

    for (int number = 0; number < 100; number++)
    {
        var key = Encoding.ASCII.GetBytes(number.ToString());
        sketch.Add(key);
    }

    Assert.AreEqual(100u, sketch.TotalCount());
}
/// <summary>
/// Adds the same item 100 times to each of two identically configured
/// sketches, merges the second into the first, and checks that the merged
/// sketch reports a TotalCount of 200.
/// </summary>
public void TotalCount_AfterMergeInPlace_ShouldBeSumOfMergedTotals()
{
    var sketch = new CountMinSketch<int>(5d, 0.95d, 42);
    var sketch2 = new CountMinSketch<int>(5d, 0.95d, 42);

    for (var i = 0; i < 100; i++)
    {
        sketch.Add(42);
        sketch2.Add(42);
    }

    sketch.MergeInPlace(sketch2);

    // Expected value goes first so a failure reports "Expected: 200"
    // rather than presenting the actual total as the expectation.
    Assert.Equal(200, sketch.TotalCount);
}
/// <summary>
/// Inserts two distinct objects 5000 times each into a sketch constructed
/// with the integer arguments (200, 5) and checks that neither query
/// result is below 5000.
/// </summary>
public void TestInsertAndCount()
{
    const int repetitions = 5000;
    var foo = new SimpleObject("foo", 5);
    var bar = new SimpleObject("bar", 6);
    var sketch = new CountMinSketch<SimpleObject>(200, 5);

    var round = 0;
    while (round < repetitions)
    {
        sketch.Insert(foo);
        sketch.Insert(bar);
        round++;
    }

    sketch.Query(foo).Should().BeGreaterOrEqualTo(5000);
    sketch.Query(bar).Should().BeGreaterOrEqualTo(5000);
}
/// <summary>
/// Same scenario as the insert-and-count test, but the sketch is built
/// from the floating-point arguments (.001, .05): two objects are inserted
/// 5000 times each and neither query result may be below 5000.
/// </summary>
public void TestOptimalInitializer()
{
    const int repetitions = 5000;
    var foo = new SimpleObject("foo", 5);
    var bar = new SimpleObject("bar", 6);
    var sketch = new CountMinSketch<SimpleObject>(.001, .05);

    var round = 0;
    while (round < repetitions)
    {
        sketch.Insert(foo);
        sketch.Insert(bar);
        round++;
    }

    sketch.Query(foo).Should().BeGreaterOrEqualTo(5000);
    sketch.Query(bar).Should().BeGreaterOrEqualTo(5000);
}
/// <summary>
/// Pre-builds 100000 ASCII-encoded keys and then adds each of them to a
/// sketch. Key construction is kept in its own loop, separate from the
/// Add calls.
/// </summary>
public void BenchmarkCMSAdd()
{
    var keyCount = 100000;
    var sketch = new CountMinSketch(0.001, 0.99);

    // Phase 1: materialize all keys up front.
    var keys = new byte[keyCount][];
    for (int index = 0; index < keyCount; index++)
    {
        keys[index] = Encoding.ASCII.GetBytes(index.ToString());
    }

    // Phase 2: feed the keys to the sketch in the same order.
    foreach (var key in keys)
    {
        sketch.Add(key);
    }
}
/// <summary>
/// Adds the same string 1000 times to each of two identically configured
/// sketches, merges them, and checks that the merged estimate lies in the
/// range [2000, 2100].
/// </summary>
public void EstimateCount_AfterMergeInPlace_ShouldBeWithinConfidenceInterval()
{
    const string input = "Testing!!";
    const int additionsPerSketch = 1000;
    var target = new CountMinSketch<string>(5d, 0.95, 42);
    var source = new CountMinSketch<string>(5d, 0.95, 42);

    for (var added = 0; added < additionsPerSketch; added++)
    {
        target.Add(input);
        source.Add(input);
    }

    target.MergeInPlace(source);

    Assert.InRange(target.EstimateCount(input), 2000, 2100);
}
/// <summary>
/// Lua binding for <c>CountMinSketch.Reset</c>: resolves the bound
/// instance from <paramref name="l"/>, calls Reset, and pushes
/// <c>true</c> followed by the returned sketch.
/// </summary>
/// <param name="l">Handle passed through to the LuaObject helpers.</param>
/// <returns>
/// 2 on success (two values were pushed); otherwise whatever
/// <c>LuaObject.error</c> returns for the caught exception.
/// </returns>
public static int Reset(IntPtr l)
{
    try
    {
        var self = (CountMinSketch)LuaObject.checkSelf(l);
        CountMinSketch resetSketch = self.Reset();
        LuaObject.pushValue(l, true);
        LuaObject.pushValue(l, resetSketch);
        return 2;
    }
    catch (Exception e)
    {
        return LuaObject.error(l, e);
    }
}
/// <summary>
/// Round-trips a sketch through BinaryFormatter and checks that the
/// deserialized copy still estimates the 1000 additions of the test
/// string within the range [1000, 1050].
/// </summary>
public void EstimateCount_ShouldBeWithinConfidenceInterval_ForDeserializedSketch()
{
    using (var buffer = new MemoryStream())
    {
        const string input = "Testing!!";
        const int additions = 1000;

        var source = new CountMinSketch<string>(5d, 0.95, 42);
        for (var added = 0; added < additions; added++)
        {
            source.Add(input);
        }

        // Write the sketch, then rewind so it can be read back from the start.
        IFormatter formatter = new BinaryFormatter();
        formatter.Serialize(buffer, source);
        buffer.Flush();
        buffer.Position = 0;

        var restored = (CountMinSketch<string>)formatter.Deserialize(buffer);

        Assert.InRange(restored.EstimateCount(input), 1000, 1050);
    }
}
/// <summary>
/// Lua binding for <c>CountMinSketch.Count</c>: resolves the bound
/// instance, reads a <c>ulong</c> argument from position 2, and pushes
/// <c>true</c> followed by the count returned for that value.
/// </summary>
/// <param name="l">Handle passed through to the LuaObject helpers.</param>
/// <returns>
/// 2 on success (two values were pushed); otherwise whatever
/// <c>LuaObject.error</c> returns for the caught exception.
/// </returns>
public static int Count(IntPtr l)
{
    try
    {
        var self = (CountMinSketch)LuaObject.checkSelf(l);

        ulong data;
        LuaObject.checkType(l, 2, out data);

        ulong occurrences = self.Count(data);
        LuaObject.pushValue(l, true);
        LuaObject.pushValue(l, occurrences);
        return 2;
    }
    catch (Exception e)
    {
        return LuaObject.error(l, e);
    }
}
/// <summary>
/// Lua binding for the <c>CountMinSketch(epsilon, delta)</c> constructor:
/// reads two doubles from positions 2 and 3, creates the sketch, and
/// pushes <c>true</c> followed by the new instance.
/// </summary>
/// <param name="l">Handle passed through to the LuaObject helpers.</param>
/// <returns>
/// 2 on success (two values were pushed); otherwise whatever
/// <c>LuaObject.error</c> returns for the caught exception.
/// </returns>
public static int constructor(IntPtr l)
{
    try
    {
        double epsilon;
        double delta;
        LuaObject.checkType(l, 2, out epsilon);
        LuaObject.checkType(l, 3, out delta);

        CountMinSketch created = new CountMinSketch(epsilon, delta);
        LuaObject.pushValue(l, true);
        LuaObject.pushValue(l, created);
        return 2;
    }
    catch (Exception e)
    {
        return LuaObject.error(l, e);
    }
}
/// <summary>
/// Fills a sketch with distinct pseudo-random keys from a fixed-seed
/// generator (each added with count 1), then probes the keys
/// 0..uniqueInputs-1 and expects exactly 9 estimates above the allowed
/// overcount.
/// </summary>
public void TestMinSketch()
{
    var rng = new MersenneTwister(1095807143);
    uint slots = 5113;
    uint algs = 5;
    // Alternative configuration, left disabled:
    //uint slots = 1279;
    //uint algs = 3;

    var inputs = Enumerable.Range(0, (int)slots)
        .Select(ix => (uint)rng.NextUint32())
        .Distinct()
        .ToList();
    var uniqueInputs = inputs.Count;

    // Allowed estimate: at least the true count, otherwise the expected
    // median overcount scaled by the permitted peak factor.
    var medianOvercountExpected = (double)uniqueInputs / slots;
    var peakOvercountPercentageAllowed = 1.03;
    var actualCount = 1;
    var allowedOvercount = Math.Max(
        actualCount,
        actualCount * medianOvercountExpected * peakOvercountPercentageAllowed);

    var sketch = new CountMinSketch<AddMulModHash>(slots, algs, AddMulModHash.DeterministicDefault());
    foreach (var key in inputs)
    {
        sketch.InterlockedAdd(key, actualCount);
    }

    // NOTE(review): the probe below queries the keys 0..uniqueInputs-1, not
    // the random values that were inserted above. The expected total of 9
    // is pinned to that behaviour - confirm whether this is intentional.
    var offenders = new List<KeyValuePair<uint, long>>(100);
    for (uint probe = 0; probe < uniqueInputs; probe++)
    {
        var estimate = sketch.Estimate(probe);
        if (estimate > allowedOvercount)
        {
            offenders.Add(new KeyValuePair<uint, long>(probe, estimate));
        }
    }

    Assert.Equal(9, offenders.Count);
}
/// <summary>
/// Lua binding for <c>CountMinSketch.Hash</c>: resolves the bound
/// instance, reads an <c>int</c> from position 2 and a <c>ulong</c> from
/// position 3, and pushes <c>true</c> followed by the hash result.
/// </summary>
/// <param name="l">Handle passed through to the LuaObject helpers.</param>
/// <returns>
/// 2 on success (two values were pushed); otherwise whatever
/// <c>LuaObject.error</c> returns for the caught exception.
/// </returns>
public static int Hash(IntPtr l)
{
    try
    {
        var self = (CountMinSketch)LuaObject.checkSelf(l);

        int index;
        LuaObject.checkType(l, 2, out index);
        ulong value;
        LuaObject.checkType(l, 3, out value);

        ulong hashed = self.Hash(index, value);
        LuaObject.pushValue(l, true);
        LuaObject.pushValue(l, hashed);
        return 2;
    }
    catch (Exception e)
    {
        return LuaObject.error(l, e);
    }
}
/// <summary>
/// Merges a second sketch into the first and checks that Merge reports
/// success and that the per-key counts are the sums of both sketches.
/// </summary>
public void TestCMSMerge()
{
    // Target sketch: A x2, B x2, C x1, D x1.
    var target = new CountMinSketch(0.001, 0.99);
    target.Add(B_BYTES);
    target.Add(C_BYTES);
    target.Add(B_BYTES);
    target.Add(D_BYTES);
    target.Add(A_BYTES).Add(A_BYTES);

    // Source sketch: B x2, C x1.
    var source = new CountMinSketch(0.001, 0.99);
    source.Add(B_BYTES);
    source.Add(C_BYTES);
    source.Add(B_BYTES);

    var merged = target.Merge(source);
    Assert.IsTrue(merged);

    Assert.AreEqual(2u, target.Count(A_BYTES));
    Assert.AreEqual(4u, target.Count(B_BYTES));
    Assert.AreEqual(2u, target.Count(C_BYTES));
    Assert.AreEqual(1u, target.Count(D_BYTES));
    Assert.AreEqual(0u, target.Count(X_BYTES));
}
/// <summary>
/// Calls CalculateHeight(99) and CalculateWidth(.9) on a default-constructed
/// sketch.
/// </summary>
public void Height_And_Width_Test()
{
    var sketch = new CountMinSketch();

    // NOTE(review): neither result is asserted on, so this test only fails
    // if one of the calls throws. Consider adding expectations.
    var height = sketch.CalculateHeight(99);
    var width = sketch.CalculateWidth(.9);
}
/// <summary>
/// Creates the wrapper by deserializing a <c>CountMinSketch&lt;string&gt;</c>
/// from <paramref name="stream"/> with BinaryFormatter.
/// </summary>
/// <param name="stream">Stream positioned at a BinaryFormatter payload.</param>
public StringCountMinSketch(Stream stream)
{
    // SECURITY NOTE(review): BinaryFormatter.Deserialize is unsafe on
    // untrusted data. Only pass streams from a trusted source, and consider
    // moving to a safer serializer.
    object graph = new BinaryFormatter().Deserialize(stream);
    _sketch = (CountMinSketch<string>)graph;
}
/// <summary>
/// Creates the wrapper around a new <c>CountMinSketch&lt;string&gt;</c>.
/// </summary>
/// <param name="confidence">Forwarded as the second argument of the underlying sketch.</param>
/// <param name="errorRate">Forwarded as the first argument of the underlying sketch.</param>
public StringCountMinSketch(double confidence, double errorRate)
{
    // The arguments are deliberately passed in swapped order: the underlying
    // constructor receives errorRate first, then confidence.
    // The fixed third argument is presumably the hash seed - TODO confirm.
    const int fixedThirdArgument = 0;
    _sketch = new CountMinSketch<string>(errorRate, confidence, fixedThirdArgument);
}
/// <summary>
/// Checks that merging <c>null</c> into a sketch throws
/// IncompatibleMergeException.
/// </summary>
public void MergeInPlace_WithNullOther_ShouldThrowIncompatibleMergeException()
{
    var target = new CountMinSketch<string>(5d, 0.95d, 42);

    Assert.Throws<IncompatibleMergeException>(() => target.MergeInPlace(null));
}