/// <summary>
/// Adds a fixed sequence of keys and asserts the per-key counts reported by
/// <c>Count</c>, including a key that was never added.
/// </summary>
public void TestCMSAddAndCount()
        {
            var sketch = new CountMinSketch(0.001, 0.99);

            // Add is expected to hand back the very instance it was called on.
            Assert.AreSame(sketch, sketch.Add(A_BYTES));

            sketch.Add(B_BYTES);
            sketch.Add(C_BYTES);
            sketch.Add(B_BYTES);
            sketch.Add(D_BYTES);
            sketch.Add(A_BYTES).Add(A_BYTES);

            // A was added three times in total (once above, twice chained).
            Assert.AreEqual(3u, sketch.Count(A_BYTES));
            Assert.AreEqual(2u, sketch.Count(B_BYTES));
            Assert.AreEqual(1u, sketch.Count(C_BYTES));
            Assert.AreEqual(1u, sketch.Count(D_BYTES));

            // X was never added.
            Assert.AreEqual(0u, sketch.Count(X_BYTES));
        }
Пример #2
0
        /// <summary>
        /// Inserts random keys into a sketch while tracking their exact counts, then
        /// checks that the fraction of keys whose estimate overshoots the exact count
        /// by more than epsilon * (number of insertions) does not exceed delta.
        /// </summary>
        public void TestProbabilities()
        {
            // NOTE(review): the two constructor arguments are assumed to be
            // (epsilon, delta) — confirm against the CountMinSketch<T> constructor.
            const double epsilon    = .01;
            const double delta      = .05;
            const int    totalItems = 10000;

            var sketch        = new CountMinSketch <int>(epsilon, delta);
            // Fixed seed so that a failure can be reproduced run after run.
            var random        = new Random(12345);
            var insertedItems = new Dictionary <int, int>();

            for (var i = 0; i < totalItems; i++)
            {
                var item = random.Next(0, 1000000);
                sketch.Insert(item);

                // Single lookup: 'seen' stays 0 when the key is not present yet.
                int seen;
                insertedItems.TryGetValue(item, out seen);
                insertedItems[item] = seen + 1;
            }

            // The tolerated overshoot is tied to the number of insertions actually
            // performed; the previous hard-coded 100000 did not match the loop count.
            var allowedError = epsilon * totalItems;
            var numMisses    = 0;

            foreach (var item in insertedItems)
            {
                if (sketch.Query(item.Key) - item.Value > allowedError)
                {
                    numMisses++;
                }
            }

            (numMisses / (double)insertedItems.Count).Should().BeLessOrEqualTo(delta);
        }
        /// <summary>
        /// Populates a sketch, resets it, and asserts that <c>Reset</c> returns the
        /// same instance and that every cell of <c>Matrix</c> is zero afterwards.
        /// </summary>
        public void TestCMSReset()
        {
            var sketch = new CountMinSketch(0.001, 0.99);

            sketch.Add(B_BYTES);
            sketch.Add(C_BYTES);
            sketch.Add(B_BYTES);
            sketch.Add(D_BYTES);
            sketch.Add(A_BYTES).Add(A_BYTES);

            Assert.AreSame(sketch, sketch.Reset());

            // Walk all Depth x Width cells; any non-zero cell fails the test.
            for (uint row = 0; row < sketch.Depth; row++)
            {
                for (int column = 0; column < sketch.Width; column++)
                {
                    if (sketch.Matrix[row][column] == 0)
                    {
                        continue;
                    }

                    Assert.Fail("Expected matrix to be completely empty.");
                }
            }
        }
Пример #4
0
        /// <summary>
        /// Asserts that <c>TotalCount</c> is 1 after a single <c>Add</c> on a fresh sketch.
        /// </summary>
        public void TotalCount_ShouldIncrement_WhenItemIsAdded()
        {
            var freshSketch = new CountMinSketch<int>(5d, 0.95d, 42);

            freshSketch.Add(31337);

            Assert.Equal(1, freshSketch.TotalCount);
        }
Пример #5
0
        /// <summary>
        /// Asserts that <c>MergeInPlace</c> throws <c>IncompatibleMergeException</c> when the
        /// two sketches differ only in their third constructor argument (42 vs. 22).
        /// </summary>
        public void MergeInPlace_WithDifferentSeeds_ShouldThrowIncompatibleMergeException()
        {
            var target = new CountMinSketch<int>(20, 20, 42);
            var source = new CountMinSketch<int>(20, 20, 22);

            Assert.Throws<IncompatibleMergeException>(() => target.MergeInPlace(source));
        }
Пример #6
0
        /// <summary>
        /// Serializes <paramref name="estimator"/> into an in-memory stream using
        /// <paramref name="serializer"/> and returns the bytes that were written.
        /// </summary>
        /// <param name="estimator">The sketch to serialize.</param>
        /// <param name="serializer">The serializer that writes the sketch to the stream.</param>
        /// <returns>A copy of the stream contents after serialization.</returns>
        public static byte[] SerializeCountMin(this CountMinSketch estimator, CountMinSketchSerializer serializer)
        {
            using (var buffer = new MemoryStream())
            {
                serializer.Serialize(buffer, estimator);

                byte[] payload = buffer.ToArray();
                return payload;
            }
        }
Пример #7
0
        /// <summary>
        /// Adds the same string 1000 times and asserts that <c>EstimateCount</c> for it
        /// falls in the inclusive range [1000, 1050].
        /// </summary>
        public void EstimateCount_ShouldBeWithinConfidenceInterval_ForItemThatHasBeenAdded()
        {
            const string input = "Testing!!";
            var sketch = new CountMinSketch<string>(5d, 0.95, 42);

            var remaining = 1000;
            while (remaining > 0)
            {
                sketch.Add(input);
                remaining--;
            }

            Assert.InRange(sketch.EstimateCount(input), 1000, 1050);
        }
        /// <summary>
        /// Adds 100 distinct keys (the ASCII bytes of "0".."99") and asserts that
        /// <c>TotalCount()</c> reports 100.
        /// </summary>
        public void TestCMSTotalCount()
        {
            var sketch = new CountMinSketch(0.001, 0.99);

            for (int number = 0; number < 100; number++)
            {
                byte[] key = Encoding.ASCII.GetBytes(number.ToString());
                sketch.Add(key);
            }

            Assert.AreEqual(100u, sketch.TotalCount());
        }
Пример #9
0
        /// <summary>
        /// Adds 100 items to each of two identically configured sketches, merges the
        /// second into the first, and asserts that the merged <c>TotalCount</c> is 200.
        /// </summary>
        public void TotalCount_AfterMergeInPlace_ShouldBeSumOfMergedTotals()
        {
            var sketch  = new CountMinSketch <int>(5d, 0.95d, 42);
            var sketch2 = new CountMinSketch <int>(5d, 0.95d, 42);

            for (var i = 0; i < 100; i++)
            {
                sketch.Add(42);
                sketch2.Add(42);
            }
            sketch.MergeInPlace(sketch2);

            // Expected value goes first, matching the other TotalCount test in this
            // suite, so a failure reports expected/actual the right way round.
            Assert.Equal(200, sketch.TotalCount);
        }
Пример #10
0
        /// <summary>
        /// Inserts two distinct objects 5000 times each into a sketch built with the
        /// integer constructor arguments (200, 5) and asserts that each query result
        /// is at least 5000.
        /// </summary>
        public void TestInsertAndCount()
        {
            var foo = new SimpleObject("foo", 5);
            var bar = new SimpleObject("bar", 6);

            var sketch = new CountMinSketch<SimpleObject>(200, 5);

            // Interleave the insertions: foo, bar, foo, bar, ...
            for (var round = 0; round < 5000; round++)
            {
                sketch.Insert(foo);
                sketch.Insert(bar);
            }

            var fooEstimate = sketch.Query(foo);
            fooEstimate.Should().BeGreaterOrEqualTo(5000);

            var barEstimate = sketch.Query(bar);
            barEstimate.Should().BeGreaterOrEqualTo(5000);
        }
Пример #11
0
        /// <summary>
        /// Same scenario as the insert-and-count test, but the sketch is built with the
        /// floating-point constructor arguments (.001, .05). Each object is inserted
        /// 5000 times and each query result must be at least 5000.
        /// </summary>
        public void TestOptimalInitializer()
        {
            var foo = new SimpleObject("foo", 5);
            var bar = new SimpleObject("bar", 6);

            var sketch = new CountMinSketch<SimpleObject>(.001, .05);

            // Interleave the insertions: foo, bar, foo, bar, ...
            for (var round = 0; round < 5000; round++)
            {
                sketch.Insert(foo);
                sketch.Insert(bar);
            }

            var fooEstimate = sketch.Query(foo);
            fooEstimate.Should().BeGreaterOrEqualTo(5000);

            var barEstimate = sketch.Query(bar);
            barEstimate.Should().BeGreaterOrEqualTo(5000);
        }
        /// <summary>
        /// Builds 100000 byte-array keys (the ASCII bytes of "0".."99999") and then
        /// adds each of them to a sketch once.
        /// </summary>
        public void BenchmarkCMSAdd()
        {
            var itemCount = 100000;
            var sketch    = new CountMinSketch(0.001, 0.99);
            var payloads  = new byte[itemCount][];

            // Encode every key up front so the second loop consists of Add calls only.
            for (int index = 0; index < payloads.Length; index++)
            {
                payloads[index] = Encoding.ASCII.GetBytes(index.ToString());
            }

            foreach (var payload in payloads)
            {
                sketch.Add(payload);
            }
        }
Пример #13
0
        /// <summary>
        /// Adds the same string 1000 times to each of two identically configured
        /// sketches, merges the second into the first, and asserts that the merged
        /// estimate falls in the inclusive range [2000, 2100].
        /// </summary>
        public void EstimateCount_AfterMergeInPlace_ShouldBeWithinConfidenceInterval()
        {
            const string input = "Testing!!";
            var target = new CountMinSketch<string>(5d, 0.95, 42);
            var source = new CountMinSketch<string>(5d, 0.95, 42);

            var remaining = 1000;
            while (remaining > 0)
            {
                target.Add(input);
                source.Add(input);
                remaining--;
            }

            target.MergeInPlace(source);

            Assert.InRange(target.EstimateCount(input), 2000, 2100);
        }
    /// <summary>
    /// Lua binding for <c>CountMinSketch.Reset</c>: resolves the receiver from the Lua
    /// state, calls <c>Reset</c>, and pushes <c>true</c> followed by the call's result.
    /// </summary>
    /// <param name="l">Handle to the Lua state.</param>
    /// <returns>
    /// 2 when the call succeeds; otherwise whatever <c>LuaObject.error</c> returns
    /// for the caught exception.
    /// </returns>
    public static int Reset(IntPtr l)
    {
        try
        {
            CountMinSketch self = (CountMinSketch)LuaObject.checkSelf(l);
            CountMinSketch resetSketch = self.Reset();

            LuaObject.pushValue(l, true);
            LuaObject.pushValue(l, resetSketch);
            return 2;
        }
        catch (Exception e)
        {
            return LuaObject.error(l, e);
        }
    }
Пример #15
0
 /// <summary>
 /// Adds the same string 1000 times, round-trips the sketch through a
 /// <c>BinaryFormatter</c> and an in-memory stream, and asserts that the
 /// deserialized sketch's estimate falls in the inclusive range [1000, 1050].
 /// </summary>
 public void EstimateCount_ShouldBeWithinConfidenceInterval_ForDeserializedSketch()
 {
     using (var buffer = new MemoryStream())
     {
         const string input = "Testing!!";
         var populated = new CountMinSketch<string>(5d, 0.95, 42);

         var remaining = 1000;
         while (remaining > 0)
         {
             populated.Add(input);
             remaining--;
         }

         IFormatter formatter = new BinaryFormatter();
         formatter.Serialize(buffer, populated);
         buffer.Flush();

         // Rewind so deserialization reads from the start of what was written.
         buffer.Position = 0;
         var restored = (CountMinSketch<string>)formatter.Deserialize(buffer);

         Assert.InRange(restored.EstimateCount(input), 1000, 1050);
     }
 }
    /// <summary>
    /// Lua binding for <c>CountMinSketch.Count</c>: resolves the receiver, reads a
    /// <c>ulong</c> from argument 2, calls <c>Count</c>, and pushes <c>true</c>
    /// followed by the call's result.
    /// </summary>
    /// <param name="l">Handle to the Lua state.</param>
    /// <returns>
    /// 2 when the call succeeds; otherwise whatever <c>LuaObject.error</c> returns
    /// for the caught exception.
    /// </returns>
    public static int Count(IntPtr l)
    {
        try
        {
            CountMinSketch self = (CountMinSketch)LuaObject.checkSelf(l);

            ulong key;
            LuaObject.checkType(l, 2, out key);

            ulong tally = self.Count(key);

            LuaObject.pushValue(l, true);
            LuaObject.pushValue(l, tally);
            return 2;
        }
        catch (Exception e)
        {
            return LuaObject.error(l, e);
        }
    }
    /// <summary>
    /// Lua binding for the <c>CountMinSketch(epsilon, delta)</c> constructor: reads
    /// two doubles from arguments 2 and 3, constructs the sketch, and pushes
    /// <c>true</c> followed by the new instance.
    /// </summary>
    /// <param name="l">Handle to the Lua state.</param>
    /// <returns>
    /// 2 when construction succeeds; otherwise whatever <c>LuaObject.error</c>
    /// returns for the caught exception.
    /// </returns>
    public static int constructor(IntPtr l)
    {
        try
        {
            double epsilonArg;
            double deltaArg;
            LuaObject.checkType(l, 2, out epsilonArg);
            LuaObject.checkType(l, 3, out deltaArg);

            CountMinSketch created = new CountMinSketch(epsilonArg, deltaArg);

            LuaObject.pushValue(l, true);
            LuaObject.pushValue(l, created);
            return 2;
        }
        catch (Exception e)
        {
            return LuaObject.error(l, e);
        }
    }
Пример #18
0
        /// <summary>
        /// Feeds a deterministic set of distinct pseudo-random values into a sketch and
        /// counts how many of the keys 0..uniqueInputs-1 have an estimate above the
        /// allowed overcount. The expected number of such keys is pinned to 9 for the
        /// fixed generator seed used here.
        /// </summary>
        public void TestMinSketch()
        {
            var  generator = new MersenneTwister(1095807143);
            uint slots     = 5113;
            uint algs      = 5;
            // Alternative configuration left by the original author: slots = 1279, algs = 3.

            var inputs = Enumerable.Range(0, (int)slots)
                                   .Select(ix => (uint)generator.NextUint32())
                                   .Distinct()
                                   .ToList();
            var uniqueInputs = inputs.Count;

            var actualCount                    = 1;
            var peakOvercountPercentageAllowed = 1.03;
            var medianOvercountExpected        = (double)uniqueInputs / slots;

            // Never allow less than the true per-key count itself.
            var scaledOvercount  = actualCount * medianOvercountExpected * peakOvercountPercentageAllowed;
            var allowedOvercount = Math.Max(actualCount, scaledOvercount);

            var sketch = new CountMinSketch<AddMulModHash>(slots, algs, AddMulModHash.DeterministicDefault());

            foreach (var value in inputs)
            {
                sketch.InterlockedAdd(value, actualCount);
            }

            var overcounted = new List<KeyValuePair<uint, long>>(100);

            // Note: this probes the keys 0..uniqueInputs-1, not the inserted values.
            for (uint key = 0; key < uniqueInputs; key++)
            {
                var estimate = sketch.Estimate(key);
                if (estimate > allowedOvercount)
                {
                    overcounted.Add(new KeyValuePair<uint, long>(key, estimate));
                }
            }

            Assert.Equal(9, overcounted.Count);
        }
    /// <summary>
    /// Lua binding for <c>CountMinSketch.Hash</c>: resolves the receiver, reads an
    /// <c>int</c> from argument 2 and a <c>ulong</c> from argument 3, calls
    /// <c>Hash</c>, and pushes <c>true</c> followed by the call's result.
    /// </summary>
    /// <param name="l">Handle to the Lua state.</param>
    /// <returns>
    /// 2 when the call succeeds; otherwise whatever <c>LuaObject.error</c> returns
    /// for the caught exception.
    /// </returns>
    public static int Hash(IntPtr l)
    {
        try
        {
            CountMinSketch self = (CountMinSketch)LuaObject.checkSelf(l);

            int hashIndex;
            LuaObject.checkType(l, 2, out hashIndex);

            ulong input;
            LuaObject.checkType(l, 3, out input);

            ulong hashed = self.Hash(hashIndex, input);

            LuaObject.pushValue(l, true);
            LuaObject.pushValue(l, hashed);
            return 2;
        }
        catch (Exception e)
        {
            return LuaObject.error(l, e);
        }
    }
        /// <summary>
        /// Populates two identically configured sketches, merges the second into the
        /// first, and asserts that <c>Merge</c> returns true and that the per-key
        /// counts equal the sum of both sketches' additions.
        /// </summary>
        public void TestCMSMerge()
        {
            var target = new CountMinSketch(0.001, 0.99);

            target.Add(B_BYTES);
            target.Add(C_BYTES);
            target.Add(B_BYTES);
            target.Add(D_BYTES);
            target.Add(A_BYTES).Add(A_BYTES);

            var source = new CountMinSketch(0.001, 0.99);

            source.Add(B_BYTES);
            source.Add(C_BYTES);
            source.Add(B_BYTES);

            Assert.IsTrue(target.Merge(source));

            // A and D only exist in the target; B and C are summed across both sketches.
            Assert.AreEqual(2u, target.Count(A_BYTES));
            Assert.AreEqual(4u, target.Count(B_BYTES));
            Assert.AreEqual(2u, target.Count(C_BYTES));
            Assert.AreEqual(1u, target.Count(D_BYTES));

            // X was never added to either sketch.
            Assert.AreEqual(0u, target.Count(X_BYTES));
        }
 /// <summary>
 /// Calls <c>CalculateHeight(99)</c> and <c>CalculateWidth(.9)</c> on a
 /// default-constructed sketch.
 /// </summary>
 public void Height_And_Width_Test()
 {
     var sketch = new CountMinSketch();

     // NOTE(review): both results are computed but never asserted, so this test
     // can only fail if one of the calls throws — consider adding expectations.
     var height = sketch.CalculateHeight(99);
     var width  = sketch.CalculateWidth(.9);
 }
        /// <summary>
        /// Creates the wrapper by deserializing a <c>CountMinSketch&lt;string&gt;</c> from
        /// <paramref name="stream"/> with a <c>BinaryFormatter</c>.
        /// </summary>
        /// <param name="stream">Stream positioned at a previously serialized sketch.</param>
        public StringCountMinSketch(Stream stream)
        {
            // NOTE(review): BinaryFormatter deserialization is unsafe on untrusted data
            // and is no longer available in recent .NET releases. Only feed this
            // constructor trusted streams, and plan a move to another format.
            _sketch = (CountMinSketch<string>)new BinaryFormatter().Deserialize(stream);
        }
 /// <summary>
 /// Creates the wrapper around a new <c>CountMinSketch&lt;string&gt;</c>.
 /// </summary>
 /// <param name="confidence">Forwarded as the second argument of the inner sketch.</param>
 /// <param name="errorRate">Forwarded as the first argument of the inner sketch.</param>
 public StringCountMinSketch(double confidence, double errorRate)
 {
     // The inner constructor takes the two values in the opposite order to this
     // one; the trailing 0 is presumably the hash seed — confirm against
     // CountMinSketch<T>.
     var inner = new CountMinSketch<string>(errorRate, confidence, 0);
     _sketch = inner;
 }
Пример #24
0
        /// <summary>
        /// Asserts that <c>MergeInPlace(null)</c> throws <c>IncompatibleMergeException</c>.
        /// </summary>
        public void MergeInPlace_WithNullOther_ShouldThrowIncompatibleMergeException()
        {
            var target = new CountMinSketch<string>(5d, 0.95d, 42);

            Assert.Throws<IncompatibleMergeException>(() => target.MergeInPlace(null));
        }