Ejemplo n.º 1
0
        // Fills two sketches with random integers, merges them, and checks that the merged
        // cardinality estimate is within 5% of the exact distinct count kept in a HashSet.
        public void TestHyperLogLogMerge()
        {
            const int    drawsPerSketch = 5000;
            const int    upperBound     = 20000;
            const double tolerance      = .05;

            var first  = new HyperLogLog<int>();
            var second = new HyperLogLog<int>();
            var rng    = new Random();
            var exact  = new HashSet<int>();

            // Adds random values to one sketch and mirrors every value into the exact set.
            void Populate(HyperLogLog<int> sketch)
            {
                var remaining = drawsPerSketch;

                while (remaining-- > 0)
                {
                    var value = rng.Next(upperBound);
                    sketch.Add(value);
                    exact.Add(value);
                }
            }

            Populate(first);
            Populate(second);

            var merged = HyperLogLog<int>.Merge(first, second);

            // Acceptance window around the exact number of distinct values.
            var lowest  = (int)(exact.Count * (1 - tolerance));
            var highest = (int)(exact.Count * (1 + tolerance));

            merged.Cardinality().Should()
                  .BeGreaterOrEqualTo(lowest)
                  .And
                  .BeLessOrEqualTo(highest);
        }
Ejemplo n.º 2
0
        // Builds one sketch from raw random 64-bit values and another from hashed random
        // 12-byte inputs (same number of additions each), then asserts that the two
        // estimates differ by less than ZeroFivePercent.
        public void HashedVsRandom(int realCardinality)
        {
            const int precision = 18;

            // Sketch fed directly with random 64-bit values.
            var rawSketch = new HyperLogLog(precision);
            var added     = 0;

            while (added < realCardinality)
            {
                var buffer = new byte[8];
                RandomNumberGenerator.GetBytes(buffer);
                var rawValue = BitConverter.ToUInt64(buffer, 0);
                rawSketch.Add(rawValue);
                added++;
            }

            var rawEstimate = rawSketch.Count();

            // Sketch fed with SHA-1 based hashes of random 12-byte inputs.
            var hashedSketch = new HyperLogLog(precision);

            using (var sha1 = new SHA1CryptoServiceProvider())
            {
                added = 0;

                while (added < realCardinality)
                {
                    var buffer = new byte[12];
                    RandomNumberGenerator.GetBytes(buffer);
                    var hashed = HyperLogLog.Hash(sha1, buffer);
                    hashedSketch.Add(hashed);
                    added++;
                }
            }

            var hashedEstimate = hashedSketch.Count();

            var difference = GetPercentageDifference(rawEstimate, hashedEstimate);

            Assert.True(difference < ZeroFivePercent);
        }
Ejemplo n.º 3
0
        // Pins the value Count() returns for a HyperLogLog(18) that has had nothing added.
        public void CountWithoutAdd()
        {
            const int precision = 18;

            var untouched = new HyperLogLog(precision);
            var estimate  = untouched.Count();

            Assert.Equal(189083, estimate);
        }
 /// <summary>
 /// Writes <paramref name="hyperLogLog"/> to <paramref name="stream"/> as a single byte
 /// holding <c>B</c> followed by the contents of the register array.
 /// </summary>
 /// <param name="hyperLogLog">The sketch to serialize.</param>
 /// <param name="stream">The destination stream.</param>
 /// <exception cref="System.ArgumentNullException">
 /// Either argument is <c>null</c>.
 /// </exception>
 /// <remarks>
 /// The <see cref="BinaryWriter"/> is disposed on exit, which also closes
 /// <paramref name="stream"/>.
 /// </remarks>
 public void SerializeTo(HyperLogLog hyperLogLog, Stream stream)
 {
     // Validate before the writer exists: otherwise a null sketch would fail with a
     // NullReferenceException inside the using block and close the caller's stream.
     if (ReferenceEquals(hyperLogLog, null))
     {
         throw new System.ArgumentNullException(nameof(hyperLogLog));
     }

     if (stream == null)
     {
         throw new System.ArgumentNullException(nameof(stream));
     }

     using (var bw = new BinaryWriter(stream))
     {
         bw.Write((byte)hyperLogLog.B);
         bw.Write(hyperLogLog._registers);
         bw.Flush();
     }
 }
Ejemplo n.º 5
0
        // Pins the value Count() returns for a HyperLogLog(18) after adding ulong.MaxValue once.
        public void AddMaxULong()
        {
            const int precision = 18;

            var sketch = new HyperLogLog(precision);
            sketch.Add(ulong.MaxValue);

            var estimate = sketch.Count();

            Assert.Equal(189084, estimate);
        }
 /// <summary>
 /// Writes <paramref name="hyperLogLog"/> to <paramref name="stream"/> as a single byte
 /// holding <c>B</c> followed by the contents of the register array.
 /// </summary>
 /// <param name="hyperLogLog">The sketch to serialize.</param>
 /// <param name="stream">The destination stream.</param>
 /// <exception cref="System.ArgumentNullException">
 /// Either argument is <c>null</c>.
 /// </exception>
 /// <remarks>
 /// The <see cref="BinaryWriter"/> is disposed on exit, which also closes
 /// <paramref name="stream"/>.
 /// </remarks>
 public void SerializeTo(HyperLogLog hyperLogLog, Stream stream)
 {
     // Validate before the writer exists: otherwise a null sketch would fail with a
     // NullReferenceException inside the using block and close the caller's stream.
     if (ReferenceEquals(hyperLogLog, null))
     {
         throw new System.ArgumentNullException(nameof(hyperLogLog));
     }

     if (stream == null)
     {
         throw new System.ArgumentNullException(nameof(stream));
     }

     using (var bw = new BinaryWriter(stream))
     {
         bw.Write((byte)hyperLogLog.B);
         bw.Write(hyperLogLog._registers);
         bw.Flush();
     }
 }
Ejemplo n.º 7
0
        // Pins the value Count() returns for a HyperLogLog(18) after adding the value 5 once.
        public void Add5()
        {
            const int precision = 18;

            var sketch = new HyperLogLog(precision);
            sketch.Add(5);

            var estimate = sketch.Count();

            Assert.Equal(189084, estimate);
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Creates a <c>HyperLogLog(b)</c> and adds one <see cref="DateTime"/> per index in
        /// <c>[start, start + n)</c>, each being 2000-01-01 plus that many minutes.
        /// </summary>
        /// <param name="n">Number of consecutive minute offsets to add.</param>
        /// <param name="b">Value passed to the <c>HyperLogLog</c> constructor.</param>
        /// <param name="start">First minute offset.</param>
        /// <returns>The populated sketch.</returns>
        private static HyperLogLog CreateHyperLogLogWithHashedDateTimes(int n, byte b, int start = 0)
        {
            var sketch = new HyperLogLog(b);

            // The sum is computed in int arithmetic and then widened, as in the loop condition it replaces.
            long end    = start + n;
            var  origin = new DateTime(2000, 1, 1);

            long minute = start;
            while (minute < end)
            {
                sketch.AddDateTime(origin.AddMinutes(minute));
                minute++;
            }

            return sketch;
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Creates a <c>HyperLogLog(b)</c> and adds a freshly generated <see cref="Guid"/>
        /// for every index in <c>[start, start + n)</c>.
        /// </summary>
        /// <param name="n">Number of GUIDs to add.</param>
        /// <param name="b">Value passed to the <c>HyperLogLog</c> constructor.</param>
        /// <param name="start">Lower loop bound; the index itself is not used by the loop body.</param>
        /// <returns>The populated sketch.</returns>
        private static HyperLogLog CreateHyperLogLogWithHashedGuids(int n, byte b, int start = 0)
        {
            var sketch = new HyperLogLog(b);

            // The sum is computed in int arithmetic and then widened, as in the loop condition it replaces.
            long end = start + n;

            long index = start;
            while (index < end)
            {
                sketch.AddGuid(Guid.NewGuid());
                index++;
            }

            return sketch;
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Creates a <c>HyperLogLog(b)</c> and adds every 64-bit integer in
        /// <c>[start, start + n)</c> via <c>AddInt64</c>.
        /// </summary>
        /// <param name="n">Number of consecutive integers to add.</param>
        /// <param name="b">Value passed to the <c>HyperLogLog</c> constructor.</param>
        /// <param name="start">First integer to add.</param>
        /// <returns>The populated sketch.</returns>
        private static HyperLogLog CreateHyperLogLogWithHashedIntegers64(int n, byte b, int start = 0)
        {
            var sketch = new HyperLogLog(b);

            // The sum is computed in int arithmetic and then widened, as in the loop condition it replaces.
            long end = start + n;

            long value = start;
            while (value < end)
            {
                sketch.AddInt64(value);
                value++;
            }

            return sketch;
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Creates a <c>HyperLogLog(b)</c> and adds every 16-bit integer in <c>[0, n)</c>
        /// via <c>AddInt64</c>.
        /// </summary>
        /// <param name="n">Exclusive upper bound of the integers to add.</param>
        /// <param name="b">Value passed to the <c>HyperLogLog</c> constructor.</param>
        /// <returns>The populated sketch.</returns>
        private static HyperLogLog CreateHyperLogLogWithHashedIntegers16(short n, byte b)
        {
            var sketch = new HyperLogLog(b);

            short value = 0;
            while (value < n)
            {
                sketch.AddInt64(value);
                value++;
            }

            return sketch;
        }
Ejemplo n.º 12
0
        // Pins the 64-bit value HyperLogLog.Hash produces for a fixed 8-byte input when
        // given an MD5 hash algorithm instance.
        public void HashConstantWithMD5()
        {
            const ulong input    = 0x2D51AF5C52FDE6B4ul;
            const ulong expected = 16663394367412432550ul;

            using (var md5 = new MD5CryptoServiceProvider())
            {
                var inputBytes = BitConverter.GetBytes(input);
                var actual     = HyperLogLog.Hash(md5, inputBytes);

                Assert.Equal(expected, actual);
            }
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Asserts that the estimate returned by <c>CalculateEstimatedCount</c> equals
        /// <paramref name="n"/> within <paramref name="acceptablePercentError"/> percent.
        /// </summary>
        /// <param name="hyperLogLog">The sketch whose estimate is checked.</param>
        /// <param name="n">The expected count.</param>
        /// <param name="acceptablePercentError">Allowed deviation, in percent.</param>
        private static void Test_CalculateEstimatedCount_ShouldBeWithinAcceptableErrorRange(
            HyperLogLog hyperLogLog,
            int n,
            double acceptablePercentError)
        {
            // Act
            int estimate = hyperLogLog.CalculateEstimatedCount();

            // Assert
            var closeEnough = Is.EqualTo(n).Within(acceptablePercentError).Percent;

            Assert.That(estimate, closeEnough);
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Creates a <c>HyperLogLog(b)</c> and adds the invariant-culture decimal string of
        /// every integer in <c>[0, n)</c> via <c>AddUTF8String</c>.
        /// </summary>
        /// <param name="n">Number of strings to add.</param>
        /// <param name="b">Value passed to the <c>HyperLogLog</c> constructor.</param>
        /// <returns>The populated sketch.</returns>
        private static HyperLogLog CreateHyperLogLogWithHashedStrings(int n, byte b)
        {
            var sketch = new HyperLogLog(b);
            var number = 0;

            while (number < n)
            {
                var text = number.ToString(CultureInfo.InvariantCulture);
                sketch.AddUTF8String(text);
                number++;
            }

            return sketch;
        }
Ejemplo n.º 15
0
        // Pins the 64-bit value HyperLogLog.Hash produces for a fixed 8-byte input when
        // given a SHA-1 hash algorithm instance.
        public void HashConstantWithSHA1()
        {
            const ulong input    = 0x2D51AF5C52FDE6B4ul;
            const ulong expected = 17851087020509344997ul;

            using (var sha1 = new SHA1CryptoServiceProvider())
            {
                var inputBytes = BitConverter.GetBytes(input);
                var actual     = HyperLogLog.Hash(sha1, inputBytes);

                Assert.Equal(expected, actual);
            }
        }
Ejemplo n.º 16
0
        // Merge(null) must throw ArgumentNullException naming the "other" parameter.
        public void Merge_NullSet_ThrowsExeption()
        {
            // Arrange
            var sketch = new HyperLogLog(4);

            // Act + Assert
            var thrown = Assert.Throws<ArgumentNullException>(() => sketch.Merge(null));

            Assert.AreEqual("other", thrown.ParamName);
        }
Ejemplo n.º 17
0
        // Merging an instance into itself must throw ArgumentException for the "other"
        // parameter with the expected message prefix.
        public void Merge_SameSet_ThrowsExeption()
        {
            // Arrange
            var sketch = new HyperLogLog(4);

            // Act + Assert
            var thrown = Assert.Throws<ArgumentException>(() => sketch.Merge(sketch));

            Assert.AreEqual("other", thrown.ParamName);
            StringAssert.StartsWith("Cannot merge instance of HyperLogLog to itself", thrown.Message);
        }
Ejemplo n.º 18
0
        // Merging sketches constructed with different b values (4 and 5) must throw
        // ArgumentException for the "other" parameter with the expected message prefix.
        public void Merge_DifferentB_ThrowsExeption()
        {
            // Arrange
            var target = new HyperLogLog(4);
            var source = new HyperLogLog(5);

            // Act + Assert
            var thrown = Assert.Throws<ArgumentException>(() => target.Merge(source));

            Assert.AreEqual("other", thrown.ParamName);
            StringAssert.StartsWith("Cannot merge instance of HyperLogLog with b = 5 to instance with b = 4", thrown.Message);
        }
        /// <summary>
        /// Reads a sketch from <paramref name="stream"/>: one leading byte that is passed to
        /// the <c>HyperLogLog</c> constructor, followed by one byte per register
        /// (<c>M</c> bytes in total).
        /// </summary>
        /// <param name="stream">The source stream.</param>
        /// <param name="hashAlgorithm">Forwarded to the <c>HyperLogLog</c> constructor.</param>
        /// <param name="bytesConverter">Forwarded to the <c>HyperLogLog</c> constructor.</param>
        /// <returns>The reconstructed sketch.</returns>
        /// <remarks>
        /// The <see cref="BinaryReader"/> is disposed on exit, which also closes
        /// <paramref name="stream"/>.
        /// </remarks>
        public HyperLogLog DeserializeTo(Stream stream, HashAlgorithm hashAlgorithm, IBytesConverter bytesConverter)
        {
            using (var reader = new BinaryReader(stream))
            {
                // Header byte comes first; it determines how the sketch is constructed.
                var header = reader.ReadByte();
                var sketch = new HyperLogLog(hashAlgorithm, bytesConverter, header);

                // Registers follow, one byte each.
                int slot = 0;
                while (slot < sketch.M)
                {
                    sketch._registers[slot] = reader.ReadByte();
                    slot++;
                }

                return sketch;
            }
        }
Ejemplo n.º 20
0
        // Adding the same value twice must give the same estimate as adding it once.
        public void AddTwiceDoesNotChangeCount()
        {
            const int precision = 18;

            // Sketch that sees ulong.MaxValue twice.
            var addedTwice = new HyperLogLog(precision);
            addedTwice.Add(ulong.MaxValue);
            addedTwice.Add(ulong.MaxValue);
            var estimateTwice = addedTwice.Count();

            // Sketch that sees ulong.MaxValue once.
            var addedOnce = new HyperLogLog(precision);
            addedOnce.Add(ulong.MaxValue);
            var estimateOnce = addedOnce.Count();

            Assert.Equal(estimateTwice, estimateOnce);
        }
        /// <summary>
        /// Reads a sketch from <paramref name="stream"/>: one leading byte that is passed to
        /// the <c>HyperLogLog</c> constructor, followed by one byte per register
        /// (<c>M</c> bytes in total).
        /// </summary>
        /// <param name="stream">The source stream.</param>
        /// <param name="hashAlgorithm">Forwarded to the <c>HyperLogLog</c> constructor.</param>
        /// <param name="bytesConverter">Forwarded to the <c>HyperLogLog</c> constructor.</param>
        /// <returns>The reconstructed sketch.</returns>
        /// <remarks>
        /// The <see cref="BinaryReader"/> is disposed on exit, which also closes
        /// <paramref name="stream"/>.
        /// </remarks>
        public HyperLogLog DeserializeTo(Stream stream, HashAlgorithm hashAlgorithm, IBytesConverter bytesConverter)
        {
            using (var reader = new BinaryReader(stream))
            {
                // Header byte comes first; it determines how the sketch is constructed.
                var header = reader.ReadByte();
                var sketch = new HyperLogLog(hashAlgorithm, bytesConverter, header);

                // Registers follow, one byte each.
                int slot = 0;
                while (slot < sketch.M)
                {
                    sketch._registers[slot] = reader.ReadByte();
                    slot++;
                }

                return sketch;
            }
        }
Ejemplo n.º 22
0
        // Round-trips a populated sketch through BinaryFormatter and checks that the
        // deserialized copy reports the same estimate as the original, and that the
        // original estimate is greater than 90% of n.
        public void SerializeDeserialize_SameEstimate(int n, byte b)
        {
            // Arrange
            HyperLogLog     hyperLogLog = CreateHyperLogLogWithHashedStrings(n, b);
            BinaryFormatter formatter   = new BinaryFormatter();
            HyperLogLog     deserialized;

            // Act
            // The stream is only needed for the round trip, so it is disposed right after.
            using (MemoryStream stream = new MemoryStream())
            {
                formatter.Serialize(stream, hyperLogLog);

                // Rewind so that Deserialize reads from the start of what was just written.
                stream.Position = 0;
                deserialized = (HyperLogLog)formatter.Deserialize(stream);
            }

            // Assert
            int originalEstimate = hyperLogLog.CalculateEstimatedCount();

            Assert.That(originalEstimate, Is.GreaterThan(0.9 * n));
            Assert.That(originalEstimate, Is.EqualTo(deserialized.CalculateEstimatedCount()));
        }
        // Benchmark body: fills a sketch with 2^registers registers from the word list
        // returned by Words.Dictionary(0), then calls Count() 100000 times.
        private void benchmarkCount(int registers)
        {
            const int iterations = 100000;

            var dictionary    = Words.Dictionary(0);
            var registerCount = (uint)Math.Pow(2, registers);
            var sketch        = new HyperLogLog(registerCount);
            var ascii         = Encoding.ASCII;

            foreach (var entry in dictionary)
            {
                var encoded = ascii.GetBytes(entry);
                sketch.Add(encoded);
            }

            // Only the cost of Count() matters here; its result is discarded.
            var remaining = iterations;
            while (remaining > 0)
            {
                sketch.Count();
                remaining--;
            }
        }
Ejemplo n.º 24
0
        // Adds realCardinality random 64-bit values to a HyperLogLog(18) and asserts that
        // the estimate differs from realCardinality by less than OnePercent.
        public void TheRandomTheory(int realCardinality)
        {
            const int precision = 18;

            var sketch = new HyperLogLog(precision);
            var added  = 0;

            while (added < realCardinality)
            {
                var buffer = new byte[8];
                RandomNumberGenerator.GetBytes(buffer);
                var value = BitConverter.ToUInt64(buffer, 0);
                sketch.Add(value);
                added++;
            }

            var estimate   = sketch.Count();
            var difference = GetPercentageDifference(realCardinality, estimate);

            Assert.True(difference < OnePercent);
        }
Ejemplo n.º 25
0
        // Manual smoke test against a local Redis instance: clears the "ips" key, then adds
        // one million randomly generated IP address strings in parallel, logging the count
        // before and after.
        static void TestHyperLogLog()
        {
            const int total = 1_000_000;

            var redis = new FullRedis("127.0.0.1", null, 1);

            // Start from a clean key.
            redis.Remove("ips");
            var sketch = new HyperLogLog(redis, "ips");

            XTrace.WriteLine("log.Count={0:n0}", sketch.Count);

            XTrace.WriteLine("准备添加[{0:n0}]个IP地址", total);
            Parallel.For(0, total, index =>
            {
                var raw     = Rand.Next();
                var address = new IPAddress(raw);
                sketch.Add(address.ToString());
            });

            XTrace.WriteLine("log.Count={0:n0}", sketch.Count);
        }
        // For every exponent i in [lowB, highB), builds a sketch with 2^i registers from the
        // word list returned by Words.Dictionary(n) and compares the estimation error with
        // the bound 1.04 / sqrt(m). Exponents run in parallel (at most 4 at a time).
        // NOTE(review): the number of out-of-bound results is tallied but never asserted on,
        // so only a failure to construct a sketch can fail this test.
        private void testHyperLogLog(int n, int lowB, int highB)
        {
            var words  = Words.Dictionary(n);
            var bad    = 0;
            var nWords = (UInt64)words.LongLength;

            var options = new ParallelOptions();

            options.MaxDegreeOfParallelism = 4;
            Parallel.For(lowB, highB, options, i =>
            {
                var m = (uint)Math.Pow(2, i);

                HyperLogLog h = null;
                try
                {
                    h = new HyperLogLog(m);
                }
                catch (Exception)
                {
                    Assert.Fail(string.Format("Can't make HyperLogLog({0})", m));
                }

                foreach (var word in words)
                {
                    h.Add(Encoding.ASCII.GetBytes(word));
                }

                var expectedError = 1.04 / Math.Sqrt(m);
                var actualError   = Math.Abs(this.geterror(nWords, h.Count()));

                if (actualError > expectedError)
                {
                    // The counter is shared by all parallel iterations, so a plain ++ could
                    // lose updates; increment it atomically instead.
                    System.Threading.Interlocked.Increment(ref bad);
                }
            });
        }
Ejemplo n.º 27
0
        // Adds the hashes of realCardinality random 12-byte inputs to a HyperLogLog(18) and
        // asserts that the estimate differs from realCardinality by less than OnePercent.
        public void TheHashedRandomTheory(int realCardinality)
        {
            const int precision = 18;

            var sketch = new HyperLogLog(precision);

            using (var sha1 = new SHA1CryptoServiceProvider())
            {
                var added = 0;

                while (added < realCardinality)
                {
                    var buffer = new byte[12];
                    RandomNumberGenerator.GetBytes(buffer);
                    var hashed = HyperLogLog.Hash(sha1, buffer);
                    sketch.Add(hashed);
                    added++;
                }
            }

            var estimate   = sketch.Count();
            var difference = GetPercentageDifference(realCardinality, estimate);

            Assert.True(difference < OnePercent);
        }
Ejemplo n.º 28
0
        // Exercises the Redis-backed HyperLogLog end to end: an empty key counts zero,
        // adding four values raises the count by four, and merging a second key succeeds
        // and yields the sum of both counts.
        public void HyperLogLog_Normal()
        {
            const string primaryKey   = "hyper_key";
            const string secondaryKey = "hyper_key2";

            // Remove any existing data under the primary key.
            _redis.Remove(primaryKey);
            var primary = new HyperLogLog(_redis, primaryKey);

            _redis.SetExpire(primaryKey, TimeSpan.FromSeconds(60));

            // Read the initial count.
            var initialCount = primary.Count;

            Assert.Equal(0, initialCount);

            // Add a batch of values.
            var values = new[] { "1234", "abcd", "新生命团队", "ABEF" };

            primary.Add(values);

            // Compare the count after the additions.
            var countAfterAdd = primary.Count;

            Assert.Equal(initialCount + values.Length, countAfterAdd);

            // Fill a second key and merge it into the first.
            var secondary = new HyperLogLog(_redis, secondaryKey);

            secondary.Add("567", "789");

            var merged = primary.Merge(secondaryKey);

            Assert.True(merged);

            Assert.Equal(values.Length + secondary.Count, primary.Count);
        }
        // NewDefaultHyperLogLog(0.1) must produce a sketch whose M is 128.
        public void TestNewDefaultHyperLogLog()
        {
            const uint expectedM = 128u;

            var sketch = HyperLogLog.NewDefaultHyperLogLog(0.1);

            Assert.AreEqual(expectedM, sketch.M);
        }
Ejemplo n.º 30
0
 // Passes as long as constructing the sketch does not throw.
 // NOTE(review): the method name says 4 but the constructor argument is 7 — confirm which is intended.
 public void ConstructHyperLogLog4DoesNotThrow()
 {
     new HyperLogLog(7);
 }
Ejemplo n.º 31
0
 /// <summary>
 /// Stores <paramref name="selector"/> and creates the backing <c>HyperLogLog</c> from
 /// <paramref name="stdError"/>.
 /// </summary>
 /// <param name="selector">Function mapping a source item to a value; stored as given.</param>
 /// <param name="stdError">Value passed to the <c>HyperLogLog</c> constructor.</param>
 public CardinalityEstimateImpl(Func<TSource, TValue> selector, double stdError)
 {
     var sketch = new HyperLogLog(stdError);

     this.selector = selector;
     hyperLogLog   = sketch;
 }
Ejemplo n.º 32
0
 // Passes as long as constructing a HyperLogLog(18) does not throw.
 public void ConstructHyperLogLog18DoesNotThrow()
 {
     new HyperLogLog(18);
 }