// Exploratory analysis (no assertions): hashes 200,000 substrings taken from
// freshly generated random 40-character strings and writes a summary of
// repeated hash values to the console.
public void AnalyzeHashFunctionDistribution()
{
    const int hashCount = 200_000;
    const int sourceWidth = 40;
    const int minLength = 3;

    var seenHashes = new HashSet<int>();

    // One entry per *repeat* of an already-seen hash value; the first
    // occurrence of a value is recorded only in seenHashes.
    var collisions = new List<int>();

    var remaining = hashCount;
    while (remaining > 0)
    {
        // Sweep every start offset and every length from minLength up to (but
        // not including) the distance to the end of the string, stopping as
        // soon as the hashCount budget has been used up.
        for (var start = 0; start < sourceWidth && remaining != 0; start++)
        {
            var maxLength = sourceWidth - start;
            for (var length = minLength; length < maxLength && remaining != 0; length++, remaining--)
            {
                var s = CreateRandomString(sourceWidth);
                var hash = StringPool.GetSubstringHashCode(s, start, length);
                if (!seenHashes.Add(hash))
                {
                    collisions.Add(hash);
                }
            }
        }
    }

    // Group once and materialise, so bucket count, max and average all come
    // from the same pass over the data.
    var repeatsPerBucket = collisions
        .GroupBy(hash => hash)
        .Select(g => g.Count())
        .ToList();

    var crowdedBucketCount = repeatsPerBucket.Count;

    // A run with no collisions at all is legitimate (and desirable). Max() on
    // an empty sequence throws InvalidOperationException and 0 / 0 yields NaN,
    // so report zeros for that case instead.
    var max = crowdedBucketCount == 0 ? 0 : repeatsPerBucket.Max();
    var averageLength = crowdedBucketCount == 0
        ? 0d
        : (double)collisions.Count / crowdedBucketCount;

    // NOTE: max and the average count repeats only, i.e. they exclude the first
    // occurrence of each hash value.
    Console.Out.WriteLine(
        $"Strings hashed {hashCount:#,0} with minimum length {minLength}\n" +
        $"{collisions.Count} collisions over {crowdedBucketCount} indices\n" +
        $"Most crowded bucket had {max} items\n" +
        $"Average clashing bucket length {averageLength:0.###}");
}
// Pins the exact hash values produced by StringPool.GetSubstringHashCode for
// prefixes of a fixed sample string (lengths 0 through 9, then the whole string).
public void GetSubstringHashCode()
{
    const string s = "01234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ`¬¦!\"£$€%^&*()-_=+[]{};:'@#~/?,.<>\\|";

    // Hash of the first prefixLength characters of s, reinterpreted as unsigned
    // so it can be compared against the hexadecimal literals below.
    uint PrefixHash(int prefixLength) => (uint)StringPool.GetSubstringHashCode(s, 0, prefixLength);

    // Dump the first ten prefix hashes to the console for reference.
    for (var prefixLength = 0; prefixLength < 10; prefixLength++)
    {
        var rawHash = StringPool.GetSubstringHashCode(s, 0, prefixLength);
        Console.Out.WriteLine($"0x{rawHash:X2}");
    }

    Assert.AreEqual(0x162A16FE, PrefixHash(0));
    Assert.AreEqual(0x05E19FCE, PrefixHash(1));
    Assert.AreEqual(0x05E4405D, PrefixHash(2));
    Assert.AreEqual(0xFC2C8D57, PrefixHash(3));
    Assert.AreEqual(0xFC833FEA, PrefixHash(4));
    Assert.AreEqual(0x36D35466, PrefixHash(5));
    Assert.AreEqual(0x42005971, PrefixHash(6));
    Assert.AreEqual(0x4B54D52B, PrefixHash(7));
    Assert.AreEqual(0xBC227B3E, PrefixHash(8));
    Assert.AreEqual(0xCB2B1F36, PrefixHash(9));

    // Hash of the entire sample string.
    Assert.AreEqual(3779978672, PrefixHash(s.Length));
}