Example #1
0
 /// <summary>
 /// Decides whether <paramref name="otherHash"/> should be preferred over this hash.
 /// </summary>
 /// <param name="otherHash">The candidate hash to compare against this instance.</param>
 /// <returns>
 /// <c>true</c> when <paramref name="otherHash"/> has fewer collisions than this hash, or the
 /// same number of collisions and a mod value that is not larger than this one's;
 /// otherwise <c>false</c>.
 /// </returns>
 internal bool IsBetterHash(PropertyHash otherHash)
 {
     bool otherHasFewerCollisions = otherHash.CollisionCount < CollisionCount;
     bool otherHasMoreCollisions  = otherHash.CollisionCount > CollisionCount;

     // The collision count decides whenever the two counts differ.
     if (otherHasFewerCollisions || otherHasMoreCollisions)
     {
         return otherHasFewerCollisions;
     }

     // Same number of collisions: prefer the smaller mod value (a tie goes to the other hash).
     return !(otherHash.ModValue > ModValue);
 }
        /// <summary>
        /// Searches for the hash configuration with the fewest collisions for the given
        /// property names. Candidates are built from individual character columns and from
        /// the property length, each paired with the mod value reported by <c>FindBestMod</c>.
        /// </summary>
        /// <param name="stringProperties">The property names to build a hash for.</param>
        /// <param name="utf8">
        /// Passed to every <c>Property</c> that is created and recorded on the returned hash.
        /// </param>
        /// <returns>
        /// The candidate that wins the successive <c>PropertyHash.IsBetterHash</c> comparisons.
        /// </returns>
        internal PropertyHash FindBestHash(string[] stringProperties, bool utf8)
        {
            // Only the columns with the fewest collisions are worth a full mod search.
            const int MaxColumnCandidates = 3;

            var properties = stringProperties.Select(p => new Property(p, utf8)).ToArray();

            // Worst-possible placeholder so that any real candidate replaces it.
            var bestHash = new PropertyHash()
            {
                Column         = 0,
                ModValue       = int.MaxValue,
                CollisionCount = int.MaxValue,
                Utf8           = utf8
            };

            //try column values
            var sortedColumnCollisions = FindColumnCollisions(properties).OrderBy(collisions => collisions.NumberOfCollisions);

            foreach (var columnCollision in sortedColumnCollisions.Take(MaxColumnCandidates))
            {
                // The column index wraps around for properties shorter than the column.
                // NOTE(review): a zero-length property would make this modulo fail - confirm
                // that callers never pass empty property names.
                var (bestMod, collisionCount) = FindBestMod(properties, property => property[columnCollision.ColumnIndex % property.Length]);
                var candidateHash = new PropertyHash()
                {
                    Column         = columnCollision.ColumnIndex,
                    ModValue       = bestMod,
                    CollisionCount = collisionCount,
                    // Carry the encoding flag over; otherwise it is lost as soon as a
                    // candidate replaces the placeholder.
                    Utf8           = utf8
                };
                if (bestHash.IsBetterHash(candidateHash))
                {
                    bestHash = candidateHash;
                }
            }

            //try length
            var (bestModForLength, collisionCountLength) = FindBestMod(properties, property => property.Length);
            var lengthCandidateHash = new PropertyHash()
            {
                UseLength      = true,
                ModValue       = bestModForLength,
                CollisionCount = collisionCountLength,
                Utf8           = utf8
            };

            if (bestHash.IsBetterHash(lengthCandidateHash))
            {
                bestHash = lengthCandidateHash;
            }

            return bestHash;
        }