/// <summary>
/// Computes a deterministic 64-bit hash for a string.
/// WARNING! Never use GetHashCode here: it is platform-dependent, while this function
/// must return exactly the same value for the same input everywhere.
/// </summary>
/// <param name="key">String to hash; may be null or empty.</param>
/// <returns>0 for a null or empty string, otherwise the 64-bit hash value.</returns>
public static ulong StrToHash(string key)
{
    if (string.IsNullOrEmpty(key))
    {
        return 0ul;
    }

    int length = key.Length;

    // Only the string suffix (at most sizeof(ulong) == 8 chars) is packed, because for most
    // strings the tail is the part that varies the most
    // (e.g. 'Alex Kozloff'/'Alex Richardson', 'System.A'/'System.B').
    int tailLength = length < sizeof(ulong) ? length : sizeof(ulong);

    ulong packedTail = 0;
    for (int offset = 0; offset < tailLength; offset++)
    {
        char ch = key[length - 1 - offset];

        // Fold the 16-bit char into one byte by XOR-ing its high and low halves.
        byte folded = (byte)((ch >> 8) ^ (ch & 0xff));

        // Walking backwards from the last char: every later step pushes earlier bytes up,
        // so the last char ends in the highest occupied byte. The accumulator starts at 0,
        // therefore shifting on the very first step is a no-op.
        packedTail = (packedTail << 8) | folded;
    }

    // Mix in a checksum of the whole string so the characters before the tail also contribute.
    ulong scaledChecksum = 1566083941ul * (ulong)Adler32.ForString(key);

    return packedTail ^ scaledChecksum;
}
/// <summary>
/// Gets the sharding ID for a string, that is - computes the string hash as UInt64.
/// WARNING! Changing this function will render all existing sharding partitioning invalid. Use extreme care!
/// </summary>
/// <remarks>
/// The key is normalized with ToUpperInvariant() before hashing. Upper case is chosen over lower case
/// following Microsoft's "Best Practices for Using Strings in the .NET Framework": a small group of
/// characters cannot make a round trip when converted to lower case. Example:
///   start:     Greek Rho Symbol  (U+03f1)
///   uppercase: Capital Greek Rho (U+03a1)
///   lowercase: Small Greek Rho   (U+03c1)
/// That is why case-insensitive comparisons should convert to upper case, not lower case.
/// </remarks>
/// <param name="key">String to compute the sharding ID for; may be null or empty.</param>
/// <returns>0 for a null or empty string, otherwise the 64-bit sharding ID.</returns>
public static ulong StringToShardingID(string key)
{
    //WARNING! Never use GetHashCode here as it is platform-dependent, but this function must be 100% deterministic
    if (key == null)
    {
        return 0ul;
    }

#warning DANGER!!!!!!!!! This needs carefull review to not depend on ToUpperInvariant(): todo Dima review!!!
    string normalized = key.ToUpperInvariant();

    int length = normalized.Length;
    if (length == 0)
    {
        return 0ul;
    }

    // Only the suffix (at most sizeof(ulong) == 8 chars) is packed, because for most strings
    // the tail is the part that varies the most
    // (e.g. 'Alex Kozloff'/'Alex Richardson', 'System.A'/'System.B').
    int tailLength = length < sizeof(ulong) ? length : sizeof(ulong);

    ulong packedTail = 0;
    for (int offset = 0; offset < tailLength; offset++)
    {
        char ch = normalized[length - 1 - offset];

        // Fold the 16-bit char into one byte by XOR-ing its high and low halves.
        byte folded = (byte)((ch >> 8) ^ (ch & 0xff));

        // The accumulator starts at 0, so shifting on the first step is a no-op; afterwards each
        // step pushes earlier bytes up, leaving the last char in the highest occupied byte.
        packedTail = (packedTail << 8) | folded;
    }

    // Mix in a checksum of the whole normalized string so the head of the string contributes too.
    ulong scaledChecksum = 1566083941ul * (ulong)Adler32.ForString(normalized);

    return packedTail ^ scaledChecksum;
}
/// <summary>
/// Checks that Adler32.ForString("Wikipedia") yields 0x11E60398.
/// </summary>
public void Adler32_5()
{
    // Arrange: expected checksum written as a hex literal (same value as parsing "11E60398").
    uint expected = 0x11E60398u;

    // Act
    var actual = Adler32.ForString("Wikipedia");

    // Assert
    Assert.AreEqual(expected, actual);
}
/// <summary>
/// Checks that Adler32.ForString yields 0x36E81466 for a longer sentence.
/// </summary>
public void Adler32_4()
{
    // Arrange: expected checksum written as a hex literal (same value as parsing "36E81466").
    uint expected = 0x36E81466u;

    // Act
    var actual = Adler32.ForString("This is an example of a much longer string of characters");

    // Assert
    Assert.AreEqual(expected, actual);
}
/// <summary>
/// Checks that Adler32.ForString("Hello Dolly!") yields 0x1BE9043A.
/// </summary>
public void Adler32_3()
{
    // Arrange: expected checksum written as a hex literal (same value as parsing "1BE9043A").
    uint expected = 0x1BE9043Au;

    // Act
    var actual = Adler32.ForString("Hello Dolly!");

    // Assert
    Assert.AreEqual(expected, actual);
}