/// <summary>
/// Computes the hash of a JSON string value.
/// </summary>
/// <param name="value">The string to hash.</param>
/// <param name="seed">The seed to use.</param>
/// <returns>The hash of the UTF-8 bytes of <paramref name="value"/>, seeded for strings.</returns>
private static UInt128 GetStringHash(string value, UInt128 seed)
{
    // Mix the string-specific seed constant into the caller's seed first ...
    UInt128 stringSeed = DistinctHash.GetHash(DistinctHash.StringHashSeed, seed);

    // ... then hash the UTF-8 encoding of the text under that combined seed.
    byte[] utf8Bytes = Encoding.UTF8.GetBytes(value);
    return DistinctHash.GetHash(utf8Bytes, stringSeed);
}
/// <summary>
/// Computes the hash of a JSON number value.
/// </summary>
/// <param name="number">The number to hash.</param>
/// <param name="seed">The seed to use.</param>
/// <returns>The hash of the number's 64-bit representation, seeded for numbers.</returns>
/// <remarks>
/// The hash is taken over the raw bit pattern of the double, so two doubles hash
/// identically only when their bit patterns are identical.
/// </remarks>
private static UInt128 GetNumberHash(double number, UInt128 seed)
{
    // Mix the number-specific seed constant into the caller's seed.
    UInt128 numberSeed = DistinctHash.GetHash(DistinctHash.NumberHashSeed, seed);

    // Reinterpret the double as its 64-bit pattern and hash that under the combined seed.
    long rawBits = BitConverter.DoubleToInt64Bits(number);
    return DistinctHash.GetHash((UInt128)rawBits, numberSeed);
}
/// <summary>
/// Computes the hash of a JSON object.
/// </summary>
/// <param name="cosmosObject">The object to hash.</param>
/// <param name="seed">The seed to use.</param>
/// <returns>The hash of the JSON object; independent of property enumeration order.</returns>
private static UInt128 GetObjectHash(CosmosObject cosmosObject, UInt128 seed)
{
    // Every object starts from an object-specific hash so that an empty object
    // still gets its own distinct value.
    UInt128 objectHash = DistinctHash.GetHash(DistinctHash.ObjectHashSeed, seed);

    // The property hashes are accumulated separately and only folded into the object hash
    // once, at the end. XOR-ing them straight into the final hash would make documents
    // such as
    //   { "pet": { "name": "alice", "age": 5 }, "pet2": { "name": "alice", "age": 5 } }
    //   { "pet": { "name": "bob",   "age": 5 }, "pet2": { "name": "bob",   "age": 5 } }
    // collide, because the repeated sub-object hash would cancel itself out (x ^ x = 0)
    // regardless of whether the name is "alice" or "bob".
    UInt128 combinedPropertyHash = 0;

    // XOR is commutative (a ^ b = b ^ a), so the properties can be visited in any order and
    // still yield the same accumulated value, which matches the equality definition where
    // property order is irrelevant. Equal property values do not cancel each other out
    // because each value hash is seeded with the hash of its property name, and names are
    // unique within an object.
    foreach (KeyValuePair<string, CosmosElement> property in cosmosObject)
    {
        UInt128 propertyNameHash = DistinctHash.GetHash(
            CosmosString.Create(property.Key),
            DistinctHash.PropertyNameHashSeed);

        combinedPropertyHash ^= DistinctHash.GetHash(property.Value, propertyNameHash);
    }

    // The accumulated value is only mixed in when it is non-zero, which is how a
    // non-empty object is recognized here.
    if (combinedPropertyHash > 0)
    {
        return DistinctHash.GetHash(combinedPropertyHash, objectHash);
    }

    return objectHash;
}
/// <summary>
/// Computes the hash of a cosmos element given a seed.
/// </summary>
/// <param name="cosmosElement">The cosmos element to hash; may be null.</param>
/// <param name="seed">The seed to use.</param>
/// <returns>
/// The result of <c>GetUndefinedHash</c> when <paramref name="cosmosElement"/> is null;
/// otherwise the value produced by passing <c>CosmosElementHasher.Singleton</c> and the seed
/// to the element's <c>Accept</c> method.
/// </returns>
private static UInt128 GetHash(CosmosElement cosmosElement, UInt128 seed)
{
    return cosmosElement == null
        ? DistinctHash.GetUndefinedHash(seed)
        : cosmosElement.Accept(CosmosElementHasher.Singleton, seed);
}
/// <summary>
/// Computes the hash of a cosmos element given a seed.
/// </summary>
/// <param name="cosmosElement">The cosmos element to hash; may be null.</param>
/// <param name="seed">The seed to use.</param>
/// <returns>The hash of the element, computed by the hasher matching its element type.</returns>
/// <exception cref="ArgumentException">The element reports a type that is not handled here.</exception>
private static UInt128 GetHash(CosmosElement cosmosElement, UInt128 seed)
{
    // A null reference is hashed as "undefined".
    if (cosmosElement == null)
    {
        return DistinctHash.GetUndefinedHash(seed);
    }

    CosmosElementType elementType = cosmosElement.Type;
    switch (elementType)
    {
        case CosmosElementType.Array:
            return DistinctHash.GetArrayHash(cosmosElement as CosmosArray, seed);

        case CosmosElementType.Boolean:
            return DistinctHash.GetBooleanHash((cosmosElement as CosmosBoolean).Value, seed);

        case CosmosElementType.Null:
            return DistinctHash.GetNullHash(seed);

        case CosmosElementType.Number:
        {
            // TODO: we need to differentiate between the different number types.
            // For now both integers and floating point values are hashed as a double.
            CosmosNumber cosmosNumber = cosmosElement as CosmosNumber;
            double numericValue;
            if (cosmosNumber.IsFloatingPoint)
            {
                numericValue = cosmosNumber.AsFloatingPoint().Value;
            }
            else
            {
                numericValue = cosmosNumber.AsInteger().Value;
            }

            return DistinctHash.GetNumberHash(numericValue, seed);
        }

        case CosmosElementType.Object:
            return DistinctHash.GetObjectHash(cosmosElement as CosmosObject, seed);

        case CosmosElementType.String:
            return DistinctHash.GetStringHash((cosmosElement as CosmosString).Value, seed);

        default:
            throw new ArgumentException($"Unexpected {nameof(CosmosElementType)} : {elementType}");
    }
}
/// <summary>
/// Adds a cosmos element to this map if its hash differs from the previously added one.
/// </summary>
/// <param name="cosmosElement">The element to add.</param>
/// <param name="hash">Receives the hash computed for <paramref name="cosmosElement"/>.</param>
/// <returns>Whether or not the item was added to this Distinct Map.</returns>
/// <remarks>
/// This function assumes data is added in sorted order: only the most recent hash is
/// remembered, so a duplicate is detected only when it directly follows its twin.
/// </remarks>
public override bool Add(CosmosElement cosmosElement, out UInt128 hash)
{
    hash = DistinctHash.GetHash(cosmosElement);

    // The element is new exactly when its hash differs from the last one seen.
    bool isNew = this.lastHash != hash;
    if (isNew)
    {
        this.lastHash = hash;
    }

    return isNew;
}
/// <summary>
/// Adds a string to the distinct map.
/// </summary>
/// <param name="value">The string to add.</param>
/// <returns>Whether or not the value was successfully added.</returns>
/// <remarks>
/// Strings whose UTF-8 encoding fits in 16 bytes are stored as their zero-padded bytes,
/// bucketed by encoded length; longer strings are stored as their hash.
/// NOTE(review): because of the zero padding, strings in the same bucket that differ only
/// by trailing U+0000 characters produce the same key - confirm whether that is acceptable.
/// </remarks>
private bool AddStringValue(string value)
{
    int encodedLength = Encoding.UTF8.GetByteCount(value);

    // Too long to keep verbatim: remember only the hash of the string.
    if (encodedLength > UnorderdDistinctMap.UInt128Length)
    {
        UInt128 longStringHash = DistinctHash.GetHash(CosmosString.Create(value));
        return this.stringsLength16Plus.Add(longStringHash);
    }

    // The shared buffer is wiped first so that every byte past the encoded string is 0
    // when the buffer is reinterpreted as an integer below.
    Array.Clear(this.utf8Buffer, 0, this.utf8Buffer.Length);
    Encoding.UTF8.GetBytes(value, 0, value.Length, this.utf8Buffer, 0);

    if (encodedLength == 0)
    {
        return this.AddSimpleValue(SimpleValues.EmptyString);
    }

    if (encodedLength <= UnorderdDistinctMap.UIntLength)
    {
        uint packed32 = BitConverter.ToUInt32(this.utf8Buffer, 0);
        return this.stringsLength4.Add(packed32);
    }

    if (encodedLength <= UnorderdDistinctMap.ULongLength)
    {
        ulong packed64 = BitConverter.ToUInt64(this.utf8Buffer, 0);
        return this.stringsLength8.Add(packed64);
    }

    UInt128 packed128 = UInt128.FromByteArray(this.utf8Buffer);
    return this.stringsLength16.Add(packed128);
}
/// <summary>
/// Computes the hash of a JSON array.
/// </summary>
/// <param name="cosmosArray">The array to hash.</param>
/// <param name="seed">The seed to use.</param>
/// <returns>The hash of the JSON array; sensitive to the order of its items.</returns>
private static UInt128 GetArrayHash(CosmosArray cosmosArray, UInt128 seed)
{
    // Every array starts from an array-specific hash so that an empty array
    // still gets its own distinct value.
    UInt128 arrayHash = DistinctHash.GetHash(DistinctHash.ArrayHashSeed, seed);

    for (int position = 0; position < cosmosArray.Count; position++)
    {
        // Item order matters for equality. Chaining the running hash already distinguishes
        // [true, false, true] from [true, true, false] with murmurhash, but the position is
        // also baked into each item's seed in case that property stops holding in the future.
        UInt128 itemSeed = DistinctHash.ArrayIndexHashSeed + position;
        UInt128 itemHash = DistinctHash.GetHash(cosmosArray[position], itemSeed);

        // Fold the item into the running hash.
        arrayHash = DistinctHash.GetHash(arrayHash, itemHash);
    }

    return arrayHash;
}
/// <summary>
/// Adds a string to the distinct map.
/// </summary>
/// <param name="value">The string to add.</param>
/// <returns>Whether or not the value was successfully added.</returns>
/// <remarks>
/// Strings whose UTF-8 encoding fits in 16 bytes are stored as their zero-padded bytes,
/// bucketed by encoded length; longer strings are stored as their hash.
/// </remarks>
private bool AddStringValue(string value)
{
    int utf8Length = Encoding.UTF8.GetByteCount(value);

    // The string is too large to keep with full fidelity, so only its hash is stored.
    if (utf8Length > UnorderdDistinctMap.UInt128Length)
    {
        UInt128 uint128Value = DistinctHash.GetHash(CosmosString.Create(value));
        return this.stringsLength16Plus.Add(uint128Value);
    }

    // The empty string has a dedicated slot and needs no buffer at all.
    if (utf8Length == 0)
    {
        return this.AddSimpleValue(SimpleValues.EmptyString);
    }

    // The string fits in 16 bytes, so its bytes are used directly as the key.
    Span<byte> utf8Buffer = stackalloc byte[UnorderdDistinctMap.UInt128Length];

    // The contents of stackalloc memory are undefined by the language, and the reads below
    // cover more bytes than the string occupies. Zero the buffer explicitly so that the
    // trailing bytes are always 0 and equal strings always map to equal keys.
    utf8Buffer.Clear();
    Encoding.UTF8.GetBytes(value, utf8Buffer);

    if (utf8Length <= UnorderdDistinctMap.UIntLength)
    {
        uint uintValue = MemoryMarshal.Read<uint>(utf8Buffer);
        return this.stringsLength4.Add(uintValue);
    }

    if (utf8Length <= UnorderdDistinctMap.ULongLength)
    {
        ulong uLongValue = MemoryMarshal.Read<ulong>(utf8Buffer);
        return this.stringsLength8.Add(uLongValue);
    }

    UInt128 uInt128Value = UInt128.FromByteArray(utf8Buffer);
    return this.stringsLength16.Add(uInt128Value);
}
/// <summary>
/// Adds an object value to the distinct map.
/// </summary>
/// <param name="cosmosObject">The object to add.</param>
/// <returns>Whether or not the value was successfully added.</returns>
/// <remarks>Objects are tracked by their hash rather than by their contents.</remarks>
private bool AddObjectValue(CosmosObject cosmosObject)
    => this.objects.Add(DistinctHash.GetHash(cosmosObject));
/// <summary>
/// Adds an array value to the distinct map.
/// </summary>
/// <param name="array">The array to add.</param>
/// <returns>Whether or not the value was successfully added.</returns>
/// <remarks>Arrays are tracked by their hash rather than by their contents.</remarks>
private bool AddArrayValue(CosmosArray array)
    => this.arrays.Add(DistinctHash.GetHash(array));
/// <summary>
/// Computes the hash of a cosmos element using the root hash seed.
/// </summary>
/// <param name="cosmosElement">The cosmos element to hash.</param>
/// <returns>The hash of the element, seeded with <c>RootHashSeed</c>.</returns>
public static UInt128 GetHash(CosmosElement cosmosElement)
    => DistinctHash.GetHash(cosmosElement, DistinctHash.RootHashSeed);
/// <summary>
/// Computes the hash of a 128-bit value under a seed.
/// </summary>
/// <param name="value">The value to hash.</param>
/// <param name="seed">The seed.</param>
/// <returns>The hash of the byte representation of <paramref name="value"/>.</returns>
public static UInt128 GetHash(UInt128 value, UInt128 seed)
    => DistinctHash.GetHash(UInt128.ToByteArray(value), seed);
/// <summary>
/// Computes the hash of a boolean JSON value.
/// </summary>
/// <param name="boolean">The boolean to hash.</param>
/// <param name="seed">The seed.</param>
/// <returns>The hash of the true or false seed constant under <paramref name="seed"/>.</returns>
private static UInt128 GetBooleanHash(bool boolean, UInt128 seed)
{
    // true and false each have their own seed constant; hash whichever applies.
    if (boolean)
    {
        return DistinctHash.GetHash(DistinctHash.TrueHashSeed, seed);
    }

    return DistinctHash.GetHash(DistinctHash.FalseHashSeed, seed);
}
/// <summary>
/// Computes the hash of a null JSON value.
/// </summary>
/// <param name="seed">The seed to use.</param>
/// <returns>The hash of the null seed constant under <paramref name="seed"/>.</returns>
private static UInt128 GetNullHash(UInt128 seed)
    => DistinctHash.GetHash(DistinctHash.NullHashSeed, seed);