/// <summary>
/// Checks that the hash of a wrapped integer Value lands in a partition whose mask
/// matches that hash, and that Value.Assign produces the same hash as Value.Create.
/// </summary>
public void Value_HashCheck()
{
    int id = 1253432;
    byte partitionBits = 1;

    // Wrap the id, hash the wrapper, and find the partition index for that hash
    Value wrappedId = Value.Create(id);
    int hash = wrappedId.GetHashCode();
    int partitionIndex = PartitionMask.IndexOfHash(hash, partitionBits);

    // The mask stored at that index must report a match for the same hash
    var maskSet = PartitionMask.BuildSet(partitionBits);
    PartitionMask mask = maskSet[partitionIndex];
    string maskName = mask.ToString(); // not asserted on; handy when stepping through
    Assert.IsTrue(mask.Matches(hash));

    // A Value populated through Assign must hash the same as one built by Create
    Value assigned = Value.Create(null);
    assigned.Assign(id);
    int hashViaAssign = assigned.GetHashCode();
    Assert.AreEqual(hash, hashViaAssign);

    // Hash and partition of the raw (unwrapped) int, computed only so they can be
    // inspected in a debugger; nothing below asserts on them
    int unwrappedHash = id.GetHashCode();
    int unwrappedPartition = PartitionMask.IndexOfHash(unwrappedHash, partitionBits);
}
/// <summary>
/// Computes the target partition for each item in the ReadOnlyDataBlock
/// </summary>
/// <param name="table">Table where values will be added</param>
/// <param name="values">DataBlock containing values to be added to the table</param>
/// <param name="idColumnIndex">Index of the id column</param>
/// <param name="partitionIds">[Out] array of the partition ids for each element</param>
/// <param name="partitionInfo">[Out] array with one entry per table partition; StartIndex is the
/// running offset of that partition's items (-1 when no item hashed to the partition) and Count
/// is returned as zero for every entry</param>
public void ComputePartition(Table table, DataBlock.ReadOnlyDataBlock values, int idColumnIndex, out int[] partitionIds, out TargetPartitionInfo[] partitionInfo)
{
    int rowCount = values.RowCount;

    // TODO: [danny chen] it would be nice if I could get rid of this tunneling of GetColumn
    // from the ReadOnlyDataBlock (and avoid the special casing for non-projected blocks)
    // but I can't see a way to allow strongly types random access without a bunch of work
    // incurred on each access (fetch, cast the array).
    T[] idColumn = (T[])values.GetColumn(idColumnIndex);

    int[] localPartitionIds = new int[rowCount];
    TargetPartitionInfo[] localPartitionInfo = new TargetPartitionInfo[table.PartitionCount];

    // Partitioner.Create(0, 0) throws ArgumentOutOfRangeException, so an empty block
    // skips the hashing pass entirely and falls through with all counts at zero.
    if (rowCount > 0)
    {
        // Read once up front; the value is the same for every row
        var partitionBits = table._partitionBits;

        var rangePartitioner = Partitioner.Create(0, rowCount);
        Parallel.ForEach(rangePartitioner,
            delegate(Tuple<int, int> range, ParallelLoopState unused)
            {
                // One reference wrapper and one Value per range, reused for every row in it
                ValueTypeReference<T> vtr = new ValueTypeReference<T>();
                Value v = Value.Create(null);

                for (int i = range.Item1; i < range.Item2; ++i)
                {
                    // Hash the ID for each item and compute the partition that the item belongs to
                    vtr.Value = idColumn[i];
                    v.Assign(vtr);
                    int idHash = v.GetHashCode();
                    int partitionId = PartitionMask.IndexOfHash(idHash, partitionBits);

                    localPartitionIds[i] = partitionId;

                    // Ranges run concurrently and share the per-partition counters
                    Interlocked.Increment(ref localPartitionInfo[partitionId].Count);
                }
            });
    }

    // Turn the per-partition counts into start offsets
    int nextStartIndex = 0;
    for (int i = 0; i < table.PartitionCount; ++i)
    {
        if (localPartitionInfo[i].Count == 0)
        {
            localPartitionInfo[i].StartIndex = -1;
        }
        else
        {
            localPartitionInfo[i].StartIndex = nextStartIndex;
            nextStartIndex += localPartitionInfo[i].Count;

            // NOTE: Count field is cleared here because it is
            //       reused to track per-partition indexes when
            //       building up the sort key data
            localPartitionInfo[i].Count = 0;
        }
    }

    partitionIds = localPartitionIds;
    partitionInfo = localPartitionInfo;
}
/// <summary>
/// Verifies that a 2-bit partition set joins to "00, 01, 10, 11" and that, for every
/// possible byte placed in the highest 8 bits of a 32-bit hash, IndexOfHash returns
/// the top bitCount bits of that byte.
/// </summary>
public void PartitionMask_GetHashIndex()
{
    const byte bitsPerByte = 8;
    byte bitCount = 2;

    Assert.AreEqual("00, 01, 10, 11", String.Join(", ", (IEnumerable<PartitionMask>)PartitionMask.BuildSet(bitCount)));

    // The counter is an int so that byte.MaxValue itself is included; a byte counter
    // would wrap to zero before it could ever exceed byte.MaxValue.
    for (int b = 0; b <= byte.MaxValue; ++b)
    {
        int value = b << (32 - bitsPerByte); // shift so the test value is in the highest bits
        int expectedIndex = b >> (bitsPerByte - bitCount); // keep only the top bitCount bits of the byte
        Assert.AreEqual(expectedIndex, PartitionMask.IndexOfHash(value, bitCount));
    }
}
/// <summary>
/// Hashes 100,000 string Values of the form "Table.N", buckets them by partition index,
/// and asserts that the fullest and emptiest partitions differ by less than 5% of the
/// emptiest partition's count.
/// </summary>
public void Value_HashDistribution()
{
    int itemCount = 100 * 1000;

    // Partition bits: ceil(log2(itemCount)) minus 16, never below zero
    double bitsBeyondSixteen = Math.Ceiling(Math.Log(itemCount, 2)) - 16;
    byte bitCount = (byte)Math.Max(bitsBeyondSixteen, 0);

    int[] histogram = new int[1 << bitCount];

    for (int item = 0; item < itemCount; ++item)
    {
        Value v = Value.Create(String.Format("Table.{0}", item));
        histogram[PartitionMask.IndexOfHash(v.GetHashCode(), bitCount)]++;
    }

    int fewest = histogram.Min();
    int most = histogram.Max();

    // Spread between extremes, relative to the emptiest partition
    float relativeSpread = (most - fewest) / (float)fewest;
    Assert.IsTrue(relativeSpread < 0.05, "Distribution of items was not very even.");
}