public void PartitionMask_Basic()
{
    // Bit patterns reused throughout the checks below
    const int noBitsSet = 0;
    const int allBitsSet = ~0;
    const int onlyTopBitSet = unchecked((int)0x80000000);
    const int allButTopBitSet = 0x7FFFFFFF;

    PartitionMask candidate = PartitionMask.All;

    // 'All' starts out with no bits and a zero value
    Assert.AreEqual(0, candidate.BitCount);
    Assert.AreEqual(0, candidate.Value);

    // A mask with zero bits accepts every value and prints as empty
    candidate.BitCount = 0;
    Assert.IsTrue(candidate.Matches(noBitsSet));
    Assert.IsTrue(candidate.Matches(allBitsSet));
    Assert.AreEqual("", candidate.ToString());

    // Prefix "1": accepts only values whose highest bit is set
    candidate.BitCount = 1;
    candidate.Value = 0x1 << 31;
    Assert.IsFalse(candidate.Matches(noBitsSet));
    Assert.IsTrue(candidate.Matches(allBitsSet));
    Assert.IsTrue(candidate.Matches(onlyTopBitSet));
    Assert.IsFalse(candidate.Matches(allButTopBitSet));
    Assert.AreEqual("1", candidate.ToString());

    // Prefix "0": accepts only values whose highest bit is clear
    candidate.BitCount = 1;
    candidate.Value = 0;
    Assert.IsTrue(candidate.Matches(noBitsSet));
    Assert.IsFalse(candidate.Matches(allBitsSet));
    Assert.IsFalse(candidate.Matches(onlyTopBitSet));
    Assert.IsTrue(candidate.Matches(allButTopBitSet));
    Assert.AreEqual("0", candidate.ToString());

    // Prefix "11": accepts only values whose two highest bits are both set
    candidate.BitCount = 2;
    candidate.Value = 0x3 << 30;
    Assert.IsFalse(candidate.Matches(noBitsSet));
    Assert.IsTrue(candidate.Matches(allBitsSet));
    Assert.IsFalse(candidate.Matches(onlyTopBitSet));
    Assert.IsTrue(candidate.Matches(unchecked((int)0xC0000000)));
    Assert.IsTrue(candidate.Matches(unchecked((int)0xE0000000)));
    Assert.IsFalse(candidate.Matches(allButTopBitSet));
    Assert.AreEqual("11", candidate.ToString());

    // Prefix "1101": accepts only values whose four highest bits are exactly 1101
    candidate.BitCount = 4;
    candidate.Value = 0xD << 28;
    Assert.IsFalse(candidate.Matches(noBitsSet));
    Assert.IsFalse(candidate.Matches(allBitsSet));
    Assert.IsFalse(candidate.Matches(unchecked((int)0xC0000000)));
    Assert.IsTrue(candidate.Matches(unchecked((int)0xD0000000)));
    Assert.IsTrue(candidate.Matches(unchecked((int)0xD7777777)));
    Assert.IsFalse(candidate.Matches(unchecked((int)0xE0000000)));
    Assert.IsFalse(candidate.Matches(unchecked((int)0xF0000000)));
    Assert.IsFalse(candidate.Matches(allButTopBitSet));
    Assert.AreEqual("1101", candidate.ToString());

    // Mutating the local copy must not have changed the shared 'All' mask
    Assert.AreEqual(0, PartitionMask.All.BitCount);
    Assert.AreEqual(0, PartitionMask.All.Value);
}
public void Value_HashCheck()
{
    const byte partitionBits = 1;
    const int id = 1253432;

    // Wrap the id, hash it, and look up which partition the hash lands in
    Value wrappedId = Value.Create(id);
    int hash = wrappedId.GetHashCode();
    int partitionIndex = PartitionMask.IndexOfHash(hash, partitionBits);

    // The mask at that index must agree that the hash belongs to it
    PartitionMask[] maskSet = PartitionMask.BuildSet(partitionBits);
    PartitionMask owningMask = maskSet[partitionIndex];
    string maskName = owningMask.ToString();
    Assert.IsTrue(owningMask.Matches(hash));

    // A Value populated through Assign must hash the same as one built by Create
    Value reassigned = Value.Create(null);
    reassigned.Assign(id);
    int hashViaAssign = reassigned.GetHashCode();
    Assert.AreEqual(hash, hashViaAssign);

    // Hash of the raw, unwrapped int; kept only so it can be inspected in a debugger
    int wrongHash = id.GetHashCode();
    int wrongPartition = PartitionMask.IndexOfHash(wrongHash, partitionBits);
}
/// <summary>
///  Serialization-only constructor. Starts the partition with an item count
///  of zero, empty column and column-details lists keyed case-insensitively
///  by name, and the <see cref="PartitionMask.All"/> mask.
/// </summary>
internal Partition()
{
    // Both lookups share the same ordinal, case-insensitive key comparison
    StringComparer nameComparer = StringComparer.OrdinalIgnoreCase;

    _itemCount = 0;
    this.Columns = new SortedList<string, IUntypedColumn>(nameComparer);
    this.DetailsByColumn = new SortedList<string, ColumnDetails>(nameComparer);
    this.Mask = PartitionMask.All;
}
/// <summary>
///  Works out, for every row of the block, which partition of the table the
///  row's id hashes into, and assigns each non-empty partition a start offset.
/// </summary>
/// <param name="table">Table where values will be added</param>
/// <param name="values">DataBlock containing values to be added to the table</param>
/// <param name="idColumnIndex">Index of the id column</param>
/// <param name="partitionIds">[Out] partition index for each row, in row order</param>
/// <param name="partitionInfo">
///  [Out] one entry per table partition. StartIndex is -1 for a partition that
///  received no rows; otherwise it is the sum of the row counts of the preceding
///  partitions. Count is zero in every entry on return.
/// </param>
public void ComputePartition(Table table, DataBlock.ReadOnlyDataBlock values, int idColumnIndex, out int[] partitionIds, out TargetPartitionInfo[] partitionInfo)
{
    int rowCount = values.RowCount;

    // TODO: [danny chen] ideally GetColumn would not need to be tunneled through
    // ReadOnlyDataBlock (with the special casing for non-projected blocks), but there
    // is no obvious way to get strongly typed random access without paying a fetch
    // and an array cast on every access.
    T[] idColumn = (T[])values.GetColumn(idColumnIndex);

    int[] rowToPartition = new int[rowCount];
    TargetPartitionInfo[] perPartition = new TargetPartitionInfo[table.PartitionCount];

    // Pass 1 (parallel over row ranges): hash each id, record its partition, tally per-partition counts
    Parallel.ForEach(
        Partitioner.Create(0, rowCount),
        (Tuple<int, int> range, ParallelLoopState unused) =>
        {
            // One reusable wrapper and Value per range
            ValueTypeReference<T> idReference = new ValueTypeReference<T>();
            Value hashSource = Value.Create(null);

            for (int row = range.Item1; row < range.Item2; ++row)
            {
                idReference.Value = idColumn[row];
                hashSource.Assign(idReference);

                int idHash = hashSource.GetHashCode();
                int target = PartitionMask.IndexOfHash(idHash, table._partitionBits);

                rowToPartition[row] = target;

                // Ranges run concurrently, so the shared tally is bumped atomically
                Interlocked.Increment(ref perPartition[target].Count);
            }
        });

    // Pass 2 (sequential): turn the tallies into running start offsets
    int nextStartIndex = 0;
    for (int p = 0; p < table.PartitionCount; ++p)
    {
        int rowsInPartition = perPartition[p].Count;

        if (rowsInPartition == 0)
        {
            perPartition[p].StartIndex = -1;
            continue;
        }

        perPartition[p].StartIndex = nextStartIndex;
        nextStartIndex += rowsInPartition;

        // NOTE: Count is reset because it is reused to track per-partition
        // indexes when the sort key data is built up
        perPartition[p].Count = 0;
    }

    partitionIds = rowToPartition;
    partitionInfo = perPartition;
}
/// <summary>
/// Calculate number of words to fit complete instruction bytecode.
/// </summary>
/// <returns>
/// Sum of the word counts reported by IdResultType, IdResult, SrcCoord,
/// PartitionMask and SADAdjustment.
/// </returns>
public override uint GetWordCount()
{
    uint total = 0;

    // Result type and result id
    total += IdResultType.GetWordCount();
    total += IdResult.GetWordCount();

    // Instruction operands
    total += SrcCoord.GetWordCount();
    total += PartitionMask.GetWordCount();
    total += SADAdjustment.GetWordCount();

    return total;
}
public void PartitionMask_GetHashIndex()
{
    const int bitsPerByte = 8;
    const int bitsPerHash = 32;
    byte bitCount = 2;

    // A two-bit set enumerates every two-bit prefix in ascending order
    Assert.AreEqual("00, 01, 10, 11", String.Join(", ", (IEnumerable<PartitionMask>)PartitionMask.BuildSet(bitCount)));

    // Walk every possible top byte, 0 through 255 inclusive. The loop variable is an
    // int because a byte counter cannot express "<= 255" without wrapping around,
    // which previously caused the value 255 to be skipped.
    for (int b = byte.MinValue; b <= byte.MaxValue; ++b)
    {
        // Shift so the test value occupies the highest bits of the hash
        int value = b << (bitsPerHash - bitsPerByte);

        // The expected partition index is the top 'bitCount' bits of the byte
        int expectedIndex = b >> (bitsPerByte - bitCount);

        Assert.AreEqual(expectedIndex, PartitionMask.IndexOfHash(value, bitCount));
    }
}
/// <summary>
///  Creates a new table with enough partitions to hold the requested number of items
/// </summary>
/// <param name="tableName">name of the table</param>
/// <param name="requiredItemCount">number of items the table is required to hold (it may be capable of holding more); this will dictate the partition count</param>
public Table(string tableName, long requiredItemCount) : this()
{
    this.Name = tableName;

    // Each partition covers 2^16 (64k) items, so the number of partition bits is
    // however far ceil(log2(item count)) exceeds 16, and never less than zero.
    double bitsForAllItems = Math.Ceiling(Math.Log(requiredItemCount, 2));
    double bitsBeyondOnePartition = bitsForAllItems - 16;
    _partitionBits = (byte)Math.Max(bitsBeyondOnePartition, 0);

    // Replace any existing partitions with one partition per mask in the set
    _partitions.Clear();
    foreach (PartitionMask partitionMask in PartitionMask.BuildSet(_partitionBits))
    {
        _partitions.Add(new Partition(partitionMask));
    }
}
/// <summary>
///  Creates a new table large enough to hold the number of items specified
/// </summary>
/// <param name="tableName">name of the table</param>
/// <param name="requiredItemCount">
///  number of items the table is required to hold (it may be capable of holding more);
///  this, padded by 5%, will dictate the partition count
/// </param>
public Table(string tableName, long requiredItemCount) : this()
{
    this.Name = tableName;

    // Pad the min row count by 5% to account for imperfect distribution of items
    // based on the hashing algorithm.
    long paddedMinItemCount = (long)(requiredItemCount * 1.05);

    // Translate the padded item limit to a number of partition bits (64k items per
    // partition). The padded count must be used here; sizing from the raw
    // requiredItemCount would silently discard the padding computed above.
    _partitionBits = (byte)Math.Max(Math.Ceiling(Math.Log(paddedMinItemCount, 2)) - 16, 0);

    // Create the partitions, one per mask in the set
    PartitionMask[] maskSet = PartitionMask.BuildSet(_partitionBits);

    _partitions.Clear();
    for (int i = 0; i < maskSet.Length; ++i)
    {
        Partition p = new Partition(maskSet[i]);
        _partitions.Add(p);
    }
}
public void Value_HashDistribution()
{
    const int totalItems = 100 * 1000;

    // Same sizing formula the test uses for partition bits: 64k items per partition
    double bitsForAllItems = Math.Ceiling(Math.Log(totalItems, 2));
    byte bitCount = (byte)Math.Max(bitsForAllItems - 16, 0);

    int[] tally = new int[1 << bitCount];

    // Hash a run of distinct string values and count how many land in each partition
    for (int n = 0; n < totalItems; ++n)
    {
        Value item = Value.Create(String.Format("Table.{0}", n));
        int bucket = PartitionMask.IndexOfHash(item.GetHashCode(), bitCount);
        tally[bucket] += 1;
    }

    // The spread between the fullest and emptiest partition must stay under 5% of the emptiest
    int smallest = tally.Min();
    int largest = tally.Max();
    int spread = largest - smallest;
    bool evenlyDistributed = spread / (float)smallest < 0.05;

    Assert.IsTrue(evenlyDistributed, "Distribution of items was not very even.");
}
public void PartitionMask_BuildSet()
{
    const string separator = ", ";
    IEnumerable<PartitionMask> masks;

    // Zero bits: the joined output is the empty string
    masks = PartitionMask.BuildSet(0);
    Assert.AreEqual("", String.Join(separator, masks));

    // One bit: both single-bit prefixes, in ascending order
    masks = PartitionMask.BuildSet(1);
    Assert.AreEqual("0, 1", String.Join(separator, masks));

    // Two bits: all four two-bit prefixes, in ascending order
    masks = PartitionMask.BuildSet(2);
    Assert.AreEqual("00, 01, 10, 11", String.Join(separator, masks));
}
/// <summary>
///  Runs the parameterless constructor, then replaces the partition's mask
///  with the one supplied.
/// </summary>
/// <param name="mask">Mask to assign to this partition</param>
internal Partition(PartitionMask mask)
    : this()
{
    Mask = mask;
}
/// <summary>
/// Write instruction operands into bytecode stream.
/// </summary>
/// <remarks>
/// Operands are emitted in this fixed order: SrcCoord, PartitionMask, SADAdjustment.
/// </remarks>
/// <param name="writer">Bytecode writer.</param>
public override void WriteOperands(WordWriter writer)
{
    // The order of these calls determines the operand order in the stream
    SrcCoord.Write(writer);
    PartitionMask.Write(writer);
    SADAdjustment.Write(writer);
}