Beispiel #1
0
        /// <summary>
        /// Checks that the partition index computed from a Value's hash selects a
        /// PartitionMask that matches the same hash, and that a Value populated via
        /// Assign produces the same hash as one built via Create.
        /// </summary>
        public void Value_HashCheck()
        {
            int  sampleId = 1253432;
            byte bits     = 1;

            // Wrap the id, hash the wrapper, and locate the partition for that hash
            Value wrapped        = Value.Create(sampleId);
            int   wrappedHash    = wrapped.GetHashCode();
            int   partitionIndex = PartitionMask.IndexOfHash(wrappedHash, bits);

            // The mask stored at that index must agree that the hash belongs to it
            PartitionMask owningMask = PartitionMask.BuildSet(bits)[partitionIndex];

            // Not asserted on; kept so the mask is readable while debugging
            string owningMaskName = owningMask.ToString();

            Assert.IsTrue(owningMask.Matches(wrappedHash));

            // Start from a null Value and Assign the id; the hash must not differ
            Value reassigned = Value.Create(null);
            reassigned.Assign(sampleId);

            Assert.AreEqual(wrappedHash, reassigned.GetHashCode());

            // Hash and partition of the raw (unwrapped) int, computed only so they
            // can be compared against the wrapped ones in a debugger
            int rawHash      = sampleId.GetHashCode();
            int rawPartition = PartitionMask.IndexOfHash(rawHash, bits);
        }
Beispiel #2
0
            /// <summary>
            /// Computes the target partition for each item in the ReadOnlyDataBlock and,
            /// for each partition, the running start offset derived from the per-partition
            /// row counts.
            /// </summary>
            /// <param name="table">Table where values will be added</param>
            /// <param name="values">DataBlock containing values to be added to the table</param>
            /// <param name="idColumnIndex">Index of the id column</param>
            /// <param name="partitionIds">[Out] array of the partition ids for each element</param>
            /// <param name="partitionInfo">
            /// [Out] one entry per table partition. StartIndex is -1 for a partition that
            /// received no rows; otherwise it is the sum of the row counts of the preceding
            /// partitions. Count is always returned as zero (see the note in the body).
            /// </param>
            public void ComputePartition(Table table, DataBlock.ReadOnlyDataBlock values, int idColumnIndex, out int[] partitionIds, out TargetPartitionInfo[] partitionInfo)
            {
                int rowCount = values.RowCount;

                // TODO: [danny chen] it would be nice if I could get rid of this tunneling of GetColumn
                // from the ReadOnlyDataBlock (and avoid the special casing for non-projected blocks)
                // but I can't see a way to allow strongly typed random access without a bunch of work
                // incurred on each access (fetch, cast the array).
                T[] idColumn = (T[])values.GetColumn(idColumnIndex);

                int[] localPartitionIds = new int[rowCount];
                TargetPartitionInfo[] localPartitionInfo = new TargetPartitionInfo[table.PartitionCount];

                // Partitioner.Create(from, to) throws ArgumentOutOfRangeException when
                // to <= from, so an empty block must skip the parallel pass entirely.
                // The outputs are then an empty id array and all-empty partition info.
                if (rowCount > 0)
                {
                    // Read once; the value does not change while the rows are hashed
                    var partitionBits = table._partitionBits;

                    var rangePartitioner = Partitioner.Create(0, rowCount);

                    Parallel.ForEach(rangePartitioner,
                                     delegate(Tuple<int, int> range, ParallelLoopState unused)
                    {
                        // One wrapper pair per range, reused for every row in that range
                        ValueTypeReference<T> vtr = new ValueTypeReference<T>();
                        Value v = Value.Create(null);

                        for (int i = range.Item1; i < range.Item2; ++i)
                        {
                            // Hash the ID for each item and compute the partition that the item belongs to
                            vtr.Value = idColumn[i];
                            v.Assign(vtr);
                            int idHash      = v.GetHashCode();
                            int partitionId = PartitionMask.IndexOfHash(idHash, partitionBits);

                            // Each row index is written by exactly one range, but several
                            // ranges can hit the same partition, hence the interlocked count
                            localPartitionIds[i] = partitionId;
                            Interlocked.Increment(ref localPartitionInfo[partitionId].Count);
                        }
                    });
                }

                int nextStartIndex = 0;

                for (int i = 0; i < table.PartitionCount; ++i)
                {
                    if (localPartitionInfo[i].Count == 0)
                    {
                        // No rows landed in this partition
                        localPartitionInfo[i].StartIndex = -1;
                    }
                    else
                    {
                        localPartitionInfo[i].StartIndex = nextStartIndex;
                        nextStartIndex += localPartitionInfo[i].Count;

                        // NOTE: Count field is cleared here because it is
                        //   reused to track per-partition indexes when
                        //   building up the sort key data
                        localPartitionInfo[i].Count = 0;
                    }
                }

                partitionIds  = localPartitionIds;
                partitionInfo = localPartitionInfo;
            }
        /// <summary>
        /// Verifies the string form of the two-bit PartitionMask set, then checks that
        /// PartitionMask.IndexOfHash returns the top bitCount bits of the hash for every
        /// possible value of the hash's most significant byte (0 through 255 inclusive).
        /// </summary>
        public void PartitionMask_GetHashIndex()
        {
            const byte bitsPerByte = 8;
            byte       bitCount    = 2;

            Assert.AreEqual("00, 01, 10, 11", String.Join(", ", (IEnumerable<PartitionMask>)PartitionMask.BuildSet(bitCount)));

            // The counter is an int so the loop can include byte.MaxValue itself; a byte
            // counter compared with "<= byte.MaxValue" would wrap to 0 and never terminate.
            for (int b = 0; b <= byte.MaxValue; ++b)
            {
                int value         = b << (32 - bitsPerByte); // shift so the test value is in the highest bits
                int expectedIndex = b >> (bitsPerByte - bitCount); // keep only the top bitCount bits of the byte
                Assert.AreEqual(expectedIndex, PartitionMask.IndexOfHash(value, bitCount));
            }
        }
Beispiel #4
0
        /// <summary>
        /// Hashes 100,000 distinct string Values, buckets them with
        /// PartitionMask.IndexOfHash, and asserts that the gap between the fullest and
        /// emptiest bucket is under 5% of the emptiest bucket's size.
        /// </summary>
        public void Value_HashDistribution()
        {
            int itemCount = 100 * 1000;

            // Partition bits = ceil(log2(itemCount)) - 16, never below zero
            double bitsForItemCount = Math.Ceiling(Math.Log(itemCount, 2));
            byte   bitCount         = (byte)Math.Max(bitsForItemCount - 16, 0);

            // One counter per partition (2^bitCount partitions)
            int[] bucketSizes = new int[1 << bitCount];

            for (int n = 0; n < itemCount; ++n)
            {
                Value name   = Value.Create(String.Format("Table.{0}", n));
                int   bucket = PartitionMask.IndexOfHash(name.GetHashCode(), bitCount);
                bucketSizes[bucket]++;
            }

            int smallest = bucketSizes.Min();
            int largest  = bucketSizes.Max();

            // Relative spread between the extremes, measured against the smallest bucket
            Assert.IsTrue((largest - smallest) / (float)smallest < 0.05, "Distribution of items was not very even.");
        }