Ejemplo n.º 1
0
            /// <summary>
            /// Sorts the first <paramref name="cachedLength"/> ints found at <c>_startPtr</c> by copying
            /// them into the scratch buffer at <c>_tempStart</c>, running the bitonic sorter on the
            /// scratch buffer, and copying the same number of ints back.
            /// </summary>
            /// <param name="cachedLength">Number of <c>int</c> elements to sort.</param>
            void CopyAndSortWithBitonic(uint cachedLength)
            {
                var source  = _startPtr;
                var scratch = _tempStart;

                // Payload size in bytes; used for both the copy-in and the copy-out.
                var payloadBytes = cachedLength * sizeof(int);

                // Element count with the low three bits cleared, i.e. rounded down to a multiple of 8.
                var wholeVectorsLength = cachedLength & ~0b111;

                // Fill the slot that starts at the rounded-down offset with int.MaxValue BEFORE copying
                // the payload: the copy then overwrites the leading part of that slot with real data and
                // whatever remains acts as padding (presumably so it ends up after the real elements
                // once sorted - confirm against BitonicSort).
                var padding = Vector256.Create(int.MaxValue);
                Store(scratch + wholeVectorsLength, padding);
                Unsafe.CopyBlockUnaligned(scratch, source, payloadBytes);

                // Sort one extra group of 8 beyond the rounded-down length, capped at the largest size
                // the bitonic sorter supports.
                var sortLength = (int)Math.Min(wholeVectorsLength + 8, BitonicSort<int>.MaxBitonicSortSize);
                BitonicSort<int>.Sort(scratch, sortLength);

                // Only the original payload is copied back; the padding stays in the scratch buffer.
                Unsafe.CopyBlockUnaligned(source, scratch, payloadBytes);
            }
Ejemplo n.º 2
0
            /// <summary>
            /// Recursively sorts the inclusive range [<paramref name="left"/>, <paramref name="right"/>] of ints.
            /// Ranges of up to 3 elements are sorted with compare-and-swap calls, ranges shorter than
            /// <c>SMALL_SORT_THRESHOLD_ELEMENTS</c> go to <c>BitonicSort</c> (or <c>InsertionSort</c>),
            /// an exhausted <paramref name="depthLimit"/> falls back to <c>HeapSort</c>, and everything else is
            /// partitioned around a median-of-three pivot before recursing into both sides.
            /// </summary>
            /// <param name="left">Pointer to the first element of the range.</param>
            /// <param name="right">Pointer to the last element of the range (length is <c>right - left + 1</c>).</param>
            /// <param name="realignHint">
            /// Two 32-bit alignment hints packed into one long: the low 32 bits relate to <paramref name="left"/>,
            /// the high 32 bits to <paramref name="right"/>. The bits selected by <c>REALIGN_LEFT</c> /
            /// <c>REALIGN_RIGHT</c> request that the corresponding half be recomputed by this call.
            /// </param>
            /// <param name="depthLimit">
            /// Number of partitioning levels still allowed; when it reaches 0 the range is handed to <c>HeapSort</c>.
            /// </param>
            internal void HybridSort(int *left, int *right, long realignHint, int depthLimit)
            {
                Debug.Assert(left <= right);

                var length = (int)(right - left + 1);

                int *mid;

                // Tiny ranges are handled with explicit compare-and-swap calls.
                // Note that these early returns happen before _depth is incremented.
                switch (length)
                {
                case 0:
                case 1:
                    return;

                case 2:
                    SwapIfGreater(left, right);
                    return;

                case 3:
                    mid = right - 1;
                    SwapIfGreater(left, mid);
                    SwapIfGreater(left, right);
                    SwapIfGreater(mid, right);
                    return;
                }

                // Track recursion depth; every exit path below decrements it again.
                _depth++;

                // SMALL_SORT_THRESHOLD_ELEMENTS is guaranteed (and asserted) to be a multiple of 8,
                // so we can check that length is strictly smaller, knowing that we will round up to
                // SMALL_SORT_THRESHOLD_ELEMENTS exactly and no more.
                // This is critical given that we only have a limited number of implementations of
                // the vectorized bitonic sort.
                if (length < SMALL_SORT_THRESHOLD_ELEMENTS)
                {
                    // Round length up to the next multiple of 8 (unchanged if it already is one).
                    var nextLength = (length & 7) > 0 ? (length + V256_N) & ~7: length;

                    Debug.Assert(nextLength <= BitonicSort <int> .MaxBitonicSortSize);
                    // The rounded-up range is obtained by extending to the LEFT: the sort starts
                    // extraSpaceNeeded elements before 'left' and still ends at 'right'.
                    // NOTE(review): presumably this is safe because the elements before 'left' are
                    // already in their final place and not greater than anything in this range - confirm.
                    var extraSpaceNeeded = nextLength - length;
                    var fakeLeft         = left - extraSpaceNeeded;
                    if (fakeLeft >= _startPtr)
                    {
                        BitonicSort <int> .Sort(fakeLeft, nextLength);
                    }
                    else
                    {
                        // Extending left would reach before _startPtr, so sort exactly [left, right] instead.
                        InsertionSort(left, right);
                    }
                    _depth--;
                    return;
                }

                // Detect a whole bunch of bad cases where partitioning
                // will not do well:
                // 1. Reverse sorted array
                // 2. High degree of repeated values (dutch flag problem, one value)
                if (depthLimit == 0)
                {
                    HeapSort(new Span <int>(left, (int)(right - left + 1)));
                    _depth--;
                    return;
                }
                depthLimit--;

                // This is going to be a bit weird:
                // Pre/post alignment calculations happen here: we prepare hints to the
                // partition function of how much to align and in which direction (pre/post).
                // The motivation for doing these calculations here and the actual alignment inside the
                // partitioning code is that here, we can cache those calculations.
                // As we recurse to the left we can reuse the left cached calculation, and when we recurse
                // to the right we reuse the right calculation, so we can avoid re-calculating the same aligned
                // addresses throughout the recursion, at the cost of minor code complexity.
                // Since we branch on the magic values REALIGN_LEFT & REALIGN_RIGHT it's safe to assume
                // that we are not torturing the branch predictor.

                // We use a long as a "struct" to pass on alignment hints to the partitioning,
                // by packing 2 32-bit elements into it, as the JIT seems to not do this.
                // In reality we need more like 2x 4 bits for each side, but I don't think
                // there is a real difference.

                // Round 'left' down to the alignment boundary.
                var preAlignedLeft     = (int *)((ulong)left & ~ALIGN_MASK);
                // Pointer difference is a signed 64-bit value; shifting it right by 63 yields
                // all ones (-1) when preAlignedLeft lies before _startPtr and 0 otherwise.
                var cannotPreAlignLeft = (preAlignedLeft - _startPtr) >> 63;
                // Offset (in elements) from 'left' to the aligned address; when pre-alignment is not
                // possible the mask lets V256_N through, moving the target forward instead of backward.
                var preAlignLeftOffset = (preAlignedLeft - left) + (V256_N & cannotPreAlignLeft);

                if ((realignHint & REALIGN_LEFT) != 0)
                {
                    // Alignment flow:
                    // * Calculate pre-alignment on the left
                    // * See if it would cause us an out-of-bounds read
                    // * Since we'd like to avoid that, we adjust for post-alignment
                    // * There are no branches since we do branch->arithmetic
                    // Clear the low 32 bits (left hint), then store the new left offset.
                    // NOTE(review): preAlignLeftOffset is a long that is negative whenever pre-alignment
                    // moves backward; OR-ing it in unmasked also sets the upper 32 bits (the right hint).
                    // Presumably harmless because the right hint then gets recomputed below - confirm
                    // against the REALIGN_RIGHT value.
                    realignHint &= unchecked ((long)0xFFFFFFFF00000000UL);
                    realignHint |= preAlignLeftOffset;
                }

                // right is pointing just PAST the last element we intend to partition (where we also store the pivot),
                // so we calculate alignment based on right - 1, and YES: the cast to ulong happens before the -1
                // (the subtraction binds tighter than the &), so the ADDRESS is decremented by 1 byte, not 1 element.
                // This is intentional since the whole thing is either aligned to 32 bytes or not, so decrementing the
                // POINTER value by 1 is sufficient for the alignment, and the JIT sucks at this anyway.
                var preAlignedRight     = (int *)(((ulong)right - 1 & ~ALIGN_MASK) + ALIGN);
                // All ones (-1) when preAlignedRight lies past _endPtr, 0 otherwise.
                var cannotPreAlignRight = (_endPtr - preAlignedRight) >> 63;
                // Offset (in elements) from 'right' to the aligned address; pulled back by V256_N
                // when the aligned address would be out of bounds.
                var preAlignRightOffset = (preAlignedRight - right - (V256_N & cannotPreAlignRight));

                if ((realignHint & REALIGN_RIGHT) != 0)
                {
                    // Keep only the low 32 bits (left hint), then store the new right offset in the high 32 bits.
                    realignHint &= 0xFFFFFFFF;
                    realignHint |= preAlignRightOffset << 32;
                }

                // Both hinted addresses must land on an alignment boundary.
                Debug.Assert(((ulong)(left + (realignHint & 0xFFFFFFFF)) & ALIGN_MASK) == 0);
                Debug.Assert(((ulong)(right + (realignHint >> 32)) & ALIGN_MASK) == 0);

                // Compute median-of-three, of:
                // the first, mid and one before last elements
                mid = left + (right - left) / 2;
                SwapIfGreater(left, mid);
                SwapIfGreater(left, right - 1);
                SwapIfGreater(mid, right - 1);

                // Pivot is mid, place it in the right hand side
                Swap(mid, right);

                // Shorter ranges use the single-vector partitioner, longer ones the 8-vector variant.
                var sep = length < PARTITION_TMP_SIZE_IN_ELEMENTS?
                          Partition1VectorInPlace(left, right, realignHint) :
                              Partition8VectorsInPlace(left, right, realignHint);

                // Recurse into [left, sep - 2] and [sep, right]; the element at sep - 1 is skipped
                // (presumably the pivot in its final position - confirm against the partition functions).
                // The left recursion gets a new right boundary, so it asks for the right hint to be
                // recomputed, and vice versa.
                HybridSort(left, sep - 2, realignHint | REALIGN_RIGHT, depthLimit);
                HybridSort(sep, right, realignHint | REALIGN_LEFT, depthLimit);
                _depth--;
            }