/// <summary>
/// Sorts the first <paramref name="cachedLength"/> elements found at <c>_startPtr</c> by
/// staging them in the scratch buffer at <c>_tempStart</c>, running the bitonic sorter on
/// the scratch copy, and copying the same number of elements back.
/// </summary>
/// <param name="cachedLength">Number of <c>int</c>-sized elements to copy, sort and copy back.</param>
/// <remarks>
/// The scratch buffer is written up to 8 elements past <paramref name="cachedLength"/> rounded
/// down to a multiple of 8, so it presumably must be sized with that slack in mind
/// (NOTE(review): confirm against the allocation of <c>_tempStart</c>).
/// </remarks>
void CopyAndSortWithBitonic(uint cachedLength)
{
    var source = _startPtr;
    var scratch = _tempStart;

    // Element count rounded down to a multiple of 8 (one Vector256 worth of ints).
    var roundedDown = cachedLength & ~0b111;
    var bytesToCopy = cachedLength * sizeof(int);

    // Fill the trailing vector with int.MaxValue first; the copy that follows overwrites
    // whatever part of it is covered by real data, leaving only padding behind.
    // Presumably the padding ends up at the tail after an ascending sort - TODO confirm.
    Store(scratch + roundedDown, Vector256.Create(int.MaxValue));
    Unsafe.CopyBlockUnaligned(scratch, source, bytesToCopy);

    // Sort whole vectors only, never asking for more than the bitonic sorter supports.
    var paddedLength = (int)Math.Min(roundedDown + 8, BitonicSort<int>.MaxBitonicSortSize);
    BitonicSort<int>.Sort(scratch, paddedLength);

    // Only the original element count is copied back, so the padding is dropped.
    Unsafe.CopyBlockUnaligned(source, scratch, bytesToCopy);
}
/// <summary>
/// Recursively sorts the inclusive range [<paramref name="left"/>, <paramref name="right"/>].
/// Ranges of up to three elements are handled with compare-and-swap steps, ranges below
/// <c>SMALL_SORT_THRESHOLD_ELEMENTS</c> go to the bitonic sorter (or insertion sort), an
/// exhausted <paramref name="depthLimit"/> falls back to heap sort, and everything else is
/// partitioned around a median-of-three pivot before recursing into both halves.
/// </summary>
/// <param name="left">Pointer to the first element of the range.</param>
/// <param name="right">Pointer to the last element of the range (inclusive).</param>
/// <param name="realignHint">
/// Two 32-bit values packed into one <c>long</c>: the low half is the alignment offset for
/// <paramref name="left"/>, the high half the one for <paramref name="right"/>. When the
/// <c>REALIGN_LEFT</c> / <c>REALIGN_RIGHT</c> bits are set, the matching half is recomputed here.
/// </param>
/// <param name="depthLimit">
/// Number of partitioning levels still allowed; at zero the range is handed to <c>HeapSort</c>.
/// </param>
internal void HybridSort(int* left, int* right, long realignHint, int depthLimit)
{
    Debug.Assert(left <= right);

    var length = (int)(right - left + 1);

    // Trivial sizes: nothing to do, or a fixed sequence of compare-and-swaps.
    if (length == 0 || length == 1)
    {
        return;
    }

    if (length == 2)
    {
        SwapIfGreater(left, right);
        return;
    }

    if (length == 3)
    {
        var middle = right - 1;
        SwapIfGreater(left, middle);
        SwapIfGreater(left, right);
        SwapIfGreater(middle, right);
        return;
    }

    _depth++;

    // SMALL_SORT_THRESHOLD_ELEMENTS is guaranteed (and asserted) to be a multiple of 8,
    // so a strict "less than" test means rounding up lands on at most that threshold.
    // This matters because only a limited number of vectorized bitonic sort sizes exist.
    if (length < SMALL_SORT_THRESHOLD_ELEMENTS)
    {
        // Round up to a whole number of 8-element groups (assumes V256_N == 8 - TODO confirm).
        var paddedLength = (length & 7) > 0 ? (length + V256_N) & ~7 : length;
        Debug.Assert(paddedLength <= BitonicSort<int>.MaxBitonicSortSize);

        // The padding is taken from the elements that sit just before `left`.
        // NOTE(review): presumably safe because those elements are already in their final
        // positions and not greater than anything in this range - confirm.
        var padding = paddedLength - length;
        var paddedStart = left - padding;
        if (paddedStart < _startPtr)
        {
            // Extending to the left would leave the buffer: sort only the real range.
            InsertionSort(left, right);
        }
        else
        {
            BitonicSort<int>.Sort(paddedStart, paddedLength);
        }

        _depth--;
        return;
    }

    // Guard against inputs where partitioning does poorly:
    //   1. reverse-sorted arrays
    //   2. a high degree of repeated values (dutch flag problem, a single value)
    if (depthLimit == 0)
    {
        HeapSort(new Span<int>(left, length));
        _depth--;
        return;
    }

    var childDepthLimit = depthLimit - 1;

    // Alignment hints for the partitioning code are prepared here rather than inside it.
    // The recursion to the left keeps the left half of the hint and the recursion to the
    // right keeps the right half, which is the reason the hint is passed down at all.
    // Branching on the magic values REALIGN_LEFT / REALIGN_RIGHT is assumed to be cheap
    // for the branch predictor.
    // A long serves as a two-field "struct" (2 x 32 bits) because the JIT does not seem to
    // pack such a pair itself; about 4 bits per side would be enough in practice.

    // Left side: round `left` down to the alignment boundary. If that lands before
    // _startPtr the shifted difference is -1 (all bits set), which adds V256_N to the
    // offset without a branch, i.e. align forward instead of reading out of bounds.
    var alignedLeft = (int*)((ulong)left & ~ALIGN_MASK);
    var leftOutOfBounds = (alignedLeft - _startPtr) >> 63;
    var leftOffset = (alignedLeft - left) + (V256_N & leftOutOfBounds);
    if ((realignHint & REALIGN_LEFT) != 0)
    {
        // Replace the low 32 bits of the hint with the freshly computed left offset.
        realignHint = (realignHint & unchecked((long)0xFFFFFFFF00000000UL)) | leftOffset;
    }

    // Right side: `right` points just past the last element that gets partitioned (the
    // pivot is stored at `right`), so alignment is derived from right - 1. The -1 is
    // deliberately applied to the raw address (after the cast to ulong): one byte is
    // enough to decide which aligned block the address belongs to.
    var alignedRight = (int*)((((ulong)right - 1) & ~ALIGN_MASK) + ALIGN);
    var rightOutOfBounds = (_endPtr - alignedRight) >> 63;
    var rightOffset = alignedRight - right - (V256_N & rightOutOfBounds);
    if ((realignHint & REALIGN_RIGHT) != 0)
    {
        // Replace the high 32 bits of the hint with the freshly computed right offset.
        realignHint = (realignHint & 0xFFFFFFFF) | (rightOffset << 32);
    }

    Debug.Assert(((ulong)(left + (realignHint & 0xFFFFFFFF)) & ALIGN_MASK) == 0);
    Debug.Assert(((ulong)(right + (realignHint >> 32)) & ALIGN_MASK) == 0);

    // Median-of-three over the first, the middle and the one-before-last element.
    var median = left + (right - left) / 2;
    var beforeLast = right - 1;
    SwapIfGreater(left, median);
    SwapIfGreater(left, beforeLast);
    SwapIfGreater(median, beforeLast);

    // The median is the pivot; park it at the right-hand end of the range.
    Swap(median, right);

    var split = length < PARTITION_TMP_SIZE_IN_ELEMENTS
        ? Partition1VectorInPlace(left, right, realignHint)
        : Partition8VectorsInPlace(left, right, realignHint);

    // The left part ends at split - 2 and the right part starts at split.
    // Each half asks for the hint on its new boundary to be recomputed.
    HybridSort(left, split - 2, realignHint | REALIGN_RIGHT, childDepthLimit);
    HybridSort(split, right, realignHint | REALIGN_LEFT, childDepthLimit);

    _depth--;
}