/// <summary>
/// Performs one in-place most-significant-digit radix pass over the <paramref name="length"/>
/// elements of <paramref name="a"/> starting at <paramref name="first"/>, using the digit obtained
/// by shifting each value right by <paramref name="shiftRightAmount"/> bits and truncating to a byte,
/// and then sorts every resulting bin on the next lower digit.
/// </summary>
/// <remarks>
/// When <paramref name="shiftRightAmount"/> is 56 the digit is XOR-ed with half of the radix before
/// it is used as a bin index, so values whose top bit is set land in the lower bins
/// (NOTE(review): this assumes PowerOfTwoRadix is 256, i.e. the flipped bit is the sign bit - confirm).
/// The per-digit counts come from ParallelAlgorithm.HistogramOneByteComponentPar, which is defined
/// elsewhere; this method relies on those counts matching the digit extraction used here,
/// including the flip at shift 56 (NOTE(review): confirm against that helper).
/// </remarks>
/// <param name="a">Array that is permuted in place.</param>
/// <param name="first">Index of the first element of the range to sort.</param>
/// <param name="length">Number of elements in the range.</param>
/// <param name="shiftRightAmount">Right shift, in bits, that selects the digit used by this pass.</param>
/// <param name="baseCaseInPlaceSort">
/// Sort invoked as (array, start index, element count) for ranges that hold at least two elements
/// but fewer than SortRadixMsdLongThreshold.
/// </param>
private static void RadixSortMsdLongParInner(long[] a, int first, int length, int shiftRightAmount, Action<long[], int, int> baseCaseInPlaceSort)
{
    const int mostSignificantDigitShift = 56;   // shift at which the digit gets its top bit flipped
    const byte halfOfPowerOfTwoRadix = PowerOfTwoRadix / 2;

    int last = first + length - 1;

    var count = ParallelAlgorithm.HistogramOneByteComponentPar(a, first, last, shiftRightAmount);

    // startOfBin[i] stays fixed; endOfBin[i] advances as elements are moved into bin i.
    var startOfBin = new int[PowerOfTwoRadix + 1];
    var endOfBin = new int[PowerOfTwoRadix];
    startOfBin[0] = endOfBin[0] = first;
    startOfBin[PowerOfTwoRadix] = -1;   // sentinel: stops the bin-skipping loop after the last bin
    for (int i = 1; i < PowerOfTwoRadix; i++)
    {
        startOfBin[i] = endOfBin[i] = startOfBin[i - 1] + count[i - 1];
    }

    int bucketsUsed = 0;
    for (int i = 0; i < count.Length; i++)
    {
        if (count[i] > 0)
        {
            bucketsUsed++;
        }
    }

    // With at most one non-empty bin every element already sits in its bin, so no permutation is needed.
    if (bucketsUsed > 1)
    {
        // XOR with 0 leaves the digit untouched, so one loop serves both the top digit and the others.
        byte digitFlipMask = shiftRightAmount == mostSignificantDigitShift ? halfOfPowerOfTwoRadix : (byte)0;
        int nextBin = 1;

        for (int current = first; current <= last;)
        {
            byte digit;

            // Keep swapping the element at 'current' into the next free slot of its own bin
            // until the element that arrives at 'current' belongs exactly there.
            while (endOfBin[digit = (byte)((byte)(a[current] >> shiftRightAmount) ^ digitFlipMask)] != current)
            {
                long temp = a[current];   // swap kept inline; the original notes a 5-10% gain over calling Swap()
                a[current] = a[endOfBin[digit]];
                a[endOfBin[digit]++] = temp;
            }
            endOfBin[digit]++;

            // Skip over empty and full bins: a bin is done once its end reaches the start of the next bin.
            while (endOfBin[nextBin - 1] == startOfBin[nextBin])
            {
                nextBin++;
            }
            current = endOfBin[nextBin - 1];
        }
    }

    // End the recursion when all the bits have been processed.
    if (shiftRightAmount <= 0)
    {
        return;
    }

    int nextShiftRightAmount = shiftRightAmount >= Log2ofPowerOfTwoRadix
        ? shiftRightAmount - Log2ofPowerOfTwoRadix
        : 0;

    if (bucketsUsed > 1)
    {
        for (int i = 0; i < PowerOfTwoRadix; i++)
        {
            RadixSortMsdLongParSortRange(a, startOfBin[i], endOfBin[i] - startOfBin[i], nextShiftRightAmount, baseCaseInPlaceSort);
        }
    }
    else
    {
        // Nothing was separated by this digit: retry the whole range on the next lower digit.
        RadixSortMsdLongParSortRange(a, first, length, nextShiftRightAmount, baseCaseInPlaceSort);
    }
}

/// <summary>
/// Sorts one range on the given digit: ranges of at least SortRadixMsdLongThreshold elements go
/// through another radix pass, ranges of two or more elements go to
/// <paramref name="baseCaseInPlaceSort"/>, and shorter ranges are left untouched.
/// </summary>
private static void RadixSortMsdLongParSortRange(long[] a, int start, int numElements, int shiftRightAmount, Action<long[], int, int> baseCaseInPlaceSort)
{
    if (numElements >= SortRadixMsdLongThreshold)
    {
        RadixSortMsdLongParInner(a, start, numElements, shiftRightAmount, baseCaseInPlaceSort);
    }
    else if (numElements >= 2)
    {
        baseCaseInPlaceSort(a, start, numElements);
    }
}