/// <summary>
/// Serially (without parallelism) pick the Top N or Bottom N items from a collection.
///
/// Only a partial sort is performed: every item is pushed through a bounded heap and
/// the heap's final contents are then drained one at a time.
/// </summary>
/// <typeparam name="TElement">Type of element in the collection.</typeparam>
/// <param name="items">Collection of items to select from.</param>
/// <param name="topN">True to select the Top N items (backed by a Min Heap), false to select the Bottom N items (backed by a Max Heap).</param>
/// <param name="k">Number of items to select; passed to the heap constructor.</param>
/// <param name="comparisonDelegate">Comparer handed to the heap. If null, the items are presumably ordered
/// by their natural IComparable ordering (decided inside BinaryHeap - confirm there).</param>
/// <returns>The selected items, lazily yielded in the order in which the heap removes them.</returns>
public static IEnumerable<TElement> SelectSerial<TElement>(this IEnumerable<TElement> items, bool topN, int k, IComparer<TElement> comparisonDelegate = null)
{
    // It looks backwards, but Top N needs a Min Heap and Bottom N needs a Max Heap:
    // the heap's root is then the weakest candidate kept so far.
    BinaryHeapType heapKind;
    if (topN)
    {
        heapKind = BinaryHeapType.MinHeap;
    }
    else
    {
        heapKind = BinaryHeapType.MaxHeap;
    }

    var candidates = new BinaryHeap<TElement>(heapKind, k, comparisonDelegate);

    // Presumably AddRemove inserts the item and evicts the weakest one once the heap is full
    // (verify against BinaryHeap).
    foreach (var candidate in items)
    {
        candidates.AddRemove(candidate);
    }

    // Capture the size once up front; the heap is drained exactly that many times.
    // NOTE(review): items come out in heap-removal order - confirm this matches the
    // "descending for Top N / ascending for Bottom N" ordering callers expect.
    var remaining = candidates.Count;
    while (remaining > 0)
    {
        yield return candidates.Remove();
        remaining--;
    }
}
/// <summary>
/// Select either the Top N or Bottom N items from the given collection, in parallel.
///
/// This only performs a partial sort. Each worker task pulls batches of items from a single
/// shared enumerator, keeps its own bounded heap of candidates, and the candidates of all
/// tasks are finally reduced to the requested k items by <c>SelectSerial</c>.
/// </summary>
/// <typeparam name="TElement">Type of element in the collection.</typeparam>
/// <param name="items">Collection of items to sort and select. It is enumerated exactly once; the enumerator
/// is shared by all worker tasks under a lock and is disposed when the tasks have finished.</param>
/// <param name="topN">If true, find the Top N items, otherwise the Bottom N items.</param>
/// <param name="k">Number of items to select.</param>
/// <param name="comparisonDelegate">If null, assume the items are IComparable and sort them according to their natural ordering.
/// If not null, use this in the comparisons to establish the ordering.</param>
/// <param name="options">If null, use the default values, otherwise use these options to control the parallelism
/// (TaskCount worker tasks, each reading BatchSize items per lock acquisition).</param>
/// <returns>The Top N or Bottom N items, as requested, in the order produced by <c>SelectSerial</c>.</returns>
static IEnumerable<TElement> SelectParallel<TElement>(IEnumerable<TElement> items, bool topN, int k, IComparer<TElement> comparisonDelegate = null, SelectParallelOptions options = null)
{
    options = options ?? new SelectParallelOptions();

    // If we are only dedicating a single task to the operation, do it serially to save on Task overhead.
    if (options.TaskCount == 1)
    {
        return SelectSerial(items, topN, k, comparisonDelegate);
    }

    var tasks = new Task[options.TaskCount];
    var extremeItems = new List<TElement>();

    // Read the batch size once so the buffer length and the read-loop bound can never disagree.
    var batchSize = options.BatchSize;

    // The enumerator is IDisposable; dispose it once every worker is done so that iterator
    // blocks and I/O-backed sequences release their resources. Task.WaitAll sits inside the
    // using block, so no task can still touch the enumerator when it is disposed.
    using (var enumerator = items.GetEnumerator())
    {
        for (var i = 0; i < tasks.Length; i++)
        {
            // Each task owns a private buffer, so only the enumerator itself needs locking.
            var batch = new TElement[batchSize];

            tasks[i] = Task.Factory.StartNew(() =>
            {
                // Seems counterintuitive, but Top N uses a Min Heap and Bottom N a Max Heap.
                var heap = new BinaryHeap<TElement>(
                    topN ? BinaryHeapType.MinHeap : BinaryHeapType.MaxHeap,
                    k + 1,
                    comparisonDelegate);

                var moreItems = true;
                while (moreItems)
                {
                    // Pull up to batchSize items while holding the lock, then process them outside it.
                    var readCount = 0;
                    lock (enumerator)
                    {
                        for (var iBatch = 0; iBatch < batchSize && moreItems; iBatch++)
                        {
                            if (enumerator.MoveNext())
                            {
                                batch[readCount++] = enumerator.Current;
                            }
                            else
                            {
                                moreItems = false;
                            }
                        }
                    }

                    for (var iBatch = 0; iBatch < readCount; iBatch++)
                    {
                        var item = batch[iBatch];
                        if (k + 1 > heap.Count)
                        {
                            // Heap not yet full: keep everything.
                            heap.Add(item);
                        }
                        else if (heap.IsLessExtreme(item))
                        {
                            // Presumably the heap's root is less extreme than item (verify against
                            // BinaryHeap.IsLessExtreme): replace the root with the new item.
                            heap.Remove();
                            heap.Add(item);
                        }
                    }
                }

                // Merge this task's candidates into the shared result list.
                lock (extremeItems)
                {
                    extremeItems.AddRange(heap.RemoveAll());
                }
            });
        }

        Task.WaitAll(tasks);
    }

    // At this point we have as many as (k + 1) * TaskCount items left. Take the k most extreme.
    return SelectSerial(extremeItems, topN, k, comparisonDelegate);
}