public static IEnumerable<TMeasurable> TopN<TMeasurable, TMeasure>(this IEnumerable<TMeasurable> items, TMeasurable reference, int k, SelectParallelOptions options = null)
    where TMeasure : IComparable<TMeasure>
    where TMeasurable : IMeasurable<TMeasurable, TMeasure>
{
    // Measures every item against `reference` (via reference.Measure(item)), wraps each
    // item together with its measure in a MeasuredItem, selects k of them with
    // TopNParallel (natural ordering of MeasuredItem, since no comparer is passed),
    // and finally yields the underlying items in the reverse of the order that
    // TopNParallel produced them.
    //
    // items     - the candidates to measure; enumerated once.
    // reference - the item every candidate is measured against.
    // k         - number of items to select.
    // options   - parallelism settings; a default SelectParallelOptions is used when null.
    // NOTE(review): the meaning of the `true` flag passed to MeasuredItem is not visible
    // from here -- confirm against the MeasuredItem constructor.
    if (options == null)
    {
        options = new SelectParallelOptions();
    }

    MeasuredItem<TMeasurable, TMeasure>[] measuredItems;
    if (options.TaskCount == 1)
    {
        // A single task was requested: measure serially to avoid any parallel overhead.
        measuredItems = items
            .Select(item => new MeasuredItem<TMeasurable, TMeasure>(item, reference.Measure(item), true))
            .ToArray();
    }
    else
    {
        var itemsArray = items as TMeasurable[] ?? items.ToArray();

        // Fill a dedicated local that is never reassigned, so the lambda below captures
        // a stable array reference.
        var results = new MeasuredItem<TMeasurable, TMeasure>[itemsArray.Length];

        // Partitioner.Create(from, to) throws ArgumentOutOfRangeException when to <= from,
        // so an empty input must bypass the parallel loop (the serial branch above already
        // accepts empty input).
        if (itemsArray.Length > 0)
        {
            // See http://www.codeproject.com/Articles/451628/Efficient-Map-Operations-for-Arrays-in-Csharp
            // Parallel.ForEach with range partitioner may be faster than other ways for > 10000 items.
            // It certainly was faster than using Task.Factory.StartNew and Task.WaitAll.
            Parallel.ForEach(Partitioner.Create(0, itemsArray.Length), range =>
            {
                // Each range covers a disjoint slice of indices, so no two workers write the same slot.
                for (int i = range.Item1; i < range.Item2; ++i)
                {
                    var item = itemsArray[i];
                    results[i] = new MeasuredItem<TMeasurable, TMeasure>(item, reference.Measure(item), true);
                }
            });
        }

        measuredItems = results;
    }

    return measuredItems
        .TopNParallel(k, null, options)
        .Select(measuredItem => measuredItem.Item)
        .Reverse();
}
/// <summary>
/// Select either the Top N or Bottom N items in sorted order from the given collection, in parallel.
///
/// This only performs a partial sort: each worker task keeps a bounded heap of its most extreme
/// candidates, and the per-task survivors are then merged with one final serial selection.
/// </summary>
/// <typeparam name="TElement">Type of element in the collection.</typeparam>
/// <param name="items">Collection of items to sort and select. It is enumerated once; the enumerator is shared by all worker tasks under a lock.</param>
/// <param name="topN">If true, find the Top N items in descending order, otherwise the Bottom N items in ascending order.</param>
/// <param name="k">Number of items to select.</param>
/// <param name="comparisonDelegate">If null, assume the items are IComparable and sort them according to their natural ordering.
/// If not null, use this in the comparisons to establish the ordering.</param>
/// <param name="options">If null, use the default values, otherwise use these options to control the parallelism.</param>
/// <returns>The Top N or Bottom N items, as requested, sorted appropriately</returns>
static IEnumerable<TElement> SelectParallel<TElement>(IEnumerable<TElement> items, bool topN, int k, IComparer<TElement> comparisonDelegate = null, SelectParallelOptions options = null)
{
    options = options ?? new SelectParallelOptions();

    // If we are only dedicating a single task to the operation, do it serially to save on Task overhead.
    if (options.TaskCount == 1)
    {
        return SelectSerial(items, topN, k, comparisonDelegate);
    }

    var tasks = new Task[options.TaskCount];
    var extremeItems = new List<TElement>();

    // Private lock object guarding the shared enumerator. The enumerator itself is not used
    // as the lock target because an iterator may hand back the caller-visible sequence
    // object as its own enumerator, and locking on an externally reachable object is unsafe.
    var enumeratorGate = new object();

    // The enumerator is disposed once every worker has finished: Task.WaitAll sits inside
    // the using scope, and it only returns (or throws) after all tasks have completed.
    using (var enumerator = items.GetEnumerator())
    {
        for (var i = 0; i < options.TaskCount; i++)
        {
            var iTask = i;

            // One scratch buffer per task; declared inside the loop so each closure gets its own.
            var batch = new TElement[options.BatchSize];

            tasks[iTask] = Task.Factory.StartNew(() =>
            {
                // For Top N a min-heap is used (and a max-heap for Bottom N) so that the
                // least extreme retained item is the one removed when a better one arrives.
                var heap = new BinaryHeap<TElement>(
                    topN ? BinaryHeapType.MinHeap : BinaryHeapType.MaxHeap,
                    k + 1,
                    comparisonDelegate);
                var moreItems = true;
                var batchSize = options.BatchSize;

                while (moreItems)
                {
                    // Pull a whole batch under the lock so the lock is taken once per batch
                    // rather than once per item.
                    var iReadCount = 0;
                    lock (enumeratorGate)
                    {
                        for (var iBatch = 0; iBatch < batchSize && moreItems; iBatch++)
                        {
                            if (enumerator.MoveNext())
                            {
                                batch[iReadCount++] = enumerator.Current;
                            }
                            else
                            {
                                moreItems = false;
                            }
                        }
                    }

                    // Process the batch outside the lock so other tasks can read meanwhile.
                    for (var iBatch = 0; iBatch < iReadCount; iBatch++)
                    {
                        var item = batch[iBatch];
                        if (k + 1 > heap.Count)
                        {
                            // Heap not yet full (it holds up to k + 1 items): keep everything.
                            heap.Add(item);
                        }
                        else if (heap.IsLessExtreme(item))
                        {
                            // Heap is full: swap out one retained item for the new candidate.
                            heap.Remove();
                            heap.Add(item);
                        }
                    }
                }

                // Publish this task's survivors into the shared result list.
                lock (extremeItems)
                {
                    extremeItems.AddRange(heap.RemoveAll());
                }
            });
        }

        Task.WaitAll(tasks);
    }

    // At this point we have as many as (k + 1) * TaskCount items left. Take the k most extreme.
    return SelectSerial(extremeItems, topN, k, comparisonDelegate);
}
/// <summary>
/// Find the K largest items in the collection by delegating to SelectParallel with
/// <c>topN</c> set to true.
/// </summary>
/// <remarks>
/// NOTE(review): the ordering of the returned sequence is whatever SelectParallel yields for
/// <c>topN = true</c>; confirm against SelectSerial before relying on a particular direction.
/// </remarks>
/// <typeparam name="TElement">Type of element in the collection.</typeparam>
/// <param name="items">Items to select from.</param>
/// <param name="k">Number of items desired.</param>
/// <param name="comparisonDelegate">If omitted, assume the elements are IComparable and use their default collation order.
/// Otherwise use this comparer to compare the items.</param>
/// <param name="options">If null, use the default values, otherwise use these options to control the parallelism.</param>
/// <returns>The largest items, i.e. those last in the collation order.</returns>
public static IEnumerable<TElement> TopNParallel<TElement>(this IEnumerable<TElement> items, int k, IComparer<TElement> comparisonDelegate = null, SelectParallelOptions options = null)
{
    // Name the flag so the call site reads clearly: true requests the largest items.
    const bool selectLargest = true;

    var selected = SelectParallel(items, selectLargest, k, comparisonDelegate, options);
    return selected;
}