예제 #1
0
        /// <summary>
        /// Select either the Top N or Bottom N items from the given collection, serially (not in parallel).
        ///
        /// This only performs a partial sort: every item is offered to a heap constructed with
        /// <paramref name="k"/>, and the heap is then drained one item at a time.
        /// </summary>
        /// <remarks>
        /// Execution is deferred: the source is not enumerated until the returned sequence is iterated,
        /// and it is enumerated again on every iteration of the result. Argument validation, however,
        /// happens immediately at the call site.
        ///
        /// Results are yielded in the order successive <c>heap.Remove()</c> calls return them.
        /// NOTE(review): earlier documentation stated Top N comes out descending and Bottom N ascending;
        /// that depends on BinaryHeap, which is not visible here - confirm.
        /// </remarks>
        /// <typeparam name="TElement">Type of element in the collection.</typeparam>
        /// <param name="items">Collection of items to sort and select. Must not be null.</param>
        /// <param name="topN">If true, find the Top N items (using a Min Heap), otherwise the Bottom N items (using a Max Heap).</param>
        /// <param name="k">Number of items to select.</param>
        /// <param name="comparisonDelegate">If null, assume the items are IComparable and sort them according to their natural ordering.
        /// If not null, use this in the comparisons to establish the ordering.</param>
        /// <returns>The Top N or Bottom N items, as requested.</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="items"/> is null.</exception>
        public static IEnumerable<TElement> SelectSerial<TElement>(this IEnumerable<TElement> items, bool topN, int k,
                                                                   IComparer<TElement> comparisonDelegate = null)
        {
            // Validate here rather than in the iterator: an iterator body does not start running until
            // the first MoveNext, so a null source would otherwise fail far away from the faulty call.
            if (items == null)
            {
                throw new System.ArgumentNullException("items");
            }

            return SelectSerialIterator(items, topN, k, comparisonDelegate);
        }

        /// <summary>
        /// Deferred iterator behind SelectSerial: feeds every item through the heap, then drains the heap.
        /// </summary>
        private static IEnumerable<TElement> SelectSerialIterator<TElement>(IEnumerable<TElement> items, bool topN, int k,
                                                                            IComparer<TElement> comparisonDelegate)
        {
            // Seems counterintuitive, but when looking for the Top N we use a Min Heap, and when
            // looking for the Bottom N we use a Max Heap: the root of the heap is then the least
            // extreme of the retained items, i.e. the candidate to give up when a better item arrives.
            var heapType = topN ? BinaryHeapType.MinHeap : BinaryHeapType.MaxHeap;
            var heap     = new BinaryHeap<TElement>(heapType, k, comparisonDelegate);

            foreach (var item in items)
            {
                heap.AddRemove(item);
            }

            // Capture the count once up front; each Remove() hands back one retained item.
            var resultsCount = heap.Count;
            for (var i = 0; i < resultsCount; i++)
            {
                yield return heap.Remove();
            }
        }
예제 #2
0
        /// <summary>
        /// Select either the Top N or Bottom N items from the given collection, in parallel.
        ///
        /// This only performs a partial sort. Each task pulls batches of items from the shared source
        /// under a lock, keeps its own heap of the most extreme items it has seen, and the per-task
        /// survivors are then reduced serially to the final selection.
        /// </summary>
        /// <remarks>
        /// When more than one task is used, the source is consumed completely before this method returns
        /// (the method blocks until all tasks finish); only the final serial reduction is deferred.
        /// When TaskCount is 1 or less, the whole operation is delegated to SelectSerial.
        /// </remarks>
        /// <typeparam name="TElement">Type of element in the collection.</typeparam>
        /// <param name="items">Collection of items to sort and select. Must not be null.</param>
        /// <param name="topN">If true, find the Top N items (using Min Heaps), otherwise the Bottom N items (using Max Heaps).</param>
        /// <param name="k">Number of items to select.</param>
        /// <param name="comparisonDelegate">If null, assume the items are IComparable and sort them according to their natural ordering.
        /// If not null, use this in the comparisons to establish the ordering.</param>
        /// <param name="options">If null, use the default values, otherwise use these options to control the parallelism.
        /// TaskCount is the number of tasks started; BatchSize is the number of items a task reads per lock acquisition.</param>
        /// <returns>The Top N or Bottom N items, as requested, in the order SelectSerial yields them.</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="items"/> is null.</exception>
        /// <exception cref="System.ArgumentOutOfRangeException">More than one task is requested and BatchSize is less than 1.</exception>
        /// <exception cref="System.AggregateException">One or more of the worker tasks failed.</exception>
        static IEnumerable<TElement> SelectParallel<TElement>(IEnumerable<TElement> items, bool topN, int k,
                                                              IComparer<TElement> comparisonDelegate = null, SelectParallelOptions options = null)
        {
            if (items == null)
            {
                throw new System.ArgumentNullException("items");
            }

            options = options ?? new SelectParallelOptions();

            // If we are only dedicating a single task to the operation, do it serially to save on Task overhead.
            // A non-positive count is treated the same way: starting zero tasks would silently produce an
            // empty result without ever reading the source.
            var taskCount = options.TaskCount;
            if (taskCount <= 1)
            {
                return SelectSerial(items, topN, k, comparisonDelegate);
            }

            // A batch size of zero would leave every task spinning forever: no item is ever read,
            // so the end of the source is never observed.
            var batchSize = options.BatchSize;
            if (batchSize < 1)
            {
                throw new System.ArgumentOutOfRangeException("options", "BatchSize must be at least 1.");
            }

            var heapType     = topN ? BinaryHeapType.MinHeap : BinaryHeapType.MaxHeap;
            var tasks        = new Task[taskCount];
            var extremeItems = new List<TElement>();

            // Dedicated lock object: the enumerator may be the very same instance as the caller's
            // sequence (compiler-generated iterators can return themselves), so never lock on it.
            var sourceGate = new object();

            // The enumerator is shared by all tasks and disposed once every task has finished.
            using (var enumerator = items.GetEnumerator())
            {
                for (var i = 0; i < taskCount; i++)
                {
                    // One private read buffer per task, so items can be processed outside the lock.
                    var batch = new TElement[batchSize];
                    tasks[i] = Task.Factory.StartNew(() =>
                    {
                        // NOTE(review): the per-task heap is built with k + 1 rather than k, as in the
                        // original code; the reason is not visible here - confirm against BinaryHeap.
                        var heap      = new BinaryHeap<TElement>(heapType, k + 1, comparisonDelegate);
                        var moreItems = true;
                        while (moreItems)
                        {
                            // Phase 1: under the lock, pull up to batchSize items from the shared source.
                            var readCount = 0;
                            lock (sourceGate)
                            {
                                while (readCount < batchSize)
                                {
                                    if (!enumerator.MoveNext())
                                    {
                                        moreItems = false;
                                        break;
                                    }
                                    batch[readCount++] = enumerator.Current;
                                }
                            }

                            // Phase 2: outside the lock, fold the batch into this task's heap.
                            for (var iBatch = 0; iBatch < readCount; iBatch++)
                            {
                                var item = batch[iBatch];
                                if (k + 1 > heap.Count)
                                {
                                    // Heap not yet full: keep everything.
                                    heap.Add(item);
                                }
                                else if (heap.IsLessExtreme(item))
                                {
                                    // Heap full and the new item beats the current root: swap it in.
                                    heap.Remove();
                                    heap.Add(item);
                                }
                            }
                        }

                        // Publish this task's survivors; the list is shared, so guard the append.
                        lock (extremeItems)
                        {
                            extremeItems.AddRange(heap.RemoveAll());
                        }
                    });
                }

                // Blocks until every task has completed (successfully or not), so the enumerator
                // is no longer in use when the using block disposes it.
                Task.WaitAll(tasks);
            }

            // At this point we have as many as (k + 1) * TaskCount items left. Take the k most extreme.
            return SelectSerial(extremeItems, topN, k, comparisonDelegate);
        }