Example #1
0
 /// <summary>
 /// Select k items from <paramref name="items"/>, ranked by their measure relative to <paramref name="reference"/>.
 /// </summary>
 /// <remarks>
 /// Every item is paired with <c>reference.Measure(item)</c> in a <c>MeasuredItem</c>. The pairs are handed to
 /// <c>TopNParallel</c> (with no explicit comparer, so the natural ordering of <c>MeasuredItem</c> applies),
 /// the original items are projected back out, and the resulting sequence is reversed.
 /// Measuring runs serially when <c>options.TaskCount</c> is 1; otherwise it uses <c>Parallel.ForEach</c>
 /// over index ranges of an array copy of the input.
 /// </remarks>
 /// <typeparam name="TMeasurable">Type of the items; must be able to measure itself against another item.</typeparam>
 /// <typeparam name="TMeasure">Type of the value produced by <c>Measure</c>; must be comparable.</typeparam>
 /// <param name="items">Items to measure and select from.</param>
 /// <param name="reference">Object whose <c>Measure</c> method is called once for every item.</param>
 /// <param name="k">Number of items to select; passed through unchanged to <c>TopNParallel</c>.</param>
 /// <param name="options">If null, a default <c>SelectParallelOptions</c> is created and used.</param>
 /// <returns>The selected items, in the reverse of the order produced by <c>TopNParallel</c>.</returns>
 public static IEnumerable <TMeasurable> TopN <TMeasurable, TMeasure>(this IEnumerable <TMeasurable> items,
                                                                       TMeasurable reference, int k, SelectParallelOptions options = null)
     where TMeasure : IComparable <TMeasure>
     where TMeasurable : IMeasurable <TMeasurable, TMeasure>
 {
     if (options == null)
     {
         options = new SelectParallelOptions();
     }
     MeasuredItem <TMeasurable, TMeasure>[] measuredItems;
     if (options.TaskCount == 1)
     {
         // Single task requested: measure serially and avoid the parallel machinery.
         measuredItems =
             items.Select(item => new MeasuredItem <TMeasurable, TMeasure>(item, reference.Measure(item), true))
             .ToArray();
     }
     else
     {
         // Index-based partitioning needs random access, so reuse the array if we were given one.
         var itemsArray = items as TMeasurable[] ?? items.ToArray();
         measuredItems = new MeasuredItem <TMeasurable, TMeasure> [itemsArray.Length];
         // Partitioner.Create(from, to) throws ArgumentOutOfRangeException when to <= from,
         // so an empty input must skip the parallel loop and simply yield an empty result,
         // matching what the serial path does.
         if (itemsArray.Length > 0)
         {
             // See http://www.codeproject.com/Articles/451628/Efficient-Map-Operations-for-Arrays-in-Csharp
             // Parallel.ForEach with range partitioner may be faster than other ways for > 10000 items.
             // It certainly was faster than using Task.Factory.StartNew and Task.WaitAll.
             Parallel.ForEach(Partitioner.Create(0, itemsArray.Length),
                              range =>
             {
                 // Each range writes to a disjoint slice of measuredItems, so no locking is needed.
                 for (int i = range.Item1; i < range.Item2; ++i)
                 {
                     var item = itemsArray[i];
                     // ReSharper disable once AccessToModifiedClosure
                     measuredItems[i] = new MeasuredItem <TMeasurable, TMeasure>(item, reference.Measure(item), true);
                 }
             }
                              );
         }
     }
     return(measuredItems
            .TopNParallel(k, null, options)
            .Select(measuredItem => measuredItem.Item)
            .Reverse()
            );
 }
Example #2
0
        /// <summary>
        /// Select either the Top N or Bottom N items in sorted order from the given collection, in parallel.
        ///
        /// This only performs a partial sort.
        /// </summary>
        /// <remarks>
        /// <c>options.TaskCount</c> tasks pull batches of up to <c>options.BatchSize</c> items from one shared
        /// enumerator (guarded by a lock), and each task keeps its own bounded heap of candidates.
        /// The candidates of all tasks are then merged and reduced to the final answer by <c>SelectSerial</c>.
        /// When <c>options.TaskCount</c> is 1 the whole operation is delegated to <c>SelectSerial</c>.
        /// </remarks>
        /// <typeparam name="TElement">Type of element in the collection.</typeparam>
        /// <param name="items">Collection of items to sort and select. It is enumerated exactly once.</param>
        /// <param name="topN">If true, find the Top N items, otherwise the Bottom N items.
        /// The ordering of the returned items is whatever <c>SelectSerial</c> produces for this flag.</param>
        /// <param name="k">Number of items to select.</param>
        /// <param name="comparisonDelegate">If null, assume the items are IComparable and sort them according to their natural ordering.
        /// If not null, use this in the comparisons to establish the ordering.</param>
        /// <param name="options">If null, use the default values, otherwise use these options to control the parallelism.</param>
        /// <returns>The Top N or Bottom N items, as requested, sorted appropriately</returns>
        static IEnumerable <TElement> SelectParallel <TElement>(IEnumerable <TElement> items, bool topN, int k,
                                                                IComparer <TElement> comparisonDelegate = null, SelectParallelOptions options = null)
        {
            options = options ?? new SelectParallelOptions();

            // If we are only dedicating a single task to the operation, do it serially to save on Task overhead.
            if (options.TaskCount == 1)
            {
                return(SelectSerial(items, topN, k, comparisonDelegate));
            }

            var tasks        = new Task[options.TaskCount];
            var extremeItems = new List <TElement>();

            // IEnumerator<T> is IDisposable; iterator blocks and resource-backed sequences rely on Dispose
            // to run their cleanup. The using block ends only after Task.WaitAll has returned (or thrown,
            // which also happens only once every task has finished), so no task can touch a disposed enumerator.
            using (var enumerator = items.GetEnumerator())
            {
                for (var i = 0; i < options.TaskCount; i++)
                {
                    var iTask = i;
                    // One private batch buffer per task: filled under the lock, consumed outside of it.
                    var batch = new TElement[options.BatchSize];
                    tasks[iTask] = Task.Factory.StartNew(() =>
                    {
                        var heap      = new BinaryHeap <TElement>(topN ? BinaryHeapType.MinHeap : BinaryHeapType.MaxHeap, k + 1, comparisonDelegate);
                        var moreItems = true;
                        var batchSize = options.BatchSize;
                        while (moreItems)
                        {
                            var iReadCount = 0;
                            // The enumerator is shared by all tasks and is not thread-safe, so reads are serialized.
                            lock (enumerator)
                            {
                                for (var iBatch = 0; iBatch < batchSize && moreItems; iBatch++)
                                {
                                    if (enumerator.MoveNext())
                                    {
                                        batch[iReadCount++] = enumerator.Current;
                                    }
                                    else
                                    {
                                        moreItems = false;
                                    }
                                }
                            }
                            // Heap maintenance is the expensive part and runs outside the lock.
                            for (var iBatch = 0; iBatch < iReadCount; iBatch++)
                            {
                                var item = batch[iBatch];
                                if (k + 1 > heap.Count)
                                {
                                    // Heap not yet full: accept unconditionally.
                                    heap.Add(item);
                                }
                                else if (heap.IsLessExtreme(item))
                                {
                                    // Heap is full and the new item beats its top: replace the top.
                                    heap.Remove();
                                    heap.Add(item);
                                }
                            }
                        }
                        // List<T> is not thread-safe; merge this task's candidates under a lock.
                        lock (extremeItems)
                        {
                            extremeItems.AddRange(heap.RemoveAll());
                        }
                    });
                }
                Task.WaitAll(tasks);
            }
            //  At this point each task has contributed at most k + 1 candidates (its heap is capped at that size),
            //  so we have as many as (k + 1) * TaskCount items left. Take the k most extreme.
            return(SelectSerial(extremeItems, topN, k, comparisonDelegate));
        }
Example #3
0
 /// <summary>
 /// Find the K largest items by delegating to the parallel selection routine <c>SelectParallel</c>.
 /// </summary>
 /// <typeparam name="TElement">Type of element in the collection.</typeparam>
 /// <param name="items">Items to select from.</param>
 /// <param name="k">Number of items desired.</param>
 /// <param name="comparisonDelegate">If omitted, the elements are assumed to be IComparable and their default
 /// collation order is used. Otherwise this comparer decides the ordering.</param>
 /// <param name="options">If null, use the default values, otherwise use these options to control the parallelism.</param>
 /// <returns>The largest items (those last in the collation order), in the order that <c>SelectParallel</c>
 /// yields for a Top N request. NOTE(review): the exact ordering is decided by SelectParallel/SelectSerial
 /// and is not visible from this method - confirm before relying on it.</returns>
 public static IEnumerable <TElement> TopNParallel <TElement>(this IEnumerable <TElement> items, int k,
                                                              IComparer <TElement> comparisonDelegate = null, SelectParallelOptions options = null)
 {
     // SelectParallel serves both Top N and Bottom N requests; this flag picks Top N.
     const bool wantTopN = true;

     var selected = SelectParallel(items, wantTopN, k, comparisonDelegate, options);
     return selected;
 }