//---------------------------------------------------------------------------------------
// MoveNext implements the hash-join in two phases.
//
//   BUILD - on the very first call only (when _mutables is still null) the whole right
//           (inner) source is drained into a hash lookup keyed by the right hash key.
//   PROBE - on the first and every subsequent call, elements are pulled from the left
//           (outer) source, looked up in the hash table, and the results are yielded
//           incrementally.
//
// This routine supports both inner joins (_singleResultSelector) and group joins
// (_groupResultSelector). A group join yields, for every left element, a (possibly
// empty) list of matching right elements; an inner join yields the matches one at a
// time across successive calls.
//
// Arguments:
//     currentElement - receives the next joined result when the return value is true
//     currentKey     - receives the left key associated with that result
//
// Return Value:
//     True if an element was produced, false once the left source is exhausted.
//
// Notes:
//     Elements whose hash key is null are ignored on both sides (for a group join a
//     left element with a null key still yields a result, with an empty match list).
//     Cancellation is polled in both loops via CancellationState.ThrowIfCanceled.
//
internal override bool MoveNext(ref TOutput currentElement, ref TLeftKey currentKey)
{
    Debug.Assert(_singleResultSelector != null || _groupResultSelector != null, "expected a compiled result selector");
    Debug.Assert(_leftSource != null);
    Debug.Assert(_rightSource != null);

    // BUILD phase: If we haven't built the hash-table yet, create that first.
    Mutables mutables = _mutables;
    if (mutables == null)
    {
        mutables = _mutables = new Mutables();
#if DEBUG
        int hashLookupCount = 0;
        int hashKeyCollisions = 0;
#endif
        mutables._rightHashLookup = new HashLookup<THashKey, Pair<TRightInput, ListChunk<TRightInput>>>(_keyComparer);

        Pair<TRightInput, THashKey> rightPair = default(Pair<TRightInput, THashKey>);
        int rightKeyUnused = default(int);
        int i = 0;
        while (_rightSource.MoveNext(ref rightPair, ref rightKeyUnused))
        {
            // Only poll for cancellation every so often to keep the loop cheap.
            if ((i++ & CancellationState.POLL_INTERVAL) == 0)
            {
                CancellationState.ThrowIfCanceled(_cancellationToken);
            }

            TRightInput rightElement = rightPair.First;
            THashKey rightHashKey = rightPair.Second;

            // We ignore null keys.
            if (rightHashKey != null)
            {
#if DEBUG
                hashLookupCount++;
#endif
                // See if we've already stored an element under the current key. If not, we
                // lazily allocate a pair to hold the elements mapping to the same key.
                const int INITIAL_CHUNK_SIZE = 2;
                Pair<TRightInput, ListChunk<TRightInput>> currentValue = default(Pair<TRightInput, ListChunk<TRightInput>>);
                if (!mutables._rightHashLookup.TryGetValue(rightHashKey, ref currentValue))
                {
                    // First element seen for this key: it lives in Pair.First.
                    currentValue = new Pair<TRightInput, ListChunk<TRightInput>>(rightElement, null);

                    if (_groupResultSelector != null)
                    {
                        // For group joins, we also add the element to the list. This makes
                        // it easier later to yield the list as-is.
                        currentValue.Second = new ListChunk<TRightInput>(INITIAL_CHUNK_SIZE);
                        currentValue.Second.Add(rightElement);
                    }

                    mutables._rightHashLookup.Add(rightHashKey, currentValue);
                }
                else
                {
                    if (currentValue.Second == null)
                    {
                        // Lazily allocate a list to hold all but the 1st value. We need to
                        // re-store this element because the pair is a value type.
                        currentValue.Second = new ListChunk<TRightInput>(INITIAL_CHUNK_SIZE);
                        mutables._rightHashLookup[rightHashKey] = currentValue;
                    }

                    currentValue.Second.Add(rightElement);
#if DEBUG
                    hashKeyCollisions++;
#endif
                }
            }
        }

#if DEBUG
        TraceHelpers.TraceInfo("ParallelJoinQueryOperator::MoveNext - built hash table [count = {0}, collisions = {1}]", hashLookupCount, hashKeyCollisions);
#endif
    }

    // PROBE phase: So long as the source has a next element, return the match.

    // If the current chunk of pending right matches has been fully consumed, step to the
    // next chunk in the chain (which may be null, meaning no more pending matches).
    ListChunk<TRightInput> currentRightChunk = mutables._currentRightMatches;
    if (currentRightChunk != null && mutables._currentRightMatchesIndex == currentRightChunk.Count)
    {
        currentRightChunk = mutables._currentRightMatches = currentRightChunk.Next;
        mutables._currentRightMatchesIndex = 0;
    }

    if (mutables._currentRightMatches == null)
    {
        // We have to look up the next list of matches in the hash-table.
        Pair<TLeftInput, THashKey> leftPair = default(Pair<TLeftInput, THashKey>);
        TLeftKey leftKey = default(TLeftKey);
        while (_leftSource.MoveNext(ref leftPair, ref leftKey))
        {
            if ((mutables._outputLoopCount++ & CancellationState.POLL_INTERVAL) == 0)
            {
                CancellationState.ThrowIfCanceled(_cancellationToken);
            }

            // Find the match in the hash table.
            Pair<TRightInput, ListChunk<TRightInput>> matchValue = default(Pair<TRightInput, ListChunk<TRightInput>>);
            TLeftInput leftElement = leftPair.First;
            THashKey leftHashKey = leftPair.Second;

            // Ignore null keys.
            if (leftHashKey != null)
            {
                if (mutables._rightHashLookup.TryGetValue(leftHashKey, ref matchValue))
                {
                    // We found a new match. For inner joins, we remember the list in case
                    // there are multiple values under this same key -- the next iteration will pick
                    // them up. For group joins, we will use the list momentarily.
                    if (_singleResultSelector != null)
                    {
                        mutables._currentRightMatches = matchValue.Second;
                        // Fixed message: the condition checks for null OR non-empty
                        // (the previous text said "null or empty").
                        Debug.Assert(mutables._currentRightMatches == null || mutables._currentRightMatches.Count > 0,
                            "we were expecting that the list would be either null or non-empty");
                        mutables._currentRightMatchesIndex = 0;

                        // Yield the value.
                        currentElement = _singleResultSelector(leftElement, matchValue.First);
                        currentKey = leftKey;

                        // If there is a list of matches, remember the left values for next time.
                        if (matchValue.Second != null)
                        {
                            mutables._currentLeft = leftElement;
                            mutables._currentLeftKey = leftKey;
                        }

                        return true;
                    }
                }
            }

            // For group joins, we always yield a result.
            if (_groupResultSelector != null)
            {
                // Grab the matches, or create an empty list if there are none.
                IEnumerable<TRightInput> matches = matchValue.Second;
                if (matches == null)
                {
                    matches = ParallelEnumerable.Empty<TRightInput>();
                }

                // Generate the current value.
                currentElement = _groupResultSelector(leftElement, matches);
                currentKey = leftKey;
                return true;
            }
        }

        // If we've reached the end of the data source, we're done.
        return false;
    }

    // Produce the next element and increment our index within the matches. We only get
    // here for inner joins that still have pending right matches for the remembered left.
    Debug.Assert(_singleResultSelector != null);
    Debug.Assert(mutables._currentRightMatches != null);
    Debug.Assert(0 <= mutables._currentRightMatchesIndex && mutables._currentRightMatchesIndex < mutables._currentRightMatches.Count);

    currentElement = _singleResultSelector(
        mutables._currentLeft, mutables._currentRightMatches._chunk[mutables._currentRightMatchesIndex]);
    currentKey = mutables._currentLeftKey;

    mutables._currentRightMatchesIndex++;

    return true;
}
//-----------------------------------------------------------------------------------
// Internal helper that attempts to dequeue a whole chunk. This version is meant for
// callers that are prepared to wait for a new chunk when none is available.
//
// Arguments:
//     chunk  - a byref that receives the dequeued chunk when the return value is true
//     isDone - a byref set to true only when the channel is done AND its buffer is
//              empty; false in every other case
//
// Return Value:
//     True if a chunk was dequeued, false otherwise.
//
// Notes:
//     A false return with isDone == true means there is nothing to wait for.
//
//     A false return with isDone == false has a side-effect on the channel: the
//     _consumerIsWaiting flag has been set to 1 and is left set. The caller must
//     eventually reset that state whether or not it actually waits.
//     NOTE(review): earlier comments name the reset routine DequeueEndAfterWait, while
//     the merge enumerator calls DoneWithDequeueWait -- confirm the current name.
//
private bool TryDequeueChunk(ref T[] chunk, ref bool isDone)
{
    isDone = false;

    while (IsChunkBufferEmpty)
    {
        // The buffer must be re-examined AFTER observing IsDone; checking in the other
        // order could miss elements enqueued just before the producer signaled done.
        // (The && keeps the reads in that order: IsDone first, then the empty check.)
        if (IsDone && IsChunkBufferEmpty)
        {
            // Tell the caller there is no point in waiting.
            isDone = true;
            return false;
        }

        // A producer and this consumer may race: we saw an empty buffer above, but the
        // producer may fill it right away. Publish our intent to wait first, then look
        // at the buffer state again so that a wake-up cannot be lost.
#pragma warning disable 0420
        Interlocked.Exchange(ref _consumerIsWaiting, 1);
#pragma warning restore 0420

        // The interlocked write above keeps the following reads from moving ahead of
        // the flag update. Waiting is only warranted if the buffer is still empty and
        // the channel has not been marked done in the meantime; otherwise we could
        // decide to wait and never be woken up.
        bool waitIsWarranted = IsChunkBufferEmpty && !IsDone;
        if (!waitIsWarranted)
        {
            // No wait needed after all: clear the flag and go around again to re-check
            // for data / completion.
            _consumerIsWaiting = 0;
            continue;
        }

        // The waiting flag stays set on this path; the caller is responsible for
        // clearing it later, whether it chooses to wait or not.
        TraceHelpers.TraceInfo("AsynchronousChannel::DequeueChunk - consumer possibly waiting");
        return false;
    }

    Debug.Assert(!IsChunkBufferEmpty, "single-consumer should never witness an empty queue here");

    chunk = InternalDequeueChunk();
    return true;
}
//-----------------------------------------------------------------------------------
// The slow path, taken when a quick sweep over the channels produced nothing. It
// keeps cycling through the channels, retiring those that are done, and blocks on
// the consumer event when a full sweep finds no element.
//
// Return Value:
//     True if an element was stored into _currentElement; false once every channel
//     has been exhausted (after _taskGroupState.QueryEnd has been invoked).
//
private bool MoveNextSlowPath()
{
    int finishedChannels = 0;

    // Where the current sweep began. Arriving back here without having found an
    // element means it is time to go to sleep.
    int sweepStart = _channelIndex;

    int index;
    while ((index = _channelIndex) != _channels.Length)
    {
        AsynchronousChannel<T> channel = _channels[index];
        bool finished = _done[index];

        if (!finished && channel.TryDequeue(ref _currentElement))
        {
            // Got an item (TryDequeue stored it in _currentElement). Advance to the
            // next channel for the following call and report success.
            _channelIndex = (index + 1) % _channels.Length;
            return true;
        }

        // Nothing in this channel right now. See whether it has completed before we
        // consider waiting on it.
        if (!finished && channel.IsDone)
        {
            // An item may have been enqueued between the failed dequeue above and the
            // IsDone read. A done channel with leftover elements must still be drained.
            if (!channel.IsChunkBufferEmpty)
            {
                bool dequeueResult = channel.TryDequeue(ref _currentElement);
                Debug.Assert(dequeueResult, "channel isn't empty, yet the dequeue failed, hmm");
                return true;
            }

            // Truly done: retire the channel so it is not considered again.
            _done[index] = true;
            finished = true;
            channel.Dispose();
        }

        if (finished)
        {
            Debug.Assert(_channels[index].IsDone, "thought this channel was done");
            Debug.Assert(_channels[index].IsChunkBufferEmpty, "thought this channel was empty");

            // Tally finished channels; once all of them are finished, record that by
            // moving the index past the end and leave the loop.
            if (++finishedChannels == _channels.Length)
            {
                _channelIndex = index = _channels.Length;
                break;
            }
        }

        // Still no element: step to the next channel.
        _channelIndex = index = (index + 1) % _channels.Length;

        // Keep sweeping until we are back where this sweep started.
        if (index != sweepStart)
        {
            continue;
        }

        // A complete sweep found nothing. The first sweep had no side-effects that
        // would tell a producer we are waiting; this second pass uses the TryDequeue
        // overload that reports completion (and, per the original notes, registers the
        // consumer as possibly waiting).
        try
        {
            // The tally is rebuilt from scratch during the second pass.
            finishedChannels = 0;

            for (int i = 0; i < _channels.Length; i++)
            {
                bool channelIsDone = false;
                if (!_done[i] && _channels[i].TryDequeue(ref _currentElement, ref channelIsDone))
                {
                    // An item arrived since we last looked; hand it to the consumer.
                    return true;
                }

                if (channelIsDone)
                {
                    if (!_done[i])
                    {
                        _done[i] = true;
                    }

                    if (++finishedChannels == _channels.Length)
                    {
                        // Nothing left to wait for. Record it by moving the index
                        // past the end of the channel list.
                        _channelIndex = index = _channels.Length;
                        break;
                    }
                }
            }

            // All channels done: leave the outer loop entirely.
            if (index == _channels.Length)
            {
                break;
            }

            // This Wait() does not require cancellation support as it will wake up when all the
            // producers into the channel have finished. Hence, if all the producers wake up on
            // cancellation, so will this.
            _consumerEvent.Wait();
            _channelIndex = index = _consumerEvent.Value;
            _consumerEvent.Reset();

            // We were woken by the channel whose index the event carries: either its
            // producer signaled completion, or it now has an item. Both cases are handled
            // by simply going around the loop again, starting a fresh sweep (and a fresh
            // tally) at that channel.
            sweepStart = index;
            finishedChannels = 0;
        }
        finally
        {
            // Guarantee that any wait we announced is undone. This may call
            // DoneWithDequeueWait() on channels that were never marked as waiting;
            // the channel has to tolerate such unnecessary calls.
            for (int i = 0; i < _channels.Length; i++)
            {
                if (!_done[i])
                {
                    _channels[i].DoneWithDequeueWait();
                }
            }
        }
    }

    TraceHelpers.TraceInfo("[timing]: {0}: Completed the merge", DateTime.Now.Ticks);

    // Reaching this point means every channel has been exhausted.
    Debug.Assert(index == _channels.Length);

    // If any tasks failed, propagate the failure now. It has to happen here because the
    // merge executor returns control to the caller before the query has completed;
    // contrast this with synchronous enumeration, where we can wait before returning.
    _taskGroupState.QueryEnd(false);

    return false;
}
//---------------------------------------------------------------------------------------
// Builds the individual partitions for a data source and stores them in _partitions.
//
// Arguments:
//     source         - the data to partition (unwrapped first if it is a
//                      ParallelEnumerableWrapper<T>)
//     partitionCount - how many partitions to create; must be positive
//     useStriping    - true to create striped (chunked) index-range enumerators,
//                      false to create contiguous index-range enumerators
//
// Notes:
//     When the source is an IList<T> it is partitioned "in place" by index, with a
//     specialized enumerator if it is actually a T[]. Otherwise we fall back to the
//     MakePartitions overload that takes an enumerator and partitions lazily; per the
//     original notes that path may involve synchronization, so callers may
//     occasionally block while enumerating.
//
private void InitializePartitions(IEnumerable<T> source, int partitionCount, bool useStriping)
{
    Debug.Assert(source != null);
    Debug.Assert(partitionCount > 0);

    // Look through the wrapper, if any, so that the real underlying type is visible.
    if (source is ParallelEnumerableWrapper<T> wrapper)
    {
        source = wrapper.WrappedEnumerable;
        Debug.Assert(source != null);
    }

    // Not indexable: no in-place partitioning possible. Defer to the overload that
    // accepts an enumerator instead.
    if (!(source is IList<T> sourceAsList))
    {
        _partitions = MakePartitions(source.GetEnumerator(), partitionCount);
        return;
    }

    QueryOperatorEnumerator<T, int>[] partitions = new QueryOperatorEnumerator<T, int>[partitionCount];

    // Arrays get specialized enumerators below.
    T[]? sourceAsArray = source as T[];

    // With range (contiguous) partitioning the chunk size is unlimited, i.e. -1.
    // With striping we take the default chunk size, clamped to a minimum of 1.
    int maxChunkSize = -1;
    if (useStriping)
    {
        int defaultChunkSize = Scheduling.GetDefaultChunkSize<T>();
        maxChunkSize = defaultChunkSize < 1 ? 1 : defaultChunkSize;
    }

    // Construct one enumerator per partition, each walking a subset of the input.
    for (int partitionIndex = 0; partitionIndex < partitionCount; partitionIndex++)
    {
        if (sourceAsArray == null)
        {
            // General purpose list enumerator.
            if (useStriping)
            {
                partitions[partitionIndex] = new ListIndexRangeEnumerator(sourceAsList, partitionCount, partitionIndex, maxChunkSize);
            }
            else
            {
                partitions[partitionIndex] = new ListContiguousIndexRangeEnumerator(sourceAsList, partitionCount, partitionIndex);
            }

            TraceHelpers.TraceInfo("ContiguousRangePartitionExchangeStream::MakePartitions - (list) #{0} {1})", partitionIndex, maxChunkSize);
            continue;
        }

        // Array fast path: elements can be indexed with 'ldelem' instructions rather
        // than through interface method calls.
        if (useStriping)
        {
            partitions[partitionIndex] = new ArrayIndexRangeEnumerator(sourceAsArray, partitionCount, partitionIndex, maxChunkSize);
        }
        else
        {
            partitions[partitionIndex] = new ArrayContiguousIndexRangeEnumerator(sourceAsArray, partitionCount, partitionIndex);
        }

        TraceHelpers.TraceInfo("ContiguousRangePartitionExchangeStream::MakePartitions - (array) #{0} {1}", partitionIndex, maxChunkSize);
    }

    Debug.Assert(partitions.Length == partitionCount);
    _partitions = partitions;
}
//---------------------------------------------------------------------------------------
// This method just creates the individual partitions given a data source and stores
// them in m_partitions.
//
// Arguments:
//     source         - the data to partition (unwrapped first if it is a
//                      ParallelEnumerableWrapper<T>)
//     partitionCount - how many partitions to create; must be positive
//     useStriping    - true to create striped (chunked) index-range enumerators,
//                      false to create contiguous index-range enumerators
//
// Notes:
//     We check whether the data source is an IList<T> and, if so, we can partition
//     "in place" by calculating a set of indexes. Otherwise, we return an enumerator that
//     performs partitioning lazily. Depending on which case it is, the enumerator may
//     contain synchronization (i.e. the latter case), meaning callers may occasionally
//     block when enumerating it.
//
private void InitializePartitions(IEnumerable<T> source, int partitionCount, bool useStriping)
{
    Contract.Assert(source != null);
    Contract.Assert(partitionCount > 0);

    // If this is a wrapper, grab the internal wrapped data source so we can uncover its real type.
    ParallelEnumerableWrapper<T> wrapper = source as ParallelEnumerableWrapper<T>;
    if (wrapper != null)
    {
        source = wrapper.WrappedEnumerable;
        Contract.Assert(source != null);
    }

    // Check whether we have an indexable data source.
    IList<T> sourceAsList = source as IList<T>;
    if (sourceAsList != null)
    {
        QueryOperatorEnumerator<T, int>[] partitions = new QueryOperatorEnumerator<T, int>[partitionCount];

        // (The previously declared 'listCount' local was never read and has been removed.)

        // We use this below to specialize enumerators when possible.
        T[] sourceAsArray = source as T[];

        // If range partitioning is used, chunk size will be unlimited, i.e. -1.
        int maxChunkSize = -1;

        if (useStriping)
        {
            maxChunkSize = Scheduling.GetDefaultChunkSize<T>();

            // @TODO: @PERF: @BUG#517: though we choose a chunk size that is a multiple of a cache line,
            //      the CLR makes no guarantees about alignment, so in truth the actual elements
            //      could overlap cache lines. We could try to align, but sadly the CLR's GC
            //      gives no guarantees that things will remain aligned. We should look into
            //      this in the future, possibly talking to the CLR team about a new feature.

            // The minimum chunk size is 1.
            if (maxChunkSize < 1)
            {
                maxChunkSize = 1;
            }
        }

        // Calculate indexes and construct enumerators that walk a subset of the input.
        for (int i = 0; i < partitionCount; i++)
        {
            if (sourceAsArray != null)
            {
                // If the source is an array, we can use a fast path below to index using
                // 'ldelem' instructions rather than making interface method calls.
                if (useStriping)
                {
                    partitions[i] = new ArrayIndexRangeEnumerator(sourceAsArray, partitionCount, i, maxChunkSize);
                }
                else
                {
                    partitions[i] = new ArrayContiguousIndexRangeEnumerator(sourceAsArray, partitionCount, i);
                }

                TraceHelpers.TraceInfo("ContigousRangePartitionExchangeStream::MakePartitions - (array) #{0} {1}", i, maxChunkSize);
            }
            else
            {
                // Create a general purpose list enumerator object.
                if (useStriping)
                {
                    partitions[i] = new ListIndexRangeEnumerator(sourceAsList, partitionCount, i, maxChunkSize);
                }
                else
                {
                    partitions[i] = new ListContiguousIndexRangeEnumerator(sourceAsList, partitionCount, i);
                }

                TraceHelpers.TraceInfo("ContigousRangePartitionExchangeStream::MakePartitions - (list) #{0} {1})", i, maxChunkSize);
            }
        }

        Contract.Assert(partitions.Length == partitionCount);
        m_partitions = partitions;
    }
    else
    {
        // We couldn't use an in-place partition. Shucks. Defer to the other overload which
        // accepts an enumerator as input instead.
        m_partitions = MakePartitions(source.GetEnumerator(), partitionCount);
    }
}