/// <summary>
/// Finalizes the group once no further elements will be added: the chunked
/// (order key, element) pairs are flattened into two parallel arrays, sorted by
/// order key with the grouping's comparer, and the sorted elements are stored.
/// </summary>
internal void DoneAdding()
{
    Debug.Assert(_values != null);

    // Walk the whole chunk chain once to learn how many pairs were collected.
    int total = 0;
    for (ListChunk<Pair<TOrderKey, TElement>>? chunk = _values; chunk != null; chunk = chunk.Next)
    {
        total += chunk.Count;
    }

    TElement[] elements = new TElement[total];
    TOrderKey[] sortKeys = new TOrderKey[total];

    // Split every pair into its key and its element, keeping the positions aligned.
    int next = 0;
    foreach (Pair<TOrderKey, TElement> pair in _values)
    {
        sortKeys[next] = pair.First;
        elements[next] = pair.Second;
        next++;
    }

    // Sorting the keys reorders the elements array in lock-step.
    Array.Sort(sortKeys, elements, _orderComparer);
    _sortedValues = elements;

#if DEBUG
    _values = null; // Any future calls to Add() or DoneAdding() will fail
#endif
}
/// <summary>
/// Creates a new root chunk backed by an array of <paramref name="size"/> slots.
/// The chunk starts out empty and acts as its own tail.
/// </summary>
/// <param name="size">Capacity of the backing array; asserted to be positive.</param>
internal ListChunk(int size)
{
    Contract.Assert(size > 0);

    _tailChunk = this;
    _chunkCount = 0;
    _chunk = new TInputOutput[size];
}
// Comparer applied to the order keys.
private readonly IComparer<TOrderKey> _orderComparer;

/// <summary>
/// Creates an empty grouping for <paramref name="groupKey"/>.
/// </summary>
/// <param name="groupKey">Key shared by every element of the group.</param>
/// <param name="orderComparer">Comparer stored for ordering the group's elements by order key.</param>
internal OrderedGroupByGrouping(TGroupKey groupKey, IComparer<TOrderKey> orderComparer)
{
    _orderComparer = orderComparer;
    _groupKey = groupKey;

    // Storage for (order key, element) pairs, starting with a single chunk.
    _values = new ListChunk<Pair<TOrderKey, TElement>>(INITIAL_CHUNK_SIZE);
}
/// <summary>
/// Lazily yields every stored element, chunk by chunk, starting at this chunk
/// and following the next-chunk links until the chain ends.
/// </summary>
public IEnumerator<TInputOutput> GetEnumerator()
{
    ListChunk<TInputOutput> chunk = this;
    while (chunk != null)
    {
        // The count is re-read on every step, exactly as the chunk exposes it.
        int index = 0;
        while (index < chunk.m_chunkCount)
        {
            yield return chunk.m_chunk[index];
            index++;
        }

        chunk = chunk.m_nextChunk;
    }
}
/// <summary>
/// Creates a repartitioning enumerator; it only captures its arguments in fields.
/// </summary>
/// <param name="source">Enumerator the elements are pulled from.</param>
/// <param name="partitionCount">Total number of partitions.</param>
/// <param name="partitionIndex">Index of the partition this enumerator serves.</param>
/// <param name="keySelector">Function producing the hash key of an element.</param>
/// <param name="repartitionStream">Owning repartition stream.</param>
/// <param name="barrier">Countdown event shared between the partitions.</param>
/// <param name="valueExchangeMatrix">Two-dimensional array of element buffers shared between the partitions.</param>
/// <param name="cancellationToken">Token stored for cancellation checks.</param>
internal HashRepartitionEnumerator(
    QueryOperatorEnumerator<TInputOutput, TIgnoreKey> source,
    int partitionCount,
    int partitionIndex,
    Func<TInputOutput, THashKey> keySelector,
    HashRepartitionStream<TInputOutput, THashKey, int> repartitionStream,
    CountdownEvent barrier,
    ListChunk<Pair<TInputOutput, THashKey>>[,] valueExchangeMatrix,
    CancellationToken cancellationToken)
{
    // Where the data comes from and how it is keyed.
    m_source = source;
    m_keySelector = keySelector;
    m_repartitionStream = repartitionStream;

    // Partition geometry.
    m_partitionCount = partitionCount;
    m_partitionIndex = partitionIndex;

    // State shared with the sibling partitions.
    m_barrier = barrier;
    m_valueExchangeMatrix = valueExchangeMatrix;
    m_cancellationToken = cancellationToken;
}
/// <summary>
/// Appends <paramref name="e"/> to the tail chunk. When the tail is full, a new
/// chunk with twice the tail's element count is linked in first.
/// </summary>
/// <param name="e">Element to append.</param>
internal void Add(TInputOutput e)
{
    ListChunk<TInputOutput> target = m_tailChunk;

    bool tailIsFull = target.m_chunkCount == target.m_chunk.Length;
    if (tailIsFull)
    {
        // Record the new chunk as the tail, then link it behind the old tail.
        m_tailChunk = new ListChunk<TInputOutput>(target.m_chunkCount * 2);
        target.m_nextChunk = m_tailChunk;
        target = m_tailChunk;
    }

    target.m_chunk[target.m_chunkCount++] = e;
}
/// <summary>
/// Builds the repartition stream: one <c>HashRepartitionEnumerator</c> per input
/// partition, all sharing a single countdown event and a square matrix of buffers.
/// </summary>
/// <param name="inputStream">Partitioned input; its partition count sizes everything created here.</param>
/// <param name="keySelector">Function producing the hash key of an element.</param>
/// <param name="keyComparer">Key comparer forwarded to the base constructor.</param>
/// <param name="elementComparer">Element comparer forwarded to the base constructor.</param>
/// <param name="cancellationToken">Token handed to every partition enumerator.</param>
internal UnorderedHashRepartitionStream(
    PartitionedStream<TInputOutput, TIgnoreKey> inputStream,
    Func<TInputOutput, THashKey> keySelector,
    IEqualityComparer<THashKey> keyComparer,
    IEqualityComparer<TInputOutput> elementComparer,
    CancellationToken cancellationToken)
    : base(inputStream.PartitionCount, Util.GetDefaultComparer<int>(), keyComparer, elementComparer)
{
    int partitionCount = inputStream.PartitionCount;

    m_partitions = (QueryOperatorEnumerator<Pair<TInputOutput, THashKey>, int>[])
        new HashRepartitionEnumerator<TInputOutput, THashKey, TIgnoreKey>[partitionCount];

    // State shared by all partitions; the matrix cells are left unallocated here.
    CountdownEvent sharedBarrier = new CountdownEvent(partitionCount);
    ListChunk<Pair<TInputOutput, THashKey>>[,] exchangeMatrix =
        new ListChunk<Pair<TInputOutput, THashKey>>[partitionCount, partitionCount];

    for (int index = 0; index < partitionCount; index++)
    {
        m_partitions[index] = new HashRepartitionEnumerator<TInputOutput, THashKey, TIgnoreKey>(
            inputStream[index],
            partitionCount,
            index,
            keySelector,
            this,
            sharedBarrier,
            exchangeMatrix,
            cancellationToken);
    }
}
/// <summary>
/// Appends an element to the chunk list. Intended to be called on the root chunk only.
/// </summary>
/// <param name="e">The element to append.</param>
internal void Add(TInputOutput e)
{
    ListChunk<TInputOutput> last = _tailChunk;

    bool lastIsFull = last._chunkCount == last._chunk.Length;
    if (lastIsFull)
    {
        // Grow by chaining a chunk that is twice as large as the full one.
        _tailChunk = new ListChunk<TInputOutput>(last._chunkCount * 2);
        last._nextChunk = _tailChunk;
        last = _tailChunk;
    }

    last._chunk[last._chunkCount++] = e;
}
/// <summary>
/// Appends a value together with its ordering key to the list.
/// </summary>
/// <param name="value">Value to append.</param>
/// <param name="orderKey">Ordering key that belongs to the value.</param>
/// <returns>
/// <c>true</c> when this call had to allocate the tail chunk, meaning the
/// internal memory of this instance changed; otherwise <c>false</c>.
/// </returns>
/// <remarks>
/// This is a value type, so when the internal memory changes the caller must
/// write the updated copy back wherever it is stored (a HashLookup, for example).
/// </remarks>
internal bool Add(TElement value, TOrderKey orderKey)
{
    bool allocatedTail = false;

    if (_tail == null)
    {
        _tail = new ListChunk<Pair<TElement, TOrderKey>>(INITIAL_CHUNK_SIZE);
        allocatedTail = true;
    }

    _tail.Add(CreatePair(value, orderKey));
    return allocatedTail;
}
/// <summary>
/// Returns an enumerator that walks the elements of this chunk and of every
/// chunk linked after it, in order.
/// </summary>
public IEnumerator<TInputOutput> GetEnumerator()
{
    for (ListChunk<TInputOutput> chunk = this; chunk != null; chunk = chunk._nextChunk)
    {
        int position = 0;
        while (position < chunk._chunkCount)
        {
            yield return chunk._chunk[position];
            position++;
        }

        // Only a completely filled chunk may have a successor.
        Contract.Assert(chunk._nextChunk == null || chunk._chunkCount == chunk._chunk.Length);
    }
}
/// <summary>
/// Drains the source, sorting every element (and its order key) into a private
/// per-destination buffer, then publishes those buffers into this partition's
/// row of the exchange matrices, signals the barrier, and positions the
/// enumeration state on this partition's own buffer.
/// </summary>
private void EnumerateAndRedistributeElements()
{
    Mutables<TInputOutput, THashKey, TOrderKey> mutables = m_mutables;

    // Private buffers, one slot per destination partition, allocated on first use.
    ListChunk<Pair<TInputOutput, THashKey>>[] valueBuffers =
        new ListChunk<Pair<TInputOutput, THashKey>>[m_partitionCount];
    ListChunk<TOrderKey>[] keyBuffers = new ListChunk<TOrderKey>[m_partitionCount];

    TInputOutput element = default(TInputOutput);
    TOrderKey orderKey = default(TOrderKey);
    int iterations = 0;

    while (m_source.MoveNext(ref element, ref orderKey))
    {
        // Check for cancellation once every 64 elements.
        if ((iterations++ & 0x3f) == 0)
        {
            CancellationState.ThrowIfCanceled(m_cancellationToken);
        }

        // Pick the destination from the hash of the key, or of the element
        // itself when no key selector was supplied.
        THashKey hashKey = default(THashKey);
        int destination;
        if (m_keySelector == null)
        {
            destination = m_repartitionStream.GetHashCode(element) % m_partitionCount;
        }
        else
        {
            hashKey = m_keySelector(element);
            destination = m_repartitionStream.GetHashCode(hashKey) % m_partitionCount;
        }

        ListChunk<Pair<TInputOutput, THashKey>> valueBuffer = valueBuffers[destination];
        ListChunk<TOrderKey> keyBuffer = keyBuffers[destination];
        if (valueBuffer == null)
        {
            valueBuffer = new ListChunk<Pair<TInputOutput, THashKey>>(0x80);
            valueBuffers[destination] = valueBuffer;
            keyBuffer = new ListChunk<TOrderKey>(0x80);
            keyBuffers[destination] = keyBuffer;
        }

        valueBuffer.Add(new Pair<TInputOutput, THashKey>(element, hashKey));
        keyBuffer.Add(orderKey);
    }

    // Publish the private buffers, then announce that this partition is done.
    for (int destination = 0; destination < m_partitionCount; destination++)
    {
        m_valueExchangeMatrix[m_partitionIndex, destination] = valueBuffers[destination];
        m_keyExchangeMatrix[m_partitionIndex, destination] = keyBuffers[destination];
    }

    m_barrier.Signal();

    // Enumeration begins with the buffer this partition filled for itself.
    mutables.m_currentBufferIndex = m_partitionIndex;
    mutables.m_currentBuffer = valueBuffers[m_partitionIndex];
    mutables.m_currentKeyBuffer = keyBuffers[m_partitionIndex];
    mutables.m_currentIndex = -1;
}
/// <summary>
/// Appends <paramref name="element"/> to the list stored under
/// <paramref name="hashKey"/>, creating that list when the key is new.
/// </summary>
/// <param name="hashKey">Key of the list to append to.</param>
/// <param name="element">Element to append.</param>
/// <param name="orderKey">Not used by this implementation.</param>
/// <returns><c>true</c> when the key was already present; <c>false</c> when a new list was created.</returns>
public bool Add(THashKey hashKey, TElement element, TOrderKey orderKey)
{
    const int INITIAL_CHUNK_SIZE = 2;

    ListChunk<TElement>? bucket = default(ListChunk<TElement>);
    bool keyAlreadyPresent = _base.TryGetValue(hashKey, ref bucket);

    if (!keyAlreadyPresent)
    {
        bucket = new ListChunk<TElement>(INITIAL_CHUNK_SIZE);
        _base.Add(hashKey, bucket);
    }

    bucket.Add(element);
    return keyAlreadyPresent;
}
//---------------------------------------------------------------------------------------
// Creates a new repartitioning enumerator.
//
// Arguments:
//     source              - the data stream from which to pull elements
//     partitionCount      - total number of partitions
//     partitionIndex      - this operator's unique partition index
//     keySelector         - produces an element's hash key; may be null only when
//                           THashKey is NoKeyMemoizationRequired
//     repartitionStream   - the stream object to use for partition selection
//     barrier             - a latch used to signal task completion
//     valueExchangeMatrix - partitionCount x partitionCount matrix of buffers for
//                           inter-task communication
//     cancellationToken   - token stored for cancellation checks
//
internal HashRepartitionEnumerator(
    QueryOperatorEnumerator<TInputOutput, TIgnoreKey> source,
    int partitionCount,
    int partitionIndex,
    Func<TInputOutput, THashKey> keySelector,
    HashRepartitionStream<TInputOutput, THashKey, int> repartitionStream,
    CountdownEvent barrier,
    ListChunk<Pair<TInputOutput, THashKey>>[,] valueExchangeMatrix,
    CancellationToken cancellationToken)
{
    Contract.Assert(source != null);
    Contract.Assert(keySelector != null || typeof(THashKey) == typeof(NoKeyMemoizationRequired));
    Contract.Assert(repartitionStream != null);
    Contract.Assert(barrier != null);
    Contract.Assert(valueExchangeMatrix != null);
    Contract.Assert(valueExchangeMatrix.GetLength(0) == partitionCount, "expected square matrix of buffers (NxN)");
    Contract.Assert(valueExchangeMatrix.GetLength(1) == partitionCount, "expected square matrix of buffers (NxN)");
    Contract.Assert(partitionIndex >= 0 && partitionIndex < partitionCount);

    // Data source and keying.
    m_source = source;
    m_keySelector = keySelector;
    m_repartitionStream = repartitionStream;

    // Partition geometry.
    m_partitionCount = partitionCount;
    m_partitionIndex = partitionIndex;

    // State shared with the other partitions.
    m_barrier = barrier;
    m_valueExchangeMatrix = valueExchangeMatrix;
    m_cancellationToken = cancellationToken;
}
//-----------------------------------------------------------------------------------
// Builds the hash lookup: every source element is run through the element selector
// and appended to the list stored under its (wrapped) group key.
//
protected override HashLookup<Wrapper<TGroupKey>, ListChunk<TElement>> BuildHashLookup()
{
    const int INITIAL_CHUNK_SIZE = 2;

    WrapperEqualityComparer<TGroupKey> comparer = new WrapperEqualityComparer<TGroupKey>(_keyComparer);
    HashLookup<Wrapper<TGroupKey>, ListChunk<TElement>> groups =
        new HashLookup<Wrapper<TGroupKey>, ListChunk<TElement>>(comparer);

    Pair<TSource, TGroupKey> current = default(Pair<TSource, TGroupKey>);
    TOrderKey ignoredOrderKey = default(TOrderKey)!;

    for (int processed = 0; _source.MoveNext(ref current, ref ignoredOrderKey); processed++)
    {
        // Poll for cancellation at the shared interval.
        if ((processed & CancellationState.POLL_INTERVAL) == 0)
        {
            _cancellationToken.ThrowIfCancellationRequested();
        }

        Wrapper<TGroupKey> groupKey = new Wrapper<TGroupKey>(current.Second);

        // Reuse the list already stored under this key, or start a new one.
        ListChunk<TElement>? group = null;
        if (!groups.TryGetValue(groupKey, ref group))
        {
            group = new ListChunk<TElement>(INITIAL_CHUNK_SIZE);
            groups.Add(groupKey, group);
        }

        Debug.Assert(group != null);

        group.Add(_elementSelector(current.First));
    }

    return groups;
}
/// <summary>
/// Builds the hash lookup by appending every source element, unchanged, to the
/// list stored under its (wrapped) group key.
/// </summary>
/// <returns>The populated lookup from group key to element list.</returns>
protected override HashLookup<Wrapper<TGroupKey>, ListChunk<TSource>> BuildHashLookup()
{
    WrapperEqualityComparer<TGroupKey> comparer = new WrapperEqualityComparer<TGroupKey>(m_keyComparer);
    HashLookup<Wrapper<TGroupKey>, ListChunk<TSource>> groups =
        new HashLookup<Wrapper<TGroupKey>, ListChunk<TSource>>(comparer);

    Pair<TSource, TGroupKey> current = new Pair<TSource, TGroupKey>();
    TOrderKey ignoredOrderKey = default(TOrderKey);

    for (int processed = 0; m_source.MoveNext(ref current, ref ignoredOrderKey); processed++)
    {
        // Check for cancellation once every 64 elements.
        if ((processed & 0x3f) == 0)
        {
            CancellationState.ThrowIfCanceled(m_cancellationToken);
        }

        Wrapper<TGroupKey> groupKey = new Wrapper<TGroupKey>(current.Second);

        // Reuse the list already stored under this key, or start a new one.
        ListChunk<TSource> group = null;
        bool known = groups.TryGetValue(groupKey, ref group);
        if (!known)
        {
            group = new ListChunk<TSource>(2);
            groups.Add(groupKey, group);
        }

        group.Add(current.First);
    }

    return groups;
}
//---------------------------------------------------------------------------------------
// Creates a new partition exchange operator with one repartitioning enumerator per
// input partition.
//
internal UnorderedHashRepartitionStream(
    PartitionedStream<TInputOutput, TIgnoreKey> inputStream,
    Func<TInputOutput, THashKey> keySelector,
    IEqualityComparer<THashKey> keyComparer,
    IEqualityComparer<TInputOutput> elementComparer,
    CancellationToken cancellationToken)
    : base(inputStream.PartitionCount, Util.GetDefaultComparer<int>(), keyComparer, elementComparer)
{
    int partitionCount = inputStream.PartitionCount;

    // One output partition per input partition.
    m_partitions = new HashRepartitionEnumerator<TInputOutput, THashKey, TIgnoreKey>[partitionCount];

    // State shared among the partitions: a latch and an NxN matrix of buffers.
    // The matrix cells themselves are allocated lazily, only if needed.
    CountdownEvent sharedBarrier = new CountdownEvent(partitionCount);
    ListChunk<Pair<TInputOutput, THashKey>>[,] exchangeMatrix =
        new ListChunk<Pair<TInputOutput, THashKey>>[partitionCount, partitionCount];

    // Construct each partition object.
    for (int index = 0; index < partitionCount; index++)
    {
        m_partitions[index] = new HashRepartitionEnumerator<TInputOutput, THashKey, TIgnoreKey>(
            inputStream[index],
            partitionCount,
            index,
            keySelector,
            this,
            sharedBarrier,
            exchangeMatrix,
            cancellationToken);
    }
}
/// <summary>
/// Creates a new root chunk backed by an array of <paramref name="size"/> slots.
/// The chunk starts out empty and acts as its own tail.
/// </summary>
/// <param name="size">
/// Capacity of the backing array. Must be positive: a zero-sized chunk counts as
/// "full" immediately, and growing it by doubling yields another zero-sized chunk,
/// so the first Add would index past the end of the array.
/// </param>
internal ListChunk(int size)
{
    // Debug-only precondition; fully qualified so no extra using directive is needed.
    System.Diagnostics.Debug.Assert(size > 0, "chunk size must be positive");

    m_chunk = new TInputOutput[size];
    m_chunkCount = 0;
    m_tailChunk = this;
}
//---------------------------------------------------------------------------------------
// MoveNext implements the hash-join logic. On the first call it asks the right lookup
// builder for the complete hash lookup (the Building phase). On that call and on every
// later one it performs the Probing phase incrementally: left elements are pulled one
// at a time, looked up by hash key, and one (left, right) result is produced per call.
//
// Left elements with a null hash key, or with no entry in the lookup, produce no
// output. When a key has several right values, the first (the head) is yielded
// immediately and the remaining ones (the tail chunks) are yielded by later calls.
//
// Returns true when currentElement/currentKey were set, false once the left source
// is exhausted.
//
internal override bool MoveNext(ref TOutput currentElement, ref TOutputKey currentKey)
{
    Debug.Assert(_resultSelector != null, "expected a compiled result selector");
    Debug.Assert(_leftSource != null);
    Debug.Assert(_rightLookupBuilder != null);

    // BUILD phase: If we haven't built the hash-table yet, create that first.
    Mutables mutables = _mutables;
    if (mutables == null)
    {
        mutables = _mutables = new Mutables();
        mutables._rightHashLookup = _rightLookupBuilder.BuildHashLookup(_cancellationToken);
    }

    // PROBE phase: So long as the source has a next element, return the match.
    // If the current chunk of pending matches is used up, advance to the next chunk
    // (which may be null, meaning there are no pending matches left).
    ListChunk<Pair<TRightInput, TRightKey>> currentRightChunk = mutables._currentRightMatches;
    if (currentRightChunk != null && mutables._currentRightMatchesIndex == currentRightChunk.Count)
    {
        mutables._currentRightMatches = currentRightChunk.Next;
        mutables._currentRightMatchesIndex = 0;
    }

    if (mutables._currentRightMatches == null)
    {
        // We have to look up the next list of matches in the hash-table.
        Pair<TLeftInput, THashKey> leftPair = default(Pair<TLeftInput, THashKey>);
        TLeftKey leftKey = default(TLeftKey);
        while (_leftSource.MoveNext(ref leftPair, ref leftKey))
        {
            if ((mutables._outputLoopCount++ & CancellationState.POLL_INTERVAL) == 0)
            {
                CancellationState.ThrowIfCanceled(_cancellationToken);
            }

            // Find the match in the hash table.
            HashLookupValueList<TRightInput, TRightKey> matchValue = default(HashLookupValueList<TRightInput, TRightKey>);
            TLeftInput leftElement = leftPair.First;
            THashKey leftHashKey = leftPair.Second;

            // Ignore null keys.
            if (leftHashKey != null)
            {
                if (mutables._rightHashLookup.TryGetValue(leftHashKey, ref matchValue))
                {
                    // We found a new match. We remember the list in case there are multiple
                    // values under this same key -- the next iteration will pick them up.
                    mutables._currentRightMatches = matchValue.Tail;
                    Debug.Assert(mutables._currentRightMatches == null || mutables._currentRightMatches.Count > 0,
                        "we were expecting that the list would be either null or non-empty");
                    mutables._currentRightMatchesIndex = 0;

                    // Yield the value.
                    currentElement = _resultSelector(leftElement, matchValue.Head.First);
                    currentKey = _outputKeyBuilder.Combine(leftKey, matchValue.Head.Second);

                    // If there is a list of matches, remember the left values for next time.
                    if (matchValue.Tail != null)
                    {
                        mutables._currentLeft = leftElement;
                        mutables._currentLeftKey = leftKey;
                    }

                    return true;
                }
            }
        }

        // If we've reached the end of the data source, we're done.
        return false;
    }

    // Produce the next element from the pending matches of the remembered left value.
    Debug.Assert(mutables._currentRightMatches != null);
    Debug.Assert(0 <= mutables._currentRightMatchesIndex && mutables._currentRightMatchesIndex < mutables._currentRightMatches.Count);

    Pair<TRightInput, TRightKey> rightMatch = mutables._currentRightMatches._chunk[mutables._currentRightMatchesIndex];

    currentElement = _resultSelector(mutables._currentLeft, rightMatch.First);
    currentKey = _outputKeyBuilder.Combine(mutables._currentLeftKey, rightMatch.Second);

    mutables._currentRightMatchesIndex++;

    return true;
}
/// <summary>
/// Produces the next join result. The first call reads the whole right source into a
/// hash lookup (build phase); that call and every later one then probe the lookup
/// with elements pulled from the left source.
/// </summary>
/// <param name="currentElement">Receives the produced result.</param>
/// <param name="currentKey">Receives the key of the left element the result belongs to.</param>
/// <returns>true when a result was produced; false when the left source is exhausted.</returns>
internal override bool MoveNext(ref TOutput currentElement, ref TLeftKey currentKey) {
    Mutables <TLeftInput, TLeftKey, TRightInput, THashKey, TOutput> mutables = this.m_mutables;
    // Build phase: runs only on the first call, while no mutable state exists yet.
    if (mutables == null) {
        mutables = this.m_mutables = new Mutables <TLeftInput, TLeftKey, TRightInput, THashKey, TOutput>();
        mutables.m_rightHashLookup = new HashLookup <THashKey, Pair <TRightInput, ListChunk <TRightInput> > >(this.m_keyComparer);
        Pair <TRightInput, THashKey> pair = new Pair <TRightInput, THashKey>();
        int num = 0;
        int num2 = 0;
        while (this.m_rightSource.MoveNext(ref pair, ref num)) {
            // Check for cancellation once every 64 elements.
            if ((num2++ & 0x3f) == 0) {
                CancellationState.ThrowIfCanceled(this.m_cancellationToken);
            }
            TRightInput first = pair.First;
            THashKey second = pair.Second;
            // Right elements with a null key are not stored.
            if (second != null) {
                Pair <TRightInput, ListChunk <TRightInput> > pair2 = new Pair <TRightInput, ListChunk <TRightInput> >();
                if (!mutables.m_rightHashLookup.TryGetValue(second, ref pair2)) {
                    // First element for this key: it becomes the pair's First, with no list yet.
                    pair2 = new Pair <TRightInput, ListChunk <TRightInput> >(first, null);
                    // With a group result selector the list is created right away and
                    // also contains the first element.
                    if (this.m_groupResultSelector != null) {
                        pair2.Second = new ListChunk <TRightInput>(2);
                        pair2.Second.Add(first);
                    }
                    mutables.m_rightHashLookup.Add(second, pair2);
                } else {
                    // Key seen before: create the list on demand and store the modified
                    // pair back into the lookup, then append the element.
                    if (pair2.Second == null) {
                        pair2.Second = new ListChunk <TRightInput>(2);
                        mutables.m_rightHashLookup[second] = pair2;
                    }
                    pair2.Second.Add(first);
                }
            }
        }
    }
    // Probe phase. If the current chunk of pending matches is used up, move on to the
    // next chunk (null when there is none).
    ListChunk <TRightInput> currentRightMatches = mutables.m_currentRightMatches;
    if ((currentRightMatches != null) && (mutables.m_currentRightMatchesIndex == currentRightMatches.Count)) {
        currentRightMatches = mutables.m_currentRightMatches = currentRightMatches.Next;
        mutables.m_currentRightMatchesIndex = 0;
    }
    // No pending matches: pull left elements until one produces a result.
    if (mutables.m_currentRightMatches == null) {
        Pair <TLeftInput, THashKey> pair3 = new Pair <TLeftInput, THashKey>();
        TLeftKey local3 = default(TLeftKey);
        while (this.m_leftSource.MoveNext(ref pair3, ref local3)) {
            // Check for cancellation once every 64 elements.
            if ((mutables.m_outputLoopCount++ & 0x3f) == 0) {
                CancellationState.ThrowIfCanceled(this.m_cancellationToken);
            }
            Pair <TRightInput, ListChunk <TRightInput> > pair4 = new Pair <TRightInput, ListChunk <TRightInput> >();
            TLeftInput local4 = pair3.First;
            THashKey key = pair3.Second;
            // Single-result path: a non-null key with a match yields (left, first right
            // value) now; any further values in the list are yielded by later calls.
            if (((key != null) && mutables.m_rightHashLookup.TryGetValue(key, ref pair4)) && (this.m_singleResultSelector != null)) {
                mutables.m_currentRightMatches = pair4.Second;
                mutables.m_currentRightMatchesIndex = 0;
                currentElement = this.m_singleResultSelector(local4, pair4.First);
                currentKey = local3;
                // Remember the left element only when there are more matches to pair it with.
                if (pair4.Second != null) {
                    mutables.m_currentLeft = local4;
                    mutables.m_currentLeftKey = local3;
                }
                return(true);
            }
            // Group-result path: every left element yields a result, with an empty
            // sequence when no list of matches was found.
            if (this.m_groupResultSelector != null) {
                IEnumerable <TRightInput> enumerable = pair4.Second;
                if (enumerable == null) {
                    enumerable = (IEnumerable <TRightInput>)ParallelEnumerable.Empty <TRightInput>();
                }
                currentElement = this.m_groupResultSelector(local4, enumerable);
                currentKey = local3;
                return(true);
            }
        }
        // Left source exhausted.
        return(false);
    }
    // Pending matches exist: pair the remembered left element with the next one.
    currentElement = this.m_singleResultSelector(mutables.m_currentLeft, mutables.m_currentRightMatches.m_chunk[mutables.m_currentRightMatchesIndex]);
    currentKey = mutables.m_currentLeftKey;
    mutables.m_currentRightMatchesIndex++;
    return(true);
}
// Constructor used to start a new list: the first value/order-key pair becomes the
// head, and no tail chunk exists yet.
internal HashLookupValueList(TElement firstValue, TOrderKey firstOrderKey)
{
    _tail = null;
    _head = CreatePair(firstValue, firstOrderKey);
}
/// <summary>
/// Pairs the given list of elements, exposed as an <c>IEnumerable</c>, with <c>OrderKey</c>.
/// </summary>
/// <param name="baseValue">List of elements to expose.</param>
/// <returns>A pair of the element sequence and the order key.</returns>
protected override Pair<IEnumerable<TElement>, int> CreateValuePair(ListChunk<TElement> baseValue)
{
    Pair<IEnumerable<TElement>, int> valuePair = new Pair<IEnumerable<TElement>, int>(baseValue, OrderKey);
    return valuePair;
}
//---------------------------------------------------------------------------------------
// Called when this enumerator is first enumerated. Walks the whole source and files
// every element (and its order key) under its destination partition, then publishes
// the result into this partition's row of the exchange matrices.
//
private void EnumerateAndRedistributeElements()
{
    Mutables mutables = m_mutables;
    Contract.Assert(mutables != null);

    // Private per-destination buffers. Elements are collected here and copied to the
    // shared matrices only at the end, which limits access to shared memory locations.
    ListChunk<Pair<TInputOutput, THashKey>>[] localValueBuffers =
        new ListChunk<Pair<TInputOutput, THashKey>>[m_partitionCount];
    ListChunk<TOrderKey>[] localKeyBuffers = new ListChunk<TOrderKey>[m_partitionCount];

    TInputOutput current = default(TInputOutput);
    TOrderKey currentOrderKey = default(TOrderKey);
    int iterations = 0;

    while (m_source.MoveNext(ref current, ref currentOrderKey))
    {
        if ((iterations++ & CancellationState.POLL_INTERVAL) == 0)
        {
            CancellationState.ThrowIfCanceled(m_cancellationToken);
        }

        // Work out which partition the element belongs to: hash the selected key, or
        // the element itself when there is no key selector.
        THashKey hashKey = default(THashKey);
        int target;
        if (m_keySelector == null)
        {
            Contract.Assert(typeof(THashKey) == typeof(NoKeyMemoizationRequired));
            target = m_repartitionStream.GetHashCode(current) % m_partitionCount;
        }
        else
        {
            hashKey = m_keySelector(current);
            target = m_repartitionStream.GetHashCode(hashKey) % m_partitionCount;
        }

        Contract.Assert(0 <= target && target < m_partitionCount,
            "destination partition outside of the legal range of partitions");

        // Fetch the buffers for the destination partition, allocating both on first use.
        ListChunk<Pair<TInputOutput, THashKey>> valueBuffer = localValueBuffers[target];
        ListChunk<TOrderKey> orderKeyBuffer = localKeyBuffers[target];
        if (valueBuffer == null)
        {
            const int INITIAL_PRIVATE_BUFFER_SIZE = 128;
            Contract.Assert(orderKeyBuffer == null);

            valueBuffer = new ListChunk<Pair<TInputOutput, THashKey>>(INITIAL_PRIVATE_BUFFER_SIZE);
            localValueBuffers[target] = valueBuffer;
            orderKeyBuffer = new ListChunk<TOrderKey>(INITIAL_PRIVATE_BUFFER_SIZE);
            localKeyBuffers[target] = orderKeyBuffer;
        }

        valueBuffer.Add(new Pair<TInputOutput, THashKey>(current, hashKey));
        orderKeyBuffer.Add(currentOrderKey);
    }

    // Copy the private buffers into the shared matrices, then signal the barrier so
    // the other partitions know this one is done.
    for (int destination = 0; destination < m_partitionCount; destination++)
    {
        m_valueExchangeMatrix[m_partitionIndex, destination] = localValueBuffers[destination];
        m_keyExchangeMatrix[m_partitionIndex, destination] = localKeyBuffers[destination];
    }

    m_barrier.Signal();

    // Enumeration starts with the buffer this partition collected for itself.
    mutables.m_currentBufferIndex = m_partitionIndex;
    mutables.m_currentBuffer = localValueBuffers[m_partitionIndex];
    mutables.m_currentKeyBuffer = localKeyBuffers[m_partitionIndex];
    mutables.m_currentIndex = ENUMERATION_NOT_STARTED;
}
//---------------------------------------------------------------------------------------
// MoveNext implements all the hash-join logic. When it is called first, it enumerates
// the entire right source and builds a hash-table lookup. This is the Building phase.
// Then for the first call and all subsequent calls to MoveNext, we incrementally
// perform the Probing phase: we keep getting elements from the left source, look into
// the hash-table we built, and yield results.
//
// This routine supports both inner joins (single result selector) and group joins
// (group result selector). A group join yields, per left element, a (possibly empty)
// list of matching right elements; an inner join yields matches one-at-a-time.
//
// Returns true when currentElement/currentKey were set, false once the left source
// is exhausted.
//
internal override bool MoveNext(ref TOutput currentElement, ref TLeftKey currentKey)
{
    Contract.Assert(_singleResultSelector != null || _groupResultSelector != null, "expected a compiled result selector");
    Contract.Assert(_leftSource != null);
    Contract.Assert(_rightSource != null);

    // BUILD phase: If we haven't built the hash-table yet, create that first.
    Mutables mutables = _mutables;
    if (mutables == null)
    {
        mutables = _mutables = new Mutables();

#if DEBUG
        int hashLookupCount = 0;
        int hashKeyCollisions = 0;
#endif
        mutables._rightHashLookup = new HashLookup<THashKey, Pair>(_keyComparer);

        Pair rightPair = new Pair(default(TRightInput), default(THashKey));
        int rightKeyUnused = default(int);
        int i = 0;
        while (_rightSource.MoveNext(ref rightPair, ref rightKeyUnused))
        {
            if ((i++ & CancellationState.POLL_INTERVAL) == 0)
            {
                CancellationState.ThrowIfCanceled(_cancellationToken);
            }

            TRightInput rightElement = (TRightInput)rightPair.First;
            THashKey rightHashKey = (THashKey)rightPair.Second;

            // We ignore null keys.
            if (rightHashKey != null)
            {
#if DEBUG
                hashLookupCount++;
#endif

                // See if we've already stored an element under the current key. If not, we
                // lazily allocate a pair to hold the elements mapping to the same key.
                const int INITIAL_CHUNK_SIZE = 2;
                Pair currentValue = new Pair(default(TRightInput), default(ListChunk<TRightInput>));
                if (!mutables._rightHashLookup.TryGetValue(rightHashKey, ref currentValue))
                {
                    currentValue = new Pair(rightElement, null);

                    if (_groupResultSelector != null)
                    {
                        // For group joins, we also add the element to the list. This makes
                        // it easier later to yield the list as-is.
                        currentValue.Second = new ListChunk<TRightInput>(INITIAL_CHUNK_SIZE);
                        ((ListChunk<TRightInput>)currentValue.Second).Add((TRightInput)rightElement);
                    }

                    mutables._rightHashLookup.Add(rightHashKey, currentValue);
                }
                else
                {
                    if (currentValue.Second == null)
                    {
                        // Lazily allocate a list to hold all but the 1st value. We need to
                        // re-store this element because the pair is a value type.
                        currentValue.Second = new ListChunk<TRightInput>(INITIAL_CHUNK_SIZE);
                        mutables._rightHashLookup[rightHashKey] = currentValue;
                    }

                    ((ListChunk<TRightInput>)currentValue.Second).Add((TRightInput)rightElement);
#if DEBUG
                    hashKeyCollisions++;
#endif
                }
            }
        }

#if DEBUG
        TraceHelpers.TraceInfo("ParallelJoinQueryOperator::MoveNext - built hash table [count = {0}, collisions = {1}]", hashLookupCount, hashKeyCollisions);
#endif
    }

    // PROBE phase: So long as the source has a next element, return the match.
    // If the current chunk of pending matches is used up, advance to the next chunk
    // (which may be null, meaning there are no pending matches left).
    ListChunk<TRightInput> currentRightChunk = mutables._currentRightMatches;
    if (currentRightChunk != null && mutables._currentRightMatchesIndex == currentRightChunk.Count)
    {
        mutables._currentRightMatches = currentRightChunk.Next;
        mutables._currentRightMatchesIndex = 0;
    }

    if (mutables._currentRightMatches == null)
    {
        // We have to look up the next list of matches in the hash-table.
        Pair leftPair = new Pair(default(TLeftInput), default(THashKey));
        TLeftKey leftKey = default(TLeftKey);
        while (_leftSource.MoveNext(ref leftPair, ref leftKey))
        {
            if ((mutables._outputLoopCount++ & CancellationState.POLL_INTERVAL) == 0)
            {
                CancellationState.ThrowIfCanceled(_cancellationToken);
            }

            // Find the match in the hash table.
            Pair matchValue = new Pair(default(TRightInput), default(ListChunk<TRightInput>));
            TLeftInput leftElement = (TLeftInput)leftPair.First;
            THashKey leftHashKey = (THashKey)leftPair.Second;

            // Ignore null keys.
            if (leftHashKey != null)
            {
                if (mutables._rightHashLookup.TryGetValue(leftHashKey, ref matchValue))
                {
                    // We found a new match. For inner joins, we remember the list in case
                    // there are multiple values under this same key -- the next iteration
                    // will pick them up. For group joins, we will use the list momentarily.
                    if (_singleResultSelector != null)
                    {
                        mutables._currentRightMatches = (ListChunk<TRightInput>)matchValue.Second;
                        Contract.Assert(mutables._currentRightMatches == null || mutables._currentRightMatches.Count > 0,
                            "we were expecting that the list would be either null or non-empty");
                        mutables._currentRightMatchesIndex = 0;

                        // Yield the value.
                        currentElement = _singleResultSelector(leftElement, (TRightInput)matchValue.First);
                        currentKey = leftKey;

                        // If there is a list of matches, remember the left values for next time.
                        if (matchValue.Second != null)
                        {
                            mutables._currentLeft = leftElement;
                            mutables._currentLeftKey = leftKey;
                        }

                        return true;
                    }
                }
            }

            // For group joins, we always yield a result.
            if (_groupResultSelector != null)
            {
                // Grab the matches, or create an empty list if there are none.
                IEnumerable<TRightInput> matches = (ListChunk<TRightInput>)matchValue.Second;
                if (matches == null)
                {
                    matches = ParallelEnumerable.Empty<TRightInput>();
                }

                // Generate the current value.
                currentElement = _groupResultSelector(leftElement, matches);
                currentKey = leftKey;
                return true;
            }
        }

        // If we've reached the end of the data source, we're done.
        return false;
    }

    // Produce the next element and increment our index within the matches.
    Contract.Assert(_singleResultSelector != null);
    Contract.Assert(mutables._currentRightMatches != null);
    Contract.Assert(0 <= mutables._currentRightMatchesIndex && mutables._currentRightMatchesIndex < mutables._currentRightMatches.Count);

    currentElement = _singleResultSelector(
        mutables._currentLeft, mutables._currentRightMatches._chunk[mutables._currentRightMatchesIndex]);
    currentKey = mutables._currentLeftKey;

    mutables._currentRightMatchesIndex++;

    return true;
}