Ejemplo n.º 1
0
        /// <summary>
        /// Creates the pipe for a nested-grouping stream, caching the inner key selector,
        /// the inner-key hash function, the stream's error messages and a memory pool.
        /// </summary>
        /// <param name="stream">The nested-grouping streamable this pipe is created for.</param>
        /// <param name="observer">The downstream observer that receives compound-keyed output.</param>
        public PartitionedGroupNestedPipe(
            GroupNestedStreamable <TOuterKey, TSource, TInnerKey> stream,
            IStreamObserver <CompoundGroupKey <TOuterKey, TInnerKey>, TSource> observer)
            : base(stream, observer)
        {
            // Inner key selector: keep both the expression and its compiled delegate.
            var selectorExpression = stream.KeySelector;
            this.keySelector       = selectorExpression;
            this.keySelectorFunc   = this.keySelector.Compile();

            // The stream's key comparer is cast to the compound comparer; only the hash-code
            // expression of its inner comparer is kept, again alongside its compiled form.
            var compoundComparer = (CompoundGroupKeyEqualityComparer <TOuterKey, TInnerKey>)stream.Properties.KeyEqualityComparer;
            var innerComparer    = compoundComparer.innerComparer;
            this.keyComparer     = innerComparer.GetGetHashCodeExpr();
            this.innerHashCode   = this.keyComparer.Compile();

            this.errorMessages = stream.ErrorMessages;

            // The pool is requested according to whether the stream is columnar.
            var columnar = stream.Properties.IsColumnar;
            this.l1Pool  = MemoryManager.GetMemoryPool <CompoundGroupKey <TOuterKey, TInnerKey>, TSource>(columnar);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Creates a shuffling nested-grouping streamable over <paramref name="source"/>.
        /// When there is at most one L2 branch, a plain <c>GroupNestedStreamable</c> is created
        /// and its properties are adopted instead.
        /// </summary>
        /// <param name="source">The input stream; must not be null.</param>
        /// <param name="keySelector">Expression selecting the inner key from a payload; must not be null.</param>
        /// <param name="totalBranchesL2">Number of L2 branches; must be positive.</param>
        /// <param name="shuffleId">Identifier stored for this shuffle instance.</param>
        public ShuffleNestedStreamable(
            IStreamable <TOuterKey, TSource> source,
            Expression <Func <TSource, TInnerKey> > keySelector, int totalBranchesL2,
            int shuffleId)
            : base(source.Properties.GroupNested(keySelector))
        {
            Contract.Requires(source != null);
            Contract.Requires(keySelector != null);
            Contract.Requires(totalBranchesL2 > 0);

            this.totalBranchesL2 = totalBranchesL2;
            this.shuffleId       = shuffleId;
            Source               = source;
            KeySelector          = keySelector;

            // A value is a power of two exactly when it shares no bits with (value - 1).
            var lowerBits = totalBranchesL2 - 1;
            powerOf2      = (totalBranchesL2 & lowerBits) == 0;

            // More than one branch: nothing else to set up.
            if (totalBranchesL2 > 1)
            {
                return;
            }

            // Single branch: build a GroupNestedStreamable and take over its properties.
            singleThreadedShuffler = new GroupNestedStreamable <TOuterKey, TSource, TInnerKey>(source, keySelector);
            this.properties        = singleThreadedShuffler.Properties;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Builds the operator graph for a map-reduce with two mappers feeding a two-input
        /// reducer, then subscribes <paramref name="observer"/> to the final union.
        /// Each mapper is processed the same way: spray the input, apply the mapper per
        /// spray branch, group/shuffle by the reduce key, and (optionally) union per reduce branch.
        /// </summary>
        /// <param name="observer">The observer that receives the merged output.</param>
        /// <returns>The subscription returned by the final union's <c>Subscribe</c>.</returns>
        public override IDisposable Subscribe(IStreamObserver <TMapKey, TOutput> observer)
        {
            // asymmetric mapper implies that we have to have a 2-input mapper
            Contract.Assert((!this.leftAsymmetric1) || (this.sourceRight1 != null));
            Contract.Assert((!this.leftAsymmetric2) || (this.sourceRight2 != null));

            // Degree of parallelism for the map and reduce phases, taken from configuration.
            var mapArity    = Config.MapArity;
            var reduceArity = Config.ReduceArity;

            // process mapper #1
            // Holds the streams handed to the reducer as its first input; its length depends on the branch taken.
            Streamable <CompoundGroupKey <TMapKey, TReduceKey>, TReduceInput1>[] shuffleL2Results1;

            if (this.sourceRight1 != null) // two-input mapper
            {
                // DEAD (original author's marker; presumably this path is not exercised — TODO confirm)
                shuffleL2Results1 = new MultiUnionStreamable <CompoundGroupKey <TMapKey, TReduceKey>, TReduceInput1> [reduceArity];

                // [1] spray batches into L1 physical cores
                var importLeft1  = new SprayGroupImportStreamable <TMapKey, TMapInputLeft1>(this.sourceLeft1, mapArity, this.leftAsymmetric1);
                var importRight1 = new SprayGroupImportStreamable <TMapKey, TMapInputRight1>(this.sourceRight1, mapArity);

                // [2] perform the spray lambda on each L1 core
                var sprayResults1 = new BinaryMulticastStreamable <TMapKey, TMapInputLeft1, TMapInputRight1, TReduceInput1> [mapArity];
                for (int i = 0; i < mapArity; i++)
                {
                    sprayResults1[i] = new BinaryMulticastStreamable <TMapKey, TMapInputLeft1, TMapInputRight1, TReduceInput1>(importLeft1, importRight1, this.mapper1);
                }

                // [3] apply shuffle on the result of each spray
                Streamable <CompoundGroupKey <TMapKey, TReduceKey>, TReduceInput1>[] shuffleL1Results1 = new ShuffleNestedStreamable <TMapKey, TReduceInput1, TReduceKey> [mapArity];
                for (int i = 0; i < mapArity; i++)
                {
                    shuffleL1Results1[i] = new ShuffleNestedStreamable <TMapKey, TReduceInput1, TReduceKey>(sprayResults1[i], this.keySelector1, reduceArity, i);
                }

                // [4] Union the shuffled data by group key
                // The static l2index is reset before the unions are constructed; presumably the
                // constructor consumes it to number each union — TODO confirm.
                MultiUnionStreamable <CompoundGroupKey <TMapKey, TReduceKey>, TReduceInput1> .l2index = 0;
                for (int i = 0; i < reduceArity; i++)
                {
                    shuffleL2Results1[i] = new MultiUnionStreamable <CompoundGroupKey <TMapKey, TReduceKey>, TReduceInput1>(shuffleL1Results1);
                }
            }
            else // single-input mapper
            {
                // [1] spray batches into L1 physical cores
                var importLeft = new SprayGroupImportStreamable <TMapKey, TMapInputLeft1>
                                     (this.sourceLeft1, mapArity, false, this.sprayComparer1);

                // [2] perform the spray lambda on each L1 core
                // The two-input mapper is adapted to a single input by passing null as its second argument.
                var sprayResults1 = new MulticastStreamable <TMapKey, TMapInputLeft1, TReduceInput1> [mapArity];
                for (int i = 0; i < mapArity; i++)
                {
                    sprayResults1[i] = new MulticastStreamable <TMapKey, TMapInputLeft1, TReduceInput1>(importLeft, a => this.mapper1(a, null));
                }

                if (this.reduceInMap || (this.reduceOptions == OperationalHint.Asymmetric))
                {
                    // No shuffle: group each spray branch in place, yielding mapArity reducer inputs.
                    shuffleL2Results1 = new Streamable <CompoundGroupKey <TMapKey, TReduceKey>, TReduceInput1> [mapArity];
                    for (int i = 0; i < mapArity; i++)
                    {
                        shuffleL2Results1[i] = new GroupNestedStreamable <TMapKey, TReduceInput1, TReduceKey>(sprayResults1[i], this.keySelector1);
                    }
                }
                else
                {
                    shuffleL2Results1 = new MultiUnionStreamable <CompoundGroupKey <TMapKey, TReduceKey>, TReduceInput1> [reduceArity];

                    // [3] apply shuffle on the result of each spray
                    Streamable <CompoundGroupKey <TMapKey, TReduceKey>, TReduceInput1>[] shuffleL1Results1 = new ShuffleNestedStreamable <TMapKey, TReduceInput1, TReduceKey> [mapArity];
                    for (int i = 0; i < mapArity; i++)
                    {
                        shuffleL1Results1[i] = new ShuffleNestedStreamable <TMapKey, TReduceInput1, TReduceKey>(sprayResults1[i], this.keySelector1, reduceArity, i);
                    }

                    // [4] Union the shuffled data by group key
                    MultiUnionStreamable <CompoundGroupKey <TMapKey, TReduceKey>, TReduceInput1> .l2index = 0;
                    for (int i = 0; i < reduceArity; i++)
                    {
                        shuffleL2Results1[i] = new MultiUnionStreamable <CompoundGroupKey <TMapKey, TReduceKey>, TReduceInput1>(shuffleL1Results1);
                    }
                }
            }

            // process mapper #2
            // Holds the streams handed to the reducer as its second input.
            Streamable <CompoundGroupKey <TMapKey, TReduceKey>, TReduceInput2>[] shuffleL2Results2;
            if (this.sourceRight2 != null) // two-input mapper
            {
                // DEAD (original author's marker; presumably this path is not exercised — TODO confirm)
                shuffleL2Results2 = new MultiUnionStreamable <CompoundGroupKey <TMapKey, TReduceKey>, TReduceInput2> [reduceArity];

                // [1] spray batches into L1 physical cores
                var importLeft2  = new SprayGroupImportStreamable <TMapKey, TMapInputLeft2>(this.sourceLeft2, mapArity, this.leftAsymmetric2);
                var importRight2 = new SprayGroupImportStreamable <TMapKey, TMapInputRight2>(this.sourceRight2, mapArity);

                // [2] perform the spray lambda on each L1 core
                var sprayResults2 = new BinaryMulticastStreamable <TMapKey, TMapInputLeft2, TMapInputRight2, TReduceInput2> [mapArity];
                for (int i = 0; i < mapArity; i++)
                {
                    sprayResults2[i] = new BinaryMulticastStreamable <TMapKey, TMapInputLeft2, TMapInputRight2, TReduceInput2>(importLeft2, importRight2, this.mapper2);
                }

                // [3] apply shuffle on the result of each spray
                Streamable <CompoundGroupKey <TMapKey, TReduceKey>, TReduceInput2>[] shuffleL1Results2 = new ShuffleNestedStreamable <TMapKey, TReduceInput2, TReduceKey> [mapArity];
                for (int i = 0; i < mapArity; i++)
                {
                    shuffleL1Results2[i] = new ShuffleNestedStreamable <TMapKey, TReduceInput2, TReduceKey>(sprayResults2[i], this.keySelector2, reduceArity, i);
                }

                // [4] Union the shuffled data by group key
                MultiUnionStreamable <CompoundGroupKey <TMapKey, TReduceKey>, TReduceInput2> .l2index = 0;
                for (int i = 0; i < reduceArity; i++)
                {
                    shuffleL2Results2[i] = new MultiUnionStreamable <CompoundGroupKey <TMapKey, TReduceKey>, TReduceInput2>(shuffleL1Results2);
                }
            }
            else // single-input mapper
            {
                // [1] spray batches into L1 physical cores
                // Unlike mapper #1, the third argument here follows the Asymmetric reduce option and no spray comparer is passed.
                var importLeft = new SprayGroupImportStreamable <TMapKey, TMapInputLeft2>(this.sourceLeft2, mapArity, this.reduceOptions == OperationalHint.Asymmetric);

                // [2] perform the spray lambda on each L1 core
                var sprayResults2 = new MulticastStreamable <TMapKey, TMapInputLeft2, TReduceInput2> [mapArity];
                for (int i = 0; i < mapArity; i++)
                {
                    sprayResults2[i] = new MulticastStreamable <TMapKey, TMapInputLeft2, TReduceInput2>(importLeft, a => this.mapper2(a, null));
                }

                if (this.reduceInMap || (this.reduceOptions == OperationalHint.Asymmetric))
                {
                    // No shuffle: group each spray branch in place, yielding mapArity reducer inputs.
                    shuffleL2Results2 = new Streamable <CompoundGroupKey <TMapKey, TReduceKey>, TReduceInput2> [mapArity];
                    for (int i = 0; i < mapArity; i++)
                    {
                        shuffleL2Results2[i] = new GroupNestedStreamable <TMapKey, TReduceInput2, TReduceKey>(sprayResults2[i], this.keySelector2);
                    }
                }
                else
                {
                    shuffleL2Results2 = new MultiUnionStreamable <CompoundGroupKey <TMapKey, TReduceKey>, TReduceInput2> [reduceArity];

                    // [3] apply shuffle on the result of each spray
                    var shuffleL1Results2 = new ShuffleNestedStreamable <TMapKey, TReduceInput2, TReduceKey> [mapArity];
                    for (int i = 0; i < mapArity; i++)
                    {
                        shuffleL1Results2[i] = new ShuffleNestedStreamable <TMapKey, TReduceInput2, TReduceKey>(sprayResults2[i], this.keySelector2, reduceArity, i);
                    }

                    // [4] Union the shuffled data by group key
                    MultiUnionStreamable <CompoundGroupKey <TMapKey, TReduceKey>, TReduceInput2> .l2index = 0;
                    for (int i = 0; i < reduceArity; i++)
                    {
                        shuffleL2Results2[i] = new MultiUnionStreamable <CompoundGroupKey <TMapKey, TReduceKey>, TReduceInput2>(shuffleL1Results2);
                    }
                }
            }

            // process 2-input reducer
            // [5] perform the apply lambda on each L2 core
            // NOTE(review): the arrays and loop below are sized by shuffleL2Results1.Length but the
            // loop also indexes shuffleL2Results2. The branches above can size the two arrays by
            // different arities (reduceArity vs. mapArity) — confirm the lengths always agree.
            var innerResults
                = new BinaryMulticastStreamable <CompoundGroupKey <TMapKey, TReduceKey>, TReduceInput1, TReduceInput2, TBind> [shuffleL2Results1.Length];
            var ungroupInnerResults
                = new UngroupStreamable <TMapKey, TReduceKey, TBind, TOutput> [shuffleL2Results1.Length];

            for (int i = 0; i < shuffleL2Results1.Length; i++)
            {
                // Pair the i-th stream of each mapper, reduce them, then ungroup back to the map key.
                innerResults[i]        = new BinaryMulticastStreamable <CompoundGroupKey <TMapKey, TReduceKey>, TReduceInput1, TReduceInput2, TBind>(shuffleL2Results1[i], shuffleL2Results2[i], this.reducer);
                ungroupInnerResults[i] = new UngroupStreamable <TMapKey, TReduceKey, TBind, TOutput>(this.sourceLeft1.Properties.KeyEqualityComparer, innerResults[i], this.resultSelector);
            }

            // [6] final single merging union
            var union = new MultiUnionStreamable <TMapKey, TOutput>(ungroupInnerResults, false);

            return(union.Subscribe(observer));
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Builds the operator graph for a map-reduce with a single mapper and a single-input
        /// reducer, then subscribes <paramref name="observer"/> to the final union.
        /// With a key selector the pipeline is spray, map, group/shuffle, reduce, ungroup, union;
        /// without one the mapped spray branches are unioned directly.
        /// </summary>
        /// <param name="observer">The observer that receives the merged output.</param>
        /// <returns>The subscription returned by the final union's <c>Subscribe</c>.</returns>
        /// <exception cref="InvalidOperationException">
        /// No key selector is set and the union of mapper results cannot be viewed as a union
        /// producing <c>TOutput</c> (i.e. the mapper output type differs from the output type).
        /// </exception>
        public override IDisposable Subscribe(IStreamObserver <TMapKey, TOutput> observer)
        {
            // asymmetric mapper implies that we have to have a 2-input mapper
            Contract.Assert((!this.leftAsymmetric) || (this.sourceRight != null));

            // When isMulticore is set both phases use a single branch; otherwise use the configured arities.
            var mapArity    = this.isMulticore ? 1 : Config.MapArity;
            var reduceArity = this.isMulticore ? 1 : Config.ReduceArity;

            if (this.keySelector != null)
            {
                if (this.sourceRight != null) // two-input mapper
                {
                    // [1] spray batches into L1 physical cores
                    var importLeft  = new SprayGroupImportStreamable <TMapKey, TMapInputLeft>(this.sourceLeft, mapArity, this.leftAsymmetric);
                    var importRight = new SprayGroupImportStreamable <TMapKey, TMapInputRight>(this.sourceRight, mapArity);

                    // [2] perform the spray lambda on each L1 core
                    var sprayResults = new BinaryMulticastStreamable <TMapKey, TMapInputLeft, TMapInputRight, TReduceInput> [mapArity];
                    for (int i = 0; i < mapArity; i++)
                    {
                        sprayResults[i] = new BinaryMulticastStreamable <TMapKey, TMapInputLeft, TMapInputRight, TReduceInput>(importLeft, importRight, this.mapper);
                    }

                    // [3] apply shuffle on the result of each spray
                    Streamable <CompoundGroupKey <TMapKey, TReduceKey>, TReduceInput>[] shuffleL1Results = new ShuffleNestedStreamable <TMapKey, TReduceInput, TReduceKey> [mapArity];
                    for (int i = 0; i < mapArity; i++)
                    {
                        shuffleL1Results[i] = new ShuffleNestedStreamable <TMapKey, TReduceInput, TReduceKey>(this.keyComparer, sprayResults[i], this.keySelector, reduceArity, i);
                    }

                    // [4] Union the shuffled data by group key
                    // The static l2index is reset before the unions are constructed; presumably the
                    // constructor consumes it to number each union — TODO confirm.
                    MultiUnionStreamable <CompoundGroupKey <TMapKey, TReduceKey>, TReduceInput> .l2index = 0;
                    var shuffleL2Results = new MultiUnionStreamable <CompoundGroupKey <TMapKey, TReduceKey>, TReduceInput> [reduceArity];
                    for (int i = 0; i < reduceArity; i++)
                    {
                        shuffleL2Results[i] = new MultiUnionStreamable <CompoundGroupKey <TMapKey, TReduceKey>, TReduceInput>(shuffleL1Results);
                    }

                    // [5] perform the apply lambda on each L2 core
                    var innerResults        = new MulticastStreamable <CompoundGroupKey <TMapKey, TReduceKey>, TReduceInput, TBind> [reduceArity];
                    var ungroupInnerResults = new UngroupStreamable <TMapKey, TReduceKey, TBind, TOutput> [reduceArity];

                    for (int i = 0; i < reduceArity; i++)
                    {
                        innerResults[i]        = new MulticastStreamable <CompoundGroupKey <TMapKey, TReduceKey>, TReduceInput, TBind>(shuffleL2Results[i], this.reducer);
                        ungroupInnerResults[i] = new UngroupStreamable <TMapKey, TReduceKey, TBind, TOutput>(this.sourceLeft.Properties.KeyEqualityComparer, innerResults[i], this.resultSelector);
                    }

                    // [6] final single merging union
                    var union = new MultiUnionStreamable <TMapKey, TOutput>(ungroupInnerResults, false);

                    return(union.Subscribe(observer));
                }
                else // single-input mapper
                {
                    // [1] spray batches into L1 physical cores
                    var importLeft = new SprayGroupImportStreamable <TMapKey, TMapInputLeft>(this.sourceLeft, mapArity, this.leftAsymmetric, this.sprayComparer);

                    // [2] perform the spray lambda on each L1 core
                    // The two-input mapper is adapted to a single input by passing null as its second argument.
                    var sprayResults = new MulticastStreamable <TMapKey, TMapInputLeft, TReduceInput> [mapArity];
                    for (int i = 0; i < mapArity; i++)
                    {
                        sprayResults[i] = new MulticastStreamable <TMapKey, TMapInputLeft, TReduceInput>(importLeft, a => this.mapper(a, null));
                    }

                    Streamable <CompoundGroupKey <TMapKey, TReduceKey>, TReduceInput>[] mergeInputs;
                    if (this.reduceInMap) // apply reducer in map phase itself
                    {
                        // [3] group each spray branch in place (no shuffle), yielding mapArity reducer inputs
                        mergeInputs = new Streamable <CompoundGroupKey <TMapKey, TReduceKey>, TReduceInput> [mapArity];
                        for (int i = 0; i < mapArity; i++)
                        {
                            mergeInputs[i] = new GroupNestedStreamable <TMapKey, TReduceInput, TReduceKey>(this.keyComparer, sprayResults[i], this.keySelector);
                        }
                    }
                    else
                    {
                        // [3] apply shuffle on the result of each spray
                        Streamable <CompoundGroupKey <TMapKey, TReduceKey>, TReduceInput>[] shuffleL1Results = new ShuffleNestedStreamable <TMapKey, TReduceInput, TReduceKey> [mapArity];
                        for (int i = 0; i < mapArity; i++)
                        {
                            shuffleL1Results[i] = new ShuffleNestedStreamable <TMapKey, TReduceInput, TReduceKey>(this.keyComparer, sprayResults[i], this.keySelector, reduceArity, i);
                        }

                        // [4] Union the shuffled data by group key
                        MultiUnionStreamable <CompoundGroupKey <TMapKey, TReduceKey>, TReduceInput> .l2index = 0;

                        // A single allocation suffices; an earlier array that was allocated here and
                        // immediately overwritten has been removed.
                        mergeInputs = new MultiUnionStreamable <CompoundGroupKey <TMapKey, TReduceKey>, TReduceInput> [reduceArity];
                        for (int i = 0; i < reduceArity; i++)
                        {
                            mergeInputs[i] = new MultiUnionStreamable <CompoundGroupKey <TMapKey, TReduceKey>, TReduceInput>(shuffleL1Results);
                        }
                    }

                    // [5] perform the apply lambda on each L2 core
                    // Sized by mergeInputs.Length because it is mapArity or reduceArity depending on the branch above.
                    var innerResults
                        = new MulticastStreamable <CompoundGroupKey <TMapKey, TReduceKey>, TReduceInput, TBind> [mergeInputs.Length];
                    var ungroupInnerResults
                        = new UngroupStreamable <TMapKey, TReduceKey, TBind, TOutput> [mergeInputs.Length];

                    for (int i = 0; i < mergeInputs.Length; i++)
                    {
                        innerResults[i]        = new MulticastStreamable <CompoundGroupKey <TMapKey, TReduceKey>, TReduceInput, TBind>(mergeInputs[i], this.reducer);
                        ungroupInnerResults[i] = new UngroupStreamable <TMapKey, TReduceKey, TBind, TOutput>(this.sourceLeft.Properties.KeyEqualityComparer, innerResults[i], this.resultSelector);
                    }

                    // [6] final single merging union
                    var union = new MultiUnionStreamable <TMapKey, TOutput>(ungroupInnerResults, false);

                    return(union.Subscribe(observer));
                }
            }
            else
            {
                // No key selector: there is no shuffle/reduce stage, the mapper output is the result.
                if (this.sourceRight != null) // two-input mapper
                {
                    // [1] spray batches into L1 physical cores
                    var importLeft  = new SprayGroupImportStreamable <TMapKey, TMapInputLeft>(this.sourceLeft, mapArity, this.leftAsymmetric);
                    var importRight = new SprayGroupImportStreamable <TMapKey, TMapInputRight>(this.sourceRight, mapArity);

                    // [2] perform the spray lambda on each L1 core
                    var sprayResults = new BinaryMulticastStreamable <TMapKey, TMapInputLeft, TMapInputRight, TReduceInput> [mapArity];
                    for (int i = 0; i < mapArity; i++)
                    {
                        sprayResults[i] = new BinaryMulticastStreamable <TMapKey, TMapInputLeft, TMapInputRight, TReduceInput>(importLeft, importRight, this.mapper);
                    }

                    // [4] Union the mapped data
                    MultiUnionStreamable <TMapKey, TReduceInput> .l2index = 0;
                    var shuffleL2Result
                        = new MultiUnionStreamable <TMapKey, TReduceInput>(sprayResults, false) as MultiUnionStreamable <TMapKey, TOutput>;

                    // The 'as' conversion yields null when TReduceInput and TOutput differ; fail
                    // with a descriptive error instead of a NullReferenceException.
                    if (shuffleL2Result == null)
                    {
                        throw new InvalidOperationException("Without a key selector, the mapper output type must be the same as the output type.");
                    }

                    return(shuffleL2Result.Subscribe(observer));
                }
                else // single-input mapper
                {
                    // [1] spray batches into L1 physical cores
                    var importLeft = new SprayGroupImportStreamable <TMapKey, TMapInputLeft>(this.sourceLeft, mapArity);

                    // [2] perform the spray lambda on each L1 core
                    var sprayResults = new MulticastStreamable <TMapKey, TMapInputLeft, TReduceInput> [mapArity];
                    for (int i = 0; i < mapArity; i++)
                    {
                        sprayResults[i] = new MulticastStreamable <TMapKey, TMapInputLeft, TReduceInput>(importLeft, a => this.mapper(a, null));
                    }

                    // [4] Union the mapped data
                    MultiUnionStreamable <TMapKey, TReduceInput> .l2index = 0;
                    var shuffleL2Result
                        = new MultiUnionStreamable <TMapKey, TReduceInput>(sprayResults, false) as MultiUnionStreamable <TMapKey, TOutput>;

                    // The 'as' conversion yields null when TReduceInput and TOutput differ; fail
                    // with a descriptive error instead of a NullReferenceException.
                    if (shuffleL2Result == null)
                    {
                        throw new InvalidOperationException("Without a key selector, the mapper output type must be the same as the output type.");
                    }

                    return(shuffleL2Result.Subscribe(observer));
                }
            }
        }