Пример #1
0
 // Visitor entry point for a hash-partition node. The work is delegated to the
 // node itself: its AddVertexCode is called with the vertex method and the
 // reader/writer name arrays, and the string it returns is passed back unchanged.
 internal virtual string Visit(DLinqHashPartitionNode node,
                               CodeMemberMethod vertexMethod,
                               string[] readerNames,
                               string[] writerNames)
 {
     string generated = node.AddVertexCode(vertexMethod, readerNames, writerNames);
     return generated;
 }
Пример #2
0
        // Builds the plan nodes for a SlidingWindow operator over 'source'.
        //
        // The generated plan has this shape:
        //     var windows = source.Apply(s => DryadLinqHelper.Last(s, windowSize));
        //     var slided  = windows.Apply(s => DryadLinqHelper.Slide(s)).HashPartition(x => x.Index);
        //     slided.Apply(source, (x, y) => DryadLinqHelper.ProcessWindows(x, y, procLambda, windowSize));
        //
        // source:         the input query node info.
        // procLambda:     the user function passed through to ProcessWindows.
        // windowSizeExpr: expression evaluated here to an int constant.
        // queryExpr:      the query expression attached to every node created.
        // Throws DryadLinqException when the visited input has a dynamic partition count.
        private DLinqQueryNode VisitSlidingWindow(QueryNodeInfo source,
                                                  LambdaExpression procLambda,
                                                  Expression windowSizeExpr,
                                                  Expression queryExpr)
        {
            DLinqQueryNode inputNode = this.Visit(source);
            if (inputNode.IsDynamic)
            {
                throw new DryadLinqException("SlidingWindow is only supported for static partition count");
            }

            // Fold the window-size expression down to a constant of type int.
            Expression windowSizeConst = Expression.Constant(
                new ExpressionSimplifier<int>().Eval(windowSizeExpr), typeof(int));

            // The input is consumed twice: by the Last stage and by the final ProcessWindows stage.
            inputNode.IsForked = true;

            Type elemType = inputNode.OutputTypes[0];
            Type elemSeqType = typeof(IEnumerable<>).MakeGenericType(elemType);

            // Stage 1: apply s => Last(s, windowSize), then merge the results.
            ParameterExpression lastParam
                = Expression.Parameter(elemSeqType, DryadLinqCodeGen.MakeUniqueName("s"));
            MethodInfo lastMethod
                = typeof(DryadLinqHelper).GetMethod("Last").MakeGenericMethod(elemType);
            Expression lastCall = Expression.Call(lastMethod, lastParam, windowSizeConst);
            LambdaExpression lastLambda = Expression.Lambda(
                typeof(Func<,>).MakeGenericType(lastParam.Type, lastCall.Type), lastCall, lastParam);
            DLinqQueryNode lastNode = new DLinqApplyNode(lastLambda, queryExpr, inputNode);
            DLinqQueryNode mergedLastNode = new DLinqMergeNode(true, queryExpr, lastNode);

            // Stage 2: apply s => Slide(s) to the merged output of stage 1.
            ParameterExpression slideParam
                = Expression.Parameter(lastCall.Type, DryadLinqCodeGen.MakeUniqueName("s"));
            MethodInfo slideMethod
                = typeof(DryadLinqHelper).GetMethod("Slide").MakeGenericMethod(elemType);
            Expression slideCall = Expression.Call(slideMethod, slideParam);
            LambdaExpression slideLambda = Expression.Lambda(
                typeof(Func<,>).MakeGenericType(slideParam.Type, slideCall.Type), slideCall, slideParam);
            DLinqQueryNode slideNode = new DLinqApplyNode(slideLambda, queryExpr, mergedLastNode);

            // Stage 3: hash partition on x => x.Index (distributes from partition i to i+1),
            // using as many partitions as the input has.
            int partitionCount = inputNode.OutputPartition.Count;
            ParameterExpression keyParam
                = Expression.Parameter(slideCall.Type.GetGenericArguments()[0], "x");
            Expression indexAccess = Expression.Property(keyParam, "Index");
            LambdaExpression keySelector = Expression.Lambda(
                typeof(Func<,>).MakeGenericType(keyParam.Type, indexAccess.Type), indexAccess, keyParam);
            DLinqQueryNode partitionNode
                = new DLinqHashPartitionNode(keySelector, null, partitionCount, queryExpr, slideNode);

            // Stage 4: binary apply (x, y) => ProcessWindows(x, y, procLambda, windowSize),
            // where x comes from the hash-partitioned stream and y from the original input.
            ParameterExpression windowsParam = Expression.Parameter(
                typeof(IEnumerable<>).MakeGenericType(slideCall.Type),
                DryadLinqCodeGen.MakeUniqueName("x"));
            ParameterExpression recordsParam
                = Expression.Parameter(elemSeqType, DryadLinqCodeGen.MakeUniqueName("y"));
            MethodInfo processMethod = typeof(DryadLinqHelper)
                .GetMethod("ProcessWindows")
                .MakeGenericMethod(elemType, procLambda.Body.Type);
            Expression processCall = Expression.Call(
                processMethod, windowsParam, recordsParam, procLambda, windowSizeConst);
            LambdaExpression processLambda = Expression.Lambda(
                typeof(Func<,,>).MakeGenericType(windowsParam.Type, recordsParam.Type, processCall.Type),
                processCall,
                windowsParam,
                recordsParam);
            return new DLinqApplyNode(processLambda, queryExpr, partitionNode, inputNode);
        }
Пример #3
0
            VisitApplyWithPartitionIndex(QueryNodeInfo source,
                                         LambdaExpression procLambda,
                                         Expression queryExpr)
        {
            // Builds the plan nodes that pair each partition of the source with a stream of
            // int indices and then invoke DryadLinqHelper.ProcessWithIndex on both together
            // with procLambda.
            //
            // The computation looks like this:
            //     var indices = source.Apply(s => ValueZero(s)).Apply(s => AssignIndex(s))
            //                         .HashPartition(x => x)
            //     indices.Apply(source, (x, y) => ApplyWithPartitionIndex(x, y, procFunc));
            // NOTE(review): the final node built below actually passes the source as the first
            // child/parameter and the indices as the second, and the helper looked up by
            // reflection is named "ProcessWithIndex" -- the sketch above is only approximate.
            DLinqQueryNode child = this.Visit(source);
            if (child.IsDynamic)
            {
                // The hash partition below needs a fixed partition count (child.OutputPartition.Count).
                throw new DryadLinqException("ApplyWithPartitionIndex is only supported for static partition count");
            }

            // child is consumed twice below: by the ValueZero apply node and by the final binary apply node.
            child.IsForked = true;

            // Apply node for s => ValueZero(s)
            Type paramType = typeof(IEnumerable<>).MakeGenericType(child.OutputTypes[0]);
            ParameterExpression param = Expression.Parameter(paramType, "s");
            MethodInfo minfo = typeof(DryadLinqHelper).GetMethod("ValueZero");
            minfo = minfo.MakeGenericMethod(child.OutputTypes[0]);
            Expression body = Expression.Call(minfo, param);
            Type funcType = typeof(Func<,>).MakeGenericType(param.Type, body.Type);            
            LambdaExpression procFunc = Expression.Lambda(funcType, body, param);
            DLinqQueryNode valueZeroNode = new DLinqApplyNode(procFunc, queryExpr, child);
            // Merge the ValueZero outputs so that the next apply node sees them as one input.
            valueZeroNode = new DLinqMergeNode(true, queryExpr, valueZeroNode); 

            // Apply node for s => AssignIndex(s)
            // AssignIndex is used as looked up (no MakeGenericMethod call), taking an IEnumerable<int>.
            paramType = typeof(IEnumerable<>).MakeGenericType(typeof(int));
            param = Expression.Parameter(paramType, "s");
            minfo = typeof(DryadLinqHelper).GetMethod("AssignIndex");
            body = Expression.Call(minfo, param);
            funcType = typeof(Func<,>).MakeGenericType(param.Type, body.Type);
            procFunc = Expression.Lambda(funcType, body, param);
            DLinqQueryNode assignIndexNode = new DLinqApplyNode(procFunc, queryExpr, valueZeroNode);

            // HashPartition to distribute the indices -- one to each partition.
            // The key selector is the identity lambda x => x, and the partition count is
            // taken from the original input.
            // NOTE(review): the lambda parameter is typed as body.Type, i.e. the full return
            // type of the AssignIndex call rather than its element type -- confirm this is intended.
            int pcount = child.OutputPartition.Count;
            param = Expression.Parameter(body.Type, "x");
            funcType = typeof(Func<,>).MakeGenericType(param.Type, param.Type);
            LambdaExpression keySelectExpr = Expression.Lambda(funcType, param, param);
            DLinqQueryNode hdistNode 
                = new DLinqHashPartitionNode(keySelectExpr, null, pcount, queryExpr, assignIndexNode);

            // Apply node for (x, y) => ProcessWithIndex(x, y, procLambda),
            // where x is the source data (IEnumerable of the child's output type) and
            // y is the index stream (IEnumerable<int>).
            Type paramType1 = typeof(IEnumerable<>).MakeGenericType(child.OutputTypes[0]);
            ParameterExpression param1 = Expression.Parameter(paramType1, DryadLinqCodeGen.MakeUniqueName("x"));
            Type paramType2 = typeof(IEnumerable<>).MakeGenericType(typeof(int));
            ParameterExpression param2 = Expression.Parameter(paramType2, DryadLinqCodeGen.MakeUniqueName("y"));
            minfo = typeof(DryadLinqHelper).GetMethod("ProcessWithIndex");
            // The second type argument is the first generic argument of procLambda's body type;
            // assumes procLambda returns a constructed generic type (presumably IEnumerable<TResult>) -- TODO confirm.
            minfo = minfo.MakeGenericMethod(child.OutputTypes[0], procLambda.Body.Type.GetGenericArguments()[0]);
            body = Expression.Call(minfo, param1, param2, procLambda);
            funcType = typeof(Func<,,>).MakeGenericType(param1.Type, param2.Type, body.Type);
            procFunc = Expression.Lambda(funcType, body, param1, param2);
            // Children are given in the same order as the lambda parameters: data first, indices second.
            return new DLinqApplyNode(procFunc, queryExpr, child, hdistNode);
        }
Пример #4
0
        // Builds the plan nodes for Reverse.
        //
        // Overall idea: first reverse the order of the partitions, then reverse the
        // records inside each partition. Getting the partitions reversed is the hard part:
        //   1. Tee the input.
        //   2. Run a dummy apply (ValueZero) that yields the singleton {0} in every partition.
        //   3. Merge those into one sequence {0,0,..} whose length equals the partition count.
        //   4. Turn that sequence into index/count pairs { (0,n), (1,n), ... }.
        //   5. Hash-partition the pairs so each of the n workers receives one of them.
        //   6. Binary apply (pairs, original data) to tag every item with its target index:
        //          ({tgt, item0}, {tgt, item1}, ..)
        //   7. Hash-partition on the target index to move items to their target partition.
        //   8. Merge and run the local Reverse on each partition.
        //
        // Throws DryadLinqException when the visited input has a dynamic partition count.
        private DLinqQueryNode VisitReverse(QueryNodeInfo source, Expression queryExpr)
        {
            DLinqQueryNode inputNode = this.Visit(source);
            if (inputNode.IsDynamic)
            {
                throw new DryadLinqException("Reverse is only supported for static partition count");
            }

            // The input feeds both the ValueZero stage and the AddIndexForReverse stage.
            inputNode.IsForked = true;

            Type elemType = inputNode.OutputTypes[0];
            Type elemSeqType = typeof(IEnumerable<>).MakeGenericType(elemType);

            // Apply node: s => ValueZero(s)
            ParameterExpression zeroParam = Expression.Parameter(elemSeqType, "s");
            MethodInfo zeroMethod
                = typeof(DryadLinqHelper).GetMethod("ValueZero").MakeGenericMethod(elemType);
            Expression zeroCall = Expression.Call(zeroMethod, zeroParam);
            LambdaExpression zeroLambda = Expression.Lambda(
                typeof(Func<,>).MakeGenericType(zeroParam.Type, zeroCall.Type), zeroCall, zeroParam);
            DLinqQueryNode zeroNode = new DLinqApplyNode(zeroLambda, queryExpr, inputNode);

            // Merge node followed by apply node: s => MakeIndexCountPairs(s)
            ParameterExpression zerosParam = Expression.Parameter(typeof(IEnumerable<int>), "s");
            MethodInfo pairsMethod = typeof(DryadLinqHelper).GetMethod("MakeIndexCountPairs");
            Expression pairsCall = Expression.Call(pairsMethod, zerosParam);
            LambdaExpression pairsLambda = Expression.Lambda(
                typeof(Func<,>).MakeGenericType(zerosParam.Type, pairsCall.Type), pairsCall, zerosParam);
            DLinqQueryNode mergedZeroNode = new DLinqMergeNode(true, queryExpr, zeroNode);
            DLinqQueryNode pairsNode = new DLinqApplyNode(pairsLambda, queryExpr, mergedZeroNode);

            // Hash partition on x => x.Index to hand out the index/count pairs, one per partition.
            // Each partition will receive (myPartitionID, pcount).
            int partitionCount = inputNode.OutputPartition.Count;
            ParameterExpression pairParam
                = Expression.Parameter(pairsCall.Type.GetGenericArguments()[0], "x");
            Expression pairIndex = Expression.Property(pairParam, "Index");
            LambdaExpression pairKeySelector = Expression.Lambda(
                typeof(Func<,>).MakeGenericType(pairParam.Type, pairIndex.Type), pairIndex, pairParam);
            DLinqQueryNode pairPartitionNode = new DLinqHashPartitionNode(
                pairKeySelector, null, partitionCount, queryExpr, pairsNode);

            // Binary apply node: (x, y) => AddIndexForReverse(x, y),
            // with x the distributed pairs and y the original data.
            ParameterExpression pairsInput = Expression.Parameter(pairsCall.Type, "x");
            ParameterExpression itemsInput = Expression.Parameter(elemSeqType, "y");
            MethodInfo addIndexMethod
                = typeof(DryadLinqHelper).GetMethod("AddIndexForReverse").MakeGenericMethod(elemType);
            Expression addIndexCall = Expression.Call(addIndexMethod, pairsInput, itemsInput);
            LambdaExpression addIndexLambda = Expression.Lambda(
                typeof(Func<,,>).MakeGenericType(pairsInput.Type, itemsInput.Type, addIndexCall.Type),
                addIndexCall,
                pairsInput,
                itemsInput);
            DLinqQueryNode addIndexNode
                = new DLinqApplyNode(addIndexLambda, queryExpr, pairPartitionNode, inputNode);

            // HashPartition(x => x.Index, x => x.Value, pcount):
            // moves every item to its target partition (each worker sends all of its
            // items to a single target partition).
            ParameterExpression taggedParam
                = Expression.Parameter(addIndexCall.Type.GetGenericArguments()[0], "x");
            Expression targetIndex = Expression.Property(taggedParam, "Index");
            LambdaExpression targetKeySelector = Expression.Lambda(
                typeof(Func<,>).MakeGenericType(taggedParam.Type, targetIndex.Type),
                targetIndex,
                taggedParam);
            Expression itemValue = Expression.Property(taggedParam, "Value");
            LambdaExpression valueSelector = Expression.Lambda(
                typeof(Func<,>).MakeGenericType(taggedParam.Type, itemValue.Type),
                itemValue,
                taggedParam);
            DLinqQueryNode redistributeNode = new DLinqHashPartitionNode(
                targetKeySelector, valueSelector, null,
                partitionCount, false, queryExpr, addIndexNode);

            // Merge, then apply x => DryadLinqVertex.Reverse(x) for the local reversal.
            Type redistributedSeqType
                = typeof(IEnumerable<>).MakeGenericType(redistributeNode.OutputTypes[0]);
            ParameterExpression localParam = Expression.Parameter(redistributedSeqType, "x");
            MethodInfo reverseMethod
                = typeof(DryadLinqVertex).GetMethod("Reverse").MakeGenericMethod(elemType);
            Expression reverseCall = Expression.Call(reverseMethod, localParam);
            LambdaExpression reverseLambda = Expression.Lambda(
                typeof(Func<,>).MakeGenericType(localParam.Type, reverseCall.Type),
                reverseCall,
                localParam);
            DLinqQueryNode mergedNode = new DLinqMergeNode(true, queryExpr, redistributeNode);
            return new DLinqApplyNode(reverseLambda, queryExpr, mergedNode);
        }
Пример #5
0
        // Builds the plan nodes for a HashPartition operator: a hash-partition node
        // over the visited source, followed by a merge node.
        //
        // keySelectExpr:    key selector; its delegate result type is the key type.
        // resultSelectExpr: passed through to the hash-partition node.
        // comparerExpr:     optional comparer; when null the key type must have a
        //                   default equality comparer, otherwise an exception is thrown.
        // countExpr:        optional partition count; when given it is evaluated here
        //                   and dynamic partitioning is switched off.
        // queryExpr:        the query expression attached to the created nodes.
        private DLinqQueryNode VisitHashPartition(QueryNodeInfo source,
                                                  LambdaExpression keySelectExpr,
                                                  LambdaExpression resultSelectExpr,
                                                  Expression comparerExpr,
                                                  Expression countExpr,
                                                  Expression queryExpr)
        {
            DLinqQueryNode inputNode = this.Visit(source);

            // Second generic argument of the selector's delegate type is the key type.
            Type partitionKeyType = keySelectExpr.Type.GetGenericArguments()[1];
            bool canCompareKeys = comparerExpr != null
                                  || TypeSystem.HasDefaultEqualityComparer(partitionKeyType);
            if (!canCompareKeys)
            {
                throw DryadLinqException.Create(
                    DryadLinqErrorCode.ComparerMustBeSpecifiedOrKeyTypeMustBeIEquatable,
                    string.Format(SR.ComparerMustBeSpecifiedOrKeyTypeMustBeIEquatable, partitionKeyType),
                    queryExpr);
            }

            bool useDynamicPartitioning
                = (StaticConfig.DynamicOptLevel & StaticConfig.DynamicHashPartitionLevel) != 0;
            int partitionCount;
            if (countExpr == null)
            {
                // Note: For MayRTM, isDynamic will never be true.
                partitionCount = useDynamicPartitioning ? 1 : inputNode.OutputPartition.Count;
            }
            else
            {
                // An explicit count always wins and forces static partitioning.
                ExpressionSimplifier<int> countEvaluator = new ExpressionSimplifier<int>();
                partitionCount = countEvaluator.Eval(countExpr);
                useDynamicPartitioning = false;
            }

            DLinqQueryNode partitionNode = new DLinqHashPartitionNode(
                keySelectExpr, resultSelectExpr, comparerExpr, partitionCount,
                useDynamicPartitioning, queryExpr, inputNode);
            return new DLinqMergeNode(false, queryExpr, partitionNode);
        }
Пример #6
0
        private DLinqQueryNode VisitGroupBy(QueryNodeInfo source,
                                            LambdaExpression keySelectExpr,
                                            LambdaExpression elemSelectExpr,
                                            LambdaExpression resultSelectExpr,
                                            Expression comparerExpr,
                                            Expression queryExpr)
        {
            DLinqQueryNode child = this.Visit(source);
            
            ExpressionInfo einfo = new ExpressionInfo(keySelectExpr);
            if (einfo.IsExpensive)
            {
                // Any method call that is not tagged as "expensive=false" will be deemed expensive.
                // if the keySelector is expensive, we rewrite the query so that the key-function is invoked only once
                // and the record key passed around via a Pair<TKey,TRecord>.
                // keyFunc becomes pair=>pair.Key
                // elementSelector must be rewritten so that references to (record) become (pair.Value)

                Type[] vkTypes = keySelectExpr.Type.GetGenericArguments();
                Type pairType = typeof(Pair<,>).MakeGenericType(vkTypes[1], vkTypes[0]);
                ParameterExpression pairParam = Expression.Parameter(pairType, "e");

                // Add Select(x => new Pair<K,S>(key(x), x))
                ParameterExpression valueParam = keySelectExpr.Parameters[0];
                Expression body = Expression.New(pairType.GetConstructors()[0], keySelectExpr.Body, valueParam);
                Type delegateType = typeof(Func<,>).MakeGenericType(valueParam.Type, body.Type);
                LambdaExpression selectExpr = Expression.Lambda(delegateType, body, valueParam);
                child = new DLinqSelectNode(QueryNodeType.Select, selectExpr, null, queryExpr, child);
                
                // Change keySelector to e => e.Key
                PropertyInfo keyInfo = pairParam.Type.GetProperty("Key");
                body = Expression.Property(pairParam, keyInfo);
                delegateType = typeof(Func<,>).MakeGenericType(pairParam.Type, body.Type);
                keySelectExpr = Expression.Lambda(delegateType, body, pairParam);

                // Add or change elementSelector with e.Value
                PropertyInfo valueInfo = pairParam.Type.GetProperty("Value");
                body = Expression.Property(pairParam, valueInfo);
                if (elemSelectExpr != null)
                {
                    ParameterSubst subst = new ParameterSubst(elemSelectExpr.Parameters[0], body);
                    body = subst.Visit(elemSelectExpr.Body);
                }
                delegateType = typeof(Func<,>).MakeGenericType(pairParam.Type, body.Type);
                elemSelectExpr = Expression.Lambda(delegateType, body, pairParam);
            }

            Type keyType = keySelectExpr.Type.GetGenericArguments()[1];
            if (comparerExpr == null && !TypeSystem.HasDefaultEqualityComparer(keyType))
            {
                throw DryadLinqException.Create(DryadLinqErrorCode.ComparerMustBeSpecifiedOrKeyTypeMustBeIEquatable,
                                                string.Format(SR.ComparerMustBeSpecifiedOrKeyTypeMustBeIEquatable, keyType),
                                                queryExpr);
            }
            Type elemType;
            if (elemSelectExpr == null)
            {
                elemType = keySelectExpr.Type.GetGenericArguments()[0];
            }
            else
            {
                elemType = elemSelectExpr.Type.GetGenericArguments()[1];
            }
                 
            // The comparer object:
            object comparer = null;
            if (comparerExpr != null)
            {
                ExpressionSimplifier<object> evaluator = new ExpressionSimplifier<object>();
                comparer = evaluator.Eval(comparerExpr);
            }

            LambdaExpression keySelectExpr1 = keySelectExpr;
            LambdaExpression elemSelectExpr1 = elemSelectExpr;
            LambdaExpression resultSelectExpr1 = resultSelectExpr;
            LambdaExpression seedExpr1 = null;
            LambdaExpression accumulateExpr1 = null;
            
            List<DecompositionInfo> dInfoList = null;
            if (resultSelectExpr != null)
            {
                dInfoList = Decomposition.GetDecompositionInfoList(resultSelectExpr, this.m_codeGen);
            }

            String groupByOpName = "GroupBy";
            DLinqQueryNode groupByNode = child;
            bool isPartitioned = child.OutputPartition.IsPartitionedBy(keySelectExpr, comparer);
            if (dInfoList != null)
            {
                // ** Decomposable GroupBy-Reduce
                // This block creates the first GroupByNode and does some preparation for subsequent nodes.
                if (child.IsOrderedBy(keySelectExpr, comparer))
                {
                    groupByOpName = "OrderedGroupBy";
                }

                int dcnt = dInfoList.Count;
                ParameterExpression keyParam;
                if (resultSelectExpr.Parameters.Count == 1)
                {
                    keyParam = Expression.Parameter(keyType, DryadLinqCodeGen.MakeUniqueName("k"));
                }
                else
                {
                    keyParam = resultSelectExpr.Parameters[0];
                }

                // Seed:
                ParameterExpression param2 = Expression.Parameter(
                                                 elemType, DryadLinqCodeGen.MakeUniqueName("e"));
                Expression zeroExpr = Expression.Constant(0, typeof(int));
                Expression seedBody = zeroExpr;
                if (dcnt != 0)
                {
                    LambdaExpression seed = dInfoList[dcnt-1].Seed;
                    ParameterSubst subst = new ParameterSubst(seed.Parameters[0], param2);
                    seedBody = subst.Visit(seed.Body);
                    for (int i = dcnt - 2; i >= 0; i--)
                    {
                        seed = dInfoList[i].Seed;
                        subst = new ParameterSubst(seed.Parameters[0], param2);
                        Expression firstExpr = subst.Visit(seed.Body);
                        Type newPairType = typeof(Pair<,>).MakeGenericType(firstExpr.Type, seedBody.Type);
                        seedBody = Expression.New(newPairType.GetConstructors()[0], firstExpr, seedBody);
                    }
                }
                LambdaExpression seedExpr = Expression.Lambda(seedBody, param2);

                // Accumulate:
                ParameterExpression param1 = Expression.Parameter(
                                                 seedBody.Type, DryadLinqCodeGen.MakeUniqueName("a"));
                Expression accumulateBody = zeroExpr;
                if (dcnt != 0)
                {
                    accumulateBody = Decomposition.AccumulateList(param1, param2, dInfoList, 0);
                }
                LambdaExpression accumulateExpr = Expression.Lambda(accumulateBody, param1, param2);

                // Now prepare for the merge-aggregator and/or in the secondary group-by.
                // keySelectExpr1: e => e.Key
                Type reducerResType = typeof(Pair<,>).MakeGenericType(keyParam.Type, accumulateBody.Type);                
                ParameterExpression reducerResParam = Expression.Parameter(reducerResType, "e");
                PropertyInfo keyInfo = reducerResParam.Type.GetProperty("Key");
                Expression body = Expression.Property(reducerResParam, keyInfo);
                Type delegateType = typeof(Func<,>).MakeGenericType(reducerResParam.Type, body.Type);
                keySelectExpr1 = Expression.Lambda(delegateType, body, reducerResParam);

                // elemSelectExpr1: e => e.Value
                PropertyInfo valueInfo = reducerResParam.Type.GetProperty("Value");
                body = Expression.Property(reducerResParam, valueInfo);
                delegateType = typeof(Func<,>).MakeGenericType(reducerResParam.Type, body.Type);
                elemSelectExpr1 = Expression.Lambda(delegateType, body, reducerResParam);

                // SeedExpr1
                param2 = Expression.Parameter(elemSelectExpr1.Body.Type,
                                              DryadLinqCodeGen.MakeUniqueName("e"));
                seedExpr1 = Expression.Lambda(param2, param2);
                
                // AccumulateExpr1
                Expression recursiveAccumulateBody = zeroExpr;
                if (dcnt != 0)
                {
                    recursiveAccumulateBody = Decomposition.RecursiveAccumulateList(param1, param2, dInfoList, 0);
                }
                accumulateExpr1 = Expression.Lambda(recursiveAccumulateBody, param1, param2);
                
                // resultSelectExpr1
                resultSelectExpr1 = null;

                // The first groupByNode.  
                // If the input was already correctly partitioned, this will be the only groupByNode.
                bool isPartial = StaticConfig.GroupByLocalAggregationIsPartial && !isPartitioned;
                groupByNode = new DLinqGroupByNode(
                                       groupByOpName, keySelectExpr, elemSelectExpr, null,
                                       seedExpr, accumulateExpr, accumulateExpr1, comparerExpr,
                                       isPartial, queryExpr, child);
            }
            else
            {
                // Can't do partial aggregation.
                // Use sort, mergesort, and ordered groupby, if TKey implements IComparable.
                if ((comparer != null && TypeSystem.IsComparer(comparer, keyType)) ||
                    (comparer == null && TypeSystem.HasDefaultComparer(keyType)))
                {
                    if (!child.IsOrderedBy(keySelectExpr, comparer))
                    {
                        groupByNode = new DLinqOrderByNode(keySelectExpr, comparerExpr, true, queryExpr, child);
                    }
                    groupByOpName = "OrderedGroupBy";
                }

                // Add a GroupByNode if it is partitioned or has elementSelector.
                // If the input was already correctly partitioned, this will be the only groupByNode.
                if (isPartitioned)
                {
                    groupByNode = new DLinqGroupByNode(groupByOpName,
                                                       keySelectExpr,
                                                       elemSelectExpr,
                                                       resultSelectExpr,
                                                       null,  // seed
                                                       null,  // accumulate
                                                       null,  // recursiveAccumulate
                                                       comparerExpr,
                                                       false, // isPartial
                                                       queryExpr,
                                                       groupByNode);
                }
                else if (elemSelectExpr != null)
                {
                    // Local GroupBy without resultSelector:
                    groupByNode = new DLinqGroupByNode(groupByOpName,
                                                       keySelectExpr,
                                                       elemSelectExpr,
                                                       null,  // resultSelect
                                                       null,  // seed
                                                       null,  // accumulate
                                                       null,  // recursiveAccumulate
                                                       comparerExpr,
                                                       StaticConfig.GroupByLocalAggregationIsPartial,  // isPartial
                                                       queryExpr,
                                                       groupByNode);
                    
                    // keySelectExpr1: g => g.Key
                    ParameterExpression groupParam = Expression.Parameter(groupByNode.OutputTypes[0], "g");
                    PropertyInfo keyInfo = groupParam.Type.GetProperty("Key");
                    Expression body = Expression.Property(groupParam, keyInfo);
                    Type delegateType = typeof(Func<,>).MakeGenericType(groupParam.Type, body.Type);
                    keySelectExpr1 = Expression.Lambda(delegateType, body, groupParam);

                    // No elementSelector
                    elemSelectExpr1 = null;

                    // resultSelectExpr1
                    ParameterExpression keyParam;
                    Type groupType = typeof(IEnumerable<>).MakeGenericType(groupByNode.OutputTypes[0]);
                    groupParam = Expression.Parameter(groupType, DryadLinqCodeGen.MakeUniqueName("g"));
                    if (resultSelectExpr == null)
                    {
                        // resultSelectExpr1: (k, g) => MakeDryadLinqGroup(k, g)
                        keyParam = Expression.Parameter(keySelectExpr1.Body.Type, DryadLinqCodeGen.MakeUniqueName("k"));
                        MethodInfo groupingInfo = typeof(DryadLinqEnumerable).GetMethod("MakeDryadLinqGroup");
                        groupingInfo = groupingInfo.MakeGenericMethod(keyParam.Type, elemType);
                        body = Expression.Call(groupingInfo, keyParam, groupParam);
                    }
                    else
                    {
                        // resultSelectExpr1: (k, g) => resultSelectExpr(k, FlattenGroups(g))
                        keyParam = resultSelectExpr.Parameters[0];
                        MethodInfo flattenInfo = typeof(DryadLinqEnumerable).GetMethod("FlattenGroups");
                        flattenInfo = flattenInfo.MakeGenericMethod(keyParam.Type, elemType);
                        Expression groupExpr = Expression.Call(flattenInfo, groupParam);
                        ParameterSubst subst = new ParameterSubst(resultSelectExpr.Parameters[1], groupExpr);
                        body = subst.Visit(resultSelectExpr.Body);
                    }
                    delegateType = typeof(Func<,,>).MakeGenericType(keyParam.Type, groupParam.Type, body.Type);
                    resultSelectExpr1 = Expression.Lambda(delegateType, body, keyParam, groupParam);
                }
            }

            // At this point, the first GroupByNode has been created.
            DLinqQueryNode groupByNode1 = groupByNode;
            DLinqMergeNode mergeNode = null;
            if (!isPartitioned)
            {
                // Create HashPartitionNode, MergeNode, and second GroupByNode

                // Note, if we are doing decomposable-GroupByReduce, there is still some work to go after this
                //   - attach the combiner to the first merge-node
                //   - attach the combiner to the merge-node
                //   - attach finalizer to second GroupBy
                int parCount = (groupByNode.IsDynamic) ? StaticConfig.DefaultPartitionCount : groupByNode.OutputPartition.Count;
                bool isDynamic = (StaticConfig.DynamicOptLevel & StaticConfig.DynamicHashPartitionLevel) != 0;
                DLinqQueryNode hdistNode = new DLinqHashPartitionNode(keySelectExpr1, comparerExpr, parCount,
                                                                      isDynamic, queryExpr, groupByNode);

                // Create the Merge Node
                if (groupByOpName == "OrderedGroupBy")
                {
                    // Mergesort with the same keySelector of the hash partition 
                    mergeNode = new DLinqMergeNode(keySelectExpr1, comparerExpr, true, queryExpr, hdistNode);
                }
                else
                {
                    // Random merge
                    mergeNode = new DLinqMergeNode(false, queryExpr, hdistNode);
                }
                groupByNode1 = new DLinqGroupByNode(groupByOpName, keySelectExpr1, elemSelectExpr1,
                                                    resultSelectExpr1, seedExpr1, accumulateExpr1,
                                                    accumulateExpr1, comparerExpr, false, queryExpr,
                                                    mergeNode);
            }

            // Final tidy-up for decomposable GroupBy-Reduce pattern.
            //   - attach combiner to first GroupByNode
            //   - attache combiner to MergeNode as an aggregator
            //   - build a SelectNode to project out results and call finalizer on them.
            if (dInfoList != null)
            {
                // Add dynamic aggregator to the merge-node, if applicable
                if (StaticConfig.GroupByDynamicReduce && !isPartitioned)
                {
                    mergeNode.AddAggregateNode(groupByNode1);
                }
                
                // Add the final Select node
                Type keyResultPairType = typeof(Pair<,>).MakeGenericType(keyType, seedExpr1.Body.Type);
                ParameterExpression keyResultPairParam = Expression.Parameter(keyResultPairType,
                                                                              DryadLinqCodeGen.MakeUniqueName("e"));
                PropertyInfo valuePropInfo_1 = keyResultPairType.GetProperty("Value");
                Expression combinedValueExpr = Expression.Property(keyResultPairParam, valuePropInfo_1);

                // First, build the combinerList
                int dcnt = dInfoList.Count;
                Expression[] combinerList = new Expression[dcnt];
                for (int i = 0; i < dcnt; i++)
                {
                    if (i + 1 == dcnt)
                    {
                        combinerList[i] = combinedValueExpr;
                    }
                    else
                    {
                        PropertyInfo keyPropInfo = combinedValueExpr.Type.GetProperty("Key");
                        combinerList[i] = Expression.Property(combinedValueExpr, keyPropInfo);
                        PropertyInfo valuePropInfo = combinedValueExpr.Type.GetProperty("Value");
                        combinedValueExpr = Expression.Property(combinedValueExpr, valuePropInfo);
                    }
                    LambdaExpression finalizerExpr = dInfoList[i].FinalReducer;
                    if (finalizerExpr != null)
                    {
                        ParameterSubst subst = new ParameterSubst(finalizerExpr.Parameters[0], combinerList[i]);
                        combinerList[i] = subst.Visit(finalizerExpr.Body);
                    }
                }

                // Build the funcList
                Expression[] funcList = new Expression[dcnt];
                for (int i = 0; i < dcnt; i++)
                {
                    funcList[i] = dInfoList[i].Func;
                }

                // Apply the substitutions
                CombinerSubst combinerSubst = new CombinerSubst(resultSelectExpr, keyResultPairParam, funcList, combinerList);
                Expression finalizerSelectBody = combinerSubst.Visit();

                // Finally, the Select node
                Type delegateType = typeof(Func<,>).MakeGenericType(keyResultPairType, finalizerSelectBody.Type);
                LambdaExpression selectExpr = Expression.Lambda(delegateType, finalizerSelectBody, keyResultPairParam);
                groupByNode1 = new DLinqSelectNode(QueryNodeType.Select, selectExpr, null, queryExpr, groupByNode1);
            }
            return groupByNode1;
        }
Пример #7
0
        /// <summary>
        /// Builds the query-plan subtree for a Zip of two inputs.
        /// </summary>
        /// <remarks>
        /// When either visited input reports <c>IsDynamic</c>, both inputs are wrapped in
        /// merge nodes and a single Apply node calls <c>DryadLinqHelper.Zip</c> on them.
        /// Otherwise the second input is redistributed to the first input's partition
        /// count (driven by LongCount aggregates of both inputs) and a
        /// <c>DLinqZipNode</c> pairs the first input with the redistributed second input.
        /// </remarks>
        /// <param name="first">Source info for the first input; visited to obtain its plan node.</param>
        /// <param name="second">Source info for the second input; visited to obtain its plan node.</param>
        /// <param name="resultSelector">The Zip result selector, passed on to the helper call or the Zip node.</param>
        /// <param name="queryExpr">The originating query expression, attached to every node created here.</param>
        /// <returns>A <c>DLinqApplyNode</c> in the dynamic case, a <c>DLinqZipNode</c> otherwise.</returns>
        private DLinqQueryNode VisitZip(QueryNodeInfo first,
                                        QueryNodeInfo second,
                                        LambdaExpression resultSelector,
                                        MethodCallExpression queryExpr)
        {
            DLinqQueryNode child1 = this.Visit(first);
            DLinqQueryNode child2 = this.Visit(second);

            if (child1.IsDynamic || child2.IsDynamic)
            {
                // Well, let us for now do it on a single machine
                child1 = new DLinqMergeNode(true, queryExpr, child1);
                child2 = new DLinqMergeNode(true, queryExpr, child2);

                // Apply node for (x, y) => Zip(x, y, resultSelector)
                Type paramType1 = typeof(IEnumerable<>).MakeGenericType(child1.OutputTypes[0]);
                ParameterExpression param1 = Expression.Parameter(paramType1, "s1");
                Type paramType2 = typeof(IEnumerable<>).MakeGenericType(child2.OutputTypes[0]);
                ParameterExpression param2 = Expression.Parameter(paramType2, "s2");
                MethodInfo minfo = typeof(DryadLinqHelper).GetMethod("Zip");
                // NOTE(review): only the first input's element type is supplied as a type
                // argument; confirm this matches the generic arity of DryadLinqHelper.Zip.
                minfo = minfo.MakeGenericMethod(child1.OutputTypes[0]);
                Expression body = Expression.Call(minfo, param1, param2, resultSelector);
                // The lambda has two parameters plus a result, so its delegate type is the
                // three-argument Func<,,>. Func<,> accepts only two type arguments and
                // MakeGenericType throws ArgumentException when given three.
                Type funcType = typeof(Func<,,>).MakeGenericType(param1.Type, param2.Type, body.Type);
                LambdaExpression procFunc = Expression.Lambda(funcType, body, param1, param2);
                return new DLinqApplyNode(procFunc, queryExpr, child1, child2);
            }
            else
            {
                int parCount1 = child1.OutputPartition.Count;
                int parCount2 = child2.OutputPartition.Count;
                
                // Count nodes: a LongCount aggregate over each input, each followed by a merge
                DLinqQueryNode countNode1 = new DLinqBasicAggregateNode(null, AggregateOpType.LongCount,
                                                                        true, false, queryExpr, child1);
                DLinqQueryNode countNode2 = new DLinqBasicAggregateNode(null, AggregateOpType.LongCount,
                                                                        true, false, queryExpr, child2);
                countNode1 = new DLinqMergeNode(true, queryExpr, countNode1);
                countNode2 = new DLinqMergeNode(true, queryExpr, countNode2);

                // Apply node for (x, y) => ZipCount(x, y)
                Type paramType1 = typeof(IEnumerable<>).MakeGenericType(typeof(long));
                ParameterExpression param1 = Expression.Parameter(paramType1, "x");
                ParameterExpression param2 = Expression.Parameter(paramType1, "y");                
                MethodInfo minfo = typeof(DryadLinqHelper).GetMethod("ZipCount");
                Expression body = Expression.Call(minfo, param1, param2);
                Type funcType = typeof(Func<,,>).MakeGenericType(param1.Type, param2.Type, body.Type);            
                LambdaExpression zipCount = Expression.Lambda(funcType, body, param1, param2);
                DLinqQueryNode indexedCountNode = new DLinqApplyNode(zipCount, queryExpr, countNode1, countNode2);

                // HashPartition(x => x.index, parCount2)
                // The element type is taken from the first generic argument of ZipCount's return type.
                ParameterExpression param = Expression.Parameter(body.Type.GetGenericArguments()[0], "x");
                Expression keySelectBody = Expression.Property(param, "Index");                
                funcType = typeof(Func<,>).MakeGenericType(param.Type, keySelectBody.Type);
                LambdaExpression keySelectExpr = Expression.Lambda(funcType, keySelectBody, param);
                DLinqQueryNode distCountNode = new DLinqHashPartitionNode(keySelectExpr,
                                                                          null,
                                                                          parCount2,
                                                                          queryExpr,
                                                                          indexedCountNode);

                // Apply node for (x, y) => AssignPartitionIndex(x, y)
                // Here 'body' is still the ZipCount call, so x has ZipCount's return type.
                param1 = Expression.Parameter(body.Type, "x");
                Type paramType2 = typeof(IEnumerable<>).MakeGenericType(child2.OutputTypes[0]);
                param2 = Expression.Parameter(paramType2, "y");
                minfo = typeof(DryadLinqHelper).GetMethod("AssignPartitionIndex");
                minfo = minfo.MakeGenericMethod(child2.OutputTypes[0]);
                body = Expression.Call(minfo, param1, param2);
                funcType = typeof(Func<,,>).MakeGenericType(param1.Type, param2.Type, body.Type);            
                LambdaExpression assignIndex = Expression.Lambda(funcType, body, param1, param2);
                DLinqQueryNode addIndexNode = new DLinqApplyNode(assignIndex, queryExpr, distCountNode, child2);

                // HashPartition(x => x.index, x => x.value, parCount1)
                param = Expression.Parameter(body.Type.GetGenericArguments()[0], "x");
                body = Expression.Property(param, "Index");
                funcType = typeof(Func<,>).MakeGenericType(param.Type, body.Type);
                keySelectExpr = Expression.Lambda(funcType, body, param);
                body = Expression.Property(param, "Value");
                funcType = typeof(Func<,>).MakeGenericType(param.Type, body.Type);
                LambdaExpression resultSelectExpr = Expression.Lambda(funcType, body, param);
                DLinqQueryNode newChild2 = new DLinqHashPartitionNode(keySelectExpr,
                                                                      resultSelectExpr,
                                                                      null,
                                                                      parCount1,
                                                                      false,
                                                                      queryExpr,
                                                                      addIndexNode);
                newChild2 = new DLinqMergeNode(true, queryExpr, newChild2);

                // Finally the zip node
                return new DLinqZipNode(resultSelector, queryExpr, child1, newChild2);
            }
        }
Пример #8
0
        /// <summary>
        /// Builds the query-plan subtree for a binary set operation over two inputs.
        /// </summary>
        /// <remarks>
        /// Both inputs are brought to a compatible partitioning on the element itself
        /// (identity key) before the set-operation node is created. If one input ends up
        /// ordered by that key, the other is sorted when needed and the operation name is
        /// prefixed with "Ordered".
        /// </remarks>
        /// <param name="source1">Source info for the first input.</param>
        /// <param name="source2">Source info for the second input.</param>
        /// <param name="nodeType">The set operation's node type; also used to form the operation name.</param>
        /// <param name="comparerExpr">Expression for the equality comparer, or null for the default comparer.</param>
        /// <param name="queryExpr">The originating query expression, attached to every node created here.</param>
        /// <returns>The <c>DLinqSetOperationNode</c> rooted over the (possibly repartitioned) inputs.</returns>
        /// <exception cref="DryadLinqException">
        /// <paramref name="comparerExpr"/> is null and the element type has no default equality comparer.
        /// </exception>
        private DLinqQueryNode VisitSetOperation(QueryNodeInfo source1,
                                                 QueryNodeInfo source2,
                                                 QueryNodeType nodeType,
                                                 Expression comparerExpr,
                                                 Expression queryExpr)
        {
            DLinqQueryNode child1 = this.Visit(source1);
            DLinqQueryNode child2 = this.Visit(source2);

            Type keyType = child1.OutputTypes[0];
            if (comparerExpr == null && !TypeSystem.HasDefaultEqualityComparer(keyType))
            {
                throw DryadLinqException.Create(DryadLinqErrorCode.ComparerMustBeSpecifiedOrKeyTypeMustBeIEquatable,
                                                string.Format(SR.ComparerMustBeSpecifiedOrKeyTypeMustBeIEquatable, keyType),
                                                queryExpr);
            }

            // The comparer object (evaluated from the expression so it can be compared
            // against the inputs' existing partitioning/ordering information):
            object comparer = null;
            if (comparerExpr != null)
            {
                ExpressionSimplifier<object> evaluator = new ExpressionSimplifier<object>();
                comparer = evaluator.Eval(comparerExpr);
            }

            LambdaExpression keySelectExpr = IdentityFunction.Instance(keyType);
            if (child1.IsDynamic || child2.IsDynamic)
            {
                // Well, let us do the simplest thing for now: hash-partition both sides
                // into the default partition count. The partitioning must use the same
                // comparer as the set operation, otherwise elements that the comparer
                // treats as equal could be sent to different partitions.
                child1 = new DLinqHashPartitionNode(keySelectExpr,
                                                    comparerExpr,
                                                    StaticConfig.DefaultPartitionCount,
                                                    queryExpr,
                                                    child1);
                if (IsMergeNodeNeeded(child1))
                {
                    child1 = new DLinqMergeNode(false, queryExpr, child1);
                }

                child2 = new DLinqHashPartitionNode(keySelectExpr,
                                                    comparerExpr,
                                                    StaticConfig.DefaultPartitionCount,
                                                    queryExpr,
                                                    child2);
                if (IsMergeNodeNeeded(child2))
                {
                    child2 = new DLinqMergeNode(false, queryExpr, child2);
                }

                return new DLinqSetOperationNode(nodeType, nodeType.ToString(), comparerExpr,
                                                 queryExpr, child1, child2);
            }

            // Captured before any repartitioning below replaces child1/child2.
            bool isDescending1 = child1.OutputDataSetInfo.orderByInfo.IsDescending;
            bool isDescending2 = child2.OutputDataSetInfo.orderByInfo.IsDescending;

            // Partition child1 and child2 if needed
            if (child1.OutputPartition.ParType == PartitionType.Range &&
                child1.OutputPartition.HasKeys &&
                child1.OutputPartition.IsPartitionedBy(keySelectExpr, comparer))
            {
                // Repartition child2 unless it is already range-partitioned on the key
                // with exactly child1's partitioning.
                if (child2.OutputPartition.ParType != PartitionType.Range ||
                    !child2.OutputPartition.IsPartitionedBy(keySelectExpr, comparer) ||
                    !child1.OutputPartition.IsSamePartition(child2.OutputPartition))
                {
                    // Range distribute child2 using child1's partition
                    child2 = child1.OutputPartition.CreatePartitionNode(keySelectExpr, child2);
                    if (IsMergeNodeNeeded(child2))
                    {
                        child2 = new DLinqMergeNode(false, queryExpr, child2);
                    }
                }
            }
            else if (child2.OutputPartition.ParType == PartitionType.Range &&
                     child2.OutputPartition.HasKeys &&
                     child2.OutputPartition.IsPartitionedBy(keySelectExpr, comparer))
            {
                // Range distribute child1 using child2's partition
                child1 = child2.OutputPartition.CreatePartitionNode(keySelectExpr, child1);
                if (IsMergeNodeNeeded(child1))
                {
                    child1 = new DLinqMergeNode(false, queryExpr, child1);
                }
            }
            else if (child1.OutputPartition.ParType == PartitionType.Hash &&
                     child1.OutputPartition.IsPartitionedBy(keySelectExpr, comparer))
            {
                if (child2.OutputPartition.ParType != PartitionType.Hash ||
                    !child2.OutputPartition.IsPartitionedBy(keySelectExpr, comparer) ||
                    !child1.OutputPartition.IsSamePartition(child2.OutputPartition))
                {
                    // Hash distribute child2:
                    child2 = new DLinqHashPartitionNode(keySelectExpr,
                                                        comparerExpr,
                                                        child1.OutputPartition.Count,
                                                        queryExpr,
                                                        child2);
                    if (IsMergeNodeNeeded(child2))
                    {
                        child2 = new DLinqMergeNode(false, queryExpr, child2);
                    }
                }
            }
            else if (child2.OutputPartition.ParType == PartitionType.Hash &&
                     child2.OutputPartition.IsPartitionedBy(keySelectExpr, comparer))
            {
                // Hash distribute child1 to child2's partition count
                child1 = new DLinqHashPartitionNode(keySelectExpr,
                                                    comparerExpr,
                                                    child2.OutputPartition.Count,
                                                    queryExpr,
                                                    child1);
                if (IsMergeNodeNeeded(child1))
                {
                    child1 = new DLinqMergeNode(false, queryExpr, child1);
                }
            }
            else
            {
                // No luck. Hash distribute both child1 and child2, then perform hash operation
                int parCnt = Math.Max(child1.OutputPartition.Count, child2.OutputPartition.Count);
                if (parCnt > 1)
                {
                    child1 = new DLinqHashPartitionNode(keySelectExpr, comparerExpr, parCnt, queryExpr, child1);
                    if (IsMergeNodeNeeded(child1))
                    {
                        child1 = new DLinqMergeNode(false, queryExpr, child1);
                    }

                    child2 = new DLinqHashPartitionNode(keySelectExpr, comparerExpr, parCnt, queryExpr, child2);
                    if (IsMergeNodeNeeded(child2))
                    {
                        child2 = new DLinqMergeNode(false, queryExpr, child2);
                    }
                }
            }

            // Perform either hash or ordered operation
            string opName = "";
            if (child1.IsOrderedBy(keySelectExpr, comparer))
            {
                // child1 is sorted; child2 must be sorted the same way for the ordered
                // operation, so test child2 here (child1 is already known to be ordered).
                if (!child2.IsOrderedBy(keySelectExpr, comparer) ||
                    isDescending1 != isDescending2)
                {
                    // Sort inner if unsorted                    
                    child2 = new DLinqOrderByNode(keySelectExpr, comparerExpr, isDescending1, queryExpr, child2);
                }
                opName = "Ordered";
            }
            else if (child2.IsOrderedBy(keySelectExpr, comparer))
            {
                if (!child1.IsOrderedBy(keySelectExpr, comparer) ||
                    isDescending1 != isDescending2)
                {
                    // Sort outer if unsorted
                    child1 = new DLinqOrderByNode(keySelectExpr, comparerExpr, isDescending2, queryExpr, child1);
                }
                opName = "Ordered";
            }

            return new DLinqSetOperationNode(nodeType, opName + nodeType, comparerExpr, queryExpr, child1, child2);
        }
Пример #9
0
        /// <summary>
        /// Builds the query-plan subtree for a Concat over all children of
        /// <paramref name="source"/>.
        /// </summary>
        /// <remarks>
        /// Every child is visited and fed into one <c>DLinqConcatNode</c>. If that node is
        /// not dynamic and its partition count exceeds
        /// <c>StaticConfig.MaxPartitionCount</c>, the result is redistributed into half as
        /// many partitions using per-partition counts and the
        /// <c>DryadLinqHelper.IndexedCount</c> / <c>AddPartitionIndex</c> helpers.
        /// </remarks>
        /// <param name="source">Node info whose children are the inputs being concatenated.</param>
        /// <param name="queryExpr">The originating query expression, attached to every node created here.</param>
        /// <returns>The concat node, or the merge node that ends the repartitioning pipeline.</returns>
        private DLinqQueryNode VisitConcat(QueryNodeInfo source, MethodCallExpression queryExpr)
        {
            DLinqQueryNode[] inputs = new DLinqQueryNode[source.Children.Count];
            for (int idx = 0; idx < source.Children.Count; idx++)
            {
                inputs[idx] = this.Visit(source.Children[idx].Child);
            }
            DLinqQueryNode concatNode = new DLinqConcatNode(queryExpr, inputs);

            int partitionCount = concatNode.OutputPartition.Count;
            if (concatNode.IsDynamic || partitionCount <= StaticConfig.MaxPartitionCount)
            {
                // Partition count is acceptable (or not statically known): nothing more to do.
                return concatNode;
            }

            // Too many partitions, need to repartition
            int reducedCount = partitionCount / 2;
            DLinqQueryNode partitionCounts = new DLinqBasicAggregateNode(null, AggregateOpType.LongCount,
                                                                         true, false, queryExpr, concatNode);
            DLinqQueryNode mergedCounts = new DLinqMergeNode(true, queryExpr, partitionCounts);

            // Apply node for s => IndexedCount(s)
            ParameterExpression countsParam = Expression.Parameter(typeof(IEnumerable<long>), "s");
            MethodInfo indexedCountMethod = typeof(DryadLinqHelper).GetMethod("IndexedCount")
                                                                   .MakeGenericMethod(typeof(long));
            Expression indexedCountCall = Expression.Call(indexedCountMethod, countsParam);
            Type indexedCountFuncType = typeof(Func<,>).MakeGenericType(countsParam.Type, indexedCountCall.Type);
            LambdaExpression indexedCountLambda = Expression.Lambda(indexedCountFuncType, indexedCountCall, countsParam);
            DLinqQueryNode indexedCounts = new DLinqApplyNode(indexedCountLambda, queryExpr, mergedCounts);

            // HashPartition(x => x.Index, partitionCount) over the indexed counts
            ParameterExpression countElem = Expression.Parameter(indexedCountCall.Type.GetGenericArguments()[0], "x");
            Expression countIndex = Expression.Property(countElem, "Index");
            Type countKeyFuncType = typeof(Func<,>).MakeGenericType(countElem.Type, countIndex.Type);
            LambdaExpression countKeySelector = Expression.Lambda(countKeyFuncType, countIndex, countElem);
            DLinqQueryNode distributedCounts = new DLinqHashPartitionNode(countKeySelector,
                                                                          null,
                                                                          partitionCount,
                                                                          queryExpr,
                                                                          indexedCounts);

            // Apply node for (x, y) => AddPartitionIndex(x, y, reducedCount)
            Type recordType = concatNode.OutputTypes[0];
            ParameterExpression countsArg = Expression.Parameter(indexedCountCall.Type, "x");
            ParameterExpression recordsArg = Expression.Parameter(typeof(IEnumerable<>).MakeGenericType(recordType), "y");
            MethodInfo addIndexMethod = typeof(DryadLinqHelper).GetMethod("AddPartitionIndex")
                                                               .MakeGenericMethod(recordType);
            Expression addIndexCall = Expression.Call(addIndexMethod, countsArg, recordsArg,
                                                      Expression.Constant(reducedCount));
            Type addIndexFuncType = typeof(Func<,,>).MakeGenericType(countsArg.Type, recordsArg.Type, addIndexCall.Type);
            LambdaExpression addIndexLambda = Expression.Lambda(addIndexFuncType, addIndexCall, countsArg, recordsArg);
            DLinqQueryNode indexedRecords = new DLinqApplyNode(addIndexLambda, queryExpr, distributedCounts, concatNode);

            // HashPartition(x => x.Index, x => x.Value, reducedCount) over the indexed records
            ParameterExpression recordElem = Expression.Parameter(addIndexCall.Type.GetGenericArguments()[0], "x");
            Expression recordIndex = Expression.Property(recordElem, "Index");
            Type recordKeyFuncType = typeof(Func<,>).MakeGenericType(recordElem.Type, recordIndex.Type);
            LambdaExpression recordKeySelector = Expression.Lambda(recordKeyFuncType, recordIndex, recordElem);
            Expression recordValue = Expression.Property(recordElem, "Value");
            Type recordValueFuncType = typeof(Func<,>).MakeGenericType(recordElem.Type, recordValue.Type);
            LambdaExpression recordValueSelector = Expression.Lambda(recordValueFuncType, recordValue, recordElem);
            DLinqQueryNode repartitioned = new DLinqHashPartitionNode(recordKeySelector,
                                                                      recordValueSelector,
                                                                      null,
                                                                      reducedCount,
                                                                      false,
                                                                      queryExpr,
                                                                      indexedRecords);
            return new DLinqMergeNode(true, queryExpr, repartitioned);
        }
Пример #10
0
        /// <summary>
        /// Builds the query-plan subtree for Distinct over <paramref name="source"/>.
        /// </summary>
        /// <remarks>
        /// When the input is not already partitioned by the element itself and is either
        /// dynamic or has more than one partition, a first <c>DLinqDistinctNode</c>
        /// (created with <c>true</c> as its first argument) is followed by a hash
        /// partition and a merge. A final <c>DLinqDistinctNode</c> (created with
        /// <c>false</c>) is always placed on top.
        /// </remarks>
        /// <param name="source">Source info for the input; visited to obtain its plan node.</param>
        /// <param name="comparerExpr">Expression for the equality comparer, or null for the default comparer.</param>
        /// <param name="queryExpr">The originating query expression, attached to every node created here.</param>
        /// <returns>The top-level <c>DLinqDistinctNode</c>.</returns>
        /// <exception cref="DryadLinqException">
        /// <paramref name="comparerExpr"/> is null and the element type has no default equality comparer.
        /// </exception>
        private DLinqQueryNode VisitDistinct(QueryNodeInfo source,
                                             Expression comparerExpr,
                                             Expression queryExpr)
        {
            DLinqQueryNode input = this.Visit(source);
            Type elementType = input.OutputTypes[0];

            bool hasComparer = (comparerExpr != null);
            if (!hasComparer && !TypeSystem.HasDefaultEqualityComparer(elementType))
            {
                throw DryadLinqException.Create(DryadLinqErrorCode.ComparerMustBeSpecifiedOrKeyTypeMustBeIEquatable,
                                                string.Format(SR.ComparerMustBeSpecifiedOrKeyTypeMustBeIEquatable, elementType),
                                                queryExpr);
            }

            // Evaluate the comparer expression so it can be checked against the
            // input's existing partitioning.
            object comparer = null;
            if (hasComparer)
            {
                comparer = new ExpressionSimplifier<object>().Eval(comparerExpr);
            }

            LambdaExpression identitySelector = IdentityFunction.Instance(elementType);
            bool needsRepartition = !input.OutputPartition.IsPartitionedBy(identitySelector, comparer) &&
                                    (input.IsDynamic || input.OutputPartition.Count > 1);
            if (needsRepartition)
            {
                DLinqQueryNode firstDistinct = new DLinqDistinctNode(true, comparerExpr, queryExpr, input);
                bool useDynamicHash = (StaticConfig.DynamicOptLevel & StaticConfig.DynamicHashPartitionLevel) != 0;
                // The partition count is read from the first distinct node, not the raw input.
                DLinqQueryNode hashed = new DLinqHashPartitionNode(identitySelector,
                                                                   comparerExpr,
                                                                   firstDistinct.OutputPartition.Count,
                                                                   useDynamicHash,
                                                                   queryExpr,
                                                                   firstDistinct);
                input = new DLinqMergeNode(false, queryExpr, hashed);
            }

            return new DLinqDistinctNode(false, comparerExpr, queryExpr, input);
        }
Пример #11
0
        private DLinqQueryNode VisitJoin(QueryNodeInfo outerSource,
                                         QueryNodeInfo innerSource,
                                         QueryNodeType nodeType,
                                         LambdaExpression outerKeySelector,
                                         LambdaExpression innerKeySelector,
                                         LambdaExpression resultSelector,
                                         Expression comparerExpr,
                                         Expression queryExpr)
        {
            DLinqQueryNode outerChild = this.Visit(outerSource);
            DLinqQueryNode innerChild = this.Visit(innerSource);
            DLinqQueryNode joinNode = null;

            Type keyType = outerKeySelector.Type.GetGenericArguments()[1];
            if (comparerExpr == null && !TypeSystem.HasDefaultEqualityComparer(keyType))
            {
                throw DryadLinqException.Create(DryadLinqErrorCode.ComparerMustBeSpecifiedOrKeyTypeMustBeIEquatable,
                                                string.Format(SR.ComparerMustBeSpecifiedOrKeyTypeMustBeIEquatable, keyType),
                                                queryExpr);
            }

            // The comparer object:
            object comparer = null;
            if (comparerExpr != null)
            {
                ExpressionSimplifier<object> evaluator = new ExpressionSimplifier<object>();
                comparer = evaluator.Eval(comparerExpr);
            }

            if (outerChild.IsDynamic || innerChild.IsDynamic)
            {
                // Well, let us do the simplest thing for now
                outerChild = new DLinqHashPartitionNode(outerKeySelector,
                                                        comparerExpr,
                                                        StaticConfig.DefaultPartitionCount,
                                                        queryExpr,
                                                        outerChild);
                if (IsMergeNodeNeeded(outerChild))
                {
                    outerChild = new DLinqMergeNode(false, queryExpr, outerChild);
                }
                
                innerChild = new DLinqHashPartitionNode(innerKeySelector,
                                                        comparerExpr,
                                                        StaticConfig.DefaultPartitionCount,
                                                        queryExpr,
                                                        innerChild);
                if (IsMergeNodeNeeded(innerChild))
                {
                    innerChild = new DLinqMergeNode(false, queryExpr, innerChild);
                }

                joinNode = new DLinqJoinNode(nodeType,
                                             "Hash" + nodeType,
                                             outerKeySelector,
                                             innerKeySelector,
                                             resultSelector,
                                             comparerExpr,
                                             queryExpr,
                                             outerChild,
                                             innerChild);
                return joinNode;
            }


            bool isOuterDescending = outerChild.OutputDataSetInfo.orderByInfo.IsDescending;
            bool isInnerDescending = innerChild.OutputDataSetInfo.orderByInfo.IsDescending;

            // Partition outer and inner if needed
            if (outerChild.OutputPartition.ParType == PartitionType.Range &&
                outerChild.OutputPartition.HasKeys &&
                outerChild.OutputPartition.IsPartitionedBy(outerKeySelector, comparer))
            {
                if (innerChild.OutputPartition.ParType != PartitionType.Range ||
                    !innerChild.OutputPartition.IsPartitionedBy(innerKeySelector, comparer) ||
                    !outerChild.OutputPartition.IsSamePartition(innerChild.OutputPartition))
                {
                    // Range distribute inner using outer's partition.
                    innerChild = outerChild.OutputPartition.CreatePartitionNode(innerKeySelector, innerChild);
                    if (IsMergeNodeNeeded(innerChild))
                    {
                        innerChild = new DLinqMergeNode(false, queryExpr, innerChild);
                    }
                }
            }
            else if (innerChild.OutputPartition.ParType == PartitionType.Range &&
                     innerChild.OutputPartition.HasKeys &&
                     innerChild.OutputPartition.IsPartitionedBy(innerKeySelector, comparer))
            {
                // Range distribute outer using inner's partition.
                outerChild = innerChild.OutputPartition.CreatePartitionNode(outerKeySelector, outerChild);
                if (IsMergeNodeNeeded(outerChild))
                {
                    outerChild = new DLinqMergeNode(false, queryExpr, outerChild);
                }
            }
            else if (outerChild.OutputPartition.ParType == PartitionType.Hash &&
                     outerChild.OutputPartition.IsPartitionedBy(outerKeySelector, comparer))
            {
                if (innerChild.OutputPartition.ParType != PartitionType.Hash ||
                    !innerChild.OutputPartition.IsPartitionedBy(innerKeySelector, comparer) ||
                    !outerChild.OutputPartition.IsSamePartition(innerChild.OutputPartition))
                {
                    innerChild = new DLinqHashPartitionNode(innerKeySelector,
                                                            comparerExpr,
                                                            outerChild.OutputPartition.Count,
                                                            queryExpr,
                                                            innerChild);
                    if (IsMergeNodeNeeded(innerChild))
                    {
                        innerChild = new DLinqMergeNode(false, queryExpr, innerChild);
                    }
                }
            }
            else if (innerChild.OutputPartition.ParType == PartitionType.Hash &&
                     innerChild.OutputPartition.IsPartitionedBy(innerKeySelector, comparer))
            {
                outerChild = new DLinqHashPartitionNode(outerKeySelector,
                                                        comparerExpr,
                                                        innerChild.OutputPartition.Count,
                                                        queryExpr,
                                                        outerChild);
                if (IsMergeNodeNeeded(outerChild))
                {
                    outerChild = new DLinqMergeNode(false, queryExpr, outerChild);
                }
            }
            else
            {
                // No luck. Hash partition both outer and inner
                int parCnt = Math.Max(outerChild.OutputPartition.Count, innerChild.OutputPartition.Count);
                if (parCnt > 1)
                {
                    outerChild = new DLinqHashPartitionNode(outerKeySelector,
                                                            comparerExpr,
                                                            parCnt,
                                                            queryExpr,
                                                            outerChild);
                    if (IsMergeNodeNeeded(outerChild))
                    {
                        outerChild = new DLinqMergeNode(false, queryExpr, outerChild);
                    }

                    innerChild = new DLinqHashPartitionNode(innerKeySelector,
                                                            comparerExpr,
                                                            parCnt,
                                                            queryExpr,
                                                            innerChild);
                    if (IsMergeNodeNeeded(innerChild))
                    {
                        innerChild = new DLinqMergeNode(false, queryExpr, innerChild);
                    }
                }
            }

            // Perform either merge or hash join
            string opName = "Hash";
            if (outerChild.IsOrderedBy(outerKeySelector, comparer))
            {
                if (!innerChild.IsOrderedBy(innerKeySelector, comparer) ||
                    isOuterDescending != isInnerDescending)
                {
                    // Sort inner if unsorted                    
                    innerChild = new DLinqOrderByNode(innerKeySelector, comparerExpr,
                                                      isOuterDescending, queryExpr, innerChild);
                }
                opName = "Merge";
            }
            else if (innerChild.IsOrderedBy(innerKeySelector, comparer))
            {
                if (!outerChild.IsOrderedBy(outerKeySelector, comparer) ||
                    isOuterDescending != isInnerDescending)
                {
                    // Sort outer if unsorted
                    outerChild = new DLinqOrderByNode(outerKeySelector, comparerExpr,
                                                      isInnerDescending, queryExpr, outerChild);
                }
                opName = "Merge";                
            }
                    
            joinNode = new DLinqJoinNode(nodeType,
                                         opName + nodeType,
                                         outerKeySelector,
                                         innerKeySelector,
                                         resultSelector,
                                         comparerExpr,
                                         queryExpr,
                                         outerChild,
                                         innerChild);
            return joinNode;
        }
Пример #12
0
        /// <summary>
        /// Builds the plan fragment that turns <paramref name="child"/> into a set of offsets:
        /// a LongCount aggregate over the child, a merge of those counts, an apply node that
        /// calls <c>DryadLinqEnumerable.Offsets</c>, a hash partition keyed by the identity
        /// function over <c>IndexedValue&lt;long&gt;</c>, and a final merge.
        /// </summary>
        /// <param name="isLong">Forwarded as the boolean constant argument of <c>Offsets</c>.</param>
        /// <param name="queryExpr">Query expression handed to every node created here.</param>
        /// <param name="child">Node that is counted; its output partition count is reused for the hash partition.</param>
        /// <returns>The merge node that terminates the fragment.</returns>
        private DLinqQueryNode CreateOffset(bool isLong, Expression queryExpr, DLinqQueryNode child)
        {
            // Stage 1: LongCount aggregate over the child.
            DLinqQueryNode counts = new DLinqBasicAggregateNode(
                null, AggregateOpType.LongCount, true, false, queryExpr, child);

            // Stage 2: lambda x => Offsets(x, isLong), where x is IEnumerable<long>.
            ParameterExpression countsParam = Expression.Parameter(typeof(IEnumerable<long>), "x");
            MethodInfo offsetsMethod = typeof(DryadLinqEnumerable).GetMethod("Offsets");
            Expression isLongArg = Expression.Constant(isLong, typeof(bool));
            Expression offsetsCall = Expression.Call(offsetsMethod, countsParam, isLongArg);
            Type offsetsFuncType = typeof(Func<,>).MakeGenericType(countsParam.Type, offsetsCall.Type);
            LambdaExpression offsetsLambda = Expression.Lambda(offsetsFuncType, offsetsCall, countsParam);

            // Stage 3: merge the counts, then apply the Offsets lambda to the merged stream.
            DLinqQueryNode mergedCounts = new DLinqMergeNode(true, queryExpr, counts);
            DLinqQueryNode offsets = new DLinqApplyNode(offsetsLambda, queryExpr, mergedCounts);

            // Stage 4: hash partition on the identity key into as many partitions as the
            // child has, then merge the result.
            // NOTE(review): presumably Offsets yields IndexedValue<long> items, matching the
            // key selector's type -- confirm against DryadLinqEnumerable.Offsets.
            LambdaExpression identityKey = IdentityFunction.Instance(typeof(IndexedValue<long>));
            int partitionCount = child.OutputPartition.Count;
            DLinqQueryNode partitioned = new DLinqHashPartitionNode(
                identityKey, null, null, partitionCount, false, queryExpr, offsets);
            return new DLinqMergeNode(false, queryExpr, partitioned);
        }