private DLinqQueryNode VisitGroupBy(QueryNodeInfo source,
                                            LambdaExpression keySelectExpr,
                                            LambdaExpression elemSelectExpr,
                                            LambdaExpression resultSelectExpr,
                                            Expression comparerExpr,
                                            Expression queryExpr)
        {
            DLinqQueryNode child = this.Visit(source);
            
            ExpressionInfo einfo = new ExpressionInfo(keySelectExpr);
            if (einfo.IsExpensive)
            {
                // Any method call that is not tagged as "expensive=false" will be deemed expensive.
                // if the keySelector is expensive, we rewrite the query so that the key-function is invoked only once
                // and the record key passed around via a Pair<TKey,TRecord>.
                // keyFunc becomes pair=>pair.Key
                // elementSelector must be rewritten so that references to (record) become (pair.Value)

                Type[] vkTypes = keySelectExpr.Type.GetGenericArguments();
                Type pairType = typeof(Pair<,>).MakeGenericType(vkTypes[1], vkTypes[0]);
                ParameterExpression pairParam = Expression.Parameter(pairType, "e");

                // Add Select(x => new Pair<K,S>(key(x), x))
                ParameterExpression valueParam = keySelectExpr.Parameters[0];
                Expression body = Expression.New(pairType.GetConstructors()[0], keySelectExpr.Body, valueParam);
                Type delegateType = typeof(Func<,>).MakeGenericType(valueParam.Type, body.Type);
                LambdaExpression selectExpr = Expression.Lambda(delegateType, body, valueParam);
                child = new DLinqSelectNode(QueryNodeType.Select, selectExpr, null, queryExpr, child);
                
                // Change keySelector to e => e.Key
                PropertyInfo keyInfo = pairParam.Type.GetProperty("Key");
                body = Expression.Property(pairParam, keyInfo);
                delegateType = typeof(Func<,>).MakeGenericType(pairParam.Type, body.Type);
                keySelectExpr = Expression.Lambda(delegateType, body, pairParam);

                // Add or change elementSelector with e.Value
                PropertyInfo valueInfo = pairParam.Type.GetProperty("Value");
                body = Expression.Property(pairParam, valueInfo);
                if (elemSelectExpr != null)
                {
                    ParameterSubst subst = new ParameterSubst(elemSelectExpr.Parameters[0], body);
                    body = subst.Visit(elemSelectExpr.Body);
                }
                delegateType = typeof(Func<,>).MakeGenericType(pairParam.Type, body.Type);
                elemSelectExpr = Expression.Lambda(delegateType, body, pairParam);
            }

            Type keyType = keySelectExpr.Type.GetGenericArguments()[1];
            if (comparerExpr == null && !TypeSystem.HasDefaultEqualityComparer(keyType))
            {
                throw DryadLinqException.Create(DryadLinqErrorCode.ComparerMustBeSpecifiedOrKeyTypeMustBeIEquatable,
                                                string.Format(SR.ComparerMustBeSpecifiedOrKeyTypeMustBeIEquatable, keyType),
                                                queryExpr);
            }
            Type elemType;
            if (elemSelectExpr == null)
            {
                elemType = keySelectExpr.Type.GetGenericArguments()[0];
            }
            else
            {
                elemType = elemSelectExpr.Type.GetGenericArguments()[1];
            }
                 
            // The comparer object:
            object comparer = null;
            if (comparerExpr != null)
            {
                ExpressionSimplifier<object> evaluator = new ExpressionSimplifier<object>();
                comparer = evaluator.Eval(comparerExpr);
            }

            LambdaExpression keySelectExpr1 = keySelectExpr;
            LambdaExpression elemSelectExpr1 = elemSelectExpr;
            LambdaExpression resultSelectExpr1 = resultSelectExpr;
            LambdaExpression seedExpr1 = null;
            LambdaExpression accumulateExpr1 = null;
            
            List<DecompositionInfo> dInfoList = null;
            if (resultSelectExpr != null)
            {
                dInfoList = Decomposition.GetDecompositionInfoList(resultSelectExpr, this.m_codeGen);
            }

            String groupByOpName = "GroupBy";
            DLinqQueryNode groupByNode = child;
            bool isPartitioned = child.OutputPartition.IsPartitionedBy(keySelectExpr, comparer);
            if (dInfoList != null)
            {
                // ** Decomposable GroupBy-Reduce
                // This block creates the first GroupByNode and does some preparation for subsequent nodes.
                if (child.IsOrderedBy(keySelectExpr, comparer))
                {
                    groupByOpName = "OrderedGroupBy";
                }

                int dcnt = dInfoList.Count;
                ParameterExpression keyParam;
                if (resultSelectExpr.Parameters.Count == 1)
                {
                    keyParam = Expression.Parameter(keyType, DryadLinqCodeGen.MakeUniqueName("k"));
                }
                else
                {
                    keyParam = resultSelectExpr.Parameters[0];
                }

                // Seed:
                ParameterExpression param2 = Expression.Parameter(
                                                 elemType, DryadLinqCodeGen.MakeUniqueName("e"));
                Expression zeroExpr = Expression.Constant(0, typeof(int));
                Expression seedBody = zeroExpr;
                if (dcnt != 0)
                {
                    LambdaExpression seed = dInfoList[dcnt-1].Seed;
                    ParameterSubst subst = new ParameterSubst(seed.Parameters[0], param2);
                    seedBody = subst.Visit(seed.Body);
                    for (int i = dcnt - 2; i >= 0; i--)
                    {
                        seed = dInfoList[i].Seed;
                        subst = new ParameterSubst(seed.Parameters[0], param2);
                        Expression firstExpr = subst.Visit(seed.Body);
                        Type newPairType = typeof(Pair<,>).MakeGenericType(firstExpr.Type, seedBody.Type);
                        seedBody = Expression.New(newPairType.GetConstructors()[0], firstExpr, seedBody);
                    }
                }
                LambdaExpression seedExpr = Expression.Lambda(seedBody, param2);

                // Accumulate:
                ParameterExpression param1 = Expression.Parameter(
                                                 seedBody.Type, DryadLinqCodeGen.MakeUniqueName("a"));
                Expression accumulateBody = zeroExpr;
                if (dcnt != 0)
                {
                    accumulateBody = Decomposition.AccumulateList(param1, param2, dInfoList, 0);
                }
                LambdaExpression accumulateExpr = Expression.Lambda(accumulateBody, param1, param2);

                // Now prepare for the merge-aggregator and/or in the secondary group-by.
                // keySelectExpr1: e => e.Key
                Type reducerResType = typeof(Pair<,>).MakeGenericType(keyParam.Type, accumulateBody.Type);                
                ParameterExpression reducerResParam = Expression.Parameter(reducerResType, "e");
                PropertyInfo keyInfo = reducerResParam.Type.GetProperty("Key");
                Expression body = Expression.Property(reducerResParam, keyInfo);
                Type delegateType = typeof(Func<,>).MakeGenericType(reducerResParam.Type, body.Type);
                keySelectExpr1 = Expression.Lambda(delegateType, body, reducerResParam);

                // elemSelectExpr1: e => e.Value
                PropertyInfo valueInfo = reducerResParam.Type.GetProperty("Value");
                body = Expression.Property(reducerResParam, valueInfo);
                delegateType = typeof(Func<,>).MakeGenericType(reducerResParam.Type, body.Type);
                elemSelectExpr1 = Expression.Lambda(delegateType, body, reducerResParam);

                // SeedExpr1
                param2 = Expression.Parameter(elemSelectExpr1.Body.Type,
                                              DryadLinqCodeGen.MakeUniqueName("e"));
                seedExpr1 = Expression.Lambda(param2, param2);
                
                // AccumulateExpr1
                Expression recursiveAccumulateBody = zeroExpr;
                if (dcnt != 0)
                {
                    recursiveAccumulateBody = Decomposition.RecursiveAccumulateList(param1, param2, dInfoList, 0);
                }
                accumulateExpr1 = Expression.Lambda(recursiveAccumulateBody, param1, param2);
                
                // resultSelectExpr1
                resultSelectExpr1 = null;

                // The first groupByNode.  
                // If the input was already correctly partitioned, this will be the only groupByNode.
                bool isPartial = StaticConfig.GroupByLocalAggregationIsPartial && !isPartitioned;
                groupByNode = new DLinqGroupByNode(
                                       groupByOpName, keySelectExpr, elemSelectExpr, null,
                                       seedExpr, accumulateExpr, accumulateExpr1, comparerExpr,
                                       isPartial, queryExpr, child);
            }
            else
            {
                // Can't do partial aggregation.
                // Use sort, mergesort, and ordered groupby, if TKey implements IComparable.
                if ((comparer != null && TypeSystem.IsComparer(comparer, keyType)) ||
                    (comparer == null && TypeSystem.HasDefaultComparer(keyType)))
                {
                    if (!child.IsOrderedBy(keySelectExpr, comparer))
                    {
                        groupByNode = new DLinqOrderByNode(keySelectExpr, comparerExpr, true, queryExpr, child);
                    }
                    groupByOpName = "OrderedGroupBy";
                }

                // Add a GroupByNode if it is partitioned or has elementSelector.
                // If the input was already correctly partitioned, this will be the only groupByNode.
                if (isPartitioned)
                {
                    groupByNode = new DLinqGroupByNode(groupByOpName,
                                                       keySelectExpr,
                                                       elemSelectExpr,
                                                       resultSelectExpr,
                                                       null,  // seed
                                                       null,  // accumulate
                                                       null,  // recursiveAccumulate
                                                       comparerExpr,
                                                       false, // isPartial
                                                       queryExpr,
                                                       groupByNode);
                }
                else if (elemSelectExpr != null)
                {
                    // Local GroupBy without resultSelector:
                    groupByNode = new DLinqGroupByNode(groupByOpName,
                                                       keySelectExpr,
                                                       elemSelectExpr,
                                                       null,  // resultSelect
                                                       null,  // seed
                                                       null,  // accumulate
                                                       null,  // recursiveAccumulate
                                                       comparerExpr,
                                                       StaticConfig.GroupByLocalAggregationIsPartial,  // isPartial
                                                       queryExpr,
                                                       groupByNode);
                    
                    // keySelectExpr1: g => g.Key
                    ParameterExpression groupParam = Expression.Parameter(groupByNode.OutputTypes[0], "g");
                    PropertyInfo keyInfo = groupParam.Type.GetProperty("Key");
                    Expression body = Expression.Property(groupParam, keyInfo);
                    Type delegateType = typeof(Func<,>).MakeGenericType(groupParam.Type, body.Type);
                    keySelectExpr1 = Expression.Lambda(delegateType, body, groupParam);

                    // No elementSelector
                    elemSelectExpr1 = null;

                    // resultSelectExpr1
                    ParameterExpression keyParam;
                    Type groupType = typeof(IEnumerable<>).MakeGenericType(groupByNode.OutputTypes[0]);
                    groupParam = Expression.Parameter(groupType, DryadLinqCodeGen.MakeUniqueName("g"));
                    if (resultSelectExpr == null)
                    {
                        // resultSelectExpr1: (k, g) => MakeDryadLinqGroup(k, g)
                        keyParam = Expression.Parameter(keySelectExpr1.Body.Type, DryadLinqCodeGen.MakeUniqueName("k"));
                        MethodInfo groupingInfo = typeof(DryadLinqEnumerable).GetMethod("MakeDryadLinqGroup");
                        groupingInfo = groupingInfo.MakeGenericMethod(keyParam.Type, elemType);
                        body = Expression.Call(groupingInfo, keyParam, groupParam);
                    }
                    else
                    {
                        // resultSelectExpr1: (k, g) => resultSelectExpr(k, FlattenGroups(g))
                        keyParam = resultSelectExpr.Parameters[0];
                        MethodInfo flattenInfo = typeof(DryadLinqEnumerable).GetMethod("FlattenGroups");
                        flattenInfo = flattenInfo.MakeGenericMethod(keyParam.Type, elemType);
                        Expression groupExpr = Expression.Call(flattenInfo, groupParam);
                        ParameterSubst subst = new ParameterSubst(resultSelectExpr.Parameters[1], groupExpr);
                        body = subst.Visit(resultSelectExpr.Body);
                    }
                    delegateType = typeof(Func<,,>).MakeGenericType(keyParam.Type, groupParam.Type, body.Type);
                    resultSelectExpr1 = Expression.Lambda(delegateType, body, keyParam, groupParam);
                }
            }

            // At this point, the first GroupByNode has been created.
            DLinqQueryNode groupByNode1 = groupByNode;
            DLinqMergeNode mergeNode = null;
            if (!isPartitioned)
            {
                // Create HashPartitionNode, MergeNode, and second GroupByNode

                // Note, if we are doing decomposable-GroupByReduce, there is still some work to go after this
                //   - attach the combiner to the first merge-node
                //   - attach the combiner to the merge-node
                //   - attach finalizer to second GroupBy
                int parCount = (groupByNode.IsDynamic) ? StaticConfig.DefaultPartitionCount : groupByNode.OutputPartition.Count;
                bool isDynamic = (StaticConfig.DynamicOptLevel & StaticConfig.DynamicHashPartitionLevel) != 0;
                DLinqQueryNode hdistNode = new DLinqHashPartitionNode(keySelectExpr1, comparerExpr, parCount,
                                                                      isDynamic, queryExpr, groupByNode);

                // Create the Merge Node
                if (groupByOpName == "OrderedGroupBy")
                {
                    // Mergesort with the same keySelector of the hash partition 
                    mergeNode = new DLinqMergeNode(keySelectExpr1, comparerExpr, true, queryExpr, hdistNode);
                }
                else
                {
                    // Random merge
                    mergeNode = new DLinqMergeNode(false, queryExpr, hdistNode);
                }
                groupByNode1 = new DLinqGroupByNode(groupByOpName, keySelectExpr1, elemSelectExpr1,
                                                    resultSelectExpr1, seedExpr1, accumulateExpr1,
                                                    accumulateExpr1, comparerExpr, false, queryExpr,
                                                    mergeNode);
            }

            // Final tidy-up for decomposable GroupBy-Reduce pattern.
            //   - attach combiner to first GroupByNode
            //   - attache combiner to MergeNode as an aggregator
            //   - build a SelectNode to project out results and call finalizer on them.
            if (dInfoList != null)
            {
                // Add dynamic aggregator to the merge-node, if applicable
                if (StaticConfig.GroupByDynamicReduce && !isPartitioned)
                {
                    mergeNode.AddAggregateNode(groupByNode1);
                }
                
                // Add the final Select node
                Type keyResultPairType = typeof(Pair<,>).MakeGenericType(keyType, seedExpr1.Body.Type);
                ParameterExpression keyResultPairParam = Expression.Parameter(keyResultPairType,
                                                                              DryadLinqCodeGen.MakeUniqueName("e"));
                PropertyInfo valuePropInfo_1 = keyResultPairType.GetProperty("Value");
                Expression combinedValueExpr = Expression.Property(keyResultPairParam, valuePropInfo_1);

                // First, build the combinerList
                int dcnt = dInfoList.Count;
                Expression[] combinerList = new Expression[dcnt];
                for (int i = 0; i < dcnt; i++)
                {
                    if (i + 1 == dcnt)
                    {
                        combinerList[i] = combinedValueExpr;
                    }
                    else
                    {
                        PropertyInfo keyPropInfo = combinedValueExpr.Type.GetProperty("Key");
                        combinerList[i] = Expression.Property(combinedValueExpr, keyPropInfo);
                        PropertyInfo valuePropInfo = combinedValueExpr.Type.GetProperty("Value");
                        combinedValueExpr = Expression.Property(combinedValueExpr, valuePropInfo);
                    }
                    LambdaExpression finalizerExpr = dInfoList[i].FinalReducer;
                    if (finalizerExpr != null)
                    {
                        ParameterSubst subst = new ParameterSubst(finalizerExpr.Parameters[0], combinerList[i]);
                        combinerList[i] = subst.Visit(finalizerExpr.Body);
                    }
                }

                // Build the funcList
                Expression[] funcList = new Expression[dcnt];
                for (int i = 0; i < dcnt; i++)
                {
                    funcList[i] = dInfoList[i].Func;
                }

                // Apply the substitutions
                CombinerSubst combinerSubst = new CombinerSubst(resultSelectExpr, keyResultPairParam, funcList, combinerList);
                Expression finalizerSelectBody = combinerSubst.Visit();

                // Finally, the Select node
                Type delegateType = typeof(Func<,>).MakeGenericType(keyResultPairType, finalizerSelectBody.Type);
                LambdaExpression selectExpr = Expression.Lambda(delegateType, finalizerSelectBody, keyResultPairParam);
                groupByNode1 = new DLinqSelectNode(QueryNodeType.Select, selectExpr, null, queryExpr, groupByNode1);
            }
            return groupByNode1;
        }
Example #2
0
 internal virtual string Visit(DLinqSelectNode node,
                               CodeMemberMethod vertexMethod,
                               string[] readerNames,
                               string[] writerNames)
 {
     return node.AddVertexCode(vertexMethod, readerNames, writerNames);
 }
        private DLinqQueryNode VisitSelect(QueryNodeInfo source,
                                           QueryNodeType nodeType,
                                           LambdaExpression selector,
                                           LambdaExpression resultSelector,
                                           MethodCallExpression queryExpr)
        {
            DLinqQueryNode selectNode;
            if (selector.Type.GetGenericArguments().Length == 2)
            {
                // If this select's child is a groupby node, push this select into its child, if
                //   1. The groupby node is not tee'd, and
                //   2. The groupby node has no result selector, and
                //   3. The selector is decomposable
                if (!source.IsForked &&
                    IsGroupByWithoutResultSelector(source.QueryExpression) &&
                    Decomposition.GetDecompositionInfoList(selector, m_codeGen) != null)
                {
                    MethodCallExpression expr = (MethodCallExpression)source.QueryExpression;
                    LambdaExpression keySelectExpr = DryadLinqExpression.GetLambda(expr.Arguments[1]);

                    // Figure out elemSelectExpr and comparerExpr
                    LambdaExpression elemSelectExpr = null;
                    Expression comparerExpr = null;
                    if (expr.Arguments.Count == 3)
                    {
                        elemSelectExpr = DryadLinqExpression.GetLambda(expr.Arguments[2]);
                        if (elemSelectExpr == null)
                        {
                            comparerExpr = expr.Arguments[2];
                        }
                    }
                    else if (expr.Arguments.Count == 4)
                    {
                        elemSelectExpr = DryadLinqExpression.GetLambda(expr.Arguments[2]);
                        comparerExpr = expr.Arguments[3];
                    }

                    // Construct new query expression by building result selector expression
                    // and pushing it to groupby node.
                    selectNode = VisitGroupBy(source.Children[0].Child, keySelectExpr,
                                              elemSelectExpr, selector, comparerExpr, queryExpr);
                    if (nodeType == QueryNodeType.SelectMany)
                    {
                        Type selectorRetType = selector.Type.GetGenericArguments()[1];
                        LambdaExpression id = IdentityFunction.Instance(selectorRetType);
                        selectNode = new DLinqSelectNode(nodeType, id, resultSelector, queryExpr, selectNode);
                    }
                }
                else
                {
                    DLinqQueryNode child = this.Visit(source);
                    selectNode = this.PromoteConcat(
                                         source, child,
                                         x => new DLinqSelectNode(nodeType, selector, resultSelector, queryExpr, x));
                }
            }
            else
            {
                // The "indexed" version
                DLinqQueryNode child = this.Visit(source);
                if (!child.IsDynamic && child.OutputPartition.Count == 1)
                {
                    selectNode = this.PromoteConcat(
                                         source, child,
                                         x => new DLinqSelectNode(nodeType, selector, resultSelector, queryExpr, x));
                }
                else
                {
                    child.IsForked = true;

                    // Create (x, y) => Select(x, y, selector)
                    Type ptype1 = typeof(IEnumerable<>).MakeGenericType(child.OutputTypes[0]);
                    Type ptype2 = typeof(IEnumerable<>).MakeGenericType(typeof(IndexedValue<long>));
                    ParameterExpression param1 = Expression.Parameter(ptype1, DryadLinqCodeGen.MakeUniqueName("x"));
                    ParameterExpression param2 = Expression.Parameter(ptype2, DryadLinqCodeGen.MakeUniqueName("y"));
                    
                    string methodName = queryExpr.Method.Name;
                    Type[] selectorTypeArgs = selector.Type.GetGenericArguments();
                    Type typeArg2 = selectorTypeArgs[selectorTypeArgs.Length - 1];
                    if (nodeType == QueryNodeType.SelectMany)
                    {
                        if (resultSelector != null)
                        {
                            methodName += "Result";
                        }
                        typeArg2 = typeArg2.GetGenericArguments()[0];
                    }

                    string targetMethodName = methodName + "WithStartIndex";
                    MethodInfo minfo = typeof(DryadLinqEnumerable).GetMethod(targetMethodName);
                    Expression body;
                    if (resultSelector == null)
                    {
                        minfo = minfo.MakeGenericMethod(child.OutputTypes[0], typeArg2);
                        body = Expression.Call(minfo, param1, param2, selector);
                    }
                    else
                    {
                        minfo = minfo.MakeGenericMethod(child.OutputTypes[0], typeArg2, resultSelector.Body.Type);
                        body = Expression.Call(minfo, param1, param2, selector, resultSelector);
                    }
                    Type type = typeof(Func<,,>).MakeGenericType(ptype1, ptype2, body.Type);
                    LambdaExpression procFunc = Expression.Lambda(type, body, param1, param2);

                    bool isLong = methodName.StartsWith("Long", StringComparison.Ordinal);
                    DLinqQueryNode offsetNode = this.CreateOffset(isLong, queryExpr, child);
                    selectNode = new DLinqApplyNode(procFunc, queryExpr, child, offsetNode);
                }
            }
            return selectNode;
        }