Пример #1
0
        private DryadQueryNode CreateOffset(bool isLong, Expression queryExpr, DryadQueryNode child)
        {
            // Count node
            DryadQueryNode countNode = new DryadBasicAggregateNode(null, AggregateOpType.LongCount,
                                                                   true, false, queryExpr, child);

            // Apply node for x => Offsets(x)
            Type paramType = typeof(IEnumerable<>).MakeGenericType(typeof(long));
            ParameterExpression param = Expression.Parameter(paramType, "x");
            MethodInfo minfo = typeof(HpcLinqEnumerable).GetMethod("Offsets");
            Expression body = Expression.Call(minfo, param, Expression.Constant(isLong, typeof(bool)));
            Type type = typeof(Func<,>).MakeGenericType(param.Type, body.Type);
            LambdaExpression procFunc = Expression.Lambda(type, body, param);
            DryadQueryNode mergeCountNode = new DryadMergeNode(true, true, queryExpr, countNode);
            DryadQueryNode offsetsNode = new DryadApplyNode(procFunc, queryExpr, mergeCountNode);

            // HashPartition
            LambdaExpression keySelectExpr = IdentityFunction.Instance(typeof(IndexedValue<long>));
            int pcount = child.OutputPartition.Count;
            DryadQueryNode hdistNode = new DryadHashPartitionNode(keySelectExpr, null, null, pcount,
                                                                  false, queryExpr, offsetsNode);
            DryadQueryNode resNode = new DryadMergeNode(false, true, queryExpr, hdistNode);
            return resNode;
        }
Пример #2
0
        private DryadQueryNode VisitWhere(QueryNodeInfo source,
                                          LambdaExpression predicate,
                                          MethodCallExpression queryExpr)
        {
            DryadQueryNode child = this.Visit(source);
            DryadQueryNode whereNode;
            if (predicate.Type.GetGenericArguments().Length == 2 ||
                (!child.IsDynamic && child.OutputPartition.Count == 1))
            {
                whereNode = this.PromoteConcat(source, child, x => new DryadWhereNode(predicate, queryExpr, x));
            }
            else
            {
                // The "indexed" version
                // Create (x, y) => DryadWhere(x, y, predicate)
                Type ptype1 = typeof(IEnumerable<>).MakeGenericType(child.OutputTypes[0]);
                Type ptype2 = typeof(IEnumerable<>).MakeGenericType(typeof(IndexedValue<long>));
                ParameterExpression param1 = Expression.Parameter(ptype1, HpcLinqCodeGen.MakeUniqueName("x"));
                ParameterExpression param2 = Expression.Parameter(ptype2, HpcLinqCodeGen.MakeUniqueName("y"));
                string targetMethod = queryExpr.Method.Name + "WithStartIndex";
                MethodInfo minfo = typeof(HpcLinqEnumerable).GetMethod(targetMethod);
                minfo = minfo.MakeGenericMethod(child.OutputTypes[0]);
                Expression body = Expression.Call(minfo, param1, param2, predicate);
                Type type = typeof(Func<,,>).MakeGenericType(ptype1, ptype2, body.Type);
                LambdaExpression procFunc = Expression.Lambda(type, body, param1, param2);

                child.IsForked = true;
                bool isLong = (queryExpr.Method.Name == "LongWhere");
                DryadQueryNode offsetNode = this.CreateOffset(isLong, queryExpr, child);
                whereNode = new DryadApplyNode(procFunc, queryExpr, child, offsetNode);
            }
            return whereNode;
        }
Пример #3
0
        private DryadQueryNode VisitZip(QueryNodeInfo first,
                                        QueryNodeInfo second,
                                        LambdaExpression resultSelector,
                                        MethodCallExpression queryExpr)
        {
            DryadQueryNode child1 = this.Visit(first);
            DryadQueryNode child2 = this.Visit(second);

            if (child1.IsDynamic || child2.IsDynamic)
            {
                // Well, let us for now do it on a single machine
                child1 = new DryadMergeNode(true, false, queryExpr, child1);
                child2 = new DryadMergeNode(true, false, queryExpr, child2);

                // Apply node for (x, y) => Zip(x, y, resultSelector)
                Type paramType1 = typeof(IEnumerable<>).MakeGenericType(child1.OutputTypes[0]);
                ParameterExpression param1 = Expression.Parameter(paramType1, "s1");
                Type paramType2 = typeof(IEnumerable<>).MakeGenericType(child2.OutputTypes[0]);
                ParameterExpression param2 = Expression.Parameter(paramType2, "s2");
                MethodInfo minfo = typeof(HpcLinqHelper).GetMethod("Zip");
                minfo = minfo.MakeGenericMethod(child1.OutputTypes[0]);
                Expression body = Expression.Call(minfo, param1, param2, resultSelector);
                Type funcType = typeof(Func<,>).MakeGenericType(param1.Type, param2.Type, body.Type);
                LambdaExpression procFunc = Expression.Lambda(funcType, body, param1, param2);
                return new DryadApplyNode(procFunc, queryExpr, child1, child2);
            }
            else
            {
                int parCount1 = child1.OutputPartition.Count;
                int parCount2 = child2.OutputPartition.Count;

                // Count nodes
                DryadQueryNode countNode1 = new DryadBasicAggregateNode(null, AggregateOpType.LongCount,
                                                                        true, false, queryExpr, child1);
                DryadQueryNode countNode2 = new DryadBasicAggregateNode(null, AggregateOpType.LongCount,
                                                                        true, false, queryExpr, child2);
                countNode1 = new DryadMergeNode(true, false, queryExpr, countNode1);
                countNode2 = new DryadMergeNode(true, false, queryExpr, countNode2);

                // Apply node for (x, y) => ZipCount(x, y)
                Type paramType1 = typeof(IEnumerable<>).MakeGenericType(typeof(long));
                ParameterExpression param1 = Expression.Parameter(paramType1, "x");
                ParameterExpression param2 = Expression.Parameter(paramType1, "y");
                MethodInfo minfo = typeof(HpcLinqHelper).GetMethod("ZipCount");
                Expression body = Expression.Call(minfo, param1, param2);
                Type funcType = typeof(Func<,,>).MakeGenericType(param1.Type, param2.Type, body.Type);
                LambdaExpression zipCount = Expression.Lambda(funcType, body, param1, param2);
                DryadQueryNode indexedCountNode = new DryadApplyNode(zipCount, queryExpr, countNode1, countNode2);

                // HashPartition(x => x.index, parCount2)
                ParameterExpression param = Expression.Parameter(body.Type.GetGenericArguments()[0], "x");
                Expression keySelectBody = Expression.Property(param, "Index");
                funcType = typeof(Func<,>).MakeGenericType(param.Type, keySelectBody.Type);
                LambdaExpression keySelectExpr = Expression.Lambda(funcType, keySelectBody, param);
                DryadQueryNode distCountNode = new DryadHashPartitionNode(keySelectExpr,
                                                                          null,
                                                                          parCount2,
                                                                          queryExpr,
                                                                          indexedCountNode);

                // Apply node for (x, y) => AssignPartitionIndex(x, y)
                param1 = Expression.Parameter(body.Type, "x");
                Type paramType2 = typeof(IEnumerable<>).MakeGenericType(child2.OutputTypes[0]);
                param2 = Expression.Parameter(paramType2, "y");
                minfo = typeof(HpcLinqHelper).GetMethod("AssignPartitionIndex");
                minfo = minfo.MakeGenericMethod(child2.OutputTypes[0]);
                body = Expression.Call(minfo, param1, param2);
                funcType = typeof(Func<,,>).MakeGenericType(param1.Type, param2.Type, body.Type);
                LambdaExpression assignIndex = Expression.Lambda(funcType, body, param1, param2);
                DryadQueryNode addIndexNode = new DryadApplyNode(assignIndex, queryExpr, distCountNode, child2);

                // HashPartition(x => x.index, x => x.value, parCount1)
                param = Expression.Parameter(body.Type.GetGenericArguments()[0], "x");
                body = Expression.Property(param, "Index");
                funcType = typeof(Func<,>).MakeGenericType(param.Type, body.Type);
                keySelectExpr = Expression.Lambda(funcType, body, param);
                body = Expression.Property(param, "Value");
                funcType = typeof(Func<,>).MakeGenericType(param.Type, body.Type);
                LambdaExpression resultSelectExpr = Expression.Lambda(funcType, body, param);
                DryadQueryNode newChild2 = new DryadHashPartitionNode(keySelectExpr,
                                                                      resultSelectExpr,
                                                                      null,
                                                                      parCount1,
                                                                      false,
                                                                      queryExpr,
                                                                      addIndexNode);
                newChild2 = new DryadMergeNode(true, true, queryExpr, newChild2);

                // Finally the zip node
                return new DryadZipNode(resultSelector, queryExpr, child1, newChild2);
            }
        }
Пример #4
0
        private DryadQueryNode VisitSelect(QueryNodeInfo source,
                                           QueryNodeType nodeType,
                                           LambdaExpression selector,
                                           LambdaExpression resultSelector,
                                           MethodCallExpression queryExpr)
        {
            DryadQueryNode selectNode;
            if (selector.Type.GetGenericArguments().Length == 2)
            {
                // If this select's child is a groupby node, push this select into its child, if
                //   1. The groupby node is not tee'd, and
                //   2. The groupby node has no result selector, and
                //   3. The selector is decomposable
                if (!source.IsForked &&
                    IsGroupByWithoutResultSelector(source.queryExpression) &&
                    Decomposition.GetDecompositionInfoList(selector, m_codeGen) != null)
                {
                    MethodCallExpression expr = (MethodCallExpression)source.queryExpression;
                    LambdaExpression keySelectExpr = HpcLinqExpression.GetLambda(expr.Arguments[1]);

                    // Figure out elemSelectExpr and comparerExpr
                    LambdaExpression elemSelectExpr = null;
                    Expression comparerExpr = null;
                    if (expr.Arguments.Count == 3)
                    {
                        elemSelectExpr = HpcLinqExpression.GetLambda(expr.Arguments[2]);
                        if (elemSelectExpr == null)
                        {
                            comparerExpr = expr.Arguments[2];
                        }
                    }
                    else if (expr.Arguments.Count == 4)
                    {
                        elemSelectExpr = HpcLinqExpression.GetLambda(expr.Arguments[2]);
                        comparerExpr = expr.Arguments[3];
                    }

                    // Construct new query expression by building result selector expression
                    // and pushing it to groupby node.
                    selectNode = VisitGroupBy(source.children[0].child, keySelectExpr,
                                              elemSelectExpr, selector, comparerExpr, queryExpr);
                    if (nodeType == QueryNodeType.SelectMany)
                    {
                        Type selectorRetType = selector.Type.GetGenericArguments()[1];
                        LambdaExpression id = IdentityFunction.Instance(selectorRetType);
                        selectNode = new DryadSelectNode(nodeType, id, resultSelector, queryExpr, selectNode);
                    }
                }
                else
                {
                    DryadQueryNode child = this.Visit(source);
                    selectNode = this.PromoteConcat(
                                         source, child,
                                         x => new DryadSelectNode(nodeType, selector, resultSelector, queryExpr, x));
                }
            }
            else
            {
                // The "indexed" version
                DryadQueryNode child = this.Visit(source);
                if (!child.IsDynamic && child.OutputPartition.Count == 1)
                {
                    selectNode = this.PromoteConcat(
                                         source, child,
                                         x => new DryadSelectNode(nodeType, selector, resultSelector, queryExpr, x));
                }
                else
                {
                    child.IsForked = true;

                    // Create (x, y) => Select(x, y, selector)
                    Type ptype1 = typeof(IEnumerable<>).MakeGenericType(child.OutputTypes[0]);
                    Type ptype2 = typeof(IEnumerable<>).MakeGenericType(typeof(IndexedValue<long>));
                    ParameterExpression param1 = Expression.Parameter(ptype1, HpcLinqCodeGen.MakeUniqueName("x"));
                    ParameterExpression param2 = Expression.Parameter(ptype2, HpcLinqCodeGen.MakeUniqueName("y"));

                    string methodName = queryExpr.Method.Name;
                    Type[] selectorTypeArgs = selector.Type.GetGenericArguments();
                    Type typeArg2 = selectorTypeArgs[selectorTypeArgs.Length - 1];
                    if (nodeType == QueryNodeType.SelectMany)
                    {
                        if (resultSelector != null)
                        {
                            methodName += "Result";
                        }
                        typeArg2 = typeArg2.GetGenericArguments()[0];
                    }

                    string targetMethodName = methodName + "WithStartIndex";
                    MethodInfo minfo = typeof(HpcLinqEnumerable).GetMethod(targetMethodName);
                    Expression body;
                    if (resultSelector == null)
                    {
                        minfo = minfo.MakeGenericMethod(child.OutputTypes[0], typeArg2);
                        body = Expression.Call(minfo, param1, param2, selector);
                    }
                    else
                    {
                        minfo = minfo.MakeGenericMethod(child.OutputTypes[0], typeArg2, resultSelector.Body.Type);
                        body = Expression.Call(minfo, param1, param2, selector, resultSelector);
                    }
                    Type type = typeof(Func<,,>).MakeGenericType(ptype1, ptype2, body.Type);
                    LambdaExpression procFunc = Expression.Lambda(type, body, param1, param2);

                    bool isLong = methodName.StartsWith("Long", StringComparison.Ordinal);
                    DryadQueryNode offsetNode = this.CreateOffset(isLong, queryExpr, child);
                    selectNode = new DryadApplyNode(procFunc, queryExpr, child, offsetNode);
                }
            }
            return selectNode;
        }
Пример #5
0
        private DryadQueryNode VisitSlidingWindow(QueryNodeInfo source,
                                                  LambdaExpression procLambda,
                                                  Expression windowSizeExpr,
                                                  Expression queryExpr)
        {
            // var windows = source.Apply(s => HpcLinqHelper.Last(s, windowSize));
            // var slided = windows.Apply(s => HpcLinqHelper.Slide(s)).HashPartition(x => x.Index);
            // slided.Apply(source, (x, y) => HpcLinqHelper.ProcessWindows(x, y, procFunc, windowSize));
            DryadQueryNode child = this.Visit(source);
            if (child.IsDynamic)
            {
                throw new DryadLinqException("SlidingWindow is only supported for static partition count");
            }

            ExpressionSimplifier<int> evaluator = new ExpressionSimplifier<int>();
            Expression windowSize = Expression.Constant(evaluator.Eval(windowSizeExpr), typeof(int));

            child.IsForked = true;

            // Apply node for s => Last(s, windowSize)
            Type paramType = typeof(IEnumerable<>).MakeGenericType(child.OutputTypes[0]);
            ParameterExpression param = Expression.Parameter(paramType, HpcLinqCodeGen.MakeUniqueName("s"));
            MethodInfo minfo = typeof(HpcLinqHelper).GetMethod("Last");
            minfo = minfo.MakeGenericMethod(child.OutputTypes[0]);
            Expression body = Expression.Call(minfo, param, windowSize);
            Type funcType = typeof(Func<,>).MakeGenericType(param.Type, body.Type);
            LambdaExpression procFunc = Expression.Lambda(funcType, body, param);
            DryadQueryNode lastNode = new DryadApplyNode(procFunc, queryExpr, child);
            lastNode = new DryadMergeNode(true, true, queryExpr, lastNode);

            // Apply node for s => Slide(s)
            param = Expression.Parameter(body.Type, HpcLinqCodeGen.MakeUniqueName("s"));
            minfo = typeof(HpcLinqHelper).GetMethod("Slide");
            minfo = minfo.MakeGenericMethod(child.OutputTypes[0]);
            body = Expression.Call(minfo, param);
            funcType = typeof(Func<,>).MakeGenericType(param.Type, body.Type);
            procFunc = Expression.Lambda(funcType, body, param);
            DryadQueryNode slideNode = new DryadApplyNode(procFunc, queryExpr, lastNode);

            // Hash partition to distribute from partition i to i+1
            int pcount = child.OutputPartition.Count;
            param = Expression.Parameter(body.Type.GetGenericArguments()[0], "x");
            Expression keySelectBody = Expression.Property(param, "Index");
            funcType = typeof(Func<,>).MakeGenericType(param.Type, keySelectBody.Type);
            LambdaExpression keySelectExpr = Expression.Lambda(funcType, keySelectBody, param);
            DryadQueryNode hdistNode = new DryadHashPartitionNode(keySelectExpr,
                                                                  null,
                                                                  pcount,
                                                                  queryExpr,
                                                                  slideNode);

            // Apply node for (x, y) => ProcessWindows(x, y, proclambda, windowSize)
            Type paramType1 = typeof(IEnumerable<>).MakeGenericType(body.Type);
            ParameterExpression param1 = Expression.Parameter(paramType1, HpcLinqCodeGen.MakeUniqueName("x"));
            Type paramType2 = typeof(IEnumerable<>).MakeGenericType(child.OutputTypes[0]);
            ParameterExpression param2 = Expression.Parameter(paramType2, HpcLinqCodeGen.MakeUniqueName("y"));
            minfo = typeof(HpcLinqHelper).GetMethod("ProcessWindows");
            minfo = minfo.MakeGenericMethod(child.OutputTypes[0], procLambda.Body.Type);
            body = Expression.Call(minfo, param1, param2, procLambda, windowSize);
            funcType = typeof(Func<,,>).MakeGenericType(param1.Type, param2.Type, body.Type);
            procFunc = Expression.Lambda(funcType, body, param1, param2);
            return new DryadApplyNode(procFunc, queryExpr, hdistNode, child);
        }
Пример #6
0
        // Phase 1 of the query optimization
        internal void GenerateQueryPlanPhase1()
        {
            if (this.m_queryPlan1 != null) return;

            // Apply some simple rewrite rules
            SimpleRewriter rewriter = new SimpleRewriter(this.m_exprNodeInfoMap.Values.ToList());
            rewriter.Rewrite();

            // Generate the query plan of phase1
            var referencedNodes = this.m_referencedQueryMap.Values;
            this.m_queryPlan1 = new DryadQueryNode[this.m_queryExprs.Length + referencedNodes.Count];
            for (int i = 0; i < this.m_queryExprs.Length; i++)
            {
                this.m_queryPlan1[i] = this.Visit(this.m_queryNodeInfos[i].children[0].child);
            }
            int idx = this.m_queryExprs.Length;
            foreach (QueryNodeInfo nodeInfo in referencedNodes)
            {
                // Add a Tee'd Merge
                this.m_queryPlan1[idx] = this.Visit(nodeInfo.children[0].child);
                DryadQueryNode mergeNode = new DryadMergeNode(true, false, nodeInfo.queryExpression,
                                                              this.m_queryPlan1[idx]);
                this.m_queryPlan1[idx] =  new DryadTeeNode(mergeNode.OutputTypes[0], true,
                                                           mergeNode.QueryExpression, mergeNode);
                nodeInfo.queryNode = this.m_queryPlan1[idx];
                idx++;
            }

            // Finally, add the output nodes.
            Dictionary<DryadQueryNode, int> forkCounts = new Dictionary<DryadQueryNode, int>();
            for (int i = 0; i < this.m_queryExprs.Length; i++)
            {
                DryadQueryNode queryNode = this.m_queryPlan1[i];
                int cnt;
                if (!forkCounts.TryGetValue(queryNode, out cnt))
                {
                    cnt = queryNode.Parents.Count;
                }
                forkCounts[queryNode] = cnt + 1;
            }

            for (int i = 0; i < this.m_queryExprs.Length; i++)
            {
                HpcClientSideLog.Add("Query " + i + " Output: " + this.m_outputDatapaths[i]);

                DryadQueryNode queryNode = this.m_queryPlan1[i];
                if (TypeSystem.IsAnonymousType(queryNode.OutputTypes[0]))
                {
                    throw new DryadLinqException(HpcLinqErrorCode.OutputTypeCannotBeAnonymous,
                                               SR.OutputTypeCannotBeAnonymous);
                }

                // Add dummy Apply to make Dryad happy (it doesn't like to hook inputs straight to outputs)
                if ((queryNode is DryadInputNode) || (forkCounts[queryNode] > 1))
                {
                    // Add a dummy Apply
                    Type paramType = typeof(IEnumerable<>).MakeGenericType(queryNode.OutputTypes[0]);
                    ParameterExpression param = Expression.Parameter(paramType, "x");
                    Type type = typeof(Func<,>).MakeGenericType(paramType, paramType);
                    LambdaExpression applyExpr = Expression.Lambda(type, param, param);
                    DryadQueryNode applyNode = new DryadApplyNode(applyExpr, this.m_queryExprs[i], queryNode);
                    applyNode.OutputDataSetInfo = queryNode.OutputDataSetInfo;
                    queryNode = applyNode;
                }

                if (queryNode is DryadConcatNode)
                {
                    // Again, we add dummy Apply in certain cases to make Dryad happy
                    ((DryadConcatNode)queryNode).FixInputs();
                }

                // Add the output node
                DscCompressionScheme outputScheme = this.m_context.Configuration.OutputDataCompressionScheme;
                DryadOutputNode outputNode = new DryadOutputNode(this.m_context,
                                                                 this.m_outputDatapaths[i],
                                                                 this.m_isTempOutput[i],
                                                                 outputScheme,
                                                                 this.m_queryExprs[i],
                                                                 queryNode);

                this.m_queryPlan1[i] = outputNode;

                if (this.m_outputUriMap.ContainsKey(this.m_outputDatapaths[i].ToLower()))
                {
                    throw new DryadLinqException(HpcLinqErrorCode.MultipleOutputsWithSameDscUri,
                                               String.Format(SR.MultipleOutputsWithSameDscUri, this.m_outputDatapaths[i]));
                }

                this.m_outputUriMap.Add(this.m_outputDatapaths[i].ToLower(), outputNode);
                this.m_outputTypes[i] = this.m_queryPlan1[i].OutputTypes[0];

                // Remove useless Tees to make Dryad happy
                if ((queryNode is DryadTeeNode) && (forkCounts[queryNode] == 1))
                {
                    DryadQueryNode teeChild = queryNode.Children[0];
                    teeChild.UpdateParent(queryNode, outputNode);
                    outputNode.UpdateChildren(queryNode, teeChild);
                }
            }
        }
Пример #7
0
        // Basic plan: (reverse all partitions) then (reverse data in each partition)
        // The main complication is to perform the first step.
        // Approach:
        //   - tee the input.
        //   - have a dummy apply node that produces the singleton {0} at each partition
        //   - merge to get a seq {0,0,..} whose length = nPartition.
        //   - convert that seq to { (0,n), (1,n), ...}
        //   - hash-partition to send one item to each of the n workers.
        //   - use binary-apply to attach targetIndex to each source item
        //              Apply( seq1 = indexCountPair, seq2 = original data) => ({tgt, item0}, {tgt, item1}, .. )
        //   - hash-partition to move items to target partition.
        //   - use local LINQ reverse to do the local data reversal.
        private DryadQueryNode VisitReverse(QueryNodeInfo source, Expression queryExpr)
        {
            DryadQueryNode child = this.Visit(source);
            if (child.IsDynamic)
            {
                throw new DryadLinqException("Reverse is only supported for static partition count");
            }

            child.IsForked = true;

            // Apply node for s => ValueZero(s)
            Type paramType = typeof(IEnumerable<>).MakeGenericType(child.OutputTypes[0]);
            ParameterExpression param = Expression.Parameter(paramType, "s");
            MethodInfo minfo = typeof(HpcLinqHelper).GetMethod("ValueZero");
            minfo = minfo.MakeGenericMethod(child.OutputTypes[0]);
            Expression body = Expression.Call(minfo, param);
            Type funcType = typeof(Func<,>).MakeGenericType(param.Type, body.Type);
            LambdaExpression procFunc = Expression.Lambda(funcType, body, param);
            DryadQueryNode valueZeroNode = new DryadApplyNode(procFunc, queryExpr, child);

            // Apply node for s => ReverseIndex(s)
            paramType = typeof(IEnumerable<>).MakeGenericType(typeof(int));
            param = Expression.Parameter(paramType, "s");
            minfo = typeof(HpcLinqHelper).GetMethod("MakeIndexCountPairs");
            body = Expression.Call(minfo, param);
            funcType = typeof(Func<,>).MakeGenericType(param.Type, body.Type);
            procFunc = Expression.Lambda(funcType, body, param);
            DryadQueryNode mergeZeroNode = new DryadMergeNode(true, true, queryExpr, valueZeroNode);
            DryadQueryNode indexCountNode = new DryadApplyNode(procFunc, queryExpr, mergeZeroNode);

            // HashPartition to distribute the indexCounts -- one to each partition.
            // each partition will receive (myPartitionID, pcount).
            int pcount = child.OutputPartition.Count;
            param = Expression.Parameter(body.Type.GetGenericArguments()[0], "x");
            Expression keySelectBody = Expression.Property(param, "Index");
            funcType = typeof(Func<,>).MakeGenericType(param.Type, keySelectBody.Type);
            LambdaExpression keySelectExpr = Expression.Lambda(funcType, keySelectBody, param);
            DryadQueryNode hdistNode = new DryadHashPartitionNode(keySelectExpr,
                                                                  null,
                                                                  pcount,
                                                                  queryExpr,
                                                                  indexCountNode);

            // Apply node for (x, y) => AddIndexForReverse(x, y)
            ParameterExpression param1 = Expression.Parameter(body.Type, "x");
            Type paramType2 = typeof(IEnumerable<>).MakeGenericType(child.OutputTypes[0]);
            ParameterExpression param2 = Expression.Parameter(paramType2, "y");
            minfo = typeof(HpcLinqHelper).GetMethod("AddIndexForReverse");
            minfo = minfo.MakeGenericMethod(child.OutputTypes[0]);
            body = Expression.Call(minfo, param1, param2);
            funcType = typeof(Func<,,>).MakeGenericType(param1.Type, param2.Type, body.Type);
            LambdaExpression addIndexFunc = Expression.Lambda(funcType, body, param1, param2);
            DryadQueryNode addIndexNode = new DryadApplyNode(addIndexFunc, queryExpr, hdistNode, child);

            // HashPartition(x => x.index, x => x.value, pcount)
            // Moves all data to correct target partition.  (each worker will direct all its items to one target partition)
            param = Expression.Parameter(body.Type.GetGenericArguments()[0], "x");
            body = Expression.Property(param, "Index");
            funcType = typeof(Func<,>).MakeGenericType(param.Type, body.Type);
            keySelectExpr = Expression.Lambda(funcType, body, param);
            body = Expression.Property(param, "Value");
            funcType = typeof(Func<,>).MakeGenericType(param.Type, body.Type);
            LambdaExpression resultSelectExpr = Expression.Lambda(funcType, body, param);
            DryadQueryNode reversePartitionNode = new DryadHashPartitionNode(
                                                            keySelectExpr, resultSelectExpr, null,
                                                            pcount, false, queryExpr, addIndexNode);

            // Reverse node
            paramType = typeof(IEnumerable<>).MakeGenericType(reversePartitionNode.OutputTypes[0]);
            param = Expression.Parameter(paramType, "x");
            minfo = typeof(HpcLinqVertex).GetMethod("Reverse");
            minfo = minfo.MakeGenericMethod(child.OutputTypes[0]);
            body = Expression.Call(minfo, param);
            funcType = typeof(Func<,>).MakeGenericType(param.Type, body.Type);
            procFunc = Expression.Lambda(funcType, body, param);
            DryadQueryNode resNode = new DryadMergeNode(true, true, queryExpr, reversePartitionNode);
            resNode = new DryadApplyNode(procFunc, queryExpr, resNode);

            return resNode;
        }
Пример #8
0
        private DryadQueryNode VisitMultiApply(QueryNodeInfo source,
                                               LambdaExpression procLambda,
                                               bool perPartition,
                                               bool isFirstOnly,
                                               MethodCallExpression queryExpr)
        {
            DryadQueryNode[] childs = new DryadQueryNode[source.children.Count];
            for (int i = 0; i < source.children.Count; ++i)
            {
                childs[i] = this.Visit(source.children[i].child);
            }

            bool isDynamic = childs.Any(x => x.IsDynamic);
            if (perPartition && !isDynamic)
            {
                // Homomorphic case.
                if (isFirstOnly)
                {
                    for (int i = 1; i < childs.Length; ++i)
                    {
                        childs[i] = new DryadTeeNode(childs[i].OutputTypes[0], true, queryExpr, childs[i]);
                        childs[i].ConOpType = ConnectionOpType.CrossProduct;
                        childs[i] = new DryadMergeNode(childs[0].OutputPartition.Count, queryExpr, childs[i]);
                    }
                }
                else
                {
                    int count = childs[0].OutputPartition.Count;
                    for (int i = 1; i < childs.Length; ++i)
                    {
                        if (childs[i].OutputPartition.Count != count)
                        {
                            throw DryadLinqException.Create(HpcLinqErrorCode.HomomorphicApplyNeedsSamePartitionCount,
                                                          SR.HomomorphicApplyNeedsSamePartitionCount,
                                                          queryExpr);
                        }
                    }
                }
            }
            else
            {
                // Non-homomorphic case.
                for (int i = 0; i < childs.Length; ++i)
                {
                    if (childs[i].IsDynamic || childs[i].OutputPartition.Count > 1)
                    {
                        childs[i] = new DryadMergeNode(true, false, queryExpr, childs[i]);
                    }
                }
            }
            DryadQueryNode applyNode = new DryadApplyNode(procLambda, true, queryExpr, childs);
            return applyNode;
        }
Пример #9
0
        private DryadQueryNode VisitPartitionOp(string opName,
                                                QueryNodeInfo source,
                                                QueryNodeType nodeType,
                                                Expression controlExpr,
                                                MethodCallExpression queryExpr)
        {
            DryadQueryNode resNode;
            if (nodeType == QueryNodeType.TakeWhile &&
                controlExpr.Type.GetGenericArguments().Length != 2)
            {
                // The "indexed" version.
                resNode = this.Visit(source);

                // The following block used to be skipped for resNode.OutputPartition.Count == 1,
                // which causes compilation error (bug 13593)
                // @@TODO[p3] : implement a working optimization for nPartition==1 that calls
                //              directly to Linq TakeWhile.
                // Note: the test is: if (resNode.IsDynamic || resNode.OutputPartition.Count > 1)
                {
                    resNode.IsForked = true;

                    bool isLong = (queryExpr.Method.Name == "LongTakeWhile");
                    DryadQueryNode offsetNode = this.CreateOffset(isLong, queryExpr, resNode);

                    // Create (x, y) => GroupIndexedTakeWhile(x, y, controlExpr)
                    Type ptype1 = typeof(IEnumerable<>).MakeGenericType(resNode.OutputTypes[0]);
                    Type ptype2 = typeof(IEnumerable<>).MakeGenericType(typeof(IndexedValue<long>));
                    ParameterExpression param1 = Expression.Parameter(ptype1, HpcLinqCodeGen.MakeUniqueName("x"));
                    ParameterExpression param2 = Expression.Parameter(ptype2, HpcLinqCodeGen.MakeUniqueName("y"));
                    string methodName = "GroupIndexed" + queryExpr.Method.Name;
                    MethodInfo minfo = typeof(HpcLinqEnumerable).GetMethod(methodName);
                    minfo = minfo.MakeGenericMethod(resNode.OutputTypes[0]);
                    Expression body = Expression.Call(minfo, param1, param2, controlExpr);
                    Type type = typeof(Func<,,>).MakeGenericType(ptype1, ptype2, body.Type);
                    LambdaExpression procFunc = Expression.Lambda(type, body, param1, param2);
                    resNode = new DryadApplyNode(procFunc, queryExpr, resNode, offsetNode);
                }
            }
            else if (!source.IsForked &&
                     (nodeType == QueryNodeType.Take || nodeType == QueryNodeType.TakeWhile) &&
                     (source.OperatorName == "OrderBy" || source.OperatorName == "OrderByDescending"))
            {
                resNode = this.Visit(source.children[0].child);

                bool isDescending = (source.OperatorName == "OrderByDescending");
                MethodCallExpression sourceQueryExpr = (MethodCallExpression)source.queryExpression;
                LambdaExpression keySelectExpr = HpcLinqExpression.GetLambda(sourceQueryExpr.Arguments[1]);
                Expression comparerExpr = null;
                if (sourceQueryExpr.Arguments.Count == 3)
                {
                    comparerExpr = sourceQueryExpr.Arguments[2];
                }
                resNode = this.PromoteConcat(
                                  source.children[0].child,
                                  resNode,
                                  delegate(DryadQueryNode x) {
                                      DryadQueryNode y = new DryadOrderByNode(keySelectExpr, comparerExpr,
                                                                              isDescending, sourceQueryExpr, x);
                                      return FirstStagePartitionOp(opName, nodeType, controlExpr, queryExpr, y);
                                  });
                if (resNode.IsDynamic || resNode.OutputPartition.Count > 1)
                {
                    // Need a mergesort
                    resNode = new DryadMergeNode(keySelectExpr, comparerExpr, isDescending, false, sourceQueryExpr, resNode);
                }
            }
            else
            {
                resNode = this.Visit(source);
                if (nodeType == QueryNodeType.Take || nodeType == QueryNodeType.TakeWhile)
                {
                    resNode = this.PromoteConcat(
                                      source, resNode,
                                      x => FirstStagePartitionOp(opName, nodeType, controlExpr, queryExpr, x));
                }
            }
            resNode = new DryadPartitionOpNode(opName, nodeType, controlExpr, false, queryExpr, resNode);
            return resNode;
        }
Пример #10
0
        private DryadQueryNode VisitConcat(QueryNodeInfo source, MethodCallExpression queryExpr)
        {
            DryadQueryNode[] childs = new DryadQueryNode[source.children.Count];
            for (int i = 0; i < source.children.Count; ++i)
            {
                childs[i] = this.Visit(source.children[i].child);
            }
            DryadQueryNode resNode = new DryadConcatNode(queryExpr, childs);

            int parCount = resNode.OutputPartition.Count;
            if (!resNode.IsDynamic && parCount > StaticConfig.MaxPartitionCount)
            {
                // Too many partitions, need to repartition
                int newParCount = parCount / 2;
                DryadQueryNode countNode = new DryadBasicAggregateNode(null, AggregateOpType.LongCount,
                                                                       true, false, queryExpr, resNode);
                DryadQueryNode mergeCountNode = new DryadMergeNode(true, false, queryExpr, countNode);

                // Apply node for s => IndexedCount(s)
                Type paramType = typeof(IEnumerable<>).MakeGenericType(typeof(long));
                ParameterExpression param = Expression.Parameter(paramType, "s");
                MethodInfo minfo = typeof(HpcLinqHelper).GetMethod("IndexedCount");
                minfo = minfo.MakeGenericMethod(typeof(long));
                Expression body = Expression.Call(minfo, param);
                Type funcType = typeof(Func<,>).MakeGenericType(param.Type, body.Type);
                LambdaExpression indexedCountFunc = Expression.Lambda(funcType, body, param);
                DryadQueryNode indexedCountNode = new DryadApplyNode(indexedCountFunc, queryExpr, mergeCountNode);

                // HashPartition(x => x.index, parCount)
                param = Expression.Parameter(body.Type.GetGenericArguments()[0], "x");
                Expression keySelectBody = Expression.Property(param, "Index");
                funcType = typeof(Func<,>).MakeGenericType(param.Type, keySelectBody.Type);
                LambdaExpression keySelectExpr = Expression.Lambda(funcType, keySelectBody, param);
                DryadQueryNode distCountNode = new DryadHashPartitionNode(keySelectExpr,
                                                                          null,
                                                                          parCount,
                                                                          queryExpr,
                                                                          indexedCountNode);

                // Apply node for (x, y) => AddPartitionIndex(x, y, newParCount)
                ParameterExpression param1 = Expression.Parameter(body.Type, "x");
                Type paramType2 = typeof(IEnumerable<>).MakeGenericType(resNode.OutputTypes[0]);
                ParameterExpression param2 = Expression.Parameter(paramType2, "y");
                minfo = typeof(HpcLinqHelper).GetMethod("AddPartitionIndex");
                minfo = minfo.MakeGenericMethod(resNode.OutputTypes[0]);
                body = Expression.Call(minfo, param1, param2, Expression.Constant(newParCount));
                funcType = typeof(Func<,,>).MakeGenericType(param1.Type, param2.Type, body.Type);
                LambdaExpression addIndexFunc = Expression.Lambda(funcType, body, param1, param2);
                DryadQueryNode addIndexNode = new DryadApplyNode(addIndexFunc, queryExpr, distCountNode, resNode);

                // HashPartition(x => x.index, x => x.value, newParCount)
                param = Expression.Parameter(body.Type.GetGenericArguments()[0], "x");
                body = Expression.Property(param, "Index");
                funcType = typeof(Func<,>).MakeGenericType(param.Type, body.Type);
                keySelectExpr = Expression.Lambda(funcType, body, param);
                body = Expression.Property(param, "Value");
                funcType = typeof(Func<,>).MakeGenericType(param.Type, body.Type);
                LambdaExpression resultSelectExpr = Expression.Lambda(funcType, body, param);
                resNode = new DryadHashPartitionNode(keySelectExpr,
                                                     resultSelectExpr,
                                                     null,
                                                     newParCount,
                                                     false,
                                                     queryExpr,
                                                     addIndexNode);
                resNode = new DryadMergeNode(true, true, queryExpr, resNode);
            }
            return resNode;
        }
Пример #11
0
        private DryadQueryNode VisitApplyWithPartitionIndex(
                                    QueryNodeInfo source,
                                    LambdaExpression procLambda,
                                    Expression queryExpr)
        {
            // var indices = source.Apply(s => ValueZero(s)).Apply(s => AssignIndex(s));
            // indices.Apply(source, (x, y) => ProcessWithIndex(x, y, procFunc));
            DryadQueryNode child = this.Visit(source);
            if (child.IsDynamic)
            {
                throw new DryadLinqException("ApplyWithPartitionIndex is only supported for static partition count");
            }

            child.IsForked = true;

            // Apply node for s => ValueZero(s)
            Type paramType = typeof(IEnumerable<>).MakeGenericType(child.OutputTypes[0]);
            ParameterExpression param = Expression.Parameter(paramType, "s");
            MethodInfo minfo = typeof(HpcLinqHelper).GetMethod("ValueZero");
            minfo = minfo.MakeGenericMethod(child.OutputTypes[0]);
            Expression body = Expression.Call(minfo, param);
            Type funcType = typeof(Func<,>).MakeGenericType(param.Type, body.Type);
            LambdaExpression procFunc = Expression.Lambda(funcType, body, param);
            DryadQueryNode valueZeroNode = new DryadApplyNode(procFunc, queryExpr, child);
            valueZeroNode = new DryadMergeNode(true, true, queryExpr, valueZeroNode);

            // Apply node for s => AssignIndex(s)
            paramType = typeof(IEnumerable<>).MakeGenericType(typeof(int));
            param = Expression.Parameter(paramType, "s");
            minfo = typeof(HpcLinqHelper).GetMethod("AssignIndex");
            body = Expression.Call(minfo, param);
            funcType = typeof(Func<,>).MakeGenericType(param.Type, body.Type);
            procFunc = Expression.Lambda(funcType, body, param);
            DryadQueryNode assignIndexNode = new DryadApplyNode(procFunc, queryExpr, valueZeroNode);

            // HashPartition to distribute the indices -- one to each partition.
            int pcount = child.OutputPartition.Count;
            param = Expression.Parameter(body.Type, "x");
            funcType = typeof(Func<,>).MakeGenericType(param.Type, param.Type);
            LambdaExpression keySelectExpr = Expression.Lambda(funcType, param, param);
            DryadQueryNode hdistNode = new DryadHashPartitionNode(keySelectExpr,
                                                                  null,
                                                                  pcount,
                                                                  queryExpr,
                                                                  assignIndexNode);

            // Apply node for (x, y) => ProcessWithIndex(x, y, procLambda));
            Type paramType1 = typeof(IEnumerable<>).MakeGenericType(child.OutputTypes[0]);
            ParameterExpression param1 = Expression.Parameter(paramType1, HpcLinqCodeGen.MakeUniqueName("x"));
            Type paramType2 = typeof(IEnumerable<>).MakeGenericType(typeof(int));
            ParameterExpression param2 = Expression.Parameter(paramType2, HpcLinqCodeGen.MakeUniqueName("y"));
            minfo = typeof(HpcLinqHelper).GetMethod("ProcessWithIndex");
            minfo = minfo.MakeGenericMethod(child.OutputTypes[0], procLambda.Body.Type);
            body = Expression.Call(minfo, param1, param2, procLambda);
            funcType = typeof(Func<,,>).MakeGenericType(param1.Type, param2.Type, body.Type);
            procFunc = Expression.Lambda(funcType, body, param1, param2);
            return new DryadApplyNode(procFunc, queryExpr, child, hdistNode);
        }
Пример #12
0
        private DryadQueryNode VisitApply(QueryNodeInfo source1,
                                          QueryNodeInfo source2,
                                          LambdaExpression procLambda,
                                          bool perPartition,
                                          bool isFirstOnly,
                                          Expression queryExpr)
        {
            DryadQueryNode child1 = this.Visit(source1);

            DryadQueryNode applyNode;
            if (source2 == null)
            {
                // Unary-apply case:
                if (perPartition)
                {
                    //homomorphic
                    applyNode = this.PromoteConcat(source1, child1, x => new DryadApplyNode(procLambda, queryExpr, x));
                }
                else
                {
                    //non-homomorphic
                    if (child1.IsDynamic || child1.OutputPartition.Count > 1)
                    {
                        child1 = new DryadMergeNode(true, false, queryExpr, child1);
                    }
                    applyNode = new DryadApplyNode(procLambda, queryExpr, child1);
                }
            }
            else
            {
                // Binary-apply case:
                DryadQueryNode child2 = this.Visit(source2);

                if (perPartition && isFirstOnly)
                {
                    // The function is left homomorphic:
                    if (!child2.IsForked && (child1.IsDynamic || child1.OutputPartition.Count > 1))
                    {
                        // The normal cases..
                        if (IsMergeNodeNeeded(child2))
                        {
                            if (child1.IsDynamic)
                            {
                                child2 = new DryadMergeNode(true, false, queryExpr, child2);
                                child2.IsForked = true;
                            }
                            else
                            {
                                // Rather than do full merge and broadcast, which has lots of data-movement
                                //   1. Tee output2 with output cross-product
                                //   2. Do a merge-stage which will have input1.nPartition nodes each performing a merge.
                                //  This acheives a distribution of the entire input2 to the Apply nodes with least data-movement.
                                child2 = new DryadTeeNode(child2.OutputTypes[0], true, queryExpr, child2);
                                child2.ConOpType = ConnectionOpType.CrossProduct;
                                child2 = new DryadMergeNode(child1.OutputPartition.Count, queryExpr, child2);
                            }
                        }
                        else
                        {
                            // the right-data is alread a single partition, so just tee it.
                            // this will provide a copy to each of the apply nodes.
                            child2 = new DryadTeeNode(child2.OutputTypes[0], true, queryExpr, child2);
                        }
                    }
                    else
                    {
                        // Less common cases..
                        // a full merge of the right-data may be necessary.
                        if (child2.IsDynamic || child2.OutputPartition.Count > 1)
                        {
                            child2 = new DryadMergeNode(true, false, queryExpr, child2);
                            if (child1.IsDynamic || child1.OutputPartition.Count > 1)
                            {
                                child2.IsForked = true;
                            }
                        }
                    }
                    applyNode = new DryadApplyNode(procLambda, queryExpr, child1, child2);
                }
                else if (perPartition && !isFirstOnly && !child1.IsDynamic && !child2.IsDynamic)
                {
                    // Full homomorphic
                    // No merging occurs.
                    // NOTE: We generally expect that both the left and right datasets have matching partitionCount.
                    //       however, we don't test for it yet as users might know what they are doing, and it makes
                    //       LocalDebug inconsistent as LocalDebug doesn't throw in that situation.
                    applyNode = new DryadApplyNode(procLambda, queryExpr, child1, child2);
                }
                else
                {
                    // Non-homomorphic
                    // Full merges of both data sets is necessary.
                    if (child1.IsDynamic || child1.OutputPartition.Count > 1)
                    {
                        child1 = new DryadMergeNode(true, false, queryExpr, child1);
                    }
                    if (child2.IsDynamic || child2.OutputPartition.Count > 1)
                    {
                        child2 = new DryadMergeNode(true, false, queryExpr, child2);
                    }
                    applyNode = new DryadApplyNode(procLambda, queryExpr, child1, child2);
                }
            }
            return applyNode;
        }
Пример #13
0
        // Creates an "auto-sampling range-partition sub-query"
        private DryadQueryNode CreateRangePartition(bool isDynamic,
                                                    LambdaExpression keySelectExpr,
                                                    LambdaExpression resultSelectExpr,
                                                    Expression comparerExpr,
                                                    Expression isDescendingExpr,
                                                    Expression queryExpr,
                                                    Expression partitionCountExpr,
                                                    DryadQueryNode child)
        {
            // Make child a Tee node
            child.IsForked = true;

            // The partition count
            Expression countExpr = null;

            if (isDescendingExpr == null)
            {
                isDescendingExpr = Expression.Constant(false, typeof(bool));  //default for isDescending is false.
            }

            // NOTE: for MayRTM, isDynamic should never be true
            if (!isDynamic)
            {
                if (partitionCountExpr != null)
                {
                    countExpr = partitionCountExpr;
                }
                else
                {
                    // If partitionCount was not explicitly set, use the child's partition count.
                    countExpr = Expression.Constant(child.OutputPartition.Count);
                }
            }

            Type recordType = child.OutputTypes[0];
            Type keyType = keySelectExpr.Type.GetGenericArguments()[1];

            // Create x => Phase1Sampling(x_1, keySelector, denv)
            Type lambdaParamType1 = typeof(IEnumerable<>).MakeGenericType(recordType);
            ParameterExpression lambdaParam1 = Expression.Parameter(lambdaParamType1, "x_1");

            ParameterExpression denvParam = Expression.Parameter(typeof(HpcLinqVertexEnv), "denv");

            MethodInfo minfo = typeof(HpcLinqSampler).GetMethod("Phase1Sampling");
            Expression body = Expression.Call(minfo.MakeGenericMethod(recordType, keyType),
                                              lambdaParam1, keySelectExpr, denvParam);
            Type type = typeof(Func<,>).MakeGenericType(lambdaParam1.Type, body.Type);
            LambdaExpression samplingExpr = Expression.Lambda(type, body, lambdaParam1);

            // Create the Sampling node
            DryadApplyNode samplingNode = new DryadApplyNode(samplingExpr, queryExpr, child);

            // Create x => RangeSampler(x, keySelectExpr, comparer, isDescendingExpr)
            Type lambdaParamType = typeof(IEnumerable<>).MakeGenericType(keyType);
            ParameterExpression lambdaParam = Expression.Parameter(lambdaParamType, "x_2");

            //For RTM, isDynamic should never be true.
            //string methodName = (isDynamic) ? "RangeSampler_Dynamic" : "RangeSampler_Static";
            Debug.Assert(isDynamic == false, "Internal error: isDynamic is true.");
            string methodName = "RangeSampler_Static";

            minfo = typeof(HpcLinqSampler).GetMethod(methodName);
            minfo = minfo.MakeGenericMethod(keyType);
            Expression comparerArgExpr = comparerExpr;
            if (comparerExpr == null)
            {
                if (!TypeSystem.HasDefaultComparer(keyType))
                {
                    throw DryadLinqException.Create(HpcLinqErrorCode.ComparerMustBeSpecifiedOrKeyTypeMustBeIComparable,
                                                  string.Format(SR.ComparerMustBeSpecifiedOrKeyTypeMustBeIComparable, keyType),
                                                  queryExpr);
                }
                comparerArgExpr = Expression.Constant(null, typeof(IComparer<>).MakeGenericType(keyType));
            }

            Expression lastArg;
            if (isDynamic)
            {
                lastArg = denvParam;
            }
            else
            {
                lastArg = countExpr;
            }

            body = Expression.Call(minfo, lambdaParam, comparerArgExpr, isDescendingExpr, lastArg);
            type = typeof(Func<,>).MakeGenericType(lambdaParam.Type, body.Type);
            LambdaExpression samplerExpr = Expression.Lambda(type, body, lambdaParam);

            // Create the sample node
            DryadQueryNode sampleDataNode = new DryadMergeNode(false, true, queryExpr, samplingNode);
            DryadQueryNode sampleNode = new DryadApplyNode(samplerExpr, queryExpr, sampleDataNode);
            sampleNode.IsForked = true;

            // Create the range distribute node
            DryadQueryNode resNode = new DryadRangePartitionNode(keySelectExpr,
                                                                 resultSelectExpr,
                                                                 null,
                                                                 comparerExpr,
                                                                 isDescendingExpr,
                                                                 countExpr,
                                                                 queryExpr,
                                                                 child,
                                                                 sampleNode);
            resNode = new DryadMergeNode(false, true, queryExpr, resNode);

            // Set the dynamic manager for sampleNode
            if (isDynamic)
            {
                sampleDataNode.DynamicManager = new DynamicRangeDistributor(resNode);
            }

            return resNode;
        }
Пример #14
0
 internal virtual string Visit(DryadApplyNode node,
                               CodeMemberMethod vertexMethod,
                               string[] readerNames,
                               string[] writerNames)
 {
     return node.AddVertexCode(vertexMethod, readerNames, writerNames);
 }