VisitApplyWithPartitionIndex(QueryNodeInfo source, LambdaExpression procLambda, Expression queryExpr) { // The computation looks like this: // var indices = source.Apply(s => ValueZero(s)).Apply(s => AssignIndex(s)) // .HashPartition(x => x) // indices.Apply(source, (x, y) => ApplyWithPartitionIndex(x, y, procFunc)); DLinqQueryNode child = this.Visit(source); if (child.IsDynamic) { throw new DryadLinqException("ApplyWithPartitionIndex is only supported for static partition count"); } child.IsForked = true; // Apply node for s => ValueZero(s) Type paramType = typeof(IEnumerable<>).MakeGenericType(child.OutputTypes[0]); ParameterExpression param = Expression.Parameter(paramType, "s"); MethodInfo minfo = typeof(DryadLinqHelper).GetMethod("ValueZero"); minfo = minfo.MakeGenericMethod(child.OutputTypes[0]); Expression body = Expression.Call(minfo, param); Type funcType = typeof(Func<,>).MakeGenericType(param.Type, body.Type); LambdaExpression procFunc = Expression.Lambda(funcType, body, param); DLinqQueryNode valueZeroNode = new DLinqApplyNode(procFunc, queryExpr, child); valueZeroNode = new DLinqMergeNode(true, queryExpr, valueZeroNode); // Apply node for s => AssignIndex(s) paramType = typeof(IEnumerable<>).MakeGenericType(typeof(int)); param = Expression.Parameter(paramType, "s"); minfo = typeof(DryadLinqHelper).GetMethod("AssignIndex"); body = Expression.Call(minfo, param); funcType = typeof(Func<,>).MakeGenericType(param.Type, body.Type); procFunc = Expression.Lambda(funcType, body, param); DLinqQueryNode assignIndexNode = new DLinqApplyNode(procFunc, queryExpr, valueZeroNode); // HashPartition to distribute the indices -- one to each partition. 
int pcount = child.OutputPartition.Count; param = Expression.Parameter(body.Type, "x"); funcType = typeof(Func<,>).MakeGenericType(param.Type, param.Type); LambdaExpression keySelectExpr = Expression.Lambda(funcType, param, param); DLinqQueryNode hdistNode = new DLinqHashPartitionNode(keySelectExpr, null, pcount, queryExpr, assignIndexNode); // Apply node for (x, y) => ApplyWithPartitionIndex(x, y, procLambda)); Type paramType1 = typeof(IEnumerable<>).MakeGenericType(child.OutputTypes[0]); ParameterExpression param1 = Expression.Parameter(paramType1, DryadLinqCodeGen.MakeUniqueName("x")); Type paramType2 = typeof(IEnumerable<>).MakeGenericType(typeof(int)); ParameterExpression param2 = Expression.Parameter(paramType2, DryadLinqCodeGen.MakeUniqueName("y")); minfo = typeof(DryadLinqHelper).GetMethod("ProcessWithIndex"); minfo = minfo.MakeGenericMethod(child.OutputTypes[0], procLambda.Body.Type.GetGenericArguments()[0]); body = Expression.Call(minfo, param1, param2, procLambda); funcType = typeof(Func<,,>).MakeGenericType(param1.Type, param2.Type, body.Type); procFunc = Expression.Lambda(funcType, body, param1, param2); return new DLinqApplyNode(procFunc, queryExpr, child, hdistNode); }
// Visitor entry point for Apply nodes: the node generates its own vertex code.
//   node         - the Apply node being visited.
//   vertexMethod - the vertex method declaration handed to the node.
//   readerNames  - passed through unchanged to the node.
//   writerNames  - passed through unchanged to the node.
// Returns whatever node.AddVertexCode returns.
internal virtual string Visit(DLinqApplyNode node,
                              CodeMemberMethod vertexMethod,
                              string[] readerNames,
                              string[] writerNames)
{
    string vertexCodeResult = node.AddVertexCode(vertexMethod, readerNames, writerNames);
    return vertexCodeResult;
}
// Builds the plan node for an Apply that takes all children of 'source' as inputs.
//
// Three shapes are produced:
//   * perPartition, no dynamic input, isFirstOnly:
//       every input except the first is wrapped in a Tee (connection type
//       CrossProduct) followed by a Merge sized to the first input's partition count.
//   * perPartition, no dynamic input, !isFirstOnly:
//       all inputs must have the same partition count as the first one;
//       otherwise HomomorphicApplyNeedsSamePartitionCount is thrown.
//   * everything else (non-homomorphic):
//       every input that is dynamic or has more than one partition is wrapped
//       in a Merge node.
// The (possibly rewritten) inputs are then handed to a single DLinqApplyNode.
private DLinqQueryNode VisitMultiApply(QueryNodeInfo source,
                                       LambdaExpression procLambda,
                                       bool perPartition,
                                       bool isFirstOnly,
                                       MethodCallExpression queryExpr)
{
    DLinqQueryNode[] inputs = new DLinqQueryNode[source.Children.Count];
    for (int k = 0; k < source.Children.Count; ++k)
    {
        inputs[k] = this.Visit(source.Children[k].Child);
    }

    bool anyDynamicInput = inputs.Any(input => input.IsDynamic);
    bool homomorphic = perPartition && !anyDynamicInput;

    if (!homomorphic)
    {
        // Non-homomorphic case: merge whatever is not already a single static partition.
        for (int k = 0; k < inputs.Length; ++k)
        {
            DLinqQueryNode input = inputs[k];
            if (input.IsDynamic || input.OutputPartition.Count > 1)
            {
                inputs[k] = new DLinqMergeNode(true, queryExpr, input);
            }
        }
    }
    else if (isFirstOnly)
    {
        // Homomorphic in the first input only: Tee + cross-product + Merge for the others.
        for (int k = 1; k < inputs.Length; ++k)
        {
            DLinqQueryNode tee = new DLinqTeeNode(inputs[k].OutputTypes[0], true, queryExpr, inputs[k]);
            inputs[k] = tee;
            tee.ConOpType = ConnectionOpType.CrossProduct;
            inputs[k] = new DLinqMergeNode(inputs[0].OutputPartition.Count, queryExpr, tee);
        }
    }
    else
    {
        // Fully homomorphic: all inputs must agree on the partition count.
        int expectedCount = inputs[0].OutputPartition.Count;
        for (int k = 1; k < inputs.Length; ++k)
        {
            if (inputs[k].OutputPartition.Count != expectedCount)
            {
                throw DryadLinqException.Create(DryadLinqErrorCode.HomomorphicApplyNeedsSamePartitionCount,
                                                SR.HomomorphicApplyNeedsSamePartitionCount,
                                                queryExpr);
            }
        }
    }

    return new DLinqApplyNode(procLambda, true, queryExpr, inputs);
}
// Builds the plan for SlidingWindow. The generated computation is:
//   var windows = source.Apply(s => DryadLinqHelper.Last(s, windowSize));
//   var slided  = windows.Apply(s => DryadLinqHelper.Slide(s)).HashPartition(x => x.Index);
//   slided.Apply(source, (x, y) => DryadLinqHelper.ProcessWindows(x, y, procFunc, windowSize));
//
//   source         - the input query node info.
//   procLambda     - the user function passed on to ProcessWindows.
//   windowSizeExpr - evaluated once here; the resulting int is embedded as a constant.
//   queryExpr      - the query expression attached to every node created here.
// Throws DryadLinqException when the visited input has a dynamic partition count.
private DLinqQueryNode VisitSlidingWindow(QueryNodeInfo source,
                                          LambdaExpression procLambda,
                                          Expression windowSizeExpr,
                                          Expression queryExpr)
{
    DLinqQueryNode input = this.Visit(source);
    if (input.IsDynamic)
    {
        throw new DryadLinqException("SlidingWindow is only supported for static partition count");
    }

    ExpressionSimplifier<int> sizeEvaluator = new ExpressionSimplifier<int>();
    Expression windowSize = Expression.Constant(sizeEvaluator.Eval(windowSizeExpr), typeof(int));

    // The input is consumed twice below (by the Last stage and by the final Apply).
    input.IsForked = true;
    Type recordType = input.OutputTypes[0];

    // Stage 1: Apply node for s => Last(s, windowSize), followed by a Merge.
    Type recordSeqType = typeof(IEnumerable<>).MakeGenericType(recordType);
    ParameterExpression lastParam = Expression.Parameter(recordSeqType, DryadLinqCodeGen.MakeUniqueName("s"));
    MethodInfo lastMethod = typeof(DryadLinqHelper).GetMethod("Last").MakeGenericMethod(recordType);
    Expression lastCall = Expression.Call(lastMethod, lastParam, windowSize);
    Type lastFuncType = typeof(Func<,>).MakeGenericType(lastParam.Type, lastCall.Type);
    LambdaExpression lastLambda = Expression.Lambda(lastFuncType, lastCall, lastParam);
    DLinqQueryNode lastNode = new DLinqApplyNode(lastLambda, queryExpr, input);
    DLinqQueryNode mergedLastNode = new DLinqMergeNode(true, queryExpr, lastNode);

    // Stage 2: Apply node for s => Slide(s); its parameter has the type returned by Last.
    ParameterExpression slideParam = Expression.Parameter(lastCall.Type, DryadLinqCodeGen.MakeUniqueName("s"));
    MethodInfo slideMethod = typeof(DryadLinqHelper).GetMethod("Slide").MakeGenericMethod(recordType);
    Expression slideCall = Expression.Call(slideMethod, slideParam);
    Type slideFuncType = typeof(Func<,>).MakeGenericType(slideParam.Type, slideCall.Type);
    LambdaExpression slideLambda = Expression.Lambda(slideFuncType, slideCall, slideParam);
    DLinqQueryNode slideNode = new DLinqApplyNode(slideLambda, queryExpr, mergedLastNode);

    // Stage 3: hash partition on x.Index to distribute from partition i to i+1.
    int partitionCount = input.OutputPartition.Count;
    ParameterExpression slidItemParam = Expression.Parameter(slideCall.Type.GetGenericArguments()[0], "x");
    Expression slidItemIndex = Expression.Property(slidItemParam, "Index");
    Type indexFuncType = typeof(Func<,>).MakeGenericType(slidItemParam.Type, slidItemIndex.Type);
    LambdaExpression indexSelector = Expression.Lambda(indexFuncType, slidItemIndex, slidItemParam);
    DLinqQueryNode redistributedNode =
        new DLinqHashPartitionNode(indexSelector, null, partitionCount, queryExpr, slideNode);

    // Stage 4: Apply node for (x, y) => ProcessWindows(x, y, procLambda, windowSize).
    // NOTE(review): the first parameter is IEnumerable<> wrapped around the *type of the
    // Slide call* (not its element type); the sibling visitors use the call type directly.
    // Confirm against the ProcessWindows signature.
    Type slidSeqType = typeof(IEnumerable<>).MakeGenericType(slideCall.Type);
    ParameterExpression slidSeqParam = Expression.Parameter(slidSeqType, DryadLinqCodeGen.MakeUniqueName("x"));
    Type dataSeqType = typeof(IEnumerable<>).MakeGenericType(recordType);
    ParameterExpression dataSeqParam = Expression.Parameter(dataSeqType, DryadLinqCodeGen.MakeUniqueName("y"));
    MethodInfo processMethod = typeof(DryadLinqHelper).GetMethod("ProcessWindows")
                                                      .MakeGenericMethod(recordType, procLambda.Body.Type);
    Expression processCall = Expression.Call(processMethod, slidSeqParam, dataSeqParam, procLambda, windowSize);
    Type processFuncType = typeof(Func<,,>).MakeGenericType(slidSeqParam.Type, dataSeqParam.Type, processCall.Type);
    LambdaExpression processLambda = Expression.Lambda(processFuncType, processCall, slidSeqParam, dataSeqParam);
    return new DLinqApplyNode(processLambda, queryExpr, redistributedNode, input);
}
// Plan for Reverse: (reverse all partitions) then (reverse the data in each partition).
// The complicated part is the first step. Approach, as described by the original author:
//   - tee the input;
//   - run a dummy Apply (ValueZero) that produces the singleton {0} at each partition;
//   - merge to get a sequence {0,0,..} whose length equals the partition count;
//   - convert that sequence to { (0,n), (1,n), ... } (MakeIndexCountPairs);
//   - hash-partition on Index to send one pair to each of the n workers;
//   - use a binary Apply (AddIndexForReverse) to attach the target index to each source item:
//       Apply(seq1 = indexCountPair, seq2 = original data) => ({tgt, item0}, {tgt, item1}, ..);
//   - hash-partition on Index (projecting Value) to move the items to their target partition;
//   - use the local Reverse to reverse the data locally.
// Throws DryadLinqException when the visited input has a dynamic partition count.
private DLinqQueryNode VisitReverse(QueryNodeInfo source, Expression queryExpr)
{
    DLinqQueryNode input = this.Visit(source);
    if (input.IsDynamic)
    {
        throw new DryadLinqException("Reverse is only supported for static partition count");
    }

    // The input is consumed twice below (by ValueZero and by AddIndexForReverse).
    input.IsForked = true;
    Type recordType = input.OutputTypes[0];

    // Apply node for s => ValueZero(s)
    Type recordSeqType = typeof(IEnumerable<>).MakeGenericType(recordType);
    ParameterExpression zeroParam = Expression.Parameter(recordSeqType, "s");
    MethodInfo zeroMethod = typeof(DryadLinqHelper).GetMethod("ValueZero").MakeGenericMethod(recordType);
    Expression zeroCall = Expression.Call(zeroMethod, zeroParam);
    Type zeroFuncType = typeof(Func<,>).MakeGenericType(zeroParam.Type, zeroCall.Type);
    LambdaExpression zeroLambda = Expression.Lambda(zeroFuncType, zeroCall, zeroParam);
    DLinqQueryNode valueZeroNode = new DLinqApplyNode(zeroLambda, queryExpr, input);

    // Apply node for s => MakeIndexCountPairs(s), fed by the merged zeros.
    Type intSeqType = typeof(IEnumerable<>).MakeGenericType(typeof(int));
    ParameterExpression pairsParam = Expression.Parameter(intSeqType, "s");
    MethodInfo pairsMethod = typeof(DryadLinqHelper).GetMethod("MakeIndexCountPairs");
    Expression pairsCall = Expression.Call(pairsMethod, pairsParam);
    Type pairsFuncType = typeof(Func<,>).MakeGenericType(pairsParam.Type, pairsCall.Type);
    LambdaExpression pairsLambda = Expression.Lambda(pairsFuncType, pairsCall, pairsParam);
    DLinqQueryNode mergeZeroNode = new DLinqMergeNode(true, queryExpr, valueZeroNode);
    DLinqQueryNode indexCountNode = new DLinqApplyNode(pairsLambda, queryExpr, mergeZeroNode);

    // HashPartition to distribute the indexCounts -- one to each partition.
    // Each partition will receive (myPartitionID, pcount).
    int partitionCount = input.OutputPartition.Count;
    ParameterExpression pairParam = Expression.Parameter(pairsCall.Type.GetGenericArguments()[0], "x");
    Expression pairIndex = Expression.Property(pairParam, "Index");
    Type pairKeyFuncType = typeof(Func<,>).MakeGenericType(pairParam.Type, pairIndex.Type);
    LambdaExpression pairKeySelector = Expression.Lambda(pairKeyFuncType, pairIndex, pairParam);
    DLinqQueryNode distributedPairsNode =
        new DLinqHashPartitionNode(pairKeySelector, null, partitionCount, queryExpr, indexCountNode);

    // Apply node for (x, y) => AddIndexForReverse(x, y)
    ParameterExpression pairSeqParam = Expression.Parameter(pairsCall.Type, "x");
    Type dataSeqType = typeof(IEnumerable<>).MakeGenericType(recordType);
    ParameterExpression dataSeqParam = Expression.Parameter(dataSeqType, "y");
    MethodInfo addIndexMethod = typeof(DryadLinqHelper).GetMethod("AddIndexForReverse").MakeGenericMethod(recordType);
    Expression addIndexCall = Expression.Call(addIndexMethod, pairSeqParam, dataSeqParam);
    Type addIndexFuncType = typeof(Func<,,>).MakeGenericType(pairSeqParam.Type, dataSeqParam.Type, addIndexCall.Type);
    LambdaExpression addIndexFunc = Expression.Lambda(addIndexFuncType, addIndexCall, pairSeqParam, dataSeqParam);
    DLinqQueryNode addIndexNode = new DLinqApplyNode(addIndexFunc, queryExpr, distributedPairsNode, input);

    // HashPartition(x => x.Index, x => x.Value, pcount)
    // Moves all data to the correct target partition
    // (each worker will direct all its items to one target partition).
    ParameterExpression taggedParam = Expression.Parameter(addIndexCall.Type.GetGenericArguments()[0], "x");
    Expression taggedIndex = Expression.Property(taggedParam, "Index");
    Type targetKeyFuncType = typeof(Func<,>).MakeGenericType(taggedParam.Type, taggedIndex.Type);
    LambdaExpression targetKeySelector = Expression.Lambda(targetKeyFuncType, taggedIndex, taggedParam);
    Expression taggedValue = Expression.Property(taggedParam, "Value");
    Type valueFuncType = typeof(Func<,>).MakeGenericType(taggedParam.Type, taggedValue.Type);
    LambdaExpression valueSelector = Expression.Lambda(valueFuncType, taggedValue, taggedParam);
    DLinqQueryNode reversePartitionNode = new DLinqHashPartitionNode(
        targetKeySelector, valueSelector, null, partitionCount, false, queryExpr, addIndexNode);

    // Reverse node
    Type movedSeqType = typeof(IEnumerable<>).MakeGenericType(reversePartitionNode.OutputTypes[0]);
    ParameterExpression reverseParam = Expression.Parameter(movedSeqType, "x");
    MethodInfo reverseMethod = typeof(DryadLinqVertex).GetMethod("Reverse").MakeGenericMethod(recordType);
    Expression reverseCall = Expression.Call(reverseMethod, reverseParam);
    Type reverseFuncType = typeof(Func<,>).MakeGenericType(reverseParam.Type, reverseCall.Type);
    LambdaExpression reverseLambda = Expression.Lambda(reverseFuncType, reverseCall, reverseParam);
    DLinqQueryNode mergedNode = new DLinqMergeNode(true, queryExpr, reversePartitionNode);
    return new DLinqApplyNode(reverseLambda, queryExpr, mergedNode);
}
// Builds the plan node for a unary (source2 == null) or binary Apply.
//
//   perPartition - the function may be applied to each partition independently.
//   isFirstOnly  - for the binary case: the function is homomorphic in the first
//                  (left) input only.
//
// Unary:
//   * perPartition: the Apply is created through PromoteConcat.
//   * otherwise:    the input is merged first when it is dynamic or has more than one partition.
// Binary:
//   * perPartition && isFirstOnly: the right input is made available to every Apply
//     instance (Merge, Tee + cross-product + Merge, or a plain Tee - see below).
//   * perPartition && !isFirstOnly and neither input dynamic: no merging at all.
//   * otherwise: both inputs are merged when dynamic or multi-partition.
private DLinqQueryNode VisitApply(QueryNodeInfo source1,
                                  QueryNodeInfo source2,
                                  LambdaExpression procLambda,
                                  bool perPartition,
                                  bool isFirstOnly,
                                  Expression queryExpr)
{
    DLinqQueryNode left = this.Visit(source1);

    if (source2 == null)
    {
        // Unary-apply case.
        if (perPartition)
        {
            // Homomorphic.
            return this.PromoteConcat(source1, left, n => new DLinqApplyNode(procLambda, queryExpr, n));
        }

        // Non-homomorphic.
        if (left.IsDynamic || left.OutputPartition.Count > 1)
        {
            left = new DLinqMergeNode(true, queryExpr, left);
        }
        return new DLinqApplyNode(procLambda, queryExpr, left);
    }

    // Binary-apply case.
    DLinqQueryNode right = this.Visit(source2);

    if (perPartition && isFirstOnly)
    {
        // The function is left homomorphic.
        if (left.IsDynamic || left.OutputPartition.Count > 1)
        {
            // The normal cases.
            if (!IsMergeNodeNeeded(right))
            {
                // The right data is already a single partition, so just tee it.
                // This will provide a copy to each of the apply nodes.
                right = new DLinqTeeNode(right.OutputTypes[0], true, queryExpr, right);
            }
            else if (left.IsDynamic)
            {
                right = new DLinqMergeNode(true, queryExpr, right);
                right.IsForked = true;
            }
            else
            {
                // Rather than do a full merge and broadcast, which has lots of data movement:
                //  1. Tee the right output with a cross-product connection.
                //  2. Add a merge stage with left.nPartition nodes, each performing a merge.
                // This achieves a distribution of the entire right input to the Apply nodes
                // with the least data movement.
                DLinqQueryNode crossTee = new DLinqTeeNode(right.OutputTypes[0], true, queryExpr, right);
                crossTee.ConOpType = ConnectionOpType.CrossProduct;
                right = new DLinqMergeNode(left.OutputPartition.Count, queryExpr, crossTee);
            }
        }
        else if (right.IsDynamic || right.OutputPartition.Count > 1)
        {
            // A full merge of the right data is necessary.
            right = new DLinqMergeNode(true, queryExpr, right);

            // NOTE(review): this repeats the condition whose 'else' branch we are in, so it
            // looks unreachable unless those properties can change in between - confirm.
            if (left.IsDynamic || left.OutputPartition.Count > 1)
            {
                right.IsForked = true;
            }
        }
        return new DLinqApplyNode(procLambda, queryExpr, left, right);
    }

    if (perPartition && !isFirstOnly && !left.IsDynamic && !right.IsDynamic)
    {
        // Full homomorphic: no merging occurs.
        // NOTE: We generally expect both datasets to have matching partition counts.
        // However, we don't test for it yet as users might know what they are doing, and it
        // would make LocalDebug inconsistent, as LocalDebug doesn't throw in that situation.
        return new DLinqApplyNode(procLambda, queryExpr, left, right);
    }

    // Non-homomorphic: full merges of both data sets are necessary.
    if (left.IsDynamic || left.OutputPartition.Count > 1)
    {
        left = new DLinqMergeNode(true, queryExpr, left);
    }
    if (right.IsDynamic || right.OutputPartition.Count > 1)
    {
        right = new DLinqMergeNode(true, queryExpr, right);
    }
    return new DLinqApplyNode(procLambda, queryExpr, left, right);
}
// Builds the plan for Zip(first, second, resultSelector).
//
// Dynamic case (either input has a dynamic partition count):
//   both inputs are wrapped in a Merge node and one binary Apply calls
//   DryadLinqHelper.Zip(x, y, resultSelector).
//
// Static case:
//   1. LongCount aggregates over both inputs, each followed by a Merge.
//   2. Apply (x, y) => ZipCount(x, y) over the two merged counts.
//   3. HashPartition of that result on Index into parCount2 partitions.
//   4. Apply (x, y) => AssignPartitionIndex(x, y) over (distributed counts, second input).
//   5. HashPartition on Index, projecting Value, into parCount1 partitions, then a Merge.
//   6. A DLinqZipNode over the first input and the repartitioned second input.
private DLinqQueryNode VisitZip(QueryNodeInfo first,
                                QueryNodeInfo second,
                                LambdaExpression resultSelector,
                                MethodCallExpression queryExpr)
{
    DLinqQueryNode child1 = this.Visit(first);
    DLinqQueryNode child2 = this.Visit(second);

    if (child1.IsDynamic || child2.IsDynamic)
    {
        // Well, let us for now do it on a single machine
        child1 = new DLinqMergeNode(true, queryExpr, child1);
        child2 = new DLinqMergeNode(true, queryExpr, child2);

        // Apply node for (x, y) => Zip(x, y, resultSelector)
        Type paramType1 = typeof(IEnumerable<>).MakeGenericType(child1.OutputTypes[0]);
        ParameterExpression param1 = Expression.Parameter(paramType1, "s1");
        Type paramType2 = typeof(IEnumerable<>).MakeGenericType(child2.OutputTypes[0]);
        ParameterExpression param2 = Expression.Parameter(paramType2, "s2");
        MethodInfo minfo = typeof(DryadLinqHelper).GetMethod("Zip");
        // NOTE(review): only the first input's record type is supplied as a generic
        // argument; confirm this matches the generic arity of DryadLinqHelper.Zip.
        minfo = minfo.MakeGenericMethod(child1.OutputTypes[0]);
        Expression body = Expression.Call(minfo, param1, param2, resultSelector);

        // The lambda has two parameters plus a result, so the delegate type must be the
        // arity-3 Func<,,>. The previous Func<,> has arity 2, and MakeGenericType throws
        // ArgumentException when given three type arguments for it.
        Type funcType = typeof(Func<,,>).MakeGenericType(param1.Type, param2.Type, body.Type);
        LambdaExpression procFunc = Expression.Lambda(funcType, body, param1, param2);
        return new DLinqApplyNode(procFunc, queryExpr, child1, child2);
    }
    else
    {
        int parCount1 = child1.OutputPartition.Count;
        int parCount2 = child2.OutputPartition.Count;

        // Count nodes
        DLinqQueryNode countNode1 = new DLinqBasicAggregateNode(null, AggregateOpType.LongCount,
                                                                true, false, queryExpr, child1);
        DLinqQueryNode countNode2 = new DLinqBasicAggregateNode(null, AggregateOpType.LongCount,
                                                                true, false, queryExpr, child2);
        countNode1 = new DLinqMergeNode(true, queryExpr, countNode1);
        countNode2 = new DLinqMergeNode(true, queryExpr, countNode2);

        // Apply node for (x, y) => ZipCount(x, y)
        Type paramType1 = typeof(IEnumerable<>).MakeGenericType(typeof(long));
        ParameterExpression param1 = Expression.Parameter(paramType1, "x");
        ParameterExpression param2 = Expression.Parameter(paramType1, "y");
        MethodInfo minfo = typeof(DryadLinqHelper).GetMethod("ZipCount");
        Expression body = Expression.Call(minfo, param1, param2);
        Type funcType = typeof(Func<,,>).MakeGenericType(param1.Type, param2.Type, body.Type);
        LambdaExpression zipCount = Expression.Lambda(funcType, body, param1, param2);
        DLinqQueryNode indexedCountNode = new DLinqApplyNode(zipCount, queryExpr, countNode1, countNode2);

        // HashPartition(x => x.index, parCount2)
        // 'body' is still the ZipCount call here; its first generic argument is the element type.
        ParameterExpression param = Expression.Parameter(body.Type.GetGenericArguments()[0], "x");
        Expression keySelectBody = Expression.Property(param, "Index");
        funcType = typeof(Func<,>).MakeGenericType(param.Type, keySelectBody.Type);
        LambdaExpression keySelectExpr = Expression.Lambda(funcType, keySelectBody, param);
        DLinqQueryNode distCountNode = new DLinqHashPartitionNode(keySelectExpr,
                                                                  null,
                                                                  parCount2,
                                                                  queryExpr,
                                                                  indexedCountNode);

        // Apply node for (x, y) => AssignPartitionIndex(x, y)
        param1 = Expression.Parameter(body.Type, "x");
        Type paramType2 = typeof(IEnumerable<>).MakeGenericType(child2.OutputTypes[0]);
        param2 = Expression.Parameter(paramType2, "y");
        minfo = typeof(DryadLinqHelper).GetMethod("AssignPartitionIndex");
        minfo = minfo.MakeGenericMethod(child2.OutputTypes[0]);
        body = Expression.Call(minfo, param1, param2);
        funcType = typeof(Func<,,>).MakeGenericType(param1.Type, param2.Type, body.Type);
        LambdaExpression assignIndex = Expression.Lambda(funcType, body, param1, param2);
        DLinqQueryNode addIndexNode = new DLinqApplyNode(assignIndex, queryExpr, distCountNode, child2);

        // HashPartition(x => x.index, x => x.value, parCount1)
        // 'body' is now the AssignPartitionIndex call; 'param' ranges over its element type.
        param = Expression.Parameter(body.Type.GetGenericArguments()[0], "x");
        body = Expression.Property(param, "Index");
        funcType = typeof(Func<,>).MakeGenericType(param.Type, body.Type);
        keySelectExpr = Expression.Lambda(funcType, body, param);
        body = Expression.Property(param, "Value");
        funcType = typeof(Func<,>).MakeGenericType(param.Type, body.Type);
        LambdaExpression resultSelectExpr = Expression.Lambda(funcType, body, param);
        DLinqQueryNode newChild2 = new DLinqHashPartitionNode(keySelectExpr,
                                                              resultSelectExpr,
                                                              null,
                                                              parCount1,
                                                              false,
                                                              queryExpr,
                                                              addIndexNode);
        newChild2 = new DLinqMergeNode(true, queryExpr, newChild2);

        // Finally the zip node
        return new DLinqZipNode(resultSelector, queryExpr, child1, newChild2);
    }
}
// Phase 1 of the query optimization.
//
// Does nothing when the phase-1 plan already exists. Otherwise:
//   1. runs SimpleRewriter over the expression node infos;
//   2. visits every query root and every referenced query to build m_queryPlan1
//      (referenced queries that need it get a Merge followed by a Tee);
//   3. appends an output node to every query, inserting a serializer Apply or a
//      dummy identity Apply where required, and registers the output URI.
// Throws DryadLinqException when a query's output type is anonymous or when two
// queries write to the same (lower-cased) output URI.
internal void GenerateQueryPlanPhase1()
{
    if (this.m_queryPlan1 != null)
    {
        return;
    }

    // Apply some simple rewrite rules.
    SimpleRewriter rewriter = new SimpleRewriter(this.m_exprNodeInfoMap.Values.ToList());
    rewriter.Rewrite();

    // Generate the query plan of phase 1: query roots first, referenced queries after them.
    var referencedNodes = this.m_referencedQueryMap.Values;
    int queryCount = this.m_queryExprs.Length;
    this.m_queryPlan1 = new DLinqQueryNode[queryCount + referencedNodes.Count];
    for (int q = 0; q < queryCount; q++)
    {
        this.m_queryPlan1[q] = this.Visit(this.m_queryNodeInfos[q].Children[0].Child);
    }

    int slot = queryCount;
    foreach (DummyQueryNodeInfo refInfo in referencedNodes)
    {
        bool wrapInMerge = refInfo.NeedsMerge;
        this.m_queryPlan1[slot] = this.Visit(refInfo.Children[0].Child);
        if (wrapInMerge)
        {
            // Add a Tee'd Merge.
            DLinqQueryNode mergeNode = new DLinqMergeNode(true,
                                                          refInfo.QueryExpression,
                                                          this.m_queryPlan1[slot]);
            this.m_queryPlan1[slot] = new DLinqTeeNode(mergeNode.OutputTypes[0],
                                                       true,
                                                       mergeNode.QueryExpression,
                                                       mergeNode);
        }
        refInfo.QueryNode = this.m_queryPlan1[slot];
        slot++;
    }

    // Count, per root node, its existing parents plus one for every query that outputs it.
    Dictionary<DLinqQueryNode, int> forkCounts = new Dictionary<DLinqQueryNode, int>();
    for (int q = 0; q < queryCount; q++)
    {
        DLinqQueryNode rootNode = this.m_queryPlan1[q];
        int known;
        int baseline = forkCounts.TryGetValue(rootNode, out known) ? known : rootNode.Parents.Count;
        forkCounts[rootNode] = baseline + 1;
    }

    // Finally, add the output nodes.
    for (int q = 0; q < queryCount; q++)
    {
        DryadLinqClientLog.Add("Query " + q + " Output: " + this.m_outputTableUris[q]);

        DLinqQueryNode tailNode = this.m_queryPlan1[q];
        if (TypeSystem.IsAnonymousType(tailNode.OutputTypes[0]))
        {
            throw new DryadLinqException(DryadLinqErrorCode.OutputTypeCannotBeAnonymous,
                                         SR.OutputTypeCannotBeAnonymous);
        }

        if (this.m_serializers[q] == null)
        {
            // Add a dummy Apply to make Dryad happy
            // (it doesn't like to hook inputs straight to outputs).
            if ((tailNode is DLinqInputNode) || (forkCounts[tailNode] > 1))
            {
                // Identity function: x => x over IEnumerable<record>.
                Type seqType = typeof(IEnumerable<>).MakeGenericType(tailNode.OutputTypes[0]);
                ParameterExpression seqParam = Expression.Parameter(seqType, "x");
                Type identityType = typeof(Func<,>).MakeGenericType(seqType, seqType);
                LambdaExpression identityExpr = Expression.Lambda(identityType, seqParam, seqParam);
                DLinqQueryNode dummyApply = new DLinqApplyNode(identityExpr, this.m_queryExprs[q], tailNode);
                dummyApply.OutputDataSetInfo = tailNode.OutputDataSetInfo;
                tailNode = dummyApply;
            }

            if (tailNode is DLinqConcatNode)
            {
                // Again, we add dummy Apply in certain cases to make Dryad happy.
                ((DLinqConcatNode)tailNode).FixInputs();
            }
        }
        else
        {
            // Add an Apply for the serializer if it is not null.
            LambdaExpression serializer = DryadLinqExpression.GetLambda(this.m_serializers[q]);
            DLinqQueryNode serializerApply = new DLinqApplyNode(serializer, this.m_queryExprs[q], tailNode);
            serializerApply.OutputDataSetInfo = tailNode.OutputDataSetInfo;
            tailNode = serializerApply;
        }

        // Add the output node.
        CompressionScheme outputScheme = this.m_context.OutputDataCompressionScheme;
        DLinqOutputNode outputNode = new DLinqOutputNode(this.m_context,
                                                         this.m_outputTableUris[q],
                                                         this.m_isTempOutput[q],
                                                         outputScheme,
                                                         this.m_queryExprs[q],
                                                         tailNode);
        this.m_queryPlan1[q] = outputNode;

        // NOTE(review): ToLower() is culture-sensitive; left as is because other users of
        // m_outputUriMap are not visible here and would have to change in step.
        string outputUri = this.m_outputTableUris[q].AbsoluteUri.ToLower();
        if (this.m_outputUriMap.ContainsKey(outputUri))
        {
            throw new DryadLinqException(DryadLinqErrorCode.MultipleOutputsWithSameDscUri,
                                         String.Format(SR.MultipleOutputsWithSameUri, this.m_outputTableUris[q]));
        }
        this.m_outputUriMap.Add(outputUri, outputNode);
        this.m_outputTypes[q] = this.m_queryPlan1[q].OutputTypes[0];

        // Remove useless Tees to make Dryad happy: splice the Tee's child straight
        // into the output node when this query is the Tee's only consumer.
        if ((tailNode is DLinqTeeNode) && (forkCounts[tailNode] == 1))
        {
            DLinqQueryNode teeChild = tailNode.Children[0];
            teeChild.UpdateParent(tailNode, outputNode);
            outputNode.UpdateChildren(tailNode, teeChild);
        }
    }
}
// Creates an "auto-sampling range-partition sub-query":
//   child -> Apply(Phase1Sampling) -> Merge -> Apply(RangeSampler_Static) -> (forked sample node)
//   (child, sample node) -> RangePartition -> Merge
//
//   isDynamic          - expected to be false (asserted in debug builds).
//   keySelectExpr      - key selector; its second generic argument is taken as the key type.
//   resultSelectExpr   - passed through to the range-partition node.
//   comparerExpr       - optional; when null the key type must have a default comparer.
//   isDescendingExpr   - optional; defaults to the constant false.
//   partitionCountExpr - optional; defaults to the child's partition count.
// Throws (via DryadLinqException.Create) when no comparer is given and the key type
// has no default comparer.
private DLinqQueryNode CreateRangePartition(bool isDynamic,
                                            LambdaExpression keySelectExpr,
                                            LambdaExpression resultSelectExpr,
                                            Expression comparerExpr,
                                            Expression isDescendingExpr,
                                            Expression queryExpr,
                                            Expression partitionCountExpr,
                                            DLinqQueryNode child)
{
    // Make child a Tee node: it feeds both the sampler and the range partitioner.
    child.IsForked = true;

    // Default for isDescending is false.
    Expression descendingExpr = isDescendingExpr ?? Expression.Constant(false, typeof(bool));

    // The partition count. NOTE: isDynamic should never be true.
    // If the count was not explicitly set, use the child's partition count.
    Expression partitionCount = null;
    if (!isDynamic)
    {
        partitionCount = partitionCountExpr ?? Expression.Constant(child.OutputPartition.Count);
    }

    Type recordType = child.OutputTypes[0];
    Type keyType = keySelectExpr.Type.GetGenericArguments()[1];

    // Create x_1 => Phase1Sampling(x_1, keySelector, denv)
    Type recordSeqType = typeof(IEnumerable<>).MakeGenericType(recordType);
    ParameterExpression recordsParam = Expression.Parameter(recordSeqType, "x_1");
    ParameterExpression denvParam = Expression.Parameter(typeof(VertexEnv), "denv");
    MethodInfo phase1Method = typeof(DryadLinqSampler).GetMethod("Phase1Sampling");
    Expression phase1Call = Expression.Call(phase1Method.MakeGenericMethod(recordType, keyType),
                                            recordsParam, keySelectExpr, denvParam);
    Type phase1FuncType = typeof(Func<,>).MakeGenericType(recordsParam.Type, phase1Call.Type);
    LambdaExpression samplingExpr = Expression.Lambda(phase1FuncType, phase1Call, recordsParam);

    // Create the Sampling node
    DLinqApplyNode samplingNode = new DLinqApplyNode(samplingExpr, queryExpr, child);

    // Create x_2 => RangeSampler_Static(x_2, comparer, isDescending, count).
    // Only the static sampler is used; the dynamic variant is not selected here.
    Type keySeqType = typeof(IEnumerable<>).MakeGenericType(keyType);
    ParameterExpression keysParam = Expression.Parameter(keySeqType, "x_2");
    Debug.Assert(isDynamic == false, "Internal error: isDynamic is true.");
    MethodInfo samplerMethod = typeof(DryadLinqSampler).GetMethod("RangeSampler_Static")
                                                       .MakeGenericMethod(keyType);

    Expression comparerArgExpr = comparerExpr;
    if (comparerExpr == null)
    {
        if (!TypeSystem.HasDefaultComparer(keyType))
        {
            throw DryadLinqException.Create(DryadLinqErrorCode.ComparerMustBeSpecifiedOrKeyTypeMustBeIComparable,
                                            string.Format(SR.ComparerMustBeSpecifiedOrKeyTypeMustBeIComparable, keyType),
                                            queryExpr);
        }
        // A typed null constant stands in for the missing comparer argument.
        comparerArgExpr = Expression.Constant(null, typeof(IComparer<>).MakeGenericType(keyType));
    }

    Expression lastArg = isDynamic ? denvParam : partitionCount;
    Expression samplerCall = Expression.Call(samplerMethod, keysParam, comparerArgExpr, descendingExpr, lastArg);
    Type samplerFuncType = typeof(Func<,>).MakeGenericType(keysParam.Type, samplerCall.Type);
    LambdaExpression samplerExpr = Expression.Lambda(samplerFuncType, samplerCall, keysParam);

    // Create the sample node
    DLinqQueryNode sampleDataNode = new DLinqMergeNode(false, queryExpr, samplingNode);
    DLinqQueryNode sampleNode = new DLinqApplyNode(samplerExpr, queryExpr, sampleDataNode);
    sampleNode.IsForked = true;

    // Create the range distribute node
    DLinqQueryNode rangeNode = new DLinqRangePartitionNode(keySelectExpr,
                                                           resultSelectExpr,
                                                           null,
                                                           comparerExpr,
                                                           descendingExpr,
                                                           partitionCount,
                                                           queryExpr,
                                                           child,
                                                           sampleNode);
    DLinqQueryNode mergedRangeNode = new DLinqMergeNode(false, queryExpr, rangeNode);

    // Set the dynamic manager for the sample data node
    if (isDynamic)
    {
        sampleDataNode.DynamicManager = new DynamicRangeDistributor(mergedRangeNode);
    }

    return mergedRangeNode;
}
// Builds the plan for a partition operator (e.g. Take / TakeWhile and the other
// operators that share this visitor). Three cases:
//   1. Indexed TakeWhile (the predicate's delegate type does not have exactly two
//      generic arguments): the input is forked, an offset node is created, and a binary
//      Apply calls DryadLinqEnumerable.GroupIndexed<MethodName>.
//   2. Take / TakeWhile directly on an un-forked OrderBy / OrderByDescending: the sort and
//      a first-stage partition op are pushed below the OrderBy's input via PromoteConcat,
//      followed by a merge-sort when more than one partition remains.
//   3. Everything else: the input is visited as is; Take / TakeWhile additionally get a
//      first-stage partition op via PromoteConcat.
// In all cases the result is wrapped in a final DLinqPartitionOpNode.
private DLinqQueryNode VisitPartitionOp(string opName,
                                        QueryNodeInfo source,
                                        QueryNodeType nodeType,
                                        Expression controlExpr,
                                        MethodCallExpression queryExpr)
{
    bool isTakeOrTakeWhile = (nodeType == QueryNodeType.Take || nodeType == QueryNodeType.TakeWhile);
    DLinqQueryNode planNode;

    if (nodeType == QueryNodeType.TakeWhile &&
        controlExpr.Type.GetGenericArguments().Length != 2)
    {
        // The "indexed" version.
        planNode = this.Visit(source);

        // This block used to be skipped for planNode.OutputPartition.Count == 1,
        // which caused a compilation error (bug 13593). It is therefore unconditional.
        // @@TODO[p3]: implement a working optimization for nPartition==1 that calls
        //             directly to Linq TakeWhile.
        // Note: the former test was: if (node.IsDynamic || node.OutputPartition.Count > 1)
        planNode.IsForked = true;

        string queryMethodName = queryExpr.Method.Name;
        bool isLong = (queryMethodName == "LongTakeWhile");
        DLinqQueryNode offsetNode = this.CreateOffset(isLong, queryExpr, planNode);

        // Create (x, y) => GroupIndexedTakeWhile(x, y, controlExpr)
        Type recordSeqType = typeof(IEnumerable<>).MakeGenericType(planNode.OutputTypes[0]);
        Type offsetSeqType = typeof(IEnumerable<>).MakeGenericType(typeof(IndexedValue<long>));
        ParameterExpression recordsParam = Expression.Parameter(recordSeqType, DryadLinqCodeGen.MakeUniqueName("x"));
        ParameterExpression offsetsParam = Expression.Parameter(offsetSeqType, DryadLinqCodeGen.MakeUniqueName("y"));
        MethodInfo groupMethod = typeof(DryadLinqEnumerable).GetMethod("GroupIndexed" + queryMethodName)
                                                            .MakeGenericMethod(planNode.OutputTypes[0]);
        Expression groupCall = Expression.Call(groupMethod, recordsParam, offsetsParam, controlExpr);
        Type groupFuncType = typeof(Func<,,>).MakeGenericType(recordSeqType, offsetSeqType, groupCall.Type);
        LambdaExpression groupLambda = Expression.Lambda(groupFuncType, groupCall, recordsParam, offsetsParam);
        planNode = new DLinqApplyNode(groupLambda, queryExpr, planNode, offsetNode);
    }
    else if (!source.IsForked &&
             isTakeOrTakeWhile &&
             (source.OperatorName == "OrderBy" || source.OperatorName == "OrderByDescending"))
    {
        // Skip the OrderBy node itself and re-create the sort below, per input partition.
        QueryNodeInfo orderByInput = source.Children[0].Child;
        planNode = this.Visit(orderByInput);

        bool isDescending = (source.OperatorName == "OrderByDescending");
        MethodCallExpression sourceQueryExpr = (MethodCallExpression)source.QueryExpression;
        LambdaExpression keySelectExpr = DryadLinqExpression.GetLambda(sourceQueryExpr.Arguments[1]);
        Expression comparerExpr = (sourceQueryExpr.Arguments.Count == 3) ? sourceQueryExpr.Arguments[2] : null;

        planNode = this.PromoteConcat(
                       orderByInput,
                       planNode,
                       n =>
                       {
                           DLinqQueryNode sorted = new DLinqOrderByNode(keySelectExpr, comparerExpr,
                                                                        isDescending, sourceQueryExpr, n);
                           return FirstStagePartitionOp(opName, nodeType, controlExpr, queryExpr, sorted);
                       });

        if (planNode.IsDynamic || planNode.OutputPartition.Count > 1)
        {
            // Need a mergesort
            planNode = new DLinqMergeNode(keySelectExpr, comparerExpr, isDescending, sourceQueryExpr, planNode);
        }
    }
    else
    {
        planNode = this.Visit(source);
        if (isTakeOrTakeWhile)
        {
            planNode = this.PromoteConcat(
                           source,
                           planNode,
                           n => FirstStagePartitionOp(opName, nodeType, controlExpr, queryExpr, n));
        }
    }

    return new DLinqPartitionOpNode(opName, nodeType, controlExpr, false, queryExpr, planNode);
}
/// <summary>
/// Builds the plan for a Concat over all children of <paramref name="source"/>.
/// If the concatenation has a static partition count above
/// <c>StaticConfig.MaxPartitionCount</c>, the records are hash-partitioned again
/// into half as many partitions and merged.
/// </summary>
/// <param name="source">Query node info whose children are the concatenated inputs.</param>
/// <param name="queryExpr">The method call expression of the Concat operator.</param>
/// <returns>The concat node itself, or the merge node on top of the repartitioning.</returns>
private DLinqQueryNode VisitConcat(QueryNodeInfo source, MethodCallExpression queryExpr)
{
    DLinqQueryNode[] inputs = new DLinqQueryNode[source.Children.Count];
    for (int idx = 0; idx < inputs.Length; idx++)
    {
        inputs[idx] = this.Visit(source.Children[idx].Child);
    }

    DLinqQueryNode concatNode = new DLinqConcatNode(queryExpr, inputs);
    int parCount = concatNode.OutputPartition.Count;
    if (concatNode.IsDynamic || parCount <= StaticConfig.MaxPartitionCount)
    {
        return concatNode;
    }

    // Too many partitions, need to repartition
    int reducedParCount = parCount / 2;

    DLinqQueryNode countNode = new DLinqBasicAggregateNode(
        null, AggregateOpType.LongCount, true, false, queryExpr, concatNode);
    DLinqQueryNode mergeCountNode = new DLinqMergeNode(true, queryExpr, countNode);

    // Apply node for s => IndexedCount(s)
    Type countsType = typeof(IEnumerable<>).MakeGenericType(typeof(long));
    ParameterExpression countsParam = Expression.Parameter(countsType, "s");
    MethodInfo indexedCountMethod = typeof(DryadLinqHelper).GetMethod("IndexedCount");
    indexedCountMethod = indexedCountMethod.MakeGenericMethod(typeof(long));
    Expression indexedCountCall = Expression.Call(indexedCountMethod, countsParam);
    Type indexedCountFuncType =
        typeof(Func<,>).MakeGenericType(countsParam.Type, indexedCountCall.Type);
    LambdaExpression indexedCountFunc =
        Expression.Lambda(indexedCountFuncType, indexedCountCall, countsParam);
    DLinqQueryNode indexedCountNode =
        new DLinqApplyNode(indexedCountFunc, queryExpr, mergeCountNode);

    // HashPartition(x => x.index, parCount)
    ParameterExpression countElem =
        Expression.Parameter(indexedCountCall.Type.GetGenericArguments()[0], "x");
    Expression countIndex = Expression.Property(countElem, "Index");
    Type countKeyFuncType = typeof(Func<,>).MakeGenericType(countElem.Type, countIndex.Type);
    LambdaExpression countKeyFunc = Expression.Lambda(countKeyFuncType, countIndex, countElem);
    DLinqQueryNode distCountNode = new DLinqHashPartitionNode(
        countKeyFunc, null, parCount, queryExpr, indexedCountNode);

    // Apply node for (x, y) => AddPartitionIndex(x, y, reducedParCount)
    ParameterExpression indexedCountsParam = Expression.Parameter(indexedCountCall.Type, "x");
    Type recordsType = typeof(IEnumerable<>).MakeGenericType(concatNode.OutputTypes[0]);
    ParameterExpression recordsParam = Expression.Parameter(recordsType, "y");
    MethodInfo addIndexMethod = typeof(DryadLinqHelper).GetMethod("AddPartitionIndex");
    addIndexMethod = addIndexMethod.MakeGenericMethod(concatNode.OutputTypes[0]);
    Expression addIndexCall = Expression.Call(
        addIndexMethod, indexedCountsParam, recordsParam, Expression.Constant(reducedParCount));
    Type addIndexFuncType = typeof(Func<,,>).MakeGenericType(
        indexedCountsParam.Type, recordsParam.Type, addIndexCall.Type);
    LambdaExpression addIndexFunc = Expression.Lambda(
        addIndexFuncType, addIndexCall, indexedCountsParam, recordsParam);
    DLinqQueryNode addIndexNode =
        new DLinqApplyNode(addIndexFunc, queryExpr, distCountNode, concatNode);

    // HashPartition(x => x.index, x => x.value, reducedParCount)
    ParameterExpression taggedElem =
        Expression.Parameter(addIndexCall.Type.GetGenericArguments()[0], "x");
    Expression taggedIndex = Expression.Property(taggedElem, "Index");
    Type keyFuncType = typeof(Func<,>).MakeGenericType(taggedElem.Type, taggedIndex.Type);
    LambdaExpression keySelectExpr = Expression.Lambda(keyFuncType, taggedIndex, taggedElem);
    Expression taggedValue = Expression.Property(taggedElem, "Value");
    Type valueFuncType = typeof(Func<,>).MakeGenericType(taggedElem.Type, taggedValue.Type);
    LambdaExpression resultSelectExpr = Expression.Lambda(valueFuncType, taggedValue, taggedElem);

    DLinqQueryNode repartitionNode = new DLinqHashPartitionNode(
        keySelectExpr, resultSelectExpr, null, reducedParCount, false, queryExpr, addIndexNode);
    return new DLinqMergeNode(true, queryExpr, repartitionNode);
}
/// <summary>
/// Builds the plan for a Where operator. A predicate whose delegate type has two
/// generic arguments, or an input with a single static partition, becomes a
/// <c>DLinqWhereNode</c>; otherwise the "indexed" form is compiled as an Apply
/// node that also consumes the offsets produced by <c>CreateOffset</c>.
/// </summary>
/// <param name="source">Query node info describing the operator's input.</param>
/// <param name="predicate">The filter lambda.</param>
/// <param name="queryExpr">The method call expression of the Where operator.</param>
/// <returns>The node at the top of the generated plan.</returns>
private DLinqQueryNode VisitWhere(QueryNodeInfo source,
                                  LambdaExpression predicate,
                                  MethodCallExpression queryExpr)
{
    DLinqQueryNode child = this.Visit(source);

    bool isPlainPredicate = (predicate.Type.GetGenericArguments().Length == 2);
    if (isPlainPredicate || (!child.IsDynamic && child.OutputPartition.Count == 1))
    {
        return this.PromoteConcat(source, child, x => new DLinqWhereNode(predicate, queryExpr, x));
    }

    // The "indexed" version
    // Build (x, y) => <Method>WithStartIndex(x, y, predicate)
    Type recordType = child.OutputTypes[0];
    Type recordsType = typeof(IEnumerable<>).MakeGenericType(recordType);
    Type offsetsType = typeof(IEnumerable<>).MakeGenericType(typeof(IndexedValue<long>));
    ParameterExpression recordsParam =
        Expression.Parameter(recordsType, DryadLinqCodeGen.MakeUniqueName("x"));
    ParameterExpression offsetsParam =
        Expression.Parameter(offsetsType, DryadLinqCodeGen.MakeUniqueName("y"));

    string helperName = queryExpr.Method.Name + "WithStartIndex";
    MethodInfo helper = typeof(DryadLinqEnumerable).GetMethod(helperName);
    helper = helper.MakeGenericMethod(recordType);
    Expression helperCall = Expression.Call(helper, recordsParam, offsetsParam, predicate);

    Type applyFuncType =
        typeof(Func<,,>).MakeGenericType(recordsType, offsetsType, helperCall.Type);
    LambdaExpression applyFunc =
        Expression.Lambda(applyFuncType, helperCall, recordsParam, offsetsParam);

    // The child feeds both the offset computation and the final Apply node.
    child.IsForked = true;
    bool useLongIndex = (queryExpr.Method.Name == "LongWhere");
    DLinqQueryNode offsetNode = this.CreateOffset(useLongIndex, queryExpr, child);
    return new DLinqApplyNode(applyFunc, queryExpr, child, offsetNode);
}
/// <summary>
/// Builds the plan for a Select / SelectMany operator. Three outcomes are possible:
/// the select is pushed into a preceding GroupBy, it becomes a <c>DLinqSelectNode</c>,
/// or (indexed selector over an input that is dynamic or has more than one partition)
/// it becomes an Apply node that also consumes the offsets produced by
/// <c>CreateOffset</c>.
/// </summary>
/// <param name="source">Query node info describing the operator's input.</param>
/// <param name="nodeType">The node type; SelectMany is treated specially.</param>
/// <param name="selector">The selector lambda; a delegate type with two generic
/// arguments is the non-indexed form.</param>
/// <param name="resultSelector">Optional result selector; may be null.</param>
/// <param name="queryExpr">The method call expression of the operator.</param>
/// <returns>The node at the top of the generated plan.</returns>
private DLinqQueryNode VisitSelect(QueryNodeInfo source,
                                   QueryNodeType nodeType,
                                   LambdaExpression selector,
                                   LambdaExpression resultSelector,
                                   MethodCallExpression queryExpr)
{
    bool isIndexedSelector = (selector.Type.GetGenericArguments().Length != 2);

    // A non-indexed select whose child is a groupby node is pushed into that
    // child when
    //   1. the groupby node is not tee'd, and
    //   2. the groupby node has no result selector, and
    //   3. the selector is decomposable.
    if (!isIndexedSelector &&
        !source.IsForked &&
        IsGroupByWithoutResultSelector(source.QueryExpression) &&
        Decomposition.GetDecompositionInfoList(selector, m_codeGen) != null)
    {
        MethodCallExpression groupByExpr = (MethodCallExpression)source.QueryExpression;
        LambdaExpression keySelectExpr = DryadLinqExpression.GetLambda(groupByExpr.Arguments[1]);

        // Work out the element selector and the comparer from the argument count.
        LambdaExpression elemSelectExpr = null;
        Expression comparerExpr = null;
        switch (groupByExpr.Arguments.Count)
        {
            case 3:
                // The third argument is used as the element selector when GetLambda
                // yields a lambda for it, and as the comparer otherwise.
                elemSelectExpr = DryadLinqExpression.GetLambda(groupByExpr.Arguments[2]);
                if (elemSelectExpr == null)
                {
                    comparerExpr = groupByExpr.Arguments[2];
                }
                break;

            case 4:
                elemSelectExpr = DryadLinqExpression.GetLambda(groupByExpr.Arguments[2]);
                comparerExpr = groupByExpr.Arguments[3];
                break;
        }

        // Hand the selector to the groupby as its result selector.
        DLinqQueryNode groupByNode = VisitGroupBy(source.Children[0].Child,
                                                  keySelectExpr,
                                                  elemSelectExpr,
                                                  selector,
                                                  comparerExpr,
                                                  queryExpr);
        if (nodeType != QueryNodeType.SelectMany)
        {
            return groupByNode;
        }

        Type selectorRetType = selector.Type.GetGenericArguments()[1];
        LambdaExpression identity = IdentityFunction.Instance(selectorRetType);
        return new DLinqSelectNode(nodeType, identity, resultSelector, queryExpr, groupByNode);
    }

    DLinqQueryNode child = this.Visit(source);
    if (!isIndexedSelector || (!child.IsDynamic && child.OutputPartition.Count == 1))
    {
        return this.PromoteConcat(
                   source,
                   child,
                   x => new DLinqSelectNode(nodeType, selector, resultSelector, queryExpr, x));
    }

    // The "indexed" version
    child.IsForked = true;

    // Build (x, y) => <Method>WithStartIndex(x, y, selector [, resultSelector])
    Type recordType = child.OutputTypes[0];
    Type recordsType = typeof(IEnumerable<>).MakeGenericType(recordType);
    Type offsetsType = typeof(IEnumerable<>).MakeGenericType(typeof(IndexedValue<long>));
    ParameterExpression recordsParam =
        Expression.Parameter(recordsType, DryadLinqCodeGen.MakeUniqueName("x"));
    ParameterExpression offsetsParam =
        Expression.Parameter(offsetsType, DryadLinqCodeGen.MakeUniqueName("y"));

    string methodName = queryExpr.Method.Name;
    Type[] selectorTypeArgs = selector.Type.GetGenericArguments();
    Type selectedType = selectorTypeArgs[selectorTypeArgs.Length - 1];
    if (nodeType == QueryNodeType.SelectMany)
    {
        if (resultSelector != null)
        {
            methodName += "Result";
        }
        // Use the first generic argument of the selector's return type.
        selectedType = selectedType.GetGenericArguments()[0];
    }

    MethodInfo helper = typeof(DryadLinqEnumerable).GetMethod(methodName + "WithStartIndex");
    Expression helperCall;
    if (resultSelector == null)
    {
        helper = helper.MakeGenericMethod(recordType, selectedType);
        helperCall = Expression.Call(helper, recordsParam, offsetsParam, selector);
    }
    else
    {
        helper = helper.MakeGenericMethod(recordType, selectedType, resultSelector.Body.Type);
        helperCall = Expression.Call(helper, recordsParam, offsetsParam, selector, resultSelector);
    }

    Type applyFuncType =
        typeof(Func<,,>).MakeGenericType(recordsType, offsetsType, helperCall.Type);
    LambdaExpression applyFunc =
        Expression.Lambda(applyFuncType, helperCall, recordsParam, offsetsParam);

    bool useLongIndex = methodName.StartsWith("Long", StringComparison.Ordinal);
    DLinqQueryNode offsetNode = this.CreateOffset(useLongIndex, queryExpr, child);
    return new DLinqApplyNode(applyFunc, queryExpr, child, offsetNode);
}
/// <summary>
/// Builds the sub-plan that feeds offsets to the indexed operators: a LongCount
/// aggregate over <paramref name="child"/>, a merge, an Apply of
/// <c>DryadLinqEnumerable.Offsets</c>, a hash partition into as many partitions
/// as the child has, and a final merge.
/// </summary>
/// <param name="isLong">Passed to <c>Offsets</c> as a boolean constant.</param>
/// <param name="queryExpr">Query expression attached to every node created here.</param>
/// <param name="child">The node whose records are counted.</param>
/// <returns>The merge node at the top of the offset sub-plan.</returns>
private DLinqQueryNode CreateOffset(bool isLong, Expression queryExpr, DLinqQueryNode child)
{
    // Count node
    DLinqQueryNode countNode = new DLinqBasicAggregateNode(
        null, AggregateOpType.LongCount, true, false, queryExpr, child);

    // Lambda for x => Offsets(x, isLong)
    Type countsType = typeof(IEnumerable<>).MakeGenericType(typeof(long));
    ParameterExpression countsParam = Expression.Parameter(countsType, "x");
    MethodInfo offsetsMethod = typeof(DryadLinqEnumerable).GetMethod("Offsets");
    Expression isLongArg = Expression.Constant(isLong, typeof(bool));
    Expression offsetsCall = Expression.Call(offsetsMethod, countsParam, isLongArg);
    Type offsetsFuncType = typeof(Func<,>).MakeGenericType(countsParam.Type, offsetsCall.Type);
    LambdaExpression offsetsFunc = Expression.Lambda(offsetsFuncType, offsetsCall, countsParam);

    // Merge the counts, then apply Offsets to the merged result.
    DLinqQueryNode mergeCountNode = new DLinqMergeNode(true, queryExpr, countNode);
    DLinqQueryNode offsetsNode = new DLinqApplyNode(offsetsFunc, queryExpr, mergeCountNode);

    // HashPartition keyed by the IndexedValue<long> itself.
    LambdaExpression identityKey = IdentityFunction.Instance(typeof(IndexedValue<long>));
    int targetParCount = child.OutputPartition.Count;
    DLinqQueryNode distributeNode = new DLinqHashPartitionNode(
        identityKey, null, null, targetParCount, false, queryExpr, offsetsNode);

    return new DLinqMergeNode(false, queryExpr, distributeNode);
}
/// <summary>
/// Entry point for turning a query node info into a plan node. Dispatches on the
/// node type of the query expression: method calls go to
/// <c>VisitQueryOperatorCall</c>, constants become input nodes.
/// </summary>
/// <param name="nodeInfo">The query node info to compile.</param>
/// <returns>
/// The plan node for <paramref name="nodeInfo"/>. For a constant this is the input
/// node, wrapped in an Apply node when the input table has a deserializer.
/// </returns>
/// <exception cref="DryadLinqException">
/// Created with <c>OperatorNotSupported</c> when a method call is not a static
/// query-operator call, and with <c>UnsupportedExpressionsType</c> for any
/// expression that is neither a call nor a constant.
/// </exception>
internal DLinqQueryNode Visit(QueryNodeInfo nodeInfo)
{
    Expression expression = nodeInfo.QueryExpression;
    switch (expression.NodeType)
    {
        case ExpressionType.Call:
        {
            MethodCallExpression mcExpr = (MethodCallExpression)expression;
            if (mcExpr.Method.IsStatic && TypeSystem.IsQueryOperatorCall(mcExpr))
            {
                return this.VisitQueryOperatorCall(nodeInfo);
            }
            throw DryadLinqException.Create(DryadLinqErrorCode.OperatorNotSupported,
                                            String.Format(SR.OperatorNotSupported, mcExpr.Method.Name),
                                            expression);
        }

        case ExpressionType.Constant:
        {
            DLinqInputNode inputNode = new DLinqInputNode(this, (ConstantExpression)expression);

            // Record the first input node seen for each URI, keyed by the
            // lower-cased absolute URI.
            // NOTE(review): ToLower() is culture-sensitive. It is left as is because
            // other code may build keys for m_inputUriMap the same way -- confirm
            // before switching to ToLowerInvariant().
            string inputUri = inputNode.Table.DataSourceUri.AbsoluteUri.ToLower();
            if (!this.m_inputUriMap.ContainsKey(inputUri))
            {
                this.m_inputUriMap.Add(inputUri, inputNode);
            }

            DLinqQueryNode resNode = inputNode;
            if (inputNode.Table.Deserializer != null)
            {
                // Add an Apply for the deserializer
                resNode = new DLinqApplyNode(inputNode.Table.Deserializer, expression, inputNode);
            }
            return resNode;
        }

        default:
            // The message comes from the SR resource; the hand-built string the
            // original assigned to an unused local has been removed.
            throw DryadLinqException.Create(DryadLinqErrorCode.UnsupportedExpressionsType,
                                            String.Format(SR.UnsupportedExpressionsType, expression.NodeType),
                                            expression);
    }
}