/// <summary>
/// Moves <paramref name="curNode"/> and, recursively, its children from this super node
/// to <paramref name="node"/>.
/// </summary>
/// <param name="curNode">Node at which the reassignment starts.</param>
/// <param name="node">Super node that takes ownership of the visited nodes.</param>
private void SwitchTo(DryadQueryNode curNode, DryadSuperNode node)
{
    // Only nodes currently owned by this super node are touched; the walk stops
    // at the first node that belongs elsewhere.
    if (curNode.SuperNode != this)
    {
        return;
    }

    curNode.SuperNode = node;
    foreach (DryadQueryNode descendant in curNode.Children)
    {
        this.SwitchTo(descendant, node);
    }
}
/// <summary>
/// Returns true when <paramref name="node"/> is dynamic or has more than one partition.
/// </summary>
/// <param name="node">Node to inspect.</param>
private static bool IsMergeNodeNeeded(DryadQueryNode node)
{
    if (node.IsDynamic)
    {
        return true;
    }

    return node.PartitionCount > 1;
}
/// <summary>
/// Code-generation pass over the plan: assigns <c>m_vertexEntryMethod</c> for
/// <paramref name="node"/> and, recursively, for its children.
/// </summary>
/// <param name="node">Node to process.</param>
private void CodeGenVisit(DryadQueryNode node)
{
    // Only nodes still carrying the current phase id are processed; bumping the id
    // below makes this check fail if the node is reached again in the same phase.
    if (node.m_uniqueId != this.m_currentPhaseId)
    {
        return;
    }
    node.m_uniqueId++;

    // Types are handled before the children so that the children also know about
    // all proxies/mappings that should be used.
    node.CreateCodeAndMappingsForVertexTypes(false);

    foreach (DryadQueryNode child in node.Children)
    {
        this.CodeGenVisit(child);
    }

    switch (node.NodeType)
    {
        case QueryNodeType.InputTable:
        {
            // Not used as a vertex: the name is the part of the data source URI after
            // the last '/' or '\' (whichever comes later).
            string uri = ((DryadInputNode)node).Table.DataSourceUri;
            int nameStart = Math.Max(uri.LastIndexOf('/'), uri.LastIndexOf('\\')) + 1;
            node.m_vertexEntryMethod = uri.Substring(nameStart);
            break;
        }
        case QueryNodeType.OutputTable:
        {
            // Not used as a vertex: same as above, but on the metadata URI and
            // truncated to at most 8 characters.
            string uri = ((DryadOutputNode)node).MetaDataUri;
            int nameStart = Math.Max(uri.LastIndexOf('/'), uri.LastIndexOf('\\')) + 1;
            int nameLength = Math.Min(8, uri.Length - nameStart);
            node.m_vertexEntryMethod = uri.Substring(nameStart, nameLength);
            break;
        }
        case QueryNodeType.Tee:
        {
            // Not used as a vertex.
            node.m_vertexEntryMethod = HpcLinqCodeGen.MakeUniqueName("Tee");

            // Broadcast manager code generation.
            if (node.DynamicManager.ManagerType != DynamicManagerType.None)
            {
                node.DynamicManager.CreateVertexCode();
            }
            break;
        }
        case QueryNodeType.Concat:
        {
            // Not used as a vertex.
            node.m_vertexEntryMethod = HpcLinqCodeGen.MakeUniqueName("Concat");
            break;
        }
        default:
        {
            // Every other node type gets a real vertex method.
            CodeMemberMethod generated = this.m_codeGen.AddVertexMethod(node);
            node.m_vertexEntryMethod = generated.Name;
            node.DynamicManager.CreateVertexCode();
            break;
        }
    }
}
/// <summary>
/// Builds the plan node for a Concat: visits every child of <paramref name="source"/>,
/// wraps them in a <c>DryadConcatNode</c>, and, when the result is not dynamic and has
/// more than <c>StaticConfig.MaxPartitionCount</c> partitions, appends a sub-plan that
/// repartitions the data into half as many partitions.
/// </summary>
/// <param name="source">Query node info whose children are concatenated.</param>
/// <param name="queryExpr">The query expression attached to every created node.</param>
/// <returns>The concat node, or the final merge node of the repartitioning sub-plan.</returns>
private DryadQueryNode VisitConcat(QueryNodeInfo source, MethodCallExpression queryExpr)
{
    DryadQueryNode[] inputs = new DryadQueryNode[source.children.Count];
    for (int idx = 0; idx < source.children.Count; idx++)
    {
        inputs[idx] = this.Visit(source.children[idx].child);
    }

    DryadQueryNode resNode = new DryadConcatNode(queryExpr, inputs);
    int parCount = resNode.OutputPartition.Count;
    if (resNode.IsDynamic || parCount <= StaticConfig.MaxPartitionCount)
    {
        return resNode;
    }

    // Too many partitions, need to repartition.
    int newParCount = parCount / 2;

    // First-stage LongCount over the concat output, followed by a merge.
    DryadQueryNode countNode = new DryadBasicAggregateNode(null, AggregateOpType.LongCount, true, false, queryExpr, resNode);
    DryadQueryNode mergeCountNode = new DryadMergeNode(true, false, queryExpr, countNode);

    // Apply node for s => IndexedCount(s)
    ParameterExpression countsParam = Expression.Parameter(typeof(IEnumerable<>).MakeGenericType(typeof(long)), "s");
    MethodInfo indexedCountMethod = typeof(HpcLinqHelper).GetMethod("IndexedCount");
    indexedCountMethod = indexedCountMethod.MakeGenericMethod(typeof(long));
    Expression indexedCountBody = Expression.Call(indexedCountMethod, countsParam);
    Type indexedCountFuncType = typeof(Func<,>).MakeGenericType(countsParam.Type, indexedCountBody.Type);
    LambdaExpression indexedCountFunc = Expression.Lambda(indexedCountFuncType, indexedCountBody, countsParam);
    DryadQueryNode indexedCountNode = new DryadApplyNode(indexedCountFunc, queryExpr, mergeCountNode);

    // HashPartition(x => x.Index, parCount)
    ParameterExpression indexedParam = Expression.Parameter(indexedCountBody.Type.GetGenericArguments()[0], "x");
    Expression countKeyBody = Expression.Property(indexedParam, "Index");
    Type countKeyFuncType = typeof(Func<,>).MakeGenericType(indexedParam.Type, countKeyBody.Type);
    LambdaExpression countKeySelector = Expression.Lambda(countKeyFuncType, countKeyBody, indexedParam);
    DryadQueryNode distCountNode = new DryadHashPartitionNode(countKeySelector, null, parCount, queryExpr, indexedCountNode);

    // Apply node for (x, y) => AddPartitionIndex(x, y, newParCount)
    Type recordType = resNode.OutputTypes[0];
    ParameterExpression indexedCountsParam = Expression.Parameter(indexedCountBody.Type, "x");
    ParameterExpression recordsParam = Expression.Parameter(typeof(IEnumerable<>).MakeGenericType(recordType), "y");
    MethodInfo addIndexMethod = typeof(HpcLinqHelper).GetMethod("AddPartitionIndex");
    addIndexMethod = addIndexMethod.MakeGenericMethod(recordType);
    Expression addIndexBody = Expression.Call(addIndexMethod, indexedCountsParam, recordsParam, Expression.Constant(newParCount));
    Type addIndexFuncType = typeof(Func<,,>).MakeGenericType(indexedCountsParam.Type, recordsParam.Type, addIndexBody.Type);
    LambdaExpression addIndexFunc = Expression.Lambda(addIndexFuncType, addIndexBody, indexedCountsParam, recordsParam);
    DryadQueryNode addIndexNode = new DryadApplyNode(addIndexFunc, queryExpr, distCountNode, resNode);

    // HashPartition(x => x.Index, x => x.Value, newParCount)
    ParameterExpression pairParam = Expression.Parameter(addIndexBody.Type.GetGenericArguments()[0], "x");
    Expression pairIndexBody = Expression.Property(pairParam, "Index");
    Type pairIndexFuncType = typeof(Func<,>).MakeGenericType(pairParam.Type, pairIndexBody.Type);
    LambdaExpression keySelectExpr = Expression.Lambda(pairIndexFuncType, pairIndexBody, pairParam);

    Expression pairValueBody = Expression.Property(pairParam, "Value");
    Type pairValueFuncType = typeof(Func<,>).MakeGenericType(pairParam.Type, pairValueBody.Type);
    LambdaExpression resultSelectExpr = Expression.Lambda(pairValueFuncType, pairValueBody, pairParam);

    resNode = new DryadHashPartitionNode(keySelectExpr, resultSelectExpr, null, newParCount, false, queryExpr, addIndexNode);
    resNode = new DryadMergeNode(true, true, queryExpr, resNode);
    return resNode;
}
/// <summary>
/// Second plan-rewriting phase: validates fork nodes, recursively rewrites the children,
/// pipeline-reduces the node and inserts a Tee where the reduced node's output is forked.
/// </summary>
/// <param name="node">Node to rewrite.</param>
/// <returns>
/// <paramref name="node"/> itself when it does not carry the current phase id, its
/// super node when it has one, otherwise the reduced (and possibly Tee-wrapped) node.
/// </returns>
private DryadQueryNode VisitPhase2(DryadQueryNode node)
{
    if (node.m_uniqueId != this.m_currentPhaseId)
    {
        return node;
    }

    DryadForkNode forkNode = node as DryadForkNode;
    if (forkNode != null)
    {
        // For now, we require every branch of a fork be used.
        for (int i = 0; i < forkNode.Parents.Count; i++)
        {
            bool unusedBranch = (forkNode.Parents[i] is DryadTeeNode) &&
                                (forkNode.Parents[i].Parents.Count == 0);
            if (unusedBranch)
            {
                throw DryadLinqException.Create(HpcLinqErrorCode.BranchOfForkNotUsed,
                                                string.Format(SR.BranchOfForkNotUsed, i),
                                                node.QueryExpression);
            }
        }
    }

    DryadQueryNode resNode = node.SuperNode;
    if (resNode != null)
    {
        return resNode;
    }

    for (int i = 0; i < node.Children.Length; i++)
    {
        node.Children[i] = this.VisitPhase2(node.Children[i]);
    }
    resNode = node.PipelineReduce();
    resNode.m_uniqueId++;

    // Insert a Tee node if needed.
    DryadQueryNode outputNode = resNode.OutputNode;
    bool needsTee = outputNode.IsForked &&
                    !(outputNode is DryadForkNode) &&
                    !(outputNode is DryadTeeNode);
    if (needsTee)
    {
        resNode = resNode.InsertTee(true);
    }
    return resNode;
}
/// <summary>
/// Dispatches on <c>node.NodeType</c>, casts the node to the matching concrete node class
/// and forwards to the corresponding <c>Visit</c> overload.
/// </summary>
/// <param name="node">Query node to handle.</param>
/// <param name="vertexMethod">Vertex method passed through to <c>Visit</c>.</param>
/// <param name="readerNames">Reader names passed through to <c>Visit</c>.</param>
/// <param name="writerNames">Writer names passed through to <c>Visit</c>.</param>
/// <returns>The string returned by the selected <c>Visit</c> overload.</returns>
/// <exception cref="DryadLinqException">The node type is not handled here.</exception>
internal virtual string AddVertexCode(DryadQueryNode node,
                                      CodeMemberMethod vertexMethod,
                                      string[] readerNames,
                                      string[] writerNames)
{
    switch (node.NodeType)
    {
        case QueryNodeType.InputTable:
            return this.Visit((DryadInputNode)node, vertexMethod, readerNames, writerNames);

        case QueryNodeType.OutputTable:
            return this.Visit((DryadOutputNode)node, vertexMethod, readerNames, writerNames);

        case QueryNodeType.Aggregate:
            return this.Visit((DryadAggregateNode)node, vertexMethod, readerNames, writerNames);

        case QueryNodeType.Select:
        case QueryNodeType.SelectMany:
            return this.Visit((DryadSelectNode)node, vertexMethod, readerNames, writerNames);

        case QueryNodeType.Where:
            return this.Visit((DryadWhereNode)node, vertexMethod, readerNames, writerNames);

        case QueryNodeType.Distinct:
            return this.Visit((DryadDistinctNode)node, vertexMethod, readerNames, writerNames);

        case QueryNodeType.BasicAggregate:
            return this.Visit((DryadBasicAggregateNode)node, vertexMethod, readerNames, writerNames);

        case QueryNodeType.GroupBy:
            return this.Visit((DryadGroupByNode)node, vertexMethod, readerNames, writerNames);

        case QueryNodeType.OrderBy:
            return this.Visit((DryadOrderByNode)node, vertexMethod, readerNames, writerNames);

        case QueryNodeType.Skip:
        case QueryNodeType.SkipWhile:
        case QueryNodeType.Take:
        case QueryNodeType.TakeWhile:
            return this.Visit((DryadPartitionOpNode)node, vertexMethod, readerNames, writerNames);

        case QueryNodeType.Contains:
            return this.Visit((DryadContainsNode)node, vertexMethod, readerNames, writerNames);

        case QueryNodeType.Join:
        case QueryNodeType.GroupJoin:
            return this.Visit((DryadJoinNode)node, vertexMethod, readerNames, writerNames);

        case QueryNodeType.Union:
        case QueryNodeType.Intersect:
        case QueryNodeType.Except:
            return this.Visit((DryadSetOperationNode)node, vertexMethod, readerNames, writerNames);

        case QueryNodeType.Concat:
            return this.Visit((DryadConcatNode)node, vertexMethod, readerNames, writerNames);

        case QueryNodeType.Zip:
            return this.Visit((DryadZipNode)node, vertexMethod, readerNames, writerNames);

        case QueryNodeType.Super:
            return this.Visit((DryadSuperNode)node, vertexMethod, readerNames, writerNames);

        case QueryNodeType.RangePartition:
            return this.Visit((DryadRangePartitionNode)node, vertexMethod, readerNames, writerNames);

        case QueryNodeType.HashPartition:
            return this.Visit((DryadHashPartitionNode)node, vertexMethod, readerNames, writerNames);

        case QueryNodeType.Merge:
            return this.Visit((DryadMergeNode)node, vertexMethod, readerNames, writerNames);

        case QueryNodeType.Apply:
            return this.Visit((DryadApplyNode)node, vertexMethod, readerNames, writerNames);

        case QueryNodeType.Fork:
            return this.Visit((DryadForkNode)node, vertexMethod, readerNames, writerNames);

        case QueryNodeType.Tee:
            return this.Visit((DryadTeeNode)node, vertexMethod, readerNames, writerNames);

        case QueryNodeType.Dynamic:
            return this.Visit((DryadDynamicNode)node, vertexMethod, readerNames, writerNames);

        case QueryNodeType.Dummy:
            return this.Visit((DryadDummyNode)node, vertexMethod, readerNames, writerNames);

        default:
            throw new DryadLinqException("Internal error: unhandled node type " + node.NodeType);
    }
}
/// <summary>
/// Creates the first-stage node for a partition operator. TakeWhile becomes an Apply node
/// calling <c>HpcLinqEnumerable.GroupTakeWhile</c>; every other node type becomes a
/// <c>DryadPartitionOpNode</c>.
/// </summary>
/// <param name="opName">Operator name given to the partition-op node.</param>
/// <param name="nodeType">Node type of the operator.</param>
/// <param name="controlExpr">Control expression of the operator.</param>
/// <param name="queryExpr">Query expression attached to the created node.</param>
/// <param name="child">Input node.</param>
private DryadQueryNode FirstStagePartitionOp(string opName,
                                             QueryNodeType nodeType,
                                             Expression controlExpr,
                                             MethodCallExpression queryExpr,
                                             DryadQueryNode child)
{
    if (nodeType != QueryNodeType.TakeWhile)
    {
        return new DryadPartitionOpNode(opName, nodeType, controlExpr, true, queryExpr, child);
    }

    // x => GroupTakeWhile(x, controlExpr), with x typed as IEnumerable<record>.
    Type inputType = typeof(IEnumerable<>).MakeGenericType(child.OutputTypes[0]);
    ParameterExpression inputParam = Expression.Parameter(inputType, HpcLinqCodeGen.MakeUniqueName("x"));
    MethodInfo groupTakeWhile = typeof(HpcLinqEnumerable).GetMethod("GroupTakeWhile");
    groupTakeWhile = groupTakeWhile.MakeGenericMethod(child.OutputTypes[0]);
    Expression callBody = Expression.Call(groupTakeWhile, inputParam, controlExpr);
    Type delegateType = typeof(Func<,>).MakeGenericType(inputType, callBody.Type);
    LambdaExpression procFunc = Expression.Lambda(delegateType, callBody, inputParam);
    return new DryadApplyNode(procFunc, queryExpr, child);
}
/// <summary>
/// Creates a Contains node. The value expression (and the comparer expression, when
/// given) are evaluated and the results stored in <c>HpcLinqObjectStore</c>; the output
/// is described as randomly partitioned with the child's partition count.
/// </summary>
/// <param name="valueExpr">Expression for the value to look for.</param>
/// <param name="comparerExpr">Comparer expression; may be null.</param>
/// <param name="queryExpr">Query expression this node was created from.</param>
/// <param name="child">Input node.</param>
internal DryadContainsNode(Expression valueExpr,
                           Expression comparerExpr,
                           Expression queryExpr,
                           DryadQueryNode child)
    : base(QueryNodeType.Contains, child.QueryGen, queryExpr, child)
{
    this.m_opName = "Contains";
    this.m_valueExpression = valueExpr;
    this.m_comparerExpression = comparerExpr;

    // The value is stored first, then (optionally) the comparer.
    this.m_valueIdx = HpcLinqObjectStore.Put(ExpressionSimplifier.Evaluate(valueExpr));

    ExpressionSimplifier<object> evaluator = new ExpressionSimplifier<object>();
    if (comparerExpr == null)
    {
        // -1 marks "no comparer stored".
        this.m_comparerIdx = -1;
    }
    else
    {
        this.m_comparerIdx = HpcLinqObjectStore.Put(evaluator.Eval(comparerExpr));
    }

    this.m_partitionCount = child.OutputDataSetInfo.partitionInfo.Count;

    this.m_outputDataSetInfo = new DataSetInfo();
    this.m_outputDataSetInfo.partitionInfo = new RandomPartition(this.m_partitionCount);

    this.m_dynamicManager = this.InferDynamicManager();
}
/// <summary>
/// Adds <paramref name="node"/> to the vertex node list.
/// </summary>
/// <param name="index">Position to insert at, or -1 to append at the end.</param>
/// <param name="node">Node to add.</param>
internal void InsertVertexNode(int index, DryadQueryNode node)
{
    if (index != -1)
    {
        this.m_vertexNodes.Insert(index, node);
        return;
    }

    this.m_vertexNodes.Add(node);
}
/// <summary>
/// Appends <paramref name="node"/> to this object's node collection.
/// </summary>
/// <param name="node">Node to add.</param>
internal void Add(DryadQueryNode node)
{
    m_nodes.Add(node);
}
/// <summary>
/// Creates a basic aggregate node (operator name taken from <paramref name="aggType"/>).
/// A first-stage node keeps the child's partition count with a random partitioning and an
/// inferred dynamic manager; otherwise the node has a single partition and no dynamic manager.
/// </summary>
/// <param name="selectExpr">Selector expression; stored as given.</param>
/// <param name="aggType">Kind of aggregate.</param>
/// <param name="isFirstStage">Whether this is the first-stage aggregation.</param>
/// <param name="isQuery">Stored as given in <c>m_isQuery</c>.</param>
/// <param name="queryExpr">Query expression this node was created from.</param>
/// <param name="child">Input node.</param>
internal DryadBasicAggregateNode(LambdaExpression selectExpr,
                                 AggregateOpType aggType,
                                 bool isFirstStage,
                                 bool isQuery,
                                 Expression queryExpr,
                                 DryadQueryNode child)
    : base(QueryNodeType.BasicAggregate, child.QueryGen, queryExpr, child)
{
    this.m_opName = aggType.ToString();
    this.m_aggregateOpType = aggType;
    this.m_selectExpression = selectExpr;
    this.m_isFirstStage = isFirstStage;
    this.m_isQuery = isQuery;

    if (!isFirstStage)
    {
        this.m_partitionCount = 1;
        this.m_outputDataSetInfo = new DataSetInfo();
        this.m_dynamicManager = DynamicManager.None;
        return;
    }

    this.m_partitionCount = child.OutputDataSetInfo.partitionInfo.Count;
    this.m_outputDataSetInfo = new DataSetInfo();
    this.m_outputDataSetInfo.partitionInfo = new RandomPartition(this.m_partitionCount);
    this.m_dynamicManager = this.InferDynamicManager();
}
/// <summary>
/// Creates a Zip node over two inputs. The partition count is taken from
/// <paramref name="child1"/>; the output data set info is computed by
/// <c>ComputeOutputDataSetInfo</c>.
/// </summary>
/// <param name="selectExpr">Selector expression; stored as given.</param>
/// <param name="queryExpr">Query expression this node was created from.</param>
/// <param name="child1">First input node.</param>
/// <param name="child2">Second input node.</param>
internal DryadZipNode(LambdaExpression selectExpr,
                      Expression queryExpr,
                      DryadQueryNode child1,
                      DryadQueryNode child2)
    : base(QueryNodeType.Zip, child1.QueryGen, queryExpr, child1, child2)
{
    this.m_selectExpression = selectExpr;
    this.m_opName = "Zip";

    this.m_partitionCount = child1.OutputPartition.Count;
    this.m_outputDataSetInfo = this.ComputeOutputDataSetInfo();
}
/// <summary>
/// Creates a Where node. The operator name is "LongWhere" for the indexed overload whose
/// index parameter is a <c>long</c>, and "Where" otherwise. Partition count and data set
/// info are copied from the child.
/// </summary>
/// <param name="whereExpr">The predicate lambda; must not be null.</param>
/// <param name="queryExpr">Query expression this node was created from.</param>
/// <param name="child">Input node.</param>
internal DryadWhereNode(LambdaExpression whereExpr,
                        Expression queryExpr,
                        DryadQueryNode child)
    : base(QueryNodeType.Where, child.QueryGen, queryExpr, child)
{
    this.m_whereExpression = whereExpr;

    // Indexed version with a long index: use opName "LongWhere".
    // Parameters is a ReadOnlyCollection, so read its Count property directly instead of
    // going through the LINQ Count() extension.
    if (this.m_whereExpression.Parameters.Count == 2 &&
        this.m_whereExpression.Parameters[1].Type == typeof(long))
    {
        this.m_opName = "LongWhere";
    }
    else
    {
        this.m_opName = "Where";
    }

    this.m_partitionCount = child.OutputPartition.Count;
    this.m_outputDataSetInfo = new DataSetInfo(child.OutputDataSetInfo);
    this.m_dynamicManager = this.InferDynamicManager();
}
/// <summary>
/// Creates a Tee node. It keeps the child's partition count and describes its output as
/// randomly partitioned, with no ordering and no distinctness.
/// </summary>
/// <param name="outputType">Record type produced by the node.</param>
/// <param name="isForked">Initial value of <c>IsForked</c>.</param>
/// <param name="queryExpr">Query expression this node was created from.</param>
/// <param name="child">Input node.</param>
internal DryadTeeNode(Type outputType,
                      bool isForked,
                      Expression queryExpr,
                      DryadQueryNode child)
    : base(QueryNodeType.Tee, child.QueryGen, queryExpr, child)
{
    this.m_opName = "Tee";
    this.m_outputType = outputType;
    this.IsForked = isForked;

    this.m_partitionCount = child.OutputPartition.Count;

    PartitionInfo randomPartition = new RandomPartition(child.OutputDataSetInfo.partitionInfo.Count);
    this.m_outputDataSetInfo = new DataSetInfo(randomPartition, DataSetInfo.NoOrderBy, DataSetInfo.NoDistinct);

    this.m_dynamicManager = this.InferDynamicManager();
}
/// <summary>
/// Builds the CodeDOM declaration of the vertex-params local: a variable of type
/// "HpcLinqVertexParams" named <c>DryadVertexParamName</c>, initialized with
/// <c>new HpcLinqVertexParams(inputArity, outputArity)</c>.
/// </summary>
/// <param name="node">Node whose arities are used.</param>
/// <returns>The variable declaration statement.</returns>
internal static CodeVariableDeclarationStatement MakeDryadVertexParamsDecl(DryadQueryNode node)
{
    // Referenced queries count as additional inputs.
    int totalInputs = node.InputArity + node.GetReferencedQueries().Count;
    int totalOutputs = node.OutputArity;

    CodeExpression inputArg = new CodePrimitiveExpression(totalInputs);
    CodeExpression outputArg = new CodePrimitiveExpression(totalOutputs);
    CodeExpression createParams = new CodeObjectCreateExpression("HpcLinqVertexParams", inputArg, outputArg);

    return new CodeVariableDeclarationStatement("HpcLinqVertexParams", DryadVertexParamName, createParams);
}
/// <summary>
/// Creates a range-distributor dynamic manager for <paramref name="node"/>, using
/// <c>HpcLinqSampler.SAMPLE_RATE</c> as its sample rate.
/// </summary>
/// <param name="node">Node passed to the base dynamic manager.</param>
internal DynamicRangeDistributor(DryadQueryNode node)
    : base(DynamicManagerType.RangeDistributor, node)
{
    //@@TODO[P2]: This sample rate used here should really be its own constant.
    m_sampleRate = HpcLinqSampler.SAMPLE_RATE;
}
/// <summary>
/// Creates a node info and links it to its children: one <c>NodeInfoEdge</c> is created
/// per child and registered both in this node's child list and in the child's parent list.
/// </summary>
/// <param name="queryExpression">Expression this node info describes.</param>
/// <param name="isQueryOperator">Stored as given in <c>isQueryOperator</c>.</param>
/// <param name="children">Child node infos, in order.</param>
public QueryNodeInfo(Expression queryExpression,
                     bool isQueryOperator,
                     params QueryNodeInfo[] children)
{
    this.queryExpression = queryExpression;
    this.isQueryOperator = isQueryOperator;

    this.children = new List<NodeInfoEdge>(children.Length);
    for (int i = 0; i < children.Length; i++)
    {
        QueryNodeInfo childInfo = children[i];
        NodeInfoEdge link = new NodeInfoEdge(this, childInfo);
        this.children.Add(link);
        childInfo.parents.Add(link);
    }

    // A freshly created node has no parents and no plan node yet.
    this.parents = new List<NodeInfoEdge>();
    this.queryNode = null;
}
/// <summary>
/// Creates a dynamic manager of the given type and registers a single vertex node with it.
/// </summary>
/// <param name="type">Type of manager to create.</param>
/// <param name="node">Node that the manager depends on.</param>
internal DynamicManager(DynamicManagerType type, DryadQueryNode node)
    : this(type)
{
    m_vertexNodes.Add(node);
}
/// <summary>
/// Creates an "auto-sampling range-partition sub-query": the child is forked into a
/// sampling branch (Phase1Sampling apply, merge, RangeSampler_Static apply) whose result
/// feeds a range-partition node over the child, followed by a merge.
/// </summary>
/// <param name="isDynamic">Expected to be false (asserted in debug builds).</param>
/// <param name="keySelectExpr">Key selector lambda.</param>
/// <param name="resultSelectExpr">Result selector passed to the range-partition node.</param>
/// <param name="comparerExpr">Key comparer expression, or null to use the default comparer.</param>
/// <param name="isDescendingExpr">Descending flag expression; null means constant false.</param>
/// <param name="queryExpr">Query expression attached to every created node.</param>
/// <param name="partitionCountExpr">Explicit partition count, or null to use the child's.</param>
/// <param name="child">Input node.</param>
/// <returns>The merge node that terminates the sub-query.</returns>
/// <exception cref="DryadLinqException">
/// No comparer was given and the key type has no default comparer.
/// </exception>
private DryadQueryNode CreateRangePartition(bool isDynamic,
                                            LambdaExpression keySelectExpr,
                                            LambdaExpression resultSelectExpr,
                                            Expression comparerExpr,
                                            Expression isDescendingExpr,
                                            Expression queryExpr,
                                            Expression partitionCountExpr,
                                            DryadQueryNode child)
{
    // Make child a Tee node.
    child.IsForked = true;

    // Default for isDescending is false.
    if (isDescendingExpr == null)
    {
        isDescendingExpr = Expression.Constant(false, typeof(bool));
    }

    // The partition count. NOTE: for MayRTM, isDynamic should never be true.
    Expression countExpr = null;
    if (!isDynamic)
    {
        // If partitionCount was not explicitly set, use the child's partition count.
        countExpr = (partitionCountExpr != null)
                        ? partitionCountExpr
                        : Expression.Constant(child.OutputPartition.Count);
    }

    Type recordType = child.OutputTypes[0];
    Type keyType = keySelectExpr.Type.GetGenericArguments()[1];

    // Create x => Phase1Sampling(x_1, keySelector, denv)
    Type recordsType = typeof(IEnumerable<>).MakeGenericType(recordType);
    ParameterExpression recordsParam = Expression.Parameter(recordsType, "x_1");
    ParameterExpression denvParam = Expression.Parameter(typeof(HpcLinqVertexEnv), "denv");
    MethodInfo samplerMethod = typeof(HpcLinqSampler).GetMethod("Phase1Sampling");
    Expression lambdaBody = Expression.Call(samplerMethod.MakeGenericMethod(recordType, keyType),
                                            recordsParam, keySelectExpr, denvParam);
    Type delegateType = typeof(Func<,>).MakeGenericType(recordsParam.Type, lambdaBody.Type);
    LambdaExpression samplingExpr = Expression.Lambda(delegateType, lambdaBody, recordsParam);

    // Create the Sampling node.
    DryadApplyNode samplingNode = new DryadApplyNode(samplingExpr, queryExpr, child);

    // Create x => RangeSampler(x, keySelectExpr, comparer, isDescendingExpr)
    Type keysType = typeof(IEnumerable<>).MakeGenericType(keyType);
    ParameterExpression keysParam = Expression.Parameter(keysType, "x_2");

    // For RTM, isDynamic should never be true, so only the static sampler is used
    // (a "RangeSampler_Dynamic" variant was selectable here before).
    Debug.Assert(isDynamic == false, "Internal error: isDynamic is true.");
    string samplerName = "RangeSampler_Static";
    samplerMethod = typeof(HpcLinqSampler).GetMethod(samplerName);
    samplerMethod = samplerMethod.MakeGenericMethod(keyType);

    Expression comparerArgExpr = comparerExpr;
    if (comparerExpr == null)
    {
        if (!TypeSystem.HasDefaultComparer(keyType))
        {
            throw DryadLinqException.Create(HpcLinqErrorCode.ComparerMustBeSpecifiedOrKeyTypeMustBeIComparable,
                                            string.Format(SR.ComparerMustBeSpecifiedOrKeyTypeMustBeIComparable, keyType),
                                            queryExpr);
        }
        // A typed null comparer constant is passed when no comparer was specified.
        comparerArgExpr = Expression.Constant(null, typeof(IComparer<>).MakeGenericType(keyType));
    }

    Expression lastArg = isDynamic ? (Expression)denvParam : countExpr;

    lambdaBody = Expression.Call(samplerMethod, keysParam, comparerArgExpr, isDescendingExpr, lastArg);
    delegateType = typeof(Func<,>).MakeGenericType(keysParam.Type, lambdaBody.Type);
    LambdaExpression samplerExpr = Expression.Lambda(delegateType, lambdaBody, keysParam);

    // Create the sample node.
    DryadQueryNode sampleDataNode = new DryadMergeNode(false, true, queryExpr, samplingNode);
    DryadQueryNode sampleNode = new DryadApplyNode(samplerExpr, queryExpr, sampleDataNode);
    sampleNode.IsForked = true;

    // Create the range distribute node.
    DryadQueryNode resNode = new DryadRangePartitionNode(keySelectExpr,
                                                         resultSelectExpr,
                                                         null,
                                                         comparerExpr,
                                                         isDescendingExpr,
                                                         countExpr,
                                                         queryExpr,
                                                         child,
                                                         sampleNode);
    resNode = new DryadMergeNode(false, true, queryExpr, resNode);

    // Set the dynamic manager for sampleNode.
    if (isDynamic)
    {
        sampleDataNode.DynamicManager = new DynamicRangeDistributor(resNode);
    }

    return resNode;
}
/// <summary>
/// Explain one query node: appends a textual description of the node to the plan.
/// Tee and output nodes produce no text; input nodes produce an "Input:" entry; every
/// other node produces its vertex entry method name followed by one line per operator
/// (for a super node, one line per contained node).
/// </summary>
/// <param name="plan">Return plan here.</param>
/// <param name="n">Node to explain.</param>
/// <exception cref="DryadLinqException">
/// The expression to print is a method call whose method name is not a known operator.
/// </exception>
internal static void ExplainNode(StringBuilder plan, DryadQueryNode n)
{
    if (n is DryadTeeNode || n is DryadOutputNode)
    {
        return;
    }
    if (n is DryadInputNode)
    {
        plan.AppendLine("Input:");
        plan.Append("\t");
        n.BuildString(plan);
        plan.AppendLine();
        return;
    }

    plan.Append(n.m_vertexEntryMethod);
    plan.AppendLine(":");

    // Nodes to print, in the order they were discovered. The set is used only for
    // membership tests; the list carries the order, so the output does not depend on
    // the enumeration order of HashSet.
    HashSet<DryadQueryNode> allchildren = new HashSet<DryadQueryNode>();
    List<DryadQueryNode> visitOrder = new List<DryadQueryNode>();

    DryadSuperNode sn = n as DryadSuperNode;
    if (sn != null)
    {
        // Breadth-first walk from the root, restricted to nodes contained in the super node.
        // A Queue gives O(1) dequeue (List.RemoveAt(0) shifts the whole list each time).
        Queue<DryadQueryNode> tovisit = new Queue<DryadQueryNode>();
        tovisit.Enqueue(sn.RootNode);
        while (tovisit.Count > 0)
        {
            DryadQueryNode t = tovisit.Dequeue();
            if (!(t is DryadSuperNode) && allchildren.Add(t))
            {
                visitOrder.Add(t);
            }
            foreach (DryadQueryNode tc in t.Children)
            {
                if (!allchildren.Contains(tc) && sn.Contains(tc))
                {
                    tovisit.Enqueue(tc);
                }
            }
        }
    }
    else
    {
        allchildren.Add(n);
        visitOrder.Add(n);
    }

    // Print in reverse discovery order.
    for (int pos = visitOrder.Count - 1; pos >= 0; pos--)
    {
        DryadQueryNode nc = visitOrder[pos];
        Expression expression = null;                   // expression to print
        List<string> additional = new List<string>();   // additional arguments to print
        int argsToSkip = 0;
        string methodname = nc.OpName;

        plan.Append("\t");

        if (nc is DryadMergeNode)
        {
            expression = ((DryadMergeNode)nc).ComparerExpression;
        }
        else if (nc is DryadHashPartitionNode)
        {
            DryadHashPartitionNode hp = (DryadHashPartitionNode)nc;
            expression = hp.KeySelectExpression;
            additional.Add(hp.NumberOfPartitions.ToString());
        }
        else if (nc is DryadGroupByNode)
        {
            DryadGroupByNode gb = (DryadGroupByNode)nc;
            expression = gb.KeySelectExpression;
            if (gb.ElemSelectExpression != null)
            {
                additional.Add(HpcLinqExpression.Summarize(gb.ElemSelectExpression));
            }
            if (gb.ResSelectExpression != null)
            {
                additional.Add(HpcLinqExpression.Summarize(gb.ResSelectExpression));
            }
            if (gb.ComparerExpression != null)
            {
                additional.Add(HpcLinqExpression.Summarize(gb.ComparerExpression));
            }
            if (gb.SeedExpression != null)
            {
                additional.Add(HpcLinqExpression.Summarize(gb.SeedExpression));
            }
            if (gb.AccumulatorExpression != null)
            {
                additional.Add(HpcLinqExpression.Summarize(gb.AccumulatorExpression));
            }
        }
        else if (nc is DryadOrderByNode)
        {
            DryadOrderByNode ob = (DryadOrderByNode)nc;
            expression = ob.KeySelectExpression;
            if (ob.ComparerExpression != null)
            {
                additional.Add(HpcLinqExpression.Summarize(ob.ComparerExpression));
            }
        }
        else if (nc is DryadWhereNode)
        {
            expression = ((DryadWhereNode)nc).WhereExpression;
        }
        else if (nc is DryadSelectNode)
        {
            DryadSelectNode s = (DryadSelectNode)nc;
            expression = s.SelectExpression;
            if (s.ResultSelectExpression != null)
            {
                additional.Add(HpcLinqExpression.Summarize(s.ResultSelectExpression));
            }
        }
        else if (nc is DryadAggregateNode)
        {
            DryadAggregateNode a = (DryadAggregateNode)nc;
            expression = a.FuncLambda;
            if (a.SeedExpression != null)
            {
                additional.Add(HpcLinqExpression.Summarize(a.SeedExpression));
            }
            if (a.ResultLambda != null)
            {
                additional.Add(HpcLinqExpression.Summarize(a.ResultLambda));
            }
        }
        else if (nc is DryadPartitionOpNode)
        {
            expression = ((DryadPartitionOpNode)nc).ControlExpression;
        }
        else if (nc is DryadJoinNode)
        {
            DryadJoinNode j = (DryadJoinNode)nc;
            expression = j.OuterKeySelectorExpression;
            additional.Add(HpcLinqExpression.Summarize(j.InnerKeySelectorExpression));
            additional.Add(HpcLinqExpression.Summarize(j.ResultSelectorExpression));
            if (j.ComparerExpression != null)
            {
                additional.Add(HpcLinqExpression.Summarize(j.ComparerExpression));
            }
        }
        else if (nc is DryadDistinctNode)
        {
            expression = ((DryadDistinctNode)nc).ComparerExpression;
        }
        else if (nc is DryadContainsNode)
        {
            DryadContainsNode c = (DryadContainsNode)nc;
            expression = c.ValueExpression;
            if (c.ComparerExpression != null)
            {
                additional.Add(HpcLinqExpression.Summarize(c.ComparerExpression));
            }
        }
        else if (nc is DryadBasicAggregateNode)
        {
            expression = ((DryadBasicAggregateNode)nc).SelectExpression;
        }
        else if (nc is DryadConcatNode)
        {
            // nothing to do
        }
        else if (nc is DryadSetOperationNode)
        {
            expression = ((DryadSetOperationNode)nc).ComparerExpression;
        }
        else if (nc is DryadRangePartitionNode)
        {
            DryadRangePartitionNode r = (DryadRangePartitionNode)nc;
            expression = r.CountExpression;
            // TODO: there's some other possible interesting info
        }
        else if (nc is DryadApplyNode)
        {
            expression = ((DryadApplyNode)nc).LambdaExpression;
        }
        else if (nc is DryadForkNode)
        {
            expression = ((DryadForkNode)nc).ForkLambda;
        }
        else if (nc is DryadTeeNode)
        {
            // nothing
        }
        else if (nc is DryadDynamicNode)
        {
            // nothing
        }
        else
        {
            expression = nc.QueryExpression;
        }

        if (expression is MethodCallExpression)
        {
            MethodCallExpression mc = (MethodCallExpression)expression;
            methodname = mc.Method.Name;  // overwrite methodname

            // Determine how many leading arguments to skip when printing.
            #region LINQMETHODS
            switch (mc.Method.Name)
            {
                case "Aggregate":
                case "AggregateAsQuery":
                case "Select":
                case "LongSelect":
                case "SelectMany":
                case "LongSelectMany":
                case "OfType":
                case "Where":
                case "LongWhere":
                case "First":
                case "FirstOrDefault":
                case "FirstAsQuery":
                case "Single":
                case "SingleOrDefault":
                case "SingleAsQuery":
                case "Last":
                case "LastOrDefault":
                case "LastAsQuery":
                case "Distinct":
                case "Any":
                case "AnyAsQuery":
                case "All":
                case "AllAsQuery":
                case "Count":
                case "CountAsQuery":
                case "LongCount":
                case "LongCountAsQuery":
                case "Sum":
                case "SumAsQuery":
                case "Min":
                case "MinAsQuery":
                case "Max":
                case "MaxAsQuery":
                case "Average":
                case "AverageAsQuery":
                case "GroupBy":
                case "OrderBy":
                case "OrderByDescending":
                case "ThenBy":
                case "ThenByDescending":
                case "Take":
                case "TakeWhile":
                case "LongTakeWhile":
                case "Skip":
                case "SkipWhile":
                case "LongSkipWhile":
                case "Contains":
                case "ContainsAsQuery":
                case "Reverse":
                case "Merge":
                case "HashPartition":
                case "RangePartition":
                case "Fork":
                case "ForkChoose":
                case "AssumeHashPartition":
                case "AssumeRangePartition":
                case "AssumeOrderBy":
                case "ToPartitionedTableLazy":
                case "AddCacheEntry":
                case "SlidingWindow":
                case "SelectWithPartitionIndex":
                case "ApplyWithPartitionIndex":
                    argsToSkip = 1;
                    break;
                case "Join":
                case "GroupJoin":
                case "Concat":
                case "MultiConcat":
                case "Union":
                case "Intersect":
                case "Except":
                case "SequenceEqual":
                case "SequenceEqualAsQuery":
                case "Zip":
                    argsToSkip = 2;
                    break;
                case "Apply":
                case "ApplyPerPartition":
                    if (mc.Arguments.Count < 3)
                    {
                        argsToSkip = 1;
                    }
                    else
                    {
                        argsToSkip = 2;
                    }
                    break;
                default:
                    throw DryadLinqException.Create(HpcLinqErrorCode.OperatorNotSupported,
                                                    String.Format(SR.OperatorNotSupported, mc.Method.Name),
                                                    expression);
            }
            #endregion

            plan.Append(methodname);
            plan.Append("(");

            int argno = 0;
            foreach (var arg in mc.Arguments)
            {
                argno++;
                if (argno <= argsToSkip)
                {
                    continue;
                }
                if (argno > argsToSkip + 1)
                {
                    plan.Append(",");
                }
                plan.Append(HpcLinqExpression.Summarize(arg));
            }
            plan.AppendLine(")");
        }
        else
        {
            // expression is not a method call
            plan.Append(methodname);
            plan.Append("(");

            // A separator is written only between printed items, so a missing primary
            // expression does not leave a leading comma in the argument list.
            bool needSeparator = false;
            if (expression != null)
            {
                plan.Append(HpcLinqExpression.Summarize(expression));
                needSeparator = true;
            }
            foreach (string extra in additional)
            {
                if (needSeparator)
                {
                    plan.Append(",");
                }
                plan.Append(extra);
                needSeparator = true;
            }
            plan.AppendLine(")");
        }
    }
}
/// <summary>
/// Applies <paramref name="func"/> to <paramref name="sourceNode"/>. When the node is a
/// Concat and <paramref name="source"/> is not forked, the function is instead applied to
/// each input of the Concat and a new Concat is built over the results.
/// </summary>
/// <param name="source">Query node info that supplies the fork flag and query expression.</param>
/// <param name="sourceNode">Plan node to transform.</param>
/// <param name="func">Transformation to apply.</param>
/// <returns>The transformed node.</returns>
private DryadQueryNode PromoteConcat(QueryNodeInfo source,
                                     DryadQueryNode sourceNode,
                                     Func<DryadQueryNode, DryadQueryNode> func)
{
    if (!(sourceNode is DryadConcatNode) || source.IsForked)
    {
        return func(sourceNode);
    }

    DryadQueryNode[] inputs = sourceNode.Children;
    DryadQueryNode[] promoted = new DryadQueryNode[inputs.Length];
    for (int idx = 0; idx < inputs.Length; idx++)
    {
        // Detach the input from the old Concat before transforming it.
        inputs[idx].Parents.Remove(sourceNode);
        promoted[idx] = func(inputs[idx]);
    }
    return new DryadConcatNode(source.queryExpression, promoted);
}
/// <summary>
/// Builds an explanation of the query plan by visiting each of the given nodes; a node
/// reachable from several of them is explained only once.
/// </summary>
/// <param name="plan">Return plan description here.</param>
/// <param name="nodes">Nodes to explain.</param>
internal void CodeShowVisit(StringBuilder plan, DryadQueryNode[] nodes)
{
    // One visited set is shared across all starting nodes.
    HashSet<DryadQueryNode> seen = new HashSet<DryadQueryNode>();
    for (int i = 0; i < nodes.Length; i++)
    {
        CodeShowVisit(plan, nodes[i], seen);
    }
}
/// <summary>
/// Builds the plan node for an Apply over several inputs. In the homomorphic case
/// (per-partition and no dynamic input) the inputs are either checked to have equal
/// partition counts or, when <paramref name="isFirstOnly"/> is set, every input but the
/// first is wrapped in a Tee and a Merge. Otherwise each dynamic or multi-partition input
/// is merged first.
/// </summary>
/// <param name="source">Query node info whose children are the inputs.</param>
/// <param name="procLambda">The function to apply.</param>
/// <param name="perPartition">Whether the apply is per partition.</param>
/// <param name="isFirstOnly">Selects the Tee/Merge treatment of the non-first inputs.</param>
/// <param name="queryExpr">Query expression attached to every created node.</param>
/// <returns>The apply node.</returns>
/// <exception cref="DryadLinqException">
/// Homomorphic apply whose inputs have different partition counts.
/// </exception>
private DryadQueryNode VisitMultiApply(QueryNodeInfo source,
                                       LambdaExpression procLambda,
                                       bool perPartition,
                                       bool isFirstOnly,
                                       MethodCallExpression queryExpr)
{
    DryadQueryNode[] inputs = new DryadQueryNode[source.children.Count];
    for (int idx = 0; idx < source.children.Count; idx++)
    {
        inputs[idx] = this.Visit(source.children[idx].child);
    }

    bool isDynamic = inputs.Any(input => input.IsDynamic);

    if (!perPartition || isDynamic)
    {
        // Non-homomorphic case.
        for (int idx = 0; idx < inputs.Length; idx++)
        {
            bool needsMerge = inputs[idx].IsDynamic || inputs[idx].OutputPartition.Count > 1;
            if (needsMerge)
            {
                inputs[idx] = new DryadMergeNode(true, false, queryExpr, inputs[idx]);
            }
        }
    }
    else if (isFirstOnly)
    {
        // Homomorphic case, first input only: every other input goes through a
        // cross-product Tee and is merged to the first input's partition count.
        for (int idx = 1; idx < inputs.Length; idx++)
        {
            DryadQueryNode tee = new DryadTeeNode(inputs[idx].OutputTypes[0], true, queryExpr, inputs[idx]);
            tee.ConOpType = ConnectionOpType.CrossProduct;
            inputs[idx] = new DryadMergeNode(inputs[0].OutputPartition.Count, queryExpr, tee);
        }
    }
    else
    {
        // Homomorphic case: all inputs must have the same partition count.
        int expectedCount = inputs[0].OutputPartition.Count;
        for (int idx = 1; idx < inputs.Length; idx++)
        {
            if (inputs[idx].OutputPartition.Count != expectedCount)
            {
                throw DryadLinqException.Create(HpcLinqErrorCode.HomomorphicApplyNeedsSamePartitionCount,
                                                SR.HomomorphicApplyNeedsSamePartitionCount,
                                                queryExpr);
            }
        }
    }

    return new DryadApplyNode(procLambda, true, queryExpr, inputs);
}
/// <summary>
/// Helper for CodeShowVisit: explains the children of a node first, then the node itself,
/// and never visits the same node twice.
/// </summary>
/// <param name="plan">Return plan here.</param>
/// <param name="n">Node to explain.</param>
/// <param name="visited">Set of nodes already visited.</param>
private void CodeShowVisit(StringBuilder plan, DryadQueryNode n, HashSet<DryadQueryNode> visited)
{
    // Add returns false when the node is already in the set.
    if (!visited.Add(n))
    {
        return;
    }

    foreach (DryadQueryNode input in n.Children)
    {
        CodeShowVisit(plan, input, visited);
    }
    ExplainNode(plan, n);
}
/// <summary>
/// Third plan-rewriting phase: bypasses non-forked Tee children, removes a Merge node whose
/// single input is static and has one partition, optionally attaches a broadcast dynamic
/// manager to Tee nodes, and then recurses into the children.
/// </summary>
/// <param name="node">Node to process.</param>
private void VisitPhase3(DryadQueryNode node)
{
    if (node.m_uniqueId != this.m_currentPhaseId)
    {
        return;
    }
    node.m_uniqueId++;

    // Remove some useless Tee nodes: connect the Tee's input directly to this node.
    foreach (DryadQueryNode child in node.Children)
    {
        bool uselessTee = (child is DryadTeeNode) && !child.IsForked;
        if (!uselessTee)
        {
            continue;
        }
        DryadQueryNode teeInput = child.Children[0];
        teeInput.UpdateParent(child, node);
        node.UpdateChildren(child, teeInput);
    }

    // Remove some useless Merge nodes: connect the Merge's input directly to its parent.
    bool mergeCandidate = (node is DryadMergeNode) &&
                          !node.IsForked &&
                          !(node.Parents[0] is DryadOutputNode);
    if (mergeCandidate)
    {
        DryadQueryNode mergeInput = node.Children[0];
        if (!mergeInput.IsDynamic && mergeInput.PartitionCount == 1)
        {
            node.Children[0].UpdateParent(node, node.Parents[0]);
            node.Parents[0].UpdateChildren(node, node.Children[0]);
        }
    }

    // Add dynamic managers for tee nodes.
    bool optFlagSet = (StaticConfig.DynamicOptLevel & StaticConfig.NoDynamicOpt) != 0;
    if (optFlagSet &&
        node is DryadTeeNode &&
        node.DynamicManager.ManagerType == DynamicManagerType.None)
    {
        // Insert a dynamic broadcast manager on Tee.
        node.DynamicManager = DynamicManager.Broadcast;
    }

    // Recurse on the children of node.
    foreach (DryadQueryNode child in node.Children)
    {
        this.VisitPhase3(child);
    }
}
/// <summary>
/// Adds a new vertex method for the given node to the Dryad vertex class.
/// The generated method is public static, takes a string "args", returns int,
/// and has its whole body wrapped in a try/catch followed by "return 0".
/// </summary>
/// <param name="node">The query node the vertex method is generated for.</param>
/// <returns>The generated method, already added to the vertex class.</returns>
/// <exception cref="DryadLinqException">The node type is not one this method generates code for.</exception>
internal CodeMemberMethod AddVertexMethod(DryadQueryNode node)
{
    CodeMemberMethod vertexMethod = new CodeMemberMethod();
    vertexMethod.Attributes = MemberAttributes.Public | MemberAttributes.Static;
    vertexMethod.ReturnType = new CodeTypeReference(typeof(int));
    vertexMethod.Parameters.Add(new CodeParameterDeclarationExpression(typeof(string), "args"));
    vertexMethod.Name = MakeUniqueName(node.NodeType.ToString());

    CodeStatementCollection body = vertexMethod.Statements;

    // Log the vertex start.
    string startedMsg = "DryadLinqLog.Add(\"Vertex " + vertexMethod.Name +
                        " started at {0}\", DateTime.Now.ToString(\"MM/dd/yyyy HH:mm:ss.fff\"))";
    body.Add(new CodeSnippetExpression(startedMsg));

    // We need to call AddCopyResourcesMethod()
    body.Add(new CodeSnippetExpression("CopyResources()"));

    if (!StaticConfig.LaunchDebugger)
    {
        // Default behavior: make sure the debug-check helper static method
        // is emitted and call it at vertex entry. This helper checks an
        // environment variable to decide whether to launch the debugger,
        // wait for a manual attach or simply skip straight into vertex code.
        EnsureDebuggerHelperMethodEmitted();
        CodeMethodInvokeExpression debuggerCheckExpr = new CodeMethodInvokeExpression(
            new CodeMethodReferenceExpression(new CodeTypeReferenceExpression(VertexClassName),
                                              DebugHelperMethodName));
        body.Add(new CodeExpressionStatement(debuggerCheckExpr));
    }
    else
    {
        // Static config requests an unconditional Debugger.Launch() at vertex entry.
        // Currently this isn't used because StaticConfig.LaunchDebugger is hardcoded to false
        System.Console.WriteLine("Launch debugger: may block application");
        CodeExpression launchExpr = new CodeSnippetExpression("System.Diagnostics.Debugger.Launch()");
        body.Add(new CodeExpressionStatement(launchExpr));
    }

    // Vertex parameters.
    body.Add(MakeDryadVertexParamsDecl(node));
    body.Add(SetDryadVertexParamField("VertexStageName", vertexMethod.Name));
    body.Add(SetDryadVertexParamField("UseLargeBuffer", node.UseLargeWriteBuffer));
    body.Add(SetDryadVertexParamField("KeepInputPortOrder", node.KeepInputPortOrder()));

    // Push the parallel-code settings into HpcLinqVertex
    bool multiThreading = this.m_context.Configuration.AllowConcurrentUserDelegatesInSingleProcess;
    body.Add(SetDryadVertexParamField("MultiThreading", multiThreading));
    CodeFieldReferenceExpression multiThreadingField = new CodeFieldReferenceExpression(DLVTypeExpr, "s_multiThreading");
    body.Add(new CodeAssignStatement(multiThreadingField, new CodePrimitiveExpression(multiThreading)));

    body.Add(MakeDryadEnvDecl(node));

    // One writer declaration per output type.
    Type[] outputTypes = node.OutputTypes;
    string[] writerNames = new string[outputTypes.Length];
    for (int w = 0; w < outputTypes.Length; w++)
    {
        Type outputType = outputTypes[w];
        CodeVariableDeclarationStatement writerDecl =
            MakeDryadWriterDecl(outputType, this.GetStaticFactoryName(outputType));
        body.Add(writerDecl);
        writerNames[w] = writerDecl.Name;
    }

    // Add side readers:
    node.AddSideReaders(vertexMethod);

    // Generate code based on the node type. Each handled case yields the
    // name of the sequence to write out (or null), plus whether that
    // sequence is written even when the node has no parents.
    string sourceToSink;
    bool sinkEvenWithoutParents;
    switch (node.NodeType)
    {
        case QueryNodeType.Where:
        case QueryNodeType.OrderBy:
        case QueryNodeType.Distinct:
        case QueryNodeType.Skip:
        case QueryNodeType.SkipWhile:
        case QueryNodeType.Take:
        case QueryNodeType.TakeWhile:
        case QueryNodeType.Merge:
        case QueryNodeType.Select:
        case QueryNodeType.SelectMany:
        case QueryNodeType.GroupBy:
        case QueryNodeType.BasicAggregate:
        case QueryNodeType.Aggregate:
        case QueryNodeType.Contains:
        case QueryNodeType.Join:
        case QueryNodeType.GroupJoin:
        case QueryNodeType.Union:
        case QueryNodeType.Intersect:
        case QueryNodeType.Except:
        case QueryNodeType.RangePartition:
        case QueryNodeType.HashPartition:
        case QueryNodeType.Apply:
        case QueryNodeType.Fork:
        case QueryNodeType.Dynamic:
        {
            // One reader declaration per input type.
            Type[] inputTypes = node.InputTypes;
            string[] sourceNames = new string[inputTypes.Length];
            for (int r = 0; r < inputTypes.Length; r++)
            {
                Type inputType = inputTypes[r];
                CodeVariableDeclarationStatement readerDecl =
                    MakeDryadReaderDecl(inputType, this.GetStaticFactoryName(inputType));
                body.Add(readerDecl);
                sourceNames[r] = readerDecl.Name;
            }

            sourceToSink = this.m_vertexCodeGen.AddVertexCode(node, vertexMethod, sourceNames, writerNames);
            sinkEvenWithoutParents = (node.NodeType == QueryNodeType.Dynamic);
            break;
        }
        case QueryNodeType.Super:
        {
            // No source names are passed for a super node.
            sourceToSink = this.m_vertexCodeGen.AddVertexCode(node, vertexMethod, null, writerNames);
            sinkEvenWithoutParents = false;
            break;
        }
        default:
        {
            //@@TODO: this should not be reachable. could change to Assert/InvalidOpEx
            throw new DryadLinqException(HpcLinqErrorCode.Internal,
                                         String.Format(SR.AddVertexNotHandled, node.NodeType));
        }
    }

    // Write the produced sequence to the first writer when there is one to write.
    if (sourceToSink != null && (sinkEvenWithoutParents || node.Parents.Count > 0))
    {
        CodeExpression sinkExpr = new CodeMethodInvokeExpression(
            new CodeVariableReferenceExpression(writerNames[0]),
            "WriteItemSequence",
            new CodeVariableReferenceExpression(sourceToSink));
        body.Add(sinkExpr);
    }

    // Log the vertex completion.
    string completedMsg = "DryadLinqLog.Add(\"Vertex " + vertexMethod.Name +
                          " completed at {0}\", DateTime.Now.ToString(\"MM/dd/yyyy HH:mm:ss.fff\"))";
    body.Add(new CodeSnippetExpression(completedMsg));

    // Catch block: hand the exception to HpcLinqVertexEnv.ReportVertexError.
    CodeCatchClause catchBlock = new CodeCatchClause("e");
    CodeMethodReferenceExpression errorReportMethod = new CodeMethodReferenceExpression(
        new CodeTypeReferenceExpression("HpcLinqVertexEnv"), "ReportVertexError");
    CodeVariableReferenceExpression exRef = new CodeVariableReferenceExpression(catchBlock.LocalName);
    catchBlock.Statements.Add(new CodeMethodInvokeExpression(errorReportMethod, exRef));

    // wrap the entire vertex method in a try/catch block
    CodeTryCatchFinallyStatement tryBlock = new CodeTryCatchFinallyStatement();
    tryBlock.CatchClauses.Add(catchBlock);
    tryBlock.TryStatements.AddRange(body);
    body.Clear();
    body.Add(tryBlock);

    // Always add "return 0", to make CLR hosting happy...
    body.Add(new CodeMethodReturnStatement(ZeroExpr));

    this.m_dryadVertexClass.Members.Add(vertexMethod);
    return vertexMethod;
}
/// <summary>
/// Assigns vertex ids from m_nextVertexId to the given node and everything
/// reachable through its referenced queries and children. Referenced queries
/// and children are processed before the node itself receives its id.
/// </summary>
/// <param name="node">The node to number; ignored unless its id still equals the current phase id.</param>
private void AssignUniqueId(DryadQueryNode node)
{
    if (node.m_uniqueId != this.m_currentPhaseId)
    {
        return;
    }

    foreach (Pair<string, DryadQueryNode> referenced in node.GetReferencedQueries())
    {
        this.AssignUniqueId(referenced.Value);
    }

    foreach (DryadQueryNode below in node.Children)
    {
        this.AssignUniqueId(below);
    }

    // Check again: the recursive calls above may already have given this
    // node an id (see the parent numbering at the end of this method).
    if (node.m_uniqueId != this.m_currentPhaseId)
    {
        return;
    }

    node.m_uniqueId = this.m_nextVertexId++;

    if (!(node.OutputNode is DryadForkNode))
    {
        return;
    }

    // When the output node is a fork, the still-unnumbered parents get the
    // ids immediately following this node's id.
    foreach (DryadQueryNode above in node.Parents)
    {
        if (above.m_uniqueId == this.m_currentPhaseId)
        {
            above.m_uniqueId = this.m_nextVertexId++;
        }
    }
}
/// <summary>
/// Builds the declaration of the vertex environment variable: a variable of
/// type "HpcLinqVertexEnv" named DryadEnvName, initialized with
/// new HpcLinqVertexEnv(args, &lt;vertex params&gt;).
/// </summary>
/// <param name="node">Not read by this method.</param>
/// <returns>The variable declaration statement.</returns>
internal static CodeVariableDeclarationStatement MakeDryadEnvDecl(DryadQueryNode node)
{
    // Constructor call taking the method's "args" argument and the vertex
    // parameter object as its two arguments.
    CodeExpression envCtorCall = new CodeObjectCreateExpression(
        "HpcLinqVertexEnv",
        new CodeArgumentReferenceExpression("args"),
        new CodeArgumentReferenceExpression(DryadVertexParamName));

    return new CodeVariableDeclarationStatement("HpcLinqVertexEnv", DryadEnvName, envCtorCall);
}
/// <summary>
/// Builds the sub-plan count -> merge -> apply(Offsets) -> hash partition -> merge
/// on top of the given child.
/// </summary>
/// <param name="isLong">Passed as the constant second argument of HpcLinqEnumerable.Offsets.</param>
/// <param name="queryExpr">The query expression attached to every node created here.</param>
/// <param name="child">The node whose records are counted; its output partition count sizes the hash partition.</param>
/// <returns>The final merge node of the sub-plan.</returns>
private DryadQueryNode CreateOffset(bool isLong, Expression queryExpr, DryadQueryNode child)
{
    // Count node
    DryadQueryNode countNode = new DryadBasicAggregateNode(null,
                                                           AggregateOpType.LongCount,
                                                           true,
                                                           false,
                                                           queryExpr,
                                                           child);

    // Lambda for the apply node: x => Offsets(x, isLong), with x an IEnumerable<long>.
    Type countsType = typeof(IEnumerable<>).MakeGenericType(typeof(long));
    ParameterExpression countsParam = Expression.Parameter(countsType, "x");
    MethodInfo offsetsMethod = typeof(HpcLinqEnumerable).GetMethod("Offsets");
    Expression offsetsCall = Expression.Call(offsetsMethod,
                                             countsParam,
                                             Expression.Constant(isLong, typeof(bool)));
    Type offsetsFuncType = typeof(Func<,>).MakeGenericType(countsParam.Type, offsetsCall.Type);
    LambdaExpression offsetsFunc = Expression.Lambda(offsetsFuncType, offsetsCall, countsParam);

    // Merge the counts, then apply the lambda to the merged counts.
    DryadQueryNode mergeCountNode = new DryadMergeNode(true, true, queryExpr, countNode);
    DryadQueryNode offsetsNode = new DryadApplyNode(offsetsFunc, queryExpr, mergeCountNode);

    // HashPartition on the identity key, into as many partitions as the child outputs.
    LambdaExpression identityKey = IdentityFunction.Instance(typeof(IndexedValue<long>));
    int partitionCount = child.OutputPartition.Count;
    DryadQueryNode hashDistNode = new DryadHashPartitionNode(identityKey,
                                                             null,
                                                             null,
                                                             partitionCount,
                                                             false,
                                                             queryExpr,
                                                             offsetsNode);

    return new DryadMergeNode(false, true, queryExpr, hashDistNode);
}
/// <summary>
/// Recursively emits the body for curNode within this super node. Children
/// for which Contains returns true are expanded recursively; every other
/// child gets a reader declaration. curNode is then appended to the pipeline.
/// </summary>
/// <param name="vertexMethod">The vertex method receiving the generated statements.</param>
/// <param name="curNode">The node currently being expanded.</param>
/// <param name="writerNames">Passed through unchanged to the recursive calls.</param>
/// <param name="pipeline">The pipeline being built; reset and extended as nodes are processed.</param>
private void MakeSuperBody(CodeMemberMethod vertexMethod,
                           DryadQueryNode curNode,
                           string[] writerNames,
                           Pipeline pipeline)
{
    bool isHomomorphic = curNode.IsHomomorphic;
    DryadQueryNode[] inputs = curNode.Children;
    string[] inputNames = new string[inputs.Length];

    for (int idx = 0; idx < inputs.Length; idx++)
    {
        DryadQueryNode input = inputs[idx];

        if (!this.Contains(input))
        {
            // This child is not expanded here: declare a reader for its
            // first output type and restart the pipeline from that reader.
            var codeGen = this.QueryGen.CodeGen;
            Type inputType = input.OutputTypes[0];
            string factoryName = codeGen.GetStaticFactoryName(inputType);
            CodeVariableDeclarationStatement readerDecl = codeGen.MakeDryadReaderDecl(inputType, factoryName);
            vertexMethod.Statements.Add(readerDecl);
            inputNames[idx] = readerDecl.Name;
            pipeline.Reset(new string[] { readerDecl.Name });
            continue;
        }

        // Expand the contained child first.
        this.MakeSuperBody(vertexMethod, input, writerNames, pipeline);
        if (!isHomomorphic)
        {
            // Emit the pipeline built so far and record the name returned
            // for it as this input's source.
            inputNames[idx] = this.QueryGen.CodeGen.AddVertexCode(vertexMethod, pipeline);
        }
    }

    if (!isHomomorphic)
    {
        // Restart the pipeline from the collected sources.
        pipeline.Reset(inputNames);
    }
    pipeline.Add(curNode);
}