/// <summary>
/// Append an explanation of every node reachable from the given nodes to the plan text.
/// One visited set is shared across all of the given nodes, so a node reachable
/// from several of them is explained only once.
/// </summary>
/// <param name="plan">Builder that receives the plan description.</param>
/// <param name="nodes">Nodes to explain.</param>
internal void CodeShowVisit(StringBuilder plan, DLinqQueryNode[] nodes)
{
    HashSet<DLinqQueryNode> seen = new HashSet<DLinqQueryNode>();
    for (int i = 0; i < nodes.Length; i++)
    {
        CodeShowVisit(plan, nodes[i], seen);
    }
}
/// <summary>
/// Recursive worker for CodeShowVisit: explains the children of a node first,
/// then the node itself, and never explains the same node twice.
/// </summary>
/// <param name="plan">Builder that receives the plan description.</param>
/// <param name="n">Node to explain.</param>
/// <param name="visited">Nodes that have already been handled; updated in place.</param>
private void CodeShowVisit(StringBuilder plan, DLinqQueryNode n, HashSet<DLinqQueryNode> visited)
{
    // HashSet.Add returns false when the node was already in the set.
    if (!visited.Add(n))
    {
        return;
    }

    foreach (DLinqQueryNode childNode in n.Children)
    {
        CodeShowVisit(plan, childNode, visited);
    }

    ExplainNode(plan, n);
}
/// <summary>
/// Create a Tee node on top of a single child.
/// </summary>
/// <param name="outputType">Record type produced by this node.</param>
/// <param name="isForked">Initial value of the IsForked property.</param>
/// <param name="queryExpr">Query expression associated with this node.</param>
/// <param name="child">The node whose output is consumed.</param>
internal DLinqTeeNode(Type outputType, bool isForked, Expression queryExpr, DLinqQueryNode child)
    : base(QueryNodeType.Tee, child.QueryGen, queryExpr, child)
{
    m_opName = "Tee";
    m_outputType = outputType;
    IsForked = isForked;
    m_partitionCount = child.OutputPartition.Count;

    // The output is described as randomly partitioned, with the child's partition
    // count and with no ordering or distinctness information.
    int childPartitions = child.OutputDataSetInfo.partitionInfo.Count;
    m_outputDataSetInfo = new DataSetInfo(new RandomPartition(childPartitions),
                                          DataSetInfo.NoOrderBy,
                                          DataSetInfo.NoDistinct);

    // Must run last: it is invoked after all the fields above have been set.
    m_dynamicManager = InferDynamicManager();
}
// Add a new vertex method to the DryadLinq vertex class.
//
// The generated method is "public static int <unique name>(string args)". Its body is
// wrapped in a single try/catch that reports any exception through
// VertexEnv.ReportVertexError, and it always ends with "return 0".
// Throws DryadLinqException (Internal) when the node type has no vertex code generator.
internal CodeMemberMethod AddVertexMethod(DLinqQueryNode node)
{
    CodeMemberMethod vertexMethod = new CodeMemberMethod();
    vertexMethod.Attributes = MemberAttributes.Public | MemberAttributes.Static;
    vertexMethod.ReturnType = new CodeTypeReference(typeof(int));
    vertexMethod.Parameters.Add(new CodeParameterDeclarationExpression(typeof(string), "args"));
    vertexMethod.Name = MakeUniqueName(node.NodeType.ToString());

    // All statements are first collected directly on the method; they are moved into
    // the try block at the very end.
    CodeStatementCollection stmts = vertexMethod.Statements;

    string startedMsg = "DryadLinqLog.AddInfo(\"Vertex " + vertexMethod.Name +
                        " started at {0}\", DateTime.Now.ToString(\"MM/dd/yyyy HH:mm:ss.fff\"))";
    stmts.Add(new CodeSnippetExpression(startedMsg));

    // We need to add a call to CopyResources()
    stmts.Add(new CodeSnippetExpression("CopyResources()"));

    if (StaticConfig.LaunchDebugger)
    {
        // If static config requests it, we do an unconditional Debugger.Launch() at vertex entry.
        // Currently this isn't used because StaticConfig.LaunchDebugger is hardcoded to false
        System.Console.WriteLine("Launch debugger: may block application");
        CodeExpression launchExpr = new CodeSnippetExpression("System.Diagnostics.Debugger.Launch()");
        stmts.Add(new CodeExpressionStatement(launchExpr));
    }
    else
    {
        // Otherwise (the default behavior), we check an environment variable to decide whether
        // to launch the debugger, wait for a manual attach or simply skip straight into vertex code.
        CodeMethodReferenceExpression debugHelper =
            new CodeMethodReferenceExpression(new CodeTypeReferenceExpression(HelperClassName),
                                              DebugHelperMethodName);
        stmts.Add(new CodeExpressionStatement(new CodeMethodInvokeExpression(debugHelper)));
    }

    stmts.Add(MakeVertexParamsDecl(node));
    stmts.Add(SetVertexParamField("VertexStageName", vertexMethod.Name));
    stmts.Add(SetVertexParamField("UseLargeBuffer", node.UseLargeWriteBuffer));

    // One SetInputParams call per regular input port.
    Int32[] portCountArray = node.InputPortCounts();
    bool[] keepPortOrderArray = node.KeepInputPortOrders();
    for (int port = 0; port < node.InputArity; port++)
    {
        CodeExpression setParamsExpr = new CodeMethodInvokeExpression(
                                               new CodeVariableReferenceExpression(VertexParamName),
                                               "SetInputParams",
                                               new CodePrimitiveExpression(port),
                                               new CodePrimitiveExpression(portCountArray[port]),
                                               new CodePrimitiveExpression(keepPortOrderArray[port]));
        stmts.Add(new CodeExpressionStatement(setParamsExpr));
    }

    // YY: We could probably do better here.
    // Referenced queries take the input slots after the regular inputs, each with
    // a port count of 1 and without keeping port order.
    for (int refIdx = 0; refIdx < node.GetReferencedQueries().Count; refIdx++)
    {
        CodeExpression setParamsExpr = new CodeMethodInvokeExpression(
                                               new CodeVariableReferenceExpression(VertexParamName),
                                               "SetInputParams",
                                               new CodePrimitiveExpression(refIdx + node.InputArity),
                                               new CodePrimitiveExpression(1),
                                               new CodePrimitiveExpression(false));
        stmts.Add(new CodeExpressionStatement(setParamsExpr));
    }

    // Push the parallel-code settings into DryadLinqVertex
    bool multiThreading = this.m_context.EnableMultiThreadingInVertex;
    stmts.Add(SetVertexParamField("MultiThreading", multiThreading));
    stmts.Add(new CodeAssignStatement(
                      new CodeFieldReferenceExpression(DLVTypeExpr, "s_multiThreading"),
                      new CodePrimitiveExpression(multiThreading)));
    stmts.Add(MakeVertexEnvDecl(node));

    // Declare one writer per output type and remember the variable names.
    Type[] outputTypes = node.OutputTypes;
    string[] writerNames = new string[outputTypes.Length];
    for (int outIdx = 0; outIdx < outputTypes.Length; outIdx++)
    {
        CodeVariableDeclarationStatement writerDecl =
            MakeVertexWriterDecl(outputTypes[outIdx], this.GetStaticFactoryName(outputTypes[outIdx]));
        stmts.Add(writerDecl);
        writerNames[outIdx] = writerDecl.Name;
    }

    // Add side readers:
    node.AddSideReaders(vertexMethod);

    // Decide, based on the node type, which reader variables (if any) are declared
    // here before the node-specific vertex code is generated.
    string[] sourceNames;
    switch (node.NodeType)
    {
        case QueryNodeType.Where:
        case QueryNodeType.OrderBy:
        case QueryNodeType.Distinct:
        case QueryNodeType.Skip:
        case QueryNodeType.SkipWhile:
        case QueryNodeType.Take:
        case QueryNodeType.TakeWhile:
        case QueryNodeType.Merge:
        case QueryNodeType.Select:
        case QueryNodeType.SelectMany:
        case QueryNodeType.Zip:
        case QueryNodeType.GroupBy:
        case QueryNodeType.BasicAggregate:
        case QueryNodeType.Aggregate:
        case QueryNodeType.Contains:
        case QueryNodeType.Join:
        case QueryNodeType.GroupJoin:
        case QueryNodeType.Union:
        case QueryNodeType.Intersect:
        case QueryNodeType.Except:
        case QueryNodeType.RangePartition:
        case QueryNodeType.HashPartition:
        case QueryNodeType.Apply:
        case QueryNodeType.Fork:
        case QueryNodeType.Dynamic:
        {
            Type[] inputTypes = node.InputTypes;
            sourceNames = new string[inputTypes.Length];
            for (int inIdx = 0; inIdx < inputTypes.Length; inIdx++)
            {
                CodeVariableDeclarationStatement readerDecl =
                    MakeVertexReaderDecl(inputTypes[inIdx], this.GetStaticFactoryName(inputTypes[inIdx]));
                stmts.Add(readerDecl);
                sourceNames[inIdx] = readerDecl.Name;
            }
            break;
        }
        case QueryNodeType.Super:
        {
            // No readers are declared here for a super node; null is passed instead.
            sourceNames = null;
            break;
        }
        default:
        {
            //@@TODO: this should not be reachable. could change to Assert/InvalidOpEx
            throw new DryadLinqException(DryadLinqErrorCode.Internal,
                                         String.Format(SR.AddVertexNotHandled, node.NodeType));
        }
    }

    // Generate the node-specific code. When it returns the name of a result sequence,
    // that sequence is written to the first writer.
    string sourceToSink = this.m_vertexCodeGen.AddVertexCode(node, vertexMethod, sourceNames, writerNames);
    if (sourceToSink != null)
    {
        CodeExpression sinkExpr = new CodeMethodInvokeExpression(
                                           new CodeVariableReferenceExpression(writerNames[0]),
                                           "WriteItemSequence",
                                           new CodeVariableReferenceExpression(sourceToSink));
        stmts.Add(sinkExpr);
    }

    string completedMsg = "DryadLinqLog.AddInfo(\"Vertex " + vertexMethod.Name +
                          " completed at {0}\", DateTime.Now.ToString(\"MM/dd/yyyy HH:mm:ss.fff\"))";
    stmts.Add(new CodeSnippetExpression(completedMsg));

    // add a catch block
    CodeCatchClause catchBlock = new CodeCatchClause("e");
    CodeMethodReferenceExpression errorReportMethod =
        new CodeMethodReferenceExpression(new CodeTypeReferenceExpression("VertexEnv"), "ReportVertexError");
    CodeVariableReferenceExpression exRef = new CodeVariableReferenceExpression(catchBlock.LocalName);
    catchBlock.Statements.Add(new CodeMethodInvokeExpression(errorReportMethod, exRef));

    // wrap the entire vertex method in a try/catch block
    CodeTryCatchFinallyStatement tryBlock = new CodeTryCatchFinallyStatement();
    tryBlock.CatchClauses.Add(catchBlock);
    tryBlock.TryStatements.AddRange(stmts);
    stmts.Clear();
    stmts.Add(tryBlock);

    // Always add "return 0", to make CLR hosting happy...
    stmts.Add(new CodeMethodReturnStatement(ZeroExpr));

    this.m_dryadVertexClass.Members.Add(vertexMethod);
    return vertexMethod;
}
/// <summary>
/// Build the declaration "DryadLinqVertexParams &lt;VertexParamName&gt; =
/// new DryadLinqVertexParams(inputArity, outputArity)" for a node.
/// The input arity passed is the node's InputArity plus the number of its referenced queries.
/// </summary>
/// <param name="node">Node whose arities are used.</param>
/// <returns>The variable declaration statement.</returns>
internal static CodeVariableDeclarationStatement MakeVertexParamsDecl(DLinqQueryNode node)
{
    int totalInputs = node.InputArity + node.GetReferencedQueries().Count;
    int totalOutputs = node.OutputArity;

    CodeExpression createParams = new CodeObjectCreateExpression(
                                          "DryadLinqVertexParams",
                                          new CodePrimitiveExpression(totalInputs),
                                          new CodePrimitiveExpression(totalOutputs));
    return new CodeVariableDeclarationStatement("DryadLinqVertexParams", VertexParamName, createParams);
}
/// <summary>
/// Create a dynamic manager of the given type whose vertex-node list
/// initially contains exactly one node.
/// </summary>
/// <param name="type">Type of manager to create.</param>
/// <param name="node">Node added to the manager's vertex-node list.</param>
internal DynamicManager(DynamicManagerType type, DLinqQueryNode node)
    : this(type)
{
    m_vertexNodes.Add(node);
}
/// <summary>
/// Add a node to the manager's vertex-node list.
/// </summary>
/// <param name="index">Position to insert at, or -1 to append at the end.</param>
/// <param name="node">Node to add.</param>
internal void InsertVertexNode(int index, DLinqQueryNode node)
{
    if (index != -1)
    {
        m_vertexNodes.Insert(index, node);
        return;
    }

    // -1 is the "append" sentinel.
    m_vertexNodes.Add(node);
}
/// <summary>
/// Code-generation pass over the plan: assigns m_vertexEntryMethod to every node
/// that is still tagged with the current phase id, generating a vertex method
/// for node types that carry code.
/// </summary>
/// <param name="node">Node to process (children are processed first).</param>
private void CodeGenVisit(DLinqQueryNode node)
{
    // A node whose id no longer equals the current phase id has already been handled.
    if (node.m_uniqueId != this.m_currentPhaseId)
    {
        return;
    }
    node.m_uniqueId++;

    // We process the types first so that children will also know about all
    // proxies/mappings that should be used.
    node.CreateCodeAndMappingsForVertexTypes(false);

    // Recurse on the children
    foreach (DLinqQueryNode childNode in node.Children)
    {
        this.CodeGenVisit(childNode);
    }

    switch (node.NodeType)
    {
        case QueryNodeType.InputTable:
        {
            // vertex with no code: named after the last path segment of the data source URI
            string path = ((DLinqInputNode)node).Table.DataSourceUri.AbsolutePath;
            int lastSep = Math.Max(path.LastIndexOf('/'), path.LastIndexOf('\\'));
            node.m_vertexEntryMethod = path.Substring(lastSep + 1);
            break;
        }
        case QueryNodeType.OutputTable:
        {
            // vertex with no code: named after at most the first 8 characters of the
            // last path segment of the output URI
            string path = ((DLinqOutputNode)node).OutputUri.AbsolutePath;
            int lastSep = Math.Max(path.LastIndexOf('/'), path.LastIndexOf('\\'));
            int nameLength = Math.Min(8, path.Length - lastSep - 1);
            node.m_vertexEntryMethod = path.Substring(lastSep + 1, nameLength);
            break;
        }
        case QueryNodeType.Tee:
        {
            // vertex with no code
            node.m_vertexEntryMethod = DryadLinqCodeGen.MakeUniqueName("Tee");

            // broadcast manager code generation
            if (node.DynamicManager.ManagerType != DynamicManagerType.None)
            {
                node.DynamicManager.CreateVertexCode();
            }
            break;
        }
        case QueryNodeType.Concat:
        case QueryNodeType.Dummy:
        {
            // vertex with no code
            node.m_vertexEntryMethod = DryadLinqCodeGen.MakeUniqueName(node.NodeType.ToString());
            break;
        }
        case QueryNodeType.DoWhile:
        {
            // vertex with no code; the loop body and condition sub-plans are visited as well
            node.m_vertexEntryMethod = DryadLinqCodeGen.MakeUniqueName(node.NodeType.ToString());
            DLinqDoWhileNode loopNode = (DLinqDoWhileNode)node;
            this.CodeGenVisit(loopNode.Body);
            this.CodeGenVisit(loopNode.Cond);
            break;
        }
        default:
        {
            CodeMemberMethod generated = this.m_codeGen.AddVertexMethod(node);
            node.m_vertexEntryMethod = generated.Name;
            node.DynamicManager.CreateVertexCode();
            break;
        }
    }
}
/// <summary>
/// Create an output-table node that takes its record type, data-set info and
/// partition count from its child.
/// </summary>
/// <param name="context">The DryadLinq context stored on the node.</param>
/// <param name="outputUri">Location of the output.</param>
/// <param name="isTempOutput">Value stored in the node's temp-output flag.</param>
/// <param name="outputScheme">Compression scheme stored for the output.</param>
/// <param name="queryExpr">Query expression associated with this node.</param>
/// <param name="child">The node producing the data.</param>
/// <exception cref="DryadLinqException">
/// The child's first output type is, or has a generic parameter that is, an anonymous type.
/// </exception>
internal DLinqOutputNode(DryadLinqContext context,
                         Uri outputUri,
                         bool isTempOutput,
                         CompressionScheme outputScheme,
                         Expression queryExpr,
                         DLinqQueryNode child)
    : base(QueryNodeType.OutputTable, child.QueryGen, queryExpr, child)
{
    Type recordType = child.OutputTypes[0];
    if (TypeSystem.IsTypeOrAnyGenericParamsAnonymous(recordType))
    {
        throw DryadLinqException.Create(DryadLinqErrorCode.OutputTypeCannotBeAnonymous,
                                        SR.OutputTypeCannotBeAnonymous,
                                        queryExpr);
    }

    m_opName = "Output";
    m_context = context;
    m_outputUri = outputUri;
    m_isTempOutput = isTempOutput;
    m_outputCompressionScheme = outputScheme;

    // Everything describing the data itself is inherited from the child.
    m_outputType = recordType;
    m_outputDataSetInfo = child.OutputDataSetInfo;
    m_partitionCount = child.OutputDataSetInfo.partitionInfo.Count;
    m_dynamicManager = DynamicManager.Splitter;
}
/// <summary>
/// Try to combine this node with its pipelinable children into a single super node.
/// </summary>
/// <remarks>
/// Children that cannot be pipelined with this node become children of the super node.
/// Children that can be pipelined are absorbed into the super node, and their own
/// children become children of the super node instead.
/// </remarks>
/// <returns>
/// This node, unchanged, when CanBePipelined() is false; otherwise the new
/// DLinqSuperNode constructed from this node.
/// </returns>
internal DLinqQueryNode PipelineReduce()
{
    if (!this.CanBePipelined())
    {
        return this;
    }

    DLinqQueryNode[] nodeChildren = this.Children;
    DLinqSuperNode resNode = new DLinqSuperNode(this);
    List<DLinqQueryNode> childList = new List<DLinqQueryNode>();

    for (int i = 0; i < nodeChildren.Length; i++)
    {
        DLinqQueryNode child = nodeChildren[i];
        if (this.CanNotBePipelinedWith(child))
        {
            // The child stays outside the super node; re-point it at the super node.
            // (The boolean result of UpdateParent is intentionally not used.)
            childList.Add(child);
            child.UpdateParent(this, resNode);
            continue;
        }

        DLinqSuperNode superChild = child as DLinqSuperNode;
        if (superChild != null)
        {
            // Absorb an existing super node: link directly to its root and move it
            // over to the new super node.
            nodeChildren[i] = superChild.RootNode;
            superChild.SwitchTo(resNode);
        }
        else
        {
            child.SuperNode = resNode;
        }

        // Fix the child's children
        foreach (DLinqQueryNode grandChild in child.Children)
        {
            childList.Add(grandChild);
            grandChild.UpdateParent(child, resNode);
        }
    }

    resNode.Children = childList.ToArray();
    resNode.OutputDataSetInfo = resNode.RootNode.OutputDataSetInfo;
    return resNode;
}
// Return true if this node and child can't be pipelined together.
// That is the case when the child is an Input, Concat, Tee, DoWhile or Dummy node,
// when the child is forked, or when it is an Apply node that writes to a stream.
private bool CanNotBePipelinedWith(DLinqQueryNode child)
{
    if (child is DLinqInputNode || child is DLinqConcatNode || child is DLinqTeeNode)
    {
        return true;
    }
    if (child.IsForked)
    {
        return true;
    }
    if (child is DLinqDoWhileNode || child is DLinqDummyNode)
    {
        return true;
    }

    DLinqApplyNode applyChild = child as DLinqApplyNode;
    return applyChild != null && applyChild.IsWriteToStream;
}
/// <summary>
/// Append a node to this object's node list.
/// </summary>
/// <param name="node">Node to append.</param>
internal void Add(DLinqQueryNode node)
{
    m_nodes.Add(node);
}
// Replace all occurrences of oldNode in this.Children by newNode.
// Occurrences are matched by reference identity.
// Return true iff oldNode is in this.Children.
internal bool UpdateChildren(DLinqQueryNode oldNode, DLinqQueryNode newNode)
{
    bool replacedAny = false;
    for (int slot = 0; slot < this.Children.Length; slot++)
    {
        if (!Object.ReferenceEquals(oldNode, this.Children[slot]))
        {
            continue;
        }
        this.Children[slot] = newNode;
        replacedAny = true;
    }
    return replacedAny;
}
/// <summary>
/// Append a node to the list of real nodes held by this object.
/// </summary>
/// <param name="node">Node to append.</param>
internal void AddNode(DLinqQueryNode node)
{
    m_realNodes.Add(node);
}
/// <summary>
/// Create a dynamic node for one real node.
/// </summary>
/// <param name="dmType">
/// Dynamic manager type; only FullAggregator and Broadcast are accepted.
/// </param>
/// <param name="node">The first real node; also supplies the query generator and query expression.</param>
/// <exception cref="DryadLinqException">
/// Internal error when <paramref name="dmType"/> is any other manager type.
/// </exception>
internal DLinqDynamicNode(DynamicManagerType dmType, DLinqQueryNode node)
    : base(QueryNodeType.Dynamic, node.QueryGen, node.QueryExpression)
{
    bool isSupported = (dmType == DynamicManagerType.FullAggregator ||
                        dmType == DynamicManagerType.Broadcast);
    if (!isSupported)
    {
        throw new DryadLinqException(DryadLinqErrorCode.Internal,
                                     SR.IllegalDynamicManagerType);
    }

    m_dmType = dmType;
    m_realNodes = new List<DLinqQueryNode>(1);
    m_realNodes.Add(node);
}
/// <summary>
/// Phase 2 of plan rewriting: pipeline-reduce the plan bottom-up and insert
/// Tee nodes where a forked output needs one.
/// </summary>
/// <param name="node">Node to rewrite.</param>
/// <returns>
/// The node that replaces <paramref name="node"/> in the plan: the node itself when it is
/// not tagged with the current phase id, its existing super node when it has one,
/// otherwise the result of the reduction (possibly wrapped by an inserted Tee).
/// </returns>
/// <exception cref="DryadLinqException">A branch of a fork node is not used.</exception>
private DLinqQueryNode VisitPhase2(DLinqQueryNode node)
{
    if (node.m_uniqueId != this.m_currentPhaseId)
    {
        return node;
    }

    // For now, we require every branch of a fork be used:
    DLinqForkNode forkNode = node as DLinqForkNode;
    if (forkNode != null)
    {
        for (int branchIdx = 0; branchIdx < forkNode.Parents.Count; branchIdx++)
        {
            DLinqQueryNode branch = forkNode.Parents[branchIdx];
            if ((branch is DLinqTeeNode) && (branch.Parents.Count == 0))
            {
                throw DryadLinqException.Create(DryadLinqErrorCode.BranchOfForkNotUsed,
                                                string.Format(SR.BranchOfForkNotUsed, branchIdx),
                                                node.QueryExpression);
            }
        }
    }

    // A node that already belongs to a super node is represented by that super node.
    DLinqQueryNode enclosing = node.SuperNode;
    if (enclosing != null)
    {
        return enclosing;
    }

    for (int childIdx = 0; childIdx < node.Children.Length; childIdx++)
    {
        node.Children[childIdx] = this.VisitPhase2(node.Children[childIdx]);
    }

    DLinqQueryNode reduced = node.PipelineReduce();
    reduced.m_uniqueId++;

    // Special treatment for DoWhile
    DLinqDoWhileNode doWhile = reduced as DLinqDoWhileNode;
    if (doWhile != null)
    {
        doWhile.Body = this.VisitPhase2(doWhile.Body);
        doWhile.Cond = this.VisitPhase2(doWhile.Cond);
    }

    // Insert a Tee node if needed:
    DLinqQueryNode outputNode = reduced.OutputNode;
    bool needsTee = outputNode.IsForked &&
                    !(outputNode is DLinqForkNode) &&
                    !(outputNode is DLinqTeeNode);
    if (needsTee)
    {
        reduced = reduced.InsertTee(true);
    }
    return reduced;
}
/// <summary>
/// Phase 3 of plan rewriting: removes Tee children that are not forked, removes
/// Merge nodes over a single static partition, and sets a broadcast dynamic manager
/// on Tee nodes when the configured condition holds. Then recurses into the children
/// (and into the body/condition of a DoWhile node).
/// </summary>
/// <param name="node">Node to process; ignored unless tagged with the current phase id.</param>
private void VisitPhase3(DLinqQueryNode node)
{
    if (node.m_uniqueId != this.m_currentPhaseId)
    {
        return;
    }
    node.m_uniqueId++;

    // Remove some useless Tee nodes
    // (node.Children is updated in place while it is being walked.)
    foreach (DLinqQueryNode candidate in node.Children)
    {
        if (!(candidate is DLinqTeeNode) || candidate.IsForked)
        {
            continue;
        }
        DLinqQueryNode teeChild = candidate.Children[0];
        teeChild.UpdateParent(candidate, node);
        node.UpdateChildren(candidate, teeChild);
    }

    // Remove some useless Merge nodes
    bool mergeIsRedundant = (node is DLinqMergeNode) &&
                            !node.IsForked &&
                            !(node.Parents[0] is DLinqOutputNode) &&
                            !node.Children[0].IsDynamic &&
                            node.Children[0].PartitionCount == 1;
    if (mergeIsRedundant)
    {
        node.Children[0].UpdateParent(node, node.Parents[0]);
        node.Parents[0].UpdateChildren(node, node.Children[0]);
    }

    // Add dynamic managers for tee nodes.
    bool flagSet = (StaticConfig.DynamicOptLevel & StaticConfig.NoDynamicOpt) != 0;
    if (flagSet &&
        node is DLinqTeeNode &&
        node.DynamicManager.ManagerType == DynamicManagerType.None)
    {
        // insert a dynamic broadcast manager on Tee
        node.DynamicManager = DynamicManager.Broadcast;
    }

    // Recurse on the children of node
    foreach (DLinqQueryNode childNode in node.Children)
    {
        this.VisitPhase3(childNode);
    }

    DLinqDoWhileNode loopNode = node as DLinqDoWhileNode;
    if (loopNode != null)
    {
        this.VisitPhase3(loopNode.Body);
        this.VisitPhase3(loopNode.Cond);
    }
}
/// <summary>
/// Tell whether a merge node is needed over the given node: true when the
/// node is dynamic or has more than one partition.
/// </summary>
/// <param name="node">Node to inspect.</param>
private static bool IsMergeNodeNeeded(DLinqQueryNode node)
{
    if (node.IsDynamic)
    {
        return true;
    }
    return node.PartitionCount > 1;
}
/// <summary>
/// Assign a fresh vertex id (taken from m_nextVertexId) to every node reachable from
/// <paramref name="node"/> that is still tagged with the current phase id.
/// Referenced queries and children receive their ids before the node itself.
/// </summary>
/// <param name="node">Node to start from.</param>
private void AssignUniqueId(DLinqQueryNode node)
{
    if (node.m_uniqueId != this.m_currentPhaseId)
    {
        return;
    }

    foreach (Pair<ParameterExpression, DLinqQueryNode> referenced in node.GetReferencedQueries())
    {
        this.AssignUniqueId(referenced.Value);
    }
    foreach (DLinqQueryNode childNode in node.Children)
    {
        this.AssignUniqueId(childNode);
    }

    // Re-check: the recursive calls above may already have given this node an id
    // (see the Fork handling below, which ids a node's parents).
    if (node.m_uniqueId != this.m_currentPhaseId)
    {
        return;
    }
    node.m_uniqueId = this.m_nextVertexId++;

    // Special treatment for DoWhile
    DLinqDoWhileNode loopNode = node as DLinqDoWhileNode;
    if (loopNode != null)
    {
        this.AssignUniqueId(loopNode.Body);
        this.AssignUniqueId(loopNode.Cond);
        this.AssignUniqueId(loopNode.BodySource);
        this.AssignUniqueId(loopNode.CondSource1);
        this.AssignUniqueId(loopNode.CondSource2);
    }

    // Special treatment for Fork
    if (node.OutputNode is DLinqForkNode)
    {
        foreach (DLinqQueryNode parentNode in node.Parents)
        {
            if (parentNode.m_uniqueId == this.m_currentPhaseId)
            {
                parentNode.m_uniqueId = this.m_nextVertexId++;
            }
        }
    }
}
/// <summary>
/// Build the sub-plan Count -> Merge -> Apply(x => Offsets(x, isLong)) -> HashPartition -> Merge
/// on top of <paramref name="child"/>.
/// </summary>
/// <param name="isLong">Constant passed as the second argument of DryadLinqEnumerable.Offsets.</param>
/// <param name="queryExpr">Query expression attached to every node created here.</param>
/// <param name="child">Input of the sub-plan.</param>
/// <returns>The final Merge node of the sub-plan.</returns>
private DLinqQueryNode CreateOffset(bool isLong, Expression queryExpr, DLinqQueryNode child)
{
    // Count node
    DLinqQueryNode countNode = new DLinqBasicAggregateNode(null,
                                                           AggregateOpType.LongCount,
                                                           true,
                                                           false,
                                                           queryExpr,
                                                           child);

    // Apply node for x => Offsets(x)
    ParameterExpression countsParam = Expression.Parameter(typeof(IEnumerable<long>), "x");
    MethodInfo offsetsMethod = typeof(DryadLinqEnumerable).GetMethod("Offsets");
    Expression offsetsCall = Expression.Call(offsetsMethod,
                                             countsParam,
                                             Expression.Constant(isLong, typeof(bool)));
    Type offsetsFuncType = typeof(Func<,>).MakeGenericType(countsParam.Type, offsetsCall.Type);
    LambdaExpression offsetsLambda = Expression.Lambda(offsetsFuncType, offsetsCall, countsParam);

    DLinqQueryNode mergedCounts = new DLinqMergeNode(true, queryExpr, countNode);
    DLinqQueryNode offsetsNode = new DLinqApplyNode(offsetsLambda, queryExpr, mergedCounts);

    // HashPartition on the indexed value itself, into as many partitions as the child has
    LambdaExpression identityKey = IdentityFunction.Instance(typeof(IndexedValue<long>));
    int partitionCount = child.OutputPartition.Count;
    DLinqQueryNode distributed = new DLinqHashPartitionNode(identityKey,
                                                            null,
                                                            null,
                                                            partitionCount,
                                                            false,
                                                            queryExpr,
                                                            offsetsNode);
    return new DLinqMergeNode(false, queryExpr, distributed);
}
/// <summary>
/// Create a range-distributor dynamic manager for one node, using the
/// sampler's default sample rate.
/// </summary>
/// <param name="node">Node passed on to the base dynamic manager.</param>
internal DynamicRangeDistributor(DLinqQueryNode node)
    : base(DynamicManagerType.RangeDistributor, node)
{
    //@@TODO[P2]: This sample rate used here should really be its own constant.
    m_sampleRate = DryadLinqSampler.SAMPLE_RATE;
}
/// <summary>
/// Apply <paramref name="func"/> to a node. When the node is a Concat and the source is
/// not forked, the function is applied to each of the Concat's children instead and a
/// new Concat is built over the results.
/// </summary>
/// <param name="source">Query node info; supplies IsForked and the query expression.</param>
/// <param name="sourceNode">Node to transform.</param>
/// <param name="func">Transformation to apply.</param>
/// <returns>The transformed node, or the new Concat over the transformed children.</returns>
private DLinqQueryNode PromoteConcat(QueryNodeInfo source,
                                     DLinqQueryNode sourceNode,
                                     Func<DLinqQueryNode, DLinqQueryNode> func)
{
    if (!(sourceNode is DLinqConcatNode) || source.IsForked)
    {
        return func(sourceNode);
    }

    DLinqQueryNode[] oldBranches = sourceNode.Children;
    DLinqQueryNode[] newBranches = new DLinqQueryNode[oldBranches.Length];
    for (int branchIdx = 0; branchIdx < oldBranches.Length; branchIdx++)
    {
        // Detach the branch from the old Concat before transforming it.
        oldBranches[branchIdx].Parents.Remove(sourceNode);
        newBranches[branchIdx] = func(oldBranches[branchIdx]);
    }
    return new DLinqConcatNode(source.QueryExpression, newBranches);
}
/// <summary>
/// Translate a Concat over the children of <paramref name="source"/>. When the static
/// result has more partitions than StaticConfig.MaxPartitionCount, a repartitioning
/// sub-plan that halves the partition count is added on top.
/// </summary>
/// <param name="source">Query node info whose children are concatenated.</param>
/// <param name="queryExpr">The query expression attached to the created nodes.</param>
/// <returns>The Concat node, or the final Merge node of the repartitioning sub-plan.</returns>
private DLinqQueryNode VisitConcat(QueryNodeInfo source, MethodCallExpression queryExpr)
{
    DLinqQueryNode[] branches = new DLinqQueryNode[source.Children.Count];
    for (int branchIdx = 0; branchIdx < source.Children.Count; ++branchIdx)
    {
        branches[branchIdx] = this.Visit(source.Children[branchIdx].Child);
    }

    DLinqQueryNode concatNode = new DLinqConcatNode(queryExpr, branches);
    int parCount = concatNode.OutputPartition.Count;
    if (concatNode.IsDynamic || parCount <= StaticConfig.MaxPartitionCount)
    {
        return concatNode;
    }

    // Too many partitions, need to repartition
    int newParCount = parCount / 2;
    Type recordType = concatNode.OutputTypes[0];

    DLinqQueryNode countNode = new DLinqBasicAggregateNode(null,
                                                           AggregateOpType.LongCount,
                                                           true,
                                                           false,
                                                           queryExpr,
                                                           concatNode);
    DLinqQueryNode mergeCountNode = new DLinqMergeNode(true, queryExpr, countNode);

    // Apply node for s => IndexedCount(s)
    ParameterExpression countsParam = Expression.Parameter(typeof(IEnumerable<long>), "s");
    MethodInfo indexedCountMethod = typeof(DryadLinqHelper).GetMethod("IndexedCount");
    indexedCountMethod = indexedCountMethod.MakeGenericMethod(typeof(long));
    Expression indexedCountCall = Expression.Call(indexedCountMethod, countsParam);
    LambdaExpression indexedCountFunc = Expression.Lambda(
        typeof(Func<,>).MakeGenericType(countsParam.Type, indexedCountCall.Type),
        indexedCountCall,
        countsParam);
    DLinqQueryNode indexedCountNode = new DLinqApplyNode(indexedCountFunc, queryExpr, mergeCountNode);

    // HashPartition(x => x.index, parCount)
    ParameterExpression countElem =
        Expression.Parameter(indexedCountCall.Type.GetGenericArguments()[0], "x");
    Expression countIndex = Expression.Property(countElem, "Index");
    LambdaExpression countKeySelector = Expression.Lambda(
        typeof(Func<,>).MakeGenericType(countElem.Type, countIndex.Type),
        countIndex,
        countElem);
    DLinqQueryNode distCountNode = new DLinqHashPartitionNode(countKeySelector,
                                                              null,
                                                              parCount,
                                                              queryExpr,
                                                              indexedCountNode);

    // Apply node for (x, y) => AddPartitionIndex(x, y, newParCount)
    ParameterExpression countsArg = Expression.Parameter(indexedCountCall.Type, "x");
    ParameterExpression recordsArg =
        Expression.Parameter(typeof(IEnumerable<>).MakeGenericType(recordType), "y");
    MethodInfo addIndexMethod = typeof(DryadLinqHelper).GetMethod("AddPartitionIndex");
    addIndexMethod = addIndexMethod.MakeGenericMethod(recordType);
    Expression addIndexCall = Expression.Call(addIndexMethod,
                                              countsArg,
                                              recordsArg,
                                              Expression.Constant(newParCount));
    LambdaExpression addIndexFunc = Expression.Lambda(
        typeof(Func<,,>).MakeGenericType(countsArg.Type, recordsArg.Type, addIndexCall.Type),
        addIndexCall,
        countsArg,
        recordsArg);
    DLinqQueryNode addIndexNode = new DLinqApplyNode(addIndexFunc, queryExpr, distCountNode, concatNode);

    // HashPartition(x => x.index, x => x.value, newParCount)
    ParameterExpression indexedElem =
        Expression.Parameter(addIndexCall.Type.GetGenericArguments()[0], "x");
    Expression elemIndex = Expression.Property(indexedElem, "Index");
    LambdaExpression keySelectExpr = Expression.Lambda(
        typeof(Func<,>).MakeGenericType(indexedElem.Type, elemIndex.Type),
        elemIndex,
        indexedElem);
    Expression elemValue = Expression.Property(indexedElem, "Value");
    LambdaExpression resultSelectExpr = Expression.Lambda(
        typeof(Func<,>).MakeGenericType(indexedElem.Type, elemValue.Type),
        elemValue,
        indexedElem);

    DLinqQueryNode repartitioned = new DLinqHashPartitionNode(keySelectExpr,
                                                              resultSelectExpr,
                                                              null,
                                                              newParCount,
                                                              false,
                                                              queryExpr,
                                                              addIndexNode);
    return new DLinqMergeNode(true, queryExpr, repartitioned);
}
// Creates an "auto-sampling range-partition sub-query".
//
// The child is marked as forked and feeds two consumers: a sampling pipeline
// (Apply(Phase1Sampling) -> Merge -> Apply(RangeSampler_Static)) and the range-partition
// node itself, which also takes the sampler output as its second input. The result is
// a Merge node over the range-partition node.
// Throws DryadLinqException when no comparer is given and the key type has no default comparer.
private DLinqQueryNode CreateRangePartition(bool isDynamic,
                                            LambdaExpression keySelectExpr,
                                            LambdaExpression resultSelectExpr,
                                            Expression comparerExpr,
                                            Expression isDescendingExpr,
                                            Expression queryExpr,
                                            Expression partitionCountExpr,
                                            DLinqQueryNode child)
{
    // Make child a Tee node
    child.IsForked = true;

    if (isDescendingExpr == null)
    {
        // default for isDescending is false.
        isDescendingExpr = Expression.Constant(false, typeof(bool));
    }

    // The partition count
    // NOTE: for MayRTM, isDynamic should never be true
    Expression countExpr = null;
    if (!isDynamic)
    {
        // If partitionCount was not explicitly set, use the child's partition count.
        countExpr = (partitionCountExpr != null)
                        ? partitionCountExpr
                        : Expression.Constant(child.OutputPartition.Count);
    }

    Type recordType = child.OutputTypes[0];
    Type keyType = keySelectExpr.Type.GetGenericArguments()[1];

    // Create x => Phase1Sampling(x_1, keySelector, denv)
    ParameterExpression recordsParam =
        Expression.Parameter(typeof(IEnumerable<>).MakeGenericType(recordType), "x_1");
    ParameterExpression denvParam = Expression.Parameter(typeof(VertexEnv), "denv");
    MethodInfo phase1Method = typeof(DryadLinqSampler).GetMethod("Phase1Sampling");
    Expression phase1Call = Expression.Call(phase1Method.MakeGenericMethod(recordType, keyType),
                                            recordsParam,
                                            keySelectExpr,
                                            denvParam);
    LambdaExpression samplingExpr = Expression.Lambda(
        typeof(Func<,>).MakeGenericType(recordsParam.Type, phase1Call.Type),
        phase1Call,
        recordsParam);

    // Create the Sampling node
    DLinqApplyNode samplingNode = new DLinqApplyNode(samplingExpr, queryExpr, child);

    // Create x => RangeSampler(x, keySelectExpr, comparer, isDescendingExpr)
    ParameterExpression keysParam =
        Expression.Parameter(typeof(IEnumerable<>).MakeGenericType(keyType), "x_2");

    // For RTM, isDynamic should never be true, so only the static sampler is used.
    Debug.Assert(isDynamic == false, "Internal error: isDynamic is true.");
    MethodInfo samplerMethod = typeof(DryadLinqSampler).GetMethod("RangeSampler_Static");
    samplerMethod = samplerMethod.MakeGenericMethod(keyType);

    Expression comparerArgExpr = comparerExpr;
    if (comparerExpr == null)
    {
        if (!TypeSystem.HasDefaultComparer(keyType))
        {
            throw DryadLinqException.Create(DryadLinqErrorCode.ComparerMustBeSpecifiedOrKeyTypeMustBeIComparable,
                                            string.Format(SR.ComparerMustBeSpecifiedOrKeyTypeMustBeIComparable, keyType),
                                            queryExpr);
        }
        // A null comparer constant of type IComparer<keyType> is passed to the sampler.
        comparerArgExpr = Expression.Constant(null, typeof(IComparer<>).MakeGenericType(keyType));
    }

    Expression lastArg = isDynamic ? (Expression)denvParam : countExpr;
    Expression samplerCall = Expression.Call(samplerMethod,
                                             keysParam,
                                             comparerArgExpr,
                                             isDescendingExpr,
                                             lastArg);
    LambdaExpression samplerExpr = Expression.Lambda(
        typeof(Func<,>).MakeGenericType(keysParam.Type, samplerCall.Type),
        samplerCall,
        keysParam);

    // Create the sample node
    DLinqQueryNode sampleDataNode = new DLinqMergeNode(false, queryExpr, samplingNode);
    DLinqQueryNode sampleNode = new DLinqApplyNode(samplerExpr, queryExpr, sampleDataNode);
    sampleNode.IsForked = true;

    // Create the range distribute node
    DLinqQueryNode rangeNode = new DLinqRangePartitionNode(keySelectExpr,
                                                           resultSelectExpr,
                                                           null,
                                                           comparerExpr,
                                                           isDescendingExpr,
                                                           countExpr,
                                                           queryExpr,
                                                           child,
                                                           sampleNode);
    DLinqQueryNode resNode = new DLinqMergeNode(false, queryExpr, rangeNode);

    // Set the dynamic manager for sampleNode
    if (isDynamic)
    {
        sampleDataNode.DynamicManager = new DynamicRangeDistributor(resNode);
    }
    return resNode;
}
/// <summary>
/// Explain one query node.
/// </summary>
/// <remarks>
/// Tee, Output, DoWhile and Dummy nodes produce no output. An Input node is printed as
/// "Input:" followed by the node's own string form. Any other node is printed as its
/// vertex entry method name followed by one tab-indented line per operator; for a
/// super node this is one line per contained node.
/// </remarks>
/// <param name="plan">Return plan here.</param>
/// <param name="n">Node to explain.</param>
/// <exception cref="DryadLinqException">
/// The expression chosen for an operator is a call to a method that is not in the
/// list of known operators.
/// </exception>
internal static void ExplainNode(StringBuilder plan, DLinqQueryNode n)
{
    if (n is DLinqTeeNode || n is DLinqOutputNode || n is DLinqDoWhileNode || n is DLinqDummyNode)
    {
        return;
    }
    if (n is DLinqInputNode)
    {
        plan.AppendLine("Input:");
        plan.Append("\t");
        n.BuildString(plan);
        plan.AppendLine();
        return;
    }

    plan.Append(n.m_vertexEntryMethod);
    plan.AppendLine(":");

    HashSet<DLinqQueryNode> explained = CollectExplainedNodes(n);
    foreach (DLinqQueryNode nc in explained.Reverse())
    {
        List<string> additional = new List<string>(); // additional arguments to print
        string methodname = nc.OpName;
        plan.Append("\t");

        Expression expression = SelectExplainExpression(nc, additional); // expression to print
        MethodCallExpression mc = expression as MethodCallExpression;
        if (mc != null)
        {
            // The method name of the call overrides the node's operator name, and the
            // leading data-source arguments of the call are not printed.
            int argsToSkip = CountSourceArguments(mc);
            plan.Append(mc.Method.Name);
            plan.Append("(");
            for (int argIdx = argsToSkip; argIdx < mc.Arguments.Count; argIdx++)
            {
                if (argIdx > argsToSkip)
                {
                    plan.Append(",");
                }
                plan.Append(DryadLinqExpression.Summarize(mc.Arguments[argIdx]));
            }
            plan.AppendLine(")");
        }
        else
        {
            // expression is not methodcall
            plan.Append(methodname);
            plan.Append("(");
            bool needSeparator = false;
            if (expression != null)
            {
                plan.Append(DryadLinqExpression.Summarize(expression));
                needSeparator = true;
            }
            foreach (string extra in additional)
            {
                // Only separate from a preceding argument, so that a missing primary
                // expression does not yield a stray leading comma such as "Op(,x)".
                if (needSeparator)
                {
                    plan.Append(",");
                }
                plan.Append(extra);
                needSeparator = true;
            }
            plan.AppendLine(")");
        }
    }
}

// Collect the nodes to print for n: n itself, or, for a super node, every non-super
// node reached breadth-first from its root through children the super node contains.
private static HashSet<DLinqQueryNode> CollectExplainedNodes(DLinqQueryNode n)
{
    HashSet<DLinqQueryNode> collected = new HashSet<DLinqQueryNode>();
    DLinqSuperNode sn = n as DLinqSuperNode;
    if (sn == null)
    {
        collected.Add(n);
        return collected;
    }

    Queue<DLinqQueryNode> pending = new Queue<DLinqQueryNode>();
    pending.Enqueue(sn.RootNode);
    while (pending.Count > 0)
    {
        DLinqQueryNode current = pending.Dequeue();
        if (!(current is DLinqSuperNode))
        {
            collected.Add(current);
        }
        foreach (DLinqQueryNode childNode in current.Children)
        {
            if (!collected.Contains(childNode) && sn.Contains(childNode))
            {
                pending.Enqueue(childNode);
            }
        }
    }
    return collected;
}

// Pick the primary expression to print for a node (may be null) and append the
// summaries of any further expressions of interest to 'additional'.
private static Expression SelectExplainExpression(DLinqQueryNode nc, List<string> additional)
{
    Expression expression = null;
    if (nc is DLinqMergeNode)
    {
        expression = ((DLinqMergeNode)nc).ComparerExpression;
    }
    else if (nc is DLinqHashPartitionNode)
    {
        DLinqHashPartitionNode hp = (DLinqHashPartitionNode)nc;
        expression = hp.KeySelectExpression;
        additional.Add(hp.NumberOfPartitions.ToString());
    }
    else if (nc is DLinqGroupByNode)
    {
        DLinqGroupByNode gb = (DLinqGroupByNode)nc;
        expression = gb.KeySelectExpression;
        if (gb.ElemSelectExpression != null)
            additional.Add(DryadLinqExpression.Summarize(gb.ElemSelectExpression));
        if (gb.ResSelectExpression != null)
            additional.Add(DryadLinqExpression.Summarize(gb.ResSelectExpression));
        if (gb.ComparerExpression != null)
            additional.Add(DryadLinqExpression.Summarize(gb.ComparerExpression));
        if (gb.SeedExpression != null)
            additional.Add(DryadLinqExpression.Summarize(gb.SeedExpression));
        if (gb.AccumulatorExpression != null)
            additional.Add(DryadLinqExpression.Summarize(gb.AccumulatorExpression));
    }
    else if (nc is DLinqOrderByNode)
    {
        DLinqOrderByNode ob = (DLinqOrderByNode)nc;
        expression = ob.KeySelectExpression;
        if (ob.ComparerExpression != null)
            additional.Add(DryadLinqExpression.Summarize(ob.ComparerExpression));
    }
    else if (nc is DLinqWhereNode)
    {
        expression = ((DLinqWhereNode)nc).WhereExpression;
    }
    else if (nc is DLinqSelectNode)
    {
        DLinqSelectNode s = (DLinqSelectNode)nc;
        expression = s.SelectExpression;
        if (s.ResultSelectExpression != null)
            additional.Add(DryadLinqExpression.Summarize(s.ResultSelectExpression));
    }
    else if (nc is DLinqAggregateNode)
    {
        DLinqAggregateNode a = (DLinqAggregateNode)nc;
        expression = a.FuncLambda;
        if (a.SeedExpression != null)
            additional.Add(DryadLinqExpression.Summarize(a.SeedExpression));
        if (a.ResultLambda != null)
            additional.Add(DryadLinqExpression.Summarize(a.ResultLambda));
    }
    else if (nc is DLinqPartitionOpNode)
    {
        expression = ((DLinqPartitionOpNode)nc).ControlExpression;
    }
    else if (nc is DLinqJoinNode)
    {
        DLinqJoinNode j = (DLinqJoinNode)nc;
        expression = j.OuterKeySelectorExpression;
        additional.Add(DryadLinqExpression.Summarize(j.InnerKeySelectorExpression));
        additional.Add(DryadLinqExpression.Summarize(j.ResultSelectorExpression));
        if (j.ComparerExpression != null)
            additional.Add(DryadLinqExpression.Summarize(j.ComparerExpression));
    }
    else if (nc is DLinqDistinctNode)
    {
        expression = ((DLinqDistinctNode)nc).ComparerExpression;
    }
    else if (nc is DLinqContainsNode)
    {
        DLinqContainsNode c = (DLinqContainsNode)nc;
        expression = c.ValueExpression;
        if (c.ComparerExpression != null)
            additional.Add(DryadLinqExpression.Summarize(c.ComparerExpression));
    }
    else if (nc is DLinqBasicAggregateNode)
    {
        expression = ((DLinqBasicAggregateNode)nc).SelectExpression;
    }
    else if (nc is DLinqConcatNode)
    {
        // nothing to do
    }
    else if (nc is DLinqSetOperationNode)
    {
        expression = ((DLinqSetOperationNode)nc).ComparerExpression;
    }
    else if (nc is DLinqRangePartitionNode)
    {
        DLinqRangePartitionNode r = (DLinqRangePartitionNode)nc;
        expression = r.CountExpression;
        // TODO: there's some other possible interesting info
    }
    else if (nc is DLinqApplyNode)
    {
        expression = ((DLinqApplyNode)nc).LambdaExpression;
    }
    else if (nc is DLinqForkNode)
    {
        expression = ((DLinqForkNode)nc).ForkLambda;
    }
    else if (nc is DLinqTeeNode)
    {
        // nothing
    }
    else if (nc is DLinqDynamicNode)
    {
        // nothing
    }
    else
    {
        expression = nc.QueryExpression;
    }
    return expression;
}

// Number of leading arguments of a known operator call that are skipped when printing.
// Throws DryadLinqException (OperatorNotSupported) for an unknown method name.
private static int CountSourceArguments(MethodCallExpression mc)
{
    #region LINQMETHODS
    switch (mc.Method.Name)
    {
        case "Aggregate":
        case "AggregateAsQuery":
        case "Select":
        case "LongSelect":
        case "SelectMany":
        case "LongSelectMany":
        case "OfType":
        case "Where":
        case "LongWhere":
        case "First":
        case "FirstOrDefault":
        case "FirstAsQuery":
        case "Single":
        case "SingleOrDefault":
        case "SingleAsQuery":
        case "Last":
        case "LastOrDefault":
        case "LastAsQuery":
        case "Distinct":
        case "Any":
        case "AnyAsQuery":
        case "All":
        case "AllAsQuery":
        case "Count":
        case "CountAsQuery":
        case "LongCount":
        case "LongCountAsQuery":
        case "Sum":
        case "SumAsQuery":
        case "Min":
        case "MinAsQuery":
        case "Max":
        case "MaxAsQuery":
        case "Average":
        case "AverageAsQuery":
        case "GroupBy":
        case "OrderBy":
        case "OrderByDescending":
        case "ThenBy":
        case "ThenByDescending":
        case "Take":
        case "TakeWhile":
        case "LongTakeWhile":
        case "Skip":
        case "SkipWhile":
        case "LongSkipWhile":
        case "Contains":
        case "ContainsAsQuery":
        case "Reverse":
        case "Merge":
        case "HashPartition":
        case "RangePartition":
        case "Fork":
        case "ForkChoose":
        case "AssumeHashPartition":
        case "AssumeRangePartition":
        case "AssumeOrderBy":
        case "ToPartitionedTableLazy":
        case "AddCacheEntry":
        case "SlidingWindow":
        case "ApplyWithPartitionIndex":
        case "DoWhile":
            return 1;
        case "Join":
        case "GroupJoin":
        case "Concat":
        case "MultiConcat":
        case "Union":
        case "Intersect":
        case "Except":
        case "SequenceEqual":
        case "SequenceEqualAsQuery":
        case "Zip":
            return 2;
        case "Apply":
        case "ApplyPerPartition":
            return (mc.Arguments.Count < 3) ? 1 : 2;
        default:
            throw DryadLinqException.Create(DryadLinqErrorCode.OperatorNotSupported,
                                            String.Format(SR.OperatorNotSupported, mc.Method.Name),
                                            mc);
    }
    #endregion
}
/// <summary>
/// Builds the first-stage node of a partition operator.
/// For TakeWhile, the stage is an apply node whose lambda calls
/// DryadLinqEnumerable.GroupTakeWhile over the child's records; every other
/// node type gets a plain DLinqPartitionOpNode.
/// </summary>
/// <param name="opName">Operator name forwarded to the partition-op node.</param>
/// <param name="nodeType">The kind of partition operator being built.</param>
/// <param name="controlExpr">The control expression (count or predicate) of the operator.</param>
/// <param name="queryExpr">The query expression this node originates from.</param>
/// <param name="child">The input node of the new stage.</param>
/// <returns>The newly created first-stage node.</returns>
private DLinqQueryNode FirstStagePartitionOp(string opName,
                                             QueryNodeType nodeType,
                                             Expression controlExpr,
                                             MethodCallExpression queryExpr,
                                             DLinqQueryNode child)
{
    if (nodeType != QueryNodeType.TakeWhile)
    {
        // The literal 'true' is the fourth constructor argument
        // (presumably the first-stage flag -- confirm against DLinqPartitionOpNode).
        return new DLinqPartitionOpNode(opName, nodeType, controlExpr, true, queryExpr, child);
    }

    // TakeWhile: wrap GroupTakeWhile<T>(source, controlExpr) in a lambda
    // of type Func<IEnumerable<T>, TResult> and run it through an apply node.
    Type recordType = child.OutputTypes[0];
    Type sourceType = typeof(IEnumerable<>).MakeGenericType(recordType);
    ParameterExpression sourceParam =
        Expression.Parameter(sourceType, DryadLinqCodeGen.MakeUniqueName("x"));

    MethodInfo groupTakeWhile = typeof(DryadLinqEnumerable)
                                    .GetMethod("GroupTakeWhile")
                                    .MakeGenericMethod(recordType);
    Expression callExpr = Expression.Call(groupTakeWhile, sourceParam, controlExpr);

    Type funcType = typeof(Func<,>).MakeGenericType(sourceType, callExpr.Type);
    LambdaExpression applyLambda = Expression.Lambda(funcType, callExpr, sourceParam);
    return new DLinqApplyNode(applyLambda, queryExpr, child);
}
/// <summary>
/// Creates the CodeDOM statement that declares the vertex environment variable,
/// i.e. a variable of type "VertexEnv" named VertexEnvName that is initialized
/// with <c>new VertexEnv(args, &lt;VertexParamName&gt;)</c>.
/// </summary>
/// <param name="node">The query node the vertex is generated for (not read by this method).</param>
/// <returns>The variable declaration statement.</returns>
internal static CodeVariableDeclarationStatement MakeVertexEnvDecl(DLinqQueryNode node)
{
    // Both constructor arguments are references to arguments of the enclosing method.
    CodeExpression createEnv = new CodeObjectCreateExpression(
        "VertexEnv",
        new CodeArgumentReferenceExpression("args"),
        new CodeArgumentReferenceExpression(VertexParamName));

    return new CodeVariableDeclarationStatement("VertexEnv", VertexEnvName, createEnv);
}
/// <summary>
/// Builds the apply node for an Apply operator that takes multiple inputs.
/// Each input is visited first; the inputs are then adjusted depending on
/// whether the apply can be run per partition (the homomorphic case).
/// </summary>
/// <param name="source">Query node info whose children are the inputs of the apply.</param>
/// <param name="procLambda">The user function to apply.</param>
/// <param name="perPartition">True if the function is to be applied per partition.</param>
/// <param name="isFirstOnly">True if only the first input is processed partition-wise.</param>
/// <param name="queryExpr">The query expression this node originates from.</param>
/// <returns>The apply node created over the (possibly rewritten) inputs.</returns>
private DLinqQueryNode VisitMultiApply(QueryNodeInfo source,
                                       LambdaExpression procLambda,
                                       bool perPartition,
                                       bool isFirstOnly,
                                       MethodCallExpression queryExpr)
{
    // Translate every input, in order.
    DLinqQueryNode[] childs = new DLinqQueryNode[source.Children.Count];
    for (int idx = 0; idx < source.Children.Count; ++idx)
    {
        childs[idx] = this.Visit(source.Children[idx].Child);
    }

    bool isDynamic = childs.Any(x => x.IsDynamic);
    bool isHomomorphic = perPartition && !isDynamic;

    if (!isHomomorphic)
    {
        // Non-homomorphic case: merge every input that is dynamic or has
        // more than one partition.
        for (int idx = 0; idx < childs.Length; ++idx)
        {
            if (childs[idx].IsDynamic || childs[idx].OutputPartition.Count > 1)
            {
                childs[idx] = new DLinqMergeNode(true, queryExpr, childs[idx]);
            }
        }
    }
    else if (isFirstOnly)
    {
        // Homomorphic on the first input only: each remaining input goes
        // through a tee (cross-product connection) followed by a merge that
        // is given the first input's partition count.
        for (int idx = 1; idx < childs.Length; ++idx)
        {
            DLinqQueryNode tee = new DLinqTeeNode(childs[idx].OutputTypes[0], true, queryExpr, childs[idx]);
            childs[idx] = tee;
            tee.ConOpType = ConnectionOpType.CrossProduct;
            childs[idx] = new DLinqMergeNode(childs[0].OutputPartition.Count, queryExpr, tee);
        }
    }
    else
    {
        // Fully homomorphic: all inputs must agree on the partition count.
        int expectedCount = childs[0].OutputPartition.Count;
        for (int idx = 1; idx < childs.Length; ++idx)
        {
            if (childs[idx].OutputPartition.Count == expectedCount)
            {
                continue;
            }
            throw DryadLinqException.Create(DryadLinqErrorCode.HomomorphicApplyNeedsSamePartitionCount,
                                            SR.HomomorphicApplyNeedsSamePartitionCount,
                                            queryExpr);
        }
    }

    return new DLinqApplyNode(procLambda, true, queryExpr, childs);
}
/// <summary>
/// Dispatches on the node type: casts the node to its concrete node class and
/// forwards to the matching Visit overload, which adds the code for that node
/// to the vertex method.
/// </summary>
/// <param name="node">The query node to generate vertex code for.</param>
/// <param name="vertexMethod">The vertex method being generated.</param>
/// <param name="readerNames">Names of the readers, forwarded to Visit.</param>
/// <param name="writerNames">Names of the writers, forwarded to Visit.</param>
/// <returns>The string returned by the selected Visit overload.</returns>
/// <exception cref="DryadLinqException">The node type has no case in the dispatch.</exception>
internal virtual string AddVertexCode(DLinqQueryNode node,
                                      CodeMemberMethod vertexMethod,
                                      string[] readerNames,
                                      string[] writerNames)
{
    QueryNodeType kind = node.NodeType;
    switch (kind)
    {
        case QueryNodeType.InputTable:
            return this.Visit((DLinqInputNode)node, vertexMethod, readerNames, writerNames);

        case QueryNodeType.OutputTable:
            return this.Visit((DLinqOutputNode)node, vertexMethod, readerNames, writerNames);

        case QueryNodeType.Aggregate:
            return this.Visit((DLinqAggregateNode)node, vertexMethod, readerNames, writerNames);

        case QueryNodeType.Select:
        case QueryNodeType.SelectMany:
            return this.Visit((DLinqSelectNode)node, vertexMethod, readerNames, writerNames);

        case QueryNodeType.Where:
            return this.Visit((DLinqWhereNode)node, vertexMethod, readerNames, writerNames);

        case QueryNodeType.Distinct:
            return this.Visit((DLinqDistinctNode)node, vertexMethod, readerNames, writerNames);

        case QueryNodeType.BasicAggregate:
            return this.Visit((DLinqBasicAggregateNode)node, vertexMethod, readerNames, writerNames);

        case QueryNodeType.GroupBy:
            return this.Visit((DLinqGroupByNode)node, vertexMethod, readerNames, writerNames);

        case QueryNodeType.OrderBy:
            return this.Visit((DLinqOrderByNode)node, vertexMethod, readerNames, writerNames);

        // All four partition operators share one node class.
        case QueryNodeType.Skip:
        case QueryNodeType.SkipWhile:
        case QueryNodeType.Take:
        case QueryNodeType.TakeWhile:
            return this.Visit((DLinqPartitionOpNode)node, vertexMethod, readerNames, writerNames);

        case QueryNodeType.Contains:
            return this.Visit((DLinqContainsNode)node, vertexMethod, readerNames, writerNames);

        case QueryNodeType.Join:
        case QueryNodeType.GroupJoin:
            return this.Visit((DLinqJoinNode)node, vertexMethod, readerNames, writerNames);

        case QueryNodeType.Union:
        case QueryNodeType.Intersect:
        case QueryNodeType.Except:
            return this.Visit((DLinqSetOperationNode)node, vertexMethod, readerNames, writerNames);

        case QueryNodeType.Concat:
            return this.Visit((DLinqConcatNode)node, vertexMethod, readerNames, writerNames);

        case QueryNodeType.Zip:
            return this.Visit((DLinqZipNode)node, vertexMethod, readerNames, writerNames);

        case QueryNodeType.Super:
            return this.Visit((DLinqSuperNode)node, vertexMethod, readerNames, writerNames);

        case QueryNodeType.RangePartition:
            return this.Visit((DLinqRangePartitionNode)node, vertexMethod, readerNames, writerNames);

        case QueryNodeType.HashPartition:
            return this.Visit((DLinqHashPartitionNode)node, vertexMethod, readerNames, writerNames);

        case QueryNodeType.Merge:
            return this.Visit((DLinqMergeNode)node, vertexMethod, readerNames, writerNames);

        case QueryNodeType.Apply:
            return this.Visit((DLinqApplyNode)node, vertexMethod, readerNames, writerNames);

        case QueryNodeType.Fork:
            return this.Visit((DLinqForkNode)node, vertexMethod, readerNames, writerNames);

        case QueryNodeType.Tee:
            return this.Visit((DLinqTeeNode)node, vertexMethod, readerNames, writerNames);

        case QueryNodeType.Dynamic:
            return this.Visit((DLinqDynamicNode)node, vertexMethod, readerNames, writerNames);

        case QueryNodeType.Dummy:
            return this.Visit((DLinqDummyNode)node, vertexMethod, readerNames, writerNames);

        default:
            throw new DryadLinqException("Internal error: unhandled node type " + kind);
    }
}
// Replace all occurrences of oldNode in this.Parents by newNode
// (matching is by reference identity). If oldNode does not occur
// in this.Parents, newNode is appended to this.Parents instead.
// Return true iff oldNode was in this.Parents.
internal bool UpdateParent(DLinqQueryNode oldNode, DLinqQueryNode newNode)
{
    bool replacedAny = false;
    int idx = 0;
    while (idx < this.Parents.Count)
    {
        if (Object.ReferenceEquals(oldNode, this.Parents[idx]))
        {
            this.Parents[idx] = newNode;
            replacedAny = true;
        }
        idx++;
    }

    if (replacedAny)
    {
        return true;
    }

    // oldNode was not a parent: register newNode as an additional parent.
    this.Parents.Add(newNode);
    return false;
}