/// <summary>
/// Opens the first source listed in <paramref name="info"/> with the stream type selected by
/// its ioType, and wraps the opened stream in a stream manager attached to a new stage
/// named <paramref name="inputName"/>.
/// </summary>
/// <param name="app">Supplies the universe used to open the stream and the graph that owns the new stage.</param>
/// <param name="info">Vertex description; ioType selects the stream class and sources[0] is the path opened.</param>
/// <param name="inputName">Name of the new input stage; also passed to the method-entry log.</param>
/// <returns>The stream manager wrapping the opened input stream.</returns>
/// <exception cref="LinqToDryadException">
/// Thrown when opening the source fails, or when ioType is neither PARTITIONEDFILE nor HDFS_STREAM.
/// </exception>
private DrInputStreamManager CreateInputNode(DryadLINQApp app, VertexInfo info, string inputName)
{
    DryadLogger.LogMethodEntry(inputName);

    DrInputStreamManager manager;
    switch (info.ioType)
    {
        case VertexInfo.IOType.PARTITIONEDFILE:
        {
            DrPartitionInputStream partitionStream = new DrPartitionInputStream();
            int openResult = partitionStream.Open(app.GetUniverse(), info.sources[0]);
            if (!SUCCEEDED(openResult))
            {
                throw new LinqToDryadException(
                    String.Format("Could not read DSC input file {0}", info.sources[0]),
                    openResult);
            }

            DrManagerBase partitionStage = new DrManagerBase(app.GetGraph(), inputName);
            manager = new DrInputStreamManager(partitionStream, partitionStage);
            break;
        }

        // NOTE: IOType.STREAM (DSC fileset input) has no case here, so it ends up in the
        // default branch below and is reported as an unknown input type.

        case VertexInfo.IOType.HDFS_STREAM:
        {
            DrHdfsInputStream hdfsStream = new DrHdfsInputStream();
            DryadLogger.LogInformation("Create input node", "Opening HDFS input fileset");

            int openResult = hdfsStream.Open(app.GetUniverse(), info.sources[0]);
            if (!SUCCEEDED(openResult))
            {
                throw new LinqToDryadException(
                    String.Format("Could not read HDFS input fileset {0}", info.sources[0]),
                    openResult);
            }
            DryadLogger.LogInformation("Create input node", "Opened HDFS input fileset");

            DrManagerBase hdfsStage = new DrManagerBase(app.GetGraph(), inputName);
            manager = new DrInputStreamManager(hdfsStream, hdfsStage);
            break;
        }

        default:
            throw new LinqToDryadException(String.Format("Unknown input type {0}", info.ioType));
    }

    // The exit log is only reached on success; every failure path above throws first.
    DryadLogger.LogMethodExit();
    return manager;
}
/// <summary>
/// Main Dryad LINQ execution entry point: parses the query plan XML named by args[1],
/// builds the job graph from it, runs the graph, and reports the outcome.
/// </summary>
/// <param name="args">
/// Command line: args[0] is the program name, args[1] the query XML file name, and any
/// remaining entries are passed through as application flags. A "--break" entry triggers
/// a wait for a debugger and is removed before further processing.
/// </param>
/// <returns>
/// 0 when the graph finished without error; -1 when the arguments are insufficient, the
/// query plan is invalid, the graph executor fails to initialize, or the command-line
/// flags are rejected; otherwise the error code reported by the graph executor.
/// </returns>
/// <exception cref="ApplicationException">The DRYAD_HOME environment variable is not set.</exception>
public int ExecLinqToDryad(string[] args)
{
    //
    // must be at least two arguments (program name and query XML file name)
    //
    if (args == null || args.Length < 2)
    {
        DryadLogger.LogCritical(0, null, "Must provide at least query XML file name.");
        return(-1);
    }

    //
    // break if --break is included in arguments (and eliminate it, as it is not known downstream)
    //
    if (ConsumeSingleArgument("--break", ref args))
    {
        DebugHelper.WaitForDebugger();
    }

    //
    // Removing --break may have shrunk the argument list (e.g. "prog --break"), so the
    // query XML file name must be re-validated before args[1] is read below.
    //
    if (args == null || args.Length < 2)
    {
        DryadLogger.LogCritical(0, null, "Must provide at least query XML file name.");
        return(-1);
    }

    //
    // parse the XML input, producing a DryadLINQ Query
    //
    Query query = new Query();
    QueryPlanParser parser = new QueryPlanParser();
    if (!parser.ParseQueryXml(args[1], query))
    {
        DryadLogger.LogCritical(0, null, "Invalid query plan");
        return(-1);
    }

    //
    // build internal app arguments from the XmlExecHost args carried by the query plan;
    // a --break found there also waits for a debugger and is not forwarded
    //
    List<string> internalArgs = new List<string>();
    foreach (string xmlExecHostArg in query.xmlExecHostArgs)
    {
        if (xmlExecHostArg == "--break")
        {
            DebugHelper.WaitForDebugger();
        }
        else
        {
            internalArgs.Add(xmlExecHostArg);
        }
    }

    //
    // combine internal arguments with any additional arguments received on the command line
    // don't include argv[0] and argv[1] (program name and query XML file name)
    //
    int internalArgc = internalArgs.Count;
    int externalArgc = args.Length - 2;
    string[] combinedArgv = new string[internalArgc + externalArgc];
    internalArgs.CopyTo(combinedArgv, 0);                          // internal arguments first
    Array.Copy(args, 2, combinedArgv, internalArgc, externalArgc); // then external arguments

    // Each argument is followed by a single space, including the last one.
    string msg = combinedArgv.Length == 0 ? "" : String.Join(" ", combinedArgv) + " ";
    DryadLogger.LogInformation(null, "Arguments: {0}", msg);

    string jobClass = "DryadLINQ";
    string dryadBinDir = Environment.GetEnvironmentVariable("DRYAD_HOME");
    if (String.IsNullOrEmpty(dryadBinDir))
    {
        throw new ApplicationException("DryadLINQ requires the DRYAD_HOME environment variable to be set to the Dryad binary folder.");
    }
    string exeName = Path.Combine(dryadBinDir, "VertexHost.exe");

    //
    // create app and run it
    //
    DrGraphParameters p = DrDefaultParameters.Make(exeName, jobClass, query.enableSpeculativeDuplication);

    // The same reporter instance listens on process and vertex templates and reports topology.
    DrArtemisLegacyReporter reporter = new DrArtemisLegacyReporter();
    p.m_defaultProcessTemplate.GetListenerList().Add(reporter);
    p.m_defaultVertexTemplate.GetListenerList().Add(reporter);
    p.m_topologyReporter = reporter;
    p.m_intermediateCompressionMode = query.intermediateDataCompression;

    DrGraphExecutor graphExecutor = new DrGraphExecutor();
    DrGraph graph = graphExecutor.Initialize(p);
    if (graph == null)
    {
        DryadLogger.LogCritical(0, null, "Failed to initialize Graph Executor");
        return(-1);
    }
    DryadLINQApp app = new DryadLINQApp(graph);

    // Initialize with arguments
    app.SetXmlFileName(args[1]);
    if (!app.ParseCommandLineFlags(combinedArgv))
    {
        DryadLogger.LogCritical(0, null, "Bad command-line options");
        return(-1);
    }

    // Build graph from query plan
    GraphBuilder builder = new GraphBuilder();
    builder.BuildGraphFromQuery(app, query);

    // Run the app
    DryadLogger.LogInformation(null, "Running the app");
    graphExecutor.Run();
    DrError exitStatus = graphExecutor.Join();
    DryadLogger.LogInformation(null, "Finished running the app");

    // A null status or a zero code both count as success.
    if (exitStatus == null || exitStatus.m_code == 0)
    {
        FinalizeExecution(query, graph);
        DryadLogger.LogInformation(null, "Application completed successfully.");
        return(0);
    }
    else
    {
        DryadLogger.LogCritical(exitStatus.m_code, null, "Application failed with error code 0x{0:X8}.\n", exitStatus.m_code);
        return(exitStatus.m_code);
    }
}
/// <summary>
/// Builds the job graph for <paramref name="query"/>: creates the vertex set for every
/// stage in the query plan, attaches a dynamic connection manager to each stage that
/// declares one, adds the edges, and finally registers the created vertices.
/// </summary>
/// <param name="app">Supplies the aggregation thresholds and the graph (and its parameters) being built.</param>
/// <param name="query">Query whose queryPlan vertices are turned into graph stages.</param>
public void BuildGraphFromQuery(DryadLINQApp app, Query query)
{
    // set configurable properties
    int highThreshold = app.GetMaxAggregateInputs();
    int lowThreshold = 16;
    UInt64 highDataThreshold = (UInt64)app.GetAggregateThreshold();
    UInt64 lowDataThreshold = (3 * highDataThreshold) / 4;
    UInt64 maxSingleDataThreshold = highDataThreshold / 2;
    int aggFilterThreshold = app.GetMaxAggregateFilterInputs();

    // First argument given to the helper vertices created below ("MW" for merge and
    // distribute vertices, "CP" for broadcast copy vertices).
    string stdVertexName = "MW";
    string cpyVertexName = "CP";

    // Numbers the broadcast copy stages. It must live outside the stage loop: when it was
    // declared inside the BROADCAST branch it restarted at 0 every time, so every copy
    // stage was named "CP__0".
    int bcastNumber = 0;

    // use a graph stage map to store the vertices as they are created, grouped by stage.
    Dictionary<int, GraphStageInfo> graphStageMap = new Dictionary<int, GraphStageInfo>();

    DryadLogger.LogInformation("Build Graph From Query", "Building graph");

    //
    // Create a set of vertices for each vertex (stage) in the query plan
    //
    DryadLogger.LogInformation("Build Graph From Query", "Adding vertices");
    foreach (KeyValuePair<int, Vertex> kvp in query.queryPlan)
    {
        Vertex v = kvp.Value;

        // Skip stages that already have an entry in the map.
        if (!graphStageMap.ContainsKey(v.uniqueId))
        {
            DryadLogger.LogInformation("Build Graph From Query", "Adding vertices for stage {0}", v.name);
            CreateVertexSet(v, app, query, graphStageMap);
        }
    }

    //
    // Add dynamic stage managers
    //
    DryadLogger.LogInformation("Build Graph From Query", "Adding stage managers");
    foreach (KeyValuePair<int, GraphStageInfo> kvp in graphStageMap)
    {
        Vertex v = kvp.Value.vertex;

        //
        // There are no dynamic managers
        //
        if (v.dynamicManager == null)
        {
            continue;
        }

        DrStageManager newManager = kvp.Value.stageManager;
        DrGraphParameters parameters = app.GetGraph().GetParameters();

        // Input tables and concat stages never get a dynamic connection manager.
        if (v.type != Vertex.Type.INPUTTABLE && v.type != Vertex.Type.CONCAT)
        {
            if (v.dynamicManager.type == DynamicManager.Type.SPLITTER)
            {
                // A single predecessor uses the pipeline splitter; otherwise the
                // semi-pipeline variant. Both attach to the first predecessor only.
                if (v.info.predecessors.Length == 1)
                {
                    DrPipelineSplitManager splitter = new DrPipelineSplitManager();
                    newManager.AddDynamicConnectionManager(graphStageMap[v.info.predecessors[0].uniqueId].stageManager, splitter);
                }
                else
                {
                    DrSemiPipelineSplitManager splitter = new DrSemiPipelineSplitManager();
                    newManager.AddDynamicConnectionManager(graphStageMap[v.info.predecessors[0].uniqueId].stageManager, splitter);
                }
            }
            else if (v.dynamicManager.type == DynamicManager.Type.PARTIALAGGR)
            {
                DrDynamicAggregateManager dynamicMerge = new DrDynamicAggregateManager();

                dynamicMerge.SetGroupingSettings(0, 0);
                dynamicMerge.SetMachineGroupingSettings(2, aggFilterThreshold);
                dynamicMerge.SetDataGroupingSettings(lowDataThreshold, highDataThreshold, maxSingleDataThreshold);
                dynamicMerge.SetSplitAfterGrouping(true);

                // The same manager instance is attached to every predecessor.
                foreach (Predecessor p in v.info.predecessors)
                {
                    newManager.AddDynamicConnectionManager(graphStageMap[p.uniqueId].stageManager, dynamicMerge);
                }
            }
            else if (v.dynamicManager.type == DynamicManager.Type.FULLAGGR ||
                     v.dynamicManager.type == DynamicManager.Type.HASHDISTRIBUTOR)
            {
                // idx walks the parallel assemblyNames / classNames / methodNames arrays:
                // the merge vertex (if any) consumes one entry, the distribute vertex the next.
                int idx = 0;
                int sz = v.dynamicManager.assemblyNames == null ? 0 : v.dynamicManager.assemblyNames.Length;

                // Allocated exactly once. It is left unconfigured for a hash distributor
                // that has at most one assembly entry.
                DrDynamicAggregateManager dynamicMerge = new DrDynamicAggregateManager();
                if (v.dynamicManager.type == DynamicManager.Type.FULLAGGR || sz > 1)
                {
                    string name = v.dynamicManager.methodNames[idx];
                    DrManagerBase newStage = new DrManagerBase(app.GetGraph(), name);

                    DrActiveVertex mergeVertex = new DrActiveVertex(newStage,
                                                                    parameters.m_defaultProcessTemplate,
                                                                    parameters.m_defaultVertexTemplate);
                    mergeVertex.AddArgument(stdVertexName);
                    mergeVertex.AddArgument(v.dynamicManager.assemblyNames[idx]);
                    mergeVertex.AddArgument(v.dynamicManager.classNames[idx]);
                    mergeVertex.AddArgument(v.dynamicManager.methodNames[idx]);
                    idx++;

                    dynamicMerge.SetInternalVertex(mergeVertex);
                    dynamicMerge.SetGroupingSettings(0, 0);
                    dynamicMerge.SetPodGroupingSettings(lowThreshold, highThreshold);
                    dynamicMerge.SetDataGroupingSettings(lowDataThreshold, highDataThreshold, maxSingleDataThreshold);
                    dynamicMerge.SetMaxAggregationLevel(v.dynamicManager.aggregationLevels);
                }

                if (v.dynamicManager.type == DynamicManager.Type.FULLAGGR)
                {
                    newManager.AddDynamicConnectionManager(graphStageMap[v.info.predecessors[0].uniqueId].stageManager, dynamicMerge);
                }
                else
                {
                    string name = v.dynamicManager.methodNames[idx];
                    DrManagerBase newStage = new DrManagerBase(app.GetGraph(), name);

                    DrActiveVertex distributeVertex = new DrActiveVertex(newStage,
                                                                         parameters.m_defaultProcessTemplate,
                                                                         parameters.m_defaultVertexTemplate);
                    distributeVertex.AddArgument(stdVertexName);
                    distributeVertex.AddArgument(v.dynamicManager.assemblyNames[idx]);
                    distributeVertex.AddArgument(v.dynamicManager.classNames[idx]);
                    distributeVertex.AddArgument(v.dynamicManager.methodNames[idx]);
                    idx++;

                    DrDynamicDistributionManager dynamicHashDistribute =
                        new DrDynamicDistributionManager(distributeVertex, dynamicMerge);
                    // "2GB" per the original comment; presumably assumes a 1GB aggregate threshold - TODO confirm
                    dynamicHashDistribute.SetDataPerVertex(highDataThreshold * 2);

                    newManager.AddDynamicConnectionManager(graphStageMap[v.info.predecessors[0].uniqueId].stageManager, dynamicHashDistribute);
                }
            }
            else if (v.dynamicManager.type == DynamicManager.Type.RANGEDISTRIBUTOR)
            {
                DrStageManager splitManager = graphStageMap[v.dynamicManager.splitVertexId].stageManager;
                DrDynamicRangeDistributionManager drdm =
                    new DrDynamicRangeDistributionManager(splitManager, v.dynamicManager.sampleRate);
                // "2GB" per the original comment; presumably assumes a 1GB aggregate threshold - TODO confirm
                drdm.SetDataPerVertex(highDataThreshold * 2);

                newManager.AddDynamicConnectionManager(graphStageMap[v.info.predecessors[0].uniqueId].stageManager, drdm);
            }
            else if (v.dynamicManager.type == DynamicManager.Type.BROADCAST)
            {
                // the copy vertex; the method-level counter gives each copy stage its own name
                string nameString = String.Format("CP__{0}", bcastNumber++);
                DrManagerBase newStage = new DrManagerBase(app.GetGraph(), nameString);

                DrActiveVertex copyVertex = new DrActiveVertex(newStage,
                                                               parameters.m_defaultProcessTemplate,
                                                               parameters.m_defaultVertexTemplate);
                copyVertex.AddArgument(cpyVertexName);

                DrDynamicBroadcastManager bcast = new DrDynamicBroadcastManager(copyVertex);
                newManager.AddDynamicConnectionManager(graphStageMap[v.info.predecessors[0].uniqueId].stageManager, bcast);
            }
            else if (v.dynamicManager.type != DynamicManager.Type.NONE)
            {
                DryadLogger.LogWarning("Build Graph From Query", "Dynamic manager type {0} not supported yet", v.dynamicManager.type);
            }
        }
    }

    //
    // Add all the edges
    //
    DryadLogger.LogInformation("Build Graph From Query", "Adding edges");
    foreach (KeyValuePair<int, GraphStageInfo> kvp in graphStageMap)
    {
        AddEdges(kvp.Value, graphStageMap);
    }

    //
    // Register the actual created vertices with the graph
    //
    MaterializeToManagers(graphStageMap);
}