예제 #1
0
 public static void UploadToDfs(Uri dfsDirectory, string localPath)
 {
     DryadLogger.LogInformation("Uploading " + localPath + " to " + dfsDirectory.AbsoluteUri);
     try
     {
         if (dfsDirectory.Scheme == "hdfs")
         {
             using (var hdfs = new Microsoft.Research.Peloponnese.Hdfs.HdfsInstance(dfsDirectory))
             {
                 string dfsPath = dfsDirectory.AbsolutePath + Path.GetFileName(localPath);
                 DryadLogger.LogInformation("Uploading " + localPath + " to " + dfsPath);
                 hdfs.UploadAll(localPath, dfsPath);
             }
         }
         else if (dfsDirectory.Scheme == "azureblob")
         {
             string account, key, container, blob;
             Microsoft.Research.Peloponnese.Storage.AzureUtils.FromAzureUri(dfsDirectory, out account, out key, out container, out blob);
             var    azure   = new Microsoft.Research.Peloponnese.Storage.AzureDfsClient(account, key, container);
             string dfsPath = blob + Path.GetFileName(localPath);
             DryadLogger.LogInformation("Uploading " + localPath + " to " + dfsPath);
             azure.PutDfsFile(localPath, dfsPath);
         }
         else if (dfsDirectory.Scheme == "file")
         {
             string dstPath = Path.Combine(dfsDirectory.AbsolutePath, Path.GetFileName(localPath));
             File.Copy(localPath, dstPath);
         }
     }
     catch (Exception e)
     {
         DryadLogger.LogWarning("Failed to upload query plan: " + e.ToString());
     }
 }
예제 #2
0
        public static void UploadToDfs(Uri dfsDirectory, string dfsName, string payload)
        {
            byte[] payloadBytes = System.Text.Encoding.UTF8.GetBytes(payload);

            DryadLogger.LogInformation("Uploading payload to " + dfsName + " at " + dfsDirectory.AbsoluteUri);
            try
            {
                if (dfsDirectory.Scheme == "hdfs")
                {
                    using (var hdfs = new Microsoft.Research.Peloponnese.Hdfs.HdfsInstance(dfsDirectory))
                    {
                        string dfsPath = dfsDirectory.AbsolutePath + dfsName;
                        hdfs.WriteAll(dfsPath, payloadBytes);
                    }
                }
                else if (dfsDirectory.Scheme == "azureblob")
                {
                    string account, key, container, blob;
                    Microsoft.Research.Peloponnese.Storage.AzureUtils.FromAzureUri(dfsDirectory, out account, out key, out container, out blob);
                    var    azure   = new Microsoft.Research.Peloponnese.Storage.AzureDfsClient(account, key, container);
                    string dfsPath = blob + dfsName;
                    azure.PutDfsFile(payloadBytes, dfsPath);
                }
                else if (dfsDirectory.Scheme == "file")
                {
                    string dstPath = Path.Combine(dfsDirectory.AbsolutePath, dfsName);
                    File.WriteAllBytes(dstPath, payloadBytes);
                }
            }
            catch (Exception e)
            {
                DryadLogger.LogWarning("Failed to upload final output: " + e.ToString());
            }
        }
예제 #3
0
        private DrInputStreamManager CreateInputNode(DryadLINQApp app, VertexInfo info, string inputName)
        {
            DrInputStreamManager s;
            int err = 0;

            DryadLogger.LogInformation("methodEntry: " + inputName);

            if (info.ioType == VertexInfo.IOType.PARTITIONEDFILE)
            {
                DrPartitionInputStream input = new DrPartitionInputStream();

                err = input.Open(app.GetUniverse(), new Uri(info.sources[0]).AbsolutePath);
                if (!SUCCEEDED(err))
                {
                    string msg = String.Format("Could not read Partitioned file input {0}", info.sources[0]);
                    throw new LinqToDryadException(msg, err);
                }

                DrManagerBase        inputStage   = new DrManagerBase(app.GetGraph(), inputName);
                DrInputStreamManager inputManager = new DrInputStreamManager(input, inputStage);

                s = inputManager;
            }
            //else if ( info.ioType == VertexInfo.IOType.STREAM )
            //{
            //    DrDscInputStream input = new DrDscInputStream();

            //    DryadLogger.LogInformation("Create input node", "Opening DSC input fileset");

            //    err = input.Open(app.GetUniverse(), info.sources[0]);
            //    if (!SUCCEEDED(err))
            //    {
            //        string msg = String.Format("Could not read DSC input fileset {0}", info.sources[0]);
            //        throw new LinqToDryadException(msg, err);
            //    }

            //    DryadLogger.LogInformation("Create input node", "Opened DSC input fileset");

            //    DrManagerBase inputStage = new DrManagerBase(app.GetGraph(), inputName);
            //    DrInputStreamManager inputManager = new DrInputStreamManager(input, inputStage);

            //    s = inputManager;
            //}
            else if (info.ioType == VertexInfo.IOType.HDFS_STREAM)
            {
                DrHdfsInputStream input = new DrHdfsInputStream();

                DryadLogger.LogInformation("Create input node", "Opening HDFS input fileset");

                Uri srcUri = new Uri(info.sources[0]);

                err = input.Open(app.GetUniverse(), srcUri.GetLeftPart(UriPartial.Path), info.recordType);
                if (!SUCCEEDED(err))
                {
                    string msg = String.Format("Could not read HDFS input fileset {0}: {1}", info.sources[0], input.GetError());
                    throw new LinqToDryadException(msg, err);
                }

                DryadLogger.LogInformation("Create input node", "Opened HDFS input fileset");

                DrManagerBase        inputStage   = new DrManagerBase(app.GetGraph(), inputName);
                DrInputStreamManager inputManager = new DrInputStreamManager(input, inputStage);

                s = inputManager;
            }
            else if (info.ioType == VertexInfo.IOType.AZUREBLOB)
            {
                DrAzureInputStream input = new DrAzureInputStream();

                DryadLogger.LogInformation("Create input node", "Opening Azure input fileset");

                try
                {
                    input.Open(info.sources[0]);
                }
                catch (Exception e)
                {
                    string msg = String.Format("Could not read Azure input fileset {0}: {1}", info.sources[0], e.ToString());
                    throw new LinqToDryadException(msg);
                }

                DryadLogger.LogInformation("Create input node", "Opened Azure input fileset");

                DrManagerBase        inputStage   = new DrManagerBase(app.GetGraph(), inputName);
                DrInputStreamManager inputManager = new DrInputStreamManager(input, inputStage);

                s = inputManager;
            }
            else
            {
                string msg = String.Format("Unknown input type {0}", info.ioType);
                throw new LinqToDryadException(msg);
            }

            DryadLogger.LogInformation("methodExit");
            return(s);
        }
예제 #4
0
        public void BuildGraphFromQuery(DryadLINQApp app, Query query)
        {
            // set configurable properties
            int    highThreshold          = app.GetMaxAggregateInputs();
            int    lowThreshold           = 16;
            UInt64 highDataThreshold      = (UInt64)app.GetAggregateThreshold();
            UInt64 lowDataThreshold       = (3 * highDataThreshold) / 4;
            UInt64 maxSingleDataThreshold = highDataThreshold / 2;
            int    aggFilterThreshold     = app.GetMaxAggregateFilterInputs();

            // use a graph stage map to store the vertices as they are created, grouped by stage.
            Dictionary <int, GraphStageInfo> graphStageMap = new Dictionary <int, GraphStageInfo>();

            DryadLogger.LogInformation("Building graph");

            //
            // Create a set of vertices for each vertex (stage) in the query plan
            //
            DryadLogger.LogInformation("Adding vertices");
            foreach (KeyValuePair <int, Vertex> kvp in query.queryPlan)
            {
                Vertex         v     = kvp.Value;
                GraphStageInfo value = null;

                if (!graphStageMap.TryGetValue(v.uniqueId, out value))
                {
                    DryadLogger.LogInformation(String.Format("Adding vertices for stage {0}", v.name));
                    CreateVertexSet(v, app, query, graphStageMap);
                }
            }

            //
            // Add dynamic stage managers
            //
            DryadLogger.LogInformation("Build Graph From Query", "Adding stage managers");
            foreach (KeyValuePair <int, GraphStageInfo> kvp in graphStageMap)
            {
                Vertex v = kvp.Value.vertex;

                //
                //There are no dynamic managers
                //
                if (v.dynamicManager == null)
                {
                    continue;
                }

                DrStageManager newManager = kvp.Value.stageManager;        // newManager

                DrGraphParameters parameters = app.GetGraph().GetParameters();

                string stdVertexName = "MW";
                string cpyVertexName = "CP";

                if (v.type != Vertex.Type.INPUTTABLE && v.type != Vertex.Type.CONCAT)
                {
                    if (v.dynamicManager.type == DynamicManager.Type.SPLITTER)
                    {
                        if (v.info.predecessors.Length == 1)
                        {
                            DrPipelineSplitManager splitter = new DrPipelineSplitManager();
                            newManager.AddDynamicConnectionManager(graphStageMap[v.info.predecessors[0].uniqueId].stageManager, splitter);
                        }
                        else
                        {
                            DrSemiPipelineSplitManager splitter = new DrSemiPipelineSplitManager();
                            newManager.AddDynamicConnectionManager(graphStageMap[v.info.predecessors[0].uniqueId].stageManager, splitter);
                        }
                    }
                    else if (v.dynamicManager.type == DynamicManager.Type.PARTIALAGGR)
                    {
                        DrDynamicAggregateManager dynamicMerge = new DrDynamicAggregateManager();

                        dynamicMerge.SetGroupingSettings(0, 0);
                        dynamicMerge.SetMachineGroupingSettings(2, aggFilterThreshold);
                        dynamicMerge.SetDataGroupingSettings(lowDataThreshold, highDataThreshold, maxSingleDataThreshold);
                        dynamicMerge.SetSplitAfterGrouping(true);

                        foreach (Predecessor p in v.info.predecessors)
                        {
                            newManager.AddDynamicConnectionManager(graphStageMap[p.uniqueId].stageManager, dynamicMerge);
                        }
                    }
                    else if (v.dynamicManager.type == DynamicManager.Type.FULLAGGR ||
                             v.dynamicManager.type == DynamicManager.Type.HASHDISTRIBUTOR)
                    {
                        int idx = 0;
                        int sz  = v.dynamicManager.assemblyNames == null ? 0 : v.dynamicManager.assemblyNames.Length;
                        DrDynamicAggregateManager dynamicMerge = new DrDynamicAggregateManager();

                        if (v.dynamicManager.type == DynamicManager.Type.FULLAGGR || sz > 1)
                        {
                            dynamicMerge = new DrDynamicAggregateManager();

                            string        name     = v.dynamicManager.methodNames[idx];
                            DrManagerBase newStage = new DrManagerBase(app.GetGraph(), name);

                            DrActiveVertex mergeVertex = new DrActiveVertex(newStage, parameters.m_defaultProcessTemplate, parameters.m_defaultVertexTemplate);
                            mergeVertex.AddArgument(stdVertexName);

                            mergeVertex.AddArgument(v.dynamicManager.assemblyNames[idx]);
                            mergeVertex.AddArgument(v.dynamicManager.classNames[idx]);
                            mergeVertex.AddArgument(v.dynamicManager.methodNames[idx]);

                            idx++;
                            dynamicMerge.SetInternalVertex(mergeVertex);

                            dynamicMerge.SetGroupingSettings(0, 0);
                            dynamicMerge.SetPodGroupingSettings(lowThreshold, highThreshold);
                            dynamicMerge.SetDataGroupingSettings(lowDataThreshold,
                                                                 highDataThreshold,
                                                                 maxSingleDataThreshold);
                            dynamicMerge.SetMaxAggregationLevel(v.dynamicManager.aggregationLevels);
                        }

                        if (v.dynamicManager.type == DynamicManager.Type.FULLAGGR)
                        {
                            newManager.AddDynamicConnectionManager(graphStageMap[v.info.predecessors[0].uniqueId].stageManager, dynamicMerge);
                        }
                        else
                        {
                            string        name     = v.dynamicManager.methodNames[idx];
                            DrManagerBase newStage = new DrManagerBase(app.GetGraph(), name);

                            DrActiveVertex distributeVertex = new DrActiveVertex(newStage, parameters.m_defaultProcessTemplate, parameters.m_defaultVertexTemplate);
                            distributeVertex.AddArgument(stdVertexName);

                            distributeVertex.AddArgument(v.dynamicManager.assemblyNames[idx]);
                            distributeVertex.AddArgument(v.dynamicManager.classNames[idx]);
                            distributeVertex.AddArgument(v.dynamicManager.methodNames[idx]);

                            idx++;

                            DrDynamicDistributionManager dynamicHashDistribute =
                                new DrDynamicDistributionManager(distributeVertex, dynamicMerge);
                            dynamicHashDistribute.SetDataPerVertex(highDataThreshold * 2);  // 2GB

                            newManager.AddDynamicConnectionManager(graphStageMap[v.info.predecessors[0].uniqueId].stageManager, dynamicHashDistribute);
                        }
                    }
                    else if (v.dynamicManager.type == DynamicManager.Type.RANGEDISTRIBUTOR)
                    {
                        DrStageManager splitManager            = graphStageMap[v.dynamicManager.splitVertexId].stageManager;
                        DrDynamicRangeDistributionManager drdm = new DrDynamicRangeDistributionManager(splitManager, v.dynamicManager.sampleRate);
                        drdm.SetDataPerVertex(highDataThreshold * 2);   // 2GB
                        newManager.AddDynamicConnectionManager(graphStageMap[v.info.predecessors[0].uniqueId].stageManager, drdm);
                    }
                    else if (v.dynamicManager.type == DynamicManager.Type.BROADCAST)
                    {
                        // the copy vertex
                        int           bcastNumber = 0;
                        string        nameString  = String.Format("CP__{0}", bcastNumber++);
                        DrManagerBase newStage    = new DrManagerBase(app.GetGraph(), nameString);

                        DrActiveVertex copyVertex =
                            new DrActiveVertex(newStage,
                                               parameters.m_defaultProcessTemplate,
                                               parameters.m_defaultVertexTemplate);
                        copyVertex.AddArgument(cpyVertexName);

                        DrDynamicBroadcastManager bcast = new DrDynamicBroadcastManager(copyVertex);
                        newManager.AddDynamicConnectionManager(graphStageMap[v.info.predecessors[0].uniqueId].stageManager, bcast);
                    }
                    else if (v.dynamicManager.type != DynamicManager.Type.NONE)
                    {
                        DryadLogger.LogWarning(String.Format("Dynamic manager type {0} not supported yet", v.dynamicManager.type));
                    }
                }
            }


            //
            // Add all the edges
            //
            DryadLogger.LogInformation("Build Graph From Query", "Adding edges");
            foreach (KeyValuePair <int, GraphStageInfo> kvp in graphStageMap)
            {
                AddEdges(kvp.Value, graphStageMap);
            }

            //
            // Register the actual created vertices with the graph
            //
            MaterializeToManagers(graphStageMap);
        }
예제 #5
0
        private DrOutputStreamManager CreateOutputNode(DryadLINQApp app, VertexInfo info, string outputName)
        {
            DryadLogger.LogInformation("methodEntry: " + outputName);

            DrOutputStreamManager s;

            if (info.ioType == VertexInfo.IOType.PARTITIONEDFILE)
            {
                DrPartitionOutputStream output = new DrPartitionOutputStream();
                Uri    sourceUri  = new Uri(info.sources[0]);
                string sourcePath = sourceUri.AbsolutePath;
                if (!String.IsNullOrEmpty(sourceUri.Host))
                {
                    sourcePath = @"\\" + sourceUri.Host + sourcePath;
                }
                int err = output.Open(sourcePath, info.partitionUncPath);
                if (!SUCCEEDED(err))
                {
                    string msg = String.Format("Could not open output fileset {0}", sourcePath);
                    throw new LinqToDryadException(msg, err);
                }

                DrManagerBase         outputStage   = new DrManagerBase(app.GetGraph(), outputName);
                DrOutputStreamManager outputManager = new DrOutputStreamManager(output, outputStage);
                app.GetGraph().AddPartitionGenerator(outputManager);

                s = outputManager;
            }
            //else if ( info.ioType == VertexInfo.IOType.STREAM )
            //{
            //    DrDscOutputStream output = new DrDscOutputStream(info.compressionScheme, info.isTemporary);
            //    int err = 0;
            //    if (info.recordType == "")
            //    {
            //        err = output.Open(info.sources[0], info.partitionUncPath);
            //    }
            //    else
            //    {
            //        err = output.OpenWithRecordType(info.sources[0], info.partitionUncPath, info.recordType);
            //    }

            //    if (!SUCCEEDED(err))
            //    {
            //        string msg = String.Format("Could not open DSC output fileset {0}", info.sources[0]);
            //        throw new LinqToDryadException(msg, err);
            //    }

            //    DrManagerBase outputStage = new DrManagerBase(app.GetGraph(), outputName);
            //    DrOutputStreamManager outputManager = new DrOutputStreamManager(output, outputStage);
            //    app.GetGraph().AddPartitionGenerator(outputManager);

            //    s = outputManager;
            //}
            else if (info.ioType == VertexInfo.IOType.HDFS_STREAM)
            {
                DrHdfsOutputStream output = new DrHdfsOutputStream();
                int err = output.Open(info.sources[0]);

                if (!SUCCEEDED(err))
                {
                    string msg = String.Format("Could not open HDFS output fileset {0}: {1}", info.sources[0], output.GetError());
                    throw new LinqToDryadException(msg, err);
                }

                DrManagerBase         outputStage   = new DrManagerBase(app.GetGraph(), outputName);
                DrOutputStreamManager outputManager = new DrOutputStreamManager(output, outputStage);
                app.GetGraph().AddPartitionGenerator(outputManager);

                s = outputManager;
            }
            else if (info.ioType == VertexInfo.IOType.AZUREBLOB)
            {
                DrAzureOutputStream output = new DrAzureOutputStream();

                try
                {
                    output.Open(info.sources[0]);
                }
                catch (Exception e)
                {
                    string msg = String.Format("Could not open Azure output fileset {0}: {1}", info.sources[0], e.ToString());
                    throw new LinqToDryadException(msg);
                }

                DrManagerBase         outputStage   = new DrManagerBase(app.GetGraph(), outputName);
                DrOutputStreamManager outputManager = new DrOutputStreamManager(output, outputStage);
                app.GetGraph().AddPartitionGenerator(outputManager);

                s = outputManager;
            }
            else
            {
                string msg = String.Format("Unknown output type {0}", info.ioType);
                throw new LinqToDryadException(msg);
            }

            return(s);
        }
예제 #6
0
        //
        // Main Dryad LINQ execution stuff
        //
        public int ExecLinqToDryad(Uri dfsDirectory, string[] args, out string errorString)
        {
            //
            // must be at least two arguments (program name and query XML file name)
            //
            if (args.Length < 2)
            {
                errorString = "Must provide at least query XML file name and VertexHost executable.";
                DryadLogger.LogCritical(errorString);
                return(-1);
            }

            //
            // break if --break is included in arguments (and eliminate it, as it is not known downstream)
            //
            if (ConsumeSingleArgument("--break", ref args))
            {
                DebugHelper.WaitForDebugger();
            }

            // this is where we ensure the right type of scheduler is registered
            Microsoft.Research.Dryad.LocalScheduler.Registration.Ensure();

            //
            // parse the XML input, producing a DryadLINQ Query
            //
            Query           query  = new Query();
            QueryPlanParser parser = new QueryPlanParser();

            if (!parser.ParseQueryXml(args[1], query))
            {
                errorString = "Invalid query plan";
                DryadLogger.LogCritical(errorString);
                return(-1);
            }

            //
            // upload the query plan to the job DFS directory for the benefit of debug tools
            //
            if (dfsDirectory != null)
            {
                DebugHelper.UploadToDfs(dfsDirectory, args[1]);
            }

            //
            // build internal app arguments
            //
            List <string> internalArgs = new List <string>();

            //
            // add the XmlExecHost args to the internal app arguments
            //
            foreach (string xmlExecHostArg in query.xmlExecHostArgs)

            {
                if (xmlExecHostArg == "--break")
                {
                    DebugHelper.WaitForDebugger();
                }
                else
                {
                    internalArgs.Add(xmlExecHostArg);
                }
            }

            //
            // combine internal arguments with any additional arguments received on the command line
            // don't include argv[0] and argv[1] (program name and query XML file name)
            //

            int internalArgc = (int)internalArgs.Count;
            int externalArgc = args.Length - 2;          // don't include argv[0] and argv[1]
            int combinedArgc = internalArgc + externalArgc;

            string[] combinedArgv = new string[combinedArgc];

            string msg = "";

            // internal arguments first
            for (int i = 0; i < internalArgc; i++)
            {
                combinedArgv[i] = internalArgs[i];
                msg            += String.Format("{0} ", combinedArgv[i]);
            }

            // then external arguments
            for (int i = 0; i < externalArgc; i++)
            {
                combinedArgv[i + internalArgc] = args[i + 2]; // don't include argv[0] and argv[1]

                msg += String.Format("{0} ", combinedArgv[i + internalArgc]);
            }
            DryadLogger.LogInformation(String.Format("Arguments: {0}", msg));

            string jobClass = "DryadLINQ";
            string exeName  = args[0];

            // create app and run it
            //
            DrGraphParameters p = DrDefaultParameters.Make(exeName, jobClass, query.enableSpeculativeDuplication);

            foreach (DrIReporter reporter in DebugHelper.Reporters())
            {
                p.m_reporters.Add(reporter);
            }
            p.m_intermediateCompressionMode = query.intermediateDataCompression;
            DrGraphExecutor graphExecutor = new DrGraphExecutor();
            DrGraph         graph         = graphExecutor.Initialize(p);

            if (graph == null)
            {
                errorString = "Failed to initialize Graph Executor";
                DryadLogger.LogCritical(errorString);
                return(-1);
            }
            DryadLINQApp app = new DryadLINQApp(graph);

            // Initialize with arguments
            app.SetXmlFileName(args[1]);
            if (!app.ParseCommandLineFlags(combinedArgv))
            {
                errorString = "Bad command-line options";
                DryadLogger.LogCritical(errorString);
                return(-1);
            }
            // Build graph from query plan
            GraphBuilder builder = new GraphBuilder();

            builder.BuildGraphFromQuery(app, query);

            // Run the app
            DryadLogger.LogInformation("Running the app");

            graphExecutor.Run();
            DrError exitStatus = graphExecutor.Join();

            DryadLogger.LogInformation("Finished running the app");

            if (exitStatus == null || exitStatus.m_code == 0)
            {
                FinalizeExecution(query, graph);
                DryadLogger.LogInformation("Application completed successfully.");
                errorString = null;
                return(0);
            }
            else
            {
                DryadLogger.LogCritical(String.Format("Application failed with error code 0x{0:X8}.\n", exitStatus.m_code));
                errorString = exitStatus.ToFullTextNative();
                return(exitStatus.m_code);
            }
        }