/// <summary>
/// Submits a one-time run through a client created for this call and blocks until the
/// submission completes.
/// </summary>
/// <param name="settings">Run definition forwarded unchanged to <c>Jobs.RunSubmit</c>.</param>
/// <returns>The <see cref="long"/> value produced by <c>Jobs.RunSubmit</c>.</returns>
public long JobsRunSubmit(RunOnceSettings settings)
{
    // The client is built from this instance's base URL and token and disposed on exit.
    using (var databricks = DatabricksClient.CreateClient(this.baseUrl, this.token))
    {
        var submission = databricks.Jobs.RunSubmit(settings);

        // Reading .Result blocks the calling thread, which keeps this method synchronous.
        long submitted = submission.Result;
        return submitted;
    }
}
/// <summary>
/// Submits a notebook as a one-time run on a new cluster, polls the run until it reports a
/// result state, and returns the run together with its output and a cost estimate.
/// </summary>
/// <param name="request">
/// Supplies the notebook path and parameters, libraries, cluster sizing (min/max workers, node
/// type), the run timeout, the per-node cost and the test run id copied onto the response.
/// </param>
/// <param name="cancellationToken">
/// Checked before any work is done and before every poll. When cancellation is observed after
/// submission, the run is cancelled through <c>client.JobsRunCancel</c> before returning.
/// </param>
/// <returns>
/// A response whose run has <c>ResultState = CANCELED</c> when cancellation was observed;
/// otherwise the final run, its output (only fetched when <c>IsRunSuccess()</c> is true) and
/// the cost fields.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="request"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><c>request.TimeoutSeconds</c> is zero or negative.</exception>
public SparkClientResponse RunNotebook(SparkClientRequest request, CancellationToken cancellationToken)
{
    if (cancellationToken.IsCancellationRequested)
    {
        return CreateCanceledResponse();
    }

    if (request == null)
    {
        throw new ArgumentNullException(nameof(request));
    }

    // We must have a timeout.
    if (request.TimeoutSeconds <= 0)
    {
        throw new ArgumentOutOfRangeException(nameof(request.TimeoutSeconds));
    }

    // Delay between two consecutive status polls.
    var pollInterval = TimeSpan.FromSeconds(60);

    var runStartTime = DateTime.UtcNow;
    Console.WriteLine($"[{runStartTime.ToString("o")}] SparkClient.RunNotebook() started.");
    Console.WriteLine($"Using TimeoutSeconds={request.TimeoutSeconds}");

    var notebookPath = request.NotebookPath;

    // New cluster config
    var newCluster = GetDefaultClusterInfo(request.NumWorkersMin, request.NumWorkersMax, request.NodeType);
    Console.WriteLine($"SparkClient: Creating new cluster with NumWorkers=({newCluster.AutoScale.MinWorkers},{newCluster.AutoScale.MaxWorkers}), NodeType={newCluster.NodeTypeId}, Runtime={newCluster.RuntimeVersion}");

    var runOnceSettings = new RunOnceSettings
    {
        RunName = notebookPath + "Job",
        Libraries = request.Libraries,
        NewCluster = newCluster,
        NotebookTask = new NotebookTask
        {
            BaseParameters = request.NotebookParameters,
            NotebookPath = notebookPath
        },
        TimeoutSeconds = request.TimeoutSeconds
    };

    // Start the job and retrieve the run id.
    // NOTE(review): `client` is not declared in this method; it is resolved from the enclosing scope.
    var runId = Retrier.Retry(() => client.JobsRunSubmit(runOnceSettings));

    Run run;
    while (true)
    {
        if (cancellationToken.IsCancellationRequested)
        {
            client.JobsRunCancel(runId);
            return CreateCanceledResponse();
        }

        // Keep polling the run by calling RunsGet until run terminates:
        run = Retrier.Retry<Run>(() => client.JobsRunsGet(runId));
        Console.WriteLine($"SparkClient: RunId = {runId} returned status {run.State.StateMessage}");

        if (run.State.ResultState.HasValue)
        {
            break;
        }

        // Wait on the token's handle instead of sleeping so that a cancellation request ends
        // the wait immediately; the check at the top of the loop then cancels the run.
        cancellationToken.WaitHandle.WaitOne(pollInterval);
    }

    var response = new SparkClientResponse
    {
        Run = run,
        RunOutput = null,
        TestRunId = request.TestRunId
    };

    // The notebook output is only retrieved for successful runs; otherwise it stays null.
    string runOutputText = null;
    if (response.IsRunSuccess())
    {
        var runOutput = Retrier.Retry(() => client.JobsRunsGetOutput(run.RunId));
        runOutputText = runOutput.Item1;
        response.RunOutput = runOutputText;
    }

    var runEndTime = DateTime.UtcNow;
    var totalElapsed = runEndTime - runStartTime;

    // Calculate cost from the wall-clock time spent in this method.
    response.NumWorkersMin = request.NumWorkersMin;
    response.NumWorkersMax = request.NumWorkersMax;
    response.CostPerNode = request.CostPerNode;
    response.TotalHours = totalElapsed.TotalHours;
    response.NodeType = request.NodeType;

    // The plus one is the driver node.
    response.Cost = response.CostPerNode * response.TotalHours * (Average(request.NumWorkersMin, request.NumWorkersMax) + 1);

    Console.WriteLine($"SparkClient: RunId = {runId}: Ended. Result:{run?.State?.ResultState}. Result from Notebook : {runOutputText}");
    Console.WriteLine($"[{runEndTime.ToString("o")}] RunId={runId}. Completed SparkClient.RunNotebook(), Elapsed Time = {totalElapsed}");
    Console.WriteLine($"[{runEndTime.ToString("o")}] RunId={runId}. NumWorkers=({response.NumWorkersMin},{response.NumWorkersMax}), CostPerNode={response.CostPerNode}, TotalHours={response.TotalHours}, Cost=${response.Cost}");

    return response;
}

/// <summary>
/// Builds the response returned when cancellation is observed: a run whose only populated
/// field is <c>State.ResultState = CANCELED</c>.
/// </summary>
private static SparkClientResponse CreateCanceledResponse()
{
    return new SparkClientResponse
    {
        Run = new Run
        {
            State = new RunState
            {
                ResultState = RunResultState.CANCELED
            }
        }
    };
}