/// <summary>
/// Walks through the DBFS API: lists <c>dbfs:/</c>, uploads a downloaded text file to a
/// randomly named path under <c>/test/</c>, prints the uploaded file's status and deletes it again.
/// </summary>
/// <param name="client">The Databricks client whose <c>Dbfs</c> API is exercised.</param>
/// <exception cref="HttpRequestException">Thrown when the sample file cannot be downloaded successfully.</exception>
private static async Task DbfsApi(DatabricksClient client)
{
    Console.WriteLine("Listing directories under dbfs:/");
    var result = await client.Dbfs.List("/");
    foreach (var fileInfo in result)
    {
        // Directories are printed in square brackets, plain files without.
        Console.WriteLine(fileInfo.IsDirectory ? "[{0}]\t{1}" : "{0}\t{1}", fileInfo.Path, fileInfo.FileSize);
    }

    Console.WriteLine("Uploading a file");
    var uploadPath = "/test/" + Guid.NewGuid() + ".txt";
    using (var ms = new MemoryStream())
    {
        using (var httpClient = new HttpClient())
        using (var response = await httpClient.GetAsync("https://norvig.com/big.txt", HttpCompletionOption.ResponseHeadersRead))
        {
            // Fail fast instead of uploading an HTTP error page as the sample file.
            response.EnsureSuccessStatusCode();
            await response.Content.CopyToAsync(ms);
        }

        // Writing the download into the stream leaves its position at the end;
        // rewind so the upload reads the content from the first byte.
        ms.Position = 0;
        await client.Dbfs.Upload(uploadPath, true, ms);
    }

    Console.WriteLine("Getting info of the uploaded file");
    var uploadedFile = await client.Dbfs.GetStatus(uploadPath);
    Console.WriteLine("Path: {0}\tSize: {1}", uploadedFile.Path, uploadedFile.FileSize);

    Console.WriteLine("Deleting uploaded file");
    await client.Dbfs.Delete(uploadPath, false);
}
/// <summary>
/// Builds a <see cref="DatabricksClient"/> whose <see cref="IJobsApi"/> is a stub handing back the given
/// <paramref name="runs"/> and <paramref name="jobs"/>; all other APIs are default mocks.
/// </summary>
/// <param name="runs">The stubbed runs the <see cref="IJobsApi.RunsList"/> should return.</param>
/// <param name="jobs">The stubbed jobs that the <see cref="IJobsApi.List"/> should return.</param>
/// <returns>The client wired up with the stubbed jobs API.</returns>
/// <exception cref="System.ArgumentNullException">Thrown when <paramref name="runs"/> or <paramref name="jobs"/> is <c>null</c>.</exception>
public static DatabricksClient Create(IEnumerable<Run> runs, IEnumerable<Job> jobs)
{
    Guard.NotNull(runs, nameof(runs));
    Guard.NotNull(jobs, nameof(jobs));

    // A single run list instance is handed out for every matching RunsList call.
    var stubbedRunList = new RunList { Runs = runs };

    var jobsApiStub = new Mock<IJobsApi>();
    jobsApiStub
        .Setup(api => api.RunsList(
            null,
            It.IsAny<int>(),
            It.IsAny<int>(),
            It.IsAny<bool>(),
            It.IsAny<bool>(),
            It.IsAny<CancellationToken>()))
        .ReturnsAsync(stubbedRunList);
    jobsApiStub
        .Setup(api => api.List(It.IsAny<CancellationToken>()))
        .ReturnsAsync(jobs);

    return DatabricksClient.CreateClient(
        clusterApi: Mock.Of<IClustersApi>(),
        jobsApi: jobsApiStub.Object,
        dbfsApi: Mock.Of<IDbfsApi>(),
        secretsApi: Mock.Of<ISecretsApi>(),
        groupsApi: Mock.Of<IGroupsApi>(),
        librariesApi: Mock.Of<ILibrariesApi>(),
        tokenApi: Mock.Of<ITokenApi>(),
        workspaceApi: Mock.Of<IWorkspaceApi>(),
        instancePoolApi: Mock.Of<IInstancePoolApi>());
}
/// <summary>
/// Walks through the Token API: creates and revokes a token with a <c>null</c> lifetime argument,
/// does the same with a lifetime argument of 3600, and finally lists the tokens.
/// </summary>
/// <param name="client">The Databricks client whose <c>Token</c> API is exercised.</param>
private static async Task TokenApi(DatabricksClient client)
{
    Console.WriteLine("Creating token without expiry");
    var (permanentValue, permanentInfo) = await client.Token.Create(null, "Sample token");
    Console.WriteLine("Token value: {0}", permanentValue);
    Console.WriteLine("Token Id {0}", permanentInfo.TokenId);
    Console.WriteLine("Token comment {0}", permanentInfo.Comment);
    Console.WriteLine("Token creation time {0:s}", permanentInfo.CreationTime);
    Console.WriteLine("Token expiry time {0:s}", permanentInfo.ExpiryTime);

    Console.WriteLine("Deleting token");
    await client.Token.Revoke(permanentInfo.TokenId);

    Console.WriteLine("Creating token with expiry");
    var (expiringValue, expiringInfo) = await client.Token.Create(3600, "Sample token");
    Console.WriteLine("Token value: {0}", expiringValue);
    Console.WriteLine("Token comment {0}", expiringInfo.Comment);
    Console.WriteLine("Token creation time {0:s}", expiringInfo.CreationTime);
    Console.WriteLine("Token expiry time {0:s}", expiringInfo.ExpiryTime);

    Console.WriteLine("Deleting token");
    await client.Token.Revoke(expiringInfo.TokenId);

    Console.WriteLine("Listing tokens");
    var existingTokens = await client.Token.List();
    foreach (var existing in existingTokens)
    {
        Console.WriteLine("Token Id {0}\tComment {1}", existing.TokenId, existing.Comment);
    }
}
// Verifies how DatabricksClient.ParseDatabricksJobState maps raw Databricks state names onto JobState values.
public void TestParseDatabricksJobState()
{
    // Parses one raw state and compares it with the expected JobState.
    void AssertParsesTo(JobState expectedValue, string rawState)
    {
        var actualValue = DatabricksClient.ParseDatabricksJobState(rawState);
        Assert.AreEqual(expectedValue, actualValue, "State mismatch when job state is " + rawState);
    }

    AssertParsesTo(JobState.Running, "RUNNING");
    AssertParsesTo(JobState.Starting, "PENDING");
    AssertParsesTo(JobState.Error, "INTERNAL_ERROR");

    // The remaining raw states are all expected to map to Idle.
    AssertParsesTo(JobState.Idle, "SKIPPED");
    AssertParsesTo(JobState.Idle, "TERMINATING");
    AssertParsesTo(JobState.Idle, "TERMINATED");
}
/// <summary>
/// Deletes a job, blocking the calling thread until the Jobs API call has finished.
/// </summary>
/// <param name="jobId">The job identifier.</param>
public void JobsDelete(long jobId)
{
    // A client is created for this call only and disposed as soon as it completes.
    using (var databricks = DatabricksClient.CreateClient(this.baseUrl, this.token))
    {
        var deletion = databricks.Jobs.Delete(jobId);
        deletion.Wait();
    }
}
/// <summary>
/// Deletes the DBFS path, blocking the calling thread until the DBFS API call has finished.
/// </summary>
/// <param name="path">The path.</param>
/// <param name="recurse">Passed through as the recurse argument of the DBFS delete call.</param>
public void DbfsDelete(string path, bool recurse)
{
    using (var client = DatabricksClient.CreateClient(this.baseUrl, this.token))
    {
        // Wait for the delete to complete: without this the client is disposed while the
        // request may still be in flight and any failure goes unobserved.
        client.Dbfs.Delete(path, recurse).Wait();
    }
}
/// <summary>
/// Sample entry point: creates a Databricks client from the first two command-line arguments
/// and runs every API walkthrough in sequence.
/// </summary>
/// <param name="args">Expects the base URL at index 0 and the access token at index 1.</param>
public static async Task Main(string[] args)
{
    if (args.Length < 2)
    {
        await Console.Error.WriteLineAsync("Usage: <Azure databricks base url> <access token>");
        return;
    }

    string workspaceUrl = args[0];
    string accessToken = args[1];

    Console.WriteLine("Creating client");
    using (var databricks = DatabricksClient.CreateClient(workspaceUrl, accessToken))
    {
        // The walkthroughs run one after another on the same client.
        await WorkspaceApi(databricks);
        await LibrariesApi(databricks);
        await SecretsApi(databricks);
        await TokenApi(databricks);
        await GroupsApi(databricks);
        await DbfsApi(databricks);
        await JobsApi(databricks);
        await ClustersApi(databricks);
        await InstancePoolApi(databricks);
    }

    Console.WriteLine("Press enter to exit");
    Console.ReadLine();
}
// Runs are generated at or after the start of a window, while the provider is asked for a window
// that ends at that start; no finished job runs are expected back.
public async Task GetFinishedJobRuns_OutsideTimeWindow_ReturnsNoFinishedJobs()
{
    // Arrange
    DateTimeOffset windowStart = BogusGenerator.Date.RecentOffset();
    DateTimeOffset windowEnd = BogusGenerator.Date.SoonOffset();
    IEnumerable<Run> runsInWindow = CreateRandomRuns(windowStart, windowEnd);
    IEnumerable<Run> runsAfterWindow = CreateRandomRuns(windowEnd, BogusGenerator.Date.FutureOffset());
    IEnumerable<Run> everyRun = runsInWindow.Concat(runsAfterWindow);

    // One randomly named job per in-window run.
    IEnumerable<Job> jobs =
        runsInWindow
            .Select(run => new Job
            {
                JobId = run.JobId,
                Settings = new JobSettings { Name = Guid.NewGuid().ToString() }
            })
            .ToArray();

    DatabricksClient databricks = DatabricksClientFactory.Create(everyRun, jobs);
    var sut = new DatabricksInfoProvider(databricks, NullLogger.Instance);

    // Act
    IEnumerable<JobRun> finishedRuns =
        await sut.GetFinishedJobRunsAsync(BogusGenerator.Date.PastOffset(), windowStart);

    // Assert
    Assert.Empty(finishedRuns);
}
/// <summary>
/// Gets the output of a job run, blocking the calling thread until the Jobs API call has finished.
/// </summary>
/// <param name="runId">The run identifier.</param>
/// <returns>The <c>(string, string, Run)</c> tuple produced by the Jobs API <c>RunsGetOutput</c> call.</returns>
public (string, string, Run) JobsRunsGetOutput(long runId)
{
    // A client is created for this call only and disposed as soon as it completes.
    using (var databricks = DatabricksClient.CreateClient(this.baseUrl, this.token))
    {
        var pendingOutput = databricks.Jobs.RunsGetOutput(runId);
        return pendingOutput.Result;
    }
}
/// <summary>
/// Submits the given settings through the Jobs API <c>RunSubmit</c> call, blocking the calling
/// thread until the call has finished.
/// </summary>
/// <param name="settings">The run-once settings to submit.</param>
/// <returns>The value returned by <c>RunSubmit</c> (presumably the identifier of the submitted run — confirm against the client).</returns>
public long JobsRunSubmit(RunOnceSettings settings)
{
    // A client is created for this call only and disposed as soon as it completes.
    using (var databricks = DatabricksClient.CreateClient(this.baseUrl, this.token))
    {
        var submission = databricks.Jobs.RunSubmit(settings);
        return submission.Result;
    }
}
// Triggers a run of a fixed job using the Databricks connection details from Settings.
public async Task RunDatabricks()
{
    // Hard-coded identifier of the job to trigger.
    const int jobToRun = 51;

    var baseUrl = Settings.Instance.Databricks.BaseUrl;
    var token = Settings.Instance.Databricks.Token;

    using (var databricks = DatabricksClient.CreateClient(baseUrl, token))
    {
        // The result is not used afterwards; it is only kept in a local.
        var runIdentifier = await databricks.Jobs.RunNow(jobToRun, null);
    }
}
// Starts the Azure Functions Databricks project, triggers the configured Databricks job, waits for the
// run to complete and then retries querying Application Insights until the custom metric shows up.
public async Task MinimumAzureFunctionsDatabricksProject_WithEmbeddedTimer_ReportsAsMetricPeriodically()
{
    ApplicationInsightsConfig insightsConfig = _config.GetApplicationInsightsConfig();
    var emptyNotebookParameters =
        RunParameters.CreateNotebookParams(Enumerable.Empty<KeyValuePair<string, string>>());

    using (var project = AzureFunctionsDatabricksProject.StartNew(_config, _outputWriter))
    using (var databricks = DatabricksClient.CreateClient(project.DatabricksConfig.BaseUrl, project.DatabricksConfig.SecurityToken))
    {
        // Act
        await databricks.Jobs.RunNow(project.DatabricksConfig.JobId, emptyNotebookParameters);
        await WaitUntilDatabricksJobRunIsCompleted(databricks, project.DatabricksConfig.JobId);
    }

    // Assert
    using (ApplicationInsightsDataClient insights = CreateApplicationInsightsClient(insightsConfig.ApiKey))
    {
        // ISO 8601 duration of 0.1 hour, i.e. the last six minutes.
        const string recentTimespan = "PT0.1H";

        await RetryAssertUntilTelemetryShouldBeAvailableAsync(async () =>
        {
            // A fresh query (with a new id) is built on every attempt.
            var metricQuery = new MetricsPostBodySchema(
                id: Guid.NewGuid().ToString(),
                parameters: new MetricsPostBodySchemaParameters(
                    $"customMetrics/{insightsConfig.MetricName}",
                    timespan: recentTimespan));

            IList<MetricsResultsItem> metricResults =
                await insights.Metrics.GetMultipleAsync(
                    insightsConfig.ApplicationId,
                    new List<MetricsPostBodySchema> { metricQuery });

            Assert.NotEmpty(metricResults);
            Assert.All(metricResults, item => Assert.NotNull(item.Body.Value));
        }, timeout: TimeSpan.FromMinutes(2));
    }
}
/// <summary>
/// Triggers a run of the given job, blocking the calling thread until the Jobs API call has finished.
/// </summary>
/// <param name="jobId">The job identifier.</param>
/// <param name="runParameters">The run parameters.</param>
/// <returns>The <see cref="RunIdentifier"/> returned by the Jobs API <c>RunNow</c> call.</returns>
public RunIdentifier JobsRunNow(long jobId, RunParameters runParameters)
{
    // A client is created for this call only and disposed as soon as it completes.
    using (var databricks = DatabricksClient.CreateClient(this.baseUrl, this.token))
    {
        var triggeredRun = databricks.Jobs.RunNow(jobId, runParameters);
        return triggeredRun.Result;
    }
}
/// <summary>
/// Imports content into the workspace, blocking the calling thread until the Workspace API call has finished.
/// </summary>
/// <param name="path">The path.</param>
/// <param name="format">The format.</param>
/// <param name="language">The language; may be <c>null</c>.</param>
/// <param name="content">The content.</param>
/// <param name="overwrite">Passed through as the overwrite argument of the import call.</param>
public void WorkspaceImport(string path, ExportFormat format, Language? language, byte[] content, bool overwrite)
{
    // A client is created for this call only and disposed as soon as it completes.
    using (var databricks = DatabricksClient.CreateClient(this.baseUrl, this.token))
    {
        var import = databricks.Workspace.Import(path, format, language, content, overwrite);
        import.Wait();
    }
}
/// <summary>
/// Cancels a job run, blocking the calling thread until the Jobs API call has finished.
/// </summary>
/// <param name="runId">The run identifier.</param>
public void JobsRunCancel(long runId)
{
    // A client is created for this call only and disposed as soon as it completes.
    using (var databricks = DatabricksClient.CreateClient(this.baseUrl, this.token))
    {
        var cancellation = databricks.Jobs.RunsCancel(runId);
        cancellation.Wait();
    }
}
/// <summary>
/// Walks through the Groups API: lists the groups, creates and deletes a sample group,
/// and prints the members of the <c>admins</c> group.
/// </summary>
/// <param name="client">The Databricks client whose <c>Groups</c> API is exercised.</param>
private static async Task GroupsApi(DatabricksClient client)
{
    Console.WriteLine("Listing groups");
    var existingGroups = await client.Groups.List();
    foreach (var existingGroup in existingGroups)
    {
        Console.WriteLine("Group name: {0}", existingGroup);
    }

    const string sampleGroup = "sample group";

    Console.WriteLine("Creating new group \"{0}\"", sampleGroup);
    await client.Groups.Create(sampleGroup);

    Console.WriteLine("Deleting group \"{0}\"", sampleGroup);
    await client.Groups.Delete(sampleGroup);

    Console.WriteLine("Listing members in admins group");
    var adminMembers = await client.Groups.ListMembers("admins");
    foreach (var adminMember in adminMembers)
    {
        // A member without a user name is printed as a group.
        if (string.IsNullOrEmpty(adminMember.UserName))
        {
            Console.WriteLine("Member (Group): {0}", adminMember.GroupName);
        }
        else
        {
            Console.WriteLine("Member (User): {0}", adminMember.UserName);
        }
    }
}
/// <summary>
/// Initializes a new instance of the <see cref="DatabricksInfoProvider"/> class.
/// </summary>
/// <param name="databricksClient">The client to interact with Databricks.</param>
/// <param name="logger">The instance to log metric reports of job runs.</param>
/// <exception cref="ArgumentNullException">Thrown when the <paramref name="databricksClient"/> or <paramref name="logger"/> is <c>null</c>.</exception>
public DatabricksInfoProvider(DatabricksClient databricksClient, ILogger logger)
{
    // Both dependencies are validated before any field is assigned.
    Guard.NotNull(databricksClient, nameof(databricksClient));
    Guard.NotNull(logger, nameof(logger));

    _logger = logger;
    _databricksClient = databricksClient;
}
// Fetches a fixed job and a fixed run using the Databricks connection details from Settings.
public async Task GetJobInfo()
{
    // Hard-coded identifiers of the job and the run to look up.
    const int jobToFetch = 51;
    const int runToFetch = 22;

    var baseUrl = Settings.Instance.Databricks.BaseUrl;
    var token = Settings.Instance.Databricks.Token;

    using (var databricks = DatabricksClient.CreateClient(baseUrl, token))
    {
        // The results are not used afterwards; they are only kept in locals.
        var fetchedJob = await databricks.Jobs.Get(jobToFetch);
        var fetchedRun = await databricks.Jobs.RunsGet(runToFetch);
    }
}
// Asks the provider for finished job runs with a start taken from SoonOffset and an end taken from
// RecentOffset, and expects an ArgumentException (or a subtype).
public async Task GetFinishedJobRuns_WithEndTimeLessThanStartTime_Throws()
{
    // Arrange
    DateTimeOffset soonStart = BogusGenerator.Date.SoonOffset();
    DateTimeOffset recentEnd = BogusGenerator.Date.RecentOffset();

    IEnumerable<Run> noRuns = Enumerable.Empty<Run>();
    IEnumerable<Job> noJobs = Enumerable.Empty<Job>();
    DatabricksClient databricks = DatabricksClientFactory.Create(noRuns, noJobs);
    var sut = new DatabricksInfoProvider(databricks, NullLogger.Instance);

    // Act / Assert
    await Assert.ThrowsAnyAsync<ArgumentException>(
        () => sut.GetFinishedJobRunsAsync(soonStart, recentEnd));
}
/// <summary>
/// Polls the active runs of the given job every 10 seconds until none of them is still incomplete,
/// giving up after 7 minutes, and then waits another 2 minutes before returning.
/// </summary>
/// <param name="client">The Databricks client used to list the job runs.</param>
/// <param name="jobId">The identifier of the job whose active runs are polled.</param>
private static async Task WaitUntilDatabricksJobRunIsCompleted(DatabricksClient client, int jobId)
{
    // Keep retrying while at least one listed run is not completed.
    // A missing run list is treated as "nothing left to wait for" instead of failing with a
    // null dereference (assumption: the client may leave Runs unset when there are no runs — confirm).
    AsyncRetryPolicy<RunList> retryPolicy =
        Policy.HandleResult<RunList>(list => list?.Runs != null && list.Runs.Any(r => !r.IsCompleted))
              .WaitAndRetryForeverAsync(index => TimeSpan.FromSeconds(10));

    // The timeout wraps the retry-forever policy so the polling cannot go on indefinitely.
    await Policy.TimeoutAsync(TimeSpan.FromMinutes(7))
                .WrapAsync(retryPolicy)
                .ExecuteAsync(async () => await client.Jobs.RunsList(jobId, activeOnly: true));

    // Fixed extra delay after the runs are reported as completed.
    await Task.Delay(TimeSpan.FromMinutes(2));
}
/// <summary>
/// Initializes a new instance of the <see cref="SjaasWorker"/> class, wiring a Cosmos-backed data store
/// and a Databricks client into the job event processor.
/// </summary>
/// <remarks>
/// All connection values are still the placeholder text <c>to-be-filled</c>.
/// </remarks>
public SjaasWorker()
{
    // Document DB connection placeholders.
    const string docDbUri = "to-be-filled";
    const string docDbPrimaryKey = "to-be-filled";

    // Database and collection names used by the data store.
    const string databaseName = "Sjaas";
    const string jobCollectionName = "Job";
    const string clusterCollectionName = "Cluster";

    var documentClient = new DocumentClient(new Uri(docDbUri), docDbPrimaryKey);
    var dataStore = new CosmosDataStore(documentClient, databaseName, jobCollectionName, clusterCollectionName);

    var databricks = DatabricksClient.CreateClient("to-be-filled", "to-be-filled");

    this.jobEventProcessor = new JobEventProcessor(dataStore, databricks);
}
/// <summary>
/// Measures the Databricks job outcomes for the next time window: creates a Databricks client from the
/// options, determines the window, logs it and reports the outcomes under the configured metric name.
/// </summary>
/// <param name="stoppingToken">
/// Triggered when <see cref="M:Microsoft.Extensions.Hosting.IHostedService.StopAsync(System.Threading.CancellationToken)" /> is called.
/// The token is not consulted by this method body.
/// </param>
/// <returns>
/// A <see cref="T:System.Threading.Tasks.Task" /> that completes once the job outcomes have been measured.
/// </returns>
public async Task ExecuteAsync(CancellationToken stoppingToken)
{
    using (DatabricksClient databricks = await _options.CreateDatabricksClientAsync(_secretProvider))
    {
        using (var infoProvider = new DatabricksInfoProvider(databricks, _logger))
        {
            (DateTimeOffset windowStart, DateTimeOffset windowEnd) = _options.DetermineNextTimeWindow();

            DateTimeOffset triggerTime = DateTimeOffset.UtcNow;
            _logger.LogInformation(
                "Job monitor for Databricks is starting at {TriggerTime} for time windows {WindowStart} - {WindowEnd}",
                triggerTime,
                windowStart,
                windowEnd);

            await infoProvider.MeasureJobOutcomesAsync(_options.UserOptions.MetricName, windowStart, windowEnd);
        }
    }
}
// With a client stubbed to return no runs and no jobs, the provider is expected to report no finished job runs.
public async Task GetFinishedJobRuns_WithNoAvailableFinishedJobs_ReturnsNoFinishedJobs()
{
    // Arrange
    DateTimeOffset windowStart = BogusGenerator.Date.RecentOffset();
    DateTimeOffset windowEnd = BogusGenerator.Date.SoonOffset();

    IEnumerable<Run> noRuns = Enumerable.Empty<Run>();
    IEnumerable<Job> noJobs = Enumerable.Empty<Job>();
    DatabricksClient databricks = DatabricksClientFactory.Create(noRuns, noJobs);
    var sut = new DatabricksInfoProvider(databricks, NullLogger.Instance);

    // Act
    IEnumerable<JobRun> finishedRuns = await sut.GetFinishedJobRunsAsync(windowStart, windowEnd);

    // Assert
    Assert.Empty(finishedRuns);
}
/// <summary>
/// Initializes a new instance of the <see cref="AzureDatabricksDbfsBlobStorage"/> class on top of a
/// Databricks client created from the given base URI and token.
/// </summary>
/// <param name="baseUri">The Databricks base URI; must not be <c>null</c>.</param>
/// <param name="token">The Databricks access token; must not be <c>null</c>.</param>
/// <param name="isReadOnly">Stored as the read-only flag of this storage.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="baseUri"/> or <paramref name="token"/> is <c>null</c>.</exception>
public AzureDatabricksDbfsBlobStorage(string baseUri, string token, bool isReadOnly)
{
    // Validate in parameter order so a null base URI is reported before a null token.
    string checkedBaseUri = baseUri ?? throw new ArgumentNullException(nameof(baseUri));
    string checkedToken = token ?? throw new ArgumentNullException(nameof(token));

    _client = DatabricksClient.CreateClient(checkedBaseUri, checkedToken);
    _dbfs = _client.Dbfs;
    _isReadOnly = isReadOnly;
}
// Timer-triggered entry point (cron expression "0 */1 * * * *"): reads the metric name and Databricks URL
// from configuration, fetches the Databricks token from the secret provider and measures the job outcomes
// between the timer's last and next schedule status values.
public async Task Run([TimerTrigger("0 */1 * * * *")] TimerInfo timer, ILogger logger)
{
    logger.LogInformation($"C# Timer trigger function executed at: {DateTime.UtcNow}");

    string metricName = _configuration.GetValue<string>("Arcus:ApplicationInsights:MetricName");
    string databricksUrl = _configuration.GetValue<string>("Arcus:Databricks:Url");
    string databricksToken = await _secretProvider.GetRawSecretAsync("Arcus.Databricks.SecretToken");

    // NOTE(review): ScheduleStatus is dereferenced without a null check — confirm it is always populated for this trigger.
    var windowStart = timer.ScheduleStatus.Last;
    var windowEnd = timer.ScheduleStatus.Next;

    // Declared in this order so the provider is disposed before the client it wraps.
    using var databricks = DatabricksClient.CreateClient(databricksUrl, databricksToken);
    using var infoProvider = new DatabricksInfoProvider(databricks, logger);

    await infoProvider.MeasureJobOutcomesAsync(metricName, windowStart, windowEnd);
}
// With a client stubbed to return no runs and no jobs, measuring job outcomes must not log any
// message starting with "Metric " followed by the metric name.
public async Task MeasureJobOutcomes_WithNoAvailableFinishedJobs_ReturnsNoFinishedJobs()
{
    // Arrange
    DateTimeOffset windowStart = BogusGenerator.Date.RecentOffset();
    DateTimeOffset windowEnd = BogusGenerator.Date.SoonOffset();
    string metricName = BogusGenerator.Random.Word();

    IEnumerable<Run> noRuns = Enumerable.Empty<Run>();
    IEnumerable<Job> noJobs = Enumerable.Empty<Job>();
    DatabricksClient databricks = DatabricksClientFactory.Create(noRuns, noJobs);

    var logSpy = new InMemoryLogger<DatabricksJobMetricsJob>();
    var sut = new DatabricksInfoProvider(databricks, logSpy);

    // Act
    await sut.MeasureJobOutcomesAsync(metricName, windowStart, windowEnd);

    // Assert
    Assert.DoesNotContain(logSpy.Messages, message => message.StartsWith("Metric " + metricName));
}
/// <summary>
/// Initializes a new instance of the <see cref="DatabricksBlobStorage"/> class: creates the native
/// Databricks client and mounts its DBFS, workspace, secrets and jobs storages.
/// </summary>
/// <param name="baseUri">The Databricks base URI; must not be <c>null</c>.</param>
/// <param name="token">The Databricks access token; must not be <c>null</c>.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="baseUri"/> or <paramref name="token"/> is <c>null</c>.</exception>
public DatabricksBlobStorage(string baseUri, string token)
{
    // Validate in parameter order so a null base URI is reported before a null token.
    string checkedBaseUri = baseUri ?? throw new ArgumentNullException(nameof(baseUri));
    string checkedToken = token ?? throw new ArgumentNullException(nameof(token));

    _nativeClient = DatabricksClient.CreateClient(checkedBaseUri, checkedToken);

    Mount("dbfs", new DbfsStorage(_nativeClient.Dbfs));
    Mount("workspace", new WorkspaceStorage(_nativeClient.Workspace));

    // The secrets storage is also kept in a field, not only mounted.
    _ss = new SecretStorage(_nativeClient.Secrets);
    Mount("secrets", _ss);

    Mount("jobs", new JobsStorage(_nativeClient.Jobs));
}
// Generates runs inside the time window and runs after it, measures the job outcomes for the window and
// checks the spy logger: run id, job id and job name must be logged for the in-window runs and must not
// be logged for the later ones.
public async Task MeasureJobOutcomes_WithinTimeWindow_OnlyReturnsFinishedJobsWithinTheTimeWindow()
{
    // Arrange
    DateTimeOffset windowStart = BogusGenerator.Date.RecentOffset();
    DateTimeOffset windowEnd = BogusGenerator.Date.SoonOffset();
    string metricName = BogusGenerator.Random.Word();

    IEnumerable<Run> runsInWindow = CreateRandomRuns(windowStart, windowEnd);
    IEnumerable<Run> runsAfterWindow = CreateRandomRuns(windowEnd, BogusGenerator.Date.FutureOffset());
    IEnumerable<Run> everyRun = runsInWindow.Concat(runsAfterWindow);

    // One randomly named job per generated run.
    IEnumerable<Job> jobs =
        everyRun
            .Select(run => new Job
            {
                JobId = run.JobId,
                Settings = new JobSettings { Name = Guid.NewGuid().ToString() }
            })
            .ToArray();

    DatabricksClient databricks = DatabricksClientFactory.Create(everyRun, jobs);
    var logSpy = new InMemoryLogger<DatabricksJobMetricsJob>();
    var sut = new DatabricksInfoProvider(databricks, logSpy);

    // Act
    await sut.MeasureJobOutcomesAsync(metricName, windowStart, windowEnd);

    // Assert
    Assert.All(runsInWindow, expectedRun =>
    {
        Assert.Contains(logSpy.Messages, message => message.Contains(expectedRun.RunId.ToString()));
        Assert.Contains(logSpy.Messages, message => message.Contains(expectedRun.JobId.ToString()));

        Job matchingJob = Assert.Single(jobs, candidate => candidate.JobId == expectedRun.JobId);
        Assert.NotNull(matchingJob);
        Assert.Contains(logSpy.Messages, message => message.Contains(matchingJob.Settings.Name));
    });
    Assert.All(runsAfterWindow, lateRun =>
    {
        Assert.DoesNotContain(logSpy.Messages, message => message.Contains(lateRun.RunId.ToString()));
        Assert.DoesNotContain(logSpy.Messages, message => message.Contains(lateRun.JobId.ToString()));

        Job matchingJob = Assert.Single(jobs, candidate => candidate.JobId == lateRun.JobId);
        Assert.NotNull(matchingJob);
        Assert.DoesNotContain(logSpy.Messages, message => message.Contains(matchingJob.Settings.Name));
    });
}
// Triggers the configured Databricks job and retries, for up to three minutes at one-second intervals,
// the assertion that the spy logger received a message starting with "Metric Databricks Job Completed".
public async Task FinishedDatabricksJobRun_GetsNoticedByRepeatedlyDatabricksJob_ReportsAsMetric()
{
    // Arrange
    string databricksUrl = GetDatabricksUrl();
    string databricksToken = GetDatabricksToken();
    int databricksJobId = GetDatabricksJobId();

    using (var databricks = DatabricksClient.CreateClient(databricksUrl, databricksToken))
    {
        // Act
        await databricks.Jobs.RunNow(
            databricksJobId,
            RunParameters.CreateNotebookParams(Enumerable.Empty<KeyValuePair<string, string>>()));

        // Assert
        RetryAssertion(
            () => Assert.Contains(_spyLogger.Messages, message => message.StartsWith("Metric Databricks Job Completed")),
            timeout: TimeSpan.FromMinutes(3),
            interval: TimeSpan.FromSeconds(1));
    }
}
/// <summary>
/// Walks through the Libraries API: prints the library statuses of all clusters and of one fixed test
/// cluster, then runs the install/uninstall walkthrough for a Maven library and a wheel library on that cluster.
/// </summary>
/// <param name="client">The Databricks client whose <c>Libraries</c> API is exercised.</param>
private static async Task LibrariesApi(DatabricksClient client)
{
    // Hard-coded identifier of the cluster used for the per-cluster calls.
    const string testClusterId = "0530-210517-viced348";

    Console.WriteLine("All cluster statuses");
    var statusesByCluster = await client.Libraries.AllClusterStatuses();
    foreach (var (clusterId, clusterLibraries) in statusesByCluster)
    {
        Console.WriteLine("Cluster: {0}", clusterId);
        foreach (var libraryStatus in clusterLibraries)
        {
            Console.WriteLine("\t{0}\t{1}", libraryStatus.Status, libraryStatus.Library);
        }
    }

    Console.WriteLine("Getting cluster statuses for {0}", testClusterId);
    var testClusterLibraries = await client.Libraries.ClusterStatus(testClusterId);
    foreach (var libraryStatus in testClusterLibraries)
    {
        Console.WriteLine("\t{0}\t{1}", libraryStatus.Status, libraryStatus.Library);
    }

    var mavenSpec = new MavenLibrarySpec
    {
        Coordinates = "org.jsoup:jsoup:1.7.2",
        Exclusions = new[] { "slf4j:slf4j" }
    };
    var mavenLibrary = new MavenLibrary { MavenLibrarySpec = mavenSpec };
    await TestInstallUninstallLibrary(client, mavenLibrary, testClusterId);

    var wheelLibrary = new WheelLibrary
    {
        Wheel = "dbfs:/mnt/dbfsmount1/temp/docutils-0.14-py3-none-any.whl"
    };
    await TestInstallUninstallLibrary(client, wheelLibrary, testClusterId);
}