/// <inheritdoc />
public async Task<bool> IsPublicHttpUrl(string uriString)
{
    // Anything that is not an absolute URI cannot be a public HTTP URL.
    if (!Uri.TryCreate(uriString, UriKind.Absolute, out var parsedUri))
    {
        return false;
    }

    // Only the http and https schemes qualify.
    var scheme = parsedUri.Scheme;
    var schemeIsHttp = scheme.Equals(Uri.UriSchemeHttp, StringComparison.OrdinalIgnoreCase);
    var schemeIsHttps = scheme.Equals(Uri.UriSchemeHttps, StringComparison.OrdinalIgnoreCase);

    if (!schemeIsHttp && !schemeIsHttps)
    {
        return false;
    }

    // A URL that carries a "sig" query parameter is reported as public without any account lookup.
    // NOTE(review): "sig" is presumably the SAS signature parameter - confirm.
    var signature = HttpUtility.ParseQueryString(parsedUri.Query).Get("sig");

    if (signature is not null)
    {
        return true;
    }

    // A URL that cannot be broken into storage account segments is reported as public.
    if (!StorageAccountUrlSegments.TryCreate(uriString, out var segments))
    {
        return true;
    }

    // An account found by the storage account lookup makes the URL non-public.
    var accountIsKnown = await TryGetStorageAccountInfoAsync(segments.AccountName);

    if (accountIsKnown)
    {
        return false;
    }

    // Otherwise the URL is public unless it matches an external storage container entry.
    var containerIsExternal = TryGetExternalStorageAccountInfo(segments.AccountName, segments.ContainerName, out _);
    return !containerIsExternal;
}
/// <summary>
/// Provides methods for blob storage access by using local path references in form of /storageaccount/container/blobpath
/// </summary>
/// <remarks>
/// Reads "DefaultStorageAccountName" and "ExternalStorageContainers" from <paramref name="configuration"/>.
/// "ExternalStorageContainers" is split on commas, semicolons and line breaks; entries that cannot be parsed are
/// logged as errors and skipped. When the setting is missing, the resulting list is empty rather than null.
/// </remarks>
/// <param name="logger">Logger <see cref="ILogger"/></param>
/// <param name="configuration">Configuration <see cref="IConfiguration"/></param>
/// <param name="azureProxy">Azure proxy <see cref="IAzureProxy"/></param>
public StorageAccessProvider(ILogger logger, IConfiguration configuration, IAzureProxy azureProxy)
{
    this.logger = logger;
    this.azureProxy = azureProxy;

    this.defaultStorageAccountName = configuration["DefaultStorageAccountName"];    // This account contains the cromwell-executions container
    logger.LogInformation($"DefaultStorageAccountName: {defaultStorageAccountName}");

    // A missing setting is treated as an empty one, so the field is always a (possibly empty) list.
    // Splitting an empty string with RemoveEmptyEntries yields no entries.
    var externalStorageContainersSetting = configuration["ExternalStorageContainers"] ?? string.Empty;

    externalStorageContainers = externalStorageContainersSetting
        .Split(new[] { ',', ';', '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries)
        .Select(uri =>
        {
            if (StorageAccountUrlSegments.TryCreate(uri, out var s))
            {
                return new ExternalStorageContainerInfo { BlobEndpoint = s.BlobEndpoint, AccountName = s.AccountName, ContainerName = s.ContainerName, SasToken = s.SasToken };
            }
            else
            {
                logger.LogError($"Invalid value '{uri}' found in 'ExternalStorageContainers' configuration. Value must be a valid azure storage account or container URL.");
                return null;
            }
        })
        // Drop the entries that failed to parse (already reported above).
        .Where(storageAccountInfo => storageAccountInfo is not null)
        .ToList();
}
/// <summary>
/// Tries to parse the provided string. The following formats are supported:
/// - /accountName/containerName/blobName
/// - https://accountName.blob.core.windows.net/containerName/blobName?sasToken
/// </summary>
/// <param name="uriString">String representing an Azure Storage object location</param>
/// <param name="result"><see cref="StorageAccountUrlSegments"/> representing the provided object location, or null when parsing fails</param>
/// <returns>True if parsing was successful; false otherwise, including when <paramref name="uriString"/> is null</returns>
public static bool TryCreate(string uriString, out StorageAccountUrlSegments result)
{
    // Regex.Match throws ArgumentNullException for null input; a Try* method reports failure instead of throwing.
    if (uriString is null)
    {
        result = null;
        return false;
    }

    // Absolute http(s) URL: the account name is the first label of the host, the container is the first
    // path segment, the blob name is the rest of the path, and the query string is kept as the SAS token.
    if (Uri.TryCreate(uriString, UriKind.Absolute, out var uri)
        && (uri.Scheme.Equals(Uri.UriSchemeHttp, StringComparison.OrdinalIgnoreCase) || uri.Scheme.Equals(Uri.UriSchemeHttps, StringComparison.OrdinalIgnoreCase)))
    {
        result = new StorageAccountUrlSegments
        {
            AccountName = uri.Host.Split('.', 2)[0],
            BlobEndpoint = $"{uri.Scheme}://{uri.Host}",
            // Segments[0] is the leading "/", so the container is the second segment.
            ContainerName = uri.Segments.Skip(1).FirstOrDefault()?.Trim('/') ?? "",
            BlobName = string.Join("", uri.Segments.Skip(2)).Trim('/'),
            SasToken = uri.Query
        };

        return true;
    }

    // Local path form: the three capture groups of localPathRegex supply account, container and blob name.
    var match = localPathRegex.Match(uriString);

    if (match.Success)
    {
        result = new StorageAccountUrlSegments
        {
            AccountName = match.Groups[1].Value,
            BlobEndpoint = string.Empty,
            ContainerName = match.Groups[2].Value,
            BlobName = match.Groups[3].Value,
            SasToken = string.Empty
        };

        return true;
    }

    result = null;
    return false;
}
/// <summary>
/// Default constructor invoked by ASP.NET Core DI
/// </summary>
/// <remarks>
/// Reads "DefaultStorageAccountName", "UsePreemptibleVmsOnly" and "ExternalStorageContainers" from
/// <paramref name="configuration"/> and builds the table of TES task state transitions.
/// "ExternalStorageContainers" is split on commas, semicolons and line breaks; entries that cannot be parsed are
/// logged as errors and skipped. When the setting is missing, the resulting list is empty rather than null.
/// </remarks>
/// <param name="logger">Logger instance provided by ASP.NET Core DI</param>
/// <param name="configuration">Configuration</param>
/// <param name="azureProxy">Azure proxy</param>
public BatchScheduler(ILogger logger, IConfiguration configuration, IAzureProxy azureProxy)
{
    this.logger = logger;
    this.azureProxy = azureProxy;

    defaultStorageAccountName = configuration["DefaultStorageAccountName"];    // This account contains the cromwell-executions container

    // A missing or unparsable value results in false.
    usePreemptibleVmsOnly = bool.TryParse(configuration["UsePreemptibleVmsOnly"], out var temp) && temp;

    // A missing setting is treated as an empty one, so the field is always a (possibly empty) list.
    // Splitting an empty string with RemoveEmptyEntries yields no entries.
    var externalStorageContainersSetting = configuration["ExternalStorageContainers"] ?? string.Empty;

    externalStorageContainers = externalStorageContainersSetting
        .Split(new[] { ',', ';', '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries)
        .Select(uri =>
        {
            if (StorageAccountUrlSegments.TryCreate(uri, out var s))
            {
                return new ExternalStorageContainerInfo { BlobEndpoint = s.BlobEndpoint, AccountName = s.AccountName, ContainerName = s.ContainerName, SasToken = s.SasToken };
            }
            else
            {
                logger.LogError($"Invalid value '{uri}' found in 'ExternalStorageContainers' configuration. Value must be a valid azure storage account or container URL.");
                return null;
            }
        })
        // Drop the entries that failed to parse (already reported above).
        .Where(storageAccountInfo => storageAccountInfo != null)
        .ToList();

    logger.LogInformation($"DefaultStorageAccountName: {defaultStorageAccountName}");
    logger.LogInformation($"usePreemptibleVmsOnly: {usePreemptibleVmsOnly}");

    // Predicates over the current TES state, shared by several transitions below.
    bool tesStateIsQueuedInitializingOrRunning(TesTask tesTask) => tesTask.State == TesState.QUEUEDEnum || tesTask.State == TesState.INITIALIZINGEnum || tesTask.State == TesState.RUNNINGEnum;
    bool tesStateIsInitializingOrRunning(TesTask tesTask) => tesTask.State == TesState.INITIALIZINGEnum || tesTask.State == TesState.RUNNINGEnum;
    bool tesStateIsQueuedOrInitializing(TesTask tesTask) => tesTask.State == TesState.QUEUEDEnum || tesTask.State == TesState.INITIALIZINGEnum;

    // Each entry pairs a TES state condition and a Batch task state with an optional action and/or a new TES state.
    // NOTE(review): entry order is kept exactly as before; presumably the consumer evaluates entries in order - confirm before reordering.
    tesTaskStateTransitions = new List<TesTaskStateTransition>()
    {
        new TesTaskStateTransition(tesTask => tesTask.State == TesState.CANCELEDEnum && tesTask.IsCancelRequested, batchTaskState: null, async tesTask => { await this.azureProxy.DeleteBatchJobAsync(tesTask.Id); tesTask.IsCancelRequested = false; }),
        new TesTaskStateTransition(TesState.QUEUEDEnum, BatchTaskState.JobNotFound, tesTask => AddBatchJobAsync(tesTask)),
        new TesTaskStateTransition(TesState.QUEUEDEnum, BatchTaskState.MissingBatchTask, tesTask => this.azureProxy.DeleteBatchJobAsync(tesTask.Id), TesState.QUEUEDEnum),
        new TesTaskStateTransition(TesState.QUEUEDEnum, BatchTaskState.Initializing, TesState.INITIALIZINGEnum),
        new TesTaskStateTransition(TesState.INITIALIZINGEnum, BatchTaskState.NodeAllocationFailed, tesTask => this.azureProxy.DeleteBatchJobAsync(tesTask.Id), TesState.QUEUEDEnum),
        new TesTaskStateTransition(tesStateIsQueuedOrInitializing, BatchTaskState.Running, TesState.RUNNINGEnum),
        new TesTaskStateTransition(tesStateIsQueuedInitializingOrRunning, BatchTaskState.MoreThanOneActiveJobFound, tesTask => this.azureProxy.DeleteBatchJobAsync(tesTask.Id), TesState.SYSTEMERROREnum),
        new TesTaskStateTransition(tesStateIsQueuedInitializingOrRunning, BatchTaskState.CompletedSuccessfully, TesState.COMPLETEEnum),
        new TesTaskStateTransition(tesStateIsQueuedInitializingOrRunning, BatchTaskState.CompletedWithErrors, TesState.EXECUTORERROREnum),
        new TesTaskStateTransition(tesStateIsQueuedInitializingOrRunning, BatchTaskState.ActiveJobWithMissingAutoPool, tesTask => this.azureProxy.DeleteBatchJobAsync(tesTask.Id), TesState.QUEUEDEnum),
        new TesTaskStateTransition(tesStateIsQueuedInitializingOrRunning, BatchTaskState.NodeFailedDuringStartupOrExecution, tesTask => this.azureProxy.DeleteBatchJobAsync(tesTask.Id), TesState.EXECUTORERROREnum),
        new TesTaskStateTransition(tesStateIsQueuedInitializingOrRunning, BatchTaskState.NodeUnusable, tesTask => this.azureProxy.DeleteBatchJobAsync(tesTask.Id), TesState.EXECUTORERROREnum),
        new TesTaskStateTransition(tesStateIsInitializingOrRunning, BatchTaskState.JobNotFound, TesState.SYSTEMERROREnum),
        new TesTaskStateTransition(tesStateIsInitializingOrRunning, BatchTaskState.MissingBatchTask, tesTask => this.azureProxy.DeleteBatchJobAsync(tesTask.Id), TesState.SYSTEMERROREnum),
        new TesTaskStateTransition(tesStateIsInitializingOrRunning, BatchTaskState.NodePreempted, tesTask => this.azureProxy.DeleteBatchJobAsync(tesTask.Id), TesState.QUEUEDEnum) // TODO: Implement preemption detection
    };
}
/// <inheritdoc />
public async Task<string> MapLocalPathToSasUrlAsync(string path, bool getContainerSas = false)
{
    // TODO: Optional: If path is /container/... where container matches the name of the container in the default storage account, prepend the account name to the path.
    // This would allow the user to omit the account name for files stored in the default storage account

    // /cromwell-executions/... URLs become /defaultStorageAccountName/cromwell-executions/... to unify how URLs starting with /acct/container/... pattern are handled.
    var isCromwellPath = path.StartsWith(CromwellPathPrefix, StringComparison.OrdinalIgnoreCase);

    if (isCromwellPath)
    {
        path = $"/{defaultStorageAccountName}{path}";
    }

    var parsed = StorageAccountUrlSegments.TryCreate(path, out var segments);

    if (!parsed)
    {
        logger.LogError($"Could not parse path '{path}'.");
        return null;
    }

    // External containers come with their own SAS token; only the endpoint and token are taken from the external entry.
    if (TryGetExternalStorageAccountInfo(segments.AccountName, segments.ContainerName, out var externalInfo))
    {
        var externalUrl = new StorageAccountUrlSegments(externalInfo.BlobEndpoint, segments.ContainerName, segments.BlobName, externalInfo.SasToken);
        return externalUrl.ToUriString();
    }

    // Otherwise the account must be found by the storage account lookup, which hands the info back through the callback.
    StorageAccountInfo accountInfo = null;
    var accountFound = await TryGetStorageAccountInfoAsync(segments.AccountName, found => accountInfo = found);

    if (!accountFound)
    {
        logger.LogError($"Could not find storage account '{segments.AccountName}' corresponding to path '{path}'. Either the account does not exist or the TES app service does not have permission to it.");
        return null;
    }

    try
    {
        var accountKey = await azureProxy.GetStorageAccountKeyAsync(accountInfo);
        var sasUrl = new StorageAccountUrlSegments(accountInfo.BlobEndpoint, segments.ContainerName, segments.BlobName);
        var wantContainerSas = segments.IsContainer || getContainerSas;

        if (wantContainerSas)
        {
            // Container-level token: add, create, list, read and write.
            var containerPolicy = new SharedAccessBlobPolicy()
            {
                Permissions = SharedAccessBlobPermissions.Add | SharedAccessBlobPermissions.Create | SharedAccessBlobPermissions.List | SharedAccessBlobPermissions.Read | SharedAccessBlobPermissions.Write,
                SharedAccessExpiryTime = DateTime.Now.Add(sasTokenDuration)
            };

            var containerUri = new StorageAccountUrlSegments(accountInfo.BlobEndpoint, segments.ContainerName).ToUri();
            var containerCredentials = new StorageCredentials(accountInfo.Name, accountKey);
            var container = new CloudBlobContainer(containerUri, containerCredentials);

            sasUrl.SasToken = container.GetSharedAccessSignature(containerPolicy, null, SharedAccessProtocol.HttpsOnly, null);
        }
        else
        {
            // Blob-level token: read only.
            var blobPolicy = new SharedAccessBlobPolicy()
            {
                Permissions = SharedAccessBlobPermissions.Read,
                SharedAccessExpiryTime = DateTime.Now.Add(sasTokenDuration)
            };

            var blobUri = sasUrl.ToUri();
            var blobCredentials = new StorageCredentials(accountInfo.Name, accountKey);
            var blob = new CloudBlob(blobUri, blobCredentials);

            sasUrl.SasToken = blob.GetSharedAccessSignature(blobPolicy, null, null, SharedAccessProtocol.HttpsOnly, null);
        }

        return sasUrl.ToUriString();
    }
    catch (Exception ex)
    {
        // Any failure while fetching the key or generating the token is logged and reported as null.
        logger.LogError(ex, $"Could not get the key of storage account '{segments.AccountName}'. Make sure that the TES app service has Contributor access to it.");
        return null;
    }
}
/// <summary>
/// Builds the Batch <see cref="CloudTask"/> that represents the given <see cref="TesTask"/>.
/// Along the way it generates a download script and an upload script, uploads both to the task's
/// batch execution directory, and composes a shell command that runs the download script, the first
/// executor of the task, and the upload script in sequence.
/// </summary>
/// <param name="task">The <see cref="TesTask"/></param>
/// <returns>The Batch task, with resource files, output files and (for non-public executor images) container settings populated</returns>
/// <exception cref="Exception">
/// Thrown when the Cromwell execution directory cannot be determined from the task inputs,
/// or when an output path does not start with the Cromwell path prefix.
/// </exception>
private async Task <CloudTask> ConvertTesTaskToBatchTaskAsync(TesTask task)
{
    var cromwellPathPrefixWithoutEndSlash = CromwellPathPrefix.TrimEnd('/');
    var taskId = task.Id;

    // First capture group of queryStringRegex for every non-null input and output path; empty captures are dropped.
    // NOTE(review): presumably this group is the query-string portion of the path - confirm against queryStringRegex.
    var queryStringsToRemoveFromLocalFilePaths = task.Inputs
        .Select(i => i.Path)
        .Concat(task.Outputs.Select(o => o.Path))
        .Where(p => p != null)
        .Select(p => queryStringRegex.Match(p).Groups[1].Value)
        .Where(qs => !string.IsNullOrEmpty(qs))
        .ToList();

    var inputFiles = task.Inputs.Distinct();

    // The execution directory is the parent of the first input that IsCromwellCommandScript accepts.
    var cromwellExecutionDirectoryPath = GetParentPath(task.Inputs.FirstOrDefault(IsCromwellCommandScript)?.Path);

    if (cromwellExecutionDirectoryPath == null)
    {
        throw new Exception($"Could not identify Cromwell execution directory path for task {task.Id}. This TES instance supports Cromwell tasks only.");
    }

    // Every output must live under the Cromwell path prefix.
    foreach (var output in task.Outputs)
    {
        if (!output.Path.StartsWith(CromwellPathPrefix, StringComparison.OrdinalIgnoreCase))
        {
            throw new Exception($"Unsupported output path '{output.Path}' for task Id {task.Id}. Must start with {CromwellPathPrefix}");
        }
    }

    var batchExecutionDirectoryPath = $"{cromwellExecutionDirectoryPath}/{BatchExecutionDirectoryName}";

    // TODO: Cromwell bug: Cromwell command write_tsv() generates a file in the execution directory, for example execution/write_tsv_3922310b441805fc43d52f293623efbc.tmp. These are not passed on to TES inputs.
    // WORKAROUND: Get the list of files in the execution directory and add them to task inputs.
    // NOTE(review): if MapLocalPathToSasUrlAsync returns null here, new Uri(...) throws - confirm that is the intended failure mode.
    var executionDirectoryUri = new Uri(await MapLocalPathToSasUrlAsync(cromwellExecutionDirectoryPath, getContainerSas: true));

    // Listed blobs, excluding the Cromwell script itself and anything under the batch execution directory.
    var blobsInExecutionDirectory = (await azureProxy.ListBlobsAsync(executionDirectoryUri)).Where(b => !b.EndsWith($"/{CromwellScriptFileName}")).Where(b => !b.Contains($"/{BatchExecutionDirectoryName}/"));
    var additionalInputFiles = blobsInExecutionDirectory.Select(b => $"{CromwellPathPrefix}{b}").Select(b => new TesInput { Content = null, Path = b, Url = b, Name = Path.GetFileName(b), Type = TesFileType.FILEEnum });

    // Resolve the download URL of every declared and additional input concurrently.
    var filesToDownload = await Task.WhenAll(inputFiles.Union(additionalInputFiles).Select(async f => await GetTesInputFileUrl(f, task.Id, queryStringsToRemoveFromLocalFilePaths)));

    // Using --include and not using --no-recursive as a workaround for https://github.com/Azure/blobxfer/issues/123
    var downloadFilesScriptContent = string.Join(" && ", filesToDownload.Select(f => {
        // URLs containing ".blob.core." are fetched with blobxfer; everything else with mkdir + wget.
        var downloadSingleFile = f.Url.Contains(".blob.core.")
            ? $"blobxfer download --storage-url '{f.Url}' --local-path '{f.Path}' --chunk-size-bytes 104857600 --rename --include '{StorageAccountUrlSegments.Create(f.Url).BlobName}'"
            : $"mkdir -p {GetParentPath(f.Path)} && wget -O '{f.Path}' '{f.Url}'";

        // After each download, fail the script with a message on stderr if the local file does not exist.
        var exitIfDownloadedFileIsNotFound = $"{{ [ -f '{f.Path}' ] && : || {{ echo 'Failed to download file {f.Url}' 1>&2 && exit 1; }} }}";

        return($"{downloadSingleFile} && {exitIfDownloadedFileIsNotFound}");
    }));

    // The script is uploaded through a URL requested with getContainerSas: true; the URL requested without it
    // is the one later handed to the Batch task as a resource file.
    var downloadFilesScriptPath = $"{batchExecutionDirectoryPath}/{DownloadFilesScriptFileName}";
    var writableDownloadFilesScriptUrl = new Uri(await MapLocalPathToSasUrlAsync(downloadFilesScriptPath, getContainerSas: true));
    var downloadFilesScriptUrl = await MapLocalPathToSasUrlAsync(downloadFilesScriptPath);
    await azureProxy.UploadBlobAsync(writableDownloadFilesScriptUrl, downloadFilesScriptContent);

    // Each output gets a URL requested with getContainerSas: true as its upload target.
    var filesToUpload = await Task.WhenAll(
        task.Outputs.Select(async f => new TesOutput { Path = f.Path, Url = await MapLocalPathToSasUrlAsync(f.Path, getContainerSas: true), Name = f.Name, Type = f.Type }));

    var uploadFilesScriptContent = string.Join(" && ", filesToUpload.Select(f => {
        // Ignore missing stdout/stderr files. CWL workflows have an issue where if the stdout/stderr are redirected, they are still listed in the TES outputs
        // Ignore any other missing files and directories. WDL tasks can have optional output files.
        // Syntax is: If file or directory doesn't exist, run a noop (":") operator, otherwise run the upload command:
        // { if not exists do nothing else upload; } && { ... }
        return($"{{ [ ! -e '{f.Path}' ] && : || blobxfer upload --storage-url '{f.Url}' --local-path '{f.Path}' --one-shot-bytes 104857600 {(f.Type == TesFileType.FILEEnum ? "--rename --no-recursive" : "")}; }}");
    }));

    // Same upload/readable URL pairing as for the download script above.
    var uploadFilesScriptPath = $"{batchExecutionDirectoryPath}/{UploadFilesScriptFileName}";
    var writableUploadFilesScriptUrl = new Uri(await MapLocalPathToSasUrlAsync(uploadFilesScriptPath, getContainerSas: true));
    var uploadFilesScriptUrl = await MapLocalPathToSasUrlAsync(uploadFilesScriptPath);
    await azureProxy.UploadBlobAsync(writableUploadFilesScriptUrl, uploadFilesScriptContent);

    // Only the first executor of the task is used.
    var executor = task.Executors.First();

    // Mounts /mnt<prefix> on the host at <prefix> inside the containers started by the task command.
    var volumeMountsOption = $"-v /mnt{cromwellPathPrefixWithoutEndSlash}:{cromwellPathPrefixWithoutEndSlash}";

    // The image is treated as public when no container registry info is returned for it.
    var executorImageIsPublic = (await azureProxy.GetContainerRegistryInfoAsync(executor.Image)) == null;

    // Command sequence: pull the blobxfer image (and the executor image when it is public), run the download script,
    // open up permissions on the mounted directory, run the executor command, then run the upload script.
    // NOTE(review): the "\" sequences look like shell line continuations, which only work when each is immediately
    // followed by a newline inside this verbatim literal - confirm the literal's line breaks are intact.
    var taskCommand = $@" docker pull --quiet {BlobxferImageName} && \ {(executorImageIsPublic ? $"docker pull --quiet {executor.Image} && \\" : "")} docker run --rm {volumeMountsOption} --entrypoint=/bin/sh {BlobxferImageName} {downloadFilesScriptPath} && \ chmod -R o+rwx /mnt{cromwellPathPrefixWithoutEndSlash} && \ docker run --rm {volumeMountsOption} --entrypoint= --workdir / {executor.Image} {executor.Command[0]} -c '{ string.Join(" && ", executor.Command.Skip(1))}' && \ docker run --rm {volumeMountsOption} --entrypoint=/bin/sh {BlobxferImageName} {uploadFilesScriptPath} ";

    var batchExecutionDirectoryUrl = await MapLocalPathToSasUrlAsync($"{batchExecutionDirectoryPath}", getContainerSas : true);

    // The task runs as an admin auto-user with pool scope, receives both scripts as resource files under /mnt,
    // and uploads files matching ../std*.txt to the batch execution directory when the task fails.
    var cloudTask = new CloudTask(taskId, $"/bin/sh -c \"{taskCommand.Trim()}\"")
    {
        UserIdentity = new UserIdentity(new AutoUserSpecification(elevationLevel: ElevationLevel.Admin, scope: AutoUserScope.Pool)),
        ResourceFiles = new List <ResourceFile> { ResourceFile.FromUrl(downloadFilesScriptUrl, $"/mnt{downloadFilesScriptPath}"), ResourceFile.FromUrl(uploadFilesScriptUrl, $"/mnt{uploadFilesScriptPath}") },
        OutputFiles = new List <OutputFile> {
            new OutputFile(
                "../std*.txt",
                new OutputFileDestination(new OutputFileBlobContainerDestination(batchExecutionDirectoryUrl)),
                new OutputFileUploadOptions(OutputFileUploadCondition.TaskFailure))
        }
    };

    if (!executorImageIsPublic)
    {
        // If the executor image is private, and in order to run multiple containers in the main task, the image has to be downloaded via pool ContainerConfiguration.
        // This also requires that the main task runs inside a container. So we run the "docker" container that in turn runs other containers.
        // If the executor image is public, there is no need for pool ContainerConfiguration and task can run normally, without being wrapped in a docker container.
        // Volume mapping for docker.sock below allows the docker client in the container to access host's docker daemon.
        var containerRunOptions = $"--rm -v /var/run/docker.sock:/var/run/docker.sock -v /mnt{cromwellPathPrefixWithoutEndSlash}:/mnt{cromwellPathPrefixWithoutEndSlash} ";
        cloudTask.ContainerSettings = new TaskContainerSettings(DockerInDockerImageName, containerRunOptions);
    }

    return(cloudTask);
}