/// <summary>
/// Upload File with Datalake API
/// </summary>
internal virtual async Task UploadDataLakeFile(long taskId, DataLakeFileClient fileClient, string filePath)
{
    if (this.Force.IsPresent || !fileClient.Exists() || ShouldContinue(string.Format(Resources.OverwriteConfirmation, GetDataLakeItemUriWithoutSas(fileClient)), null))
    {
        // Set item properties and metadata
        PathHttpHeaders pathHttpHeaders = SetDatalakegen2ItemProperties(fileClient, BlobProperties, setToServer: false);
        IDictionary<string, string> metadata = SetDatalakegen2ItemMetaData(fileClient, BlobMetadata, setToServer: false);

        fileClient.Create(pathHttpHeaders,
            metadata,
            this.Permission,
            this.Umask != null ? DataLakeModels.PathPermissions.ParseSymbolicPermissions(this.Umask).ToOctalPermissions() : null);

        long fileSize = new FileInfo(ResolvedFileName).Length;
        string activity = String.Format(Resources.SendAzureBlobActivity, this.Source, this.Path, this.FileSystem);
        string status = Resources.PrepareUploadingBlob;
        ProgressRecord pr = new ProgressRecord(OutputStream.GetProgressId(taskId), activity, status);
        IProgress<long> progressHandler = new Progress<long>((finishedBytes) =>
        {
            if (pr != null)
            {
                // If the source file size is 0, report the progress as 100 percent directly.
                pr.PercentComplete = 0 == fileSize ? 100 : (int)(finishedBytes * 100 / fileSize);
                pr.StatusDescription = string.Format(CultureInfo.CurrentCulture, Resources.FileTransmitStatus, pr.PercentComplete);
                this.OutputStream.WriteProgress(pr);
            }
        });

        using (FileStream stream = File.OpenRead(ResolvedFileName))
        {
            await fileClient.AppendAsync(stream, 0, progressHandler: progressHandler, cancellationToken: CmdletCancellationToken).ConfigureAwait(false);
        }

        // Flush to commit the appended data; without this the appended bytes stay uncommitted.
        await fileClient.FlushAsync(fileSize, cancellationToken: CmdletCancellationToken).ConfigureAwait(false);

        WriteDataLakeGen2Item(Channel, fileClient, taskId: taskId);
    }
}
public virtual async Task<Response<DataLakeDirectoryClient>> CreateSubDirectoryAsync(
    string path,
    PathHttpHeaders httpHeaders = default,
    Metadata metadata = default,
    string permissions = default,
    string umask = default,
    DataLakeRequestConditions conditions = default,
    CancellationToken cancellationToken = default)
{
    DataLakeDirectoryClient directoryClient = GetSubDirectoryClient(path);

    Response<PathInfo> response = await directoryClient.CreateAsync(
        PathResourceType.Directory,
        httpHeaders,
        metadata,
        permissions,
        umask,
        conditions,
        cancellationToken)
        .ConfigureAwait(false);

    return Response.FromValue(
        directoryClient,
        response.GetRawResponse());
}
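For context, a minimal usage sketch of CreateSubDirectoryAsync; the connection string, file system name, and directory names below are placeholders, not part of the source above.

// Hypothetical usage sketch (client names and paths are placeholders).
DataLakeServiceClient service = new DataLakeServiceClient(connectionString);
DataLakeFileSystemClient fileSystem = service.GetFileSystemClient("my-filesystem");
DataLakeDirectoryClient parent = fileSystem.GetDirectoryClient("parent");

Response<DataLakeDirectoryClient> sub = await parent.CreateSubDirectoryAsync(
    "child",
    metadata: new Dictionary<string, string> { { "origin", "sample" } });
DataLakeDirectoryClient child = sub.Value;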
public Response<PathInfo> Upload(
    Stream content,
    PathHttpHeaders httpHeaders,
    DataLakeRequestConditions conditions,
    IProgress<long> progressHandler,
    CancellationToken cancellationToken)
{
    _client.Create(
        httpHeaders: httpHeaders,
        conditions: conditions,
        cancellationToken: cancellationToken);

    // After the file is created, the lease ID is the only valid request condition.
    conditions = new DataLakeRequestConditions
    {
        LeaseId = conditions?.LeaseId
    };

    // If we can compute the size and it's small enough
    if (PartitionedUploadExtensions.TryGetLength(content, out long contentLength)
        && contentLength < _singleUploadThreshold)
    {
        // Upload it in a single request
        _client.Append(
            content,
            offset: 0,
            leaseId: conditions?.LeaseId,
            progressHandler: progressHandler,
            cancellationToken: cancellationToken);

        // Flush at the end of the appended content to commit it
        return _client.Flush(
            position: contentLength,
            httpHeaders: httpHeaders,
            conditions: conditions,
            cancellationToken: cancellationToken);
    }

    // If the caller provided an explicit block size, we'll use it.
    // Otherwise we'll adjust dynamically based on the size of the content.
    int blockSize = _blockSize != null
        ? _blockSize.Value
        : contentLength < Constants.LargeUploadThreshold
            ? Constants.DefaultBufferSize
            : Constants.LargeBufferSize;

    // Otherwise stage individual blocks one at a time. It's not as
    // fast as a parallel upload, but you get the benefit of the retry
    // policy working on a single block instead of the entire stream.
    return UploadInSequence(
        content,
        blockSize,
        httpHeaders,
        conditions,
        progressHandler,
        cancellationToken);
}
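The single-shot branch above relies on the Data Lake append/flush contract: each Append writes at a running offset, and a Flush at the total length commits the data. A minimal sketch of that contract, assuming an existing DataLakeFileClient (the file variable is a placeholder):

// Minimal sketch of the append/flush contract (file is a placeholder client).
byte[] first = Encoding.UTF8.GetBytes("hello ");
byte[] second = Encoding.UTF8.GetBytes("world");

file.Create();
file.Append(new MemoryStream(first), offset: 0);
file.Append(new MemoryStream(second), offset: first.Length);

// Nothing is visible until the flush; position must equal the total appended bytes.
file.Flush(position: first.Length + second.Length);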
public async Task<Response<PathInfo>> UploadAsync(
    Stream content,
    PathHttpHeaders httpHeaders,
    DataLakeRequestConditions conditions,
    IProgress<long> progressHandler,
    CancellationToken cancellationToken)
{
    await _client.CreateAsync(
        httpHeaders: httpHeaders,
        conditions: conditions,
        cancellationToken: cancellationToken).ConfigureAwait(false);

    // After the file is created, the lease ID is the only valid request condition.
    conditions = new DataLakeRequestConditions
    {
        LeaseId = conditions?.LeaseId
    };

    // If we can compute the size and it's small enough
    if (PartitionedUploadExtensions.TryGetLength(content, out long contentLength)
        && contentLength < _singleUploadThreshold)
    {
        // Append data
        await _client.AppendAsync(
            content,
            offset: 0,
            leaseId: conditions?.LeaseId,
            progressHandler: progressHandler,
            cancellationToken: cancellationToken).ConfigureAwait(false);

        // Flush data (pass the cancellation token here too, matching the sync path)
        return await _client.FlushAsync(
            position: contentLength,
            httpHeaders: httpHeaders,
            conditions: conditions,
            cancellationToken: cancellationToken)
            .ConfigureAwait(false);
    }

    // If the caller provided an explicit block size, we'll use it.
    // Otherwise we'll adjust dynamically based on the size of the content.
    int blockSize = _blockSize != null
        ? _blockSize.Value
        : contentLength < Constants.LargeUploadThreshold
            ? Constants.DefaultBufferSize
            : Constants.LargeBufferSize;

    // Otherwise stage individual blocks in parallel
    return await UploadInParallelAsync(
        content,
        blockSize,
        httpHeaders,
        conditions,
        progressHandler,
        cancellationToken).ConfigureAwait(false);
}
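From the caller's side, this uploader is assumed to sit behind the public DataLakeFileClient.UploadAsync entry point; a hypothetical usage sketch, with placeholder paths:

// Hypothetical usage sketch (file system client and paths are placeholders).
DataLakeFileClient fileClient = fileSystem.GetFileClient("data/large.bin");
using (FileStream stream = File.OpenRead(@"C:\temp\large.bin"))
{
    // Small streams go through the single append + flush branch above;
    // larger ones are partitioned into blocks and appended in parallel.
    await fileClient.UploadAsync(stream);
}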
public virtual Response<PathInfo> Create(
    PathHttpHeaders httpHeaders = default,
    Metadata metadata = default,
    string permissions = default,
    string umask = default,
    DataLakeRequestConditions conditions = default,
    CancellationToken cancellationToken = default) =>
    Create(
        PathResourceType.Directory,
        httpHeaders,
        metadata,
        permissions,
        umask,
        conditions,
        cancellationToken);
public virtual async Task<Response<PathInfo>> CreateAsync(
    PathHttpHeaders httpHeaders = default,
    Metadata metadata = default,
    string permissions = default,
    string umask = default,
    DataLakeRequestConditions conditions = default,
    CancellationToken cancellationToken = default) =>
    await CreateAsync(
        PathResourceType.Directory,
        httpHeaders,
        metadata,
        permissions,
        umask,
        conditions,
        cancellationToken)
        .ConfigureAwait(false);
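A short sketch of the permissions and umask parameters on the directory overloads above; the symbolic permission string and octal umask values are illustrative only.

// Hypothetical sketch: symbolic permissions plus an octal umask (illustrative values).
DataLakeDirectoryClient dirClient = fileSystem.GetDirectoryClient("logs/2020");
await dirClient.CreateAsync(
    permissions: "rwxr-x---",
    umask: "0027");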
public virtual Response<DataLakeFileClient> CreateFile(
    string fileName,
    PathHttpHeaders httpHeaders = default,
    Metadata metadata = default,
    string permissions = default,
    string umask = default,
    DataLakeRequestConditions conditions = default,
    CancellationToken cancellationToken = default)
{
    DataLakeFileClient fileClient = GetFileClient(fileName);

    Response<PathInfo> response = fileClient.Create(
        httpHeaders,
        metadata,
        permissions,
        umask,
        conditions,
        cancellationToken);

    return Response.FromValue(
        fileClient,
        response.GetRawResponse());
}
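A minimal usage sketch for CreateFile; the directoryClient variable and the header values are placeholders.

// Hypothetical usage sketch (directoryClient and header values are placeholders).
Response<DataLakeFileClient> created = directoryClient.CreateFile(
    "report.csv",
    httpHeaders: new PathHttpHeaders { ContentType = "text/csv" });
DataLakeFileClient file = created.Value;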
/// <summary>
/// Set properties on a Data Lake Gen2 item.
/// </summary>
/// <param name="item">The Data Lake Gen2 item to update.</param>
/// <param name="BlobProperties">Properties to set.</param>
/// <param name="setToServer">True to set the properties on the server; false to set them only on the local item object.</param>
protected static PathHttpHeaders SetDatalakegen2ItemProperties(DataLakePathClient item, Hashtable BlobProperties, bool setToServer = true)
{
    if (BlobProperties != null)
    {
        // Validate that every supplied property is a known Data Lake Gen2 file property
        foreach (DictionaryEntry entry in BlobProperties)
        {
            if (!validDatalakeGen2FileProperties.ContainsKey(entry.Key.ToString()))
            {
                throw new ArgumentException(String.Format("Invalid DataLake file property: '{0}' with value '{1}'.", entry.Key.ToString(), entry.Value.ToString()));
            }
        }

        PathHttpHeaders headers = new PathHttpHeaders();
        foreach (DictionaryEntry entry in BlobProperties)
        {
            string key = entry.Key.ToString();
            string value = entry.Value.ToString();
            Action<PathHttpHeaders, string> action = validDatalakeGen2FileProperties[key];
            if (action != null)
            {
                action(headers, value);
            }
        }

        if (setToServer && item != null)
        {
            item.SetHttpHeaders(headers);
        }
        return headers;
    }
    else
    {
        return null;
    }
}
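A sketch of the Hashtable shape this helper expects. The valid key set comes from validDatalakeGen2FileProperties, which isn't shown here, so the keys below are assumptions.

// Hypothetical caller sketch; the keys are assumed to exist in validDatalakeGen2FileProperties.
Hashtable properties = new Hashtable
{
    { "ContentType", "application/json" },
    { "CacheControl", "max-age=3600" }
};
PathHttpHeaders headers = SetDatalakegen2ItemProperties(fileClient, properties, setToServer: true);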
private async Task<Response<PathInfo>> UploadInParallelAsync(
    Stream content,
    int blockSize,
    PathHttpHeaders httpHeaders,
    DataLakeRequestConditions conditions,
    IProgress<long> progressHandler,
    CancellationToken cancellationToken)
{
    // Wrap the staging and commit calls in an Upload span for distributed tracing
    DiagnosticScope scope = _client.ClientDiagnostics.CreateScope(
        _operationName ?? $"{nameof(Azure)}.{nameof(Storage)}.{nameof(Files)}.{nameof(DataLake)}.{nameof(DataLakeFileClient)}.{nameof(DataLakeFileClient.Upload)}");
    try
    {
        scope.Start();

        // Wrap progressHandler in an AggregatingProgressIncrementer to prevent
        // progress from being reset with each append operation.
        if (progressHandler != null)
        {
            progressHandler = new AggregatingProgressIncrementer(progressHandler);
        }

        // A list of tasks that are currently executing, which will
        // always be smaller than _maxWorkerCount
        List<Task> runningTasks = new List<Task>();

        // We need to keep track of how much data we have appended to
        // calculate offsets for the next appends, and the final
        // position to flush
        long appendedBytes = 0;

        // Partition the stream into individual blocks
        await foreach (ChunkedStream block in PartitionedUploadExtensions.GetBlocksAsync(
            content, blockSize, async: true, _arrayPool, cancellationToken).ConfigureAwait(false))
        {
            // Start appending the next block (but don't await the Task!)
            Task task = AppendBlockAsync(
                block,
                appendedBytes,
                conditions?.LeaseId,
                progressHandler,
                cancellationToken);

            // Add the block to our running task list
            runningTasks.Add(task);
            appendedBytes += block.Length;

            // If we run out of workers
            if (runningTasks.Count >= _maxWorkerCount)
            {
                // Wait for at least one of them to finish
                await Task.WhenAny(runningTasks).ConfigureAwait(false);

                // Clear any completed blocks from the task list
                for (int i = 0; i < runningTasks.Count; i++)
                {
                    Task runningTask = runningTasks[i];
                    if (!runningTask.IsCompleted)
                    {
                        continue;
                    }

                    // Await the completed task so any exception surfaces here
                    await runningTask.ConfigureAwait(false);
                    runningTasks.RemoveAt(i);
                    i--;
                }
            }
        }

        // Wait for all the remaining blocks to finish staging and then
        // flush to complete the upload
        await Task.WhenAll(runningTasks).ConfigureAwait(false);
        return await _client.FlushAsync(
            position: appendedBytes,
            httpHeaders: httpHeaders,
            conditions: conditions,
            cancellationToken: cancellationToken)
            .ConfigureAwait(false);
    }
    catch (Exception ex)
    {
        scope.Failed(ex);
        throw;
    }
    finally
    {
        scope.Dispose();
    }
}
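The worker-pruning loop above is a general bounded-parallelism pattern. A standalone sketch of just that pattern, independent of the storage types:

// A minimal standalone sketch: keep at most maxWorkers tasks in flight, and
// prune completed tasks (awaiting each so failures surface) before starting more.
using System;
using System.Collections.Generic;
using System.Threading.Tasks;

static class BoundedParallelism
{
    public static async Task RunBoundedAsync(IEnumerable<Func<Task>> workItems, int maxWorkers)
    {
        List<Task> running = new List<Task>();
        foreach (Func<Task> work in workItems)
        {
            running.Add(work());
            if (running.Count >= maxWorkers)
            {
                // Wait for at least one task to finish
                await Task.WhenAny(running).ConfigureAwait(false);

                // Remove completed tasks, observing any exceptions
                for (int i = 0; i < running.Count; i++)
                {
                    if (!running[i].IsCompleted)
                    {
                        continue;
                    }
                    await running[i].ConfigureAwait(false);
                    running.RemoveAt(i);
                    i--;
                }
            }
        }

        // Drain whatever is still in flight
        await Task.WhenAll(running).ConfigureAwait(false);
    }
}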
private Response<PathInfo> UploadInSequence(
    Stream content,
    int blockSize,
    PathHttpHeaders httpHeaders,
    DataLakeRequestConditions conditions,
    IProgress<long> progressHandler,
    CancellationToken cancellationToken)
{
    // Wrap the append and flush calls in an Upload span for distributed tracing
    DiagnosticScope scope = _client.ClientDiagnostics.CreateScope(
        _operationName ?? $"{nameof(Azure)}.{nameof(Storage)}.{nameof(Files)}.{nameof(DataLake)}.{nameof(DataLakeFileClient)}.{nameof(DataLakeFileClient.Upload)}");
    try
    {
        scope.Start();

        // Wrap progressHandler in an AggregatingProgressIncrementer to prevent
        // progress from being reset with each append file operation.
        if (progressHandler != null)
        {
            progressHandler = new AggregatingProgressIncrementer(progressHandler);
        }

        // Partition the stream into individual blocks and stage them.
        // We need to keep track of how much data we have appended to
        // calculate offsets for the next appends, and the final
        // position to flush
        long appendedBytes = 0;
        foreach (ChunkedStream block in PartitionedUploadExtensions.GetBlocksAsync(
            content, blockSize, async: false, _arrayPool, cancellationToken).EnsureSyncEnumerable())
        {
            // Dispose the block after the loop iterates and return its memory to our ArrayPool
            using (block)
            {
                // Append the next block
                _client.Append(
                    new MemoryStream(block.Bytes, 0, block.Length, writable: false),
                    offset: appendedBytes,
                    leaseId: conditions?.LeaseId,
                    progressHandler: progressHandler,
                    cancellationToken: cancellationToken);

                appendedBytes += block.Length;
            }
        }

        // Flush after everything has been staged to complete the upload
        return _client.Flush(
            position: appendedBytes,
            httpHeaders: httpHeaders,
            conditions: conditions,
            cancellationToken: cancellationToken);
    }
    catch (Exception ex)
    {
        scope.Failed(ex);
        throw;
    }
    finally
    {
        scope.Dispose();
    }
}
/// <summary>
/// Execute command
/// </summary>
public override void ExecuteCmdlet()
{
    if (AsJob.IsPresent)
    {
        DoBeginProcessing();
    }

    IStorageBlobManagement localChannel = Channel;
    fileSystem = GetFileSystemClientByName(localChannel, this.FileSystem);

    if (this.Directory.IsPresent)
    {
        DataLakeDirectoryClient dirClient = fileSystem.GetDirectoryClient(this.Path);
        if (ShouldProcess(GetDataLakeItemUriWithoutSas(dirClient), "Create Directory: "))
        {
            if (dirClient.Exists())
            {
                throw new ResourceAlreadyExistException(String.Format("Folder '{0}' already exists.", GetDataLakeItemUriWithoutSas(dirClient)));
            }

            DataLakeModels.PathPermissions pathPermissions = null;
            if (this.Permission != null)
            {
                pathPermissions = DataLakeModels.PathPermissions.ParseSymbolicPermissions(this.Permission);
            }

            // Set directory properties and metadata
            PathHttpHeaders pathHttpHeaders = SetDatalakegen2ItemProperties(dirClient, BlobProperties, setToServer: false);
            IDictionary<string, string> metadata = SetDatalakegen2ItemMetaData(dirClient, BlobMetadata, setToServer: false);

            dirClient.Create(pathHttpHeaders,
                metadata,
                this.Permission,
                this.Umask != null ? DataLakeModels.PathPermissions.ParseSymbolicPermissions(this.Umask).ToOctalPermissions() : null);

            WriteDataLakeGen2Item(localChannel, dirClient);
        }
    }
    else // create file
    {
        DataLakeFileClient fileClient = fileSystem.GetFileClient(this.Path);
        if (ShouldProcess(GetDataLakeItemUriWithoutSas(fileClient), "Create File: "))
        {
            // Upload directly with the Data Lake SDK when using a SAS credential and a
            // permission or umask needs to be set, since setting permissions after the
            // upload would fail with SAS.
            if (Channel.StorageContext.StorageAccount.Credentials.IsSAS
                && (!string.IsNullOrEmpty(this.Permission) || !string.IsNullOrEmpty(this.Umask)))
            {
                Func<long, Task> taskGenerator = (taskId) => UploadDataLakeFile(taskId, fileClient, ResolvedFileName);
                RunTask(taskGenerator);
            }
            else
            {
                CloudBlobContainer container = Channel.GetContainerReference(this.FileSystem);
                CloudBlockBlob blob = container.GetBlockBlobReference(this.Path);
                Func<long, Task> taskGenerator = (taskId) => Upload2Blob(taskId, Channel, ResolvedFileName, blob);
                RunTask(taskGenerator);
            }
        }
    }

    if (AsJob.IsPresent)
    {
        DoEndProcessing();
    }
}