/// <summary>
/// Creates a new instance of the DataLakeStoreUploader class, by specifying a pointer to the FrontEnd to use for the upload.
/// </summary>
/// <param name="uploadParameters">The Upload Parameters to use.</param>
/// <param name="frontEnd">A pointer to the FrontEnd interface to use for the upload.</param>
/// <param name="token">The cancellation token to observe during the upload.</param>
/// <param name="progressTracker">(Optional) A tracker that reports progress on the upload.</param>
public DataLakeStoreUploader(UploadParameters uploadParameters, IFrontEndAdapter frontEnd, CancellationToken token, IProgress<UploadProgress> progressTracker = null)
{
    this.Parameters = uploadParameters;
    _frontEnd = frontEnd;

    // ensure that input parameters are correct
    ValidateParameters();

    _metadataFilePath = GetCanonicalMetadataFilePath();
    _progressTracker = progressTracker;
    _token = token;
}
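For context, here is a minimal end-to-end sketch of driving this constructor. It assumes an already-authenticated DataLakeStoreFileSystemManagementClient named client; the account name, paths, and progress handler are placeholders:

// Hypothetical usage: wire up parameters, front end, progress, and cancellation,
// then run the upload synchronously.
var cancelSource = new CancellationTokenSource();
var parameters = new UploadParameters(@"C:\data\input.bin", "/data/input.bin", "myadlsaccount",
    isOverwrite: true, isBinary: true);
var frontEnd = new DataLakeStoreFrontEndAdapter("myadlsaccount", client, cancelSource.Token);
var progress = new Progress<UploadProgress>(p =>
    Console.WriteLine("{0} of {1} bytes uploaded", p.UploadedByteCount, p.TotalFileLength));
var uploader = new DataLakeStoreUploader(parameters, frontEnd, cancelSource.Token, progress);
uploader.Execute(); // blocks until the upload completes or the token is cancelled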
/// <summary>
/// Constructs a new UploadMetadata from the given parameters.
/// </summary>
/// <param name="metadataFilePath">The file path to assign to this metadata file (for saving purposes).</param>
/// <param name="uploadParameters">The parameters to use for constructing this metadata.</param>
internal UploadMetadata(string metadataFilePath, UploadParameters uploadParameters)
{
    this.MetadataFilePath = metadataFilePath;
    this.UploadId = Guid.NewGuid().ToString("N");
    this.InputFilePath = uploadParameters.InputFilePath;
    this.TargetStreamPath = uploadParameters.TargetStreamPath;

    string streamDirectory;
    var streamName = SplitTargetStreamPathByName(out streamDirectory);

    if (string.IsNullOrEmpty(streamDirectory))
    {
        // the scenario where the file is being uploaded at the root
        this.SegmentStreamDirectory = string.Format("/{0}.segments.{1}", streamName, Guid.NewGuid());
    }
    else
    {
        // the scenario where the file is being uploaded in a sub folder
        this.SegmentStreamDirectory = string.Format("{0}/{1}.segments.{2}", streamDirectory, streamName, Guid.NewGuid());
    }

    this.IsBinary = uploadParameters.IsBinary;

    var fileInfo = new FileInfo(uploadParameters.InputFilePath);
    this.FileLength = fileInfo.Length;

    // We take the smaller of two segment counts: one derived from the maximum segment length
    // and one from the segment growth logic. This protects us against an aggressive increase in
    // thread count producing far more segments than is reasonable for a given file size, and it
    // ensures each segment is at least 256 MB, the size that yields optimal storage creation in the store.
    var preliminarySegmentCount = (int)Math.Ceiling((double)fileInfo.Length / uploadParameters.MaxSegementLength);
    this.SegmentCount = Math.Min(preliminarySegmentCount, UploadSegmentMetadata.CalculateSegmentCount(fileInfo.Length));
    this.SegmentLength = UploadSegmentMetadata.CalculateSegmentLength(fileInfo.Length, this.SegmentCount);

    this.Segments = new UploadSegmentMetadata[this.SegmentCount];
    for (int i = 0; i < this.SegmentCount; i++)
    {
        this.Segments[i] = new UploadSegmentMetadata(i, this);
    }
}
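To make the segment-count comment concrete, here is a worked example with assumed values (a 1 GB file and a 256 MB MaxSegementLength); Math.Min then only lowers the count further if UploadSegmentMetadata.CalculateSegmentCount returns fewer segments:

// Assumed values for illustration only.
long fileLength = 1024L * 1024 * 1024;        // 1 GB input file
long maxSegmentLength = 256L * 1024 * 1024;   // 256 MB maximum segment length
var preliminarySegmentCount = (int)Math.Ceiling((double)fileLength / maxSegmentLength); // ceil(1024 / 256) = 4
// SegmentCount = Math.Min(4, UploadSegmentMetadata.CalculateSegmentCount(fileLength));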
/// <summary>
/// Constructs a new UploadMetadata from the given parameters.
/// </summary>
/// <param name="metadataFilePath">The file path to assign to this metadata file (for saving purposes).</param>
/// <param name="uploadParameters">The parameters to use for constructing this metadata.</param>
/// <param name="frontEnd">The front end. This is used only in the constructor, for determining the file length.</param>
/// <param name="fileSize">(Optional) The length of the file. When negative, the length is retrieved through the front end.</param>
internal UploadMetadata(string metadataFilePath, UploadParameters uploadParameters, IFrontEndAdapter frontEnd, long fileSize = -1)
{
    this.MetadataFilePath = metadataFilePath;
    this.UploadId = Guid.NewGuid().ToString("N");
    this.InputFilePath = uploadParameters.InputFilePath;
    this.TargetStreamPath = uploadParameters.TargetStreamPath;
    this.IsDownload = uploadParameters.IsDownload;

    this.SegmentStreamDirectory = GetSegmentStreamDirectory();

    this.IsBinary = uploadParameters.IsBinary;

    this.FileLength = fileSize < 0
        ? frontEnd.GetStreamLength(uploadParameters.InputFilePath, !IsDownload)
        : fileSize;

    this.EncodingCodePage = uploadParameters.FileEncoding.CodePage;

    // We take the smaller of two segment counts: one derived from the maximum segment length
    // and one from the segment growth logic. This protects us against an aggressive increase in
    // thread count producing far more segments than is reasonable for a given file size, and it
    // ensures each segment is at least 256 MB, the size that yields optimal storage creation in the store.
    var preliminarySegmentCount = (int)Math.Ceiling((double)this.FileLength / uploadParameters.MaxSegementLength);
    this.SegmentCount = Math.Min(preliminarySegmentCount, UploadSegmentMetadata.CalculateSegmentCount(this.FileLength));
    this.SegmentLength = UploadSegmentMetadata.CalculateSegmentLength(this.FileLength, this.SegmentCount);

    this.Segments = new UploadSegmentMetadata[this.SegmentCount];
    for (int i = 0; i < this.SegmentCount; i++)
    {
        this.Segments[i] = new UploadSegmentMetadata(i, this);
    }

    if (!uploadParameters.IsBinary && this.SegmentCount > 1 && !this.IsDownload)
    {
        this.AlignSegmentsToRecordBoundaries();

        // ensure that nothing strange happened during alignment
        this.ValidateConsistency();
    }

    // initialize the status to pending, since it is not yet done.
    this.Status = SegmentUploadStatus.Pending;
}
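A sketch of the download path through this overload, assuming frontEnd is an existing IFrontEndAdapter: leaving fileSize at its default of -1 makes the constructor look the length up in the store via frontEnd.GetStreamLength:

// Hypothetical values; isDownload: true means InputFilePath names a stream in the store.
var parameters = new UploadParameters("/remote/data.csv", @"C:\local\data.csv", "myadlsaccount",
    isBinary: false, isDownload: true);
var metadata = new UploadMetadata(@"C:\local\data.csv.upload.xml", parameters, frontEnd);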
public void UploadMetadataGenerator_AlignSegmentsToRecordBoundaries()
{
    // We keep creating a file by appending a number of bytes to it (taken from FileLengthsMB).
    // At each iteration, we append a new blob of data, and then run the whole test on the entire file.
    var rnd = new Random(0);
    string folderPath = string.Format(@"{0}\uploadtest", Environment.CurrentDirectory);
    string filePath = Path.Combine(folderPath, "verifymetadata.txt");
    try
    {
        if (!Directory.Exists(folderPath))
        {
            Directory.CreateDirectory(folderPath);
        }

        if (File.Exists(filePath))
        {
            File.Delete(filePath);
        }

        foreach (var lengthMB in FileLengthsMB)
        {
            var appendLength = (int)(lengthMB * 1024 * 1024);
            AppendToFile(filePath, appendLength, rnd, 0, MaxAppendLength);
            string metadataFilePath = filePath + ".metadata.txt";

            var up = new UploadParameters(filePath, filePath, null, isBinary: false, maxSegmentLength: 4 * 1024 * 1024);
            var mg = new UploadMetadataGenerator(up, MaxAppendLength);
            var metadata = mg.CreateNewMetadata(metadataFilePath);

            VerifySegmentsAreOnRecordBoundaries(metadata, filePath);
        }
    }
    finally
    {
        if (Directory.Exists(folderPath))
        {
            Directory.Delete(folderPath, true);
        }
    }
}
/// <summary>
/// Creates a new instance of the DataLakeStoreUploader class, by specifying a pointer to the FrontEnd to use for the upload.
/// </summary>
/// <param name="uploadParameters">The Upload Parameters to use.</param>
/// <param name="frontEnd">A pointer to the FrontEnd interface to use for the upload.</param>
/// <param name="progressTracker">(Optional) A tracker that reports progress on the upload.</param>
public DataLakeStoreUploader(UploadParameters uploadParameters, IFrontEndAdapter frontEnd, IProgress<UploadProgress> progressTracker = null)
    : this(uploadParameters, frontEnd, CancellationToken.None, progressTracker)
{
}
/// <summary>
/// Creates a new instance of the UploadMetadataGenerator with the given parameters and the given front end.
/// </summary>
/// <param name="parameters">The parameters.</param>
/// <param name="frontend">The frontend to use when generating new metadata.</param>
public UploadMetadataGenerator(UploadParameters parameters, IFrontEndAdapter frontend)
{
    _parameters = parameters;
    _frontend = frontend;
}
public void CopyDirectory(
    string destinationFolderPath,
    string accountName,
    string sourceFolderPath,
    CancellationToken cmdletCancellationToken,
    int folderThreadCount = -1,
    int perFileThreadCount = -1,
    bool recursive = false,
    bool overwrite = false,
    bool resume = false,
    bool forceBinaryOrText = false,
    bool isBinary = false,
    Cmdlet cmdletRunningRequest = null)
{
    var totalBytes = GetByteCountInDirectory(sourceFolderPath, recursive);
    var totalFiles = GetFileCountInDirectory(sourceFolderPath, recursive);

    var progress = new ProgressRecord(
        uniqueActivityIdGenerator.Next(0, 10000000),
        string.Format("Copying Folder: {0}{1}. Total bytes remaining: {2}. Total files remaining: {3}",
            sourceFolderPath, recursive ? " recursively" : string.Empty, totalBytes, totalFiles),
        "Copy in progress...")
    {
        PercentComplete = 0
    };

    UpdateProgress(progress, cmdletRunningRequest);

    var internalFolderThreads = folderThreadCount <= 0 ? Environment.ProcessorCount : folderThreadCount;
    var internalFileThreads = perFileThreadCount <= 0 ? Environment.ProcessorCount : perFileThreadCount;

    // We need to override the default .NET value for max connections to a host to our number of threads,
    // if necessary (otherwise we won't achieve the parallelism we want).
    var previousDefaultConnectionLimit = ServicePointManager.DefaultConnectionLimit;
    var previousExpect100 = ServicePointManager.Expect100Continue;
    try
    {
        ServicePointManager.DefaultConnectionLimit =
            Math.Max((internalFolderThreads * internalFileThreads) + internalFolderThreads,
                ServicePointManager.DefaultConnectionLimit);
        ServicePointManager.Expect100Continue = false;

        // On update from the Data Lake Store uploader, capture the progress.
        var progressTracker = new System.Progress<UploadFolderProgress>();
        progressTracker.ProgressChanged += (s, e) =>
        {
            lock (ConsoleOutputLock)
            {
                progress.PercentComplete = (int)(1.0 * e.UploadedByteCount / e.TotalFileLength * 100);
                progress.Activity = string.Format("Copying Folder: {0}{1}. Total bytes remaining: {2}. Total files remaining: {3}",
                    sourceFolderPath, recursive ? " recursively" : string.Empty,
                    e.TotalFileLength - e.UploadedByteCount, e.TotalFileCount - e.UploadedFileCount);
            }
        };

        var uploadParameters = new UploadParameters(sourceFolderPath, destinationFolderPath, accountName,
            internalFileThreads, internalFolderThreads,
            isOverwrite: overwrite, isResume: resume, isBinary: isBinary, isRecursive: recursive);
        var uploader = new DataLakeStoreUploader(uploadParameters,
            new DataLakeStoreFrontEndAdapter(accountName, _client, cmdletCancellationToken),
            cmdletCancellationToken,
            folderProgressTracker: progressTracker);

        // Execute the uploader.
        var uploadTask = Task.Run(() =>
        {
            cmdletCancellationToken.ThrowIfCancellationRequested();
            uploader.Execute();
            cmdletCancellationToken.ThrowIfCancellationRequested();
        }, cmdletCancellationToken);

        TrackUploadProgress(uploadTask, progress, cmdletRunningRequest, cmdletCancellationToken);

        if (!cmdletCancellationToken.IsCancellationRequested)
        {
            progress.PercentComplete = 100;
            progress.RecordType = ProgressRecordType.Completed;
            UpdateProgress(progress, cmdletRunningRequest);
        }
    }
    catch (Exception e)
    {
        throw new CloudException(string.Format(Properties.Resources.UploadFailedMessage, e));
    }
    finally
    {
        ServicePointManager.DefaultConnectionLimit = previousDefaultConnectionLimit;
        ServicePointManager.Expect100Continue = previousExpect100;
    }
}
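The DefaultConnectionLimit override above exists because each folder thread can have all of its per-file threads writing at once, plus one connection of its own; a small illustration with assumed thread counts:

// With 4 folder threads and 8 per-file threads (assumed values),
// the uploader can hold up to (4 * 8) + 4 = 36 connections open at once,
// so the limit is raised to at least 36 but never lowered.
int folderThreads = 4, fileThreads = 8;
ServicePointManager.DefaultConnectionLimit =
    Math.Max((folderThreads * fileThreads) + folderThreads, ServicePointManager.DefaultConnectionLimit);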
/// <summary>
/// Creates a new instance of the UploadMetadataGenerator with the given parameters and the default maximum append length.
/// </summary>
/// <param name="parameters">The parameters.</param>
public UploadMetadataGenerator(UploadParameters parameters)
    : this(parameters, SingleSegmentUploader.BufferLength)
{
}
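A short sketch of the generator in use, with placeholder paths; CreateNewMetadata is the same entry point exercised by the alignment test above:

var parameters = new UploadParameters(@"C:\data\input.txt", "/data/input.txt", "myadlsaccount", isBinary: false);
var generator = new UploadMetadataGenerator(parameters); // default maximum append length
var metadata = generator.CreateNewMetadata(@"C:\data\input.txt.metadata.txt");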
public void CopyFile(string destinationPath, string accountName, string sourcePath, CancellationToken cmdletCancellationToken,
    int threadCount = 10, bool overwrite = false, bool resume = false, bool isBinary = false, bool isDownload = false,
    Cmdlet cmdletRunningRequest = null, ProgressRecord parentProgress = null)
{
    var previousTracing = ServiceClientTracing.IsEnabled;
    try
    {
        // Disable tracing due to performance issues during download until issue:
        // https://github.com/Azure/azure-powershell/issues/2499 is resolved.
        ServiceClientTracing.IsEnabled = false;

        FileType ignoredType;
        if (!overwrite &&
            (!isDownload && TestFileOrFolderExistence(destinationPath, accountName, out ignoredType) ||
             (isDownload && File.Exists(destinationPath))))
        {
            throw new InvalidOperationException(string.Format(Properties.Resources.LocalFileAlreadyExists, destinationPath));
        }

        if (threadCount < 1)
        {
            threadCount = 10; // 10 is the default per our documentation.
        }

        // Progress bar indicator.
        var description = string.Format("Copying {0} File: {1} {2} Location: {3} for account: {4}",
            isDownload ? "Data Lake Store" : "Local",
            sourcePath,
            isDownload ? "to local" : "to Data Lake Store",
            destinationPath,
            accountName);
        var progress = new ProgressRecord(
            uniqueActivityIdGenerator.Next(0, 10000000),
            string.Format("{0} Data Lake Store", isDownload ? "Download from" : "Upload to"),
            description)
        {
            PercentComplete = 0
        };

        if (parentProgress != null)
        {
            progress.ParentActivityId = parentProgress.ActivityId;
        }

        // On update from the Data Lake Store uploader, capture the progress.
        var progressTracker = new System.Progress<UploadProgress>();
        progressTracker.ProgressChanged += (s, e) =>
        {
            lock (ConsoleOutputLock)
            {
                var toSet = (int)(1.0 * e.UploadedByteCount / e.TotalFileLength * 100);

                // PowerShell defect protection: if, through some defect in our progress
                // tracking, the number falls outside of 0 - 100, PowerShell will crash
                // when it is set to that value. Leave the value unchanged in that case.
                if (toSet >= 0 && toSet <= 100)
                {
                    progress.PercentComplete = toSet;
                }
            }
        };

        var uploadParameters = new UploadParameters(sourcePath, destinationPath, accountName, threadCount,
            isOverwrite: overwrite, isResume: resume, isBinary: isBinary, isDownload: isDownload);
        var uploader = new DataLakeStoreUploader(uploadParameters,
            new DataLakeStoreFrontEndAdapter(accountName, _client, cmdletCancellationToken),
            cmdletCancellationToken,
            progressTracker);

        var previousExpect100 = ServicePointManager.Expect100Continue;
        try
        {
            ServicePointManager.Expect100Continue = false;

            // Execute the uploader.
            var uploadTask = Task.Run(() =>
            {
                cmdletCancellationToken.ThrowIfCancellationRequested();
                uploader.Execute();
                cmdletCancellationToken.ThrowIfCancellationRequested();
            }, cmdletCancellationToken);

            TrackUploadProgress(uploadTask, progress, cmdletRunningRequest, cmdletCancellationToken);
        }
        catch (Exception e)
        {
            throw new CloudException(string.Format(Properties.Resources.UploadFailedMessage, e));
        }
        finally
        {
            ServicePointManager.Expect100Continue = previousExpect100;
        }
    }
    finally
    {
        ServiceClientTracing.IsEnabled = previousTracing;
    }
}
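A hypothetical call to this method for the download direction, from within the same helper class; the account name and paths are placeholders:

// Pull a stream from the store down to a local file, overwriting any existing copy.
CopyFile(@"C:\local\output.csv", "myadlsaccount", "/remote/output.csv",
    CancellationToken.None, threadCount: 10, overwrite: true, isDownload: true);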
public void CopyDirectory(
    string destinationFolderPath,
    string accountName,
    string sourceFolderPath,
    CancellationToken cmdletCancellationToken,
    int concurrentFileCount = 5,
    int perFileThreadCount = 10,
    bool recursive = false,
    bool overwrite = false,
    bool resume = false,
    bool forceBinaryOrText = false,
    bool isBinary = false,
    bool isDownload = false,
    Cmdlet cmdletRunningRequest = null)
{
    var totalBytes = GetByteCountInDirectory(sourceFolderPath, recursive, isDownload, accountName);
    var totalFiles = GetFileCountInDirectory(sourceFolderPath, recursive, isDownload, accountName);

    var progress = new ProgressRecord(
        uniqueActivityIdGenerator.Next(0, 10000000),
        string.Format("Copying Folder: {0}{1}. Total bytes remaining: {2}. Total files remaining: {3}",
            sourceFolderPath, recursive ? " recursively" : string.Empty, totalBytes, totalFiles),
        "Copy in progress...")
    {
        PercentComplete = 0
    };

    UpdateProgress(progress, cmdletRunningRequest);

    var internalFolderThreads = concurrentFileCount <= 0 ? 5 : concurrentFileCount;
    var internalFileThreads = perFileThreadCount <= 0 ? 10 : perFileThreadCount;

    // We need to override the default .NET value for max connections to a host to our number of threads,
    // if necessary (otherwise we won't achieve the parallelism we want).
    var previousDefaultConnectionLimit = ServicePointManager.DefaultConnectionLimit;
    var previousExpect100 = ServicePointManager.Expect100Continue;
    var previousTracing = ServiceClientTracing.IsEnabled;
    try
    {
        // Disable tracing due to performance issues during download until issue:
        // https://github.com/Azure/azure-powershell/issues/2499 is resolved.
        ServiceClientTracing.IsEnabled = false;
        ServicePointManager.DefaultConnectionLimit =
            Math.Max((internalFolderThreads * internalFileThreads) + internalFolderThreads,
                ServicePointManager.DefaultConnectionLimit);
        ServicePointManager.Expect100Continue = false;

        // On update from the Data Lake Store uploader, capture the progress.
        var progressTracker = new System.Progress<UploadFolderProgress>();
        progressTracker.ProgressChanged += (s, e) =>
        {
            lock (ConsoleOutputLock)
            {
                var toSet = (int)(1.0 * e.UploadedByteCount / e.TotalFileLength * 100);

                // PowerShell defect protection: if, through some defect in our progress
                // tracking, the number falls outside of 0 - 100, PowerShell will crash
                // when it is set to that value. Leave the value unchanged in that case.
                if (toSet >= 0 && toSet <= 100)
                {
                    progress.PercentComplete = toSet;
                }

                progress.Activity = string.Format("Copying Folder: {0}{1}. Total bytes remaining: {2}. Total files remaining: {3}",
                    sourceFolderPath, recursive ? " recursively" : string.Empty,
                    e.TotalFileLength - e.UploadedByteCount, e.TotalFileCount - e.UploadedFileCount);
            }
        };

        var uploadParameters = new UploadParameters(sourceFolderPath, destinationFolderPath, accountName,
            internalFileThreads, internalFolderThreads,
            isOverwrite: overwrite, isResume: resume, isBinary: isBinary, isRecursive: recursive, isDownload: isDownload);
        var uploader = new DataLakeStoreUploader(uploadParameters,
            new DataLakeStoreFrontEndAdapter(accountName, _client, cmdletCancellationToken),
            cmdletCancellationToken,
            folderProgressTracker: progressTracker);

        // Execute the uploader.
        var uploadTask = Task.Run(() =>
        {
            cmdletCancellationToken.ThrowIfCancellationRequested();
            uploader.Execute();
            cmdletCancellationToken.ThrowIfCancellationRequested();
        }, cmdletCancellationToken);

        TrackUploadProgress(uploadTask, progress, cmdletRunningRequest, cmdletCancellationToken);

        if (!cmdletCancellationToken.IsCancellationRequested)
        {
            progress.PercentComplete = 100;
            progress.RecordType = ProgressRecordType.Completed;
            UpdateProgress(progress, cmdletRunningRequest);
        }
    }
    catch (Exception e)
    {
        throw new CloudException(string.Format(Properties.Resources.UploadFailedMessage, e));
    }
    finally
    {
        ServiceClientTracing.IsEnabled = previousTracing;
        ServicePointManager.DefaultConnectionLimit = previousDefaultConnectionLimit;
        ServicePointManager.Expect100Continue = previousExpect100;
    }
}
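And a matching hypothetical call to this overload that mirrors a store folder to disk recursively, with all other settings left at their defaults:

CopyDirectory(@"C:\local\mirror", "myadlsaccount", "/remote/folder",
    CancellationToken.None, recursive: true, overwrite: true, isDownload: true);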
public void CopyFile(string destinationPath, string accountName, string sourcePath, CancellationToken cmdletCancellationToken,
    int threadCount = -1, bool overwrite = false, bool resume = false, bool isBinary = false,
    Cmdlet cmdletRunningRequest = null, ProgressRecord parentProgress = null)
{
    var originalValue = TracingAdapter.IsEnabled;
    try
    {
        // TODO: Remove this logic when defect 4259238 (located here: http://vstfrd:8080/Azure/RD/_workitems/edit/4259238) is resolved.
        TracingAdapter.IsEnabled = false;

        // default the number of threads to use to the processor count
        if (threadCount < 1)
        {
            threadCount = Environment.ProcessorCount;
        }

        // Progress bar indicator.
        var description = string.Format("Copying File: {0} to DataLakeStore Location: {1} for account: {2}",
            sourcePath, destinationPath, accountName);
        var progress = new ProgressRecord(
            uniqueActivityIdGenerator.Next(0, 10000000),
            "Upload to Data Lake Store",
            description)
        {
            PercentComplete = 0
        };

        if (parentProgress != null)
        {
            progress.ParentActivityId = parentProgress.ActivityId;
        }

        // On update from the Data Lake Store uploader, capture the progress.
        var progressTracker = new System.Progress<UploadProgress>();
        progressTracker.ProgressChanged += (s, e) =>
        {
            lock (ConsoleOutputLock)
            {
                progress.PercentComplete = (int)(1.0 * e.UploadedByteCount / e.TotalFileLength * 100);
            }
        };

        var uploadParameters = new UploadParameters(sourcePath, destinationPath, accountName, threadCount,
            overwrite, resume, isBinary);
        var uploader = new DataLakeStoreUploader(uploadParameters,
            new DataLakeStoreFrontEndAdapter(accountName, _client, cmdletCancellationToken),
            cmdletCancellationToken,
            progressTracker);

        // Execute the uploader.
        var uploadTask = Task.Run(() =>
        {
            cmdletCancellationToken.ThrowIfCancellationRequested();
            uploader.Execute();
            cmdletCancellationToken.ThrowIfCancellationRequested();
        }, cmdletCancellationToken);

        TrackUploadProgress(uploadTask, progress, cmdletRunningRequest, cmdletCancellationToken);
    }
    finally
    {
        TracingAdapter.IsEnabled = originalValue;
    }
}
/// <summary>
/// Constructs a new UploadFolderMetadata from the given parameters.
/// </summary>
/// <param name="metadataFilePath">The file path to assign to this metadata file (for saving purposes).</param>
/// <param name="uploadParameters">The parameters to use for constructing this metadata.</param>
/// <param name="frontend">The frontend to use when generating per file metadata.</param>
public UploadFolderMetadata(string metadataFilePath, UploadParameters uploadParameters, IFrontEndAdapter frontend)
{
    this.MetadataFilePath = metadataFilePath;
    this.UploadId = Guid.NewGuid().ToString("N");
    this.InputFolderPath = uploadParameters.InputFilePath;
    this.TargetStreamFolderPath = uploadParameters.TargetStreamPath.TrimEnd('/');
    this.IsRecursive = uploadParameters.IsRecursive;

    // Get the list of all files in the source directory, depending on whether this is recursive or not.
    ConcurrentQueue<string> allFiles;
    ConcurrentQueue<Exception> exceptions = new ConcurrentQueue<Exception>();
    Dictionary<string, long> downloadFiles = new Dictionary<string, long>();
    if (uploadParameters.IsDownload)
    {
        foreach (var entry in frontend.ListDirectory(uploadParameters.InputFilePath, uploadParameters.IsRecursive))
        {
            downloadFiles.Add(entry.Key, entry.Value);
        }

        allFiles = new ConcurrentQueue<string>(downloadFiles.Keys);
        this.TotalFileBytes = downloadFiles.Values.Sum();
    }
    else
    {
        allFiles = new ConcurrentQueue<string>(this.IsRecursive
            ? Directory.EnumerateFiles(this.InputFolderPath, "*.*", SearchOption.AllDirectories)
            : Directory.EnumerateFiles(this.InputFolderPath, "*.*", SearchOption.TopDirectoryOnly));
        this.TotalFileBytes = GetByteCountFromFileList(allFiles);
    }

    this.FileCount = allFiles.Count();
    this.Files = new UploadMetadata[this.FileCount];

    // explicitly cap the number of metadata-generation threads at 500
    int threadCount = Math.Min(this.FileCount, 500);
    var threads = new List<Thread>(threadCount);

    // Start a set of new threads that will create the metadata, and ensure a protected index.
    int currentIndex = 0;
    object indexIncrementLock = new object();
    for (int i = 0; i < threadCount; i++)
    {
        var t = new Thread(() =>
        {
            string curFile;
            while (allFiles.TryDequeue(out curFile))
            {
                try
                {
                    var relativeFilePath = curFile.Replace(this.InputFolderPath, "").TrimStart('\\').TrimStart('/');
                    var paramsPerFile = new UploadParameters(
                        curFile,
                        String.Format("{0}{1}{2}", this.TargetStreamFolderPath, uploadParameters.IsDownload ? "\\" : "/", relativeFilePath),
                        uploadParameters.AccountName,
                        uploadParameters.PerFileThreadCount,
                        uploadParameters.ConcurrentFileCount,
                        uploadParameters.IsOverwrite,
                        uploadParameters.IsResume,
                        uploadParameters.IsBinary,
                        uploadParameters.IsRecursive,
                        uploadParameters.IsDownload,
                        uploadParameters.MaxSegementLength,
                        uploadParameters.LocalMetadataLocation);

                    long size = -1;
                    if (uploadParameters.IsDownload && downloadFiles != null)
                    {
                        size = downloadFiles[curFile];
                    }

                    var uploadMetadataPath = Path.Combine(uploadParameters.LocalMetadataLocation,
                        string.Format("{0}.upload.xml", Path.GetFileName(curFile)));
                    var eachFileMetadata = new UploadMetadata(uploadMetadataPath, paramsPerFile, frontend, size);
                    lock (indexIncrementLock)
                    {
                        this.Files[currentIndex] = eachFileMetadata;
                        currentIndex++;
                    }
                }
                catch (Exception e)
                {
                    exceptions.Enqueue(e);
                }
            }
        });

        t.Start();
        threads.Add(t);
    }

    foreach (var t in threads)
    {
        t.Join();
    }

    if (exceptions.Count > 0)
    {
        throw new AggregateException("At least one file failed to have metadata generated", exceptions.ToArray());
    }
}
public static bool UploadFile(DataLakeStoreFileSystemManagementClient dataLakeStoreFileSystemClient, string dlAccountName,
    string srcPath, string destPath, bool force = false, bool recursive = false, bool testCancel = false)
{
    var cancelSource = new CancellationTokenSource();
    var myToken = cancelSource.Token;
    var parameters = new UploadParameters(srcPath, destPath, dlAccountName, isOverwrite: force, isBinary: true,
        perFileThreadCount: 40, concurrentFileCount: 100, isRecursive: recursive);
    var progressTracker = new System.Progress<UploadFolderProgress>();
    progressTracker.ProgressChanged += (s, e) =>
    {
        if (e.TotalFileCount == 0)
        {
            Console.WriteLine("we are done!");
        }
    };

    var frontend = new DataLakeStoreFrontEndAdapter(dlAccountName, dataLakeStoreFileSystemClient, myToken);
    var uploader = new DataLakeStoreUploader(parameters, frontend, myToken, folderProgressTracker: progressTracker);
    if (testCancel)
    {
        var uploadTask = Task.Run(() =>
        {
            myToken.ThrowIfCancellationRequested();
            uploader.Execute();
            myToken.ThrowIfCancellationRequested();
        }, myToken);

        try
        {
            while (!uploadTask.IsCompleted && !uploadTask.IsCanceled)
            {
                if (myToken.IsCancellationRequested)
                {
                    // We are done tracking progress; break and let the task clean itself up.
                    try
                    {
                        uploadTask.Wait();
                    }
                    catch (OperationCanceledException)
                    {
                        if (uploadTask.IsCanceled)
                        {
                            uploadTask.Dispose();
                        }
                    }
                    catch (AggregateException ex)
                    {
                        if (ex.InnerExceptions.OfType<OperationCanceledException>().Any())
                        {
                            if (uploadTask.IsCanceled)
                            {
                                uploadTask.Dispose();
                            }
                        }
                        else
                        {
                            throw;
                        }
                    }
                    catch (Exception)
                    {
                        // Swallow this while debugging, to see what it is.
                    }

                    break;
                }

                Thread.Sleep(60000);

                // run for 60 seconds and then cancel out and see what happens
                cancelSource.Cancel();
            }
        }
        catch (OperationCanceledException)
        {
            // do nothing, since we successfully cancelled out
        }
        catch (Exception)
        {
            // Swallow this while debugging, to see what it is.
        }
    }
    else
    {
        uploader.Execute();
    }

    return true;
}
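A hypothetical call to this helper, assuming an authenticated DataLakeStoreFileSystemManagementClient named client; with testCancel left false, it simply runs the recursive upload to completion:

UploadFile(client, "myadlsaccount", @"C:\data\toUpload", "/data/uploaded",
    force: true, recursive: true);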
public void CopyFile(string destinationPath, string accountName, string sourcePath, CancellationToken cmdletCancellationToken,
    int threadCount = -1, bool overwrite = false, bool resume = false, bool isBinary = false,
    Cmdlet cmdletRunningRequest = null, ProgressRecord parentProgress = null)
{
    FileType ignoredType;
    if (!overwrite && TestFileOrFolderExistence(destinationPath, accountName, out ignoredType))
    {
        throw new InvalidOperationException(string.Format(Properties.Resources.LocalFileAlreadyExists, destinationPath));
    }

    // TODO: Defect 4259238 (located here: http://vstfrd:8080/Azure/RD/_workitems/edit/4259238) needs to be resolved,
    // or the TracingAdapter workaround needs to be put back in.

    // default the number of threads to use to the processor count
    if (threadCount < 1)
    {
        threadCount = Environment.ProcessorCount;
    }

    // Progress bar indicator.
    var description = string.Format("Copying File: {0} to DataLakeStore Location: {1} for account: {2}",
        sourcePath, destinationPath, accountName);
    var progress = new ProgressRecord(
        uniqueActivityIdGenerator.Next(0, 10000000),
        "Upload to Data Lake Store",
        description)
    {
        PercentComplete = 0
    };

    if (parentProgress != null)
    {
        progress.ParentActivityId = parentProgress.ActivityId;
    }

    // On update from the Data Lake Store uploader, capture the progress.
    var progressTracker = new System.Progress<UploadProgress>();
    progressTracker.ProgressChanged += (s, e) =>
    {
        lock (ConsoleOutputLock)
        {
            progress.PercentComplete = (int)(1.0 * e.UploadedByteCount / e.TotalFileLength * 100);
        }
    };

    var uploadParameters = new UploadParameters(sourcePath, destinationPath, accountName, threadCount,
        overwrite, resume, isBinary);
    var uploader = new DataLakeStoreUploader(uploadParameters,
        new DataLakeStoreFrontEndAdapter(accountName, _client, cmdletCancellationToken),
        cmdletCancellationToken,
        progressTracker);

    var previousExpect100 = ServicePointManager.Expect100Continue;
    try
    {
        ServicePointManager.Expect100Continue = false;

        // Execute the uploader.
        var uploadTask = Task.Run(() =>
        {
            cmdletCancellationToken.ThrowIfCancellationRequested();
            uploader.Execute();
            cmdletCancellationToken.ThrowIfCancellationRequested();
        }, cmdletCancellationToken);

        TrackUploadProgress(uploadTask, progress, cmdletRunningRequest, cmdletCancellationToken);
    }
    finally
    {
        ServicePointManager.Expect100Continue = previousExpect100;
    }
}
/// <summary>
/// Creates a new instance of the UploadMetadataGenerator with the given parameters and the given maximum append length.
/// </summary>
/// <param name="parameters">The parameters.</param>
/// <param name="maxAppendLength">The maximum append length to use when aligning segments to record boundaries.</param>
public UploadMetadataGenerator(UploadParameters parameters, int maxAppendLength)
{
    _parameters = parameters;
    _maxAppendLength = maxAppendLength;
}