/// <summary>
/// Always chunking during download is not an efficient choice, because multiple threads writing to
/// different offsets of a file on the local file system is very slow. Chunking only makes sense when
/// there is a small number of very large files. A file whose size is greater than ChunkWeightThreshold
/// is defined as a large file. If the number of files larger than ChunkWeightThreshold is less than
/// NumLargeFileThreshold, then we do chunking. Files whose size is less than
/// DefaultSkipChunkingWeightThreshold never need chunking, and if we have a large number of large files
/// we can also do without chunking.
/// In the first pass, producer threads traverse the directory tree and either store entries in an
/// internal list or add them as non-chunked jobs to the job queue, depending on their size.
/// Runs on multiple threads.
/// </summary>
protected override void FirstPassProducerRun()
{
    do
    {
        if (CancelToken.IsCancellationRequested)
        {
            return;
        }
        var der = DownloaderProducerQueue.Poll();
        if (der == null) // Means end of producer
        {
            DownloaderProducerQueue.Add(null); // Notify any other threads that are waiting
            return;
        }
        try
        {
            long numDirs = 0, numFiles = 0, totChunks = 0, unchunkedFiles = 0, totSize = 0, isEmpty = 0;
            var fop = Client.EnumerateDirectory(der.FullName);
            foreach (var dir in fop)
            {
                isEmpty = 1;
                if (dir.Type == DirectoryEntryType.DIRECTORY)
                {
                    if (NotRecurse) // Directly add the directories to be created since we won't recurse into them
                    {
                        if (!AddDirectoryToConsumerQueue(dir.FullName, false))
                        {
                            continue;
                        }
                    }
                    else
                    {
                        DownloaderProducerQueue.Add(dir);
                    }
                    numDirs++;
                }
                else
                {
                    if (RecordedMetadata.EntryTransferredSuccessfulLastTime(dir.FullName))
                    {
                        continue;
                    }
                    // We count all files here, even though some of them are only added as chunked or
                    // non-chunked jobs in the final producer pass
                    numFiles++;
                    long fileSizeToTransfer = 0;
                    // If we are resuming and we chunked this file last time but it is incomplete,
                    // we want to chunk it this time also
                    if (RecordedMetadata.EntryTransferredIncompleteLastTime(dir.FullName))
                    {
                        long chunks = AddFileToConsumerQueue(dir.FullName, dir.Length, true, out fileSizeToTransfer);
                        totChunks += chunks;
                    }
                    // If the length is at most the skip-chunking weight threshold, add the file
                    // directly to the job queue as a non-chunked job
                    else if (dir.Length <= SkipChunkingWeightThreshold)
                    {
                        AddFileToConsumerQueue(dir.FullName, dir.Length, false, out fileSizeToTransfer);
                        unchunkedFiles++;
                    }
                    else // We are not sure yet, so store the entry in the internal list
                    {
                        if (dir.Length > ChunkWeightThreshold)
                        {
                            Interlocked.Increment(ref _numLargeFiles);
                        }
                        AddDirectoryEntryToList(dir);
                    }
                    // We only update totSize with the chunks or unchunked files that will get transferred this turn
                    totSize += fileSizeToTransfer;
                }
            }
            bool isDirectoryEmptyAndNotDownloadedYet = false;
            if (isEmpty == 0)
            {
                isDirectoryEmptyAndNotDownloadedYet = AddDirectoryToConsumerQueue(der.FullName, false);
            }
            // With NotRecurse, report the number of subdirectories added; otherwise report whether
            // this empty directory still needed to be created
            StatusUpdate(numFiles, unchunkedFiles, totChunks, totSize,
                NotRecurse ? numDirs : (isDirectoryEmptyAndNotDownloadedYet ? 1 : 0));
        }
        catch (AdlsException ex)
        {
            Status.EntriesFailed.Add(new SingleEntryTransferStatus(der.FullName, null, ex.Message,
                EntryType.Directory, SingleChunkStatus.Failed));
        }
    } while (!NotRecurse);
}
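// Illustrative sketch, not part of the original source: a minimal, standalone reading of the download
// chunking policy stated in the summary above. The class and member names (ChunkingPolicySketch,
// IsLarge, ShouldChunk) and all constant values are hypothetical assumptions chosen for illustration;
// only the threshold names mirror the fields used by FirstPassProducerRun.
internal static class ChunkingPolicySketch
{
    private const long SkipChunkingWeightThreshold = 1024 * 1024;      // assumed value: at or below this, never chunk
    private const long ChunkWeightThreshold = 256L * 1024 * 1024;      // assumed value: above this, a file counts as "large"
    private const long NumLargeFileThreshold = 100;                    // assumed value: chunk only while large files are few

    // A file is "large" when it exceeds the chunk weight threshold.
    internal static bool IsLarge(long fileLength) => fileLength > ChunkWeightThreshold;

    // Decides whether a file should be split into chunks, given its size and how many
    // large files the first pass has recorded so far (_numLargeFiles in the code above).
    internal static bool ShouldChunk(long fileLength, long numLargeFilesSeen)
    {
        if (fileLength <= SkipChunkingWeightThreshold)
        {
            return false; // Small file: parallel writes to one local file cost more than they gain
        }
        // Chunk only while large files are few; with many large files, whole-file jobs
        // already provide enough parallelism across files.
        return numLargeFilesSeen < NumLargeFileThreshold;
    }
}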
/// <summary>
/// Producer code that traverses the local directory tree and adds entries as chunked or non-chunked
/// jobs to the job queue depending on their size. Currently this adds jobs to the job queue directly,
/// but in future we will try to add files to an internal list and add them as jobs in FinalPassProducerRun.
/// </summary>
protected override void FirstPassProducerRun()
{
    do
    {
        if (CancelToken.IsCancellationRequested)
        {
            return;
        }
        var dir = UploaderProducerQueue.Poll();
        if (dir == null) // Means end of producer
        {
            UploaderProducerQueue.Add(null); // Notify any other threads that are waiting
            return;
        }
        try
        {
            long numSubDirs = 0, isEmpty = 0;
            IEnumerable<DirectoryInfo> enumDir = dir.EnumerateDirectories();
            foreach (var subDir in enumDir)
            {
                isEmpty = 1;
                if (NotRecurse) // Directly add the directories to be created since we won't recurse into them
                {
                    if (!AddDirectoryToConsumerQueue(subDir.FullName, true))
                    {
                        continue;
                    }
                }
                else
                {
                    UploaderProducerQueue.Add(subDir);
                }
                numSubDirs++;
            }
            IEnumerable<FileInfo> enumFiles = dir.EnumerateFiles();
            long numFiles = 0, totChunks = 0, unchunkedFiles = 0, totSize = 0;
            foreach (var file in enumFiles)
            {
                isEmpty = 1;
                if (RecordedMetadata.EntryTransferredSuccessfulLastTime(file.FullName))
                {
                    continue;
                }
                long fileSizeToTransfer;
                // A file is chunked exactly when it is larger than one chunk
                long chunks = AddFileToConsumerQueue(file.FullName, file.Length, file.Length > ChunkSize,
                    out fileSizeToTransfer);
                totChunks += chunks;
                if (file.Length <= ChunkSize)
                {
                    unchunkedFiles++;
                }
                numFiles++;
                totSize += fileSizeToTransfer;
            }
            bool isDirectoryEmptyAndNotUploadedYet = false;
            if (isEmpty == 0)
            {
                isDirectoryEmptyAndNotUploadedYet = AddDirectoryToConsumerQueue(dir.FullName, true);
            }
            // With NotRecurse, report the number of subdirectories added; otherwise report whether
            // this empty directory still needed to be created
            StatusUpdate(numFiles, unchunkedFiles, totChunks, totSize,
                NotRecurse ? numSubDirs : (isDirectoryEmptyAndNotUploadedYet ? 1 : 0));
        }
        catch (Exception ex)
        {
            Status.EntriesFailed.Add(new SingleEntryTransferStatus(dir.FullName, null, ex.StackTrace,
                EntryType.Directory, SingleChunkStatus.Failed));
        }
    } while (!NotRecurse);
}
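// Illustrative sketch, not part of the original source: it restates the upload rule applied in the
// loop above, where a file is chunked exactly when it is larger than one chunk. The class name, the
// method names, the ChunkSize value, and the job-count formula are hypothetical assumptions made only
// to make the rule concrete; the real chunk count comes from AddFileToConsumerQueue.
internal static class UploadChunkingSketch
{
    private const long ChunkSize = 240L * 1024 * 1024; // assumed chunk size for illustration

    // Mirrors the file.Length > ChunkSize test used when enqueueing upload jobs.
    internal static bool ShouldChunk(long fileLength) => fileLength > ChunkSize;

    // Number of jobs a file produces under this rule: one non-chunked job,
    // or ceil(length / ChunkSize) chunk jobs. With the assumed 240 MB chunk,
    // a 600 MB file would yield 3 chunk jobs.
    internal static long NumJobs(long fileLength) =>
        ShouldChunk(fileLength) ? (fileLength + ChunkSize - 1) / ChunkSize : 1;
}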